Azure AI Agent Service Integration
Azure AI Agent Service is a managed platform for building and running AI agents using the Azure AI Projects SDK. This guide shows how to add full distributed tracing with Nexus: per-run traces, tool call spans (CodeInterpreter, FileSearch, BingGrounding), token usage tracking, and agent failure surfacing.
Installation
pip install keylightdigital-nexus azure-ai-projects azure-identity
Get your Nexus API key from Dashboard → API Keys and set environment variables:
export NEXUS_API_KEY="nxs_your_key_here"
export AZURE_AI_PROJECT_CONN_STR="<your-project-connection-string>"
Find your project connection string in the Azure AI Foundry portal under your project → Overview → Connection string.
Basic thread run trace
Azure AI Agent Service is thread-based: every conversation is a Thread, and each invocation is a Run. Wrap each run in a Nexus trace so you can see status, duration, and metadata in the dashboard:
import os
import time
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential
from nexus_sdk import NexusClient
# Nexus tracing client; reads the API key set earlier via NEXUS_API_KEY.
nexus = NexusClient(api_key=os.environ["NEXUS_API_KEY"])
# Azure AI Projects client built from the project connection string.
# DefaultAzureCredential resolves the ambient identity (az login, managed
# identity, environment credentials, ...).
client = AIProjectClient.from_connection_string(
    conn_str=os.environ["AZURE_AI_PROJECT_CONN_STR"],
    credential=DefaultAzureCredential(),
)
# One reusable agent definition; each task below runs in its own thread.
agent = client.agents.create_agent(
    model="gpt-4o",
    name="research-agent",
    instructions="You are a concise research assistant.",
)
def run_agent(task: str) -> str:
    """Run *task* on the shared agent in a fresh thread, wrapped in a Nexus trace.

    Creates a thread, posts the user message, starts a Nexus trace, polls the
    run to completion, and closes the trace with the run's outcome.

    Args:
        task: The user request to send to the agent.

    Returns:
        The text of the newest message on the thread (presumably the agent's
        reply — the newest-first ordering should be confirmed against the SDK).

    Raises:
        Whatever the Azure SDK raises; the trace is closed before re-raising.
    """
    thread = client.agents.create_thread()
    client.agents.create_message(
        thread_id=thread.id,
        role="user",
        content=task,
    )
    trace = nexus.start_trace({
        "agent_id": "azure-agent-service",
        "name": "run: " + task[:60],
        "status": "running",
        "started_at": nexus.now(),
        "metadata": {
            "task": task[:200],
            "agent_id": agent.id,
            "thread_id": thread.id,
            "model": "gpt-4o",
        },
    })
    trace_closed = False  # prevents a second end_trace in the except path
    try:
        # create_run lives inside the try so the trace is closed even when
        # run creation itself fails (auth errors, rate limits, ...).
        run = client.agents.create_run(thread_id=thread.id, agent_id=agent.id)
        # NOTE: a run stuck in "requires_action" (function tool awaiting
        # outputs) loops here until the service expires the run — see the
        # debugging section below.
        while run.status in ("queued", "in_progress", "requires_action"):
            time.sleep(0.5)
            run = client.agents.get_run(thread_id=thread.id, run_id=run.id)
        if run.status == "completed":
            nexus.end_trace(trace["trace_id"], {"status": "success"})
        else:
            # Surface the run's failure detail so the dashboard shows why.
            nexus.end_trace(trace["trace_id"], {
                "status": "error",
                "metadata": {
                    "run_status": run.status,
                    "error_code": run.last_error.code if run.last_error else None,
                    "error_message": run.last_error.message if run.last_error else None,
                },
            })
        trace_closed = True
        messages = client.agents.list_messages(thread_id=thread.id)
        return messages.data[0].content[0].text.value
    except Exception as e:
        if not trace_closed:
            nexus.end_trace(trace["trace_id"], {
                "status": "error",
                "metadata": {"error": str(e)},
            })
        raise
Tool call spans
Azure AI Agent Service exposes run steps that show exactly which tools were invoked. Fetch run steps after the run completes and record each tool invocation as a Nexus span:
import os
import time
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import CodeInterpreterTool
from azure.identity import DefaultAzureCredential
from nexus_sdk import NexusClient
# Nexus tracing client; reads the API key from NEXUS_API_KEY.
nexus = NexusClient(api_key=os.environ["NEXUS_API_KEY"])
# Azure AI Projects client authenticated via the ambient Azure identity.
client = AIProjectClient.from_connection_string(
    conn_str=os.environ["AZURE_AI_PROJECT_CONN_STR"],
    credential=DefaultAzureCredential(),
)
# Create an agent equipped with the CodeInterpreter tool; its tool_calls
# run steps are what we turn into Nexus spans below.
agent = client.agents.create_agent(
    model="gpt-4o",
    name="tools-agent",
    instructions="You are a helpful assistant. Use code to compute answers.",
    tools=CodeInterpreterTool().definitions,
)
def run_with_tool_spans(task: str) -> str:
    """Run *task* and record each tool-call run step as a Nexus span.

    After the run settles, every "tool_calls" run step is emitted as a span
    named ``tool:<type>`` where ``<type>`` is code_interpreter, file_search,
    or bing_grounding, including per-step token usage when the service
    reports it.

    Args:
        task: The user request to send to the agent.

    Returns:
        The text of the newest message on the thread.

    Raises:
        Whatever the Azure SDK raises; the trace is closed before re-raising.
    """
    thread = client.agents.create_thread()
    client.agents.create_message(thread_id=thread.id, role="user", content=task)
    trace = nexus.start_trace({
        "agent_id": "azure-agent-tools",
        "name": "run: " + task[:60],
        "status": "running",
        "started_at": nexus.now(),
    })
    trace_closed = False  # prevents a second end_trace in the except path
    try:
        # Created inside the try so the trace is closed even when run
        # creation itself fails.
        run = client.agents.create_run(thread_id=thread.id, agent_id=agent.id)
        while run.status in ("queued", "in_progress", "requires_action"):
            time.sleep(0.5)
            run = client.agents.get_run(thread_id=thread.id, run_id=run.id)
        # Turn each tool invocation from the run steps into a Nexus span.
        steps = client.agents.list_run_steps(thread_id=thread.id, run_id=run.id)
        for step in steps.data:
            if step.type != "tool_calls":
                continue
            for tool_call in step.step_details.tool_calls:
                tool_type = tool_call.type  # "code_interpreter" / "file_search" / "bing_grounding"
                span = nexus.start_span(trace["trace_id"], {
                    "name": "tool:" + tool_type,
                    "type": "tool",
                    "metadata": {
                        "tool_type": tool_type,
                        "step_id": step.id,
                        "step_status": step.status,
                    },
                })
                # Per-step usage is optional — only attach it when reported.
                step_usage = {}
                if step.usage:
                    step_usage = {
                        "prompt_tokens": step.usage.prompt_tokens,
                        "completion_tokens": step.usage.completion_tokens,
                    }
                nexus.end_span(span["id"], {
                    "output": "step " + step.id + " " + step.status,
                    "metadata": {"usage": step_usage},
                })
        status = "success" if run.status == "completed" else "error"
        nexus.end_trace(trace["trace_id"], {"status": status})
        trace_closed = True
        messages = client.agents.list_messages(thread_id=thread.id)
        return messages.data[0].content[0].text.value
    except Exception as e:
        if not trace_closed:
            nexus.end_trace(trace["trace_id"], {
                "status": "error",
                "metadata": {"error": str(e)},
            })
        raise
The tool_type field is code_interpreter, file_search, or bing_grounding. Filter by tool type in the Nexus dashboard to find slow or error-prone tools.
Token usage monitoring
Azure runs expose run.usage with prompt and completion token counts once the run completes. Record these as trace metadata to track costs and spot prompt bloat:
def run_with_token_tracking(task: str, price_per_1k_tokens: float = 0.005) -> str:
    """Run *task* and record the run's token usage and estimated cost in Nexus.

    Args:
        task: The user request to send to the agent.
        price_per_1k_tokens: Blended USD price per 1k tokens used for the
            cost estimate (adjust to your model's actual pricing).

    Returns:
        The text of the newest message on the thread.

    Raises:
        Whatever the Azure SDK raises; the trace is closed before re-raising.
    """
    thread = client.agents.create_thread()
    client.agents.create_message(thread_id=thread.id, role="user", content=task)
    trace = nexus.start_trace({
        "agent_id": "azure-agent-token-tracking",
        "name": "run: " + task[:60],
        "status": "running",
        "started_at": nexus.now(),
        "metadata": {"model": "gpt-4o", "task": task[:200]},
    })
    trace_closed = False  # prevents a second end_trace in the except path
    try:
        # Created inside the try so the trace is closed even when run
        # creation itself fails.
        run = client.agents.create_run(thread_id=thread.id, agent_id=agent.id)
        while run.status in ("queued", "in_progress", "requires_action"):
            time.sleep(0.5)
            run = client.agents.get_run(thread_id=thread.id, run_id=run.id)
        # run.usage is populated once the run settles; guard for None anyway.
        usage_metadata = {}
        if run.usage:
            total = run.usage.total_tokens
            estimated_cost = round((total / 1000) * price_per_1k_tokens, 6)
            usage_metadata = {
                "prompt_tokens": run.usage.prompt_tokens,
                "completion_tokens": run.usage.completion_tokens,
                "total_tokens": total,
                "estimated_cost_usd": estimated_cost,
            }
        nexus.end_trace(trace["trace_id"], {
            "status": "success" if run.status == "completed" else "error",
            "metadata": usage_metadata,
        })
        trace_closed = True
        messages = client.agents.list_messages(thread_id=thread.id)
        return messages.data[0].content[0].text.value
    except Exception as e:
        if not trace_closed:
            nexus.end_trace(trace["trace_id"], {
                "status": "error",
                "metadata": {"error": str(e)},
            })
        raise
Sort traces by total_tokens in the Nexus dashboard to find the most expensive runs. A sudden spike in prompt_tokens usually means the conversation history grew unexpectedly large.
Failure surfacing
Runs can fail with run.status == "failed" and a last_error object. Always record these in the trace so failures are visible in the dashboard without reading server logs:
def run_with_failure_surfacing(task: str) -> str | None:
    """Run *task*, surfacing failed/cancelled runs as error traces in Nexus.

    Args:
        task: The user request to send to the agent.

    Returns:
        The text of the newest message on the thread, or ``None`` when the
        run failed or was cancelled (the failure detail lives in the trace).

    Raises:
        Whatever the Azure SDK raises; the trace is closed before re-raising.
    """
    thread = client.agents.create_thread()
    client.agents.create_message(thread_id=thread.id, role="user", content=task)
    trace = nexus.start_trace({
        "agent_id": "azure-agent-monitored",
        "name": "run: " + task[:60],
        "status": "running",
        "started_at": nexus.now(),
    })
    trace_closed = False  # prevents a second end_trace in the except path
    try:
        # Created inside the try so the trace is closed even when run
        # creation itself fails.
        run = client.agents.create_run(thread_id=thread.id, agent_id=agent.id)
        while run.status in ("queued", "in_progress", "requires_action"):
            time.sleep(0.5)
            run = client.agents.get_run(thread_id=thread.id, run_id=run.id)
        if run.status == "failed":
            # Capture last_error so the dashboard shows the root cause
            # without anyone reading server logs.
            error_code = run.last_error.code if run.last_error else "unknown"
            error_msg = run.last_error.message if run.last_error else "no error detail"
            nexus.end_trace(trace["trace_id"], {
                "status": "error",
                "metadata": {
                    "run_status": "failed",
                    "error_code": error_code,
                    "error_message": error_msg,
                    "thread_id": thread.id,
                    "run_id": run.id,
                },
            })
            return None
        if run.status == "cancelled":
            nexus.end_trace(trace["trace_id"], {
                "status": "error",
                "metadata": {"run_status": "cancelled", "run_id": run.id},
            })
            return None
        nexus.end_trace(trace["trace_id"], {"status": "success"})
        trace_closed = True
        messages = client.agents.list_messages(thread_id=thread.id)
        return messages.data[0].content[0].text.value
    except Exception as e:
        if not trace_closed:
            nexus.end_trace(trace["trace_id"], {
                "status": "error",
                "metadata": {"error": str(e)},
            })
        raise
Common error_code values: rate_limit_exceeded, server_error, invalid_request_error. Filter traces by status: error in the Nexus dashboard for a quick failure summary.
TypeScript
Use @azure/ai-projects with the Nexus TypeScript SDK. The pattern mirrors Python: create thread, start trace, poll run, end trace with status and usage:
npm install @keylightdigital/nexus @azure/ai-projects @azure/identity
import { AIProjectClient } from '@azure/ai-projects';
import { DefaultAzureCredential } from '@azure/identity';
import NexusClient from '@keylightdigital/nexus';
// Nexus tracing client (NEXUS_API_KEY must be set in the environment).
const nexus = new NexusClient(process.env.NEXUS_API_KEY!);
// Azure AI Projects client; DefaultAzureCredential resolves the ambient
// identity (az login, managed identity, environment credentials, ...).
const client = new AIProjectClient(
  process.env.AZURE_AI_PROJECT_CONN_STR!,
  new DefaultAzureCredential(),
);
/**
 * Run `task` on a fresh agent/thread, wrapped in a Nexus trace.
 *
 * Polls the run to a terminal state, records token usage, and closes the
 * trace with the run's outcome. The trace is also closed before any SDK
 * error is re-thrown, so traces never stay stuck in "running".
 *
 * NOTE(review): this creates a new agent per call; for production, create
 * the agent once at startup and reuse it so agents don't accumulate.
 *
 * @param task The user request to send to the agent.
 * @returns The newest message's text, or '' when it is not a text part.
 */
async function runAgent(task: string): Promise<string> {
  const agent = await client.agents.createAgent('gpt-4o', {
    name: 'ts-research-agent',
    instructions: 'You are a concise research assistant.',
  });
  const thread = await client.agents.createThread();
  await client.agents.createMessage(thread.id, { role: 'user', content: task });
  const trace = await nexus.startTrace({
    agent_id: 'azure-agent-ts',
    name: 'run: ' + task.slice(0, 60),
    status: 'running',
    started_at: new Date().toISOString(),
    metadata: { task: task.slice(0, 200), model: 'gpt-4o' },
  });
  let traceClosed = false; // prevents a second endTrace in the catch path
  try {
    // createRun lives inside the try so the trace is closed even when run
    // creation itself fails (auth errors, rate limits, ...).
    let run = await client.agents.createRun(thread.id, agent.id);
    while (
      run.status === 'queued' ||
      run.status === 'in_progress' ||
      run.status === 'requires_action'
    ) {
      await new Promise((r) => setTimeout(r, 500));
      run = await client.agents.getRun(thread.id, run.id);
    }
    // Usage is reported once the run settles; attach it when present.
    const usage = run.usage
      ? {
          prompt_tokens: run.usage.promptTokens,
          completion_tokens: run.usage.completionTokens,
          total_tokens: run.usage.totalTokens,
        }
      : {};
    if (run.status === 'completed') {
      await nexus.endTrace(trace.id, { status: 'success', metadata: usage });
    } else {
      // Surface the run's failure detail so the dashboard shows why.
      await nexus.endTrace(trace.id, {
        status: 'error',
        metadata: {
          run_status: run.status,
          error_code: run.lastError?.code ?? null,
          error_message: run.lastError?.message ?? null,
          ...usage,
        },
      });
    }
    traceClosed = true;
    const messages = await client.agents.listMessages(thread.id);
    const last = messages.data[0];
    return last.content[0].type === 'text' ? last.content[0].text.value : '';
  } catch (err: unknown) {
    const msg = err instanceof Error ? err.message : String(err);
    if (!traceClosed) {
      await nexus.endTrace(trace.id, { status: 'error', metadata: { error: msg } });
    }
    throw err;
  }
}
Debugging patterns
Run stuck in “requires_action”
This status means the agent invoked a function tool and is waiting for the host to submit tool outputs via submit_tool_outputs_to_run(). If you don't handle this in your polling loop, the run will time out. Check run.required_action.submit_tool_outputs.tool_calls and submit outputs for each call.
Traces stuck in “running” state
A Nexus trace stays running if nexus.end_trace() is never called. Use a try/except/finally block so the trace closes even when the polling loop throws or the run errors out.
High prompt token counts
Azure AI agents accumulate all thread messages in context. If prompt_tokens grows with each run on the same thread, create a new thread per conversation. Use the Nexus trace timeline to correlate token spikes with specific runs.
CodeInterpreter produces unexpected output
Add a tool:code_interpreter span and capture the step details from step.step_details.tool_calls[0].code_interpreter. The input field shows the exact code the model wrote; the outputs array shows what it returned.
Ready to instrument your Azure AI agents?
Start for free — no credit card required. See traces in under 5 minutes.
Start free →