Docs Azure AI Agent Service

Azure AI Agent Service Integration

Azure AI Agent Service is a managed platform for building and running AI agents using the Azure AI Projects SDK. This guide shows how to add full distributed tracing with Nexus: per-run traces, tool call spans (CodeInterpreter, FileSearch, BingGrounding), token usage tracking, and agent failure surfacing.

Installation

pip install keylightdigital-nexus azure-ai-projects azure-identity

Get your Nexus API key from Dashboard → API Keys and set environment variables:

export NEXUS_API_KEY="nxs_your_key_here"
export AZURE_AI_PROJECT_CONN_STR="<your-project-connection-string>"

Find your project connection string in the Azure AI Foundry portal under your project → Overview → Connection string.

Basic thread run trace

Azure AI Agent Service is thread-based: every conversation is a Thread, and each invocation is a Run. Wrap each run in a Nexus trace so you can see status, duration, and metadata in the dashboard:

import os
import time
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential
from nexus_sdk import NexusClient

# Nexus client for sending traces; reads the API key exported earlier.
nexus = NexusClient(api_key=os.environ["NEXUS_API_KEY"])

# Azure AI Projects client; DefaultAzureCredential resolves the ambient
# Azure identity (CLI login, managed identity, environment variables, ...).
client = AIProjectClient.from_connection_string(
    conn_str=os.environ["AZURE_AI_PROJECT_CONN_STR"],
    credential=DefaultAzureCredential(),
)

# Create the agent once at startup; each run below reuses agent.id.
agent = client.agents.create_agent(
    model="gpt-4o",
    name="research-agent",
    instructions="You are a concise research assistant.",
)

def run_agent(task: str) -> str:
    """Run `task` on the module-level agent in a fresh thread, wrapped in a Nexus trace.

    Returns the text of the newest message on the thread (the agent's reply).
    Re-raises any exception after closing the trace with status "error".
    """
    thread = client.agents.create_thread()
    client.agents.create_message(
        thread_id=thread.id,
        role="user",
        content=task,
    )

    trace = nexus.start_trace({
        "agent_id": "azure-agent-service",
        "name": "run: " + task[:60],
        "status": "running",
        "started_at": nexus.now(),
        "metadata": {
            "task": task[:200],
            "agent_id": agent.id,
            "thread_id": thread.id,
            "model": "gpt-4o",
        },
    })
    try:
        # Create the run INSIDE the try block so a failure here still closes
        # the trace -- otherwise it would be stuck in "running" forever.
        run = client.agents.create_run(thread_id=thread.id, agent_id=agent.id)

        # Poll until the run reaches a terminal state. NOTE: "requires_action"
        # only resolves if you submit function-tool outputs; this agent defines
        # no function tools, so hosted tools complete server-side.
        while run.status in ("queued", "in_progress", "requires_action"):
            time.sleep(0.5)
            run = client.agents.get_run(thread_id=thread.id, run_id=run.id)

        if run.status == "completed":
            nexus.end_trace(trace["trace_id"], {"status": "success"})
        else:
            # failed / cancelled / expired: surface the run error in the trace.
            nexus.end_trace(trace["trace_id"], {
                "status": "error",
                "metadata": {
                    "run_status": run.status,
                    "error_code": run.last_error.code if run.last_error else None,
                    "error_message": run.last_error.message if run.last_error else None,
                },
            })

        # Messages come back newest-first by default, so data[0] is the reply.
        messages = client.agents.list_messages(thread_id=thread.id)
        return messages.data[0].content[0].text.value
    except Exception as e:
        nexus.end_trace(trace["trace_id"], {
            "status": "error",
            "metadata": {"error": str(e)},
        })
        raise

Tool call spans

Azure AI Agent Service exposes run steps that show exactly which tools were invoked. Fetch run steps after the run completes and record each tool invocation as a Nexus span:

import os
import time
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import CodeInterpreterTool
from azure.identity import DefaultAzureCredential
from nexus_sdk import NexusClient

# Nexus client for traces; Azure client authenticated via the ambient identity.
nexus = NexusClient(api_key=os.environ["NEXUS_API_KEY"])
client = AIProjectClient.from_connection_string(
    conn_str=os.environ["AZURE_AI_PROJECT_CONN_STR"],
    credential=DefaultAzureCredential(),
)

# Create agent with CodeInterpreter tool
# (.definitions supplies the tool-definition list the agents API expects).
agent = client.agents.create_agent(
    model="gpt-4o",
    name="tools-agent",
    instructions="You are a helpful assistant. Use code to compute answers.",
    tools=CodeInterpreterTool().definitions,
)

def run_with_tool_spans(task: str) -> str:
    """Run `task` and record each tool invocation (run step) as a Nexus span.

    Returns the text of the newest message on the thread. Re-raises any
    exception after closing the trace with status "error".
    """
    thread = client.agents.create_thread()
    client.agents.create_message(thread_id=thread.id, role="user", content=task)

    trace = nexus.start_trace({
        "agent_id": "azure-agent-tools",
        "name": "run: " + task[:60],
        "status": "running",
        "started_at": nexus.now(),
    })
    try:
        # Create the run INSIDE the try block so a failure here still closes
        # the trace -- otherwise it would be stuck in "running" forever.
        run = client.agents.create_run(thread_id=thread.id, agent_id=agent.id)
        while run.status in ("queued", "in_progress", "requires_action"):
            time.sleep(0.5)
            run = client.agents.get_run(thread_id=thread.id, run_id=run.id)

        # Record each tool-call run step as a Nexus span.
        steps = client.agents.list_run_steps(thread_id=thread.id, run_id=run.id)
        for step in steps.data:
            if step.type != "tool_calls":
                continue
            for tool_call in step.step_details.tool_calls:
                tool_type = tool_call.type  # "code_interpreter" / "file_search" / "bing_grounding"
                span = nexus.start_span(trace["trace_id"], {
                    "name": "tool:" + tool_type,
                    "type": "tool",
                    "metadata": {
                        "tool_type": tool_type,
                        "step_id": step.id,
                        "step_status": step.status,
                    },
                })
                # Per-step token usage, when the service reports it.
                step_usage = {}
                if step.usage:
                    step_usage = {
                        "prompt_tokens": step.usage.prompt_tokens,
                        "completion_tokens": step.usage.completion_tokens,
                    }
                nexus.end_span(span["id"], {
                    "output": "step " + step.id + " " + step.status,
                    "metadata": {"usage": step_usage},
                })

        status = "success" if run.status == "completed" else "error"
        nexus.end_trace(trace["trace_id"], {"status": status})

        # Messages come back newest-first by default, so data[0] is the reply.
        messages = client.agents.list_messages(thread_id=thread.id)
        return messages.data[0].content[0].text.value
    except Exception as e:
        nexus.end_trace(trace["trace_id"], {
            "status": "error",
            "metadata": {"error": str(e)},
        })
        raise

The tool_type field is code_interpreter, file_search, or bing_grounding. Filter by tool type in the Nexus dashboard to find slow or error-prone tools.

Token usage monitoring

Azure runs expose run.usage with prompt and completion token counts once the run completes. Record these as trace metadata to track costs and spot prompt bloat:

def run_with_token_tracking(task: str, price_per_1k_tokens: float = 0.005) -> str:
    """Run `task` and attach token usage plus an estimated cost to the trace.

    price_per_1k_tokens: blended USD price per 1K tokens used for the
    estimate; adjust to your model's actual pricing.
    """
    thread = client.agents.create_thread()
    client.agents.create_message(thread_id=thread.id, role="user", content=task)

    trace = nexus.start_trace({
        "agent_id": "azure-agent-token-tracking",
        "name": "run: " + task[:60],
        "status": "running",
        "started_at": nexus.now(),
        "metadata": {"model": "gpt-4o", "task": task[:200]},
    })
    try:
        # Create the run INSIDE the try block so a failure here still closes
        # the trace -- otherwise it would be stuck in "running" forever.
        run = client.agents.create_run(thread_id=thread.id, agent_id=agent.id)
        while run.status in ("queued", "in_progress", "requires_action"):
            time.sleep(0.5)
            run = client.agents.get_run(thread_id=thread.id, run_id=run.id)

        # run.usage is populated once the run reaches a terminal state.
        usage_metadata = {}
        if run.usage:
            total = run.usage.total_tokens
            estimated_cost = round((total / 1000) * price_per_1k_tokens, 6)
            usage_metadata = {
                "prompt_tokens": run.usage.prompt_tokens,
                "completion_tokens": run.usage.completion_tokens,
                "total_tokens": total,
                "estimated_cost_usd": estimated_cost,
            }

        nexus.end_trace(trace["trace_id"], {
            "status": "success" if run.status == "completed" else "error",
            "metadata": usage_metadata,
        })
        messages = client.agents.list_messages(thread_id=thread.id)
        return messages.data[0].content[0].text.value
    except Exception as e:
        nexus.end_trace(trace["trace_id"], {
            "status": "error",
            "metadata": {"error": str(e)},
        })
        raise

Sort traces by total_tokens in the Nexus dashboard to find the most expensive runs. A sudden spike in prompt_tokens usually means the conversation history grew unexpectedly large.

Failure surfacing

Runs can fail with run.status == "failed" and a last_error object. Always record these in the trace so failures are visible in the dashboard without reading server logs:

def run_with_failure_surfacing(task: str) -> str | None:
    """Run `task`, recording failed/cancelled/expired runs in the trace.

    Returns the agent's reply text, or None when the run did not complete.
    """
    thread = client.agents.create_thread()
    client.agents.create_message(thread_id=thread.id, role="user", content=task)

    trace = nexus.start_trace({
        "agent_id": "azure-agent-monitored",
        "name": "run: " + task[:60],
        "status": "running",
        "started_at": nexus.now(),
    })
    try:
        # Create the run INSIDE the try block so a failure here still closes
        # the trace -- otherwise it would be stuck in "running" forever.
        run = client.agents.create_run(thread_id=thread.id, agent_id=agent.id)
        while run.status in ("queued", "in_progress", "requires_action"):
            time.sleep(0.5)
            run = client.agents.get_run(thread_id=thread.id, run_id=run.id)

        if run.status == "failed":
            # Surface the service-provided error detail in the trace.
            error_code = run.last_error.code if run.last_error else "unknown"
            error_msg = run.last_error.message if run.last_error else "no error detail"
            nexus.end_trace(trace["trace_id"], {
                "status": "error",
                "metadata": {
                    "run_status": "failed",
                    "error_code": error_code,
                    "error_message": error_msg,
                    "thread_id": thread.id,
                    "run_id": run.id,
                },
            })
            return None

        if run.status != "completed":
            # Covers "cancelled" plus any other non-success terminal state
            # (e.g. "expired") so they are never recorded as success.
            nexus.end_trace(trace["trace_id"], {
                "status": "error",
                "metadata": {"run_status": run.status, "run_id": run.id},
            })
            return None

        nexus.end_trace(trace["trace_id"], {"status": "success"})
        messages = client.agents.list_messages(thread_id=thread.id)
        return messages.data[0].content[0].text.value
    except Exception as e:
        nexus.end_trace(trace["trace_id"], {
            "status": "error",
            "metadata": {"error": str(e)},
        })
        raise

Common error_code values: rate_limit_exceeded, server_error, invalid_request_error. Filter traces by status: error in the Nexus dashboard for a quick failure summary.

TypeScript

Use @azure/ai-projects with the Nexus TypeScript SDK. The pattern mirrors Python: create thread, start trace, poll run, end trace with status and usage:

npm install @keylightdigital/nexus @azure/ai-projects @azure/identity
import { AIProjectClient } from '@azure/ai-projects';
import { DefaultAzureCredential } from '@azure/identity';
import NexusClient from '@keylightdigital/nexus';

// Nexus client for traces; DefaultAzureCredential resolves the ambient
// Azure identity (CLI login, managed identity, environment variables, ...).
const nexus = new NexusClient(process.env.NEXUS_API_KEY!);
const client = new AIProjectClient(
  process.env.AZURE_AI_PROJECT_CONN_STR!,
  new DefaultAzureCredential(),
);

/**
 * Run `task` on a fresh agent/thread, wrapped in a Nexus trace.
 * Returns the agent's reply text ('' if the newest message is not text).
 * Re-throws any error after closing the trace with status "error".
 *
 * NOTE: this creates a new agent per call for demo simplicity; in
 * production create the agent once and reuse its id.
 */
async function runAgent(task: string): Promise<string> {
  const agent = await client.agents.createAgent('gpt-4o', {
    name: 'ts-research-agent',
    instructions: 'You are a concise research assistant.',
  });
  const thread = await client.agents.createThread();
  await client.agents.createMessage(thread.id, { role: 'user', content: task });

  const trace = await nexus.startTrace({
    agent_id: 'azure-agent-ts',
    name: 'run: ' + task.slice(0, 60),
    status: 'running',
    started_at: new Date().toISOString(),
    metadata: { task: task.slice(0, 200), model: 'gpt-4o' },
  });

  try {
    // Create the run INSIDE the try block so a failure here still closes
    // the trace -- otherwise it would be stuck in "running" forever.
    let run = await client.agents.createRun(thread.id, agent.id);
    while (
      run.status === 'queued' ||
      run.status === 'in_progress' ||
      run.status === 'requires_action'
    ) {
      await new Promise((r) => setTimeout(r, 500));
      run = await client.agents.getRun(thread.id, run.id);
    }

    // Usage is populated once the run reaches a terminal state.
    const usage = run.usage
      ? {
          prompt_tokens: run.usage.promptTokens,
          completion_tokens: run.usage.completionTokens,
          total_tokens: run.usage.totalTokens,
        }
      : {};

    if (run.status === 'completed') {
      await nexus.endTrace(trace.id, { status: 'success', metadata: usage });
    } else {
      // failed / cancelled / expired: surface the run error in the trace.
      await nexus.endTrace(trace.id, {
        status: 'error',
        metadata: {
          run_status: run.status,
          error_code: run.lastError?.code ?? null,
          error_message: run.lastError?.message ?? null,
          ...usage,
        },
      });
    }

    // Messages come back newest-first by default, so data[0] is the reply.
    const messages = await client.agents.listMessages(thread.id);
    const last = messages.data[0];
    return last.content[0].type === 'text' ? last.content[0].text.value : '';
  } catch (err: unknown) {
    const msg = err instanceof Error ? err.message : String(err);
    await nexus.endTrace(trace.id, { status: 'error', metadata: { error: msg } });
    throw err;
  }
}

Debugging patterns

Run stuck in “requires_action”

This status means the agent invoked a function tool and is waiting for the host to submit tool outputs via submit_tool_outputs_to_run(). If you don't handle this in your polling loop, the run will time out. Check run.required_action.submit_tool_outputs.tool_calls and submit outputs for each call.

Traces stuck in “running” state

A Nexus trace stays running if nexus.end_trace() is never called. Wrap everything after start_trace — including the create_run call itself — in a try/except (or try/finally) block so the trace is closed even when run creation or the polling loop throws.

High prompt token counts

Azure AI agents accumulate all thread messages in context. If prompt_tokens grows with each run on the same thread, create a new thread per conversation. Use the Nexus trace timeline to correlate token spikes with specific runs.

CodeInterpreter produces unexpected output

Add a tool:code_interpreter span and capture the step details from step.step_details.tool_calls[0].code_interpreter. The input field shows the exact code the model wrote; the outputs array shows what it returned.

Ready to instrument your Azure AI agents?

Start for free — no credit card required. See traces in under 5 minutes.

Start free →