Docs › LangGraph

LangGraph Integration

Add distributed tracing to LangGraph StateGraph nodes, routing decisions, and state transitions. Debug infinite loops, wrong routing, and state corruption in production.

Installation

pip install keylightdigital-nexus langgraph openai

Get your API key from Dashboard → API Keys.

Wrapping nodes with spans

Every LangGraph node is a Python function that takes state and returns updated state. Wrap each one in a Nexus span to capture input shape, output, and routing decisions:

import functools
import os
from typing import TypedDict

from nexus_sdk import NexusClient

nexus = NexusClient(api_key=os.environ["NEXUS_API_KEY"])

class AgentState(TypedDict):
    """State dict threaded through every LangGraph node in this guide."""
    messages: list[dict]  # OpenAI-style chat messages ({"role": ..., "content": ...})
    next_action: str  # routing decision written by the router node ('search'/'answer'/'clarify')
    tool_result: str | None  # search tool output; None until the search node runs

def node_span(trace_id: str, node_name: str):
    """Decorator factory that wraps a LangGraph node function in a Nexus span.

    On entry the span records the incoming message count and routing action;
    on success it records the outgoing action and message count; on failure it
    closes the span with the error message and re-raises so graph execution
    still fails visibly.

    Args:
        trace_id: ID of the active Nexus trace the span attaches to.
        node_name: Human-readable node label used in the span name and metadata.
    """
    def decorator(fn):
        # functools.wraps preserves fn.__name__/__doc__ so the wrapped node is
        # still identifiable in debuggers, logs, and LangGraph introspection.
        @functools.wraps(fn)
        def wrapper(state: AgentState) -> AgentState:
            span = nexus.start_span(trace_id, {
                "name": f"node:{node_name}",
                "type": "tool",
                "metadata": {
                    "node": node_name,
                    "msg_count_in": len(state["messages"]),
                    "next_action_in": state.get("next_action", ""),
                },
            })
            try:
                result = fn(state)
                nexus.end_span(span["id"], {
                    "output": str(result.get("next_action", "")),
                    "metadata": {
                        "node": node_name,
                        "next_action_out": result.get("next_action", ""),
                        # Fall back to the input messages if the node dropped the key.
                        "msg_count_out": len(result.get("messages", state["messages"])),
                    },
                })
                return result
            except Exception as e:
                # Record the failure on the span, then re-raise — tracing must
                # never swallow node exceptions.
                nexus.end_span(span["id"], {"error": str(e)})
                raise
        return wrapper
    return decorator

Tracing routing decisions

Router nodes are the most important to instrument — they determine which path the graph takes. Log the input and output action as metadata:

from openai import OpenAI

openai_client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

def make_router_node(trace_id: str):
    """Build the router node for a trace, instrumented with a Nexus span."""
    valid_actions = ("search", "answer", "clarify")

    @node_span(trace_id, "router")
    def router_node(state: AgentState) -> AgentState:
        completion = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "Reply with exactly one word: 'search', 'answer', or 'clarify'."},
                *state["messages"],
            ],
        )
        raw = completion.choices[0].message.content
        action = raw.strip().lower()
        # Anything outside the expected vocabulary collapses to 'answer',
        # which keeps the graph from looping on unrecognized routes.
        if action not in valid_actions:
            action = "answer"
        updated = dict(state)
        updated["next_action"] = action
        return updated

    return router_node

def route_decision(state: AgentState) -> str:
    """Next-node selector for add_conditional_edges; 'answer' when unset."""
    try:
        return state["next_action"]
    except KeyError:
        return "answer"

Recording state metadata

Beyond node-level spans, record trace-level metadata about the run when you start the trace:

def run_graph(query: str, user_id: str) -> str:
    """Execute the graph under a Nexus trace carrying run-level metadata."""
    run_metadata = {
        "user_id": user_id,
        "environment": os.environ.get("APP_ENV", "dev"),
        "query_length": len(query),
        "model": "gpt-4o-mini",
    }
    trace = nexus.start_trace({
        "agent_id": "langgraph-agent",
        "name": f"query:{query[:60]}",
        "status": "running",
        "started_at": nexus.now(),
        "metadata": run_metadata,
    })
    trace_id = trace["trace_id"]
    try:
        # ... build and run graph using trace_id ...
        result = "final answer here"
        nexus.end_trace(trace_id, {"status": "success"})
        return result
    except Exception as e:
        # Mark the trace failed before propagating the original exception.
        nexus.end_trace(trace_id, {"status": "error", "metadata": {"error": str(e)}})
        raise

Full example: router + tool + answer

from langgraph.graph import StateGraph, END

def run_agent(query: str, user_id: str) -> str:
    """End-to-end example: traced router → optional search tool → answer."""
    trace = nexus.start_trace({
        "agent_id": "langgraph-agent",
        "name": f"query:{query[:60]}",
        "status": "running",
        "started_at": nexus.now(),
        "metadata": {"user_id": user_id, "environment": "production"},
    })
    trace_id = trace["trace_id"]

    try:
        @node_span(trace_id, "router")
        def router_node(state: AgentState) -> AgentState:
            completion = openai_client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "Reply: 'search' or 'answer'"},
                    *state["messages"],
                ],
            )
            action = completion.choices[0].message.content.strip().lower()
            # Unexpected model output falls back to 'answer' so routing never dead-ends.
            if action not in ("search", "answer"):
                action = "answer"
            return {**state, "next_action": action}

        @node_span(trace_id, "tool_search")
        def search_node(state: AgentState) -> AgentState:
            tool_output = f"Search result for: {state['messages'][-1]['content']}"
            return {
                **state,
                "tool_result": tool_output,
                "messages": [*state["messages"], {"role": "tool", "content": tool_output}],
            }

        @node_span(trace_id, "answer")
        def answer_node(state: AgentState) -> AgentState:
            completion = openai_client.chat.completions.create(
                model="gpt-4o", messages=state["messages"]
            )
            reply = completion.choices[0].message.content
            return {**state, "messages": [*state["messages"], {"role": "assistant", "content": reply}]}

        def pick_next(s: AgentState) -> str:
            # Conditional-edge selector: follow the router's decision.
            return s.get("next_action", "answer")

        workflow = StateGraph(AgentState)
        for name, node_fn in (("router", router_node), ("search", search_node), ("answer", answer_node)):
            workflow.add_node(name, node_fn)
        workflow.set_entry_point("router")
        workflow.add_conditional_edges("router", pick_next, {
            "search": "search", "answer": "answer"
        })
        workflow.add_edge("search", "answer")
        workflow.add_edge("answer", END)

        compiled = workflow.compile()
        initial_state = {"messages": [{"role": "user", "content": query}], "next_action": "", "tool_result": None}
        final_state = compiled.invoke(initial_state)
        nexus.end_trace(trace_id, {"status": "success"})
        return final_state["messages"][-1]["content"]

    except Exception as e:
        # Mark the trace failed before propagating the original exception.
        nexus.end_trace(trace_id, {"status": "error", "metadata": {"error": str(e)}})
        raise

Debugging patterns

Infinite loop

If the same node appears 10+ times in the span waterfall, a conditional edge is stuck. Check next_action_out metadata on the router spans — it will show the same value repeating.

Wrong routing

Compare next_action_out values across successful vs. failing traces. A model returning an unexpected value (e.g. "Search" with capital S) will break string equality routing — normalize in the router node.

State corruption

Log msg_count_in and msg_count_out on each node. If a node shows fewer messages out than in, it is overwriting instead of appending — a common TypedDict merge mistake in Python.

Ready to instrument your LangGraph agents?

Start for free — no credit card required. See traces in under 5 minutes.

Start free →