LangGraph Integration
Add distributed tracing to LangGraph StateGraph nodes, routing decisions, and state transitions. Debug infinite loops, wrong routing, and state corruption in production.
Installation
pip install keylightdigital-nexus langgraph openai
Get your API key from Dashboard → API Keys.
Wrapping nodes with spans
Every LangGraph node is a Python function that takes state and returns updated state. Wrap each one in a Nexus span to capture input shape, output, and routing decisions:
import functools
import os
from typing import TypedDict

from nexus_sdk import NexusClient
nexus = NexusClient(api_key=os.environ["NEXUS_API_KEY"])
class AgentState(TypedDict):
    """Shared state passed between LangGraph nodes for one agent run."""
    # Full chat transcript; nodes are expected to append messages, not replace
    # them (the msg_count_in/out span metadata is used to catch overwrites).
    messages: list[dict]
    # Routing action chosen by the router node (e.g. "search" / "answer" / "clarify").
    next_action: str
    # Output of the most recent tool call, or None before any tool has run.
    tool_result: str | None
def node_span(trace_id: str, node_name: str):
    """Return a decorator that wraps a LangGraph node function in a Nexus span.

    The span records message counts and the routing action on the way into and
    out of the node, so infinite loops and state corruption show up directly in
    the span waterfall. Exceptions are recorded on the span and re-raised so
    LangGraph's own error handling still runs.

    Args:
        trace_id: Nexus trace the node spans attach to.
        node_name: Human-readable node label used in the span name and metadata.
    """
    def decorator(fn):
        @functools.wraps(fn)  # keep the node fn's __name__/__doc__ for debugging
        def wrapper(state: AgentState) -> AgentState:
            span = nexus.start_span(trace_id, {
                "name": f"node:{node_name}",
                "type": "tool",
                "metadata": {
                    "node": node_name,
                    "msg_count_in": len(state["messages"]),
                    "next_action_in": state.get("next_action", ""),
                },
            })
            try:
                result = fn(state)
            except Exception as e:
                # Record the failure on the span, then let LangGraph see the error.
                nexus.end_span(span["id"], {"error": str(e)})
                raise
            else:
                nexus.end_span(span["id"], {
                    "output": str(result.get("next_action", "")),
                    "metadata": {
                        "node": node_name,
                        "next_action_out": result.get("next_action", ""),
                        # Fall back to the input count if the node dropped "messages".
                        "msg_count_out": len(result.get("messages", state["messages"])),
                    },
                })
                return result
        return wrapper
    return decorator
Tracing routing decisions
Router nodes are the most important to instrument — they determine which path the graph takes. Log the input and output action as metadata:
from openai import OpenAI
openai_client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
def make_router_node(trace_id: str):
    """Build the traced router node bound to the given trace.

    The router asks a cheap model for a single-word action and normalizes
    anything outside the known action set so routing never dead-ends.
    """
    valid_actions = ("search", "answer", "clarify")

    @node_span(trace_id, "router")
    def router_node(state: AgentState) -> AgentState:
        completion = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "Reply with exactly one word: 'search', 'answer', or 'clarify'."},
                *state["messages"],
            ],
        )
        raw = completion.choices[0].message.content
        action = raw.strip().lower()
        # Unexpected model output falls back to 'answer' to prevent routing loops.
        if action not in valid_actions:
            action = "answer"
        return {**state, "next_action": action}

    return router_node
def route_decision(state: AgentState) -> str:
    """Return the next node name; called by add_conditional_edges.

    Defaults to "answer" only when the router has not set next_action at all.
    """
    try:
        return state["next_action"]
    except KeyError:
        return "answer"
Recording state metadata
Beyond node-level spans, record trace-level metadata about the run when you start the trace:
def run_graph(query: str, user_id: str) -> str:
    """Run the graph for one query, wrapping the whole run in a Nexus trace.

    Trace-level metadata (user, environment, query size, model) is attached
    up front so runs can be filtered and compared in the dashboard.
    """
    run_metadata = {
        "user_id": user_id,
        "environment": os.environ.get("APP_ENV", "dev"),
        "query_length": len(query),
        "model": "gpt-4o-mini",
    }
    trace = nexus.start_trace({
        "agent_id": "langgraph-agent",
        "name": f"query:{query[:60]}",
        "status": "running",
        "started_at": nexus.now(),
        "metadata": run_metadata,
    })
    trace_id = trace["trace_id"]
    try:
        # ... build and run graph using trace_id ...
        result = "final answer here"
        nexus.end_trace(trace_id, {"status": "success"})
        return result
    except Exception as e:
        nexus.end_trace(trace_id, {"status": "error", "metadata": {"error": str(e)}})
        raise
Full example: router + tool + answer
from langgraph.graph import StateGraph, END
def run_agent(query: str, user_id: str) -> str:
    """End-to-end traced run: router -> optional search tool -> answer."""
    trace = nexus.start_trace({
        "agent_id": "langgraph-agent",
        "name": f"query:{query[:60]}",
        "status": "running",
        "started_at": nexus.now(),
        "metadata": {"user_id": user_id, "environment": "production"},
    })
    trace_id = trace["trace_id"]
    try:
        @node_span(trace_id, "router")
        def router_node(state: AgentState) -> AgentState:
            # Cheap model picks the path; anything unexpected becomes 'answer'.
            completion = openai_client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "Reply: 'search' or 'answer'"},
                    *state["messages"],
                ],
            )
            choice = completion.choices[0].message.content.strip().lower()
            if choice not in ("search", "answer"):
                choice = "answer"
            return {**state, "next_action": choice}

        @node_span(trace_id, "tool_search")
        def search_node(state: AgentState) -> AgentState:
            # Stand-in for a real tool call; appends the result as a tool message.
            hit = f"Search result for: {state['messages'][-1]['content']}"
            updated = [*state["messages"], {"role": "tool", "content": hit}]
            return {**state, "tool_result": hit, "messages": updated}

        @node_span(trace_id, "answer")
        def answer_node(state: AgentState) -> AgentState:
            completion = openai_client.chat.completions.create(
                model="gpt-4o", messages=state["messages"]
            )
            reply = completion.choices[0].message.content
            return {**state, "messages": [*state["messages"], {"role": "assistant", "content": reply}]}

        graph = StateGraph(AgentState)
        graph.add_node("router", router_node)
        graph.add_node("search", search_node)
        graph.add_node("answer", answer_node)
        graph.set_entry_point("router")
        graph.add_conditional_edges(
            "router",
            lambda s: s.get("next_action", "answer"),
            {"search": "search", "answer": "answer"},
        )
        graph.add_edge("search", "answer")
        graph.add_edge("answer", END)

        initial_state = {
            "messages": [{"role": "user", "content": query}],
            "next_action": "",
            "tool_result": None,
        }
        final_state = graph.compile().invoke(initial_state)
        nexus.end_trace(trace_id, {"status": "success"})
        return final_state["messages"][-1]["content"]
    except Exception as e:
        nexus.end_trace(trace_id, {"status": "error", "metadata": {"error": str(e)}})
        raise
Debugging patterns
Infinite loop
If the same node appears 10+ times in the span waterfall, a conditional edge is stuck. Check next_action_out metadata on the router spans — it will show the same value repeating.
Wrong routing
Compare next_action_out values across successful vs. failing traces. A model returning an unexpected value (e.g. "Search" with a capital S) will break string equality routing — normalize in the router node.
State corruption
Log msg_count_in and msg_count_out on each node. If a node shows fewer messages out than in, it is overwriting instead of appending — a common TypedDict merge mistake in Python.
Ready to instrument your LangGraph agents?
Start for free — no credit card required. See traces in under 5 minutes.
Start free →