Multi-Tenant AI Agent Cost Attribution with Nexus
When multiple users share the same AI agent backend, token costs accumulate invisibly — you know your total OpenAI bill, but not which users or features are driving it. Here's how to use Nexus span metadata to tag every LLM call with user_id, tenant_id, and feature, then aggregate token spend per tag to build a cost-per-user report and enforce per-tenant budgets.
Why Cost Attribution Matters
In a single-user application, your OpenAI bill tells you enough. In a multi-tenant application — a SaaS product where multiple users or companies share your AI backend — the total bill is almost meaningless without knowing the breakdown:
- Which users are high-spend? One power user running 500 traces/day may be responsible for 80% of your token costs — and they may need a different pricing tier
- Which features drive cost? Your summarization feature might cost 10× what your search feature costs; knowing this shapes product decisions
- Which tenants are over budget? If you sell usage-capped plans, you need to know when a tenant approaches their limit before they hit it
Nexus solves this with span metadata: tag every trace and span with user_id, tenant_id, and feature, then query the Nexus REST API to aggregate token spend per tag. No additional infrastructure required.
Step 1: Tag Every Trace and Span with Attribution Metadata
The core pattern is straightforward: pass metadata to both start_trace() and the span's end() output. Putting metadata in both places lets you aggregate at the trace level (total cost per user session) and at the span level (cost per individual LLM call).
Python example:
import os
from openai import OpenAI
from nexus_client import NexusClient

# Module-level clients shared by every call in this snippet.
# NEXUS_API_KEY must be set in the environment; agent_id scopes all
# traces to a single Nexus agent.
nexus = NexusClient(api_key=os.environ["NEXUS_API_KEY"], agent_id="my-agent")
openai = OpenAI()
def run_agent(user_id: str, tenant_id: str, feature: str, prompt: str) -> str:
    """Run one LLM call, tagging trace and span with cost-attribution metadata.

    Args:
        user_id: Opaque user identifier (avoid PII — use IDs, not emails).
        tenant_id: Opaque tenant identifier.
        feature: Name of the product feature making this call.
        prompt: User prompt forwarded to the model.

    Returns:
        The model's response text.

    Raises:
        Re-raises any error from the OpenAI call, after closing the span
        and trace so no dangling open trace is left behind in Nexus.
    """
    # Tag the trace with attribution metadata — these tags are what the
    # reporting scripts later aggregate on.
    trace = nexus.start_trace(
        name=f"agent: {prompt[:60]}",
        metadata={
            "user_id": user_id,
            "tenant_id": tenant_id,
            "feature": feature,
        },
    )
    span = trace.add_span(
        name="llm-call",
        input={"prompt": prompt},
    )
    try:
        response = openai.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
        )
    except Exception:
        # Close the span and trace on failure so the trace is not left
        # open and failed calls remain visible for debugging.
        # NOTE(review): confirm "error" is a valid status value in the
        # Nexus SDK.
        span.end(status="error", output={})
        trace.end(status="error")
        raise
    span.end(
        status="ok",
        output={
            "input_tokens": response.usage.prompt_tokens,
            "output_tokens": response.usage.completion_tokens,
            # Repeat tags in span output for span-level aggregation.
            "user_id": user_id,
            "tenant_id": tenant_id,
            "feature": feature,
        },
    )
    trace.end(status="success")
    return response.choices[0].message.content
TypeScript equivalent:
import { NexusClient } from '@keylightdigital/nexus'
import OpenAI from 'openai'

// Module-level clients shared across requests.
// NEXUS_API_KEY must be set in the environment; agentId scopes all
// traces to a single Nexus agent.
const nexus = new NexusClient({ apiKey: process.env.NEXUS_API_KEY!, agentId: 'my-agent' })
const openai = new OpenAI()
/**
 * Run one LLM call, tagging trace and span with cost-attribution
 * metadata (user_id, tenant_id, feature).
 *
 * Re-throws any error from the OpenAI call, after closing the span and
 * trace so no dangling open trace is left behind in Nexus.
 */
async function runAgent(params: {
  userId: string
  tenantId: string
  feature: string
  prompt: string
}): Promise<string> {
  // Tag the trace with attribution metadata — these tags are what the
  // reporting scripts later aggregate on.
  const trace = await nexus.startTrace({
    name: `agent: ${params.prompt.slice(0, 60)}`,
    metadata: {
      user_id: params.userId,
      tenant_id: params.tenantId,
      feature: params.feature,
    },
  })
  const span = await trace.addSpan({
    name: 'llm-call',
    input: { prompt: params.prompt },
  })
  let response
  try {
    response = await openai.chat.completions.create({
      model: 'gpt-4o-mini',
      messages: [{ role: 'user', content: params.prompt }],
    })
  } catch (err) {
    // Close the span and trace on failure so the trace is not left open
    // and failed calls remain visible for debugging.
    // NOTE(review): confirm 'error' is a valid status in the Nexus SDK.
    await span.end({ status: 'error', output: {} })
    await trace.end({ status: 'error' })
    throw err
  }
  await span.end({
    status: 'ok',
    output: {
      input_tokens: response.usage?.prompt_tokens ?? 0,
      output_tokens: response.usage?.completion_tokens ?? 0,
      // Repeat tags in span output for span-level aggregation.
      user_id: params.userId,
      tenant_id: params.tenantId,
      feature: params.feature,
    },
  })
  await trace.end({ status: 'success' })
  return response.choices[0].message.content ?? ''
}
Step 2: Build a Cost-Per-User Report from the Nexus API
Once traces are tagged, query the Nexus REST API to pull recent traces and aggregate token spend by user. The /v1/traces endpoint returns traces with their spans and metadata — iterate over them and sum token counts per user_id:
import os
import requests
from collections import defaultdict

# Nexus REST API configuration shared by all report scripts below.
NEXUS_API_KEY = os.environ["NEXUS_API_KEY"]
BASE_URL = "https://nexus.keylightdigital.dev/v1"
HEADERS = {"Authorization": f"Bearer {NEXUS_API_KEY}"}

# GPT-4o-mini pricing (update for your model)
INPUT_COST_PER_1K = 0.00015 # $0.15 / 1M tokens
OUTPUT_COST_PER_1K = 0.00060 # $0.60 / 1M tokens
def fetch_traces(limit=1000):
    """Fetch up to *limit* recent traces (with spans) from the Nexus API.

    Returns a list of trace dicts; an empty list if the response body has
    no "traces" key. Raises requests.HTTPError on non-2xx responses.
    """
    resp = requests.get(
        f"{BASE_URL}/traces",
        headers=HEADERS,
        params={"limit": limit},
        timeout=30,  # never hang indefinitely on a slow or unreachable API
    )
    resp.raise_for_status()
    return resp.json().get("traces", [])
def _row_cost(data):
    """Dollar cost of one token-count row at the configured per-1K rates."""
    return (
        data["input_tokens"] / 1000 * INPUT_COST_PER_1K
        + data["output_tokens"] / 1000 * OUTPUT_COST_PER_1K
    )

def cost_report_by_user():
    """Print a per-user token-spend table, most expensive user first."""
    traces = fetch_traces()
    spend = defaultdict(lambda: {"input_tokens": 0, "output_tokens": 0, "traces": 0})
    for trace in traces:
        # Traces without attribution metadata are grouped under "unknown".
        user_id = trace.get("metadata", {}).get("user_id", "unknown")
        for span in trace.get("spans", []):
            out = span.get("output", {}) or {}
            spend[user_id]["input_tokens"] += out.get("input_tokens", 0)
            spend[user_id]["output_tokens"] += out.get("output_tokens", 0)
        spend[user_id]["traces"] += 1
    print(f"{'User':<20} {'Traces':>8} {'In Tokens':>12} {'Out Tokens':>12} {'Cost ($)':>10}")
    print("-" * 66)
    # Sort by dollar cost, not raw input tokens: output tokens cost 4x
    # more per token, so input-token ordering can misrank users.
    for user_id, data in sorted(spend.items(), key=lambda x: -_row_cost(x[1])):
        cost = _row_cost(data)
        print(f"{user_id:<20} {data['traces']:>8} {data['input_tokens']:>12,} {data['output_tokens']:>12,} {cost:>10.4f}")

cost_report_by_user()
Extend this to break down cost by feature:
def cost_report_by_feature():
    """Print token spend and dollar cost grouped by the `feature` trace tag."""
    totals = defaultdict(lambda: {"input_tokens": 0, "output_tokens": 0})
    for tr in fetch_traces():
        # Traces without a feature tag are grouped under "unknown".
        tag = tr.get("metadata", {}).get("feature", "unknown")
        bucket = totals[tag]
        for sp in tr.get("spans", []):
            usage = sp.get("output", {}) or {}
            bucket["input_tokens"] += usage.get("input_tokens", 0)
            bucket["output_tokens"] += usage.get("output_tokens", 0)
    print("\nCost by feature:")
    ranked = sorted(totals.items(), key=lambda item: item[1]["input_tokens"], reverse=True)
    for tag, counts in ranked:
        dollars = (
            counts["input_tokens"] / 1000 * INPUT_COST_PER_1K
            + counts["output_tokens"] / 1000 * OUTPUT_COST_PER_1K
        )
        print(f" {tag}: ${dollars:.4f} ({counts['input_tokens']:,} in, {counts['output_tokens']:,} out)")
Step 3: Per-Tenant Budget Enforcement
For hard budget limits, the cleanest approach is using separate Nexus API keys per tenant — the Nexus plan limits then enforce span counts automatically. For soft limits (warn before enforcing, or custom token-based budgets), query recent spend before each agent call and reject requests that would exceed the budget:
# Per-tenant budget enforcement using Nexus span counts
# Nexus Pro plan tracks spans per API key — use separate keys per tenant for hard limits.
# For soft limits (warn before enforcing), check recent token spend before each call:
TENANT_TOKEN_BUDGET = 100_000 # input tokens per day
def get_tenant_spend_today(tenant_id: str) -> int:
    """Sum input tokens for a tenant across today's traces.

    Only the most recent 500 traces are inspected, so very high-volume
    deployments may undercount; raise the limit or page if needed.
    """
    # Bug fix: the original snippet used `datetime` without importing it,
    # which raises NameError at runtime.
    import datetime

    # NOTE(review): date.today() is local time; if Nexus `created_at`
    # timestamps are UTC, compare against
    # datetime.datetime.now(datetime.timezone.utc).date() instead to
    # avoid off-by-one-day mismatches near midnight.
    today = datetime.date.today().isoformat()
    total = 0
    for trace in fetch_traces(limit=500):
        if trace.get("metadata", {}).get("tenant_id") != tenant_id:
            continue
        if not trace.get("created_at", "").startswith(today):
            continue
        for span in trace.get("spans", []):
            out = span.get("output", {}) or {}
            total += out.get("input_tokens", 0)
    return total
def run_agent_with_budget(tenant_id: str, user_id: str, prompt: str) -> str:
    """Run the agent only if the tenant is under today's token budget.

    Raises ValueError when the tenant has already spent its daily budget.
    """
    used = get_tenant_spend_today(tenant_id)
    if used < TENANT_TOKEN_BUDGET:
        # Under budget: proceed normally, tagging the trace for attribution.
        return run_agent(user_id=user_id, tenant_id=tenant_id, feature="default", prompt=prompt)
    raise ValueError(f"Tenant {tenant_id} has exceeded their daily token budget")
This is a synchronous check — add caching (Redis, KV store) to avoid querying the Nexus API on every request in high-throughput applications.
What to Tag and When
A few guidelines for choosing what to put in metadata:
- Always tag at the trace level with `user_id` and `tenant_id` — these are your primary attribution dimensions
- Tag with `feature` when your application has distinct AI-powered features with different expected costs (summarization vs. search vs. code generation)
- Repeat key tags in span output — this lets you filter spans by user or feature in the Nexus dashboard without joining on the parent trace
- Avoid PII in metadata — use opaque IDs (`user_123`, not `jane@example.com`) since metadata is stored in the Nexus backend
Get Started
Create a free Nexus account at nexus.keylightdigital.dev/pricing — the free tier includes 10,000 spans/month. Install the Python or TypeScript SDK, add three lines of metadata to your existing agent calls, and you'll have per-user cost data flowing within minutes.
Ready to attribute AI costs to users and features?
Start free — no credit card required. Up to 10,000 spans/month on the free tier.
Start monitoring for free →