perfettoTracing.ts
utils/telemetry/perfettoTracing.ts
No strong subsystem tag
1121
Lines
29792
Bytes
22
Exports
12
Imports
10
Keywords
What this is
This page documents one file from the repository and includes its full source so you can read it without leaving the docs site.
Beginner explanation
This file is one piece of the larger system. Its name, directory, imports, and exports show where it fits. Start by reading the exports and related files first.
How it is used
Start from the exports list and related files. Those are the easiest clues for where this file fits into the system.
Expert explanation
Architecturally, this file intersects with general runtime concerns. It contains 1121 lines, 12 detected imports, and 22 detected exports.
Important relationships
Detected exports
TraceEventPhaseTraceEventinitializePerfettoTracingisPerfettoTracingEnabledregisterAgentunregisterAgentstartLLMRequestPerfettoSpanendLLMRequestPerfettoSpanstartToolPerfettoSpanendToolPerfettoSpanstartUserInputPerfettoSpanendUserInputPerfettoSpanemitPerfettoInstantemitPerfettoCounterstartInteractionPerfettoSpanendInteractionPerfettoSpangetPerfettoEventsresetPerfettoTracertriggerPeriodicWriteForTestingevictStaleSpansForTestingMAX_EVENTS_FOR_TESTINGevictOldestEventsForTesting
Keywords
agentinfopendingargsnameeventsspanidmetadataperfettoprocessidisenabled
Detected imports
bun:bundlefsfs/promisespath../../bootstrap/state.js../cleanupRegistry.js../debug.js../envUtils.js../errors.js../hash.js../slowOperations.js../teammate.js
Source notes
This page embeds the full file contents. Small or leaf files are still indexed honestly instead of being over-explained.
Full source
/**
* Perfetto Tracing for Claude Code (Ant-only)
*
* This module generates traces in the Chrome Trace Event format that can be
* viewed in ui.perfetto.dev or Chrome's chrome://tracing.
*
* NOTE: This feature is ant-only and eliminated from external builds.
*
* The trace file includes:
* - Agent hierarchy (parent-child relationships in a swarm)
* - API requests with TTFT, TTLT, prompt length, cache stats, msg ID, speculative flag
* - Tool executions with name, duration, and token usage
* - User input waiting time
*
* Usage:
* 1. Enable via CLAUDE_CODE_PERFETTO_TRACE=1 or CLAUDE_CODE_PERFETTO_TRACE=<path>
* 2. Optionally set CLAUDE_CODE_PERFETTO_WRITE_INTERVAL_S=<positive integer> to write the
* trace file periodically (default: write only on exit).
* 3. Run Claude Code normally
* 4. Trace file is written to ~/.claude/traces/trace-<session-id>.json
* or to the specified path
* 5. Open in ui.perfetto.dev to visualize
*/
import { feature } from 'bun:bundle'
import { mkdirSync, writeFileSync } from 'fs'
import { mkdir, writeFile } from 'fs/promises'
import { dirname, join } from 'path'
import { getSessionId } from '../../bootstrap/state.js'
import { registerCleanup } from '../cleanupRegistry.js'
import { logForDebugging } from '../debug.js'
import {
getClaudeConfigHomeDir,
isEnvDefinedFalsy,
isEnvTruthy,
} from '../envUtils.js'
import { errorMessage } from '../errors.js'
import { djb2Hash } from '../hash.js'
import { jsonStringify } from '../slowOperations.js'
import { getAgentId, getAgentName, getParentSessionId } from '../teammate.js'
/**
* Chrome Trace Event format types
* See: https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU
*/
export type TraceEventPhase =
| 'B' // Begin duration event
| 'E' // End duration event
| 'X' // Complete event (with duration)
| 'i' // Instant event
| 'C' // Counter event
| 'b' // Async begin
| 'n' // Async instant
| 'e' // Async end
| 'M' // Metadata event
export type TraceEvent = {
name: string
cat: string
ph: TraceEventPhase
ts: number // Timestamp in microseconds
pid: number // Process ID (we use 1 for main, agent IDs for subagents)
tid: number // Thread ID (we use numeric hash of agent name or 1 for main)
dur?: number // Duration in microseconds (for 'X' events)
args?: Record<string, unknown>
id?: string // For async events
scope?: string
}
/**
* Agent info for tracking hierarchy
*/
type AgentInfo = {
agentId: string
agentName: string
parentAgentId?: string
processId: number
threadId: number
}
/**
* Pending span for tracking begin/end pairs
*/
type PendingSpan = {
name: string
category: string
startTime: number
agentInfo: AgentInfo
args: Record<string, unknown>
}
// Global state for the Perfetto tracer
let isEnabled = false
let tracePath: string | null = null
// Metadata events (ph: 'M' — process/thread names, parent links) are kept
// separate so they survive eviction — Perfetto UI needs them to label
// tracks. Bounded by agent count (~3 events per agent).
const metadataEvents: TraceEvent[] = []
const events: TraceEvent[] = []
// events[] cap. Cron-driven sessions run for days; 22 push sites × many
// turns would otherwise grow unboundedly (periodicWrite flushes to disk but
// does not truncate — it writes the full snapshot). At ~300B/event this is
// ~30MB, enough trace history for any debugging session. Eviction drops the
// oldest half when hit, amortized O(1).
const MAX_EVENTS = 100_000
const pendingSpans = new Map<string, PendingSpan>()
const agentRegistry = new Map<string, AgentInfo>()
let totalAgentCount = 0
let startTimeMs = 0
let spanIdCounter = 0
let traceWritten = false // Flag to avoid double writes
// Map agent IDs to numeric process IDs (Perfetto requires numeric IDs)
let processIdCounter = 1
const agentIdToProcessId = new Map<string, number>()
// Periodic write interval handle
let writeIntervalId: ReturnType<typeof setInterval> | null = null
const STALE_SPAN_TTL_MS = 30 * 60 * 1000 // 30 minutes
const STALE_SPAN_CLEANUP_INTERVAL_MS = 60 * 1000 // 1 minute
let staleSpanCleanupId: ReturnType<typeof setInterval> | null = null
/**
* Convert a string to a numeric hash for use as thread ID
*/
function stringToNumericHash(str: string): number {
return Math.abs(djb2Hash(str)) || 1 // Ensure non-zero
}
/**
* Get or create a numeric process ID for an agent
*/
function getProcessIdForAgent(agentId: string): number {
const existing = agentIdToProcessId.get(agentId)
if (existing !== undefined) return existing
processIdCounter++
agentIdToProcessId.set(agentId, processIdCounter)
return processIdCounter
}
/**
* Get current agent info
*/
function getCurrentAgentInfo(): AgentInfo {
const agentId = getAgentId() ?? getSessionId()
const agentName = getAgentName() ?? 'main'
const parentSessionId = getParentSessionId()
// Check if we've already registered this agent
const existing = agentRegistry.get(agentId)
if (existing) return existing
const info: AgentInfo = {
agentId,
agentName,
parentAgentId: parentSessionId,
processId: agentId === getSessionId() ? 1 : getProcessIdForAgent(agentId),
threadId: stringToNumericHash(agentName),
}
agentRegistry.set(agentId, info)
totalAgentCount++
return info
}
/**
* Get timestamp in microseconds relative to trace start
*/
function getTimestamp(): number {
return (Date.now() - startTimeMs) * 1000
}
/**
* Generate a unique span ID
*/
function generateSpanId(): string {
return `span_${++spanIdCounter}`
}
/**
* Evict pending spans older than STALE_SPAN_TTL_MS.
* Mirrors the TTL cleanup pattern in sessionTracing.ts.
*/
function evictStaleSpans(): void {
const now = getTimestamp()
const ttlUs = STALE_SPAN_TTL_MS * 1000 // Convert ms to microseconds
for (const [spanId, span] of pendingSpans) {
if (now - span.startTime > ttlUs) {
// Emit an end event so the span shows up in the trace as incomplete
events.push({
name: span.name,
cat: span.category,
ph: 'E',
ts: now,
pid: span.agentInfo.processId,
tid: span.agentInfo.threadId,
args: {
...span.args,
evicted: true,
duration_ms: (now - span.startTime) / 1000,
},
})
pendingSpans.delete(spanId)
}
}
}
/**
* Build the full trace document (Chrome Trace JSON format).
*/
function buildTraceDocument(): string {
return jsonStringify({
traceEvents: [...metadataEvents, ...events],
metadata: {
session_id: getSessionId(),
trace_start_time: new Date(startTimeMs).toISOString(),
agent_count: totalAgentCount,
total_event_count: metadataEvents.length + events.length,
},
})
}
/**
* Drop the oldest half of events[] when over MAX_EVENTS. Called from the
* stale-span cleanup interval (60s). The half-batch splice keeps this
* amortized O(1) — we don't pay splice cost per-push. A synthetic marker
* is inserted so the gap is visible in ui.perfetto.dev.
*/
function evictOldestEvents(): void {
if (events.length < MAX_EVENTS) return
const dropped = events.splice(0, MAX_EVENTS / 2)
events.unshift({
name: 'trace_truncated',
cat: '__metadata',
ph: 'i',
ts: dropped[dropped.length - 1]?.ts ?? 0,
pid: 1,
tid: 0,
args: { dropped_events: dropped.length },
})
logForDebugging(
`[Perfetto] Evicted ${dropped.length} oldest events (cap ${MAX_EVENTS})`,
)
}
/**
* Initialize Perfetto tracing
* Call this early in the application lifecycle
*/
export function initializePerfettoTracing(): void {
const envValue = process.env.CLAUDE_CODE_PERFETTO_TRACE
logForDebugging(
`[Perfetto] initializePerfettoTracing called, env value: ${envValue}`,
)
// Wrap in feature() for dead code elimination - entire block removed from external builds
if (feature('PERFETTO_TRACING')) {
if (!envValue || isEnvDefinedFalsy(envValue)) {
logForDebugging(
'[Perfetto] Tracing disabled (env var not set or disabled)',
)
return
}
isEnabled = true
startTimeMs = Date.now()
// Determine trace file path
if (isEnvTruthy(envValue)) {
const tracesDir = join(getClaudeConfigHomeDir(), 'traces')
tracePath = join(tracesDir, `trace-${getSessionId()}.json`)
} else {
// Use the provided path
tracePath = envValue
}
logForDebugging(
`[Perfetto] Tracing enabled, will write to: ${tracePath}, isEnabled=${isEnabled}`,
)
// Start periodic full-trace write if CLAUDE_CODE_PERFETTO_WRITE_INTERVAL_S is a positive integer
const intervalSec = parseInt(
process.env.CLAUDE_CODE_PERFETTO_WRITE_INTERVAL_S ?? '',
10,
)
if (intervalSec > 0) {
writeIntervalId = setInterval(() => {
void periodicWrite()
}, intervalSec * 1000)
// Don't let the interval keep the process alive on its own
if (writeIntervalId.unref) writeIntervalId.unref()
logForDebugging(
`[Perfetto] Periodic write enabled, interval: ${intervalSec}s`,
)
}
// Start stale span cleanup interval
staleSpanCleanupId = setInterval(() => {
evictStaleSpans()
evictOldestEvents()
}, STALE_SPAN_CLEANUP_INTERVAL_MS)
if (staleSpanCleanupId.unref) staleSpanCleanupId.unref()
// Register cleanup to write final trace on exit
registerCleanup(async () => {
logForDebugging('[Perfetto] Cleanup callback invoked')
await writePerfettoTrace()
})
// Also register a beforeExit handler as a fallback
// This ensures the trace is written even if cleanup registry is not called
process.on('beforeExit', () => {
logForDebugging('[Perfetto] beforeExit handler invoked')
void writePerfettoTrace()
})
// Register a synchronous exit handler as a last resort
// This is the final fallback to ensure trace is written before process exits
process.on('exit', () => {
if (!traceWritten) {
logForDebugging(
'[Perfetto] exit handler invoked, writing trace synchronously',
)
writePerfettoTraceSync()
}
})
// Emit process metadata events for main process
const mainAgent = getCurrentAgentInfo()
emitProcessMetadata(mainAgent)
}
}
/**
* Emit metadata events for a process/agent
*/
function emitProcessMetadata(agentInfo: AgentInfo): void {
if (!isEnabled) return
// Process name
metadataEvents.push({
name: 'process_name',
cat: '__metadata',
ph: 'M',
ts: 0,
pid: agentInfo.processId,
tid: 0,
args: { name: agentInfo.agentName },
})
// Thread name (same as process for now)
metadataEvents.push({
name: 'thread_name',
cat: '__metadata',
ph: 'M',
ts: 0,
pid: agentInfo.processId,
tid: agentInfo.threadId,
args: { name: agentInfo.agentName },
})
// Add parent info if available
if (agentInfo.parentAgentId) {
metadataEvents.push({
name: 'parent_agent',
cat: '__metadata',
ph: 'M',
ts: 0,
pid: agentInfo.processId,
tid: 0,
args: {
parent_agent_id: agentInfo.parentAgentId,
},
})
}
}
/**
* Check if Perfetto tracing is enabled
*/
export function isPerfettoTracingEnabled(): boolean {
return isEnabled
}
/**
* Register a new agent in the trace
* Call this when a subagent/teammate is spawned
*/
export function registerAgent(
agentId: string,
agentName: string,
parentAgentId?: string,
): void {
if (!isEnabled) return
const info: AgentInfo = {
agentId,
agentName,
parentAgentId,
processId: getProcessIdForAgent(agentId),
threadId: stringToNumericHash(agentName),
}
agentRegistry.set(agentId, info)
totalAgentCount++
emitProcessMetadata(info)
}
/**
* Unregister an agent from the trace.
* Call this when an agent completes, fails, or is aborted to free memory.
*/
export function unregisterAgent(agentId: string): void {
if (!isEnabled) return
agentRegistry.delete(agentId)
agentIdToProcessId.delete(agentId)
}
/**
* Start an API call span
*/
export function startLLMRequestPerfettoSpan(args: {
model: string
promptTokens?: number
messageId?: string
isSpeculative?: boolean
querySource?: string
}): string {
if (!isEnabled) return ''
const spanId = generateSpanId()
const agentInfo = getCurrentAgentInfo()
pendingSpans.set(spanId, {
name: 'API Call',
category: 'api',
startTime: getTimestamp(),
agentInfo,
args: {
model: args.model,
prompt_tokens: args.promptTokens,
message_id: args.messageId,
is_speculative: args.isSpeculative ?? false,
query_source: args.querySource,
},
})
// Emit begin event
events.push({
name: 'API Call',
cat: 'api',
ph: 'B',
ts: pendingSpans.get(spanId)!.startTime,
pid: agentInfo.processId,
tid: agentInfo.threadId,
args: pendingSpans.get(spanId)!.args,
})
return spanId
}
/**
* End an API call span with response metadata
*/
export function endLLMRequestPerfettoSpan(
spanId: string,
metadata: {
ttftMs?: number
ttltMs?: number
promptTokens?: number
outputTokens?: number
cacheReadTokens?: number
cacheCreationTokens?: number
messageId?: string
success?: boolean
error?: string
/** Time spent in pre-request setup (client creation, retries) before the successful attempt */
requestSetupMs?: number
/** Timestamps (Date.now()) of each attempt start — used to emit retry sub-spans */
attemptStartTimes?: number[]
},
): void {
if (!isEnabled || !spanId) return
const pending = pendingSpans.get(spanId)
if (!pending) return
const endTime = getTimestamp()
const duration = endTime - pending.startTime
const promptTokens =
metadata.promptTokens ?? (pending.args.prompt_tokens as number | undefined)
const ttftMs = metadata.ttftMs
const ttltMs = metadata.ttltMs
const outputTokens = metadata.outputTokens
const cacheReadTokens = metadata.cacheReadTokens
// Compute derived metrics
// ITPS: input tokens per second (prompt processing speed)
const itps =
ttftMs !== undefined && promptTokens !== undefined && ttftMs > 0
? Math.round((promptTokens / (ttftMs / 1000)) * 100) / 100
: undefined
// OTPS: output tokens per second (sampling speed)
const samplingMs =
ttltMs !== undefined && ttftMs !== undefined ? ttltMs - ttftMs : undefined
const otps =
samplingMs !== undefined && outputTokens !== undefined && samplingMs > 0
? Math.round((outputTokens / (samplingMs / 1000)) * 100) / 100
: undefined
// Cache hit rate: percentage of prompt tokens from cache
const cacheHitRate =
cacheReadTokens !== undefined &&
promptTokens !== undefined &&
promptTokens > 0
? Math.round((cacheReadTokens / promptTokens) * 10000) / 100
: undefined
const requestSetupMs = metadata.requestSetupMs
const attemptStartTimes = metadata.attemptStartTimes
// Merge metadata with original args
const args = {
...pending.args,
ttft_ms: ttftMs,
ttlt_ms: ttltMs,
prompt_tokens: promptTokens,
output_tokens: outputTokens,
cache_read_tokens: cacheReadTokens,
cache_creation_tokens: metadata.cacheCreationTokens,
message_id: metadata.messageId ?? pending.args.message_id,
success: metadata.success ?? true,
error: metadata.error,
duration_ms: duration / 1000,
request_setup_ms: requestSetupMs,
// Derived metrics
itps,
otps,
cache_hit_rate_pct: cacheHitRate,
}
// Emit Request Setup sub-span when there was measurable setup time
// (client creation, param building, retries before the successful attempt)
const setupUs =
requestSetupMs !== undefined && requestSetupMs > 0
? requestSetupMs * 1000
: 0
if (setupUs > 0) {
const setupEndTs = pending.startTime + setupUs
events.push({
name: 'Request Setup',
cat: 'api,setup',
ph: 'B',
ts: pending.startTime,
pid: pending.agentInfo.processId,
tid: pending.agentInfo.threadId,
args: {
request_setup_ms: requestSetupMs,
attempt_count: attemptStartTimes?.length ?? 1,
},
})
// Emit retry attempt sub-spans within Request Setup.
// Each failed attempt runs from its start to the next attempt's start.
if (attemptStartTimes && attemptStartTimes.length > 1) {
// attemptStartTimes[0] is the reference point (first attempt).
// Convert wall-clock deltas into Perfetto-relative microseconds.
const baseWallMs = attemptStartTimes[0]!
for (let i = 0; i < attemptStartTimes.length - 1; i++) {
const attemptStartUs =
pending.startTime + (attemptStartTimes[i]! - baseWallMs) * 1000
const attemptEndUs =
pending.startTime + (attemptStartTimes[i + 1]! - baseWallMs) * 1000
events.push({
name: `Attempt ${i + 1} (retry)`,
cat: 'api,retry',
ph: 'B',
ts: attemptStartUs,
pid: pending.agentInfo.processId,
tid: pending.agentInfo.threadId,
args: { attempt: i + 1 },
})
events.push({
name: `Attempt ${i + 1} (retry)`,
cat: 'api,retry',
ph: 'E',
ts: attemptEndUs,
pid: pending.agentInfo.processId,
tid: pending.agentInfo.threadId,
})
}
}
events.push({
name: 'Request Setup',
cat: 'api,setup',
ph: 'E',
ts: setupEndTs,
pid: pending.agentInfo.processId,
tid: pending.agentInfo.threadId,
})
}
// Emit sub-spans for First Token and Sampling phases (before API Call end)
// Using B/E pairs in proper nesting order for correct Perfetto visualization
if (ttftMs !== undefined) {
// First Token starts after request setup (if any)
const firstTokenStartTs = pending.startTime + setupUs
const firstTokenEndTs = firstTokenStartTs + ttftMs * 1000
// First Token phase: from successful attempt start to first token
events.push({
name: 'First Token',
cat: 'api,ttft',
ph: 'B',
ts: firstTokenStartTs,
pid: pending.agentInfo.processId,
tid: pending.agentInfo.threadId,
args: {
ttft_ms: ttftMs,
prompt_tokens: promptTokens,
itps,
cache_hit_rate_pct: cacheHitRate,
},
})
events.push({
name: 'First Token',
cat: 'api,ttft',
ph: 'E',
ts: firstTokenEndTs,
pid: pending.agentInfo.processId,
tid: pending.agentInfo.threadId,
})
// Sampling phase: from first token to last token
// Note: samplingMs = ttltMs - ttftMs still includes setup time in ttltMs,
// so we compute the actual sampling duration for the span as the time from
// first token to API call end (endTime), not samplingMs directly.
const actualSamplingMs =
ttltMs !== undefined ? ttltMs - ttftMs - setupUs / 1000 : undefined
if (actualSamplingMs !== undefined && actualSamplingMs > 0) {
events.push({
name: 'Sampling',
cat: 'api,sampling',
ph: 'B',
ts: firstTokenEndTs,
pid: pending.agentInfo.processId,
tid: pending.agentInfo.threadId,
args: {
sampling_ms: actualSamplingMs,
output_tokens: outputTokens,
otps,
},
})
events.push({
name: 'Sampling',
cat: 'api,sampling',
ph: 'E',
ts: firstTokenEndTs + actualSamplingMs * 1000,
pid: pending.agentInfo.processId,
tid: pending.agentInfo.threadId,
})
}
}
// Emit API Call end event (after sub-spans)
events.push({
name: pending.name,
cat: pending.category,
ph: 'E',
ts: endTime,
pid: pending.agentInfo.processId,
tid: pending.agentInfo.threadId,
args,
})
pendingSpans.delete(spanId)
}
/**
* Start a tool execution span
*/
export function startToolPerfettoSpan(
toolName: string,
args?: Record<string, unknown>,
): string {
if (!isEnabled) return ''
const spanId = generateSpanId()
const agentInfo = getCurrentAgentInfo()
pendingSpans.set(spanId, {
name: `Tool: ${toolName}`,
category: 'tool',
startTime: getTimestamp(),
agentInfo,
args: {
tool_name: toolName,
...args,
},
})
// Emit begin event
events.push({
name: `Tool: ${toolName}`,
cat: 'tool',
ph: 'B',
ts: pendingSpans.get(spanId)!.startTime,
pid: agentInfo.processId,
tid: agentInfo.threadId,
args: pendingSpans.get(spanId)!.args,
})
return spanId
}
/**
* End a tool execution span
*/
export function endToolPerfettoSpan(
spanId: string,
metadata?: {
success?: boolean
error?: string
resultTokens?: number
},
): void {
if (!isEnabled || !spanId) return
const pending = pendingSpans.get(spanId)
if (!pending) return
const endTime = getTimestamp()
const duration = endTime - pending.startTime
const args = {
...pending.args,
success: metadata?.success ?? true,
error: metadata?.error,
result_tokens: metadata?.resultTokens,
duration_ms: duration / 1000,
}
// Emit end event
events.push({
name: pending.name,
cat: pending.category,
ph: 'E',
ts: endTime,
pid: pending.agentInfo.processId,
tid: pending.agentInfo.threadId,
args,
})
pendingSpans.delete(spanId)
}
/**
* Start a user input waiting span
*/
export function startUserInputPerfettoSpan(context?: string): string {
if (!isEnabled) return ''
const spanId = generateSpanId()
const agentInfo = getCurrentAgentInfo()
pendingSpans.set(spanId, {
name: 'Waiting for User Input',
category: 'user_input',
startTime: getTimestamp(),
agentInfo,
args: {
context,
},
})
// Emit begin event
events.push({
name: 'Waiting for User Input',
cat: 'user_input',
ph: 'B',
ts: pendingSpans.get(spanId)!.startTime,
pid: agentInfo.processId,
tid: agentInfo.threadId,
args: pendingSpans.get(spanId)!.args,
})
return spanId
}
/**
* End a user input waiting span
*/
export function endUserInputPerfettoSpan(
spanId: string,
metadata?: {
decision?: string
source?: string
},
): void {
if (!isEnabled || !spanId) return
const pending = pendingSpans.get(spanId)
if (!pending) return
const endTime = getTimestamp()
const duration = endTime - pending.startTime
const args = {
...pending.args,
decision: metadata?.decision,
source: metadata?.source,
duration_ms: duration / 1000,
}
// Emit end event
events.push({
name: pending.name,
cat: pending.category,
ph: 'E',
ts: endTime,
pid: pending.agentInfo.processId,
tid: pending.agentInfo.threadId,
args,
})
pendingSpans.delete(spanId)
}
/**
* Emit an instant event (marker)
*/
export function emitPerfettoInstant(
name: string,
category: string,
args?: Record<string, unknown>,
): void {
if (!isEnabled) return
const agentInfo = getCurrentAgentInfo()
events.push({
name,
cat: category,
ph: 'i',
ts: getTimestamp(),
pid: agentInfo.processId,
tid: agentInfo.threadId,
args,
})
}
/**
* Emit a counter event for tracking metrics over time
*/
export function emitPerfettoCounter(
name: string,
values: Record<string, number>,
): void {
if (!isEnabled) return
const agentInfo = getCurrentAgentInfo()
events.push({
name,
cat: 'counter',
ph: 'C',
ts: getTimestamp(),
pid: agentInfo.processId,
tid: agentInfo.threadId,
args: values,
})
}
/**
* Start an interaction span (wraps a full user request cycle)
*/
export function startInteractionPerfettoSpan(userPrompt?: string): string {
if (!isEnabled) return ''
const spanId = generateSpanId()
const agentInfo = getCurrentAgentInfo()
pendingSpans.set(spanId, {
name: 'Interaction',
category: 'interaction',
startTime: getTimestamp(),
agentInfo,
args: {
user_prompt_length: userPrompt?.length,
},
})
// Emit begin event
events.push({
name: 'Interaction',
cat: 'interaction',
ph: 'B',
ts: pendingSpans.get(spanId)!.startTime,
pid: agentInfo.processId,
tid: agentInfo.threadId,
args: pendingSpans.get(spanId)!.args,
})
return spanId
}
/**
* End an interaction span
*/
export function endInteractionPerfettoSpan(spanId: string): void {
if (!isEnabled || !spanId) return
const pending = pendingSpans.get(spanId)
if (!pending) return
const endTime = getTimestamp()
const duration = endTime - pending.startTime
// Emit end event
events.push({
name: pending.name,
cat: pending.category,
ph: 'E',
ts: endTime,
pid: pending.agentInfo.processId,
tid: pending.agentInfo.threadId,
args: {
...pending.args,
duration_ms: duration / 1000,
},
})
pendingSpans.delete(spanId)
}
// ---------------------------------------------------------------------------
// Periodic write helpers
// ---------------------------------------------------------------------------
/**
* Stop the periodic write timer.
*/
function stopWriteInterval(): void {
if (staleSpanCleanupId) {
clearInterval(staleSpanCleanupId)
staleSpanCleanupId = null
}
if (writeIntervalId) {
clearInterval(writeIntervalId)
writeIntervalId = null
}
}
/**
* Force-close any remaining open spans at session end.
*/
function closeOpenSpans(): void {
for (const [spanId, pending] of pendingSpans) {
const endTime = getTimestamp()
events.push({
name: pending.name,
cat: pending.category,
ph: 'E',
ts: endTime,
pid: pending.agentInfo.processId,
tid: pending.agentInfo.threadId,
args: {
...pending.args,
incomplete: true,
duration_ms: (endTime - pending.startTime) / 1000,
},
})
pendingSpans.delete(spanId)
}
}
/**
* Write the full trace to disk. Errors are logged but swallowed so that a
* transient I/O problem does not crash the session — the next periodic tick
* (or the final exit write) will retry with a complete snapshot.
*/
async function periodicWrite(): Promise<void> {
if (!isEnabled || !tracePath || traceWritten) return
try {
await mkdir(dirname(tracePath), { recursive: true })
await writeFile(tracePath, buildTraceDocument())
logForDebugging(
`[Perfetto] Periodic write: ${events.length} events to ${tracePath}`,
)
} catch (error) {
logForDebugging(
`[Perfetto] Periodic write failed: ${errorMessage(error)}`,
{ level: 'error' },
)
}
}
/**
* Final async write: close open spans and write the complete trace.
* Idempotent — sets `traceWritten` on success so subsequent calls are no-ops.
*/
async function writePerfettoTrace(): Promise<void> {
if (!isEnabled || !tracePath || traceWritten) {
logForDebugging(
`[Perfetto] Skipping final write: isEnabled=${isEnabled}, tracePath=${tracePath}, traceWritten=${traceWritten}`,
)
return
}
stopWriteInterval()
closeOpenSpans()
logForDebugging(
`[Perfetto] writePerfettoTrace called: events=${events.length}`,
)
try {
await mkdir(dirname(tracePath), { recursive: true })
await writeFile(tracePath, buildTraceDocument())
traceWritten = true
logForDebugging(`[Perfetto] Trace finalized at: ${tracePath}`)
} catch (error) {
logForDebugging(
`[Perfetto] Failed to write final trace: ${errorMessage(error)}`,
{ level: 'error' },
)
}
}
/**
* Final synchronous write (fallback for process 'exit' handler where async is forbidden).
*/
function writePerfettoTraceSync(): void {
if (!isEnabled || !tracePath || traceWritten) {
logForDebugging(
`[Perfetto] Skipping final sync write: isEnabled=${isEnabled}, tracePath=${tracePath}, traceWritten=${traceWritten}`,
)
return
}
stopWriteInterval()
closeOpenSpans()
logForDebugging(
`[Perfetto] writePerfettoTraceSync called: events=${events.length}`,
)
try {
const dir = dirname(tracePath)
// eslint-disable-next-line custom-rules/no-sync-fs -- Only called from process.on('exit') handler
mkdirSync(dir, { recursive: true })
// eslint-disable-next-line custom-rules/no-sync-fs, eslint-plugin-n/no-sync -- Required for process 'exit' handler which doesn't support async
writeFileSync(tracePath, buildTraceDocument())
traceWritten = true
logForDebugging(`[Perfetto] Trace finalized synchronously at: ${tracePath}`)
} catch (error) {
logForDebugging(
`[Perfetto] Failed to write final trace synchronously: ${errorMessage(error)}`,
{ level: 'error' },
)
}
}
/**
* Get all recorded events (for testing)
*/
export function getPerfettoEvents(): TraceEvent[] {
return [...metadataEvents, ...events]
}
/**
* Reset the tracer state (for testing)
*/
export function resetPerfettoTracer(): void {
if (staleSpanCleanupId) {
clearInterval(staleSpanCleanupId)
staleSpanCleanupId = null
}
stopWriteInterval()
metadataEvents.length = 0
events.length = 0
pendingSpans.clear()
agentRegistry.clear()
agentIdToProcessId.clear()
totalAgentCount = 0
processIdCounter = 1
spanIdCounter = 0
isEnabled = false
tracePath = null
startTimeMs = 0
traceWritten = false
}
/**
* Trigger a periodic write immediately (for testing)
*/
export async function triggerPeriodicWriteForTesting(): Promise<void> {
await periodicWrite()
}
/**
* Evict stale spans immediately (for testing)
*/
export function evictStaleSpansForTesting(): void {
evictStaleSpans()
}
export const MAX_EVENTS_FOR_TESTING = MAX_EVENTS
export function evictOldestEventsForTesting(): void {
evictOldestEvents()
}