contextAnalysis.ts
utils/contextAnalysis.ts
273
Lines
7733
Bytes
2
Exports
6
Imports
10
Keywords
What this is
This page documents one file from the repository and includes its full source so you can read it without leaving the docs site.
Beginner explanation
This file is one piece of the larger system. Its name, directory, imports, and exports show where it fits. Start by reading the exports and related files first.
How it is used
Start from the exports list and related files. Those are the easiest clues for where this file fits into the system.
Expert explanation
Architecturally, this file intersects with repo-context. It contains 273 lines, 6 detected imports, and 2 detected exports.
Important relationships
Detected exports
analyzeContext, tokenStatsToStatsigMetrics
Keywords
stats, tokens, block, case, metrics, count, total, tool, forEach, path
Detected imports
@anthropic-ai/sdk/resources/beta/messages/messages.mjs, @anthropic-ai/sdk/resources/index.mjs, ../services/tokenEstimation.js, ../types/message.js, ./messages.js, ./slowOperations.js
Source notes
This page embeds the full file contents. Small or leaf files are still indexed honestly instead of being over-explained.
Full source
import type { BetaContentBlock } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
import type {
ContentBlock,
ContentBlockParam,
} from '@anthropic-ai/sdk/resources/index.mjs'
import { roughTokenCountEstimation as countTokens } from '../services/tokenEstimation.js'
import type {
AssistantMessage,
Message,
UserMessage,
} from '../types/message.js'
import { normalizeMessagesForAPI } from './messages.js'
import { jsonStringify } from './slowOperations.js'
// Aggregated token-usage breakdown for a conversation, produced by
// analyzeContext(). All counts are rough estimates (see countTokens).
type TokenStats = {
// Tokens spent on tool_use blocks, keyed by tool name.
toolRequests: Map<string, number>
// Tokens spent on tool_result blocks, keyed by the requesting tool's name.
toolResults: Map<string, number>
// Tokens in user-authored text content.
humanMessages: number
// Tokens in assistant-authored text content.
assistantMessages: number
// Tokens in user text containing 'local-command-stdout' markers.
localCommandOutputs: number
// Tokens from all other block types (images, thinking, documents, ...).
other: number
// Count of attachment messages, keyed by attachment type.
attachments: Map<string, number>
// Per file path: how many times it was read and the estimated tokens
// wasted on redundant (beyond-the-first) reads.
duplicateFileReads: Map<string, { count: number; tokens: number }>
// Total estimated tokens across all normalized messages.
total: number
}
export function analyzeContext(messages: Message[]): TokenStats {
const stats: TokenStats = {
toolRequests: new Map(),
toolResults: new Map(),
humanMessages: 0,
assistantMessages: 0,
localCommandOutputs: 0,
other: 0,
attachments: new Map(),
duplicateFileReads: new Map(),
total: 0,
}
const toolIdsToToolNames = new Map<string, string>()
const readToolIdToFilePath = new Map<string, string>()
const fileReadStats = new Map<
string,
{ count: number; totalTokens: number }
>()
messages.forEach(msg => {
if (msg.type === 'attachment') {
const type = msg.attachment.type || 'unknown'
stats.attachments.set(type, (stats.attachments.get(type) || 0) + 1)
}
})
const normalizedMessages = normalizeMessagesForAPI(messages)
normalizedMessages.forEach(msg => {
const { content } = msg.message
// Not sure if this path is still used, but adding as a fallback
if (typeof content === 'string') {
const tokens = countTokens(content)
stats.total += tokens
// Check if this is a local command output
if (msg.type === 'user' && content.includes('local-command-stdout')) {
stats.localCommandOutputs += tokens
} else {
stats[msg.type === 'user' ? 'humanMessages' : 'assistantMessages'] +=
tokens
}
} else {
content.forEach(block =>
processBlock(
block,
msg,
stats,
toolIdsToToolNames,
readToolIdToFilePath,
fileReadStats,
),
)
}
})
// Calculate duplicate file reads
fileReadStats.forEach((data, path) => {
if (data.count > 1) {
const averageTokensPerRead = Math.floor(data.totalTokens / data.count)
const duplicateTokens = averageTokensPerRead * (data.count - 1)
stats.duplicateFileReads.set(path, {
count: data.count,
tokens: duplicateTokens,
})
}
})
return stats
}
/**
 * Fold a single content block's estimated token count into `stats`,
 * categorizing it as human/assistant text, local command output, a tool
 * request, a tool result, or "other".
 *
 * Side effects: mutates `stats`; records tool names in `toolIds`, Read
 * file paths in `readToolPaths`, and per-file read totals in `fileReads`
 * so analyzeContext can compute duplicate-read waste afterwards.
 */
function processBlock(
  block: ContentBlockParam | ContentBlock | BetaContentBlock,
  message: UserMessage | AssistantMessage,
  stats: TokenStats,
  toolIds: Map<string, string>,
  readToolPaths: Map<string, string>,
  fileReads: Map<string, { count: number; totalTokens: number }>,
): void {
  // Token cost of the whole serialized block; counted toward the total
  // regardless of which category it lands in below.
  const tokens = countTokens(jsonStringify(block))
  stats.total += tokens
  switch (block.type) {
    case 'text':
      // Local command output is reported separately from plain chat text.
      if (
        message.type === 'user' &&
        'text' in block &&
        block.text.includes('local-command-stdout')
      ) {
        stats.localCommandOutputs += tokens
      } else {
        stats[
          message.type === 'user' ? 'humanMessages' : 'assistantMessages'
        ] += tokens
      }
      break
    case 'tool_use': {
      if ('name' in block && 'id' in block) {
        const toolName = block.name || 'unknown'
        increment(stats.toolRequests, toolName, tokens)
        // Remember the tool name so the matching tool_result can be
        // attributed to it.
        toolIds.set(block.id, toolName)
        // Track Read tool file paths for duplicate-read detection.
        if (
          toolName === 'Read' &&
          'input' in block &&
          block.input &&
          typeof block.input === 'object' &&
          'file_path' in block.input
        ) {
          const path = String(
            (block.input as Record<string, unknown>).file_path,
          )
          readToolPaths.set(block.id, path)
        }
      }
      break
    }
    case 'tool_result': {
      if ('tool_use_id' in block) {
        const toolName = toolIds.get(block.tool_use_id) || 'unknown'
        increment(stats.toolResults, toolName, tokens)
        // Accumulate per-file read tokens for duplicate-read detection.
        if (toolName === 'Read') {
          const path = readToolPaths.get(block.tool_use_id)
          if (path) {
            const current = fileReads.get(path) || { count: 0, totalTokens: 0 }
            fileReads.set(path, {
              count: current.count + 1,
              totalTokens: current.totalTokens + tokens,
            })
          }
        }
      }
      break
    }
    default:
      // Everything else: images, thinking, documents, server tool results,
      // MCP blocks, and any block types added in future SDK versions.
      // Previously only an explicit list of known types was bucketed here,
      // so an unlisted type was added to `total` but missing from every
      // category; the default case keeps the breakdown consistent with
      // `total`.
      stats.other += tokens
      break
  }
}
/** Add `value` to the entry at `key`, treating a missing key as zero. */
function increment(map: Map<string, number>, key: string, value: number): void {
  const previous = map.get(key) || 0
  map.set(key, previous + value)
}
/**
 * Flatten a TokenStats breakdown into a flat numeric metric map for
 * Statsig logging.
 *
 * Emits absolute token counts unconditionally; percentage metrics
 * (rounded to whole percents) are emitted only when `stats.total > 0`
 * to avoid division by zero.
 *
 * @param stats - The aggregated token statistics to flatten.
 * @returns A record of metric name to numeric value.
 */
export function tokenStatsToStatsigMetrics(
  stats: TokenStats,
): Record<string, number> {
  const out: Record<string, number> = {
    total_tokens: stats.total,
    human_message_tokens: stats.humanMessages,
    assistant_message_tokens: stats.assistantMessages,
    local_command_output_tokens: stats.localCommandOutputs,
    other_tokens: stats.other,
  }

  for (const [type, count] of stats.attachments) {
    out[`attachment_${type}_count`] = count
  }
  for (const [tool, tokens] of stats.toolRequests) {
    out[`tool_request_${tool}_tokens`] = tokens
  }
  for (const [tool, tokens] of stats.toolResults) {
    out[`tool_result_${tool}_tokens`] = tokens
  }

  let duplicateTotal = 0
  for (const entry of stats.duplicateFileReads.values()) {
    duplicateTotal += entry.tokens
  }
  out.duplicate_read_tokens = duplicateTotal
  out.duplicate_read_file_count = stats.duplicateFileReads.size

  if (stats.total > 0) {
    // Whole-percent share of the grand total.
    const percentOfTotal = (part: number): number =>
      Math.round((part / stats.total) * 100)

    out.human_message_percent = percentOfTotal(stats.humanMessages)
    out.assistant_message_percent = percentOfTotal(stats.assistantMessages)
    out.local_command_output_percent = percentOfTotal(stats.localCommandOutputs)
    out.duplicate_read_percent = percentOfTotal(duplicateTotal)

    let toolRequestTotal = 0
    for (const v of stats.toolRequests.values()) {
      toolRequestTotal += v
    }
    let toolResultTotal = 0
    for (const v of stats.toolResults.values()) {
      toolResultTotal += v
    }
    out.tool_request_percent = percentOfTotal(toolRequestTotal)
    out.tool_result_percent = percentOfTotal(toolResultTotal)

    // Per-tool percentage breakdowns.
    for (const [tool, tokens] of stats.toolRequests) {
      out[`tool_request_${tool}_percent`] = percentOfTotal(tokens)
    }
    for (const [tool, tokens] of stats.toolResults) {
      out[`tool_result_${tool}_percent`] = percentOfTotal(tokens)
    }
  }

  return out
}