promptCacheBreakDetection.ts
services/api/promptCacheBreakDetection.ts
728 lines · 26,288 bytes · 8 exports · 14 imports · 10 keywords
What this is
This page documents one file from the repository and includes its full source so you can read it without leaving the docs site.
Beginner explanation
This file is one piece of the larger system. Its name, directory, imports, and exports show where it fits. Start by reading the exports and related files first.
How it is used
Start from the exports list and related files. Those are the easiest clues for where this file fits into the system.
Expert explanation
Architecturally, this file intersects with integrations. It contains 728 lines, 14 detected imports, and 8 detected exports.
Important relationships
Detected exports
CACHE_TTL_1HOUR_MS, PromptStateSnapshot, recordPromptState, checkResponseForCacheBreak, notifyCacheDeletion, notifyCompaction, cleanupAgentTracking, resetPromptCacheBreakDetection
Keywords
changes, prev, cache, agentId, querySource, model, system, parts, break, unknown
Detected imports
@anthropic-ai/sdk/resources/beta/messages/messages.mjs, @anthropic-ai/sdk/resources/index.mjs, diff, fs/promises, path, src/types/ids.js, src/types/message.js, src/utils/debug.js, src/utils/hash.js, src/utils/log.js, src/utils/permissions/filesystem.js, src/utils/slowOperations.js, ../../constants/querySource.js, ../analytics/index.js
Source notes
This page embeds the full file contents. Small or leaf files are still indexed honestly instead of being over-explained.
Full source
import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
import type { TextBlockParam } from '@anthropic-ai/sdk/resources/index.mjs'
import { createPatch } from 'diff'
import { mkdir, writeFile } from 'fs/promises'
import { join } from 'path'
import type { AgentId } from 'src/types/ids.js'
import type { Message } from 'src/types/message.js'
import { logForDebugging } from 'src/utils/debug.js'
import { djb2Hash } from 'src/utils/hash.js'
import { logError } from 'src/utils/log.js'
import { getClaudeTempDir } from 'src/utils/permissions/filesystem.js'
import { jsonStringify } from 'src/utils/slowOperations.js'
import type { QuerySource } from '../../constants/querySource.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../analytics/index.js'
/** Build a randomized temp-file path for a cache-break diff, e.g.
 * `<tempdir>/cache-break-ab3z.diff`. The 4-char random suffix keeps
 * successive diffs in one session from overwriting each other. */
function getCacheBreakDiffPath(): string {
  const alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789'
  const suffix = Array.from(
    { length: 4 },
    () => alphabet[Math.floor(Math.random() * alphabet.length)],
  ).join('')
  return join(getClaudeTempDir(), `cache-break-${suffix}.diff`)
}
/** Snapshot of everything cache-key-relevant from the previous API call for
 * one tracking key. recordPromptState diffs the incoming request against this
 * and then overwrites it; checkResponseForCacheBreak reads the pending diff
 * plus the token baseline to decide whether a real break happened. */
type PreviousState = {
  // Hash of system blocks with cache_control stripped — flips on content edits only.
  systemHash: number
  // Hash of tool schemas with cache_control stripped.
  toolsHash: number
  /** Hash of system blocks WITH cache_control intact. Catches scope/TTL flips
   * (global↔org, 1h↔5m) that stripCacheControl erases from systemHash. */
  cacheControlHash: number
  // Tool names in request order; set-diffed to report added/removed tools.
  toolNames: string[]
  /** Per-tool schema hash. Diffed to name which tool's description changed
   * when toolSchemasChanged but added=removed=0 (77% of tool breaks per
   * BQ 2026-03-22). AgentTool/SkillTool embed dynamic agent/command lists. */
  perToolHashes: Record<string, number>
  // Total character count across system blocks — reported as a delta on breaks.
  systemCharCount: number
  model: string
  fastMode: boolean
  /** 'tool_based' | 'system_prompt' | 'none' — flips when MCP tools are
   * discovered/removed. */
  globalCacheStrategy: string
  /** Sorted beta header list. Diffed to show which headers were added/removed. */
  betas: string[]
  /** AFK_MODE_BETA_HEADER presence — should NOT break cache anymore
   * (sticky-on latched in claude.ts). Tracked to verify the fix. */
  autoModeActive: boolean
  /** Overage state flip — should NOT break cache anymore (eligibility is
   * latched session-stable in should1hCacheTTL). Tracked to verify the fix. */
  isUsingOverage: boolean
  /** Cache-editing beta header presence — should NOT break cache anymore
   * (sticky-on latched in claude.ts). Tracked to verify the fix. */
  cachedMCEnabled: boolean
  /** Resolved effort (env → options → model default). Goes into output_config
   * or anthropic_internal.effort_override. */
  effortValue: string
  /** Hash of getExtraBodyParams() — catches CLAUDE_CODE_EXTRA_BODY and
   * anthropic_internal changes. */
  extraBodyHash: number
  // Number of recordPromptState calls observed for this key (starts at 1).
  callCount: number
  // Diff computed in phase 1, consumed (and cleared) in phase 2;
  // null when nothing changed since the previous call.
  pendingChanges: PendingChanges | null
  // Cache-read token baseline from the last response; null until the first
  // response is seen (or after notifyCompaction resets it).
  prevCacheReadTokens: number | null
  /** Set when cached microcompact sends cache_edits deletions. Cache reads
   * will legitimately drop — this is expected, not a break. */
  cacheDeletionsPending: boolean
  // Lazily serializes the previous prompt state for debug diff generation
  // (~300KB of work, deferred until a break is actually reported).
  buildDiffableContent: () => string
}
/** What differed between the previous and current request. Computed in phase 1
 * (recordPromptState) and used in phase 2 (checkResponseForCacheBreak) to
 * explain an observed cache break. */
type PendingChanges = {
  systemPromptChanged: boolean
  toolSchemasChanged: boolean
  modelChanged: boolean
  fastModeChanged: boolean
  cacheControlChanged: boolean
  globalCacheStrategyChanged: boolean
  betasChanged: boolean
  autoModeChanged: boolean
  overageChanged: boolean
  cachedMCChanged: boolean
  effortChanged: boolean
  extraBodyChanged: boolean
  // Tool-set deltas (counts plus the actual names).
  addedToolCount: number
  removedToolCount: number
  // Character-count delta of the system prompt (new minus old).
  systemCharDelta: number
  addedTools: string[]
  removedTools: string[]
  // Tools present in both requests whose per-tool schema hash changed.
  changedToolSchemas: string[]
  previousModel: string
  newModel: string
  prevGlobalCacheStrategy: string
  newGlobalCacheStrategy: string
  addedBetas: string[]
  removedBetas: string[]
  prevEffortValue: string
  newEffortValue: string
  // Serializes the PREVIOUS prompt state (captured before it was overwritten)
  // for the "before" side of the debug diff file.
  buildPrevDiffableContent: () => string
}
// Tracking state per key from getTrackingKey (querySource or agentId).
// Map iteration order is insertion order, which the eviction loop relies on.
const previousStateBySource = new Map<string, PreviousState>()
// Cap the number of tracked sources to prevent unbounded memory growth.
// Each entry stores a ~300KB+ diffableContent string (serialized system prompt
// + tool schemas). Without a cap, spawning many subagents (each with a unique
// agentId key) causes the map to grow indefinitely.
const MAX_TRACKED_SOURCES = 10
// Query-source prefixes eligible for tracking; everything else is ignored
// (see getTrackingKey for the rationale on untracked sources).
const TRACKED_SOURCE_PREFIXES = [
  'repl_main_thread',
  'sdk',
  'agent:custom',
  'agent:default',
  'agent:builtin',
]
// Minimum absolute token drop required to trigger a cache break warning.
// Small drops (e.g., a few thousand tokens) can happen due to normal variation
// and aren't worth alerting on.
const MIN_CACHE_MISS_TOKENS = 2_000
// Anthropic's server-side prompt cache TTL thresholds to test.
// Cache breaks after these durations are likely due to TTL expiration
// rather than client-side changes.
const CACHE_TTL_5MIN_MS = 5 * 60 * 1000
export const CACHE_TTL_1HOUR_MS = 60 * 60 * 1000
/** Models excluded from cache break detection (e.g., haiku has different
 * caching behavior). Matches by substring of the model id. */
function isExcludedModel(model: string): boolean {
  if (model.includes('haiku')) return true
  return false
}
/**
 * Returns the tracking key for a querySource, or null if untracked.
 * Compact shares the same server-side cache as repl_main_thread
 * (same cacheSafeParams), so they share tracking state.
 *
 * For subagents with a tracked querySource, uses the unique agentId to
 * isolate tracking state. This prevents false positive cache break
 * notifications when multiple instances of the same agent type run
 * concurrently.
 *
 * Untracked sources (speculation, session_memory, prompt_suggestion, etc.)
 * are short-lived forked agents where cache break detection provides no
 * value — they run 1-3 turns with a fresh agentId each time, so there's
 * nothing meaningful to compare against. Their cache metrics are still
 * logged via tengu_api_success for analytics.
 */
function getTrackingKey(
  querySource: QuerySource,
  agentId?: AgentId,
): string | null {
  if (querySource === 'compact') return 'repl_main_thread'
  const tracked = TRACKED_SOURCE_PREFIXES.some(prefix =>
    querySource.startsWith(prefix),
  )
  if (!tracked) return null
  return agentId || querySource
}
/** Return a copy of each item with its `cache_control` key removed.
 * Items that never had one are returned by reference, unchanged. */
function stripCacheControl(
  items: ReadonlyArray<Record<string, unknown>>,
): unknown[] {
  return items.map(item => {
    if (!('cache_control' in item)) return item
    const copy = { ...item }
    delete copy.cache_control
    return copy
  })
}
/** Hash arbitrary JSON-serializable data to a number. Uses Bun's native hash
 * when running under Bun, otherwise falls back to djb2 (e.g. Node.js via npm
 * global install). */
function computeHash(data: unknown): number {
  const serialized = jsonStringify(data)
  if (typeof Bun === 'undefined') {
    // Fallback for non-Bun runtimes
    return djb2Hash(serialized)
  }
  const hashed = Bun.hash(serialized)
  // Bun.hash can return bigint for large inputs; truncate to 32 bits so the
  // result fits safely in a JS number
  if (typeof hashed === 'bigint') {
    return Number(hashed & 0xffffffffn)
  }
  return hashed
}
/** MCP tool names are user-controlled (server config) and may leak filepaths.
 * Collapse them to 'mcp'; built-in names are a fixed vocabulary. */
function sanitizeToolName(name: string): string {
  if (name.startsWith('mcp__')) return 'mcp'
  return name
}
/** Map each tool (already cache_control-stripped) to its content hash, keyed
 * by name; positional `__idx_N` keys are used when a name is missing. */
function computePerToolHashes(
  strippedTools: ReadonlyArray<unknown>,
  names: string[],
): Record<string, number> {
  return strippedTools.reduce<Record<string, number>>((acc, tool, idx) => {
    const key = names[idx] ?? `__idx_${idx}`
    acc[key] = computeHash(tool)
    return acc
  }, {})
}
/** Total character count across all system prompt text blocks. */
function getSystemCharCount(system: TextBlockParam[]): number {
  return system.reduce((sum, block) => sum + block.text.length, 0)
}
/** Serialize the prompt state (model + system text + sorted tool details)
 * into a single human-readable string that can be diffed across calls to
 * show exactly what changed when a cache break occurs. */
function buildDiffableContent(
  system: TextBlockParam[],
  tools: BetaToolUnion[],
  model: string,
): string {
  const systemText = system.map(block => block.text).join('\n\n')
  // Sorted so tool ordering differences don't produce noisy diffs.
  const describeTool = (t: BetaToolUnion): string => {
    if (!('name' in t)) return 'unknown'
    const desc = 'description' in t ? t.description : ''
    const schema = 'input_schema' in t ? jsonStringify(t.input_schema) : ''
    return `${t.name}\n description: ${desc}\n input_schema: ${schema}`
  }
  const toolDetails = tools.map(describeTool).sort().join('\n\n')
  return `Model: ${model}\n\n=== System Prompt ===\n\n${systemText}\n\n=== Tools (${tools.length}) ===\n\n${toolDetails}\n`
}
/** Extended tracking snapshot — everything that could affect the server-side
 * cache key that we can observe from the client. All fields are optional so
 * the call site can add incrementally; undefined fields compare as stable. */
export type PromptStateSnapshot = {
  system: TextBlockParam[]
  toolSchemas: BetaToolUnion[]
  querySource: QuerySource
  model: string
  // Disambiguates concurrent subagents sharing a querySource (see getTrackingKey).
  agentId?: AgentId
  fastMode?: boolean
  globalCacheStrategy?: string
  betas?: readonly string[]
  autoModeActive?: boolean
  isUsingOverage?: boolean
  cachedMCEnabled?: boolean
  // Normalized to a string internally so number/string callers compare alike.
  effortValue?: string | number
  // Hashed for comparison; any JSON-serializable shape is accepted.
  extraBodyParams?: unknown
}
/**
 * Phase 1 (pre-call): Record the current prompt/tool state and detect what changed.
 * Does NOT fire events — just stores pending changes for phase 2
 * (checkResponseForCacheBreak) to use. All errors are caught and logged so
 * detection can never break the request path.
 */
export function recordPromptState(snapshot: PromptStateSnapshot): void {
  try {
    const {
      system,
      toolSchemas,
      querySource,
      model,
      agentId,
      fastMode,
      globalCacheStrategy = '',
      betas = [],
      autoModeActive = false,
      isUsingOverage = false,
      cachedMCEnabled = false,
      effortValue,
      extraBodyParams,
    } = snapshot
    const key = getTrackingKey(querySource, agentId)
    if (!key) return
    // Strip cache_control first so the content hashes don't flip when only
    // scope/TTL markers change (those are tracked separately below).
    const strippedSystem = stripCacheControl(
      system as unknown as ReadonlyArray<Record<string, unknown>>,
    )
    const strippedTools = stripCacheControl(
      toolSchemas as unknown as ReadonlyArray<Record<string, unknown>>,
    )
    const systemHash = computeHash(strippedSystem)
    const toolsHash = computeHash(strippedTools)
    // Hash the full system array INCLUDING cache_control — this catches
    // scope flips (global↔org/none) and TTL flips (1h↔5m) that the stripped
    // hash can't see because the text content is identical.
    const cacheControlHash = computeHash(
      system.map(b => ('cache_control' in b ? b.cache_control : null)),
    )
    const toolNames = toolSchemas.map(t => ('name' in t ? t.name : 'unknown'))
    // Only compute per-tool hashes when the aggregate changed — common case
    // (tools unchanged) skips N extra jsonStringify calls.
    const computeToolHashes = () =>
      computePerToolHashes(strippedTools, toolNames)
    const systemCharCount = getSystemCharCount(system)
    // Deferred: serializing system + tools is expensive; only executed if a
    // break is actually reported (see writeCacheBreakDiff in phase 2).
    const lazyDiffableContent = () =>
      buildDiffableContent(system, toolSchemas, model)
    const isFastMode = fastMode ?? false
    // Sorted copy so the beta comparison is order-insensitive.
    const sortedBetas = [...betas].sort()
    const effortStr = effortValue === undefined ? '' : String(effortValue)
    const extraBodyHash =
      extraBodyParams === undefined ? 0 : computeHash(extraBodyParams)
    const prev = previousStateBySource.get(key)
    if (!prev) {
      // First call for this key: seed the baseline; nothing to diff yet.
      // Evict oldest entries if map is at capacity (Map preserves insertion
      // order, so keys().next() yields the oldest entry).
      while (previousStateBySource.size >= MAX_TRACKED_SOURCES) {
        const oldest = previousStateBySource.keys().next().value
        if (oldest !== undefined) previousStateBySource.delete(oldest)
      }
      previousStateBySource.set(key, {
        systemHash,
        toolsHash,
        cacheControlHash,
        toolNames,
        systemCharCount,
        model,
        fastMode: isFastMode,
        globalCacheStrategy,
        betas: sortedBetas,
        autoModeActive,
        isUsingOverage,
        cachedMCEnabled,
        effortValue: effortStr,
        extraBodyHash,
        callCount: 1,
        pendingChanges: null,
        prevCacheReadTokens: null,
        cacheDeletionsPending: false,
        buildDiffableContent: lazyDiffableContent,
        perToolHashes: computeToolHashes(),
      })
      return
    }
    prev.callCount++
    // Compare every cache-key-relevant dimension against the previous call.
    const systemPromptChanged = systemHash !== prev.systemHash
    const toolSchemasChanged = toolsHash !== prev.toolsHash
    const modelChanged = model !== prev.model
    const fastModeChanged = isFastMode !== prev.fastMode
    const cacheControlChanged = cacheControlHash !== prev.cacheControlHash
    const globalCacheStrategyChanged =
      globalCacheStrategy !== prev.globalCacheStrategy
    // Both lists are sorted, so an element-wise comparison suffices.
    const betasChanged =
      sortedBetas.length !== prev.betas.length ||
      sortedBetas.some((b, i) => b !== prev.betas[i])
    const autoModeChanged = autoModeActive !== prev.autoModeActive
    const overageChanged = isUsingOverage !== prev.isUsingOverage
    const cachedMCChanged = cachedMCEnabled !== prev.cachedMCEnabled
    const effortChanged = effortStr !== prev.effortValue
    const extraBodyChanged = extraBodyHash !== prev.extraBodyHash
    if (
      systemPromptChanged ||
      toolSchemasChanged ||
      modelChanged ||
      fastModeChanged ||
      cacheControlChanged ||
      globalCacheStrategyChanged ||
      betasChanged ||
      autoModeChanged ||
      overageChanged ||
      cachedMCChanged ||
      effortChanged ||
      extraBodyChanged
    ) {
      // Something changed: compute the detailed diff for phase 2 to report.
      const prevToolSet = new Set(prev.toolNames)
      const newToolSet = new Set(toolNames)
      const prevBetaSet = new Set(prev.betas)
      const newBetaSet = new Set(sortedBetas)
      const addedTools = toolNames.filter(n => !prevToolSet.has(n))
      const removedTools = prev.toolNames.filter(n => !newToolSet.has(n))
      const changedToolSchemas: string[] = []
      if (toolSchemasChanged) {
        // Name which surviving tools' schemas changed (not just added/removed).
        const newHashes = computeToolHashes()
        for (const name of toolNames) {
          if (!prevToolSet.has(name)) continue
          if (newHashes[name] !== prev.perToolHashes[name]) {
            changedToolSchemas.push(name)
          }
        }
        prev.perToolHashes = newHashes
      }
      prev.pendingChanges = {
        systemPromptChanged,
        toolSchemasChanged,
        modelChanged,
        fastModeChanged,
        cacheControlChanged,
        globalCacheStrategyChanged,
        betasChanged,
        autoModeChanged,
        overageChanged,
        cachedMCChanged,
        effortChanged,
        extraBodyChanged,
        addedToolCount: addedTools.length,
        removedToolCount: removedTools.length,
        addedTools,
        removedTools,
        changedToolSchemas,
        systemCharDelta: systemCharCount - prev.systemCharCount,
        previousModel: prev.model,
        newModel: model,
        prevGlobalCacheStrategy: prev.globalCacheStrategy,
        newGlobalCacheStrategy: globalCacheStrategy,
        addedBetas: sortedBetas.filter(b => !prevBetaSet.has(b)),
        removedBetas: prev.betas.filter(b => !newBetaSet.has(b)),
        prevEffortValue: prev.effortValue,
        newEffortValue: effortStr,
        // Capture the OLD serializer before it's overwritten below — this is
        // the "before" side of the debug diff.
        buildPrevDiffableContent: prev.buildDiffableContent,
      }
    } else {
      prev.pendingChanges = null
    }
    // Overwrite the baseline with the current request's state.
    prev.systemHash = systemHash
    prev.toolsHash = toolsHash
    prev.cacheControlHash = cacheControlHash
    prev.toolNames = toolNames
    prev.systemCharCount = systemCharCount
    prev.model = model
    prev.fastMode = isFastMode
    prev.globalCacheStrategy = globalCacheStrategy
    prev.betas = sortedBetas
    prev.autoModeActive = autoModeActive
    prev.isUsingOverage = isUsingOverage
    prev.cachedMCEnabled = cachedMCEnabled
    prev.effortValue = effortStr
    prev.extraBodyHash = extraBodyHash
    prev.buildDiffableContent = lazyDiffableContent
  } catch (e: unknown) {
    logError(e)
  }
}
/**
 * Phase 2 (post-call): Check the API response's cache tokens to determine
 * if a cache break actually occurred. If it did, use the pending changes
 * from phase 1 to explain why, fire a tengu_prompt_cache_break event, and
 * (when a previous snapshot is available) write a debug diff file.
 * All errors are caught and logged so detection can never break the caller.
 */
export async function checkResponseForCacheBreak(
  querySource: QuerySource,
  cacheReadTokens: number,
  cacheCreationTokens: number,
  messages: Message[],
  agentId?: AgentId,
  requestId?: string | null,
): Promise<void> {
  try {
    const key = getTrackingKey(querySource, agentId)
    if (!key) return
    const state = previousStateBySource.get(key)
    if (!state) return
    // Skip excluded models (e.g., haiku has different caching behavior)
    if (isExcludedModel(state.model)) return
    // Advance the baseline immediately so every later return still leaves the
    // next call comparing against THIS response's cache reads.
    const prevCacheRead = state.prevCacheReadTokens
    state.prevCacheReadTokens = cacheReadTokens
    // Calculate time since last call for TTL detection by finding the most recent
    // assistant message timestamp in the messages array (before the current response)
    const lastAssistantMessage = messages.findLast(m => m.type === 'assistant')
    const timeSinceLastAssistantMsg = lastAssistantMessage
      ? Date.now() - new Date(lastAssistantMessage.timestamp).getTime()
      : null
    // Skip the first call — no previous value to compare against
    if (prevCacheRead === null) return
    const changes = state.pendingChanges
    // Cache deletions via cached microcompact intentionally reduce the cached
    // prefix. The drop in cache read tokens is expected — reset the baseline
    // so we don't false-positive on the next call.
    if (state.cacheDeletionsPending) {
      state.cacheDeletionsPending = false
      logForDebugging(
        `[PROMPT CACHE] cache deletion applied, cache read: ${prevCacheRead} → ${cacheReadTokens} (expected drop)`,
      )
      // Don't flag as a break — the remaining state is still valid
      state.pendingChanges = null
      return
    }
    // Detect a cache break: cache read dropped >5% from previous AND
    // the absolute drop exceeds the minimum threshold.
    const tokenDrop = prevCacheRead - cacheReadTokens
    if (
      cacheReadTokens >= prevCacheRead * 0.95 ||
      tokenDrop < MIN_CACHE_MISS_TOKENS
    ) {
      // No break: discard any pending diff so it can't mislabel a later miss.
      state.pendingChanges = null
      return
    }
    // Build explanation from pending changes (if any)
    const parts: string[] = []
    if (changes) {
      if (changes.modelChanged) {
        parts.push(
          `model changed (${changes.previousModel} → ${changes.newModel})`,
        )
      }
      if (changes.systemPromptChanged) {
        const charDelta = changes.systemCharDelta
        const charInfo =
          charDelta === 0
            ? ''
            : charDelta > 0
              ? ` (+${charDelta} chars)`
              : ` (${charDelta} chars)`
        parts.push(`system prompt changed${charInfo}`)
      }
      if (changes.toolSchemasChanged) {
        const toolDiff =
          changes.addedToolCount > 0 || changes.removedToolCount > 0
            ? ` (+${changes.addedToolCount}/-${changes.removedToolCount} tools)`
            : ' (tool prompt/schema changed, same tool set)'
        parts.push(`tools changed${toolDiff}`)
      }
      if (changes.fastModeChanged) {
        parts.push('fast mode toggled')
      }
      if (changes.globalCacheStrategyChanged) {
        parts.push(
          `global cache strategy changed (${changes.prevGlobalCacheStrategy || 'none'} → ${changes.newGlobalCacheStrategy || 'none'})`,
        )
      }
      if (
        changes.cacheControlChanged &&
        !changes.globalCacheStrategyChanged &&
        !changes.systemPromptChanged
      ) {
        // Only report as standalone cause if nothing else explains it —
        // otherwise the scope/TTL flip is a consequence, not the root cause.
        parts.push('cache_control changed (scope or TTL)')
      }
      if (changes.betasChanged) {
        const added = changes.addedBetas.length
          ? `+${changes.addedBetas.join(',')}`
          : ''
        const removed = changes.removedBetas.length
          ? `-${changes.removedBetas.join(',')}`
          : ''
        const diff = [added, removed].filter(Boolean).join(' ')
        parts.push(`betas changed${diff ? ` (${diff})` : ''}`)
      }
      if (changes.autoModeChanged) {
        parts.push('auto mode toggled')
      }
      if (changes.overageChanged) {
        parts.push('overage state changed (TTL latched, no flip)')
      }
      if (changes.cachedMCChanged) {
        parts.push('cached microcompact toggled')
      }
      if (changes.effortChanged) {
        parts.push(
          `effort changed (${changes.prevEffortValue || 'default'} → ${changes.newEffortValue || 'default'})`,
        )
      }
      if (changes.extraBodyChanged) {
        parts.push('extra body params changed')
      }
    }
    // Check if time gap suggests TTL expiration
    const lastAssistantMsgOver5minAgo =
      timeSinceLastAssistantMsg !== null &&
      timeSinceLastAssistantMsg > CACHE_TTL_5MIN_MS
    const lastAssistantMsgOver1hAgo =
      timeSinceLastAssistantMsg !== null &&
      timeSinceLastAssistantMsg > CACHE_TTL_1HOUR_MS
    // Post PR #19823 BQ analysis (bq-queries/prompt-caching/cache_break_pr19823_analysis.sql):
    // when all client-side flags are false and the gap is under TTL, ~90% of breaks
    // are server-side routing/eviction or billed/inference disagreement. Label
    // accordingly instead of implying a CC bug hunt.
    let reason: string
    if (parts.length > 0) {
      reason = parts.join(', ')
    } else if (lastAssistantMsgOver1hAgo) {
      reason = 'possible 1h TTL expiry (prompt unchanged)'
    } else if (lastAssistantMsgOver5minAgo) {
      reason = 'possible 5min TTL expiry (prompt unchanged)'
    } else if (timeSinceLastAssistantMsg !== null) {
      reason = 'likely server-side (prompt unchanged, <5min gap)'
    } else {
      reason = 'unknown cause'
    }
    logEvent('tengu_prompt_cache_break', {
      systemPromptChanged: changes?.systemPromptChanged ?? false,
      toolSchemasChanged: changes?.toolSchemasChanged ?? false,
      modelChanged: changes?.modelChanged ?? false,
      fastModeChanged: changes?.fastModeChanged ?? false,
      cacheControlChanged: changes?.cacheControlChanged ?? false,
      globalCacheStrategyChanged: changes?.globalCacheStrategyChanged ?? false,
      betasChanged: changes?.betasChanged ?? false,
      autoModeChanged: changes?.autoModeChanged ?? false,
      overageChanged: changes?.overageChanged ?? false,
      cachedMCChanged: changes?.cachedMCChanged ?? false,
      effortChanged: changes?.effortChanged ?? false,
      extraBodyChanged: changes?.extraBodyChanged ?? false,
      addedToolCount: changes?.addedToolCount ?? 0,
      removedToolCount: changes?.removedToolCount ?? 0,
      systemCharDelta: changes?.systemCharDelta ?? 0,
      // Tool names are sanitized: built-in names are a fixed vocabulary,
      // MCP tools collapse to 'mcp' (user-configured, could leak paths).
      addedTools: (changes?.addedTools ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      removedTools: (changes?.removedTools ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      changedToolSchemas: (changes?.changedToolSchemas ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      // Beta header names and cache strategy are fixed enum-like values,
      // not code or filepaths. requestId is an opaque server-generated ID.
      addedBetas: (changes?.addedBetas ?? []).join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      removedBetas: (changes?.removedBetas ?? []).join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      prevGlobalCacheStrategy: (changes?.prevGlobalCacheStrategy ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      newGlobalCacheStrategy: (changes?.newGlobalCacheStrategy ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      callNumber: state.callCount,
      prevCacheReadTokens: prevCacheRead,
      cacheReadTokens,
      cacheCreationTokens,
      timeSinceLastAssistantMsg: timeSinceLastAssistantMsg ?? -1,
      lastAssistantMsgOver5minAgo,
      lastAssistantMsgOver1hAgo,
      requestId: (requestId ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    // Write diff file for ant debugging via --debug. The path is included in
    // the summary log so ants can find it (DevBar UI removed — event data
    // flows reliably to BQ for analytics).
    let diffPath: string | undefined
    if (changes?.buildPrevDiffableContent) {
      diffPath = await writeCacheBreakDiff(
        changes.buildPrevDiffableContent(),
        state.buildDiffableContent(),
      )
    }
    const diffSuffix = diffPath ? `, diff: ${diffPath}` : ''
    const summary = `[PROMPT CACHE BREAK] ${reason} [source=${querySource}, call #${state.callCount}, cache read: ${prevCacheRead} → ${cacheReadTokens}, creation: ${cacheCreationTokens}${diffSuffix}]`
    logForDebugging(summary, { level: 'warn' })
    // Pending diff consumed — clear it so it can't be reused for a later miss.
    state.pendingChanges = null
  } catch (e: unknown) {
    logError(e)
  }
}
/**
 * Call when cached microcompact sends cache_edits deletions.
 * The next API response will have lower cache read tokens — that's
 * expected, not a cache break.
 */
export function notifyCacheDeletion(
  querySource: QuerySource,
  agentId?: AgentId,
): void {
  const key = getTrackingKey(querySource, agentId)
  if (!key) return
  const state = previousStateBySource.get(key)
  if (state === undefined) return
  state.cacheDeletionsPending = true
}
/**
 * Call after compaction to reset the cache read baseline.
 * Compaction legitimately reduces message count, so cache read tokens
 * will naturally drop on the next call — that's not a break.
 */
export function notifyCompaction(
  querySource: QuerySource,
  agentId?: AgentId,
): void {
  const key = getTrackingKey(querySource, agentId)
  if (!key) return
  const state = previousStateBySource.get(key)
  if (state === undefined) return
  state.prevCacheReadTokens = null
}
/** Drop tracking state for a finished subagent so its large serialized-prompt
 * closure can be garbage collected (see MAX_TRACKED_SOURCES note). */
export function cleanupAgentTracking(agentId: AgentId): void {
  previousStateBySource.delete(agentId)
}
/** Clear all per-source tracking state. */
export function resetPromptCacheBreakDetection(): void {
  previousStateBySource.clear()
}
/** Write a unified diff of the prompt state before/after a cache break to a
 * randomized temp file. Returns the file path, or undefined on any failure
 * (best-effort debugging aid — errors are deliberately swallowed). */
async function writeCacheBreakDiff(
  prevContent: string,
  newContent: string,
): Promise<string | undefined> {
  try {
    await mkdir(getClaudeTempDir(), { recursive: true })
    const patch = createPatch(
      'prompt-state',
      prevContent,
      newContent,
      'before',
      'after',
    )
    const outPath = getCacheBreakDiffPath()
    await writeFile(outPath, patch)
    return outPath
  } catch {
    return undefined
  }
}