claudeAiLimits.ts
services/claudeAiLimits.ts
516
Lines
16803
Bytes
13
Exports
16
Imports
10
Keywords
What this is
This page documents one file from the repository and includes its full source so you can read it without leaving the docs site.
Beginner explanation
This file is one piece of the larger system. Its name, directory, imports, and exports show where it fits. Start by reading the exports and related files first.
How it is used
Start from the exports list and related files. Those are the easiest clues for where this file fits into the system.
Expert explanation
Architecturally, this file intersects with integrations. It contains 516 lines, 16 detected imports, and 13 detected exports.
Important relationships
Detected exports
getRateLimitDisplayName, OverageDisabledReason, ClaudeAILimits, currentLimits, getRawUtilization, statusListeners, emitStatusChange, checkQuotaStatus, extractQuotaStatusFromHeaders, extractQuotaStatusFromError, getRateLimitErrorMessage, getRateLimitWarning, getUsingOverageText
Keywords
headers, ratelimittype, status, utilization, overage, unifiedratelimitfallbackavailable, limit, limits, resetsat, warning
Detected imports
@anthropic-ai/sdk, @anthropic-ai/sdk/resources/index.mjs, lodash-es/isEqual.js, ../bootstrap/state.js, ../utils/auth.js, ../utils/betas.js, ../utils/config.js, ../utils/log.js, ../utils/model/model.js, ../utils/privacyLevel.js, ./analytics/index.js, ./analytics/index.js, ./api/claude.js, ./api/client.js, ./rateLimitMocking.js, ./rateLimitMessages.js
Source notes
This page embeds the full file contents. Small or leaf files are still indexed honestly instead of being over-explained.
Full source
import { APIError } from '@anthropic-ai/sdk'
import type { MessageParam } from '@anthropic-ai/sdk/resources/index.mjs'
import isEqual from 'lodash-es/isEqual.js'
import { getIsNonInteractiveSession } from '../bootstrap/state.js'
import { isClaudeAISubscriber } from '../utils/auth.js'
import { getModelBetas } from '../utils/betas.js'
import { getGlobalConfig, saveGlobalConfig } from '../utils/config.js'
import { logError } from '../utils/log.js'
import { getSmallFastModel } from '../utils/model/model.js'
import { isEssentialTrafficOnly } from '../utils/privacyLevel.js'
import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from './analytics/index.js'
import { logEvent } from './analytics/index.js'
import { getAPIMetadata } from './api/claude.js'
import { getAnthropicClient } from './api/client.js'
import {
processRateLimitHeaders,
shouldProcessRateLimits,
} from './rateLimitMocking.js'
// Re-export message functions from centralized location
export {
getRateLimitErrorMessage,
getRateLimitWarning,
getUsingOverageText,
} from './rateLimitMessages.js'
// Quota state of a limit. In this file the value is read (unvalidated) from
// the `anthropic-ratelimit-unified-status` and
// `anthropic-ratelimit-unified-overage-status` response headers.
type QuotaStatus = 'allowed' | 'allowed_warning' | 'rejected'
// Identifier of the limit a status refers to. In this file the value is read
// from the `anthropic-ratelimit-unified-representative-claim` header or
// derived from a claim abbreviation ('5h', '7d', 'overage').
type RateLimitType =
| 'five_hour'
| 'seven_day'
| 'seven_day_opus'
| 'seven_day_sonnet'
| 'overage'
// Re-exported as a type-only export so other modules can reference it.
export type { RateLimitType }
// One (utilization, time) pair for the client-side early-warning fallback.
// A warning fires when BOTH conditions hold, i.e. a lot of quota has been
// used while only a small part of the window has elapsed.
type EarlyWarningThreshold = {
utilization: number // 0-1 scale: trigger warning when usage >= this
timePct: number // 0-1 scale: trigger warning when time elapsed <= this
}
// Early-warning settings for a single rate-limit window.
type EarlyWarningConfig = {
// Limit type reported in the resulting warning.
rateLimitType: RateLimitType
// Abbreviation interpolated into the per-claim header names
// (`anthropic-ratelimit-unified-<abbrev>-utilization` / `-reset`).
claimAbbrev: '5h' | '7d'
// Length of the window in seconds; used to compute elapsed-time progress.
windowSeconds: number
// A warning fires if any one of these thresholds is met.
thresholds: EarlyWarningThreshold[]
}
// Building blocks for expressing window lengths in seconds.
const SECONDS_PER_HOUR = 60 * 60
const SECONDS_PER_DAY = 24 * SECONDS_PER_HOUR

// Client-side early-warning settings, listed in the order they are checked.
// These are the fallback used when the server does not send a
// surpassed-threshold header: they flag quota being consumed faster than the
// elapsed share of the window would allow.
const EARLY_WARNING_CONFIGS: EarlyWarningConfig[] = [
  {
    rateLimitType: 'five_hour',
    claimAbbrev: '5h',
    windowSeconds: 5 * SECONDS_PER_HOUR,
    thresholds: [{ utilization: 0.9, timePct: 0.72 }],
  },
  {
    rateLimitType: 'seven_day',
    claimAbbrev: '7d',
    windowSeconds: 7 * SECONDS_PER_DAY,
    thresholds: [
      { utilization: 0.75, timePct: 0.6 },
      { utilization: 0.5, timePct: 0.35 },
      { utilization: 0.25, timePct: 0.15 },
    ],
  },
]

// Claim abbreviation (as it appears inside header names) -> rate limit type.
// Iterated by the header-based early-warning detection.
const EARLY_WARNING_CLAIM_MAP: Record<string, RateLimitType> = {
  '5h': 'five_hour',
  '7d': 'seven_day',
  'overage': 'overage',
}
// Display label for each rate limit type.
const RATE_LIMIT_DISPLAY_NAMES: Record<RateLimitType, string> = {
  five_hour: 'session limit',
  seven_day: 'weekly limit',
  seven_day_opus: 'Opus limit',
  seven_day_sonnet: 'Sonnet limit',
  overage: 'extra usage limit',
}

/**
 * Look up the display label for a rate limit type.
 * @param type - Rate limit type to describe
 * @returns the label from RATE_LIMIT_DISPLAY_NAMES, or `type` itself when the
 *   table has no (non-empty) entry for it
 */
export function getRateLimitDisplayName(type: RateLimitType): string {
  const label = RATE_LIMIT_DISPLAY_NAMES[type]
  if (label) {
    return label
  }
  return type
}
/**
 * Calculate what fraction of a time window has elapsed.
 * Used for time-relative early warning fallback.
 *
 * `resetsAt` is parsed from a response header by the caller, so it can be NaN
 * when the header is malformed. Invalid input (non-finite values or a
 * non-positive window) yields 1 rather than NaN so the documented 0-1 range
 * always holds; 1 means "window fully elapsed", which never satisfies an
 * "early in the window" threshold check.
 *
 * @param resetsAt - Unix epoch timestamp in seconds when the limit resets
 * @param windowSeconds - Duration of the window in seconds
 * @param nowSeconds - Current time as Unix epoch seconds; defaults to the
 *   wall clock and exists mainly so the calculation can be tested
 * @returns fraction (0-1) of the window that has elapsed
 */
function computeTimeProgress(
  resetsAt: number,
  windowSeconds: number,
  nowSeconds: number = Date.now() / 1000,
): number {
  const inputsAreUsable =
    Number.isFinite(resetsAt) &&
    Number.isFinite(nowSeconds) &&
    Number.isFinite(windowSeconds) &&
    windowSeconds > 0
  if (!inputsAreUsable) {
    return 1
  }
  const windowStart = resetsAt - windowSeconds
  const elapsed = nowSeconds - windowStart
  // Clamp: the clock may be before the window start or past the reset time.
  return Math.max(0, Math.min(1, elapsed / windowSeconds))
}
// Reason why overage is disabled/rejected
// These values come from the API's unified limiter
// In this file the value is read (unvalidated, via a type assertion) from the
// `anthropic-ratelimit-unified-overage-disabled-reason` response header.
export type OverageDisabledReason =
| 'overage_not_provisioned' // Overage is not provisioned for this org or seat tier
| 'org_level_disabled' // Organization doesn't have overage enabled
| 'org_level_disabled_until' // Organization overage temporarily disabled
| 'out_of_credits' // Organization has insufficient credits
| 'seat_tier_level_disabled' // Seat tier doesn't have overage enabled
| 'member_level_disabled' // Account specifically has overage disabled
| 'seat_tier_zero_credit_limit' // Seat tier has a zero credit limit
| 'group_zero_credit_limit' // Resolved group limit has a zero credit limit
| 'member_zero_credit_limit' // Account has a zero credit limit
| 'org_service_level_disabled' // Org service specifically has overage disabled
| 'org_service_zero_credit_limit' // Org service has a zero credit limit
| 'no_limits_configured' // No overage limits configured for account
| 'unknown' // Unknown reason, should not happen
// Snapshot of the quota state derived from rate-limit response headers.
export type ClaudeAILimits = {
// Overall quota status for the request.
status: QuotaStatus
// unifiedRateLimitFallbackAvailable is currently used to warn users that set
// their model to Opus whenever they are about to run out of quota. It does
// not change the actual model that is used.
unifiedRateLimitFallbackAvailable: boolean
// Unix epoch seconds at which the limit resets (parsed from a reset header).
resetsAt?: number
// Which limit the status refers to.
rateLimitType?: RateLimitType
// Utilization parsed from a per-claim utilization header; only populated on
// early-warning results in this file.
utilization?: number
// Status of the overage allowance, from the overage-status header.
overageStatus?: QuotaStatus
// Parsed from the overage-reset header.
overageResetsAt?: number
// Why overage is unavailable, from the overage-disabled-reason header.
overageDisabledReason?: OverageDisabledReason
// True when the standard limit is rejected but overage is allowed.
isUsingOverage?: boolean
// Numeric value of the surpassed-threshold header, when the server sent one.
surpassedThreshold?: number
}
// Exported for testing only
// Module-level current state; reassigned by emitStatusChange().
export let currentLimits: ClaudeAILimits = {
status: 'allowed',
unifiedRateLimitFallbackAvailable: false,
isUsingOverage: false,
}
/**
 * Raw per-window utilization from response headers, tracked on every API
 * response (unlike currentLimits.utilization which is only set when a warning
 * threshold fires). Exposed to statusline scripts via getRawUtilization().
 */
type RawWindowUtilization = {
utilization: number // 0-1 fraction
resets_at: number // unix epoch seconds
}
// Per-window readings; a window is absent when its headers were not present.
type RawUtilization = {
five_hour?: RawWindowUtilization
seven_day?: RawWindowUtilization
}
// Latest readings. Replaced wholesale whenever headers are processed and
// reset to {} when rate-limit processing is disabled.
let rawUtilization: RawUtilization = {}
/**
 * Return the most recently recorded raw utilization.
 * @returns the module-level object itself (not a copy)
 */
export function getRawUtilization(): RawUtilization {
return rawUtilization
}
/**
 * Read the per-window utilization and reset time from the
 * `anthropic-ratelimit-unified-{5h,7d}-utilization` / `-reset` headers.
 *
 * A window is included only when both of its headers are present and parse to
 * finite numbers. Header values are plain strings, and `Number('')` is 0 while
 * `Number('abc')` is NaN, so empty or malformed values are skipped instead of
 * being published as a bogus reading.
 *
 * @param headers - Response headers to inspect
 * @returns utilization keyed by window; windows without usable data are omitted
 */
function extractRawUtilization(headers: globalThis.Headers): RawUtilization {
  const result: RawUtilization = {}
  for (const [key, abbrev] of [
    ['five_hour', '5h'],
    ['seven_day', '7d'],
  ] as const) {
    const util = headers.get(
      `anthropic-ratelimit-unified-${abbrev}-utilization`,
    )
    const reset = headers.get(`anthropic-ratelimit-unified-${abbrev}-reset`)
    if (util === null || reset === null) {
      continue
    }
    // Blank values would otherwise be coerced to 0 by Number().
    if (util.trim() === '' || reset.trim() === '') {
      continue
    }
    const utilization = Number(util)
    const resetsAt = Number(reset)
    if (!Number.isFinite(utilization) || !Number.isFinite(resetsAt)) {
      continue
    }
    result[key] = { utilization, resets_at: resetsAt }
  }
  return result
}
// Callback invoked with the new limits whenever a status change is emitted.
type StatusChangeListener = (limits: ClaudeAILimits) => void

// Registered callbacks; emitStatusChange() invokes every entry.
export const statusListeners: Set<StatusChangeListener> = new Set()

/**
 * Make `limits` the current limits, notify every registered listener, and
 * record a `tengu_claudeai_limits_status_changed` analytics event.
 * @param limits - The limits to publish
 */
export function emitStatusChange(limits: ClaudeAILimits) {
  currentLimits = limits
  for (const notify of statusListeners) {
    notify(limits)
  }

  // Whole hours until the reset time; 0 when no reset time is known.
  const secondsTillReset = limits.resetsAt
    ? limits.resetsAt - Date.now() / 1000
    : 0
  const hoursTillReset = Math.round(secondsTillReset / (60 * 60))

  const analyticsStatus =
    limits.status as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  logEvent('tengu_claudeai_limits_status_changed', {
    status: analyticsStatus,
    unifiedRateLimitFallbackAvailable: limits.unifiedRateLimitFallbackAvailable,
    hoursTillReset,
  })
}
/**
 * Send a minimal one-token request to the small/fast model and return the raw
 * response (via asResponse()) so the caller can read its headers.
 */
async function makeTestQuery() {
  const quotaModel = getSmallFastModel()
  const client = await getAnthropicClient({
    maxRetries: 0,
    model: quotaModel,
    source: 'quota_check',
  })
  const probeMessages: MessageParam[] = [{ role: 'user', content: 'quota' }]
  const modelBetas = getModelBetas(quotaModel)
  // Only send the `betas` field when there is at least one beta to send.
  const betaParams = modelBetas.length > 0 ? { betas: modelBetas } : {}
  // biome-ignore lint/plugin: quota check needs raw response access via asResponse()
  return client.beta.messages
    .create({
      model: quotaModel,
      max_tokens: 1,
      messages: probeMessages,
      metadata: getAPIMetadata(),
      ...betaParams,
    })
    .asResponse()
}
/**
 * Probe the API with a minimal request and update the limits from the
 * response (or from the APIError, if the request fails with one).
 *
 * Does nothing when:
 * - nonessential traffic is disabled,
 * - rate limits should not be processed (neither a real subscriber nor mock
 *   testing), or
 * - the session is non-interactive (-p): the real query follows immediately
 *   and extractQuotaStatusFromHeaders() will update limits from its response
 *   headers (claude.ts), so the pre-check API call is skipped.
 *
 * Errors that are not APIError instances are dropped without being logged.
 */
export async function checkQuotaStatus(): Promise<void> {
  const shouldSkip =
    isEssentialTrafficOnly() ||
    !shouldProcessRateLimits(isClaudeAISubscriber()) ||
    getIsNonInteractiveSession()
  if (shouldSkip) {
    return
  }

  try {
    const { headers } = await makeTestQuery()
    extractQuotaStatusFromHeaders(headers)
  } catch (error) {
    if (!(error instanceof APIError)) {
      return
    }
    extractQuotaStatusFromError(error)
  }
}
/**
 * Server-driven early warning: look for a
 * `anthropic-ratelimit-unified-<claim>-surpassed-threshold` header on each
 * claim in EARLY_WARNING_CLAIM_MAP, in map order.
 *
 * @param headers - Response headers to inspect
 * @param unifiedRateLimitFallbackAvailable - Copied into the result unchanged
 * @returns an 'allowed_warning' ClaudeAILimits for the first claim whose
 *   threshold header is present, or null when none is
 */
function getHeaderBasedEarlyWarning(
  headers: globalThis.Headers,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  const claims = Object.entries(EARLY_WARNING_CLAIM_MAP)
  for (const [claimAbbrev, rateLimitType] of claims) {
    const headerPrefix = `anthropic-ratelimit-unified-${claimAbbrev}`
    const thresholdText = headers.get(`${headerPrefix}-surpassed-threshold`)
    if (thresholdText === null) {
      // Header absent: this claim has not crossed a warning threshold.
      continue
    }

    // Utilization and reset are optional extras; a missing or empty header
    // leaves the corresponding field undefined.
    const utilizationText = headers.get(`${headerPrefix}-utilization`)
    const resetText = headers.get(`${headerPrefix}-reset`)
    let utilization: number | undefined
    if (utilizationText) {
      utilization = Number(utilizationText)
    }
    let resetsAt: number | undefined
    if (resetText) {
      resetsAt = Number(resetText)
    }

    return {
      status: 'allowed_warning',
      resetsAt,
      rateLimitType,
      utilization,
      unifiedRateLimitFallbackAvailable,
      isUsingOverage: false,
      surpassedThreshold: Number(thresholdText),
    }
  }
  return null
}
/**
 * Client-side early warning for one rate limit window, used as the fallback
 * when the server doesn't send a surpassed-threshold header.
 *
 * Warns when utilization is at or above a threshold while the elapsed share of
 * the window is still at or below that threshold's time percentage.
 *
 * @param headers - Response headers carrying the claim's utilization and reset
 * @param config - Window definition and thresholds to evaluate
 * @param unifiedRateLimitFallbackAvailable - Copied into the result unchanged
 * @returns an 'allowed_warning' ClaudeAILimits when any threshold is met; null
 *   when none is, or when either header is missing
 */
function getTimeRelativeEarlyWarning(
  headers: globalThis.Headers,
  config: EarlyWarningConfig,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  const headerPrefix = `anthropic-ratelimit-unified-${config.claimAbbrev}`
  const utilizationText = headers.get(`${headerPrefix}-utilization`)
  const resetText = headers.get(`${headerPrefix}-reset`)
  if (utilizationText === null || resetText === null) {
    return null
  }

  const utilization = Number(utilizationText)
  const resetsAt = Number(resetText)
  const timeProgress = computeTimeProgress(resetsAt, config.windowSeconds)

  // High usage early in the window: the first matching threshold is enough.
  for (const threshold of config.thresholds) {
    const usageHigh = utilization >= threshold.utilization
    const stillEarly = timeProgress <= threshold.timePct
    if (usageHigh && stillEarly) {
      return {
        status: 'allowed_warning',
        resetsAt,
        rateLimitType: config.rateLimitType,
        utilization,
        unifiedRateLimitFallbackAvailable,
        isUsingOverage: false,
      }
    }
  }
  return null
}
/**
 * Resolve an early warning from response headers.
 *
 * The server-sent surpassed-threshold header takes precedence. Only when it
 * yields nothing are the client-side, time-relative thresholds evaluated, one
 * EARLY_WARNING_CONFIGS entry at a time in list order.
 *
 * @param headers - Response headers to inspect
 * @param unifiedRateLimitFallbackAvailable - Passed through to the result
 * @returns the first warning found, or null when there is none
 */
function getEarlyWarningFromHeaders(
  headers: globalThis.Headers,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  const serverWarning = getHeaderBasedEarlyWarning(
    headers,
    unifiedRateLimitFallbackAvailable,
  )
  if (serverWarning !== null) {
    return serverWarning
  }

  // Fallback: catches quota being burned faster than the window allows.
  for (const windowConfig of EARLY_WARNING_CONFIGS) {
    const clientWarning = getTimeRelativeEarlyWarning(
      headers,
      windowConfig,
      unifiedRateLimitFallbackAvailable,
    )
    if (clientWarning !== null) {
      return clientWarning
    }
  }
  return null
}
/**
 * Build a ClaudeAILimits snapshot from the unified rate-limit headers.
 *
 * When the status is 'allowed' or 'allowed_warning', an early warning (if one
 * is found) is returned as-is; otherwise the status is reported as 'allowed'.
 * Any other status is passed through together with the overage details.
 *
 * @param headers - Response headers to read
 * @returns the limits described by the headers
 */
function computeNewLimitsFromHeaders(
  headers: globalThis.Headers,
): ClaudeAILimits {
  // Empty or missing header -> undefined; anything else is coerced to a number.
  const toNumber = (text: string | null): number | undefined =>
    text ? Number(text) : undefined

  // A missing or empty status header is treated as 'allowed'.
  const status =
    (headers.get('anthropic-ratelimit-unified-status') as QuotaStatus) ||
    'allowed'
  const unifiedRateLimitFallbackAvailable =
    headers.get('anthropic-ratelimit-unified-fallback') === 'available'

  // Within quota: an early warning, when present, is the whole answer.
  const withinQuota = status === 'allowed' || status === 'allowed_warning'
  if (withinQuota) {
    const earlyWarning = getEarlyWarningFromHeaders(
      headers,
      unifiedRateLimitFallbackAvailable,
    )
    if (earlyWarning) {
      return earlyWarning
    }
  }

  // Rate limit type and overage details.
  const rateLimitType = headers.get(
    'anthropic-ratelimit-unified-representative-claim',
  ) as RateLimitType | null
  const overageStatus = headers.get(
    'anthropic-ratelimit-unified-overage-status',
  ) as QuotaStatus | null
  const overageResetsAt = toNumber(
    headers.get('anthropic-ratelimit-unified-overage-reset'),
  )
  // Reason why overage is disabled (spending cap or wallet empty)
  const overageDisabledReason = headers.get(
    'anthropic-ratelimit-unified-overage-disabled-reason',
  ) as OverageDisabledReason | null

  const limits: ClaudeAILimits = {
    // No early warning threshold surpassed -> plain 'allowed'.
    status: withinQuota ? 'allowed' : status,
    resetsAt: toNumber(headers.get('anthropic-ratelimit-unified-reset')),
    unifiedRateLimitFallbackAvailable,
  }
  // Optional fields are added only when they carry a truthy value.
  if (rateLimitType) {
    limits.rateLimitType = rateLimitType
  }
  if (overageStatus) {
    limits.overageStatus = overageStatus
  }
  if (overageResetsAt) {
    limits.overageResetsAt = overageResetsAt
  }
  if (overageDisabledReason) {
    limits.overageDisabledReason = overageDisabledReason
  }
  // Using overage: standard limits rejected but overage still allowed.
  limits.isUsingOverage =
    status === 'rejected' &&
    (overageStatus === 'allowed' || overageStatus === 'allowed_warning')
  return limits
}
/**
 * Persist the extra-usage (overage) disabled reason from the response headers
 * into the global config, writing only when the value differs from the cached
 * one.
 * @param headers - Response headers to read the reason from
 */
function cacheExtraUsageDisabledReason(headers: globalThis.Headers): void {
  // No header (null) means extra usage is enabled.
  const headerValue = headers.get(
    'anthropic-ratelimit-unified-overage-disabled-reason',
  )
  const reason = headerValue ?? null

  const alreadyCached =
    getGlobalConfig().cachedExtraUsageDisabledReason === reason
  if (alreadyCached) {
    return
  }
  saveGlobalConfig(current => {
    return { ...current, cachedExtraUsageDisabledReason: reason }
  })
}
/**
 * Update the module's rate-limit state from a response's headers.
 *
 * When rate limits should be processed: records raw utilization, recomputes
 * the limits, caches the extra-usage disabled reason, and emits a status
 * change if the limits differ from the current ones. Otherwise: clears raw
 * utilization and, if any limit state is held, emits the default limits.
 *
 * @param headers - Headers of the API response
 */
export function extractQuotaStatusFromHeaders(
  headers: globalThis.Headers,
): void {
  if (shouldProcessRateLimits(isClaudeAISubscriber())) {
    // Applies mocks from the /mock-limits command if active.
    const effectiveHeaders = processRateLimitHeaders(headers)
    rawUtilization = extractRawUtilization(effectiveHeaders)
    const updatedLimits = computeNewLimitsFromHeaders(effectiveHeaders)
    // Extra usage status persists across sessions.
    cacheExtraUsageDisabledReason(effectiveHeaders)
    if (!isEqual(currentLimits, updatedLimits)) {
      emitStatusChange(updatedLimits)
    }
    return
  }

  // Rate limits are not being processed: drop whatever state is held.
  rawUtilization = {}
  const holdsLimitState =
    currentLimits.status !== 'allowed' || Boolean(currentLimits.resetsAt)
  if (holdsLimitState) {
    emitStatusChange({
      status: 'allowed',
      unifiedRateLimitFallbackAvailable: false,
      isUsingOverage: false,
    })
  }
}
/**
 * Update the module's rate-limit state from a 429 APIError.
 *
 * Ignored unless rate limits should be processed and the error status is 429.
 * The resulting status is always 'rejected': limits are recomputed from the
 * error's headers when it has any, otherwise the current limits are copied.
 * Any exception thrown while doing so is logged rather than propagated.
 *
 * @param error - The API error to inspect
 */
export function extractQuotaStatusFromError(error: APIError): void {
  const processingEnabled = shouldProcessRateLimits(isClaudeAISubscriber())
  if (!processingEnabled || error.status !== 429) {
    return
  }

  try {
    let rejectedLimits: ClaudeAILimits
    if (error.headers) {
      // Applies mocks from the /mock-limits command if active.
      const effectiveHeaders = processRateLimitHeaders(error.headers)
      rawUtilization = extractRawUtilization(effectiveHeaders)
      rejectedLimits = computeNewLimitsFromHeaders(effectiveHeaders)
      // Extra usage status persists across sessions.
      cacheExtraUsageDisabledReason(effectiveHeaders)
    } else {
      rejectedLimits = { ...currentLimits }
    }

    // For errors, the status is rejected even if headers are not present.
    rejectedLimits.status = 'rejected'
    if (!isEqual(currentLimits, rejectedLimits)) {
      emitStatusChange(rejectedLimits)
    }
  } catch (e) {
    logError(e as Error)
  }
}