gracefulShutdown.ts
utils/gracefulShutdown.ts
No strong subsystem tag
530
Lines
20088
Bytes
6
Exports
21
Imports
10
Keywords
What this is
This page documents one file from the repository and includes its full source so you can read it without leaving the docs site.
Beginner explanation
This file is one piece of the larger system. Its name, directory, imports, and exports show where it fits. Start by reading the exports and related files first.
How it is used
Start from the exports list and related files. Those are the easiest clues for where this file fits into the system.
Expert explanation
Architecturally, this file intersects with general runtime concerns. It contains 530 lines, 21 detected imports, and 6 detected exports.
Important relationships
Detected exports
setupGracefulShutdowngracefulShutdownSyncisShuttingDownresetShutdownStategetPendingShutdownForTestinggracefulShutdown
Keywords
processexitterminalwritesynccatchanalyticsbeforevoidcleanupexitcode
Detected imports
chalkfslodash-es/memoize.jssignal-exitsrc/entrypoints/agentSdkTypes.js../bootstrap/state.js../ink/instances.js../ink/termio/csi.js../ink/termio/dec.js../ink/termio/osc.js../services/analytics/datadog.js../services/analytics/firstPartyEventLogger.js../services/analytics/index.js../state/AppState.js./cleanupRegistry.js./debug.js./diagLogs.js./envUtils.js./sessionStorage.js./sleep.js./startupProfiler.js
Source notes
This page embeds the full file contents. Small or leaf files are still indexed honestly instead of being over-explained.
Full source
import chalk from 'chalk'
import { writeSync } from 'fs'
import memoize from 'lodash-es/memoize.js'
import { onExit } from 'signal-exit'
import type { ExitReason } from 'src/entrypoints/agentSdkTypes.js'
import {
getIsInteractive,
getIsScrollDraining,
getLastMainRequestId,
getSessionId,
isSessionPersistenceDisabled,
} from '../bootstrap/state.js'
import instances from '../ink/instances.js'
import {
DISABLE_KITTY_KEYBOARD,
DISABLE_MODIFY_OTHER_KEYS,
} from '../ink/termio/csi.js'
import {
DBP,
DFE,
DISABLE_MOUSE_TRACKING,
EXIT_ALT_SCREEN,
SHOW_CURSOR,
} from '../ink/termio/dec.js'
import {
CLEAR_ITERM2_PROGRESS,
CLEAR_TAB_STATUS,
CLEAR_TERMINAL_TITLE,
supportsTabStatus,
wrapForMultiplexer,
} from '../ink/termio/osc.js'
import { shutdownDatadog } from '../services/analytics/datadog.js'
import { shutdown1PEventLogging } from '../services/analytics/firstPartyEventLogger.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../services/analytics/index.js'
import type { AppState } from '../state/AppState.js'
import { runCleanupFunctions } from './cleanupRegistry.js'
import { logForDebugging } from './debug.js'
import { logForDiagnosticsNoPII } from './diagLogs.js'
import { isEnvTruthy } from './envUtils.js'
import { getCurrentSessionTitle, sessionIdExists } from './sessionStorage.js'
import { sleep } from './sleep.js'
import { profileReport } from './startupProfiler.js'
/**
* Clean up terminal modes synchronously before process exit.
* This ensures terminal escape sequences (Kitty keyboard, focus reporting, etc.)
* are properly disabled even if React's componentWillUnmount doesn't run in time.
* Uses writeSync to ensure writes complete before exit.
*
* We unconditionally send all disable sequences because:
* 1. Terminal detection may not always work correctly (e.g., in tmux, screen)
* 2. These sequences are no-ops on terminals that don't support them
* 3. Failing to disable leaves the terminal in a broken state
*/
/* eslint-disable custom-rules/no-sync-fs -- must be sync to flush before process.exit */
function cleanupTerminalModes(): void {
if (!process.stdout.isTTY) {
return
}
try {
// Disable mouse tracking FIRST, before the React unmount tree-walk.
// The terminal needs a round-trip to process this and stop sending
// events; doing it now (not after unmount) gives that time while
// we're busy unmounting. Otherwise events arrive during cooked-mode
// cleanup and either echo to the screen or leak to the shell.
writeSync(1, DISABLE_MOUSE_TRACKING)
// Exit alt screen FIRST so printResumeHint() (and all sequences below)
// land on the main buffer.
//
// Unmount Ink directly rather than writing EXIT_ALT_SCREEN ourselves.
// Ink registered its unmount with signal-exit, so it will otherwise run
// AGAIN inside forceExit() → process.exit(). Two problems with letting
// that happen:
// 1. If we write 1049l here and unmount writes it again later, the
// second one triggers another DECRC — the cursor jumps back over
// the resume hint and the shell prompt lands on the wrong line.
// 2. unmount()'s onRender() must run with altScreenActive=true (alt-
// screen cursor math) AND on the alt buffer. Exiting alt-screen
// here first makes onRender() scribble a REPL frame onto main.
// Calling unmount() now does the final render on the alt buffer,
// unsubscribes from signal-exit, and writes 1049l exactly once.
const inst = instances.get(process.stdout)
if (inst?.isAltScreenActive) {
try {
inst.unmount()
} catch {
// Reconciler/render threw — fall back to manual alt-screen exit
// so printResumeHint still hits the main buffer.
writeSync(1, EXIT_ALT_SCREEN)
}
}
// Catches events that arrived during the unmount tree-walk.
// detachForShutdown() below also drains.
inst?.drainStdin()
// Mark the Ink instance unmounted so signal-exit's deferred ink.unmount()
// early-returns instead of sending redundant EXIT_ALT_SCREEN sequences
// (from its writeSync cleanup block + AlternateScreen's unmount cleanup).
// Those redundant sequences land AFTER printResumeHint() and clobber the
// resume hint on tmux (and possibly other terminals) by restoring the
// saved cursor position. Safe to skip full unmount: this function already
// sends all the terminal-reset sequences, and the process is exiting.
inst?.detachForShutdown()
// Disable extended key reporting — always send both since terminals
// silently ignore whichever they don't implement
writeSync(1, DISABLE_MODIFY_OTHER_KEYS)
writeSync(1, DISABLE_KITTY_KEYBOARD)
// Disable focus events (DECSET 1004)
writeSync(1, DFE)
// Disable bracketed paste mode
writeSync(1, DBP)
// Show cursor
writeSync(1, SHOW_CURSOR)
// Clear iTerm2 progress bar - prevents lingering progress indicator
// that can cause bell sounds when returning to the terminal tab
writeSync(1, CLEAR_ITERM2_PROGRESS)
// Clear tab status (OSC 21337) so a stale dot doesn't linger
if (supportsTabStatus()) writeSync(1, wrapForMultiplexer(CLEAR_TAB_STATUS))
// Clear terminal title so the tab doesn't show stale session info.
// Respect CLAUDE_CODE_DISABLE_TERMINAL_TITLE — if the user opted out of
// title changes, don't clear their existing title on exit either.
if (!isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_TERMINAL_TITLE)) {
if (process.platform === 'win32') {
process.title = ''
} else {
writeSync(1, CLEAR_TERMINAL_TITLE)
}
}
} catch {
// Terminal may already be gone (e.g., SIGHUP after terminal close).
// Ignore write errors since we're exiting anyway.
}
}
let resumeHintPrinted = false
/**
* Print a hint about how to resume the session.
* Only shown for interactive sessions with persistence enabled.
*/
function printResumeHint(): void {
// Only print once (failsafe timer may call this again after normal shutdown)
if (resumeHintPrinted) {
return
}
// Only show with TTY, interactive sessions, and persistence
if (
process.stdout.isTTY &&
getIsInteractive() &&
!isSessionPersistenceDisabled()
) {
try {
const sessionId = getSessionId()
// Don't show resume hint if no session file exists (e.g., subcommands like `claude update`)
if (!sessionIdExists(sessionId)) {
return
}
const customTitle = getCurrentSessionTitle(sessionId)
// Use custom title if available, otherwise fall back to session ID
let resumeArg: string
if (customTitle) {
// Wrap in double quotes, escape backslashes first then quotes
const escaped = customTitle.replace(/\\/g, '\\\\').replace(/"/g, '\\"')
resumeArg = `"${escaped}"`
} else {
resumeArg = sessionId
}
writeSync(
1,
chalk.dim(
`\nResume this session with:\nclaude --resume ${resumeArg}\n`,
),
)
resumeHintPrinted = true
} catch {
// Ignore write errors
}
}
}
/* eslint-enable custom-rules/no-sync-fs */
/**
* Force process exit, handling the case where the terminal is gone.
* When the terminal/PTY is closed (e.g., SIGHUP), process.exit() can throw
* EIO errors because Bun tries to flush stdout to a dead file descriptor.
* In that case, fall back to SIGKILL which always works.
*/
function forceExit(exitCode: number): never {
// Clear failsafe timer since we're exiting now
if (failsafeTimer !== undefined) {
clearTimeout(failsafeTimer)
failsafeTimer = undefined
}
// Drain stdin LAST, right before exit. cleanupTerminalModes() sent
// DISABLE_MOUSE_TRACKING early, but the terminal round-trip plus any
// events already in flight means bytes can arrive during the seconds
// of async cleanup between then and now. Draining here catches them.
// Use the Ink class method (not the standalone drainStdin()) so we
// drain the instance's stdin — when process.stdin is piped,
// getStdinOverride() opens /dev/tty as the real input stream and the
// class method knows about it; the standalone function defaults to
// process.stdin which would early-return on isTTY=false.
try {
instances.get(process.stdout)?.drainStdin()
} catch {
// Terminal may be gone (SIGHUP). Ignore — we are about to exit.
}
try {
process.exit(exitCode)
} catch (e) {
// process.exit() threw. In tests, it's mocked to throw - re-throw so test sees it.
// In production, it's likely EIO from dead terminal - use SIGKILL.
if ((process.env.NODE_ENV as string) === 'test') {
throw e
}
// Fall back to SIGKILL which doesn't try to flush anything.
process.kill(process.pid, 'SIGKILL')
}
// In tests, process.exit may be mocked to return instead of exiting.
// In production, we should never reach here.
if ((process.env.NODE_ENV as string) !== 'test') {
throw new Error('unreachable')
}
// TypeScript trick: cast to never since we know this only happens in tests
// where the mock returns instead of exiting
return undefined as never
}
/**
* Set up global signal handlers for graceful shutdown
*/
export const setupGracefulShutdown = memoize(() => {
// Work around a Bun bug where process.removeListener(sig, fn) resets the
// kernel sigaction for that signal even when other JS listeners remain —
// the signal then falls back to its default action (terminate) and our
// process.on('SIGTERM') handler never runs.
//
// Trigger: any short-lived signal-exit v4 subscriber (e.g. execa per child
// process, or an Ink instance that unmounts). When its unsubscribe runs and
// it was the last v4 subscriber, v4.unload() calls removeListener on every
// signal in its list (SIGTERM, SIGINT, SIGHUP, …), tripping the Bun bug and
// nuking our handlers at the kernel level.
//
// Fix: pin signal-exit v4 loaded by registering a no-op onExit callback that
// is never unsubscribed. This keeps v4's internal emitter count > 0 so
// unload() never runs and removeListener is never called. Harmless under
// Node.js — the pin also ensures signal-exit's process.exit hook stays
// active for Ink cleanup.
onExit(() => {})
process.on('SIGINT', () => {
// In print mode, print.ts registers its own SIGINT handler that aborts
// the in-flight query and calls gracefulShutdown(0); skip here to
// avoid racing with it. Only check print mode — other non-interactive
// sessions (--sdk-url, --init-only, non-TTY) don't register their own
// SIGINT handler and need gracefulShutdown to run.
if (process.argv.includes('-p') || process.argv.includes('--print')) {
return
}
logForDiagnosticsNoPII('info', 'shutdown_signal', { signal: 'SIGINT' })
void gracefulShutdown(0)
})
process.on('SIGTERM', () => {
logForDiagnosticsNoPII('info', 'shutdown_signal', { signal: 'SIGTERM' })
void gracefulShutdown(143) // Exit code 143 (128 + 15) for SIGTERM
})
if (process.platform !== 'win32') {
process.on('SIGHUP', () => {
logForDiagnosticsNoPII('info', 'shutdown_signal', { signal: 'SIGHUP' })
void gracefulShutdown(129) // Exit code 129 (128 + 1) for SIGHUP
})
// Detect orphaned process when terminal closes without delivering SIGHUP.
// macOS revokes TTY file descriptors instead of signaling, leaving the
// process alive but unable to read/write. Periodically check stdin validity.
if (process.stdin.isTTY) {
orphanCheckInterval = setInterval(() => {
// Skip during scroll drain — even a cheap check consumes an event
// loop tick that scroll frames need. 30s interval → missing one is fine.
if (getIsScrollDraining()) return
// process.stdout.writable becomes false when the TTY is revoked
if (!process.stdout.writable || !process.stdin.readable) {
clearInterval(orphanCheckInterval)
logForDiagnosticsNoPII('info', 'shutdown_signal', {
signal: 'orphan_detected',
})
void gracefulShutdown(129)
}
}, 30_000) // Check every 30 seconds
orphanCheckInterval.unref() // Don't keep process alive just for this check
}
}
// Log uncaught exceptions for container observability and analytics
// Error names (e.g., "TypeError") are not sensitive - safe to log
process.on('uncaughtException', error => {
logForDiagnosticsNoPII('error', 'uncaught_exception', {
error_name: error.name,
error_message: error.message.slice(0, 2000),
})
logEvent('tengu_uncaught_exception', {
error_name:
error.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
})
})
// Log unhandled promise rejections for container observability and analytics
process.on('unhandledRejection', reason => {
const errorName =
reason instanceof Error
? reason.name
: typeof reason === 'string'
? 'string'
: 'unknown'
const errorInfo =
reason instanceof Error
? {
error_name: reason.name,
error_message: reason.message.slice(0, 2000),
error_stack: reason.stack?.slice(0, 4000),
}
: { error_message: String(reason).slice(0, 2000) }
logForDiagnosticsNoPII('error', 'unhandled_rejection', errorInfo)
logEvent('tengu_unhandled_rejection', {
error_name:
errorName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
})
})
})
export function gracefulShutdownSync(
exitCode = 0,
reason: ExitReason = 'other',
options?: {
getAppState?: () => AppState
setAppState?: (f: (prev: AppState) => AppState) => void
},
): void {
// Set the exit code that will be used when process naturally exits. Note that we do it
// here inside the sync version too so that it is possible to determine if
// gracefulShutdownSync was called by checking process.exitCode.
process.exitCode = exitCode
pendingShutdown = gracefulShutdown(exitCode, reason, options)
.catch(error => {
logForDebugging(`Graceful shutdown failed: ${error}`, { level: 'error' })
cleanupTerminalModes()
printResumeHint()
forceExit(exitCode)
})
// Prevent unhandled rejection: forceExit re-throws in test mode,
// which would escape the .catch() handler above as a new rejection.
.catch(() => {})
}
let shutdownInProgress = false
let failsafeTimer: ReturnType<typeof setTimeout> | undefined
let orphanCheckInterval: ReturnType<typeof setInterval> | undefined
let pendingShutdown: Promise<void> | undefined
/** Check if graceful shutdown is in progress */
export function isShuttingDown(): boolean {
return shutdownInProgress
}
/** Reset shutdown state - only for use in tests */
export function resetShutdownState(): void {
shutdownInProgress = false
resumeHintPrinted = false
if (failsafeTimer !== undefined) {
clearTimeout(failsafeTimer)
failsafeTimer = undefined
}
pendingShutdown = undefined
}
/**
* Returns the in-flight shutdown promise, if any. Only for use in tests
* to await completion before restoring mocks.
*/
export function getPendingShutdownForTesting(): Promise<void> | undefined {
return pendingShutdown
}
// Graceful shutdown function that drains the event loop
export async function gracefulShutdown(
exitCode = 0,
reason: ExitReason = 'other',
options?: {
getAppState?: () => AppState
setAppState?: (f: (prev: AppState) => AppState) => void
/** Printed to stderr after alt-screen exit, before forceExit. */
finalMessage?: string
},
): Promise<void> {
if (shutdownInProgress) {
return
}
shutdownInProgress = true
// Resolve the SessionEnd hook budget before arming the failsafe so the
// failsafe can scale with it. Without this, a user-configured 10s hook
// budget is silently truncated by the 5s failsafe (gh-32712 follow-up).
const { executeSessionEndHooks, getSessionEndHookTimeoutMs } = await import(
'./hooks.js'
)
const sessionEndTimeoutMs = getSessionEndHookTimeoutMs()
// Failsafe: guarantee process exits even if cleanup hangs (e.g., MCP connections).
// Runs cleanupTerminalModes first so a hung cleanup doesn't leave the terminal dirty.
// Budget = max(5s, hook budget + 3.5s headroom for cleanup + analytics flush).
failsafeTimer = setTimeout(
code => {
cleanupTerminalModes()
printResumeHint()
forceExit(code)
},
Math.max(5000, sessionEndTimeoutMs + 3500),
exitCode,
)
failsafeTimer.unref()
// Set the exit code that will be used when process naturally exits
process.exitCode = exitCode
// Exit alt screen and print resume hint FIRST, before any async operations.
// This ensures the hint is visible even if the process is killed during
// cleanup (e.g., SIGKILL during macOS reboot). Without this, the resume
// hint would only appear after cleanup functions, hooks, and analytics
// flush — which can take several seconds.
cleanupTerminalModes()
printResumeHint()
// Flush session data first — this is the most critical cleanup. If the
// terminal is dead (SIGHUP, SSH disconnect), hooks and analytics may hang
// on I/O to a dead TTY or unreachable network, eating into the
// failsafe budget. Session persistence must complete before anything else.
let cleanupTimeoutId: ReturnType<typeof setTimeout> | undefined
try {
const cleanupPromise = (async () => {
try {
await runCleanupFunctions()
} catch {
// Silently ignore cleanup errors
}
})()
await Promise.race([
cleanupPromise,
new Promise((_, reject) => {
cleanupTimeoutId = setTimeout(
rej => rej(new CleanupTimeoutError()),
2000,
reject,
)
}),
])
clearTimeout(cleanupTimeoutId)
} catch {
// Silently handle timeout and other errors
clearTimeout(cleanupTimeoutId)
}
// Execute SessionEnd hooks. Bound both the per-hook default timeout and the
// overall execution via a single budget (CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS,
// default 1.5s). hook.timeout in settings is respected up to this cap.
try {
await executeSessionEndHooks(reason, {
...options,
signal: AbortSignal.timeout(sessionEndTimeoutMs),
timeoutMs: sessionEndTimeoutMs,
})
} catch {
// Ignore SessionEnd hook exceptions (including AbortError on timeout)
}
// Log startup perf before analytics shutdown flushes/cancels timers
try {
profileReport()
} catch {
// Ignore profiling errors during shutdown
}
// Signal to inference that this session's cache can be evicted.
// Fires before analytics flush so the event makes it to the pipeline.
const lastRequestId = getLastMainRequestId()
if (lastRequestId) {
logEvent('tengu_cache_eviction_hint', {
scope:
'session_end' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
last_request_id:
lastRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
})
}
// Flush analytics — capped at 500ms. Previously unbounded: the 1P exporter
// awaits all pending axios POSTs (10s each), eating the full failsafe budget.
// Lost analytics on slow networks are acceptable; a hanging exit is not.
try {
await Promise.race([
Promise.all([shutdown1PEventLogging(), shutdownDatadog()]),
sleep(500),
])
} catch {
// Ignore analytics shutdown errors
}
if (options?.finalMessage) {
try {
// eslint-disable-next-line custom-rules/no-sync-fs -- must flush before forceExit
writeSync(2, options.finalMessage + '\n')
} catch {
// stderr may be closed (e.g., SSH disconnect). Ignore write errors.
}
}
forceExit(exitCode)
}
class CleanupTimeoutError extends Error {
constructor() {
super('Cleanup timeout')
}
}