mcpOutputStorage.ts
utils/mcpOutputStorage.ts
190
Lines
7086
Bytes
7
Exports
8
Imports
10
Keywords
What this is
This page documents one file from the repository and includes its full source so you can read it without leaving the docs site.
Beginner explanation
This file is one piece of the larger system. Its name, directory, imports, and exports show where it fits. Start by reading the exports and related files first.
How it is used
Start from the exports list and related files. Those are the easiest clues for where this file fits into the system.
Expert explanation
Architecturally, this file intersects with mcp. It contains 190 lines, 8 detected imports, and 7 detected exports.
Important relationships
Detected exports
getFormatDescription, getLargeOutputInstructions, extensionForMimeType, isBinaryContentType, PersistBinaryResult, persistBinaryContent, getBinaryBlobSavedMessage
Keywords
case, application, content, read, text, json, file, mimetype, schema, saved
Detected imports
fs/promises, path, ../services/analytics/index.js, ../services/mcp/client.js, ./errors.js, ./format.js, ./log.js, ./toolResultStorage.js
Source notes
This page embeds the full file contents. Small or leaf files are still indexed honestly instead of being over-explained.
Full source
import { writeFile } from 'fs/promises'
import { join } from 'path'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../services/analytics/index.js'
import type { MCPResultType } from '../services/mcp/client.js'
import { toError } from './errors.js'
import { formatFileSize } from './format.js'
import { logError } from './log.js'
import { ensureToolResultsDir, getToolResultsDir } from './toolResultStorage.js'
/**
 * Generates a format description string based on the MCP result type and schema.
 *
 * @param type - The MCP result variant being described
 * @param schema - Optional schema for the result; non-string schemas are
 *   JSON-serialized so the description stays legible
 * @returns A human-readable description of the content format
 */
export function getFormatDescription(
  type: MCPResultType,
  schema?: unknown,
): string {
  // `schema` is `unknown`; interpolating an object directly would render as
  // "[object Object]", so serialize anything that isn't already a string.
  const schemaText =
    schema == null
      ? undefined
      : typeof schema === 'string'
        ? schema
        : JSON.stringify(schema)
  switch (type) {
    case 'toolResult':
      return 'Plain text'
    case 'structuredContent':
      return schemaText ? `JSON with schema: ${schemaText}` : 'JSON'
    case 'contentArray':
      return schemaText ? `JSON array with schema: ${schemaText}` : 'JSON array'
  }
}
/**
 * Generates instruction text for Claude to read from a saved output file.
 *
 * @param rawOutputPath - Path to the saved output file
 * @param contentLength - Length of the content in characters
 * @param formatDescription - Description of the content format
 * @param maxReadLength - Optional max chars for Read tool (for Bash output context)
 * @returns Instruction text to include in the tool result
 */
export function getLargeOutputInstructions(
  rawOutputPath: string,
  contentLength: number,
  formatDescription: string,
  maxReadLength?: number,
): string {
  // The truncation guidance differs only in whether a Bash char limit is known.
  const truncationNote = maxReadLength
    ? `- If you receive truncation warnings when reading the file ("[N lines truncated]"), reduce the chunk size until you have read 100% of the content without truncation ***DO NOT PROCEED UNTIL YOU HAVE DONE THIS***. Bash output is limited to ${maxReadLength.toLocaleString()} chars.\n`
    : `- If you receive truncation warnings when reading the file, reduce the chunk size until you have read 100% of the content without truncation.\n`
  const sections = [
    `Error: result (${contentLength.toLocaleString()} characters) exceeds maximum allowed tokens. Output has been saved to ${rawOutputPath}.\n`,
    `Format: ${formatDescription}\n`,
    `Use offset and limit parameters to read specific portions of the file, search within it for specific content, and jq to make structured queries.\n`,
    `REQUIREMENTS FOR SUMMARIZATION/ANALYSIS/REVIEW:\n`,
    `- You MUST read the content from the file at ${rawOutputPath} in sequential chunks until 100% of the content has been read.\n`,
    truncationNote,
    `- Before producing ANY summary or analysis, you MUST explicitly describe what portion of the content you have read. ***If you did not read the entire content, you MUST explicitly state this.***\n`,
  ]
  return sections.join('')
}
// Fixed vocabulary of known mime types mapped to their file extensions.
// Anything not listed here deliberately falls through to 'bin'.
const MIME_TYPE_EXTENSIONS: Record<string, string> = {
  'application/pdf': 'pdf',
  'application/json': 'json',
  'text/csv': 'csv',
  'text/plain': 'txt',
  'text/html': 'html',
  'text/markdown': 'md',
  'application/zip': 'zip',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
    'docx',
  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
  'application/vnd.openxmlformats-officedocument.presentationml.presentation':
    'pptx',
  'application/msword': 'doc',
  'application/vnd.ms-excel': 'xls',
  'audio/mpeg': 'mp3',
  'audio/wav': 'wav',
  'audio/ogg': 'ogg',
  'video/mp4': 'mp4',
  'video/webm': 'webm',
  'image/png': 'png',
  'image/jpeg': 'jpg',
  'image/gif': 'gif',
  'image/webp': 'webp',
  'image/svg+xml': 'svg',
}

/**
 * Map a mime type to a file extension. Conservative: known types get their
 * proper extension; unknown types get 'bin'. The extension matters because
 * the Read tool dispatches on it (PDFs, images, etc. need the right ext).
 */
export function extensionForMimeType(mimeType: string | undefined): string {
  if (!mimeType) return 'bin'
  // Drop any charset/boundary parameter before looking up the media type.
  const mediaType = (mimeType.split(';')[0] ?? '').trim().toLowerCase()
  return MIME_TYPE_EXTENSIONS[mediaType] ?? 'bin'
}
/**
 * Heuristic for whether a content-type header indicates binary content that
 * should be saved to disk rather than put into the model context.
 * Text-ish types (text/*, json, xml, form data) are treated as non-binary.
 */
export function isBinaryContentType(contentType: string): boolean {
  if (!contentType) return false
  const mediaType = (contentType.split(';')[0] ?? '').trim().toLowerCase()
  // Textual formats are matched by exact name or suffix (never substring),
  // so 'openxmlformats' types (docx/xlsx) correctly stay binary.
  const isTextual =
    mediaType.startsWith('text/') ||
    mediaType === 'application/json' ||
    mediaType.endsWith('+json') ||
    mediaType === 'application/xml' ||
    mediaType.endsWith('+xml') ||
    mediaType.startsWith('application/javascript') ||
    mediaType === 'application/x-www-form-urlencoded'
  return !isTextual
}
/**
 * Outcome of persisting binary bytes to disk: on success, the saved file's
 * path, its size in bytes, and the mime-derived extension; on failure, the
 * error message from the write attempt.
 */
export type PersistBinaryResult =
| { filepath: string; size: number; ext: string }
| { error: string }
/**
* Write raw binary bytes to the tool-results directory with a mime-derived
* extension. Unlike persistToolResult (which stringifies), this writes the
* bytes as-is so the resulting file can be opened with native tools (Read
* for PDFs, pandas for xlsx, etc.).
*/
export async function persistBinaryContent(
bytes: Buffer,
mimeType: string | undefined,
persistId: string,
): Promise<PersistBinaryResult> {
await ensureToolResultsDir()
const ext = extensionForMimeType(mimeType)
const filepath = join(getToolResultsDir(), `${persistId}.${ext}`)
try {
await writeFile(filepath, bytes)
} catch (error) {
const err = toError(error)
logError(err)
return { error: err.message }
}
// mime type and extension are safe fixed-vocabulary strings (not paths/code)
logEvent('tengu_binary_content_persisted', {
mimeType: (mimeType ??
'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
sizeBytes: bytes.length,
ext: ext as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
})
return { filepath, size: bytes.length, ext }
}
/**
 * Build a short message telling Claude where binary content was saved.
 * Just states the path — no prescriptive hint, since what the model can
 * actually do with the file depends on provider/tooling.
 *
 * @param filepath - Where the binary content was written
 * @param mimeType - Mime type to show; falls back to 'unknown type'
 * @param size - Content size in bytes, rendered human-readable
 * @param sourceDescription - Prefix identifying where the content came from
 */
export function getBinaryBlobSavedMessage(
  filepath: string,
  mimeType: string | undefined,
  size: number,
  sourceDescription: string,
): string {
  // `||` (not `??`) so an empty-string mime type also falls back.
  const typeLabel = mimeType || 'unknown type'
  const details = `${typeLabel}, ${formatFileSize(size)}`
  return `${sourceDescription}Binary content (${details}) saved to ${filepath}`
}