sedEditParser.ts
tools/BashTool/sedEditParser.ts
323
Lines
9562
Bytes
4
Exports
2
Imports
10
Keywords
What this is
This page documents one file from the repository and includes its full source so you can read it without leaving the docs site.
Beginner explanation
This file is part of the tool layer, which means it describes actions the system can perform for the user or model.
How it is used
Start from the exports list and related files. Those are the easiest clues for where this file fits into the system.
Expert explanation
Architecturally, this file intersects with tool-system. It contains 323 lines, 2 detected imports, and 4 detected exports.
Important relationships
Detected exports
SedEditInfo, isSedInPlaceEdit, parseSedEditCommand, applySedSubstitution
Keywords
flags, replace, pattern, expression, replacement, flag, regex, args, else, char
Detected imports
crypto, ../../utils/bash/shellQuote.js
Source notes
This page embeds the full file contents. Small or leaf files are still indexed honestly instead of being over-explained.
Full source
/**
* Parser for sed edit commands (-i flag substitutions)
* Extracts file paths and substitution patterns to enable file-edit-style rendering
*/
import { randomBytes } from 'crypto'
import { tryParseShellCommand } from '../../utils/bash/shellQuote.js'
// Sentinel strings used while converting BRE escapes to JavaScript regex
// syntax. Each one is wrapped in NUL bytes and is assumed not to occur in a
// sed pattern; the matching *_RE constant finds every occurrence of it.
const sentinelRegExp = (sentinel: string): RegExp => new RegExp(sentinel, 'g')

const BACKSLASH_PLACEHOLDER = '\x00BACKSLASH\x00'
const BACKSLASH_PLACEHOLDER_RE = sentinelRegExp(BACKSLASH_PLACEHOLDER)

const PLUS_PLACEHOLDER = '\x00PLUS\x00'
const PLUS_PLACEHOLDER_RE = sentinelRegExp(PLUS_PLACEHOLDER)

const QUESTION_PLACEHOLDER = '\x00QUESTION\x00'
const QUESTION_PLACEHOLDER_RE = sentinelRegExp(QUESTION_PLACEHOLDER)

const PIPE_PLACEHOLDER = '\x00PIPE\x00'
const PIPE_PLACEHOLDER_RE = sentinelRegExp(PIPE_PLACEHOLDER)

const LPAREN_PLACEHOLDER = '\x00LPAREN\x00'
const LPAREN_PLACEHOLDER_RE = sentinelRegExp(LPAREN_PLACEHOLDER)

const RPAREN_PLACEHOLDER = '\x00RPAREN\x00'
const RPAREN_PLACEHOLDER_RE = sentinelRegExp(RPAREN_PLACEHOLDER)
/**
 * Result of parsing a `sed -i 's/pattern/replacement/flags' file` command.
 * Produced by `parseSedEditCommand` and consumed by `applySedSubstitution`.
 */
export type SedEditInfo = {
/** The file path being edited (the second non-flag argument of the command) */
filePath: string
/**
 * The search pattern (regex), exactly as written between the first and second
 * unescaped `/` - backslash escapes such as `\/` are kept as-is
 */
pattern: string
/**
 * The replacement string, exactly as written between the second and third
 * unescaped `/` - backslash escapes are kept as-is
 */
replacement: string
/** Substitution flags following the final `/`; limited to the characters g, p, i, I, m, M and 1-9 */
flags: string
/** Whether to use extended regex (-E, -r or --regexp-extended was given) */
extendedRegex: boolean
}
/**
 * Report whether a shell command is a sed in-place substitution that this
 * module can parse.
 *
 * @param command - The raw shell command line
 * @returns true exactly when `parseSedEditCommand` yields a non-null result
 */
export function isSedInPlaceEdit(command: string): boolean {
  return parseSedEditCommand(command) !== null
}
/**
 * Parse a sed edit command and extract the edit information.
 *
 * Only the simple form `sed -i [flags] 's/pattern/replacement/flags' file` is
 * accepted: one expression, one file, `/` as the delimiter. Anything else -
 * unknown options, several expressions or files, globs, or any shell operator
 * (redirection, pipe, `&&`, `;`, ...) - makes the command "not a simple sed
 * edit" and yields null.
 *
 * @param command - The raw shell command line
 * @returns The parsed edit, or null if the command is not a valid sed in-place edit
 */
export function parseSedEditCommand(command: string): SedEditInfo | null {
  const trimmed = command.trim()

  // Must start with sed
  const sedMatch = trimmed.match(/^\s*sed\s+/)
  if (!sedMatch) return null

  const parseResult = tryParseShellCommand(trimmed.slice(sedMatch[0].length))
  if (!parseResult.success) return null

  // Keep plain word tokens. A token carrying `op` is a glob or a shell control
  // operator / redirection; silently dropping one would let e.g.
  // `sed -i s/a/b/ > out.txt` be reported as an edit of `out.txt`, so reject.
  // Other non-string tokens (such as comments) are ignored.
  const args: string[] = []
  for (const token of parseResult.tokens) {
    if (typeof token === 'string') {
      args.push(token)
    } else if (typeof token === 'object' && token !== null && 'op' in token) {
      return null
    }
  }

  // Parse flags and arguments
  let hasInPlaceFlag = false
  let extendedRegex = false
  let expression: string | null = null
  let filePath: string | null = null

  for (let i = 0; i < args.length; i++) {
    const arg = args[i]!

    // -i / --in-place, optionally followed by a separate backup suffix
    if (arg === '-i' || arg === '--in-place') {
      hasInPlaceFlag = true
      // On macOS, -i requires a suffix argument (even if empty string).
      // Only an empty string or a dot-prefixed word is treated as that suffix,
      // so flags and sed expressions are never consumed here.
      const next = args[i + 1]
      if (next !== undefined && (next === '' || next.startsWith('.'))) {
        i++ // Skip the backup suffix
      }
      continue
    }

    // -i.bak / --in-place=.bak (inline suffix)
    if (arg.startsWith('-i') || arg.startsWith('--in-place=')) {
      hasInPlaceFlag = true
      continue
    }

    // Extended regex flags
    if (arg === '-E' || arg === '-r' || arg === '--regexp-extended') {
      extendedRegex = true
      continue
    }

    // -e / --expression with the script in the next argument
    if (arg === '-e' || arg === '--expression') {
      const next = args[i + 1]
      // Missing script, or a second expression: only one is supported
      if (next === undefined || expression !== null) return null
      expression = next
      i++
      continue
    }

    if (arg.startsWith('--expression=')) {
      if (expression !== null) return null
      expression = arg.slice('--expression='.length)
      continue
    }

    // Unknown flag - not safe to parse
    if (arg.startsWith('-')) return null

    // Positional arguments: the expression first, then exactly one file
    if (expression === null) {
      expression = arg
    } else if (filePath === null) {
      filePath = arg
    } else {
      // More than one file - not supported for simple rendering
      return null
    }
  }

  // Must have -i flag, expression, and file path
  if (!hasInPlaceFlag || !expression || !filePath) return null

  const substitution = splitSubstitutionExpression(expression)
  if (substitution === null) return null

  // Validate flags - only allow safe substitution flags
  if (!/^[gpimIM1-9]*$/.test(substitution.flags)) return null

  return {
    filePath,
    pattern: substitution.pattern,
    replacement: substitution.replacement,
    flags: substitution.flags,
    extendedRegex,
  }
}

/**
 * Split a sed `s/pattern/replacement/flags` expression into its three fields.
 * Returns null unless the expression starts with `s/` and both the pattern and
 * the replacement are terminated by an unescaped `/`.
 */
function splitSubstitutionExpression(
  expression: string,
): { pattern: string; replacement: string; flags: string } | null {
  // Only support / as delimiter for simplicity
  if (!expression.startsWith('s/')) return null

  const fields = { pattern: '', replacement: '', flags: '' }
  let current: keyof typeof fields = 'pattern'

  for (let j = 2; j < expression.length; j++) {
    const char = expression.charAt(j)

    if (char === '\\' && j + 1 < expression.length) {
      // Copy the escape pair verbatim so an escaped delimiter (\/) does not
      // end the current field
      fields[current] += char + expression.charAt(j + 1)
      j++
      continue
    }

    if (char === '/') {
      // Extra delimiter in flags - unexpected
      if (current === 'flags') return null
      current = current === 'pattern' ? 'replacement' : 'flags'
      continue
    }

    fields[current] += char
  }

  // Both delimiters after `s/` must have been seen (flags may be empty)
  if (current !== 'flags') return null
  return fields
}
/**
 * Apply a parsed sed `s` command to file content and return the new content.
 *
 * sed runs the command once per input line, so the content is split on `\n`
 * and every line is rewritten independently:
 * - without `g`, the first match on each line is replaced (not just the first
 *   match in the file), and `^` / `$` anchor to each line
 * - a numeric flag N replaces the Nth match of a line (N and later with `g`)
 * - the `p` flag emits a line a second time when a substitution was made on it
 *
 * The replacement is interpreted with sed rules, not JavaScript ones: `&` and
 * `\0` are the whole match, `\1`-`\9` are capture groups, `\n` / `\t` are
 * newline / tab, any other `\x` is the literal `x`, and `$` has no special
 * meaning. GNU case-conversion escapes (`\U`, `\L`, ...) are not interpreted.
 *
 * @param content - Full text of the file before the edit
 * @param sedInfo - The parsed substitution (pattern, replacement, flags, regex mode)
 * @returns The rewritten content. The original content is returned unchanged
 *   when the pattern is not a valid JavaScript regex or when a used
 *   replacement references a capture group the pattern does not define.
 */
export function applySedSubstitution(
  content: string,
  sedInfo: SedEditInfo,
): string {
  // An empty file has no lines, so there is nothing for the command to run on
  if (content === '') return content

  const { flags } = sedInfo
  const replaceAll = flags.includes('g')
  const printOnMatch = flags.includes('p')
  const occurrenceFlag = /[1-9]+/.exec(flags)
  const firstOccurrence = occurrenceFlag ? Number(occurrenceFlag[0]) : 1

  // The JS regex is always global so every match on a line is visited; the
  // replacer below decides which of them sed would actually rewrite.
  let regexFlags = 'g'
  // Case-insensitive flag (i or I in sed)
  if (flags.includes('i') || flags.includes('I')) regexFlags += 'i'
  // Multiline flag (m or M in sed)
  if (flags.includes('m') || flags.includes('M')) regexFlags += 'm'

  // Convert sed pattern to JavaScript regex pattern
  let jsPattern = sedInfo.pattern
    // Unescape \/ to /
    .replace(/\\\//g, '/')

  // In BRE mode (no -E flag), metacharacters have opposite escaping:
  //   BRE:    \+ means "one or more", + is literal
  //   ERE/JS: + means "one or more", \+ is literal
  // NOTE: BRE interval braces (\{n,m\}) are not translated by this chain.
  if (!sedInfo.extendedRegex) {
    jsPattern = jsPattern
      // Step 1: Protect literal backslashes (\\) so they are not read as the
      // start of an escaped metacharacter
      .replace(/\\\\/g, BACKSLASH_PLACEHOLDER)
      // Step 2: Park escaped metacharacters (they become unescaped in JS)
      .replace(/\\\+/g, PLUS_PLACEHOLDER)
      .replace(/\\\?/g, QUESTION_PLACEHOLDER)
      .replace(/\\\|/g, PIPE_PLACEHOLDER)
      .replace(/\\\(/g, LPAREN_PLACEHOLDER)
      .replace(/\\\)/g, RPAREN_PLACEHOLDER)
      // Step 3: Escape the remaining bare metacharacters (literal in BRE)
      .replace(/\+/g, '\\+')
      .replace(/\?/g, '\\?')
      .replace(/\|/g, '\\|')
      .replace(/\(/g, '\\(')
      .replace(/\)/g, '\\)')
      // Step 4: Replace placeholders with their JS equivalents
      .replace(BACKSLASH_PLACEHOLDER_RE, '\\\\')
      .replace(PLUS_PLACEHOLDER_RE, '+')
      .replace(QUESTION_PLACEHOLDER_RE, '?')
      .replace(PIPE_PLACEHOLDER_RE, '|')
      .replace(LPAREN_PLACEHOLDER_RE, '(')
      .replace(RPAREN_PLACEHOLDER_RE, ')')
  }

  let regex: RegExp
  try {
    regex = new RegExp(jsPattern, regexFlags)
  } catch {
    // If regex is invalid, return original content
    return content
  }

  // Tokenise the sed replacement: a string is literal text, a number is a
  // group reference (0 = whole match). Rendering these through a replacer
  // callback keeps `$` literal instead of letting JS expand $1, $&, $$.
  const parts: Array<string | number> = []
  const rhs = sedInfo.replacement
  for (let k = 0; k < rhs.length; k++) {
    const ch = rhs.charAt(k)
    if (ch === '&') {
      parts.push(0)
    } else if (ch !== '\\' || k + 1 >= rhs.length) {
      parts.push(ch)
    } else {
      const escaped = rhs.charAt(++k)
      if (escaped >= '0' && escaped <= '9') parts.push(Number(escaped))
      else if (escaped === 'n') parts.push('\n')
      else if (escaped === 't') parts.push('\t')
      else parts.push(escaped) // \&, \\, \/ and friends: the character itself
    }
  }

  // Set when a replacement that is actually used names a group the pattern
  // lacks; sed rejects such a command, so the content is left untouched.
  let invalidReference = false

  const rewriteLine = (line: string): string => {
    let seen = 0
    let changed = false
    const rewritten = line.replace(
      regex,
      (matched: string, ...rest: unknown[]): string => {
        seen++
        const skip =
          seen < firstOccurrence || (seen > firstOccurrence && !replaceAll)
        if (skip) return matched

        // Replacer arguments after the match are: captures..., offset, input.
        // Captures are strings or undefined, so the first number is the offset.
        const captures = rest.slice(
          0,
          rest.findIndex(value => typeof value === 'number'),
        )
        changed = true
        return parts
          .map(part => {
            if (typeof part === 'string') return part
            if (part === 0) return matched
            if (part > captures.length) {
              invalidReference = true
              return ''
            }
            const group = captures[part - 1]
            // A group that did not participate in the match expands to nothing
            return typeof group === 'string' ? group : ''
          })
          .join('')
      },
    )
    return printOnMatch && changed ? `${rewritten}\n${rewritten}` : rewritten
  }

  // A trailing newline terminates the last line; it does not start a new,
  // empty one, so keep it out of the split and restore it afterwards.
  const hasTrailingNewline = content.endsWith('\n')
  const lines = (hasTrailingNewline ? content.slice(0, -1) : content).split(
    '\n',
  )
  const result = lines.map(line => rewriteLine(line)).join('\n')

  if (invalidReference) return content
  return hasTrailingNewline ? `${result}\n` : result
}