secretScanner.ts
services/teamMemorySync/secretScanner.ts
325
Lines
9458
Bytes
4
Exports
1
Imports
10
Keywords
What this is
This page documents one file from the repository and includes its full source so you can read it without leaving the docs site.
Beginner explanation
This file is one piece of the larger system. Its name, directory, imports, and exports show where it fits. Start by reading the exports and related files first.
How it is used
Start from the exports list and related files. Those are the easiest clues for where this file fits into the system.
Expert explanation
Architecturally, this file intersects with memory-layers, integrations. It contains 325 lines, 1 detected imports, and 4 detected exports.
Important relationships
Detected exports
SecretMatchscanForSecretsgetSecretLabelredactSecrets
Keywords
sourcegitleaksrulea-za-z0-9contenta-z0-9a-f0-9rulesflagsruleid
Detected imports
../../utils/stringUtils.js
Source notes
This page embeds the full file contents. Small or leaf files are still indexed honestly instead of being over-explained.
Full source
/**
* Client-side secret scanner for team memory (PSR M22174).
*
* Scans content for credentials before upload so secrets never leave the
* user's machine. Uses a curated subset of high-confidence rules from
* gitleaks (https://github.com/gitleaks/gitleaks, MIT license) — only
* rules with distinctive prefixes that have near-zero false-positive
* rates are included. Generic keyword-context rules are omitted.
*
* Rule IDs and regexes sourced directly from the public gitleaks config:
* https://github.com/gitleaks/gitleaks/blob/master/config/gitleaks.toml
*
* JS regex notes:
* - gitleaks uses Go regex; inline (?i) and mode groups (?-i:...) are
* not portable to JS. Affected rules are rewritten with explicit
* character classes ([a-zA-Z0-9] instead of (?i)[a-z0-9]).
* - Trailing boundary alternations like (?:[\x60'"\s;]|\\[nr]|$) from
* Go regex are kept (JS $ matches end-of-string in default mode).
*/
import { capitalize } from '../../utils/stringUtils.js'
type SecretRule = {
/** Gitleaks rule ID (kebab-case), used in labels and analytics */
id: string
/** Regex source, lazily compiled on first scan */
source: string
/** Optional JS regex flags (most rules are case-sensitive by default) */
flags?: string
}
export type SecretMatch = {
/** Gitleaks rule ID that matched (e.g., "github-pat", "aws-access-token") */
ruleId: string
/** Human-readable label derived from the rule ID */
label: string
}
// ─── Curated rules ──────────────────────────────────────────────
// High-confidence patterns from gitleaks with distinctive prefixes.
// Ordered roughly by likelihood of appearing in dev-team content.
// Anthropic API key prefix, assembled at runtime so the literal byte
// sequence isn't present in the external bundle (excluded-strings check).
// join() is not constant-folded by the minifier.
const ANT_KEY_PFX = ['sk', 'ant', 'api'].join('-')
const SECRET_RULES: SecretRule[] = [
// — Cloud providers —
{
id: 'aws-access-token',
source: '\\b((?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z2-7]{16})\\b',
},
{
id: 'gcp-api-key',
source: '\\b(AIza[\\w-]{35})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'azure-ad-client-secret',
source:
'(?:^|[\\\\\'"\\x60\\s>=:(,)])([a-zA-Z0-9_~.]{3}\\dQ~[a-zA-Z0-9_~.-]{31,34})(?:$|[\\\\\'"\\x60\\s<),])',
},
{
id: 'digitalocean-pat',
source: '\\b(dop_v1_[a-f0-9]{64})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'digitalocean-access-token',
source: '\\b(doo_v1_[a-f0-9]{64})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
// — AI APIs —
{
id: 'anthropic-api-key',
source: `\\b(${ANT_KEY_PFX}03-[a-zA-Z0-9_\\-]{93}AA)(?:[\\x60'"\\s;]|\\\\[nr]|$)`,
},
{
id: 'anthropic-admin-api-key',
source:
'\\b(sk-ant-admin01-[a-zA-Z0-9_\\-]{93}AA)(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'openai-api-key',
source:
'\\b(sk-(?:proj|svcacct|admin)-(?:[A-Za-z0-9_-]{74}|[A-Za-z0-9_-]{58})T3BlbkFJ(?:[A-Za-z0-9_-]{74}|[A-Za-z0-9_-]{58})\\b|sk-[a-zA-Z0-9]{20}T3BlbkFJ[a-zA-Z0-9]{20})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'huggingface-access-token',
// gitleaks: hf_(?i:[a-z]{34}) → JS: hf_[a-zA-Z]{34}
source: '\\b(hf_[a-zA-Z]{34})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
// — Version control —
{
id: 'github-pat',
source: 'ghp_[0-9a-zA-Z]{36}',
},
{
id: 'github-fine-grained-pat',
source: 'github_pat_\\w{82}',
},
{
id: 'github-app-token',
source: '(?:ghu|ghs)_[0-9a-zA-Z]{36}',
},
{
id: 'github-oauth',
source: 'gho_[0-9a-zA-Z]{36}',
},
{
id: 'github-refresh-token',
source: 'ghr_[0-9a-zA-Z]{36}',
},
{
id: 'gitlab-pat',
source: 'glpat-[\\w-]{20}',
},
{
id: 'gitlab-deploy-token',
source: 'gldt-[0-9a-zA-Z_\\-]{20}',
},
// — Communication —
{
id: 'slack-bot-token',
source: 'xoxb-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*',
},
{
id: 'slack-user-token',
source: 'xox[pe](?:-[0-9]{10,13}){3}-[a-zA-Z0-9-]{28,34}',
},
{
id: 'slack-app-token',
source: 'xapp-\\d-[A-Z0-9]+-\\d+-[a-z0-9]+',
flags: 'i',
},
{
id: 'twilio-api-key',
source: 'SK[0-9a-fA-F]{32}',
},
{
id: 'sendgrid-api-token',
// gitleaks: SG\.(?i)[a-z0-9=_\-\.]{66} → JS: case-insensitive via flag
source: '\\b(SG\\.[a-zA-Z0-9=_\\-.]{66})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
// — Dev tooling —
{
id: 'npm-access-token',
source: '\\b(npm_[a-zA-Z0-9]{36})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'pypi-upload-token',
source: 'pypi-AgEIcHlwaS5vcmc[\\w-]{50,1000}',
},
{
id: 'databricks-api-token',
source: '\\b(dapi[a-f0-9]{32}(?:-\\d)?)(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'hashicorp-tf-api-token',
// gitleaks: (?i)[a-z0-9]{14}\.(?-i:atlasv1)\.[a-z0-9\-_=]{60,70}
// → JS: case-insensitive hex+alnum prefix, literal "atlasv1", case-insensitive suffix
source: '[a-zA-Z0-9]{14}\\.atlasv1\\.[a-zA-Z0-9\\-_=]{60,70}',
},
{
id: 'pulumi-api-token',
source: '\\b(pul-[a-f0-9]{40})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'postman-api-token',
// gitleaks: PMAK-(?i)[a-f0-9]{24}\-[a-f0-9]{34} → JS: use [a-fA-F0-9]
source:
'\\b(PMAK-[a-fA-F0-9]{24}-[a-fA-F0-9]{34})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
// — Observability —
{
id: 'grafana-api-key',
source:
'\\b(eyJrIjoi[A-Za-z0-9+/]{70,400}={0,3})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'grafana-cloud-api-token',
source: '\\b(glc_[A-Za-z0-9+/]{32,400}={0,3})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'grafana-service-account-token',
source:
'\\b(glsa_[A-Za-z0-9]{32}_[A-Fa-f0-9]{8})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'sentry-user-token',
source: '\\b(sntryu_[a-f0-9]{64})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'sentry-org-token',
source:
'\\bsntrys_eyJpYXQiO[a-zA-Z0-9+/]{10,200}(?:LCJyZWdpb25fdXJs|InJlZ2lvbl91cmwi|cmVnaW9uX3VybCI6)[a-zA-Z0-9+/]{10,200}={0,2}_[a-zA-Z0-9+/]{43}',
},
// — Payment / commerce —
{
id: 'stripe-access-token',
source:
'\\b((?:sk|rk)_(?:test|live|prod)_[a-zA-Z0-9]{10,99})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'shopify-access-token',
source: 'shpat_[a-fA-F0-9]{32}',
},
{
id: 'shopify-shared-secret',
source: 'shpss_[a-fA-F0-9]{32}',
},
// — Crypto —
{
id: 'private-key',
source:
'-----BEGIN[ A-Z0-9_-]{0,100}PRIVATE KEY(?: BLOCK)?-----[\\s\\S-]{64,}?-----END[ A-Z0-9_-]{0,100}PRIVATE KEY(?: BLOCK)?-----',
flags: 'i',
},
]
// Lazily compiled pattern cache — compile once on first scan.
let compiledRules: Array<{ id: string; re: RegExp }> | null = null
function getCompiledRules(): Array<{ id: string; re: RegExp }> {
if (compiledRules === null) {
compiledRules = SECRET_RULES.map(r => ({
id: r.id,
re: new RegExp(r.source, r.flags),
}))
}
return compiledRules
}
/**
* Convert a gitleaks rule ID (kebab-case) to a human-readable label.
* e.g., "github-pat" → "GitHub PAT", "aws-access-token" → "AWS Access Token"
*/
function ruleIdToLabel(ruleId: string): string {
// Words where the canonical capitalization differs from title case
const specialCase: Record<string, string> = {
aws: 'AWS',
gcp: 'GCP',
api: 'API',
pat: 'PAT',
ad: 'AD',
tf: 'TF',
oauth: 'OAuth',
npm: 'NPM',
pypi: 'PyPI',
jwt: 'JWT',
github: 'GitHub',
gitlab: 'GitLab',
openai: 'OpenAI',
digitalocean: 'DigitalOcean',
huggingface: 'HuggingFace',
hashicorp: 'HashiCorp',
sendgrid: 'SendGrid',
}
return ruleId
.split('-')
.map(part => specialCase[part] ?? capitalize(part))
.join(' ')
}
/**
* Scan a string for potential secrets.
*
* Returns one match per rule that fired (deduplicated by rule ID). The
* actual matched text is intentionally NOT returned — we never log or
* display secret values.
*/
export function scanForSecrets(content: string): SecretMatch[] {
const matches: SecretMatch[] = []
const seen = new Set<string>()
for (const rule of getCompiledRules()) {
if (seen.has(rule.id)) {
continue
}
if (rule.re.test(content)) {
seen.add(rule.id)
matches.push({
ruleId: rule.id,
label: ruleIdToLabel(rule.id),
})
}
}
return matches
}
/**
* Get a human-readable label for a gitleaks rule ID.
* Falls back to kebab-to-Title conversion for unknown IDs.
*/
export function getSecretLabel(ruleId: string): string {
return ruleIdToLabel(ruleId)
}
/**
* Redact any matched secrets in-place with [REDACTED].
* Unlike scanForSecrets, this returns the content with spans replaced
* so the surrounding text can still be written to disk safely.
*/
let redactRules: RegExp[] | null = null
export function redactSecrets(content: string): string {
redactRules ??= SECRET_RULES.map(
r => new RegExp(r.source, (r.flags ?? '').replace('g', '') + 'g'),
)
for (const re of redactRules) {
// Replace only the captured group, not the full match — patterns include
// boundary chars (space, quote, ;) outside the group that must survive.
content = content.replace(re, (match, g1) =>
typeof g1 === 'string' ? match.replace(g1, '[REDACTED]') : '[REDACTED]',
)
}
return content
}