| 252 | // ── The scan ────────────────────────────────────────────────────────────────── |
| 253 | |
| 254 | export function scan(input: string, opts: ScanOptions = {}): ScanResult { |
| 255 | const repoVisibility: RepoVisibility = opts.repoVisibility ?? "unknown"; |
| 256 | // #1824: ?? only catches null/undefined, not NaN, Infinity, or <= 0. NaN |
| 257 | // (e.g. from a malformed --max-bytes) or Infinity would make `byteLen > |
| 258 | // maxBytes` always false and silently disable the fail-closed oversize guard; |
| 259 | // a negative would make it always true (zero: true for any non-empty input). |
| 260 | // Guardrail: any non-finite or non-positive value falls back to the default |
| 261 | // cap. The CLI rejects bad args; this keeps the engine from ever running uncapped. |
| 262 | const maxBytes = |
| 263 | Number.isFinite(opts.maxBytes) && (opts.maxBytes as number) > 0 |
| 264 | ? (opts.maxBytes as number) |
| 265 | : DEFAULT_MAX_BYTES; |
| 266 | |
| 267 | // Fail CLOSED on oversize input. Check byte length BEFORE heavy work. |
| 268 | const byteLen = Buffer.byteLength(input, "utf8"); |
| 269 | if (byteLen > maxBytes) { |
| 270 | const finding: Finding = { |
| 271 | id: "engine.input_too_large", |
| 272 | tier: "HIGH", |
| 273 | severity: "HIGH", |
| 274 | category: "secret", |
| 275 | description: `Input too large to scan safely (${byteLen} > ${maxBytes} bytes) — blocking fail-closed`, |
| 276 | line: 1, |
| 277 | col: 1, |
| 278 | preview: "", |
| 279 | autoRedactable: false, |
| 280 | repoVisibility, |
| 281 | }; |
| 282 | return { |
| 283 | findings: [finding], |
| 284 | counts: { HIGH: 1, MEDIUM: 0, LOW: 0, WARN: 0 }, |
| 285 | repoVisibility, |
| 286 | oversize: true, |
| 287 | }; |
| 288 | } |
| 289 | |
| 290 | const { normalized, map } = normalizeWithMap(input); |
| 291 | const fenceRanges = toolFenceRanges(normalized); |
| 292 | const allow = new Set(opts.allowlist ?? []); |
| 293 | |
| 294 | const findings: Finding[] = []; |
| 295 | // Dedup by (id, original-offset) so overlapping global matches don't double-count. |
| 296 | const seen = new Set<string>(); |
| 297 | |
| 298 | for (const pat of PATTERNS) { |
| 299 | const re = new RegExp(pat.regex.source, withFlags(pat.regex.flags)); |
| 300 | let m: RegExpExecArray | null; |
| 301 | while ((m = re.exec(normalized)) !== null) { |
| 302 | // Guard against zero-width matches looping forever. |
| 303 | if (m.index === re.lastIndex) re.lastIndex++; |
| 304 | |
| 305 | const span = m[1] ?? m[0]; |
| 306 | const spanStartInMatch = m[1] !== undefined ? m[0].indexOf(m[1]) : 0; |
| 307 | const normOffset = m.index + Math.max(0, spanStartInMatch); |
| 308 | |
| 309 | // Per-span placeholder suppression. |
| 310 | if (isPlaceholderSpan(span)) continue; |
| 311 | if (allow.has(span)) continue; |