Parses raw memory file content into a MemoryFileInfo. Pure function — no I/O. When includeBasePath is given, @include paths are resolved in the same lex pass and returned alongside the parsed file (so processMemoryFile doesn't need to lex the same content a second time).
( rawContent: string, filePath: string, type: MemoryType, includeBasePath?: string, )
| 340 | * need to lex the same content a second time). |
| 341 | */ |
/**
 * Turns the raw text of a memory file into a `MemoryFileInfo`, optionally
 * collecting the file's `@include` targets in the same pass.
 *
 * Intended to be free of I/O: the caller supplies the content as a string.
 *
 * @param rawContent - File text exactly as the caller read it.
 * @param filePath - Path of the file; its extension decides whether the file
 *   is processed at all, and it is echoed back as `info.path`.
 * @param type - Memory type; `'AutoMem'` and `'TeamMem'` content is passed
 *   through `truncateEntrypointContent`.
 * @param includeBasePath - When provided, the content is lexed and
 *   `extractIncludePathsFromTokens` is run against this base path.
 * @returns `info` is `null` when the file has an extension that is not in
 *   `TEXT_FILE_EXTENSIONS`; `includePaths` is empty unless `includeBasePath`
 *   was provided.
 */
function parseMemoryFileContent(
  rawContent: string,
  filePath: string,
  type: MemoryType,
  includeBasePath?: string,
): { info: MemoryFileInfo | null; includePaths: string[] } {
  // Guard: a file with a non-empty extension outside the text allow-list is
  // skipped so binary data (images, PDFs, ...) never ends up in memory.
  // Extension-less files fall through and are processed.
  const extension = extname(filePath).toLowerCase()
  if (extension !== '' && !TEXT_FILE_EXTENSIONS.has(extension)) {
    logForDebugging(`Skipping non-text file in @include: ${filePath}`)
    return { info: null, includePaths: [] }
  }

  const { content: body, paths: globs } = parseFrontmatterPaths(rawContent)

  // One lex feeds both comment stripping and @include extraction. gfm is
  // turned off because extraction needs it (otherwise ~/path tokenizes as
  // strikethrough); stripping is unaffected since html blocks are a
  // CommonMark rule.
  const containsComment = body.includes('<!--')
  const needsLex = containsComment || includeBasePath !== undefined
  const tokens = needsLex ? new Lexer({ gfm: false }).lex(body) : undefined

  // Rebuild from tokens only when there is a comment to remove: marked
  // normalises \r\n while lexing, so round-tripping a CRLF file through
  // token.raw would make contentDiffersFromDisk flip for no real reason.
  let visibleContent = body
  if (containsComment && tokens) {
    visibleContent = stripHtmlCommentsFromTokens(tokens).content
  }

  let includePaths: string[] = []
  if (tokens && includeBasePath !== undefined) {
    includePaths = extractIncludePathsFromTokens(tokens, includeBasePath)
  }

  // MEMORY.md entrypoints are capped (by line count AND byte size).
  const isEntrypoint = type === 'AutoMem' || type === 'TeamMem'
  const finalContent = isEntrypoint
    ? truncateEntrypointContent(visibleContent).content
    : visibleContent

  // A single comparison accounts for every transformation above:
  // frontmatter removal, HTML comment removal, and entrypoint truncation.
  const contentDiffersFromDisk = finalContent !== rawContent

  const info: MemoryFileInfo = {
    path: filePath,
    type,
    content: finalContent,
    globs,
    contentDiffersFromDisk,
    // The raw text is kept only when it is not already equal to `content`.
    rawContent: contentDiffersFromDisk ? rawContent : undefined,
  }

  return { info, includePaths }
}
no test coverage detected