MCPcopy
hub / github.com/garrytan/gstack / preparePages

Function preparePages

bin/gstack-memory-ingest.ts:1111–1201  ·  view source on GitHub ↗

Prepare phase: walk sources, apply incremental + optional-secret-scan filters, parse transcripts/artifacts into PageRecord, and render bodies with frontmatter. Returns the PreparedPage[] to stage, plus counts of files filtered at each gate. Secret scanning policy (post 2026-05-10 perf review) … [summary truncated]

(
  args: CliArgs,
  ctx: WalkContext,
  state: IngestState,
)

Source from the content-addressed store, hash-verified

1109 * for users who want belt-and-suspenders.
1110 */
1111function preparePages(
1112 args: CliArgs,
1113 ctx: WalkContext,
1114 state: IngestState,
1115): {
1116 prepared: PreparedPage[];
1117 skippedSecret: number;
1118 skippedDedup: number;
1119 skippedUnattributed: number;
1120 parseFailed: number;
1121 partialPages: number;
1122} {
1123 const prepared: PreparedPage[] = [];
1124 let skippedSecret = 0;
1125 let skippedDedup = 0;
1126 let skippedUnattributed = 0;
1127 let parseFailed = 0;
1128 let partialPages = 0;
1129
1130 for (const { path, type } of walkAllSources(ctx)) {
1131 if (args.limit !== null && prepared.length >= args.limit) break;
1132
1133 if (args.mode === "incremental" && !fileChangedSinceState(path, state)) {
1134 skippedDedup++;
1135 continue;
1136 }
1137
1138 // Optional belt-and-suspenders: when --scan-secrets is set, scan the
1139 // source file with gitleaks and skip dirty ones. Off by default
1140 // because gstack-brain-sync already gates the cross-machine boundary
1141 // and per-file gitleaks costs ~256ms/file (4-8 min on a real corpus).
1142 if (args.scanSecrets) {
1143 const scan = secretScanFile(path);
1144 if (scan.scanner === "gitleaks" && scan.findings.length > 0) {
1145 skippedSecret++;
1146 if (!args.quiet) {
1147 console.error(
1148 `[secret-scan match] ${path} (${scan.findings.length} finding${
1149 scan.findings.length === 1 ? "" : "s"
1150 }); skipped`,
1151 );
1152 }
1153 continue;
1154 }
1155 }
1156
1157 let page: PageRecord;
1158 try {
1159 if (type === "transcript") {
1160 const session = parseTranscriptJsonl(path);
1161 if (!session) {
1162 parseFailed++;
1163 continue;
1164 }
1165 if (!args.includeUnattributed && !session.cwd) {
1166 skippedUnattributed++;
1167 continue;
1168 }

Callers 1

ingestPass · Function · 0.85

Calls 8

secretScanFile · Function · 0.90
walkAllSources · Function · 0.85
fileChangedSinceState · Function · 0.85
parseTranscriptJsonl · Function · 0.85
buildTranscriptPage · Function · 0.85
buildArtifactPage · Function · 0.85
renderPageBody · Function · 0.85
push · Method · 0.45

Tested by

no test coverage detected