MCPcopy
hub / github.com/Doorman11991/smallcode / _parseEntry

Method _parseEntry

src/knowledge/loader.js:107–159  ·  view source on GitHub ↗

Parse a single knowledge file's metadata. The file is read once to extract its heading and front-matter — knowledge files are tiny (anything over 100 KB is skipped), so this is cheap. Full content is loaded later only for selected files.

(filePath, rootDir)

Source from the content-addressed store, hash-verified

105 * tiny so it's cheap. Full content is loaded later only for selected files.
106 */
107 _parseEntry(filePath, rootDir) {
108 let stat;
109 try { stat = fs.statSync(filePath); } catch { return null; }
110 // Hard cap on file size — we don't want a rogue 50MB file blowing up memory
111 if (stat.size > 100 * 1024) return null;
112
113 let head;
114 try { head = fs.readFileSync(filePath, 'utf-8'); }
115 catch { return null; }
116
117 const rel = path.relative(rootDir, filePath);
118 const name = path.basename(filePath, path.extname(filePath));
119 // Slug components from path become keywords (binary-search.md → ["binary","search"])
120 const pathTokens = rel.toLowerCase().split(/[\\/_\-.\s]+/).filter(Boolean);
121
122 // Optional YAML-ish front-matter: --- ... ---
123 let keywords = [];
124 let heading = '';
125 let body = head;
126 const fmMatch = head.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
127 if (fmMatch) {
128 const fm = fmMatch[1];
129 body = fmMatch[2];
130 const kwMatch = fm.match(/keywords?:\s*(.+)/i);
131 if (kwMatch) {
132 keywords = kwMatch[1]
133 .replace(/^\[|\]$/g, '')
134 .split(/[,\s]+/)
135 .map(s => s.trim().toLowerCase().replace(/^['"]|['"]$/g, ''))
136 .filter(Boolean);
137 }
138 }
139 const hMatch = body.match(/^#\s+(.+)/m);
140 if (hMatch) heading = hMatch[1].trim();
141
142 // Combine all keyword sources
143 const allKeywords = new Set([
144 ...pathTokens,
145 ...keywords,
146 ...heading.toLowerCase().split(/\W+/).filter(t => t.length > 2),
147 name.toLowerCase(),
148 ]);
149
150 return {
151 path: filePath,
152 relPath: rel,
153 name,
154 heading,
155 keywords: [...allKeywords],
156 size: stat.size,
157 _bodyCache: body, // already-read; keep for tiny files
158 };
159 }
160
161 /**
162 * Tokenize a query into normalized lowercase words ≥ 3 chars.

Callers 1

walkMethod · 0.95

Calls

no outgoing calls

Tested by

no test coverage detected