hub / github.com/Doorman11991/smallcode / _parseEntry

Method _parseEntry

src/knowledge/loader.js:107–159 · view source on GitHub ↗

Parse a single knowledge file's metadata (path tokens, heading, front-matter keywords). The file is read once, in full, to extract the heading and front-matter; files are capped at 100 KB, so this is cheap. Full content is loaded later only for selected files.

(filePath, rootDir)

Source from the content-addressed store, hash-verified

105	* tiny so it's cheap. Full content is loaded later only for selected files.
106	*/
107	_parseEntry(filePath, rootDir) {
108	let stat;
109	try { stat = fs.statSync(filePath); } catch { return null; }
110	// Hard cap on file size — we don't want a rogue 50MB file blowing up memory
111	if (stat.size > 100 * 1024) return null;
112
113	let head;
114	try { head = fs.readFileSync(filePath, 'utf-8'); }
115	catch { return null; }
116
117	const rel = path.relative(rootDir, filePath);
118	const name = path.basename(filePath, path.extname(filePath));
119	// Slug components from path become keywords (binary-search.md → ["binary","search"])
120	const pathTokens = rel.toLowerCase().split(/[\\/_\-.\s]+/).filter(Boolean);
121
122	// Optional YAML-ish front-matter: --- ... ---
123	let keywords = [];
124	let heading = '';
125	let body = head;
126	const fmMatch = head.match(/^---\n([\s\S]?)\n---\n([\s\S])$/);
127	if (fmMatch) {
128	const fm = fmMatch[1];
129	body = fmMatch[2];
130	const kwMatch = fm.match(/keywords?:\s*(.+)/i);
131	if (kwMatch) {
132	keywords = kwMatch[1]
133	.replace(/^\[\|\]$/g, '')
134	.split(/[,\s]+/)
135	.map(s => s.trim().toLowerCase().replace(/^['"]\|['"]$/g, ''))
136	.filter(Boolean);
137	}
138	}
139	const hMatch = body.match(/^#\s+(.+)/m);
140	if (hMatch) heading = hMatch[1].trim();
141
142	// Combine all keyword sources
143	const allKeywords = new Set([
144	...pathTokens,
145	...keywords,
146	...heading.toLowerCase().split(/\W+/).filter(t => t.length > 2),
147	name.toLowerCase(),
148	]);
149
150	return {
151	path: filePath,
152	relPath: rel,
153	name,
154	heading,
155	keywords: [...allKeywords],
156	size: stat.size,
157	_bodyCache: body, // already-read; keep for tiny files
158	};
159	}
160
161	/**
162	* Tokenize a query into normalized lowercase words ≥ 3 chars.

Callers 1

walkMethod · 0.95

Calls

no outgoing calls

Tested by

no test coverage detected