hub / github.com/colbymchenry/codegraph / scorePathRelevance

Function scorePathRelevance

src/search/query-utils.ts:221–275 · view source on GitHub ↗

(
  filePath: string,
  query: string,
  projectNameTokens?: Set<string>,
)

Source from the content-addressed store, hash-verified

219	* Higher score = more relevant path
220	*/
export function scorePathRelevance(
  filePath: string,
  query: string,
  projectNameTokens?: Set<string>,
): number {
  // Contract summary:
  //   filePath          — path being ranked; matched case-insensitively.
  //   query             — raw user query; split on whitespace into words.
  //   projectNameTokens — optional normalized project-name tokens; query words
  //                       that normalize to one of these are ignored when at
  //                       least one other word remains.
  //   returns           — additive score (may be negative for test files);
  //                       0 when the query contains no words.
  const pathLower = filePath.toLowerCase();
  const fileName = path.basename(filePath).toLowerCase();
  const dirName = path.dirname(filePath).toLowerCase();
  let score = 0;

  // Score per original query WORD, not per sub-token. A single PascalCase word
  // splits into many sub-tokens (a project name "SuperBizAgent" →
  // superbizagent / super / biz / agent) that all match the SAME path segment,
  // so summing per sub-token boosted that path 4× for one concept — enough to
  // bury the rest of the query's stack (#720). A word matches a path level if
  // ANY of its sub-tokens do, and counts ONCE; distinct words still each add.
  // Split the ORIGINAL-case query into words; extractSearchTerms does the
  // camelCase/snake split per word (so `getUserName` still matches a
  // `get_user_name` path) — we just attribute each word's matches once.
  const allWords = query.split(/\s+/).filter((w) => w.length > 0);
  if (allWords.length === 0) return 0;

  // A query word that just names the PROJECT (its go.mod / package.json / repo
  // name) carries no discriminative path signal — drop it so the rest of the
  // query decides the ranking, instead of every file under a `<ProjectName>…/`
  // tree winning on the project name alone (#720). Only when OTHER words remain,
  // so a bare project-name query still scores on its path.
  const words =
    projectNameTokens && projectNameTokens.size > 0
      ? allWords.filter((w) => !projectNameTokens.has(normalizeNameToken(w)))
      : allWords;
  const scored = words.length > 0 ? words : allWords;

  for (const word of scored) {
    // Use base terms only — stem variants inflate path scores by generating
    // many near-duplicate terms that all match the same path segments.
    const subtokens = extractSearchTerms(word, { stems: false });
    if (subtokens.length === 0) continue;

    // Filename match (strongest). This is a substring test, so a sub-token
    // appearing anywhere inside the basename counts.
    if (subtokens.some((t) => fileName.includes(t))) score += 10;

    // Directory match
    if (subtokens.some((t) => dirName.includes(t))) score += 5;
    // General path match. The `else` pairs with the DIRECTORY check only, and
    // the basename is itself part of the full path, so a word that hits the
    // filename but not the directory also collects this +3 (13 in total).
    else if (subtokens.some((t) => pathLower.includes(t))) score += 3;
  }

  // Deprioritize test files unless the query is explicitly about tests.
  // NOTE(review): this is a plain substring check on the whole query, so words
  // such as "latest" or "specific" also count as a test query — confirm that
  // is acceptable before tightening it.
  const queryLower = query.toLowerCase();
  const isTestQuery = queryLower.includes('test') || queryLower.includes('spec');
  if (!isTestQuery && isTestFile(filePath)) {
    score -= 15;
  }

  return score;
}
276
277	/**
278	* Check if a file path looks like a test file

Callers 3

findRelevantContext (Method) · 0.90

searchNodes (Method) · 0.90

context-ranking.test.ts (File) · 0.90

Calls 4

normalizeNameToken (Function) · 0.85

extractSearchTerms (Function) · 0.85

has (Method) · 0.80

isTestFile (Function) · 0.70

Tested by

no test coverage detected