MCPcopy
hub / github.com/colbymchenry/codegraph / scorePathRelevance

Function scorePathRelevance

src/search/query-utils.ts:221–275  ·  view source on GitHub ↗
(
  filePath: string,
  query: string,
  projectNameTokens?: Set<string>,
)

Source from the content-addressed store, hash-verified

219 * Higher score = more relevant path
220 */
export function scorePathRelevance(
  filePath: string,
  query: string,
  projectNameTokens?: Set<string>,
): number {
  // Lower-cased views of the path: the whole thing, its last segment, and
  // everything before the last segment.
  const lowerPath = filePath.toLowerCase();
  const baseName = path.basename(filePath).toLowerCase();
  const parentDir = path.dirname(filePath).toLowerCase();

  // Scoring is attributed per ORIGINAL query word rather than per sub-token.
  // One PascalCase word (e.g. a project name "SuperBizAgent") expands to
  // several sub-tokens (superbizagent / super / biz / agent) that all hit the
  // same path segment; adding points for each of them multiplied the boost
  // for a single concept and drowned out the rest of the query (#720). So a
  // word earns a level's points at most once, when ANY of its sub-tokens
  // match, while separate words still accumulate independently. The query is
  // split in its original casing; extractSearchTerms performs the
  // camelCase/snake_case split per word, so `getUserName` can still match a
  // `get_user_name` path.
  const queryWords = query.split(/\s+/).filter((part) => part !== '');
  if (queryWords.length === 0) return 0;

  // Words that merely name the project (go.mod / package.json / repo name)
  // say nothing about WHICH file is relevant, so they are dropped — otherwise
  // every file under a `<ProjectName>…/` tree wins on that word alone (#720).
  // The drop is applied only when at least one other word survives, so a
  // query consisting solely of the project name is still scored on its path.
  let wordsToScore = queryWords;
  if (projectNameTokens && projectNameTokens.size > 0) {
    const remaining = queryWords.filter(
      (candidate) => !projectNameTokens.has(normalizeNameToken(candidate)),
    );
    if (remaining.length > 0) {
      wordsToScore = remaining;
    }
  }

  // True when at least one term occurs as a substring of `haystack`.
  const containsAny = (haystack: string, terms: readonly string[]): boolean =>
    terms.some((term) => haystack.includes(term));

  let total = 0;
  for (const word of wordsToScore) {
    // Base terms only: stem variants produce many near-duplicate terms that
    // all land on the same path segments and would inflate the score.
    const terms = extractSearchTerms(word, { stems: false });
    if (terms.length === 0) continue;

    // Filename hit — the strongest signal.
    if (containsAny(baseName, terms)) {
      total += 10;
    }

    // Directory hit, otherwise a hit anywhere in the full path. The full path
    // contains the filename, so a word that matched only the filename also
    // collects the general-path points here (10 + 3).
    if (containsAny(parentDir, terms)) {
      total += 5;
    } else if (containsAny(lowerPath, terms)) {
      total += 3;
    }
  }

  // Test files are pushed down unless the query itself mentions tests/specs.
  const lowerQuery = query.toLowerCase();
  const mentionsTests = ['test', 'spec'].some((keyword) => lowerQuery.includes(keyword));
  if (mentionsTests) {
    return total;
  }
  return isTestFile(filePath) ? total - 15 : total;
}
276
277/**
278 * Check if a file path looks like a test file

Callers 3

findRelevantContextMethod · 0.90
searchNodesMethod · 0.90

Calls 4

normalizeNameTokenFunction · 0.85
extractSearchTermsFunction · 0.85
hasMethod · 0.80
isTestFileFunction · 0.70

Tested by

no test coverage detected