( filePath: string, query: string, projectNameTokens?: Set<string>, )
| 219 | * Higher score = more relevant path |
| 220 | */ |
export function scorePathRelevance(
  filePath: string,
  query: string,
  projectNameTokens?: Set<string>,
): number {
  // Lower-cased views of the path: the whole thing, its last segment, and
  // everything before the last segment.
  const wholePath = filePath.toLowerCase();
  const baseName = path.basename(filePath).toLowerCase();
  const parentDir = path.dirname(filePath).toLowerCase();

  // Whitespace-separated query words, in their original casing. Scoring is
  // attributed per WORD rather than per sub-token: one PascalCase word such as
  // "SuperBizAgent" expands to several sub-tokens that all hit the same path
  // segment, and counting each of them would multiply the boost for a single
  // concept (#720).
  const queryWords = query.split(/\s+/).filter((w) => w.length > 0);
  if (queryWords.length === 0) return 0;

  // Words that merely name the project are discarded because they match every
  // file under the project tree (#720) — but only if at least one other word
  // survives; a query made up solely of project-name words is scored as-is.
  let wordsToScore = queryWords;
  if (projectNameTokens && projectNameTokens.size > 0) {
    const nonProjectWords = queryWords.filter(
      (w) => !projectNameTokens.has(normalizeNameToken(w)),
    );
    if (nonProjectWords.length > 0) wordsToScore = nonProjectWords;
  }

  // True when at least one term occurs as a substring of `haystack`.
  const containsAny = (haystack: string, terms: readonly string[]): boolean =>
    terms.some((term) => haystack.includes(term));

  let total = 0;
  for (const word of wordsToScore) {
    // Stem variants are disabled: they would only add near-duplicate terms
    // hitting the same path segments.
    const terms = extractSearchTerms(word, { stems: false });
    if (terms.length === 0) continue;

    // Each level contributes at most once per word, however many of the
    // word's sub-tokens hit it. Matching is substring containment.
    if (containsAny(baseName, terms)) total += 10;

    // The whole-path bonus is a fallback that applies only when the directory
    // part did not match; note that a filename-only hit therefore earns both
    // the filename bonus and this fallback.
    if (containsAny(parentDir, terms)) {
      total += 5;
    } else if (containsAny(wholePath, terms)) {
      total += 3;
    }
  }

  // Test files are pushed down unless the query itself mentions tests. The
  // check is a plain substring test on the lower-cased query, so any query
  // containing "test" or "spec" (e.g. "latest", "inspect") counts as a test
  // query and skips the penalty.
  const loweredQuery = query.toLowerCase();
  const mentionsTests = loweredQuery.includes('test') || loweredQuery.includes('spec');
  if (mentionsTests) return total;

  return isTestFile(filePath) ? total - 15 : total;
}
| 276 | |
| 277 | /** |
| 278 | * Check if a file path looks like a test file |
no test coverage detected