* Fuzzy fallback: when zero FTS/LIKE hits, try an edit-distance * sweep over the distinct symbol-name set. Caps `maxDist` at 2 so * `getUssr` finds `getUser` but `process` doesn't match `prosody`. * Bounded edit distance keeps each comparison cheap; the per-query * scan is O(distinct-nam
(
text: string,
options: { kinds?: NodeKind[]; languages?: Language[]; limit: number }
)
| 928 | * node count on any real codebase. |
| 929 | */ |
| 930 | private searchNodesFuzzy( |
| 931 | text: string, |
| 932 | options: { kinds?: NodeKind[]; languages?: Language[]; limit: number } |
| 933 | ): SearchResult[] { |
| 934 | const { kinds, languages, limit } = options; |
| 935 | const lowered = text.toLowerCase(); |
| 936 | const maxDist = lowered.length <= 4 ? 1 : 2; |
| 937 | |
| 938 | // Pull the distinct name list once. The set is cached on QueryBuilder |
| 939 | // by getAllNodeNames(); even on a 200k-node project the distinct |
| 940 | // name set is typically O(10k) because most names repeat. The |
| 941 | // candidate-cap below bounds memory regardless. |
| 942 | const allNames = this.getAllNodeNames(); |
| 943 | const candidates: Array<{ name: string; dist: number }> = []; |
| 944 | for (const name of allNames) { |
| 945 | const dist = boundedEditDistance(name.toLowerCase(), lowered, maxDist); |
| 946 | if (dist <= maxDist) candidates.push({ name, dist }); |
| 947 | } |
| 948 | candidates.sort((a, b) => a.dist - b.dist); |
| 949 | |
| 950 | // Cap the per-name follow-up queries. Each survivor triggers a |
| 951 | // separate `SELECT * FROM nodes WHERE name = ?`; without this cap |
| 952 | // a project with many similar names (`getUser1`, `getUser2`...) |
| 953 | // could fan out far beyond `limit` queries before the inner-loop |
| 954 | // limit kicks in. |
| 955 | const FUZZY_FOLLOWUP_CAP = Math.max(limit * 2, 50); |
| 956 | const cappedCandidates = candidates.slice(0, FUZZY_FOLLOWUP_CAP); |
| 957 | |
| 958 | const results: SearchResult[] = []; |
| 959 | const seen = new Set<string>(); |
| 960 | for (const c of cappedCandidates) { |
| 961 | if (results.length >= limit) break; |
| 962 | let sql = 'SELECT * FROM nodes WHERE name = ?'; |
| 963 | const params: (string | number)[] = [c.name]; |
| 964 | if (kinds && kinds.length > 0) { |
| 965 | sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`; |
| 966 | params.push(...kinds); |
| 967 | } |
| 968 | if (languages && languages.length > 0) { |
| 969 | sql += ` AND language IN (${languages.map(() => '?').join(',')})`; |
| 970 | params.push(...languages); |
| 971 | } |
| 972 | sql += ' LIMIT 5'; |
| 973 | const rows = this.db.prepare(sql).all(...params) as NodeRow[]; |
| 974 | for (const row of rows) { |
| 975 | if (seen.has(row.id)) continue; |
| 976 | seen.add(row.id); |
| 977 | // Lower the score for each edit step away from the query so |
| 978 | // exact-match fallbacks (dist 0) outrank dist-2 typos. |
| 979 | results.push({ node: rowToNode(row), score: 1 / (1 + c.dist) }); |
| 980 | if (results.length >= limit) break; |
| 981 | } |
| 982 | } |
| 983 | return results; |
| 984 | } |
| 985 | |
| 986 | /** |
| 987 | * FTS5 search with prefix matching |
no test coverage detected