MCPcopy
hub / github.com/colbymchenry/codegraph / searchNodesFuzzy

Method searchNodesFuzzy

src/db/queries.ts:930–984  ·  view source on GitHub ↗

* Fuzzy fallback: when zero FTS/LIKE hits, try an edit-distance * sweep over the distinct symbol-name set. Caps `maxDist` at 2 so * `getUssr` finds `getUser` but `process` doesn't match `prosody`. * Bounded edit distance keeps each comparison cheap; the per-query * scan is O(distinct-nam

(
    text: string,
    options: { kinds?: NodeKind[]; languages?: Language[]; limit: number }
  )

Source from the content-addressed store, hash-verified

928 * node count on any real codebase.
929 */
930 private searchNodesFuzzy(
931 text: string,
932 options: { kinds?: NodeKind[]; languages?: Language[]; limit: number }
933 ): SearchResult[] {
934 const { kinds, languages, limit } = options;
935 const lowered = text.toLowerCase();
936 const maxDist = lowered.length <= 4 ? 1 : 2;
937
938 // Pull the distinct name list once. The set is cached on QueryBuilder
939 // by getAllNodeNames(); even on a 200k-node project the distinct
940 // name set is typically O(10k) because most names repeat. The
941 // candidate-cap below bounds memory regardless.
942 const allNames = this.getAllNodeNames();
943 const candidates: Array<{ name: string; dist: number }> = [];
944 for (const name of allNames) {
945 const dist = boundedEditDistance(name.toLowerCase(), lowered, maxDist);
946 if (dist <= maxDist) candidates.push({ name, dist });
947 }
948 candidates.sort((a, b) => a.dist - b.dist);
949
950 // Cap the per-name follow-up queries. Each survivor triggers a
951 // separate `SELECT * FROM nodes WHERE name = ?`; without this cap
952 // a project with many similar names (`getUser1`, `getUser2`...)
953 // could fan out far beyond `limit` queries before the inner-loop
954 // limit kicks in.
955 const FUZZY_FOLLOWUP_CAP = Math.max(limit * 2, 50);
956 const cappedCandidates = candidates.slice(0, FUZZY_FOLLOWUP_CAP);
957
958 const results: SearchResult[] = [];
959 const seen = new Set<string>();
960 for (const c of cappedCandidates) {
961 if (results.length >= limit) break;
962 let sql = 'SELECT * FROM nodes WHERE name = ?';
963 const params: (string | number)[] = [c.name];
964 if (kinds && kinds.length > 0) {
965 sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
966 params.push(...kinds);
967 }
968 if (languages && languages.length > 0) {
969 sql += ` AND language IN (${languages.map(() => '?').join(',')})`;
970 params.push(...languages);
971 }
972 sql += ' LIMIT 5';
973 const rows = this.db.prepare(sql).all(...params) as NodeRow[];
974 for (const row of rows) {
975 if (seen.has(row.id)) continue;
976 seen.add(row.id);
977 // Lower the score for each edit step away from the query so
978 // exact-match fallbacks (dist 0) outrank dist-2 typos.
979 results.push({ node: rowToNode(row), score: 1 / (1 + c.dist) });
980 if (results.length >= limit) break;
981 }
982 }
983 return results;
984 }
985
986 /**
987 * FTS5 search with prefix matching

Callers 1

searchNodesMethod · 0.95

Calls 7

getAllNodeNamesMethod · 0.95
boundedEditDistanceFunction · 0.90
rowToNodeFunction · 0.85
joinMethod · 0.80
hasMethod · 0.80
allMethod · 0.65
prepareMethod · 0.65

Tested by

no test coverage detected