* Retrieve candidate chunks for grounding, mirroring the site search route's hybrid strategy: Postgres full-text keyword search for every locale, plus vector similarity (thresholded) for English — fused by reciprocal rank so a page found by either signal can ground the answer.
(query: string, locale: string)
| 214 | * page found by either signal can ground the answer. |
| 215 | */ |
| 216 | async function searchDocs(query: string, locale: string) { |
| 217 | const tsConfig = TS_CONFIG[locale] ?? 'simple' |
| 218 | |
| 219 | // Each retrieval path is best-effort and independent: a failure in one still |
| 220 | // lets the other ground the answer (both empty just yields no grounding). |
| 221 | let keywordRows: SearchRow[] = [] |
| 222 | try { |
| 223 | keywordRows = await db |
| 224 | .select(SEARCH_COLUMNS) |
| 225 | .from(docsEmbeddings) |
| 226 | .where( |
| 227 | sql`${docsEmbeddings.chunkTextTsv} @@ plainto_tsquery(${tsConfig}, ${query}) and ${localeFilter(locale)}` |
| 228 | ) |
| 229 | .orderBy( |
| 230 | sql`ts_rank(${docsEmbeddings.chunkTextTsv}, plainto_tsquery(${tsConfig}, ${query})) DESC` |
| 231 | ) |
| 232 | .limit(SEARCH_CANDIDATES) |
| 233 | } catch (error) { |
| 234 | console.error('Ask AI keyword search failed:', error) |
| 235 | } |
| 236 | |
| 237 | let vectorRows: SearchRow[] = [] |
| 238 | if (locale === DEFAULT_LOCALE) { |
| 239 | // Vector retrieval (embedding call + pgvector query) is best-effort: if it |
| 240 | // fails, fall back to the keyword rows already fetched rather than losing all |
| 241 | // grounding for the turn. |
| 242 | try { |
| 243 | const embedding = await generateSearchEmbedding(query) |
| 244 | const vectorLiteral = JSON.stringify(embedding) |
| 245 | vectorRows = await db |
| 246 | .select(SEARCH_COLUMNS) |
| 247 | .from(docsEmbeddings) |
| 248 | .where( |
| 249 | sql`1 - (${docsEmbeddings.embedding} <=> ${vectorLiteral}::vector) >= ${SIMILARITY_THRESHOLD} and ${localeFilter(locale)}` |
| 250 | ) |
| 251 | .orderBy(sql`${docsEmbeddings.embedding} <=> ${vectorLiteral}::vector`) |
| 252 | .limit(SEARCH_CANDIDATES) |
| 253 | } catch (error) { |
| 254 | console.error('Ask AI vector search failed; using keyword results only:', error) |
| 255 | } |
| 256 | } |
| 257 | |
| 258 | // Reciprocal rank fusion across the two rankings, deduped by chunk. |
| 259 | const scores = new Map<string, number>() |
| 260 | const rowById = new Map<string, SearchRow>() |
| 261 | for (const list of [vectorRows, keywordRows]) { |
| 262 | list.forEach((row, index) => { |
| 263 | scores.set(row.chunkId, (scores.get(row.chunkId) ?? 0) + 1 / (RRF_K + index + 1)) |
| 264 | if (!rowById.has(row.chunkId)) rowById.set(row.chunkId, row) |
| 265 | }) |
| 266 | } |
| 267 | |
| 268 | return [...rowById.values()] |
| 269 | .sort((a, b) => (scores.get(b.chunkId) ?? 0) - (scores.get(a.chunkId) ?? 0)) |
| 270 | .slice(0, SEARCH_LIMIT) |
| 271 | .map((row) => ({ |
| 272 | title: row.title, |
| 273 | url: row.url, |
no test coverage detected