MCPcopy Index your code
hub / github.com/simstudioai/sim / searchDocs

Function searchDocs

apps/docs/app/api/chat/route.ts:216–276  ·  view source on GitHub ↗

Retrieve candidate chunks for grounding, mirroring the site search route's hybrid strategy: Postgres full-text keyword search for every locale, plus vector similarity (thresholded) for English — fused by reciprocal rank so a page found by either signal can ground the answer.

(query: string, locale: string)

Source from the content-addressed store, hash-verified

214 * page found by either signal can ground the answer.
215 */
216async function searchDocs(query: string, locale: string) {
217 const tsConfig = TS_CONFIG[locale] ?? 'simple'
218
219 // Each retrieval path is best-effort and independent: a failure in one still
220 // lets the other ground the answer (both empty just yields no grounding).
221 let keywordRows: SearchRow[] = []
222 try {
223 keywordRows = await db
224 .select(SEARCH_COLUMNS)
225 .from(docsEmbeddings)
226 .where(
227 sql`${docsEmbeddings.chunkTextTsv} @@ plainto_tsquery(${tsConfig}, ${query}) and ${localeFilter(locale)}`
228 )
229 .orderBy(
230 sql`ts_rank(${docsEmbeddings.chunkTextTsv}, plainto_tsquery(${tsConfig}, ${query})) DESC`
231 )
232 .limit(SEARCH_CANDIDATES)
233 } catch (error) {
234 console.error('Ask AI keyword search failed:', error)
235 }
236
237 let vectorRows: SearchRow[] = []
238 if (locale === DEFAULT_LOCALE) {
239 // Vector retrieval (embedding call + pgvector query) is best-effort: if it
240 // fails, fall back to the keyword rows already fetched rather than losing all
241 // grounding for the turn.
242 try {
243 const embedding = await generateSearchEmbedding(query)
244 const vectorLiteral = JSON.stringify(embedding)
245 vectorRows = await db
246 .select(SEARCH_COLUMNS)
247 .from(docsEmbeddings)
248 .where(
249 sql`1 - (${docsEmbeddings.embedding} <=> ${vectorLiteral}::vector) >= ${SIMILARITY_THRESHOLD} and ${localeFilter(locale)}`
250 )
251 .orderBy(sql`${docsEmbeddings.embedding} <=> ${vectorLiteral}::vector`)
252 .limit(SEARCH_CANDIDATES)
253 } catch (error) {
254 console.error('Ask AI vector search failed; using keyword results only:', error)
255 }
256 }
257
258 // Reciprocal rank fusion across the two rankings, deduped by chunk.
259 const scores = new Map<string, number>()
260 const rowById = new Map<string, SearchRow>()
261 for (const list of [vectorRows, keywordRows]) {
262 list.forEach((row, index) => {
263 scores.set(row.chunkId, (scores.get(row.chunkId) ?? 0) + 1 / (RRF_K + index + 1))
264 if (!rowById.has(row.chunkId)) rowById.set(row.chunkId, row)
265 })
266 }
267
268 return [...rowById.values()]
269 .sort((a, b) => (scores.get(b.chunkId) ?? 0) - (scores.get(a.chunkId) ?? 0))
270 .slice(0, SEARCH_LIMIT)
271 .map((row) => ({
272 title: row.title,
273 url: row.url,

Callers 1

POST · Function · 0.85

Calls 5

generateSearchEmbedding · Function · 0.90
localeFilter · Function · 0.85
error · Method · 0.80
set · Method · 0.65
get · Method · 0.65

Tested by

no test coverage detected