* Score a text against a set of query tokens. * Returns 0 if no tokens match, otherwise a positive score.
(text: string, tokens: string[])
| 7 | * Returns 0 if no tokens match, otherwise a positive score. |
| 8 | */ |
function scoreText(text: string, tokens: string[]): number {
  // Scoring rules, applied to every token that occurs in the lowercased text:
  //   +1.0  base score for the token being present at all,
  //   +0.5  if at least one occurrence stands alone as a whole word,
  //   +0.2  per additional non-overlapping occurrence (at most 3 extras count).
  // The sum is finally scaled by the fraction of tokens that matched, so a
  // text that misses some tokens is penalised proportionally.
  //
  // Only `text` is lowercased here; tokens are compared exactly as given, so a
  // token containing uppercase characters never matches.
  if (!text || tokens.length === 0) return 0;

  // An empty token "matches" at index 0 of every text and would add a base
  // score, a bogus occurrence bonus and count as matched; ignore such tokens.
  const usableTokens = tokens.filter((t) => t.length > 0);
  if (usableTokens.length === 0) return 0;

  const lower = text.toLowerCase();

  // Unicode-aware notion of a "word character" (letters, combining marks,
  // digits, underscore). ASCII-only `\W` would treat accented letters such as
  // "é" as separators and hand out the whole-word bonus for partial words.
  const wordChar = /[\p{L}\p{M}\p{N}_]/u;
  const isBoundary = (pos: number): boolean =>
    pos < 0 || pos >= lower.length || !wordChar.test(lower.charAt(pos));

  let score = 0;
  let matchedCount = 0;

  for (const token of usableTokens) {
    let occurrences = 0;
    let hasWholeWordHit = false;

    // Walk all non-overlapping occurrences (same counting as a global regex
    // match) and remember whether ANY of them is delimited on both sides;
    // checking only the first occurrence misses e.g. "cat" in "category cat".
    for (
      let idx = lower.indexOf(token);
      idx !== -1;
      idx = lower.indexOf(token, idx + token.length)
    ) {
      occurrences += 1;
      if (!hasWholeWordHit && isBoundary(idx - 1) && isBoundary(idx + token.length)) {
        hasWholeWordHit = true;
      }
    }

    if (occurrences === 0) continue;
    matchedCount += 1;

    // Base score per token
    score += 1;

    // Bonus for word boundary match
    if (hasWholeWordHit) score += 0.5;

    // Bonus for more occurrences (capped)
    score += Math.min(occurrences - 1, 3) * 0.2;
  }

  // Penalty if not all tokens match
  if (matchedCount < usableTokens.length) {
    score *= matchedCount / usableTokens.length;
  }

  return score;
}
| 39 | |
| 40 | /** |
| 41 | * Extract plain-text content from a message for indexing. |