/**
 * Builds a condensed excerpt of `content` focused on what `query` asks about.
 *
 * The excerpt is assembled from, in this order:
 *  1. the first two non-empty paragraphs (treated as the introduction),
 *  2. up to three body paragraphs that mention the most distinct query
 *     keywords, emitted in their original document order,
 *  3. the last paragraph (treated as the conclusion), when the content has
 *     more than two paragraphs.
 *
 * Paragraphs are delimited by a blank line (`'\n\n'`); whitespace-only
 * paragraphs are discarded. Keywords are the lower-cased query words longer
 * than three characters, with surrounding punctuation removed and common
 * question/filler words excluded. Matching is a case-insensitive substring
 * test.
 *
 * @param content - Full text to condense.
 * @param query - Free-text question used to pick the relevant paragraphs.
 * @param maxLength - Upper bound on the length of the returned string.
 *   Over-long results are cut and suffixed with `'...'`; when the limit is
 *   too small to hold the ellipsis, the text is simply cut at the limit.
 * @returns The excerpt, never longer than `maxLength` characters
 *   (empty for a non-positive `maxLength`).
 */
export function selectRelevantContent(content: string, query: string, maxLength = 2000): string {
  const ellipsis = '...'
  const paragraphs = content.split('\n\n').filter(p => p.trim())

  // The first two paragraphs are always kept as the introduction.
  const intro = paragraphs.slice(0, 2).join('\n\n')

  // Shorter question words ("how", "why", ...) are already removed by the
  // length filter below, so only words of four or more letters are listed.
  const stopWords = new Set(['what', 'when', 'where', 'which', 'does', 'with', 'from', 'about'])

  // Strip leading/trailing punctuation so "pricing?" matches "pricing", and
  // de-duplicate so a repeated query word cannot inflate a paragraph's score.
  const keywords = [
    ...new Set(
      query
        .toLowerCase()
        .split(/\s+/)
        .map(word => word.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ''))
        .filter(word => word.length > 3 && !stopWords.has(word))
    ),
  ]

  // Candidates are everything between the two intro paragraphs and the single
  // conclusion paragraph, hence slice(2, -1).
  const relevantParagraphs = paragraphs
    .slice(2, -1)
    .map((text, index) => {
      const haystack = text.toLowerCase()
      const score = keywords.filter(keyword => haystack.includes(keyword)).length
      return { text, score, index }
    })
    .filter(candidate => candidate.score > 0)
    .sort((a, b) => b.score - a.score) // Best matches first (stable for ties)
    .slice(0, 3) // Keep the three most relevant paragraphs
    .sort((a, b) => a.index - b.index) // Restore original document order
    .map(candidate => candidate.text)

  // With one or two paragraphs everything is already part of the intro.
  const conclusion = paragraphs.length > 2 ? paragraphs[paragraphs.length - 1] : ''

  let result = intro
  if (relevantParagraphs.length > 0) {
    result += '\n\n' + relevantParagraphs.join('\n\n')
  }
  if (conclusion) {
    result += '\n\n' + conclusion
  }

  if (result.length > maxLength) {
    if (maxLength < ellipsis.length) {
      // No room for the ellipsis: a plain cut keeps the length guarantee.
      return result.substring(0, Math.max(0, maxLength))
    }
    result = result.substring(0, maxLength - ellipsis.length) + ellipsis
  }

  return result
}