* Basic HTML to text extraction.
(html: string | null)
| 137 | * Basic HTML to text extraction. |
| 138 | */ |
function extractTextFromHtml(html: string | null): string | null {
  // Reduces an HTML fragment to plain text.
  //
  // @param html - Raw HTML markup, or null when there is nothing to convert.
  // @returns The extracted text with blank-line runs collapsed and outer
  //          whitespace trimmed, or null when `html` is null or empty.
  if (!html) return null

  // Entities are decoded before any tag handling, so the steps below also see
  // whatever markup the decoder produces.
  // NOTE(review): presumably `&lt;`/`&gt;` become `<`/`>` here, which would make
  // literal text like "1 &lt; 2 &gt; 0" look like a tag to the generic stripper.
  // Confirm that this order is intentional before changing it.
  let text = decodeHtmlEntities(html)

  // Drop <style> and <script> elements together with their contents; the
  // helper is re-applied so nested/overlapping fragments cannot survive.
  text = replaceUntilStable(text, /<style[^>]*>[\s\S]*?<\/style\s*>/gi, '')
  text = replaceUntilStable(text, /<script[^>]*>[\s\S]*?<\/script\s*>/gi, '')

  // Turn line-level markup into newlines before the remaining tags are removed.
  // <br> may carry attributes (e.g. `<br clear="all">`) and closing tags may
  // contain whitespace before `>`; both forms must still yield a line break.
  text = text
    .replace(/<br(?:\s[^>]*)?\/?>/gi, '\n')
    .replace(/<\/p\s*>/gi, '\n\n')
    .replace(/<\/div\s*>/gi, '\n')
    .replace(/<\/li\s*>/gi, '\n')

  // Remove every remaining tag.
  text = replaceUntilStable(text, /<[^>]+>/g, '')

  // Collapse runs of three or more newlines into a single blank line.
  return text.replace(/\n{3,}/g, '\n\n').trim()
}
no test coverage detected