(text: string)
| 7 | * Removes null bytes and control characters that can cause encoding errors |
| 8 | */ |
export function sanitizeTextForUTF8(text: string): string {
  // Runtime guard: callers outside the type checker may pass null/undefined
  // or a non-string value; all of those sanitize to the empty string.
  if (typeof text !== 'string') {
    return ''
  }

  // One pass over the ORIGINAL string removes:
  //   - C0 control characters (including NUL) and DEL, but keeps
  //     \t (0x09), \n (0x0A) and \r (0x0D)
  //   - lone (unpaired) UTF-16 surrogates
  //   - the Unicode replacement character U+FFFD
  //
  // The `u` flag is essential: it makes the regex match by code point, so a
  // valid surrogate pair (e.g. an emoji) is seen as one astral code point and
  // is kept; only unpaired surrogates fall inside \uD800-\uDFFF. Without the
  // flag, both halves of every valid pair would be stripped.
  //
  // Doing everything in a single pass also means two lone surrogates that were
  // separated by a removed character cannot be joined into a new pair.
  return text.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\uD800-\uDFFF\uFFFD]/gu, '')
}
| 20 | |
| 21 | /** |
| 22 | * Sanitize an array of strings |
no test coverage detected