(prompt: string)
| 23 | */ |
| 24 | |
export function partiallySanitizeUnicode(prompt: string): string {
  // Contract: returns `prompt` after NFKC normalization with every code point in
  // the Unicode general categories Cf (format), Co (private use) and
  // Cn (unassigned) removed. Normalize-then-strip is repeated until a pass
  // leaves the text unchanged, because stripping a character can expose a new
  // sequence that normalization will rewrite (e.g. a joiner sitting between a
  // base letter and a combining mark).
  // Throws Error only when the text is STILL changing after MAX_ITERATIONS passes.
  const MAX_ITERATIONS = 10 // Safety limit to prevent infinite loops

  let current = prompt
  let previous = ''
  let iterations = 0

  // Iteratively sanitize until no more changes occur or max iterations reached
  while (current !== previous && iterations < MAX_ITERATIONS) {
    previous = current

    // Apply NFKC normalization to handle composed character sequences
    current = current.normalize('NFKC')

    // Method 1: Strip dangerous Unicode property classes.
    // This is the primary defence and is the solution that is widely used in OSS libraries.
    current = current.replace(/[\p{Cf}\p{Co}\p{Cn}]/gu, '')

    // Method 2: Explicit character ranges, kept as a belt-and-braces fallback
    // for specifically known dangerous ranges in case the property-class
    // pattern above does not behave as expected in some environment.
    current = current
      .replace(/[\u200B-\u200F]/g, '') // Zero-width spaces, LTR/RTL marks
      .replace(/[\u202A-\u202E]/g, '') // Directional formatting characters
      .replace(/[\u2066-\u2069]/g, '') // Directional isolates
      .replace(/[\uFEFF]/g, '') // Byte order mark
      .replace(/[\uE000-\uF8FF]/g, '') // Basic Multilingual Plane private use

    iterations++
  }

  // Crash loudly only if the loop stopped because of the iteration cap while the
  // text was still changing. Checking `current !== previous` (rather than the
  // iteration count) avoids rejecting an input that converged exactly on the
  // final permitted pass. This should only ever happen if there is a bug or if
  // someone purposefully created a deeply nested unicode string.
  if (current !== previous) {
    throw new Error(
      `Unicode sanitization reached maximum iterations (${MAX_ITERATIONS}) for input: ${prompt.slice(0, 100)}`,
    )
  }

  return current
}
| 66 | |
| 67 | export function recursivelySanitizeUnicode(value: string): string |
| 68 | export function recursivelySanitizeUnicode<T>(value: T[]): T[] |
no outgoing calls
no test coverage detected