MCPcopy Index your code
hub / github.com/codeaashu/claude-code / partiallySanitizeUnicode

Function partiallySanitizeUnicode

src/utils/sanitization.ts:25–65  ·  view source on GitHub ↗
(prompt: string)

Source from the content-addressed store, hash-verified

23 */
24
export function partiallySanitizeUnicode(prompt: string): string {
  // Contract:
  //  - Returns `prompt` after repeated NFKC normalization and removal of
  //    format (Cf), private-use (Co) and unassigned (Cn) code points, plus a
  //    fixed list of known-dangerous BMP ranges.
  //  - Repeats until a pass leaves the string unchanged, because stripping a
  //    character can make a further normalization possible (for example a base
  //    letter and a combining mark that were separated by a zero-width joiner).
  //  - Throws an Error if the string is still changing after MAX_ITERATIONS
  //    passes.
  const MAX_ITERATIONS = 10 // Safety limit to prevent infinite loops

  // Method 1: Strip dangerous Unicode property classes.
  // This is the primary defence and is the solution that is widely used in OSS libraries.
  const dangerousCategories = /[\p{Cf}\p{Co}\p{Cn}]/gu

  // Method 2: Explicit character ranges, kept as a second layer in case the
  // property-class match above does not cover a character in some environment.
  //   U+200B-U+200F  zero-width spaces, LTR/RTL marks
  //   U+202A-U+202E  directional formatting characters
  //   U+2066-U+2069  directional isolates
  //   U+FEFF         byte order mark
  //   U+E000-U+F8FF  Basic Multilingual Plane private use
  const dangerousRanges =
    /[\u200B-\u200F\u202A-\u202E\u2066-\u2069\uFEFF\uE000-\uF8FF]/g

  let current = prompt
  let previous = ''
  let iterations = 0

  // Iteratively sanitize until no more changes occur or max iterations reached.
  // Both regexes are global and only used through String.prototype.replace,
  // which starts every call from index 0, so sharing them across passes is safe.
  while (current !== previous && iterations < MAX_ITERATIONS) {
    previous = current

    current = current
      .normalize('NFKC') // Handle composed character sequences
      .replace(dangerousCategories, '')
      .replace(dangerousRanges, '')

    iterations++
  }

  // Crash loudly only when the loop gave up while the string was still
  // changing. Checking the iteration counter alone would also reject an input
  // whose final permitted pass confirmed that a fixed point had been reached.
  // This should only ever happen if there is a bug or if someone purposefully
  // created a deeply nested unicode string.
  if (current !== previous) {
    throw new Error(
      `Unicode sanitization reached maximum iterations (${MAX_ITERATIONS}) for input: ${prompt.slice(0, 100)}`,
    )
  }

  return current
}
66
67export function recursivelySanitizeUnicode(value: string): string
68export function recursivelySanitizeUnicode<T>(value: T[]): T[]

Callers 2

parseDeepLinkFunction · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected