(input: string)
| 107 | * the map exact for the transformations we apply (which are all local). |
| 108 | */ |
export function normalizeWithMap(input: string): {
  normalized: string;
  map: number[];
} {
  // Builds the normalized text together with an offset map back into `input`:
  // map[k] is the index in `input` of the character that produced UTF-16 code
  // unit k of `normalized`. A trailing sentinel maps offset ===
  // normalized.length to input.length, so map.length === normalized.length + 1.
  const out: string[] = [];
  const map: number[] = [];

  // Appends `text` to the output and records `origin` once per UTF-16 code
  // unit (not once per code point), so the map stays indexable by offsets into
  // the joined string even when `text` contains astral characters.
  const emit = (text: string, origin: number): void => {
    out.push(text);
    for (let k = 0; k < text.length; k++) {
      map.push(origin);
    }
  };

  // Hoisted out of the scan loop. Object.entries keeps the key order of
  // HTML_ENTITIES, so the table's own ordering still decides which entity wins.
  const entities = Object.entries(HTML_ENTITIES);

  let i = 0;
  scan: while (i < input.length) {
    // HTML entity expansion: the first table entry matching at `i` is replaced
    // and every unit of the replacement points at the start of the entity.
    for (const [ent, rep] of entities) {
      if (input.startsWith(ent, i)) {
        emit(rep, i);
        i += ent.length;
        continue scan;
      }
    }

    // Take one whole code point (a surrogate pair counts as a single
    // character) so NFKC can fold astral compatibility characters. A lone
    // surrogate is taken as a single unit and passes through unchanged.
    const code = input.codePointAt(i);
    const width = code !== undefined && code > 0xffff ? 2 : 1;
    const src = input.slice(i, i + width);

    // ZERO_WIDTH is shared state: if it carries the g/y flag, a stale
    // lastIndex would make test() start past our short string. Reset it before
    // testing, and again afterwards so no state leaks to the next user.
    ZERO_WIDTH.lastIndex = 0;
    const isZeroWidth = ZERO_WIDTH.test(src);
    ZERO_WIDTH.lastIndex = 0;

    if (!isZeroWidth) {
      const norm = src.normalize("NFKC");
      if (norm === src) {
        // Unchanged character: keep the unit-by-unit identity mapping.
        out.push(src);
        for (let k = 0; k < width; k++) {
          map.push(i + k);
        }
      } else {
        // Rewritten character: every output unit points at its source start.
        emit(norm, i);
      }
    }
    i += width;
  }

  // Sentinel so an offset == length maps to the original length.
  map.push(input.length);
  return { normalized: out.join(""), map };
}
| 152 | |
| 153 | // ── Offset → line/col on the ORIGINAL text ──────────────────────────────────── |
| 154 |
no test coverage detected