UTF16ToUTF8 converts a UTF-16 code unit offset to a UTF-8 byte offset.
(utf16Offset int)
| 89 | |
| 90 | // UTF16ToUTF8 converts a UTF-16 code unit offset to a UTF-8 byte offset. |
| 91 | func (pm *PositionMap) UTF16ToUTF8(utf16Offset int) int { |
| 92 | if pm.asciiOnly { |
| 93 | return utf16Offset |
| 94 | } |
| 95 | // We need the last entry where (utf8Pos - delta) <= utf16Offset. |
| 96 | // (utf8Pos - delta) is the UTF-16 offset of that entry's character. |
| 97 | lo, hi := 0, len(pm.entries) |
| 98 | for lo < hi { |
| 99 | mid := lo + (hi-lo)/2 |
| 100 | utf16Pos := pm.entries[mid].utf8Pos - pm.entries[mid].delta |
| 101 | if utf16Pos <= utf16Offset { |
| 102 | lo = mid + 1 |
| 103 | } else { |
| 104 | hi = mid |
| 105 | } |
| 106 | } |
| 107 | if lo == 0 { |
| 108 | return utf16Offset |
| 109 | } |
| 110 | return utf16Offset + pm.entries[lo-1].delta |
| 111 | } |