UTF8ToUTF16 converts a UTF-8 byte offset to a UTF-16 code unit offset.
(utf8Offset int)
| 67 | |
| 68 | // UTF8ToUTF16 converts a UTF-8 byte offset to a UTF-16 code unit offset. |
| 69 | func (pm *PositionMap) UTF8ToUTF16(utf8Offset int) int { |
| 70 | if pm.asciiOnly { |
| 71 | return utf8Offset |
| 72 | } |
| 73 | // Binary search: find the last entry where utf8Pos <= utf8Offset |
| 74 | lo, hi := 0, len(pm.entries) |
| 75 | for lo < hi { |
| 76 | mid := lo + (hi-lo)/2 |
| 77 | if pm.entries[mid].utf8Pos <= utf8Offset { |
| 78 | lo = mid + 1 |
| 79 | } else { |
| 80 | hi = mid |
| 81 | } |
| 82 | } |
| 83 | if lo == 0 { |
| 84 | // Before any multi-byte character |
| 85 | return utf8Offset |
| 86 | } |
| 87 | return utf8Offset - pm.entries[lo-1].delta |
| 88 | } |
| 89 | |
| 90 | // UTF16ToUTF8 converts a UTF-16 code unit offset to a UTF-8 byte offset. |
| 91 | func (pm *PositionMap) UTF16ToUTF8(utf16Offset int) int { |