ScriptItemizer divides the string into parts, one for each run of the same script. It also splits on changes in embedding level and on unicode.ReplacementChar (which is replaced by an object).
(runes []rune, embeddingLevels []int)
| 19 | |
| 20 | // ScriptItemizer divides the string into parts, one for each run of the same script. It also splits on changes in embedding level and on unicode.ReplacementChar (which is replaced by an object). |
| 21 | func ScriptItemizer(runes []rune, embeddingLevels []int) []ScriptItem { |
| 22 | if len(runes) == 0 { |
| 23 | return []ScriptItem{} |
| 24 | } |
| 25 | |
| 26 | i := 0 |
| 27 | items := []ScriptItem{} |
| 28 | scripts := []Script{ScriptUnknown} // script stack for embedding levels |
| 29 | for j, r := range runes { |
| 30 | script, level := LookupScript(r), embeddingLevels[j] |
| 31 | if script == ScriptInherited { |
| 32 | if r == '\u200C' || r == '\u200D' { |
| 33 | script = ScriptCommon |
| 34 | } else if level < len(scripts) { |
| 35 | script = scripts[level] // take level from preceding base character |
| 36 | } else { |
| 37 | script = ScriptUnknown |
| 38 | } |
| 39 | } |
| 40 | prevScript := scripts[len(scripts)-1] |
| 41 | prevLevel := len(scripts) - 1 |
| 42 | if len(scripts)-1 < level { |
| 43 | // increase level |
| 44 | for len(scripts) < level { |
| 45 | scripts = append(scripts, ScriptUnknown) |
| 46 | } |
| 47 | scripts = append(scripts, script) |
| 48 | } else if level < len(scripts)-1 { |
| 49 | // decrease level |
| 50 | scripts[level] = script |
| 51 | scripts = scripts[:level+1] |
| 52 | } else if script == ScriptUnknown || script == ScriptCommon { |
| 53 | script = prevScript |
| 54 | } else { |
| 55 | scripts[level] = script |
| 56 | if prevScript == ScriptUnknown || prevScript == ScriptCommon { |
| 57 | prevScript = script |
| 58 | } |
| 59 | } |
| 60 | |
| 61 | scriptBoundary := script != prevScript |
| 62 | levelBoundary := level != prevLevel |
| 63 | //objectReplacementBoundary := 0 < j && (r == unicode.ReplacementChar) != (runes[j-1] == unicode.ReplacementChar) |
| 64 | objectReplacementBoundary := r == unicode.ReplacementChar || 0 < j && runes[j-1] == unicode.ReplacementChar |
| 65 | if 0 < j && (levelBoundary || scriptBoundary || objectReplacementBoundary) { |
| 66 | items = append(items, ScriptItem{ |
| 67 | Script: prevScript, |
| 68 | Level: prevLevel, |
| 69 | Text: string(runes[i:j]), |
| 70 | }) |
| 71 | i = j |
| 72 | } |
| 73 | } |
| 74 | items = append(items, ScriptItem{ |
| 75 | Script: scripts[len(scripts)-1], |
| 76 | Level: len(scripts) - 1, |
| 77 | Text: string(runes[i:]), |
| 78 | }) |