normalizeUnicodeInner is the inner transformation function used by unicodeNormalizer.
(s string)
| 52 | |
| 53 | // normalizeUnicodeInner is the inner transformation function used by unicodeNormalizer. It folds s toward ASCII in three steps: it rewrites a fixed set of letters to ASCII spellings, applies NFKD decomposition, and removes nonspacing marks (unicode.Mn). If the transform reports an error, it returns s with only the letter rewrites applied. |
| 54 | func normalizeUnicodeInner(s string) string { |
| 55 | // Rewrite letters and ligatures that NFKD leaves unchanged because they have no decomposition to ASCII: æ/Æ, œ/Œ, ø/Ø, ß, ð/Ð, þ/Þ (they are distinct letters, not base letter + combining mark). |
| 56 | // NOTE(review): these rewrites run before NFKD, so precomposed forms such as ǽ (U+01FD) or ǿ (U+01FF), which decompose to æ/ø plus a combining mark, appear to come out as a bare æ/ø that is never replaced - confirm whether that matters for callers. |
| 57 | s = strings.ReplaceAll(s, "æ", "ae") |
| 58 | s = strings.ReplaceAll(s, "Æ", "AE") |
| 59 | s = strings.ReplaceAll(s, "œ", "oe") |
| 60 | s = strings.ReplaceAll(s, "Œ", "OE") |
| 61 | s = strings.ReplaceAll(s, "ø", "o") |
| 62 | s = strings.ReplaceAll(s, "Ø", "O") |
| 63 | s = strings.ReplaceAll(s, "ß", "ss") |
| 64 | s = strings.ReplaceAll(s, "ð", "d") |
| 65 | s = strings.ReplaceAll(s, "Ð", "D") |
| 66 | s = strings.ReplaceAll(s, "þ", "th") |
| 67 | s = strings.ReplaceAll(s, "Þ", "TH") |
| 68 | |
| 69 | // Build a new transformer chain on every call: the chained transformer is stateful and not safe for concurrent use, so it must not be shared between goroutines. |
| 70 | // NFKD decomposes characters into base letters plus combining marks, and runes.Remove then drops the marks in unicode.Mn. The unicodeNormalizer wrapper (defined elsewhere) is expected to cache results so identical inputs are not transformed repeatedly. |
| 71 | t := transform.Chain(norm.NFKD, runes.Remove(runes.In(unicode.Mn))) |
| 72 | result, _, err := transform.String(t, s) |
| 73 | if err != nil { |
| 74 | return s |
| 75 | } |
| 76 | return result |
| 77 | } |
| 78 | |
| 79 | // normalized is the inner transformation function used by matchingNormalizer. |
| 80 | func normalized(s string) string { |