(self, text: str)
| 121 | return value |
| 122 | |
def _strip_ascii_noise(self, text: str) -> str:
    """Strip ASCII letter noise around CJK text and return the normalized name.

    The clean-up runs in this order:

    1. Every entry of ``self.drop_ascii_suffixes`` is removed when it
       appears at the very end of the string (case-insensitive).
    2. ASCII letters trailing a CJK character (U+4E00-U+9FFF) are removed.
    3. A leading ASCII alphanumeric run directly before a CJK character
       is removed.
    4. ASCII letters sandwiched between two CJK characters are removed.
    5. ASCII letters directly before a Chinese numeral or any CJK
       character are removed.
    6. If ``HAS_CN_RE`` matches what is left, all remaining ASCII letters
       are removed.
    7. All whitespace is removed.

    Args:
        text: Raw name to clean.

    Returns:
        The result of ``normalize_source_name`` applied to the cleaned text.
    """
    value = text

    # re.escape keeps configured suffixes literal even if they contain
    # regex metacharacters such as "." or "+".
    for suffix in self.drop_ascii_suffixes:
        value = re.sub(rf"{re.escape(suffix)}$", "", value, flags=re.IGNORECASE)

    value = re.sub(r"(?<=[\u4e00-\u9fff])[A-Za-z]+$", "", value)
    # Unlike the other rules, the leading-prefix rule also drops digits.
    value = re.sub(r"^[A-Za-z0-9]+(?=[\u4e00-\u9fff])", "", value)
    value = re.sub(r"(?<=[\u4e00-\u9fff])[A-Za-z]+(?=[\u4e00-\u9fff])", "", value)
    # The CJK range must sit INSIDE the character class. Previously the
    # class was closed before it, so the lookahead demanded the literal
    # sequence U+4E00, "-", U+9FFF after a numeral and never matched.
    # U+3007 (〇) lies outside U+4E00-U+9FFF, so it is listed explicitly.
    value = re.sub(
        r"[A-Za-z]+(?=[零一二三四五六七八九十百千万两〇\u4e00-\u9fff])",
        "",
        value,
    )
    # NOTE(review): HAS_CN_RE is defined elsewhere; presumably it detects
    # Chinese characters, so pure-ASCII names keep their letters - confirm.
    if HAS_CN_RE.search(value):
        value = re.sub(r"[A-Za-z]+", "", value)
    value = re.sub(r"\s+", "", value)
    return normalize_source_name(value)
| 136 | |
| 137 | def _canonical_from_domain(self, domain: str) -> Optional[str]: |
| 138 | if not domain: |
no test coverage detected