MCPcopy
hub / github.com/tickmao/Novel / _strip_ascii_noise

Method _strip_ascii_noise

scripts/source_policy.py:123–135  ·  view source on GitHub ↗
(self, text: str)

Source from the content-addressed store, hash-verified

121 return value
122
def _strip_ascii_noise(self, text: str) -> str:
    """Strip stray ASCII letters/digits that cling to Chinese text in a name.

    The rules are applied in order: configured suffixes first, then ASCII
    runs adjacent to CJK characters, then (when ``HAS_CN_RE`` matches) every
    remaining ASCII letter run, and finally all whitespace.

    Args:
        text: Raw name to clean.

    Returns:
        The cleaned text after being passed through ``normalize_source_name``.
    """
    value = text

    # Drop each configured suffix when it ends the string (case-insensitive).
    # re.escape keeps suffixes containing regex metacharacters literal.
    for suffix in self.drop_ascii_suffixes:
        value = re.sub(rf"{re.escape(suffix)}$", "", value, flags=re.IGNORECASE)

    # Trailing ASCII letters that directly follow a CJK character.
    value = re.sub(r"(?<=[\u4e00-\u9fff])[A-Za-z]+$", "", value)
    # Leading ASCII letters/digits that directly precede a CJK character.
    value = re.sub(r"^[A-Za-z0-9]+(?=[\u4e00-\u9fff])", "", value)
    # ASCII letters sandwiched between two CJK characters.
    value = re.sub(r"(?<=[\u4e00-\u9fff])[A-Za-z]+(?=[\u4e00-\u9fff])", "", value)
    # ASCII letters directly before a Chinese numeral or any CJK character.
    # The CJK range must live INSIDE the character class: previously the class
    # was closed right after the numerals, so "\u4e00-\u9fff" was matched as a
    # literal three-character sequence and this rule effectively never fired.
    # "〇" (U+3007) lies outside \u4e00-\u9fff, hence it is listed explicitly.
    value = re.sub(
        r"[A-Za-z]+(?=[零一二三四五六七八九十百千万两〇\u4e00-\u9fff])", "", value
    )
    # NOTE(review): HAS_CN_RE is defined elsewhere in this module; presumably
    # it detects Chinese characters — confirm. When it matches, every
    # remaining ASCII letter run is removed; otherwise letters are kept.
    if HAS_CN_RE.search(value):
        value = re.sub(r"[A-Za-z]+", "", value)
    # Remove all whitespace, including interior whitespace.
    value = re.sub(r"\s+", "", value)
    return normalize_source_name(value)
136
137 def _canonical_from_domain(self, domain: str) -> Optional[str]:
138 if not domain:

Callers 1

canonicalize_nameMethod · 0.95

Calls 1

normalize_source_nameFunction · 0.90

Tested by

no test coverage detected