MCPcopy
hub / github.com/tickmao/Novel / canonicalize_name

Method canonicalize_name

scripts/source_policy.py:149–198  ·  view source on GitHub ↗

产出最终展示名称。 返回 `(final_name, audit_status, reasons)`。

(self, name: str, url: str = "")

Source from the content-addressed store, hash-verified

147 return None
148
def canonicalize_name(self, name: str, url: str = "") -> Tuple[str, str, List[str]]:
    """Produce the final display name for a source.

    The raw name is first passed through ``normalize_source_name``; if
    nothing is left the call is rejected immediately. Otherwise the
    preferred spelling is chosen in this order: the canonical name
    looked up from the URL's domain, the entry for the normalized name
    in ``self.alias_to_canonical``, and finally the normalized name
    itself. That spelling is then cleaned up and audited.

    Args:
        name: Raw source name.
        url: Optional source URL, used only for the domain lookup.

    Returns:
        A ``(final_name, audit_status, reasons)`` tuple. ``audit_status``
        is ``"rejected"`` when at least one audit rule fired (``reasons``
        then lists every rule that fired, in evaluation order) and
        ``"pure_chinese"`` otherwise (``reasons`` is then empty).
    """
    cleaned = normalize_source_name(name)
    if not cleaned:
        return "", "rejected", ["名称为空"]

    # Domain mapping wins over the alias table, which wins over the
    # caller-supplied name; the alias lookup only runs when the domain
    # lookup produced nothing truthy.
    preferred = (
        self._canonical_from_domain(self.extract_domain(url))
        or self.alias_to_canonical.get(cleaned)
        or cleaned
    )

    # Clean-up pipeline; the order of the steps is significant.
    result = normalize_source_name(preferred)
    result = self._apply_token_replacements(result)
    result = self._strip_ascii_noise(result.translate(CHINESE_DIGITS))
    result = result.replace("·", "")

    # Each entry pairs "did this rule fire?" with the reason to report.
    # All rules are evaluated so the caller sees every problem at once.
    audit = (
        (result in self.generic_blacklist, "名称过于泛化"),
        (
            any(rx.search(result) for rx in self.reject_name_patterns),
            "名称命中低质量模式",
        ),
        (len(result) < self.min_length, "名称过短"),
        (len(result) > self.max_length, "名称过长"),
        (
            self.require_pure_chinese and not CN_ONLY_RE.match(result),
            "名称不是纯中文",
        ),
        (HAS_ASCII_OR_DIGIT_RE.search(result), "名称仍含英文或数字"),
        (not HAS_CN_RE.search(result), "名称不含中文主体"),
    )
    problems: List[str] = [message for fired, message in audit if fired]

    verdict = "rejected" if problems else "pure_chinese"
    return result, verdict, problems
199
200 def detect_adult_risks(self, source: Dict) -> List[str]:
201 """

Calls 5

extract_domainMethod · 0.95
_strip_ascii_noiseMethod · 0.95
normalize_source_nameFunction · 0.90