产出最终展示名称。 返回 `(final_name, audit_status, reasons)`。
(self, name: str, url: str = "")
| 147 | return None |
| 148 | |
def canonicalize_name(self, name: str, url: str = "") -> Tuple[str, str, List[str]]:
    """
    Produce the final display name for a source and audit it.

    The base name is chosen in this order: the canonical name mapped from
    the URL's domain, then the alias-table entry for the normalized input,
    then the normalized input itself. That base is normalized again, run
    through token replacement, the ``CHINESE_DIGITS`` translation table and
    ASCII-noise stripping, and finally has every "·" removed.

    Args:
        name: Raw source name.
        url: Optional source URL; only its domain is used for the lookup.

    Returns:
        ``(final_name, audit_status, reasons)``. ``audit_status`` is
        ``"rejected"`` when at least one check fails, in which case
        ``reasons`` lists every failed check in evaluation order; otherwise
        it is ``"pure_chinese"`` and ``reasons`` is empty. A name that
        normalizes to an empty value is rejected immediately with an empty
        ``final_name``.
    """
    cleaned_input = normalize_source_name(name)
    if not cleaned_input:
        return "", "rejected", ["名称为空"]

    # A domain-derived canonical name takes precedence; the alias table is
    # consulted only when the domain yields nothing.
    resolved = self._canonical_from_domain(self.extract_domain(url))
    if not resolved:
        resolved = self.alias_to_canonical.get(cleaned_input)

    display = normalize_source_name(resolved or cleaned_input)
    display = self._apply_token_replacements(display).translate(CHINESE_DIGITS)
    display = self._strip_ascii_noise(display).replace("·", "")

    # Every check is evaluated (no early exit) so the caller receives the
    # complete list of problems; tuple order fixes the order of the reasons.
    size = len(display)
    checks = (
        (display in self.generic_blacklist, "名称过于泛化"),
        (
            any(rx.search(display) for rx in self.reject_name_patterns),
            "名称命中低质量模式",
        ),
        (size < self.min_length, "名称过短"),
        (size > self.max_length, "名称过长"),
        (
            self.require_pure_chinese and not CN_ONLY_RE.match(display),
            "名称不是纯中文",
        ),
        (HAS_ASCII_OR_DIGIT_RE.search(display), "名称仍含英文或数字"),
        (not HAS_CN_RE.search(display), "名称不含中文主体"),
    )
    problems: List[str] = [message for failed, message in checks if failed]

    verdict = "rejected" if problems else "pure_chinese"
    return display, verdict, problems
| 199 | |
| 200 | def detect_adult_risks(self, source: Dict) -> List[str]: |
| 201 | """ |