返回 `(accepted_source, rejected_record)`。
(self, source: Dict)
| 271 | return enriched |
| 272 | |
def screen_source(self, source: Dict) -> Tuple[Optional[Dict], Optional[Dict]]:
    """
    Screen a single book source and either accept or reject it.

    The source is first passed through ``self.enrich_source``; the enriched
    record is then checked against every rule below. Checks do not
    short-circuit, so a rejection record lists every rule that failed.

    Args:
        source: Raw book-source dict.

    Returns:
        ``(accepted_source, rejected_record)``. Exactly one element is not
        ``None``: the enriched record when every check passes, otherwise a
        summary dict with the keys ``originalName``, ``normalizedName``,
        ``bookSourceUrl``, ``reasons`` and ``domain``.
    """
    record = self.enrich_source(source)
    reject_reasons: List[str] = []

    # Only absolute http(s) URLs are accepted.
    url = str(record.get("bookSourceUrl", "")).strip()
    if not url.startswith(("http://", "https://")):
        reject_reasons.append("URL 无效")

    # A missing type defaults to 0. A value that is present but cannot be
    # coerced to an integer (None, "", arbitrary text, infinity) is rejected
    # instead of raising, so one malformed source cannot abort screening.
    try:
        source_type: Optional[int] = int(record.get("bookSourceType", 0))
    except (TypeError, ValueError, OverflowError):
        source_type = None
    if source_type != 0:
        reject_reasons.append("不是小说源")

    # _rule_completeness is defined elsewhere; a score below 2 is rejected.
    if self._rule_completeness(record) < 2:
        reject_reasons.append("规则不完整")

    # Match media keywords against the source name and group together.
    media_text = " ".join([
        str(record.get("bookSourceName", "")),
        str(record.get("bookSourceGroup", "")),
    ])
    if any(keyword in media_text for keyword in MEDIA_KEYWORDS):
        reject_reasons.append("非纯小说内容")

    # Audit reasons carried on the record itself (presumably populated by
    # enrich_source -- confirm against that method).
    reject_reasons.extend(record.get("_name_audit_reasons", []))
    reject_reasons.extend(record.get("_adult_hit_reasons", []))

    if reject_reasons:
        rejected = {
            "originalName": record.get("originalName", ""),
            "normalizedName": record.get("normalizedName", ""),
            "bookSourceUrl": record.get("bookSourceUrl", ""),
            # dict.fromkeys removes duplicates while keeping first-seen order.
            "reasons": list(dict.fromkeys(reject_reasons)),
            "domain": record.get("_domain", ""),
        }
        return None, rejected

    return record, None
| 311 | |
| 312 | def screen_sources(self, sources: Iterable[Dict]) -> Tuple[List[Dict], List[Dict], Dict]: |
| 313 | source_list = list(sources) |