MCPcopy
hub / github.com/tickmao/Novel / screen_source

Method screen_source

scripts/source_policy.py:273–310  ·  view source on GitHub ↗

返回 `(accepted_source, rejected_record)`。

(self, source: Dict)

Source from the content-addressed store, hash-verified

271 return enriched
272
def screen_source(self, source: Dict) -> Tuple[Optional[Dict], Optional[Dict]]:
    """
    Screen one source and return `(accepted_source, rejected_record)`.

    The source is first passed through `self.enrich_source`; every check
    below runs against that enriched record. All checks are evaluated
    (no short-circuit), so a rejected record lists every reason that applied.

    Args:
        source: Raw source dict to screen.

    Returns:
        `(record, None)` when no check failed, where `record` is the
        enriched source; otherwise `(None, rejected)`, where `rejected`
        holds `originalName`, `normalizedName`, `bookSourceUrl`,
        `reasons` (de-duplicated, first-seen order) and `domain`.
    """
    record = self.enrich_source(source)
    reject_reasons: List[str] = []

    # Only absolute http(s) URLs are accepted.
    url = str(record.get("bookSourceUrl", "")).strip()
    if not url.startswith(("http://", "https://")):
        reject_reasons.append("URL 无效")

    # A missing type defaults to 0. A value that is present but cannot be
    # read as an integer (None, "", arbitrary text) cannot be confirmed as
    # type 0, so it is rejected instead of letting int() raise and abort
    # the screening of this source.
    try:
        source_type: Optional[int] = int(record.get("bookSourceType", 0))
    except (TypeError, ValueError):
        source_type = None
    if source_type != 0:
        reject_reasons.append("不是小说源")

    # Threshold is on the value reported by `_rule_completeness`.
    if self._rule_completeness(record) < 2:
        reject_reasons.append("规则不完整")

    # Keyword scan over the source name and group only.
    media_text = " ".join([
        str(record.get("bookSourceName", "")),
        str(record.get("bookSourceGroup", "")),
    ])
    if any(keyword in media_text for keyword in MEDIA_KEYWORDS):
        reject_reasons.append("非纯小说内容")

    # Reasons already attached to the record (presumably by
    # `enrich_source` -- confirm against that method).
    reject_reasons.extend(record.get("_name_audit_reasons", []))
    reject_reasons.extend(record.get("_adult_hit_reasons", []))

    if reject_reasons:
        rejected = {
            "originalName": record.get("originalName", ""),
            "normalizedName": record.get("normalizedName", ""),
            "bookSourceUrl": record.get("bookSourceUrl", ""),
            # dict.fromkeys drops duplicates while keeping first-seen order.
            "reasons": list(dict.fromkeys(reject_reasons)),
            "domain": record.get("_domain", ""),
        }
        return None, rejected

    return record, None
311
312 def screen_sources(self, sources: Iterable[Dict]) -> Tuple[List[Dict], List[Dict], Dict]:
313 source_list = list(sources)

Calls 2

enrich_source — Method · 0.95
_rule_completeness — Method · 0.95