(self, source: Dict)
| 242 | ) |
| 243 | |
def enrich_source(self, source: Dict) -> Dict:
    """Return a deep copy of ``source`` annotated with audit and score fields.

    All writes performed here go to the copy, never to ``source``.

    Keys written on the returned dict:

    * ``originalName`` - the stripped ``originalName`` of ``source``, falling
      back to ``bookSourceName`` when ``originalName`` is missing or falsy.
    * ``normalizedName`` - the name returned by ``self.canonicalize_name``.
    * ``bookSourceName`` - overwritten with the canonical name, but only
      when that name is non-empty.
    * ``bookSourceGroup`` - passed through ``normalize_group``, but only
      when the key already exists.
    * ``_domain`` - result of ``self.extract_domain`` on ``bookSourceUrl``.
    * ``_name_audit_status`` / ``_name_audit_reasons`` - the remaining two
      values returned by ``self.canonicalize_name``.
    * ``_adult_hit_reasons`` - result of ``self.detect_adult_risks``.
    * ``_name_quality_score`` - 10 when the audit status is
      ``"pure_chinese"``, otherwise 0.
    * ``selectionScore`` - base score + name quality score + validation
      bonus (3 when ``_validation_status == "valid"``), rounded to two
      decimals. The base score is the first truthy value among
      ``selectionScore``, ``score`` and ``calculate_quality_score(source)``.

    A value of ``None`` for ``bookSourceName``, ``bookSourceUrl`` or
    ``bookSourceGroup`` is treated as an empty string instead of being
    stringified to the literal text ``"None"``.

    Args:
        source: Raw source record (a mapping of field name to value).

    Returns:
        The enriched copy of ``source``.
    """

    def _as_text(value) -> str:
        # ``dict.get(key, "")`` only falls back when the key is absent; a key
        # that is present with a ``None`` value would otherwise turn into the
        # string "None" once passed through ``str()``.
        return "" if value is None else str(value)

    enriched = deepcopy(source)

    raw_name = source.get("originalName") or source.get("bookSourceName")
    original_name = _as_text(raw_name).strip()
    # Computed once and shared by name canonicalisation and domain extraction.
    source_url = _as_text(source.get("bookSourceUrl"))

    final_name, audit_status, name_reasons = self.canonicalize_name(
        original_name,
        source_url,
    )
    adult_risks = self.detect_adult_risks(source)
    domain = self.extract_domain(source_url)

    enriched["originalName"] = original_name
    enriched["normalizedName"] = final_name
    if final_name:
        # Keep the existing display name when canonicalisation yields nothing.
        enriched["bookSourceName"] = final_name
    if "bookSourceGroup" in enriched:
        enriched["bookSourceGroup"] = normalize_group(
            _as_text(enriched["bookSourceGroup"])
        )
    enriched["_domain"] = domain
    enriched["_name_audit_status"] = audit_status
    enriched["_name_audit_reasons"] = name_reasons
    enriched["_adult_hit_reasons"] = adult_risks

    name_quality_score = 10 if audit_status == "pure_chinese" else 0
    enriched["_name_quality_score"] = name_quality_score

    # A stored score of 0 is falsy and therefore falls through to the next
    # candidate, exactly as in the original ``or`` chain.
    # NOTE(review): if an already-enriched record is passed in again, its
    # previous ``selectionScore`` (bonuses included) becomes the base, so the
    # bonuses appear to compound - confirm callers enrich each record once.
    base_score = float(
        source.get("selectionScore")
        or source.get("score")
        or calculate_quality_score(source)
    )
    validation_bonus = 3 if source.get("_validation_status") == "valid" else 0
    enriched["selectionScore"] = round(
        base_score + name_quality_score + validation_bonus, 2
    )

    return enriched
| 272 | |
| 273 | def screen_source(self, source: Dict) -> Tuple[Optional[Dict], Optional[Dict]]: |
| 274 | """ |
no test coverage detected