清洗单个书源
(source: dict, grade: bool = False)
| 246 | |
| 247 | |
def clean_source(source: dict, grade: bool = False) -> dict:
    """Clean a single book source in place and return it.

    The name is passed through ``normalize_source_name``. When ``grade`` is
    true, the group is overwritten with the grade group derived from
    ``calculate_quality_score``; otherwise an existing group is passed
    through ``normalize_group``. For a non-empty string comment, one leading
    emoji run (as matched by ``EMOJI_PATTERN``) is removed and the remaining
    text is kept, with surrounding whitespace trimmed.

    Args:
        source: Book source mapping. It is mutated in place.
        grade: If true, replace ``bookSourceGroup`` with the score-based
            group instead of normalizing the existing one.

    Returns:
        The same ``source`` object that was passed in.
    """
    # Clean the name.
    if "bookSourceName" in source:
        source["bookSourceName"] = normalize_source_name(source["bookSourceName"])

    if grade:
        # Group by quality score (overrides any existing group).
        score = calculate_quality_score(source)
        source["bookSourceGroup"] = get_grade_group(score)
    elif "bookSourceGroup" in source:
        # Only tidy up the existing group.
        source["bookSourceGroup"] = normalize_group(source["bookSourceGroup"])

    # Clean the comment: it may hold usage instructions, so keep the content
    # and drop only an emoji run at the very beginning. Non-string values
    # (e.g. from malformed JSON) are left untouched rather than crashing.
    comment = source.get("bookSourceComment")
    if comment and isinstance(comment, str):
        text = comment.lstrip()
        # Use the compiled pattern directly instead of concatenating its
        # ``.pattern`` text: this keeps its compile flags, and ``match``
        # anchors the whole pattern at the start even if it contains a
        # top-level alternation.
        leading = EMOJI_PATTERN.match(text)
        if leading:
            text = text[leading.end():]
        source["bookSourceComment"] = text.strip()

    return source
| 271 | |
| 272 | |
| 273 | def clean_sources(sources: list, grade: bool = False) -> list: |
no test coverage detected