MCPcopy
hub / github.com/tickmao/Novel / clean_source

Function clean_source

scripts/clean.py:248–270  ·  view source on GitHub ↗

清洗单个书源

(source: dict, grade: bool = False)

Source from the content-addressed store, hash-verified

246
247
def clean_source(source: dict, grade: bool = False) -> dict:
    """Clean a single book source in place and return it.

    Args:
        source: Book-source mapping. It is modified in place.
        grade: When true, ``bookSourceGroup`` is overwritten with the group
            derived from the source's quality score. Otherwise an existing
            ``bookSourceGroup`` is only normalized.

    Returns:
        The same ``source`` object that was passed in.
    """
    # Normalize the source name.
    if "bookSourceName" in source:
        source["bookSourceName"] = normalize_source_name(source["bookSourceName"])

    if grade:
        # Group by quality score (overwrites any existing group).
        score = calculate_quality_score(source)
        source["bookSourceGroup"] = get_grade_group(score)
    elif "bookSourceGroup" in source:
        # Only normalize the existing group.
        source["bookSourceGroup"] = normalize_group(source["bookSourceGroup"])

    # The comment may contain usage instructions, so its content is kept and
    # only an emoji at the very beginning is removed.
    comment = source.get("bookSourceComment")
    # Non-string comments (malformed data) are left untouched rather than
    # raising TypeError inside re.sub.
    if comment and isinstance(comment, str):
        # Wrap the emoji pattern in a non-capturing group so the ``^`` anchor
        # applies to the whole pattern even if it contains a top-level
        # alternation, and reuse the flags it was compiled with.
        leading_emoji = r'^\s*(?:' + EMOJI_PATTERN.pattern + r')'
        comment = re.sub(leading_emoji, '', comment, flags=EMOJI_PATTERN.flags)
        source["bookSourceComment"] = comment.strip()

    return source
271
272
273def clean_sources(sources: list, grade: bool = False) -> list:

Callers 1

clean_sourcesFunction · 0.70

Calls 4

normalize_source_nameFunction · 0.85
get_grade_groupFunction · 0.85
normalize_groupFunction · 0.85
calculate_quality_scoreFunction · 0.70

Tested by

no test coverage detected