Function clean_source

scripts/clean.py:248–270 · view source on GitHub ↗

清洗单个书源

(source: dict, grade: bool = False)

Source from the content-addressed store, hash-verified

246
247
def clean_source(source: dict, grade: bool = False) -> dict:
    """Clean a single book source in place and return it.

    Args:
        source: Book-source mapping. The keys ``bookSourceName``,
            ``bookSourceGroup`` and ``bookSourceComment`` are rewritten
            when applicable; all other keys are left untouched.
        grade: When true, ``bookSourceGroup`` is overwritten with the group
            returned by ``get_grade_group`` for the source's quality score,
            whether or not a group was already present. When false, an
            existing group is only passed through ``normalize_group``.

    Returns:
        The same ``source`` object that was passed in (mutated, not copied).
    """
    # Normalize the display name.
    if "bookSourceName" in source:
        source["bookSourceName"] = normalize_source_name(source["bookSourceName"])

    if grade:
        # Group by quality score; this replaces any existing group.
        score = calculate_quality_score(source)
        source["bookSourceGroup"] = get_grade_group(score)
    elif "bookSourceGroup" in source:
        # No grading requested: only tidy up the existing group.
        source["bookSourceGroup"] = normalize_group(source["bookSourceGroup"])

    # The comment may contain usage instructions, so its content is kept and
    # only an emoji run at the very beginning is removed.
    comment = source.get("bookSourceComment")
    # Non-string comments are left as they are instead of raising TypeError
    # inside the regex machinery.
    if comment and isinstance(comment, str):
        # Drop leading whitespace first, then match the compiled pattern at
        # position 0. Using the compiled pattern directly (rather than
        # concatenating its source text behind '^\s*') keeps its flags and
        # guarantees the match is anchored at the start even if the pattern
        # contains a top-level alternation.
        text = comment.lstrip()
        leading_emoji = EMOJI_PATTERN.match(text)
        if leading_emoji:
            text = text[leading_emoji.end():]
        source["bookSourceComment"] = text.strip()

    return source
271
272
273	def clean_sources(sources: list, grade: bool = False) -> list:

clean_sourcesFunction · 0.70

normalize_source_nameFunction · 0.85

get_grade_groupFunction · 0.85

normalize_groupFunction · 0.85

calculate_quality_scoreFunction · 0.70

no test coverage detected