Class SourceSelector

scripts/source_selector.py:18–262 · view source on GitHub ↗

智能选择器

Source from the content-addressed store, hash-verified

16
17
18	class SourceSelector:
19	"""智能选择器"""
20
21	# 默认配置
22	DEFAULT_MAX_PER_DOMAIN = 2
23	DEFAULT_TARGET_COUNT = 1000
24
25	def __init__(
26	self,
27	max_per_domain: int = DEFAULT_MAX_PER_DOMAIN,
28	target_count: int = DEFAULT_TARGET_COUNT
29	):
30	"""
31	初始化智能选择器
32
33	Args:
34	max_per_domain: 每个域名最多选择的书源数
35	target_count: 目标书源数量
36	"""
37	self.max_per_domain = max_per_domain
38	self.target_count = target_count
39
def extract_domain(self, url: str) -> str:
    """Extract a normalized domain from a URL string.

    The network location is used when the URL has one; otherwise the first
    path segment is used, so scheme-less input such as ``example.com/x``
    still yields ``example.com``. The result is lowercased, because host
    names are case-insensitive, and a leading ``www.`` is removed so that
    ``WWW.Example.com`` and ``example.com`` map to the same domain.

    Args:
        url: URL string.

    Returns:
        The normalized domain. If ``url`` is not a string or cannot be
        parsed, ``url`` is returned unchanged (best-effort fallback).
    """
    # Non-string input cannot be normalized; hand it back untouched.
    if not isinstance(url, str):
        return url

    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse rejects malformed input such as an unbalanced IPv6 bracket.
        return url

    domain = (parsed.netloc or parsed.path.split('/')[0]).lower()
    # Drop the "www." prefix.
    if domain.startswith('www.'):
        domain = domain[4:]
    return domain
59
60	def calculate_score(self, source: Dict) -> float:
61	"""
62	计算书源评分
63
64	Args:
65	source: 书源字典
66
67	Returns:
68	评分（0-100）
69	"""
70	# 如果已有选择分，优先使用
71	if 'selectionScore' in source:
72	return float(source['selectionScore'])
73
74	# 其次使用已有评分
75	if 'score' in source:

build_inventory · Method · 0.90

run_selection · Method · 0.90

main · Function · 0.85

no outgoing calls

no test coverage detected