智能选择器
| 16 | |
| 17 | |
| 18 | class SourceSelector: |
| 19 | """智能选择器""" |
| 20 | |
| 21 | # 默认配置 |
| 22 | DEFAULT_MAX_PER_DOMAIN = 2 |
| 23 | DEFAULT_TARGET_COUNT = 1000 |
| 24 | |
| 25 | def __init__( |
| 26 | self, |
| 27 | max_per_domain: int = DEFAULT_MAX_PER_DOMAIN, |
| 28 | target_count: int = DEFAULT_TARGET_COUNT |
| 29 | ): |
| 30 | """ |
| 31 | 初始化智能选择器 |
| 32 | |
| 33 | Args: |
| 34 | max_per_domain: 每个域名最多选择的书源数 |
| 35 | target_count: 目标书源数量 |
| 36 | """ |
| 37 | self.max_per_domain = max_per_domain |
| 38 | self.target_count = target_count |
| 39 | |
def extract_domain(self, url: str) -> str:
    """
    Extract a normalized domain from a URL.

    Host names are case-insensitive, so the value is lower-cased before the
    leading ``www.`` is removed. Without this, ``WWW.Example.com`` and
    ``example.com`` would come back as two different domains.

    Args:
        url: URL string, with or without a scheme.

    Returns:
        The lower-cased network location of ``url`` with a leading
        ``www.`` removed. When the parsed URL has no network location
        (e.g. ``example.com/path``), the first path segment is used
        instead. If the value cannot be processed, ``url`` is returned
        unchanged.
    """
    try:
        parsed = urlparse(url)
        # For input such as "example.com/path" urlparse leaves netloc empty
        # and the host ends up as the first segment of the path.
        domain = parsed.netloc or parsed.path.split('/')[0]
        # Normalize case first so the prefix check below also matches "WWW.".
        domain = domain.lower()
        # Drop the "www." prefix.
        if domain.startswith('www.'):
            domain = domain[4:]
        return domain
    except (ValueError, TypeError, AttributeError):
        # Best effort: urlparse raises ValueError on malformed input (for
        # example an unterminated IPv6 literal) and non-str values fail in
        # the string operations above; hand the input back untouched.
        return url
| 59 | |
| 60 | def calculate_score(self, source: Dict) -> float: |
| 61 | """ |
| 62 | 计算书源评分 |
| 63 | |
| 64 | Args: |
| 65 | source: 书源字典 |
| 66 | |
| 67 | Returns: |
| 68 | 评分(0-100) |
| 69 | """ |
| 70 | # 如果已有选择分,优先使用 |
| 71 | if 'selectionScore' in source: |
| 72 | return float(source['selectionScore']) |
| 73 | |
| 74 | # 其次使用已有评分 |
| 75 | if 'score' in source: |
no outgoing calls
no test coverage detected