MCPcopy
hub / github.com/tickmao/Novel / __init__

Method __init__

scripts/source_policy.py:63–101  ·  view source on GitHub ↗
(self, base_dir: Path | str | None = None)

Source from the content-addressed store, hash-verified

61 }
62
def __init__(self, base_dir: Path | str | None = None):
    """Locate the project root and load the naming and audit policies.

    Args:
        base_dir: Directory to start from. When falsy (``None`` or an
            empty string), the directory two levels above this file is
            used instead.

    The project root is the first directory, starting at the resolved
    start directory and walking up through its parents, that contains a
    ``config`` entry. If none does, the start directory itself is kept.
    Both JSON config files are read through ``_load_json``, with the
    class-level defaults passed as its second argument.
    """
    if base_dir:
        start = Path(base_dir).resolve()
    else:
        start = Path(__file__).resolve().parent.parent

    # Nearest ancestor (including the start directory) holding "config";
    # falls back to the start directory when nothing matches.
    self.project_root = next(
        (
            candidate
            for candidate in (start, *start.parents)
            if (candidate / "config").exists()
        ),
        start,
    )

    settings_dir = self.project_root / "config"
    self.name_config = _load_json(
        settings_dir / "name_normalization.json", self.DEFAULT_NAME_CONFIG
    )
    self.audit_config = _load_json(
        settings_dir / "content_audit.json", self.DEFAULT_AUDIT_CONFIG
    )

    names = self.name_config
    audit = self.audit_config

    # Scalar naming rules, coerced to the types the rest of the class reads.
    self.require_pure_chinese = bool(names.get("require_pure_chinese", True))
    self.min_length = int(names.get("min_length", 2))
    self.max_length = int(names.get("max_length", 16))
    self.generic_blacklist = set(names.get("generic_blacklist", []))
    self.reject_name_patterns = list(map(re.compile, names.get("reject_patterns", [])))
    self.drop_ascii_suffixes = tuple(names.get("drop_ascii_suffixes", []))
    self.token_replacements = names.get("token_replacements", {})

    # Domain keys are stored in normalized form.
    domain_map = {}
    for raw_domain, canonical_name in names.get("domain_to_canonical", {}).items():
        domain_map[self._normalize_domain(raw_domain)] = canonical_name
    self.domain_to_canonical = domain_map

    # Each alias is registered twice: once normalized, once merely stripped.
    self.alias_to_canonical = {}
    alias_map = self.alias_to_canonical
    for raw_alias, canonical_name in names.get("alias_to_canonical", {}).items():
        alias_map[normalize_source_name(raw_alias)] = canonical_name
        alias_map[raw_alias.strip()] = canonical_name

    def compile_rules(section):
        # Turn a list of {"pattern", "reason"} entries into (regex, reason) pairs.
        return [
            (re.compile(rule["pattern"]), rule["reason"])
            for rule in audit.get(section, [])
        ]

    self.text_patterns = compile_rules("text_patterns")
    self.url_patterns = compile_rules("url_patterns")
    self.allow_patterns = tuple(audit.get("allow_patterns", []))
102
103 def _normalize_domain(self, domain: str) -> str:
104 cleaned = domain.lower().strip()

Callers

nothing calls this directly

Calls 3

_normalize_domainMethod · 0.95
normalize_source_nameFunction · 0.90
_load_jsonFunction · 0.70

Tested by

no test coverage detected