MCPcopy
hub / github.com/Doorman11991/smallcode / main

Function main

scripts/rag_scraper.py:262–297  ·  view source on GitHub ↗
(argv: Optional[Sequence[str]] = None)

Source from the content-addressed store, hash-verified

260
261
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Scrape every configured repository into a JSONL snippet file.

    Parses the command line, merges settings from the loaded config, then
    writes one JSON object per line to ``args.out`` for every record that
    ``scrape_repo`` yields. A JSON summary (``snippets`` count and ``out``
    path) is printed to stdout; progress and errors go to stderr.

    Args:
        argv: Argument list forwarded to ``parse_args``; ``None`` is passed
            through unchanged.

    Returns:
        ``0`` after the scrape loop finishes, or ``2`` when neither the
        config nor the command line supplied any repository.
    """
    args = parse_args(argv)
    cfg, specs = load_config(args.config, args.preset)
    if cfg:
        # Keys present in the config replace the parsed argument values;
        # absent keys keep whatever parse_args produced.
        args.cache_dir = Path(str(cfg.get("cacheDir", args.cache_dir)))
        int_settings = (
            ("max_files_per_repo", "maxFilesPerRepo"),
            ("max_snippets_per_repo", "maxSnippetsPerRepo"),
            ("max_file_bytes", "maxFileBytes"),
            ("chunk_lines", "chunkLines"),
            ("overlap", "overlap"),
            ("min_chars", "minChars"),
        )
        for attr, key in int_settings:
            setattr(args, attr, int(cfg.get(key, getattr(args, attr))))
        # Unlike the settings above, languages from the config only fill in
        # when the command line left them empty.
        languages = cfg.get("languages")
        if languages and not args.languages:
            if isinstance(languages, list):
                args.languages = ",".join(languages)
            else:
                args.languages = str(languages)

    # `or ()` tolerates args.repo being None, which is what argparse yields
    # for an `append` option without an explicit default (parse_args is not
    # visible here, so this is a defensive guard).
    for repo in args.repo or ():
        specs.append(RepoSpec(url=repo, tags=[]))
    if not specs:
        print("No repositories configured. Use --preset starter/broad or add repos to repos.json.", file=sys.stderr)
        return 2

    args.out.parent.mkdir(parents=True, exist_ok=True)
    total = 0
    with args.out.open("w", encoding="utf-8") as fh:
        for spec in specs:
            print(f"scraping {spec.url}", file=sys.stderr)
            count = 0
            try:
                repo_root = ensure_repo(spec, args.cache_dir)
                for rec in scrape_repo(spec, repo_root, args):
                    fh.write(json.dumps(rec, ensure_ascii=False) + "\n")
                    count += 1
            except (subprocess.CalledProcessError, OSError) as exc:
                print(f" skipped: {exc}", file=sys.stderr)
            else:
                print(f" snippets: {count}", file=sys.stderr)
            # Records written before a mid-repo failure stay in the output
            # file, so they are counted even when the repo is skipped; this
            # keeps the summary consistent with the file's line count.
            total += count
    print(json.dumps({"snippets": total, "out": str(args.out)}))
    return 0
298
299
300if __name__ == "__main__":

Callers 1

rag_scraper.pyFile · 0.70

Calls 7

parse_argsFunction · 0.85
load_configFunction · 0.85
RepoSpecClass · 0.85
ensure_repoFunction · 0.85
scrape_repoFunction · 0.85
getMethod · 0.65
writeMethod · 0.65

Tested by

no test coverage detected