MCPcopy
hub / github.com/vinta/awesome-python / main

Function main

website/fetch_github_stars.py:96–173  ·  view source on GitHub ↗

Fetch GitHub stars for all repos in README.md, updating the JSON cache.

()

Source from the content-addressed store, hash-verified

94
95
96def main() -> None:
97 """Fetch GitHub stars for all repos in README.md, updating the JSON cache."""
98 token = os.environ.get("GITHUB_TOKEN", "")
99 if not token:
100 print("Error: GITHUB_TOKEN environment variable is required.", file=sys.stderr)
101 sys.exit(1)
102
103 readme_text = README_PATH.read_text(encoding="utf-8")
104 current_repos = extract_github_repos(readme_text)
105 current_repos.add("vinta/awesome-python")
106 print(f"Found {len(current_repos)} GitHub repos in README.md")
107
108 cache = load_stars(CACHE_FILE)
109 now = datetime.now(UTC)
110
111 # Prune entries not in current README
112 pruned = {k: v for k, v in cache.items() if k in current_repos}
113 if len(pruned) < len(cache):
114 print(f"Pruned {len(cache) - len(pruned)} stale cache entries")
115 cache = pruned
116
117 # Determine which repos need fetching (missing or stale)
118 max_age = timedelta(hours=CACHE_MAX_AGE_HOURS)
119 to_fetch = []
120 for repo in sorted(current_repos):
121 entry = cache.get(repo)
122 if entry and "fetched_at" in entry:
123 fetched = datetime.fromisoformat(entry["fetched_at"])
124 if now - fetched < max_age:
125 continue
126 to_fetch.append(repo)
127
128 print(f"{len(to_fetch)} repos to fetch ({len(current_repos) - len(to_fetch)} cached)")
129
130 if not to_fetch:
131 save_cache(cache)
132 print("Cache is up to date.")
133 return
134
135 # Fetch in batches
136 fetched_count = 0
137 skipped_repos: list[str] = []
138
139 now_iso = now.isoformat()
140 total_batches = (len(to_fetch) + BATCH_SIZE - 1) // BATCH_SIZE
141
142 with httpx.Client(
143 headers={"Authorization": f"bearer {token}", "Content-Type": "application/json"},
144 transport=httpx.HTTPTransport(retries=2),
145 timeout=30,
146 ) as client:
147 for batch_num, batch in enumerate(batched(to_fetch, BATCH_SIZE), 1):
148 print(f"Fetching batch {batch_num}/{total_batches} ({len(batch)} repos)...")
149
150 try:
151 results = fetch_batch(batch, client)
152 except httpx.HTTPStatusError as e:
153 print(f"HTTP error {e.response.status_code}", file=sys.stderr)

Calls 4

load_stars · Function · 0.90
extract_github_repos · Function · 0.85
save_cache · Function · 0.85
fetch_batch · Function · 0.85