Fetch GitHub stars for all repos in README.md, updating the JSON cache.
()
| 94 | |
| 95 | |
| 96 | def main() -> None: |
| 97 | """Fetch GitHub stars for all repos in README.md, updating the JSON cache.""" |
| 98 | token = os.environ.get("GITHUB_TOKEN", "") |
| 99 | if not token: |
| 100 | print("Error: GITHUB_TOKEN environment variable is required.", file=sys.stderr) |
| 101 | sys.exit(1) |
| 102 | |
| 103 | readme_text = README_PATH.read_text(encoding="utf-8") |
| 104 | current_repos = extract_github_repos(readme_text) |
| 105 | current_repos.add("vinta/awesome-python") |
| 106 | print(f"Found {len(current_repos)} GitHub repos in README.md") |
| 107 | |
| 108 | cache = load_stars(CACHE_FILE) |
| 109 | now = datetime.now(UTC) |
| 110 | |
| 111 | # Prune entries not in current README |
| 112 | pruned = {k: v for k, v in cache.items() if k in current_repos} |
| 113 | if len(pruned) < len(cache): |
| 114 | print(f"Pruned {len(cache) - len(pruned)} stale cache entries") |
| 115 | cache = pruned |
| 116 | |
| 117 | # Determine which repos need fetching (missing or stale) |
| 118 | max_age = timedelta(hours=CACHE_MAX_AGE_HOURS) |
| 119 | to_fetch = [] |
| 120 | for repo in sorted(current_repos): |
| 121 | entry = cache.get(repo) |
| 122 | if entry and "fetched_at" in entry: |
| 123 | fetched = datetime.fromisoformat(entry["fetched_at"]) |
| 124 | if now - fetched < max_age: |
| 125 | continue |
| 126 | to_fetch.append(repo) |
| 127 | |
| 128 | print(f"{len(to_fetch)} repos to fetch ({len(current_repos) - len(to_fetch)} cached)") |
| 129 | |
| 130 | if not to_fetch: |
| 131 | save_cache(cache) |
| 132 | print("Cache is up to date.") |
| 133 | return |
| 134 | |
| 135 | # Fetch in batches |
| 136 | fetched_count = 0 |
| 137 | skipped_repos: list[str] = [] |
| 138 | |
| 139 | now_iso = now.isoformat() |
| 140 | total_batches = (len(to_fetch) + BATCH_SIZE - 1) // BATCH_SIZE |
| 141 | |
| 142 | with httpx.Client( |
| 143 | headers={"Authorization": f"bearer {token}", "Content-Type": "application/json"}, |
| 144 | transport=httpx.HTTPTransport(retries=2), |
| 145 | timeout=30, |
| 146 | ) as client: |
| 147 | for batch_num, batch in enumerate(batched(to_fetch, BATCH_SIZE), 1): |
| 148 | print(f"Fetching batch {batch_num}/{total_batches} ({len(batch)} repos)...") |
| 149 | |
| 150 | try: |
| 151 | results = fetch_batch(batch, client) |
| 152 | except httpx.HTTPStatusError as e: |
| 153 | print(f"HTTP error {e.response.status_code}", file=sys.stderr) |