MCPcopy
hub / github.com/hesreallyhim/awesome-claude-code / parse_github_url

Function parse_github_url

scripts/utils/github_utils.py:70–129  ·  view source on GitHub ↗

Parse GitHub URL and return API endpoint if it's a GitHub repository content URL. Returns (api_url, is_github, owner, repo) tuple.

(url: str)

Source from the content-addressed store, hash-verified

68
69
70def parse_github_url(url: str) -> tuple[str, bool, str | None, str | None]:
71 """
72 Parse GitHub URL and return API endpoint if it's a GitHub repository content URL.
73 Returns (api_url, is_github, owner, repo) tuple.
74 """
75 # Match GitHub blob or tree URLs - capture everything after /blob/ or /tree/ as one group
76 github_pattern = r"https://github\.com/([^/]+)/([^/]+)/(blob|tree)/(.+)"
77 match = re.match(github_pattern, url)
78
79 if match:
80 owner, repo, _, branch_and_path = match.groups() # _ is blob_or_tree, which we don't need
81 repo = _normalize_repo_name(repo)
82
83 # Split on the first occurrence of a path starting with . or containing a file extension
84 # Common patterns: .github/, .claude/, src/, file.ext
85 parts = branch_and_path.split("/")
86
87 # Find where the file path likely starts
88 branch_parts = []
89 path_parts: list[str] = []
90 found_path_start = False
91
92 for i, part in enumerate(parts):
93 if not found_path_start:
94 # Check if this looks like the start of a file path
95 if (
96 part.startswith(".") # Hidden directories like .github, .claude
97 or "." in part # Files with extensions
98 or part in ["src", "lib", "bin", "scripts", "docs", "test", "tests"]
99 ): # Common directories
100 found_path_start = True
101 path_parts = parts[i:]
102 else:
103 branch_parts.append(part)
104
105 # If we didn't find an obvious path start, treat the last part as the path
106 if not path_parts and parts:
107 branch_parts = parts[:-1] if len(parts) > 1 else parts
108 path_parts = parts[-1:] if len(parts) > 1 else []
109
110 branch = "/".join(branch_parts) if branch_parts else "main"
111 path = "/".join(path_parts)
112
113 # URL-encode the branch name to handle slashes
114 encoded_branch = quote(branch, safe="")
115 api_url = (
116 f"https://api.github.com/repos/{owner}/{repo}/contents/{path}?ref={encoded_branch}"
117 )
118 return api_url, True, owner, repo
119
120 # Check if it's a repository root URL
121 github_repo_pattern = r"https://github\.com/([^/]+)/([^/]+)(?:/.*)?$"
122 match = re.match(github_repo_pattern, url)
123 if match:
124 owner, repo = match.groups()
125 repo = _normalize_repo_name(repo)
126 api_url = f"https://api.github.com/repos/{owner}/{repo}"
127 return api_url, True, owner, repo

Calls 1

_normalize_repo_nameFunction · 0.85