MCPcopy
hub / github.com/wkentaro/gdown / _parse_embedded_folder_view

Function _parse_embedded_folder_view

gdown/download_folder.py:205–268  ·  view source on GitHub ↗
(
    sess: requests.Session,
    folder_id: str,
    verify: bool | str = True,
)

Source from the content-addressed store, hash-verified

203
204
205def _parse_embedded_folder_view(
206 sess: requests.Session,
207 folder_id: str,
208 verify: bool | str = True,
209) -> tuple[str, list[tuple[str, str, str]]]:
210 params = urllib.parse.urlencode({"id": folder_id})
211 url = f"https://drive.google.com/embeddedfolderview?{params}"
212 res = sess.get(url, verify=verify)
213 if res.status_code != 200:
214 raise DownloadError(
215 f"Failed to retrieve folder contents for folder ID: {folder_id} "
216 f"(status code {res.status_code}). "
217 "You may need to change the permission to "
218 "'Anyone with the link', or have had many accesses. "
219 "Check FAQ in https://github.com/wkentaro/gdown?tab=readme-ov-file#faq.",
220 )
221
222 soup = bs4.BeautifulSoup(res.text, features="html.parser")
223
224 if soup.title is None or soup.title.string is None:
225 raise DownloadError(
226 f"Failed to parse folder contents for folder ID: {folder_id}. "
227 "The page structure may have changed.",
228 )
229 folder_name = soup.title.string
230
231 children: list[tuple[str, str, str]] = []
232 for a_tag in soup.find_all(name="a"):
233 href = a_tag.get("href", "")
234 if not isinstance(href, str):
235 continue
236
237 file_match = re.match(
238 pattern=r"https://drive\.google\.com/file/d/([-\w]{25,})/view",
239 string=href,
240 )
241 if file_match:
242 file_id = file_match.group(1)
243 file_name = a_tag.get_text(strip=True)
244 children.append((file_id, file_name, "application/octet-stream"))
245 continue
246
247 # Google-native files (Docs, Sheets, Slides) use docs.google.com
248 docs_match = re.match(
249 pattern=r"https://docs\.google\.com/\w+/d/([-\w]{25,})/",
250 string=href,
251 )
252 if docs_match:
253 file_id = docs_match.group(1)
254 file_name = a_tag.get_text(strip=True)
255 children.append((file_id, file_name, "application/octet-stream"))
256 continue
257
258 folder_match = re.match(
259 pattern=r"https://drive\.google\.com/drive/folders/([-\w]{25,})",
260 string=href,
261 )
262 if folder_match:

Calls 1

DownloadError · Class · 0.85

Used in the wild — real call sites across dependent graphs

searching dependent graphs…