(
sess: requests.Session,
folder_id: str,
verify: bool | str = True,
)
| 203 | |
| 204 | |
| 205 | def _parse_embedded_folder_view( |
| 206 | sess: requests.Session, |
| 207 | folder_id: str, |
| 208 | verify: bool | str = True, |
| 209 | ) -> tuple[str, list[tuple[str, str, str]]]: |
| 210 | params = urllib.parse.urlencode({"id": folder_id}) |
| 211 | url = f"https://drive.google.com/embeddedfolderview?{params}" |
| 212 | res = sess.get(url, verify=verify) |
| 213 | if res.status_code != 200: |
| 214 | raise DownloadError( |
| 215 | f"Failed to retrieve folder contents for folder ID: {folder_id} " |
| 216 | f"(status code {res.status_code}). " |
| 217 | "You may need to change the permission to " |
| 218 | "'Anyone with the link', or have had many accesses. " |
| 219 | "Check FAQ in https://github.com/wkentaro/gdown?tab=readme-ov-file#faq.", |
| 220 | ) |
| 221 | |
| 222 | soup = bs4.BeautifulSoup(res.text, features="html.parser") |
| 223 | |
| 224 | if soup.title is None or soup.title.string is None: |
| 225 | raise DownloadError( |
| 226 | f"Failed to parse folder contents for folder ID: {folder_id}. " |
| 227 | "The page structure may have changed.", |
| 228 | ) |
| 229 | folder_name = soup.title.string |
| 230 | |
| 231 | children: list[tuple[str, str, str]] = [] |
| 232 | for a_tag in soup.find_all(name="a"): |
| 233 | href = a_tag.get("href", "") |
| 234 | if not isinstance(href, str): |
| 235 | continue |
| 236 | |
| 237 | file_match = re.match( |
| 238 | pattern=r"https://drive\.google\.com/file/d/([-\w]{25,})/view", |
| 239 | string=href, |
| 240 | ) |
| 241 | if file_match: |
| 242 | file_id = file_match.group(1) |
| 243 | file_name = a_tag.get_text(strip=True) |
| 244 | children.append((file_id, file_name, "application/octet-stream")) |
| 245 | continue |
| 246 | |
| 247 | # Google-native files (Docs, Sheets, Slides) use docs.google.com |
| 248 | docs_match = re.match( |
| 249 | pattern=r"https://docs\.google\.com/\w+/d/([-\w]{25,})/", |
| 250 | string=href, |
| 251 | ) |
| 252 | if docs_match: |
| 253 | file_id = docs_match.group(1) |
| 254 | file_name = a_tag.get_text(strip=True) |
| 255 | children.append((file_id, file_name, "application/octet-stream")) |
| 256 | continue |
| 257 | |
| 258 | folder_match = re.match( |
| 259 | pattern=r"https://drive\.google\.com/drive/folders/([-\w]{25,})", |
| 260 | string=href, |
| 261 | ) |
| 262 | if folder_match: |
searching dependent graphs…