| 578 | s3url: str = Field(..., description="URL of the file.") |
| 579 | |
def download(self, outdir: Path, chunk_size: int = _DEFAULT_CHUNK_SIZE) -> Path:
    """Stream the file at ``self.s3url`` into ``outdir`` and return its path.

    Args:
        outdir: Directory to write into; created (with parents) if missing.
        chunk_size: Chunk size forwarded to ``response.iter_content``.

    Returns:
        ``outdir`` joined with the final path component of ``self.name``.

    Raises:
        ErrorDownloadingFile: If ``self.name`` has no usable file name
            component, if the target resolves outside ``outdir``, if the
            request fails, if the response status is not 200, or if the
            stream breaks while the body is being written.
    """
    # SEC-316: `self.name` comes from the (potentially compromised or
    # MITM'd) Composio API response. Keep only the final path component so
    # traversal sequences like `../../../foo` collapse to `foo`.
    safe_name = Path(self.name).name
    if not safe_name:
        # Names such as "", "." or "/" have no final component; the target
        # would then be `outdir` itself, which passes the containment check
        # below and only fails later with a raw OSError from `open()`.
        raise ErrorDownloadingFile(
            f"Invalid filename '{self.name}': no usable file name component."
        )
    outfile = outdir / safe_name
    # Second line of defence: `Path("..").name` is still "..", and
    # `resolve()` follows symlinks, so verify the final target stays under
    # `outdir`. `is_relative_to` compares whole path components, so a
    # sibling directory sharing a string prefix with `outdir` is rejected.
    if not outfile.resolve().is_relative_to(outdir.resolve()):
        raise ErrorDownloadingFile(
            f"Path traversal detected: filename '{self.name}' resolves "
            "outside the intended output directory."
        )
    outdir.mkdir(exist_ok=True, parents=True)
    try:
        response = requests.get(
            url=self.s3url,
            stream=True,
            timeout=(_CONNECT_TIMEOUT, _READ_TIMEOUT),
        )
    except requests.exceptions.RequestException as e:
        raise ErrorDownloadingFile(
            "Error downloading file: "
            f"{_sanitize_url_for_logging(self.s3url)}. Error: {type(e).__name__}"
        ) from e
    if response.status_code != 200:
        response.close()
        raise ErrorDownloadingFile(
            f"Error downloading file: {_sanitize_url_for_logging(self.s3url)}"
        )

    try:
        with outfile.open("wb") as fd:
            for chunk in response.iter_content(chunk_size=chunk_size):
                fd.write(chunk)
    except requests.exceptions.RequestException as e:
        # The stream broke mid-transfer: the file is already closed here, so
        # remove the truncated output rather than leave it looking like a
        # finished download. Cleanup is best-effort; the download error is
        # the one worth reporting.
        try:
            outfile.unlink(missing_ok=True)
        except OSError:
            pass
        raise ErrorDownloadingFile(
            "Error downloading file: "
            f"{_sanitize_url_for_logging(self.s3url)}. Error: {type(e).__name__}"
        ) from e
    finally:
        response.close()
    return outfile
| 623 | |
| 624 | |
| 625 | # Internal alias — ``FileHelper`` receives the already-adapted context-form |