Pull down a URL, returning a UrlResult object.
(
url: str,
domain_lock_enabled: bool = True,
headers: dict[str, str] | None = None,
allow_redirects: bool = True,
verify_ssl: bool = False,
timeout: float = settings.SENTRY_SOURCE_FETCH_SOCKET_TIMEOUT,
**kwargs: Any,
)
| 148 | |
| 149 | |
| 150 | def fetch_file( |
| 151 | url: str, |
| 152 | domain_lock_enabled: bool = True, |
| 153 | headers: dict[str, str] | None = None, |
| 154 | allow_redirects: bool = True, |
| 155 | verify_ssl: bool = False, |
| 156 | timeout: float = settings.SENTRY_SOURCE_FETCH_SOCKET_TIMEOUT, |
| 157 | **kwargs: Any, |
| 158 | ) -> UrlResult: |
| 159 | """ |
| 160 | Pull down a URL, returning a UrlResult object. |
| 161 | """ |
| 162 | # lock down domains that are problematic |
| 163 | if domain_lock_enabled: |
| 164 | domain_key = get_domain_key(url) |
| 165 | domain_result = cache.get(domain_key) |
| 166 | if domain_result: |
| 167 | domain_result["url"] = url |
| 168 | raise CannotFetch(domain_result) |
| 169 | |
| 170 | logger.debug("Fetching %r from the internet", url) |
| 171 | |
| 172 | with contextlib.ExitStack() as ctx: |
| 173 | http_session = ctx.enter_context(SafeSession()) |
| 174 | |
| 175 | try: |
| 176 | start = time.monotonic() |
| 177 | response = ctx.enter_context( |
| 178 | http_session.get( |
| 179 | url, |
| 180 | allow_redirects=allow_redirects, |
| 181 | verify=verify_ssl, |
| 182 | headers=headers, |
| 183 | timeout=timeout, |
| 184 | stream=True, |
| 185 | **kwargs, |
| 186 | ) |
| 187 | ) |
| 188 | |
| 189 | try: |
| 190 | cl = int(response.headers["content-length"]) |
| 191 | except (LookupError, ValueError): |
| 192 | cl = 0 |
| 193 | if cl > settings.SENTRY_SOURCE_FETCH_MAX_SIZE: |
| 194 | raise OverflowError() |
| 195 | |
| 196 | outfile = BytesIO() |
| 197 | |
| 198 | cl = 0 |
| 199 | |
| 200 | # Only need to even attempt to read the response body if we |
| 201 | # got a 200 OK |
| 202 | if response.status_code == 200: |
| 203 | for chunk in response.iter_content(16 * 1024): |
| 204 | if time.monotonic() - start > settings.SENTRY_SOURCE_FETCH_TIMEOUT: |
| 205 | raise Timeout() |
| 206 | outfile.write(chunk) |
| 207 | cl += len(chunk) |
Nothing calls this function directly.
No test coverage detected.