MCPcopy
hub / github.com/ArchiveBox/ArchiveBox / get_headers

Function get_headers

archivebox/util.py:185–217  ·  view source on GitHub ↗

Request a remote url (HEAD, falling back to a streamed GET) and return its status code and response headers as a JSON string

(url: str, timeout: int=None)

Source from the content-addressed store, hash-verified

183
@enforce_types
def get_headers(url: str, timeout: int=None) -> str:
    """Request a remote url and return its response headers as a JSON string.

    A HEAD request (following redirects) is tried first. If that request
    raises a ``RequestException`` other than ``ReadTimeout``, or the server
    answers with a status code >= 400, a streamed GET request is made
    instead and its status code and headers are used.

    Args:
        url: The url to request.
        timeout: Timeout handed to ``requests``; the configured ``TIMEOUT``
            is used when this is falsy (``None`` or ``0``).

    Returns:
        A JSON object (indent=4) containing ``Status-Code`` followed by
        every response header.

    Raises:
        ReadTimeout: Re-raised unchanged when the HEAD request times out
            while reading; no GET fallback is attempted in that case.
        Any exception raised by the fallback GET request propagates to the
        caller.
    """
    from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT
    timeout = timeout or TIMEOUT

    # Options shared by the HEAD attempt and the GET fallback.
    request_options = {
        'headers': {'User-Agent': WGET_USER_AGENT},
        'verify': CHECK_SSL_VALIDITY,
        'timeout': timeout,
    }

    response = None
    try:
        response = requests.head(url, allow_redirects=True, **request_options)
    except ReadTimeout:
        # Must be listed before RequestException so a read timeout is
        # surfaced to the caller instead of triggering the GET fallback.
        raise
    except RequestException:
        # Any other request failure: fall through to the GET fallback.
        pass

    if response is None or response.status_code >= 400:
        if response is not None:
            # Discarding the failed HEAD response; release its connection.
            response.close()
        # stream=True so the body is not downloaded just to read headers.
        response = requests.get(url, stream=True, **request_options)

    try:
        return pyjson.dumps(
            {
                'Status-Code': response.status_code,
                **dict(response.headers),
            },
            indent=4,
        )
    finally:
        # A streamed response keeps its connection open until it is
        # consumed or closed; the body is never read here, so close it.
        response.close()
218
219
220@enforce_types

Callers 1

save_headersFunction · 0.85

Calls 1

getMethod · 0.45

Tested by

no test coverage detected