MCPcopy
hub / github.com/ArchiveBox/ArchiveBox / download_url

Function download_url

archivebox/util.py:165–182  ·  view source on GitHub ↗

Download the contents of a remote url and return the text

(url: str, timeout: int=None)

Source from the content-addressed store, hash-verified

163
@enforce_types
def download_url(url: str, timeout: int=None) -> str:
    """Fetch a remote url with an HTTP GET and return the body decoded as text.

    The request is sent with the configured WGET_USER_AGENT as its User-Agent
    header and with SSL verification controlled by CHECK_SSL_VALIDITY.

    Args:
        url: The address to fetch.
        timeout: Request timeout passed to ``requests.get``. Any falsy value
            (``None`` or ``0``) falls back to the configured TIMEOUT.

    Returns:
        The response body as a str. When an encoding can be determined from
        the Content-Type header, or failing that from the body itself, the
        body is decoded with that encoding; otherwise the encoding chosen by
        ``requests`` is used.
    """
    from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT

    request_headers = {'User-Agent': WGET_USER_AGENT}
    effective_timeout = timeout if timeout else TIMEOUT

    resp = requests.get(
        url,
        headers=request_headers,
        verify=CHECK_SSL_VALIDITY,
        timeout=effective_timeout,
    )

    # Prefer whatever the Content-Type header yields; only when that gives
    # nothing usable is the (provisionally decoded) body inspected instead.
    detected_encoding = http_content_type_encoding(resp.headers.get('Content-Type', ''))
    if not detected_encoding:
        detected_encoding = html_body_declared_encoding(resp.text)

    # Overriding the encoding changes how the final ``resp.text`` is decoded.
    if detected_encoding is not None:
        resp.encoding = detected_encoding

    return resp.text
183
184@enforce_types
185def get_headers(url: str, timeout: int=None) -> str:

Callers 2

save_file_as_sourceFunction · 0.85
get_htmlFunction · 0.85

Calls 1

getMethod · 0.45

Tested by

no test coverage detected