MCPcopy
hub / github.com/alirezamika/autoscraper / _fetch_html

Method _fetch_html

autoscraper/auto_scraper.py:97–111  ·  view source on GitHub ↗
(cls, url, request_args=None)

Source from the content-addressed store, hash-verified

95
@classmethod
def _fetch_html(cls, url, request_args=None):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address to request. When truthy, its network location is
            sent as the ``Host`` header.
        request_args: Optional mapping of extra keyword arguments passed
            through to ``requests.get``. A ``"headers"`` entry, if present,
            is merged over the class-level ``request_headers`` (the
            caller's values win). The mapping itself is never modified.

    Returns:
        The decoded response text.
    """
    # Work on a shallow copy: popping "headers" from the caller's own dict
    # would silently drop the custom headers if that dict is reused for a
    # later request.
    request_args = dict(request_args) if request_args else {}

    headers = dict(cls.request_headers)
    if url:
        headers["Host"] = urlparse(url).netloc

    # ``headers=None`` is accepted by requests, so treat it as "no extras"
    # instead of failing inside ``dict.update``.
    user_headers = request_args.pop("headers", None) or {}
    headers.update(user_headers)

    res = requests.get(url, headers=headers, **request_args)

    # If the reported encoding is ISO-8859-1 but the Content-Type header
    # never named it, it is presumably requests' fallback rather than a
    # server-declared charset, so use the encoding detected from the body.
    content_type = res.headers.get("Content-Type", "")
    if res.encoding == "ISO-8859-1" and "ISO-8859-1" not in content_type:
        res.encoding = res.apparent_encoding

    return res.text
112
113 @classmethod
114 def _get_soup(cls, url=None, html=None, request_args=None):

Callers 1

_get_soupMethod · 0.80

Calls

no outgoing calls

Tested by

no test coverage detected