MCPcopy
hub / github.com/Aider-AI/aider / scrape

Method scrape

aider/scrape.py:98–122  ·  view source on GitHub ↗

Scrape a url and turn it into readable markdown if it's HTML. If it's plain text or non-HTML, return it as-is. `url` - the URL to scrape.

(self, url)

Source from the content-addressed store, hash-verified

96 self.verify_ssl = verify_ssl
97
def scrape(self, url):
    """
    Scrape a url and turn it into readable markdown if it's HTML.
    If it's plain text or non-HTML, return it as-is.

    `url` - the URL to scrape.

    Returns the content (converted to markdown when it is HTML), or None
    when nothing could be retrieved; in that case the failure is reported
    through `self.print_error`.
    """

    if self.playwright_available:
        content, mime_type = self.scrape_with_playwright(url)
    else:
        content, mime_type = self.scrape_with_httpx(url)

    if not content:
        self.print_error(f"Failed to retrieve content from {url}")
        return None

    # MIME type names are case-insensitive, and an unknown type may be
    # reported as None or as an empty string; normalize so that both mean
    # "unknown" and "Text/HTML" still matches.
    normalized_mime = (mime_type or "").strip().lower()

    if normalized_mime:
        # A declared type is authoritative: only text/html gets converted.
        is_html = normalized_mime.startswith("text/html")
    else:
        # No usable type was reported, so fall back to sniffing the content.
        is_html = self.looks_like_html(content)

    if is_html:
        # NOTE(review): try_pandoc() presumably prepares pandoc for the
        # conversion below -- confirm against its definition.
        self.try_pandoc()
        content = self.html_to_markdown(content)

    return content
123
124 def looks_like_html(self, content):
125 """

Callers 9

main · Function · 0.95
test_scrape_text_html · Method · 0.95
cmd_web · Method · 0.45
do_web · Method · 0.45

Calls 5

scrape_with_httpx · Method · 0.95
looks_like_html · Method · 0.95
try_pandoc · Method · 0.95
html_to_markdown · Method · 0.95