MCPcopy Index your code
hub / github.com/grapeot/devin.cursorrules / main

Function main

tools/web_scraper.py:165–204  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

163 return False
164
def main():
    """Command-line entry point: fetch the given URLs and print their text.

    Parses the command line, logs and drops every URL that ``validate_url``
    rejects, runs ``process_urls`` over the remaining URLs and prints one
    delimited section per URL to stdout.

    Exit status:
        1 -- no URL passed validation, or processing/printing raised.
        2 -- argparse rejected the command line, including a
             ``--max-concurrent`` value below 1.
    """
    parser = argparse.ArgumentParser(description='Fetch and extract text content from webpages.')
    parser.add_argument('urls', nargs='+', help='URLs to process')
    parser.add_argument('--max-concurrent', type=int, default=5,
                        help='Maximum number of concurrent browser instances (default: 5)')
    parser.add_argument('--debug', action='store_true',
                        help='Enable debug logging')

    args = parser.parse_args()

    # A concurrency limit of zero or less cannot do any work; reject it as a
    # usage error here rather than letting it fail inside the async run.
    if args.max_concurrent < 1:
        parser.error(f"--max-concurrent must be at least 1 (got {args.max_concurrent})")

    if args.debug:
        logger.setLevel(logging.DEBUG)

    # Validate URLs, reporting each rejected one individually.
    valid_urls = []
    for url in args.urls:
        if validate_url(url):
            valid_urls.append(url)
        else:
            logger.error(f"Invalid URL: {url}")

    if not valid_urls:
        logger.error("No valid URLs provided")
        sys.exit(1)

    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # system clock adjustments during a long scrape.
    start_time = time.perf_counter()
    try:
        results = asyncio.run(process_urls(valid_urls, args.max_concurrent))

        # Print results to stdout.
        # NOTE(review): pairing by position assumes process_urls returns one
        # result per URL, in input order -- confirm against process_urls.
        for url, text in zip(valid_urls, results):
            print(f"\n=== Content from {url} ===")
            print(text)
            print("=" * 80)

        logger.info(f"Total processing time: {time.perf_counter() - start_time:.2f}s")

    except Exception as e:
        # Top-level boundary: report and exit non-zero. The traceback is only
        # attached when --debug was requested, keeping normal output terse.
        logger.error(f"Error during execution: {str(e)}", exc_info=args.debug)
        sys.exit(1)
205
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Callers 1

web_scraper.pyFile · 0.70

Calls 2

validate_urlFunction · 0.85
process_urlsFunction · 0.85

Tested by

no test coverage detected