()
| 163 | return False |
| 164 | |
def main(argv=None):
    """Command-line entry point: process the given URLs and print the results.

    Parses the command line, keeps only the URLs accepted by
    ``validate_url``, runs ``process_urls`` over them with ``asyncio.run``
    and prints each returned text to stdout under a header naming its URL.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) reads the process command line, as before.

    Raises:
        SystemExit: status 2 on a command-line error (including a
            ``--max-concurrent`` value below 1); status 1 when no URL
            passes validation or when processing raises.
    """
    parser = argparse.ArgumentParser(description='Fetch and extract text content from webpages.')
    parser.add_argument('urls', nargs='+', help='URLs to process')
    parser.add_argument('--max-concurrent', type=int, default=5,
                        help='Maximum number of concurrent browser instances (default: 5)')
    parser.add_argument('--debug', action='store_true',
                        help='Enable debug logging')

    args = parser.parse_args(argv)

    # A concurrency limit below 1 can never make progress. How process_urls
    # reacts to such a value is not visible from here, so reject it up front
    # with a normal usage error instead of passing it through.
    if args.max_concurrent < 1:
        parser.error('--max-concurrent must be at least 1')

    if args.debug:
        logger.setLevel(logging.DEBUG)

    # Validate URLs; every rejected one is reported, the rest are processed.
    valid_urls = []
    for url in args.urls:
        if validate_url(url):
            valid_urls.append(url)
        else:
            logger.error(f"Invalid URL: {url}")

    if not valid_urls:
        logger.error("No valid URLs provided")
        sys.exit(1)

    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments during the run.
    start_time = time.perf_counter()
    try:
        results = asyncio.run(process_urls(valid_urls, args.max_concurrent))

        # Print results to stdout. Results are paired with URLs by position.
        for url, text in zip(valid_urls, results):
            print(f"\n=== Content from {url} ===")
            print(text)
            print("=" * 80)

        logger.info(f"Total processing time: {time.perf_counter() - start_time:.2f}s")

    except Exception as e:
        # Top-level boundary: report and exit non-zero. The traceback is only
        # attached in --debug mode so normal output stays a single line.
        logger.error(f"Error during execution: {str(e)}", exc_info=args.debug)
        sys.exit(1)
| 205 | |
# Run the command-line interface only when this file is executed as a
# script; importing it as a module must not trigger any processing.
if __name__ == "__main__":
    main()
no test coverage detected