(crawler)
| 76 | print_result(result) |
| 77 | |
def add_chunking_strategy(crawler):
    """Demonstrate two chunking strategies against the same page.

    The crawler is run twice on the NBC News business page: first with
    ``RegexChunking`` configured with a single double-newline pattern, then
    with a default-constructed ``NlpSentenceChunking``. Each run is preceded
    by an introductory message and followed by a printout of its result.

    Args:
        crawler: Object exposing ``run(url=..., chunking_strategy=...)``.
    """
    target_url = "https://www.nbcnews.com/business"

    # Demo 1: split the page text with a regex pattern.
    cprint("\n🧩 [bold cyan]Let's add a chunking strategy: RegexChunking![/bold cyan]", True)
    cprint("RegexChunking is a simple chunking strategy that splits the text based on a given regex pattern. Let's see it in action!")
    regex_result = crawler.run(
        url=target_url,
        chunking_strategy=RegexChunking(patterns=["\n\n"]),
    )
    cprint("[LOG] 📦 [bold yellow]RegexChunking result:[/bold yellow]")
    print_result(regex_result)

    # Demo 2: split the page text into sentences.
    cprint("\n🔍 [bold cyan]Time to explore another chunking strategy: NlpSentenceChunking![/bold cyan]", True)
    cprint("NlpSentenceChunking uses NLP techniques to split the text into sentences. Let's see how it performs!")
    sentence_result = crawler.run(
        url=target_url,
        chunking_strategy=NlpSentenceChunking(),
    )
    cprint("[LOG] 📦 [bold yellow]NlpSentenceChunking result:[/bold yellow]")
    print_result(sentence_result)
| 98 | |
| 99 | def add_extraction_strategy(crawler): |
| 100 | # Adding an extraction strategy: CosineStrategy |
no test coverage detected
searching dependent graphs…