| 17 | |
| 18 | # Regex-based chunking |
class RegexChunking(ChunkingStrategy):
    """Split text into chunks by applying regex separators one after another.

    Every pattern is applied to each piece produced by the previous pattern,
    so the result is the input text split on all configured separators.
    """

    def __init__(self, patterns=None, **kwargs):
        """Store the separator patterns used by ``chunk``.

        Args:
            patterns: A list of regular-expression patterns, or a single
                pattern string. When omitted, text is split on two
                consecutive newline characters.
            **kwargs: Accepted and ignored by this class.
        """
        if patterns is None:
            patterns = [r'\n\n']  # Default split pattern
        elif isinstance(patterns, str):
            # A bare string would otherwise be iterated character by
            # character, turning each character into its own pattern.
            patterns = [patterns]
        self.patterns = patterns

    def chunk(self, text: str) -> list:
        """Return ``text`` split on every pattern in ``self.patterns``.

        Args:
            text: The string to split.

        Returns:
            A list of string pieces. Empty strings produced by adjacent
            separators are kept. With no patterns, the result is ``[text]``.
        """
        pieces = [text]
        for pattern in self.patterns:
            # re.split() also returns the text of capturing groups and yields
            # None for a group that did not take part in the match. None is
            # not a chunk and cannot be split by the next pattern, so it is
            # dropped here.
            pieces = [
                part
                for piece in pieces
                for part in re.split(pattern, piece)
                if part is not None
            ]
        return pieces
| 33 | |
| 34 | # NLP-based sentence chunking |
| 35 | class NlpSentenceChunking(ChunkingStrategy): |
no outgoing calls
searching dependent graphs…