Method
load_parse_and_chunk
(
self,
source: Any,
source_id: str,
source_column: str,
source_type: Optional[str] = None,
)
Source from the content-addressed store, hash-verified
| 91 | """Default chunker for plain text. Chunks by word count.""" |
| 92 | |
| 93 | def load_parse_and_chunk( |
| 94 | self, |
| 95 | source: Any, |
| 96 | source_id: str, |
| 97 | source_column: str, |
| 98 | source_type: Optional[str] = None,  # accepted but not referenced in this method's body |
| 99 | ) -> list[dict]: |
| 100 | # Load: obtain the source's content as a string via self._load |
| 101 | text = self._load(source) |
| 102 | |
| 103 | # Chunk by words: hand the text, source_id and source_column to self._chunk_by_words and return its result unchanged |
| 104 | return self._chunk_by_words(text, source_id, source_column) |
| 105 | |
| 106 | def _load(self, source: Any) -> str: |
| 107 | from pathlib import Path |