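Load, parse, and chunk a document. Args: `source`: File path, raw text, bytes, etc.; `source_id`: Document identifier; `source_type`: Optional type hint; `source_column`: The column containing the document sources. Returns: A list of chunk dicts with keys `chunk_id` (str), `original_id` (str), `text` (str), and `chunk_index` (int), plus any additional metadata.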
(
self,
source: Any,
source_id: str,
source_column: str,
source_type: Optional[str] = None,
)
| 28 | |
| 29 | @abstractmethod |
| 30 | def load_parse_and_chunk( |
| 31 | self, |
| 32 | source: Any, |
| 33 | source_id: str, |
| 34 | source_column: str, |
| 35 | source_type: Optional[str] = None, |
| 36 | ) -> list[dict]: |
| 37 | """ |
| 38 | Load, parse, and chunk a document (abstract; implemented by subclasses). |
| 39 | |
| 40 | Args: |
| 41 | source: File path, raw text, bytes, etc. |
| 42 | source_id: Document identifier. |
| 43 | source_column: The column containing the document sources. |
| 44 | source_type: Optional type hint; defaults to None. |
| 45 | |
| 46 | Returns: |
| 47 | List of chunk dicts with keys: |
| 48 | - chunk_id: str |
| 49 | - original_id: str |
| 50 | - text: str |
| 51 | - chunk_index: int |
| 52 | - (any additional metadata) |
| 53 | """ |
| 54 | pass |
| 55 | |
| 56 | def chunk_dataframe( |
| 57 | self, |