MCPcopy
hub / github.com/feast-dev/feast / chunk_dataframe

Method chunk_dataframe

sdk/python/feast/chunker.py:56–87  ·  view source on GitHub ↗

Chunk all documents in a DataFrame. Args: df: The DataFrame containing the documents to chunk. id_column: The column containing the document IDs. source_column: The column containing the document sources. type_column: The column containing the document types.

(
        self,
        df: pd.DataFrame,
        id_column: str,
        source_column: str,
        type_column: Optional[str] = None,
    )

Source from the content-addressed store, hash-verified

54 pass
55
def chunk_dataframe(
    self,
    df: pd.DataFrame,
    id_column: str,
    source_column: str,
    type_column: Optional[str] = None,
) -> pd.DataFrame:
    """
    Chunk all documents in a DataFrame.

    Each row is passed to ``self.load_parse_and_chunk`` and the chunks
    returned for every row are collected into a single DataFrame.

    Args:
        df: The DataFrame containing the documents to chunk.
        id_column: The column containing the document IDs. Each ID is
            converted to ``str`` before being handed to the chunker.
        source_column: The column containing the document sources.
        type_column: The column containing the document types. When
            omitted (or empty), ``None`` is passed as the type of every
            document.

    Returns:
        A DataFrame built from all chunks produced for all rows. If no
        chunks were produced, an empty DataFrame with the columns
        ``chunk_id``, ``original_id``, ``<source_column>`` and
        ``chunk_index``.

    Raises:
        KeyError: If one of the named columns is not present in ``df``.
    """
    # Read the columns directly rather than via ``df.itertuples()``:
    # itertuples renames columns that are not valid Python identifiers
    # (e.g. "doc id", "_id", "class") to positional names, which made
    # attribute lookup by column name fail for such frames.
    sources = df[source_column]
    doc_ids = df[id_column]
    doc_types = df[type_column] if type_column else [None] * len(df)

    all_chunks = []
    for source, doc_id, doc_type in zip(sources, doc_ids, doc_types):
        all_chunks.extend(
            self.load_parse_and_chunk(
                source,
                str(doc_id),
                source_column,
                doc_type,
            )
        )

    if not all_chunks:
        # Keep a stable schema for callers even when nothing was chunked.
        return pd.DataFrame(
            columns=["chunk_id", "original_id", source_column, "chunk_index"]
        )
    return pd.DataFrame(all_chunks)
88
89
90class TextChunker(BaseChunker):

Callers 2

embed_documentsMethod · 0.80
test_chunk_dataframeMethod · 0.80

Calls 1

load_parse_and_chunkMethod · 0.95

Tested by 1

test_chunk_dataframeMethod · 0.64