MCPcopy
hub / github.com/feast-dev/feast / embed_documents

Method embed_documents

sdk/python/feast/doc_embedder.py:283–380  ·  view source on GitHub ↗

Embed a list of documents and chunk them into the format expected by the FeatureView schema, using logic supplied by the user, then save the resulting DataFrame to the online store. Args: documents: DataFrame containing the documents to embed. id_column: Column name containing the document IDs. …

(
        self,
        documents: pd.DataFrame,
        id_column: str,
        source_column: str,
        type_column: Optional[str] = None,
        column_mapping: Optional[tuple[str, str]] = None,
        custom_schema_transform_fn: Optional[
            Callable[[pd.DataFrame], pd.DataFrame]
        ] = None,
    )

Source from the content-addressed store, hash-verified

281 os.chdir(original_cwd)
282
283 def embed_documents(
284 self,
285 documents: pd.DataFrame,
286 id_column: str,
287 source_column: str,
288 type_column: Optional[str] = None,
289 column_mapping: Optional[tuple[str, str]] = None,
290 custom_schema_transform_fn: Optional[
291 Callable[[pd.DataFrame], pd.DataFrame]
292 ] = None,
293 ) -> pd.DataFrame:
294 """
295 Embed a list of documents and chunk them into a format expected by the FeatureView schema using a Logic Implementation By the user and save the DataFrame to the online store.
296
297 Args:
298 documents: DataFrame containing the documents to embed.
299 id_column: Column name containing the document IDs.
300 source_column: Column name containing the document sources.
301 type_column: Column name containing the document types.
302 column_mapping: Tuple mapping source columns to (modality, output column).
303 custom_schema_transform_fn: Custom schema transform function to use for transforming the output of the chunker and embedder into the format expected by the FeatureView schema.
304 Returns:
305 DataFrame with the embedded documents.
306
307 Example:
308 documents = pd.DataFrame({
309 "id": [1, 2, 3],
310 "source": ["source1", "source2", "source3"],
311 "type": ["type1", "type2", "type3"],
312 "text": ["text1", "text2", "text3"],
313 })
314 column_mapping = ("text", "text_embedding")
315 df = embed_documents(documents=documents, id_column="id", source_column="source", type_column="type", column_mapping=column_mapping)
316
317 """
318 if custom_schema_transform_fn is not None:
319 sig = inspect.signature(custom_schema_transform_fn)
320 params = list(sig.parameters.values())
321 if (
322 len(params) != 1
323 or params[0].annotation != pd.DataFrame
324 or sig.return_annotation != pd.DataFrame
325 ):
326 raise ValueError(
327 "custom_schema_transform_fn must be a function that takes a DataFrame and returns a DataFrame"
328 )
329 current_schema_transform_fn = (
330 custom_schema_transform_fn
331 if custom_schema_transform_fn is not None
332 else self.schema_transform_fn
333 )
334
335 if column_mapping is None:
336 column_mapping = ("text", "text_embedding")
337
338 if (
339 current_schema_transform_fn is default_schema_transform_fn
340 and column_mapping[0] == "text"

Calls 4

save_to_online_storeMethod · 0.95
chunk_dataframeMethod · 0.80
embed_dataframeMethod · 0.80
valuesMethod · 0.45