Add embeddings for multiple columns with modality routing. Args: df: Input DataFrame. column_mapping: Dict mapping source_column -> (modality, output_column). Example: { "text": ("text", "text_embedding"), "image_path": ("image", "image_embedding"), "video_path": ("video", "video_embedding") }
(
self,
df: pd.DataFrame,
column_mapping: dict[str, tuple[str, str]],
)
| 83 | pass |
| 84 | |
def embed_dataframe(
    self,
    df: pd.DataFrame,
    column_mapping: dict[str, tuple[str, str]],
) -> pd.DataFrame:
    """
    Add embeddings for multiple columns with modality routing.

    Each source column is passed to ``self.embed`` together with its
    modality, and every returned embedding is stored (via ``.tolist()``)
    in the matching output column of a copy of ``df``.

    Args:
        df: Input DataFrame. It is not modified; a copy is returned.
        column_mapping: Dict mapping source_column -> (modality, output_column).
            Example: {
                "text": ("text", "text_embedding"),
                "image_path": ("image", "image_embedding"),
                "video_path": ("video", "video_embedding"),
            }

    Returns:
        A copy of ``df`` with one added (or overwritten) object-dtype column
        per mapping entry, aligned to the original index.

    Raises:
        KeyError: If a source column in ``column_mapping`` is not present in
            ``df`` (and is not produced by an earlier mapping entry). This is
            checked before anything is embedded.
    """
    # Validate every source column up front so a typo in a later mapping entry
    # fails before any (potentially expensive) embedding call has run. A source
    # may legitimately be the output column of an earlier entry, so outputs are
    # added to the set of known columns as we go.
    available = set(df.columns)
    missing = []
    for source_column, (_, output_column) in column_mapping.items():
        if source_column not in available:
            missing.append(source_column)
        available.add(output_column)
    if missing:
        raise KeyError(f"Source column(s) not found in DataFrame: {missing}")

    df = df.copy()
    has_rows = len(df.index) > 0

    for source_column, (modality, output_column) in column_mapping.items():
        if not has_rows:
            # Nothing to embed: still create the output column, but do not
            # hand the embedder an empty batch.
            df[output_column] = pd.Series([], dtype=object, index=df.index)
            continue

        inputs = df[source_column].tolist()
        embeddings = self.embed(inputs, modality)
        # dtype=object keeps each cell as a whole list instead of letting
        # pandas try to expand or coerce the nested values.
        df[output_column] = pd.Series(
            [emb.tolist() for emb in embeddings], dtype=object, index=df.index
        )

    return df
| 112 | |
| 113 | |
| 114 | class MultiModalEmbedder(BaseEmbedder): |