| 238 | logger.error(e) |
| 239 | |
def add_documents(self, documents: Sequence[Document], namespace: str = "") -> None:
    """Embed ``documents`` and upsert them into the configured index.

    Each document becomes one vector record whose ``id`` is ``doc.id()``,
    whose ``values`` is the embedding of ``doc.content``, and whose
    ``metadata`` is the document's own metadata merged with every other
    top-level field of ``doc.model_dump()`` (top-level fields win on a
    key collision).

    If the configured collection is not among ``list_collections(empty=True)``
    it is created first. Records are upserted in batches of
    ``self.config.batch_size``; a batch that fails with
    ``PineconeApiException`` is logged and skipped rather than raised.

    Args:
        documents: Documents to ingest. An empty sequence logs a warning
            and returns without doing anything.
        namespace: Namespace to upsert into. When empty, the ``namespace``
            argument is not passed to ``upsert`` at all.

    Raises:
        ValueError: If ``self.config.collection_name`` is ``None``.
    """
    collection_name = self.config.collection_name
    if collection_name is None:
        raise ValueError("No collection name set, cannot ingest docs")

    if not documents:
        logger.warning("Empty list of documents passed into add_documents")
        return

    super().maybe_add_ids(documents)
    document_dicts = [doc.model_dump() for doc in documents]
    document_ids = [doc.id() for doc in documents]
    embedding_vectors = self.embedding_fn([doc.content for doc in documents])
    vectors = [
        {
            "id": document_id,
            "values": embedding_vector,
            "metadata": {
                **document_dict["metadata"],
                # Flatten the remaining top-level fields into metadata.
                **{
                    key: value
                    for key, value in document_dict.items()
                    if key != "metadata"
                },
            },
        }
        for document_dict, document_id, embedding_vector in zip(
            document_dicts, document_ids, embedding_vectors
        )
    ]

    if collection_name not in self.list_collections(empty=True):
        self.create_collection(collection_name=collection_name, replace=True)

    index = self.client.Index(name=collection_name)
    batch_size = self.config.batch_size
    # Only forward ``namespace`` when one was given, as the original did.
    upsert_kwargs = {"namespace": namespace} if namespace else {}

    # Iterate over the vectors actually built: ``zip`` above truncates to the
    # shortest input, so ``len(vectors)`` is the authoritative count.
    total = len(vectors)
    for start in range(0, total, batch_size):
        end = min(start + batch_size, total)
        try:
            index.upsert(vectors=vectors[start:end], **upsert_kwargs)
        except PineconeApiException as e:
            # Report the real slice bounds of the failed batch.
            logger.error(f"Unable to add docs between indices {start} and {end}")
            logger.error(e)
| 291 | |
| 292 | def get_all_documents( |
| 293 | self, prefix: str = "", namespace: str = "" |