MCPcopy
hub / github.com/langroid/langroid / add_documents

Method add_documents

langroid/vector_store/pineconedb.py:240–290  ·  view source on GitHub ↗
(self, documents: Sequence[Document], namespace: str = "")

Source from the content-addressed store, hash-verified

238 logger.error(e)
239
def add_documents(self, documents: Sequence[Document], namespace: str = "") -> None:
    """Embed ``documents`` and upsert them into the configured Pinecone index.

    Each document becomes one vector record whose ``metadata`` is the
    document's own ``metadata`` dict merged with its remaining top-level
    fields. On a key clash the top-level field wins, because it is
    unpacked last.

    If the configured collection is not reported by
    ``list_collections(empty=True)``, it is created first (with
    ``replace=True``). Vectors are then upserted in slices of
    ``self.config.batch_size``.

    Args:
        documents: Documents to ingest. An empty sequence logs a warning
            and returns without touching the index.
        namespace: Namespace to upsert into. When empty, the ``namespace``
            argument is omitted from the upsert call entirely.

    Raises:
        ValueError: If ``self.config.collection_name`` is ``None``.

    Note:
        A ``PineconeApiException`` raised by a batch is logged and not
        re-raised, so the remaining batches are still attempted.
    """
    if self.config.collection_name is None:
        raise ValueError("No collection name set, cannot ingest docs")

    if len(documents) == 0:
        logger.warning("Empty list of documents passed into add_documents")
        return

    super().maybe_add_ids(documents)
    document_dicts = [doc.model_dump() for doc in documents]
    document_ids = [doc.id() for doc in documents]
    embedding_vectors = self.embedding_fn([doc.content for doc in documents])
    vectors = [
        {
            "id": document_id,
            "values": embedding_vector,
            # Flatten the document: nested metadata first, then every other
            # top-level field, so top-level fields override on a key clash.
            "metadata": {
                **document_dict["metadata"],
                **{
                    key: value
                    for key, value in document_dict.items()
                    if key != "metadata"
                },
            },
        }
        for document_dict, document_id, embedding_vector in zip(
            document_dicts, document_ids, embedding_vectors
        )
    ]

    if self.config.collection_name not in self.list_collections(empty=True):
        self.create_collection(
            collection_name=self.config.collection_name, replace=True
        )

    index = self.client.Index(name=self.config.collection_name)
    batch_size = self.config.batch_size
    # Only pass ``namespace`` when one was given, so the client's own
    # default applies otherwise.
    upsert_kwargs = {"namespace": namespace} if namespace else {}

    # Bound the loop by ``vectors`` (what is actually sliced) rather than
    # ``documents``: ``zip`` above truncates to the shortest input.
    total = len(vectors)
    for start in range(0, total, batch_size):
        end = min(start + batch_size, total)
        try:
            index.upsert(vectors=vectors[start:end], **upsert_kwargs)
        except PineconeApiException as e:
            # Report the real slice bounds of the failed batch; the
            # upper bound is ``end``, not the batch size.
            logger.error(f"Unable to add docs between indices {start} and {end}")
            logger.error(e)
291
292 def get_all_documents(
293 self, prefix: str = "", namespace: str = ""

Callers 1

vecdbFunction · 0.95

Calls 5

list_collectionsMethod · 0.95
create_collectionMethod · 0.95
maybe_add_idsMethod · 0.80
idMethod · 0.80
embedding_fnMethod · 0.45

Tested by 1

vecdbFunction · 0.76