| 194 | self.delete_collection(collection_name) |
| 195 | |
| 196 | def add_documents(self, documents: Sequence[Document]) -> None: |
| 197 | super().maybe_add_ids(documents) |
| 198 | colls = self.list_collections(empty=True) |
| 199 | if len(documents) == 0: |
| 200 | return |
| 201 | embedding_vecs = self.embedding_fn([doc.content for doc in documents]) |
| 202 | coll_name = self.config.collection_name |
| 203 | if coll_name is None: |
| 204 | raise ValueError("No collection name set, cannot ingest docs") |
| 205 | # self._maybe_set_doc_class_schema(documents[0]) |
| 206 | table_exists = False |
| 207 | if ( |
| 208 | coll_name in colls |
| 209 | and self.client.open_table(coll_name).head(1).shape[0] > 0 |
| 210 | ): |
| 211 | # collection exists and is not empty: |
| 212 | # if replace_collection is True, we'll overwrite the existing collection, |
| 213 | # else we'll append to it. |
| 214 | if self.config.replace_collection: |
| 215 | self.client.drop_table(coll_name) |
| 216 | else: |
| 217 | table_exists = True |
| 218 | |
| 219 | ids = [str(d.id()) for d in documents] |
| 220 | # don't insert all at once, batch in chunks of b, |
| 221 | # else we get an API error |
| 222 | b = self.config.batch_size |
| 223 | |
def make_batches() -> Generator[List[Dict[str, Any]], None, None]:
    """Yield the rows to insert, one list of at most ``b`` rows at a time.

    Each row is a dict holding the document's string id under ``id``, its
    embedding under ``vector``, and every field of ``doc.model_dump()``.
    Reads ``ids``, ``embedding_vecs``, ``documents`` and ``b`` from the
    enclosing scope.
    """
    for start in range(0, len(ids), b):
        rows: List[Dict[str, Any]] = []
        # Number the slice from ``start`` so ``pos`` indexes the full-length
        # ``ids`` and ``embedding_vecs`` sequences directly.
        for pos, doc in enumerate(documents[start : start + b], start):
            doc_id = ids[pos]
            vec = embedding_vecs[pos]
            rows.append(dict(id=doc_id, vector=vec, **doc.model_dump()))
        yield rows
| 235 | |
| 236 | try: |
| 237 | if table_exists: |
| 238 | tbl = self.client.open_table(coll_name) |
| 239 | tbl.add(make_batches()) |
| 240 | else: |
| 241 | batch_gen = make_batches() |
| 242 | batch = next(batch_gen) |
| 243 | # use first batch to create table... |
| 244 | tbl = self.client.create_table( |
| 245 | coll_name, |
| 246 | data=batch, |
| 247 | mode="create", |
| 248 | ) |
| 249 | # ... and add the rest |
| 250 | tbl.add(batch_gen) |
| 251 | except Exception as e: |
| 252 | logger.error( |
| 253 | f""" |