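Create a brain from a list of file paths. Args: name (str): The name of the brain; file_paths (list[str | Path]): The list of file paths to add to the brain; vector_db (VectorStore | None): The vector store used to store the processed files; storage (StorageBase): The storage used to store the files; llm (LLMEndpoint | None): The language model used to generate the answer; embedder (Embeddings | None): The embeddings used to create the index of the processed files; skip_file_error (bool): Whether to skip files that cannot be processed; processor_kwargs (dict[str, Any] | None): Additional arguments for the processor. Returns: Brain: The brain created from the file paths.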
(
cls,
*,
name: str,
file_paths: list[str | Path],
vector_db: VectorStore | None = None,
storage: StorageBase = TransparentStorage(),
llm: LLMEndpoint | None = None,
embedder: Embeddings | None = None,
skip_file_error: bool = False,
processor_kwargs: dict[str, Any] | None = None,
)
| 298 | |
| 299 | @classmethod |
| 300 | async def afrom_files( |
| 301 | cls, |
| 302 | *, |
| 303 | name: str, |
| 304 | file_paths: list[str | Path], |
| 305 | vector_db: VectorStore | None = None, |
| 306 | storage: StorageBase = TransparentStorage(), |
| 307 | llm: LLMEndpoint | None = None, |
| 308 | embedder: Embeddings | None = None, |
| 309 | skip_file_error: bool = False, |
| 310 | processor_kwargs: dict[str, Any] | None = None, |
| 311 | ): |
| 312 | """ |
| 313 | Create a brain from a list of file paths. |
| 314 | Args: |
| 315 | name (str): The name of the brain. |
| 316 | file_paths (list[str | Path]): The list of file paths to add to the brain. |
| 317 | vector_db (VectorStore | None): The vector store used to store the processed files. |
| 318 | storage (StorageBase): The storage used to store the files. |
| 319 | llm (LLMEndpoint | None): The language model used to generate the answer. |
| 320 | embedder (Embeddings | None): The embeddings used to create the index of the processed files. |
| 321 | skip_file_error (bool): Whether to skip files that cannot be processed. |
| 322 | processor_kwargs (dict[str, Any] | None): Additional arguments for the processor. |
| 323 | Returns: |
| 324 | Brain: The brain created from the file paths. |
| 325 | Example: |
| 326 | ```python |
| 327 | brain = await Brain.afrom_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"]) |
| 328 | brain.print_info() |
| 329 | ``` |
| 330 | """ |
| 331 | if llm is None: |
| 332 | llm = default_llm() |
| 333 | |
| 334 | if embedder is None: |
| 335 | embedder = default_embedder() |
| 336 | |
| 337 | processor_kwargs = processor_kwargs or {} |
| 338 | |
| 339 | brain_id = uuid4() |
| 340 | |
| 341 | # TODO: run in parallel using tasks |
| 342 | |
| 343 | for path in file_paths: |
| 344 | file = await load_qfile(brain_id, path) |
| 345 | await storage.upload_file(file) |
| 346 | |
| 347 | logger.debug(f"uploaded all files to {storage}") |
| 348 | |
| 349 | # Parse files |
| 350 | docs = await process_files( |
| 351 | storage=storage, |
| 352 | skip_file_error=skip_file_error, |
| 353 | **processor_kwargs, |
| 354 | ) |
| 355 | |
| 356 | # Building brain's vectordb |
| 357 | if vector_db is None: |