MCPcopy Index your code
hub / github.com/AsyncFuncAI/deepwiki-open / transform_documents_and_save_to_db

Function transform_documents_and_save_to_db

api/data_pipeline.py:434–458  ·  view source on GitHub ↗

Transforms a list of documents and saves them to a local database. Args: documents (list): A list of `Document` objects. db_path (str): The path to the local database file. embedder_type (str, optional): The embedder type ('openai', 'google', 'ollama').

(
    documents: List[Document], db_path: str, embedder_type: str = None, is_ollama_embedder: bool = None
)

Source from the content-addressed store, hash-verified

432 return data_transformer
433
def transform_documents_and_save_to_db(
    documents: List[Document], db_path: str, embedder_type: str = None, is_ollama_embedder: bool = None
) -> LocalDB:
    """
    Transforms a list of documents and saves them to a local database.

    The data transformer built by ``prepare_data_pipeline`` is registered on a
    fresh ``LocalDB`` under the key ``"split_and_embed"``, the documents are
    loaded and transformed with it, and the resulting state is written to
    ``db_path``.

    Args:
        documents (list): A list of `Document` objects.
        db_path (str): The path to the local database file. Its parent
            directory is created if it does not exist; a bare filename
            (no directory component) is saved relative to the current
            working directory.
        embedder_type (str, optional): The embedder type ('openai', 'google', 'ollama').
                                     If None, will be determined from configuration.
        is_ollama_embedder (bool, optional): DEPRECATED. Use embedder_type instead.
                                           If None, will be determined from configuration.

    Returns:
        LocalDB: The database instance holding the loaded and transformed
        documents, after its state has been saved to ``db_path``.
    """
    # Get the data transformer
    data_transformer = prepare_data_pipeline(embedder_type, is_ollama_embedder)

    # Load the documents into a local database and run the transformer on them
    db = LocalDB()
    db.register_transformer(transformer=data_transformer, key="split_and_embed")
    db.load(documents)
    db.transform(key="split_and_embed")

    # os.path.dirname() returns "" for a bare filename, and os.makedirs("")
    # raises FileNotFoundError, so only create the directory when there is one.
    db_dir = os.path.dirname(db_path)
    if db_dir:
        os.makedirs(db_dir, exist_ok=True)

    db.save_state(filepath=db_path)
    return db
459
460def get_github_file_content(repo_url: str, file_path: str, access_token: str = None) -> str:
461 """

Callers 1

prepare_db_indexMethod · 0.85

Calls 1

prepare_data_pipelineFunction · 0.85

Tested by

no test coverage detected