Manages the creation, loading, transformation, and persistence of LocalDB instances.
| 718 | raise ValueError("Unsupported repository type. Only GitHub, GitLab, and Bitbucket are supported.") |
| 719 | |
| 720 | class DatabaseManager: |
| 721 | """ |
| 722 | Manages the creation, loading, transformation, and persistence of LocalDB instances. |
| 723 | """ |
| 724 | |
| 725 | def __init__(self): |
| 726 | self.db = None  # presumably the LocalDB instance this manager works on (per the class docstring) - confirm |
| 727 | self.repo_url_or_path = None  # same name as prepare_database's first argument; where it is assigned is not visible here |
| 728 | self.repo_paths = None  # NOTE(review): shape and meaning not visible in this section - confirm against _create_repo |
| 729 | |
| 730 | def prepare_database(self, repo_url_or_path: str, repo_type: str = None, access_token: str = None, |
| 731 | embedder_type: str = None, is_ollama_embedder: bool = None, |
| 732 | excluded_dirs: List[str] = None, excluded_files: List[str] = None, |
| 733 | included_dirs: List[str] = None, included_files: List[str] = None) -> List[Document]: |
| 734 | """ |
| 735 | Reset this manager, set up the repository, and return the result of prepare_db_index. |
| 736 | |
| 737 | Args: |
| 738 | repo_url_or_path (str): The URL or local path of the repository |
| 739 | repo_type (str, optional): Type of repository; forwarded unchanged to _create_repo |
| 740 | access_token (str, optional): Access token for private repositories |
| 741 | embedder_type (str, optional): Embedder type to use ('openai', 'google', 'ollama'). |
| 742 | If None, will be determined from configuration. |
| 743 | is_ollama_embedder (bool, optional): DEPRECATED. Use embedder_type instead. |
| 744 | Only consulted when embedder_type is None: True selects 'ollama', False leaves it None. |
| 745 | excluded_dirs (List[str], optional): List of directories to exclude from processing |
| 746 | excluded_files (List[str], optional): List of file patterns to exclude from processing |
| 747 | included_dirs (List[str], optional): List of directories to include exclusively |
| 748 | included_files (List[str], optional): List of file patterns to include exclusively |
| 749 | |
| 750 | Returns: |
| 751 | List[Document]: List of Document objects, as returned by prepare_db_index |
| 752 | """ |
| 753 | # Backward compatibility: translate the deprecated is_ollama_embedder flag into embedder_type |
| 754 | if embedder_type is None and is_ollama_embedder is not None: |
| 755 | embedder_type = 'ollama' if is_ollama_embedder else None |
| 756 | |
| 757 | self.reset_database()  # clear db / repo_url_or_path / repo_paths before working on this repository |
| 758 | self._create_repo(repo_url_or_path, repo_type, access_token) |
| 759 | return self.prepare_db_index(embedder_type=embedder_type, excluded_dirs=excluded_dirs, excluded_files=excluded_files, |
| 760 | included_dirs=included_dirs, included_files=included_files) |
| 761 | |
| 762 | def reset_database(self): |
| 763 | """ |
| 764 | Clear db, repo_url_or_path and repo_paths back to None, the same values __init__ assigns. |
| 765 | """ |
| 766 | self.db = None |
| 767 | self.repo_url_or_path = None |
| 768 | self.repo_paths = None |
| 769 | |
| 770 | def _extract_repo_name_from_url(self, repo_url_or_path: str, repo_type: str) -> str: |
| 771 | # Extract owner and repo name to create unique identifier |
| 772 | url_parts = repo_url_or_path.rstrip('/').split('/') |
| 773 | |
| 774 | if repo_type in ["github", "gitlab", "bitbucket"] and len(url_parts) >= 5: |
| 775 | # GitHub URL format: https://github.com/owner/repo |
| 776 | # GitLab URL format: https://gitlab.com/owner/repo or https://gitlab.com/group/subgroup/repo |
| 777 | # Bitbucket URL format: https://bitbucket.org/owner/repo |
no outgoing calls