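Prepare the indexed database for the repository. Args: embedder_type (str, optional): Embedder type to use ('openai', 'google', 'ollama'). If None, will be determined from configuration. is_ollama_embedder (bool, optional): DEPRECATED. Use embedder_type instead. If None, will be determined from configuration. excluded_dirs (List[str], optional): List of directories to exclude from processing. excluded_files (List[str], optional): List of file patterns to exclude from processing. included_dirs (List[str], optional): List of directories to include exclusively. included_files (List[str], optional): List of file patterns to include exclusively. Returns: List[Document]: List of Document objects.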
(self, embedder_type: str = None, is_ollama_embedder: bool = None,
excluded_dirs: List[str] = None, excluded_files: List[str] = None,
included_dirs: List[str] = None, included_files: List[str] = None)
| 837 | raise |
| 838 | |
| 839 | def prepare_db_index(self, embedder_type: str = None, is_ollama_embedder: bool = None, |
| 840 | excluded_dirs: List[str] = None, excluded_files: List[str] = None, |
| 841 | included_dirs: List[str] = None, included_files: List[str] = None) -> List[Document]: |
| 842 | """ |
| 843 | Prepare the indexed database for the repository. |
| 844 | |
| 845 | Args: |
| 846 | embedder_type (str, optional): Embedder type to use ('openai', 'google', 'ollama'). |
| 847 | If None, will be determined from configuration. |
| 848 | is_ollama_embedder (bool, optional): DEPRECATED. Use embedder_type instead. |
| 849 | If None, will be determined from configuration. |
| 850 | excluded_dirs (List[str], optional): List of directories to exclude from processing |
| 851 | excluded_files (List[str], optional): List of file patterns to exclude from processing |
| 852 | included_dirs (List[str], optional): List of directories to include exclusively |
| 853 | included_files (List[str], optional): List of file patterns to include exclusively |
| 854 | |
| 855 | Returns: |
| 856 | List[Document]: List of Document objects |
| 857 | """ |
| 858 | def _embedding_vector_length(doc: Document) -> int: |
| 859 | vector = getattr(doc, "vector", None) |
| 860 | if vector is None: |
| 861 | return 0 |
| 862 | try: |
| 863 | if hasattr(vector, "shape"): |
| 864 | if len(vector.shape) == 0: |
| 865 | return 0 |
| 866 | return int(vector.shape[-1]) |
| 867 | if hasattr(vector, "__len__"): |
| 868 | return int(len(vector)) |
| 869 | except Exception: |
| 870 | return 0 |
| 871 | return 0 |
| 872 | |
| 873 | # Handle backward compatibility |
| 874 | if embedder_type is None and is_ollama_embedder is not None: |
| 875 | embedder_type = 'ollama' if is_ollama_embedder else None |
| 876 | # check the database |
| 877 | if self.repo_paths and os.path.exists(self.repo_paths["save_db_file"]): |
| 878 | logger.info("Loading existing database...") |
| 879 | try: |
| 880 | self.db = LocalDB.load_state(self.repo_paths["save_db_file"]) |
| 881 | documents = self.db.get_transformed_data(key="split_and_embed") |
| 882 | if documents: |
| 883 | lengths = [_embedding_vector_length(doc) for doc in documents] |
| 884 | non_empty = sum(1 for n in lengths if n > 0) |
| 885 | empty = len(lengths) - non_empty |
| 886 | sample_sizes = sorted({n for n in lengths if n > 0})[:3] |
| 887 | logger.info( |
| 888 | "Loaded %s documents from existing database (embeddings: %s non-empty, %s empty; sample_dims=%s)", |
| 889 | len(documents), |
| 890 | non_empty, |
| 891 | empty, |
| 892 | sample_sizes, |
| 893 | ) |
| 894 | |
| 895 | if non_empty == 0: |
| 896 | logger.warning( |
no test coverage detected