Process a single repository and create complete index with optional concurrent processing
(self, repo_path: Path)
| 964 | return file_summary, relationships |
| 965 | |
| 966 | async def process_repository(self, repo_path: Path) -> RepoIndex: |
| 967 | """Process a single repository and create complete index with optional concurrent processing""" |
| 968 | repo_name = repo_path.name |
| 969 | self.logger.info(f"Processing repository: {repo_name}") |
| 970 | |
| 971 | # Step 1: Generate file tree |
| 972 | self.logger.info("Generating file tree structure...") |
| 973 | file_tree = self.generate_file_tree(repo_path) |
| 974 | |
| 975 | # Step 2: Get all files |
| 976 | all_files = self.get_all_repo_files(repo_path) |
| 977 | self.logger.info(f"Found {len(all_files)} files in {repo_name}") |
| 978 | |
| 979 | # Step 3: LLM pre-filtering of relevant files |
| 980 | if self.enable_pre_filtering: |
| 981 | self.logger.info("Using LLM for file pre-filtering...") |
| 982 | selected_file_paths = await self.pre_filter_files(repo_path, file_tree) |
| 983 | else: |
| 984 | self.logger.info("Pre-filtering is disabled, will analyze all files") |
| 985 | selected_file_paths = [] |
| 986 | |
| 987 | # Step 4: Filter file list based on filtering results |
| 988 | if selected_file_paths: |
| 989 | files_to_analyze = self.filter_files_by_paths( |
| 990 | all_files, selected_file_paths, repo_path |
| 991 | ) |
| 992 | self.logger.info( |
| 993 | f"After LLM filtering, will analyze {len(files_to_analyze)} relevant files (from {len(all_files)} total)" |
| 994 | ) |
| 995 | else: |
| 996 | files_to_analyze = all_files |
| 997 | self.logger.info("LLM filtering failed, will analyze all files") |
| 998 | |
| 999 | # Step 5: Analyze filtered files (concurrent or sequential) |
| 1000 | if self.enable_concurrent_analysis and len(files_to_analyze) > 1: |
| 1001 | self.logger.info( |
| 1002 | f"Using concurrent analysis with max {self.max_concurrent_files} parallel files" |
| 1003 | ) |
| 1004 | file_summaries, all_relationships = await self._process_files_concurrently( |
| 1005 | files_to_analyze |
| 1006 | ) |
| 1007 | else: |
| 1008 | self.logger.info("Using sequential file analysis") |
| 1009 | file_summaries, all_relationships = await self._process_files_sequentially( |
| 1010 | files_to_analyze |
| 1011 | ) |
| 1012 | |
| 1013 | # Step 6: Create repository index |
| 1014 | repo_index = RepoIndex( |
| 1015 | repo_name=repo_name, |
| 1016 | total_files=len(all_files), # Record original file count |
| 1017 | file_summaries=file_summaries, |
| 1018 | relationships=all_relationships, |
| 1019 | analysis_metadata={ |
| 1020 | "analysis_date": datetime.now().isoformat(), |
| 1021 | "target_structure_analyzed": self.target_structure[:200] + "...", |
| 1022 | "total_relationships_found": len(all_relationships), |
| 1023 | "high_confidence_relationships": len( |
no test coverage detected