MCPcopy Index your code
hub / github.com/HKUDS/DeepCode / process_repository

Method process_repository

tools/code_indexer.py:966–1048  ·  view source on GitHub ↗

Process a single repository and create a complete index, with optional concurrent processing.

(self, repo_path: Path)

Source from the content-addressed store, hash-verified

964 return file_summary, relationships
965
966 async def process_repository(self, repo_path: Path) -> RepoIndex:
967 """Process a single repository and create complete index with optional concurrent processing"""
968 repo_name = repo_path.name
969 self.logger.info(f"Processing repository: {repo_name}")
970
971 # Step 1: Generate file tree
972 self.logger.info("Generating file tree structure...")
973 file_tree = self.generate_file_tree(repo_path)
974
975 # Step 2: Get all files
976 all_files = self.get_all_repo_files(repo_path)
977 self.logger.info(f"Found {len(all_files)} files in {repo_name}")
978
979 # Step 3: LLM pre-filtering of relevant files
980 if self.enable_pre_filtering:
981 self.logger.info("Using LLM for file pre-filtering...")
982 selected_file_paths = await self.pre_filter_files(repo_path, file_tree)
983 else:
984 self.logger.info("Pre-filtering is disabled, will analyze all files")
985 selected_file_paths = []
986
987 # Step 4: Filter file list based on filtering results
988 if selected_file_paths:
989 files_to_analyze = self.filter_files_by_paths(
990 all_files, selected_file_paths, repo_path
991 )
992 self.logger.info(
993 f"After LLM filtering, will analyze {len(files_to_analyze)} relevant files (from {len(all_files)} total)"
994 )
995 else:
996 files_to_analyze = all_files
997 self.logger.info("LLM filtering failed, will analyze all files")
998
999 # Step 5: Analyze filtered files (concurrent or sequential)
1000 if self.enable_concurrent_analysis and len(files_to_analyze) > 1:
1001 self.logger.info(
1002 f"Using concurrent analysis with max {self.max_concurrent_files} parallel files"
1003 )
1004 file_summaries, all_relationships = await self._process_files_concurrently(
1005 files_to_analyze
1006 )
1007 else:
1008 self.logger.info("Using sequential file analysis")
1009 file_summaries, all_relationships = await self._process_files_sequentially(
1010 files_to_analyze
1011 )
1012
1013 # Step 6: Create repository index
1014 repo_index = RepoIndex(
1015 repo_name=repo_name,
1016 total_files=len(all_files), # Record original file count
1017 file_summaries=file_summaries,
1018 relationships=all_relationships,
1019 analysis_metadata={
1020 "analysis_date": datetime.now().isoformat(),
1021 "target_structure_analyzed": self.target_structure[:200] + "...",
1022 "total_relationships_found": len(all_relationships),
1023 "high_confidence_relationships": len(

Callers 1

build_all_indexes · Method · 0.95

Calls 7

generate_file_tree · Method · 0.95
get_all_repo_files · Method · 0.95
pre_filter_files · Method · 0.95
filter_files_by_paths · Method · 0.95
RepoIndex · Class · 0.85

Tested by

no test coverage detected