hub / github.com/HKUDS/DeepCode / process_repository

Method process_repository

tools/code_indexer.py:966–1048 · view source on GitHub ↗

Process a single repository and create a complete index, with optional concurrent processing of files

(self, repo_path: Path)

Source from the content-addressed store, hash-verified

964	return file_summary, relationships
965
966	async def process_repository(self, repo_path: Path) -> RepoIndex:
967	"""Process a single repository and create complete index with optional concurrent processing"""
968	repo_name = repo_path.name
969	self.logger.info(f"Processing repository: {repo_name}")
970
971	# Step 1: Generate file tree
972	self.logger.info("Generating file tree structure...")
973	file_tree = self.generate_file_tree(repo_path)
974
975	# Step 2: Get all files
976	all_files = self.get_all_repo_files(repo_path)
977	self.logger.info(f"Found {len(all_files)} files in {repo_name}")
978
979	# Step 3: LLM pre-filtering of relevant files
980	if self.enable_pre_filtering:
981	self.logger.info("Using LLM for file pre-filtering...")
982	selected_file_paths = await self.pre_filter_files(repo_path, file_tree)
983	else:
984	self.logger.info("Pre-filtering is disabled, will analyze all files")
985	selected_file_paths = []
986
987	# Step 4: Filter file list based on filtering results
988	if selected_file_paths:
989	files_to_analyze = self.filter_files_by_paths(
990	all_files, selected_file_paths, repo_path
991	)
992	self.logger.info(
993	f"After LLM filtering, will analyze {len(files_to_analyze)} relevant files (from {len(all_files)} total)"
994	)
995	else:
996	files_to_analyze = all_files
997	self.logger.info("LLM filtering failed, will analyze all files")
998
999	# Step 5: Analyze filtered files (concurrent or sequential)
1000	if self.enable_concurrent_analysis and len(files_to_analyze) > 1:
1001	self.logger.info(
1002	f"Using concurrent analysis with max {self.max_concurrent_files} parallel files"
1003	)
1004	file_summaries, all_relationships = await self._process_files_concurrently(
1005	files_to_analyze
1006	)
1007	else:
1008	self.logger.info("Using sequential file analysis")
1009	file_summaries, all_relationships = await self._process_files_sequentially(
1010	files_to_analyze
1011	)
1012
1013	# Step 6: Create repository index
1014	repo_index = RepoIndex(
1015	repo_name=repo_name,
1016	total_files=len(all_files), # Record original file count
1017	file_summaries=file_summaries,
1018	relationships=all_relationships,
1019	analysis_metadata={
1020	"analysis_date": datetime.now().isoformat(),
1021	"target_structure_analyzed": self.target_structure[:200] + "...",
1022	"total_relationships_found": len(all_relationships),
1023	"high_confidence_relationships": len(

Callers 1

build_all_indexesMethod · 0.95

Calls 7

generate_file_treeMethod · 0.95

get_all_repo_filesMethod · 0.95

pre_filter_filesMethod · 0.95

filter_files_by_pathsMethod · 0.95

_process_files_concurrentlyMethod · 0.95

_process_files_sequentiallyMethod · 0.95

RepoIndexClass · 0.85

Tested by

no test coverage detected