MCPcopy
hub / github.com/tirth8205/code-review-graph / full_build

Function full_build

code_review_graph/incremental.py:820–916  ·  view source on GitHub ↗

Full rebuild of the entire graph. Args: `repo_root` — repository root directory; `store` — graph database store; `recurse_submodules` — if True, include files from git submodules; when *None*, falls back to the ``CRG_RECURSE_SUBMODULES`` environment variable.

(
    repo_root: Path,
    store: GraphStore,
    recurse_submodules: bool | None = None,
)

Source from the content-addressed store, hash-verified

818
819
820def full_build(
821 repo_root: Path,
822 store: GraphStore,
823 recurse_submodules: bool | None = None,
824) -> dict:
825 """Full rebuild of the entire graph.
826
827 Args:
828 repo_root: Repository root directory.
829 store: Graph database store.
830 recurse_submodules: If True, include files from git submodules.
831 When *None*, falls back to ``CRG_RECURSE_SUBMODULES`` env var.
832 """
833 parser = CodeParser(repo_root)
834 files = collect_all_files(repo_root, recurse_submodules)
835
836 # Purge stale data from files no longer on disk
837 existing_files = set(store.get_all_files())
838 current_abs = {str(repo_root / f) for f in files}
839 stale_files = existing_files - current_abs
840 for stale in stale_files:
841 store.remove_file_data(stale)
842 # Ensure deletions are persisted before store_file_nodes_edges()
843 # starts its own explicit transaction via BEGIN IMMEDIATE.
844 if stale_files:
845 store.commit()
846
847 total_nodes = 0
848 total_edges = 0
849 errors = []
850 file_count = len(files)
851
852 use_serial = os.environ.get("CRG_SERIAL_PARSE", "") == "1"
853
854 if use_serial or file_count < 8:
855 # Serial fallback (for debugging or tiny repos)
856 for i, rel_path in enumerate(files, 1):
857 full_path = repo_root / rel_path
858 try:
859 source = full_path.read_bytes()
860 fhash = hashlib.sha256(source).hexdigest()
861 nodes, edges = parser.parse_bytes(full_path, source)
862 store.store_file_nodes_edges(str(full_path), nodes, edges, fhash)
863 total_nodes += len(nodes)
864 total_edges += len(edges)
865 except (OSError, PermissionError) as e:
866 errors.append({"file": rel_path, "error": str(e)})
867 except Exception as e:
868 logger.warning("Error parsing %s: %s", rel_path, e)
869 errors.append({"file": rel_path, "error": str(e)})
870 if i % 50 == 0 or i == file_count:
871 logger.info("Progress: %d/%d files parsed", i, file_count)
872 else:
873 # Parallel parsing — store calls remain serial (SQLite single-writer).
874 # Executor kind auto-selected: process on Linux/macOS/Windows-TTY,
875 # thread on Windows-MCP-stdio to avoid pipe-handle inheritance
876 # deadlock (issues #46, #136). Override via CRG_PARSE_EXECUTOR env.
877 args_list = [(rel_path, str(repo_root)) for rel_path in files]

Calls 14

parse_bytes · Method · 0.95
CodeParser · Class · 0.85
collect_all_files · Function · 0.85
_make_executor · Function · 0.85
_store_vcs_metadata · Function · 0.85
_run_rescript_resolver · Function · 0.85
_run_spring_resolver · Function · 0.85
_run_temporal_resolver · Function · 0.85
get_all_files · Method · 0.80
remove_file_data · Method · 0.80
commit · Method · 0.80
get · Method · 0.80