Full rebuild of the entire graph. Args: repo_root: Repository root directory. store: Graph database store. recurse_submodules: If True, include files from git submodules; when *None*, falls back to the ``CRG_RECURSE_SUBMODULES`` environment variable.
(
repo_root: Path,
store: GraphStore,
recurse_submodules: bool | None = None,
)
| 818 | |
| 819 | |
| 820 | def full_build( |
| 821 | repo_root: Path, |
| 822 | store: GraphStore, |
| 823 | recurse_submodules: bool | None = None, |
| 824 | ) -> dict: |
| 825 | """Full rebuild of the entire graph. |
| 826 | |
| 827 | Args: |
| 828 | repo_root: Repository root directory. |
| 829 | store: Graph database store. |
| 830 | recurse_submodules: If True, include files from git submodules. |
| 831 | When *None*, falls back to ``CRG_RECURSE_SUBMODULES`` env var. |
| 832 | """ |
| 833 | parser = CodeParser(repo_root) |
| 834 | files = collect_all_files(repo_root, recurse_submodules) |
| 835 | |
| 836 | # Purge stale data from files no longer on disk |
| 837 | existing_files = set(store.get_all_files()) |
| 838 | current_abs = {str(repo_root / f) for f in files} |
| 839 | stale_files = existing_files - current_abs |
| 840 | for stale in stale_files: |
| 841 | store.remove_file_data(stale) |
| 842 | # Ensure deletions are persisted before store_file_nodes_edges() |
| 843 | # starts its own explicit transaction via BEGIN IMMEDIATE. |
| 844 | if stale_files: |
| 845 | store.commit() |
| 846 | |
| 847 | total_nodes = 0 |
| 848 | total_edges = 0 |
| 849 | errors = [] |
| 850 | file_count = len(files) |
| 851 | |
| 852 | use_serial = os.environ.get("CRG_SERIAL_PARSE", "") == "1" |
| 853 | |
| 854 | if use_serial or file_count < 8: |
| 855 | # Serial fallback (for debugging or tiny repos) |
| 856 | for i, rel_path in enumerate(files, 1): |
| 857 | full_path = repo_root / rel_path |
| 858 | try: |
| 859 | source = full_path.read_bytes() |
| 860 | fhash = hashlib.sha256(source).hexdigest() |
| 861 | nodes, edges = parser.parse_bytes(full_path, source) |
| 862 | store.store_file_nodes_edges(str(full_path), nodes, edges, fhash) |
| 863 | total_nodes += len(nodes) |
| 864 | total_edges += len(edges) |
| 865 | except (OSError, PermissionError) as e: |
| 866 | errors.append({"file": rel_path, "error": str(e)}) |
| 867 | except Exception as e: |
| 868 | logger.warning("Error parsing %s: %s", rel_path, e) |
| 869 | errors.append({"file": rel_path, "error": str(e)}) |
| 870 | if i % 50 == 0 or i == file_count: |
| 871 | logger.info("Progress: %d/%d files parsed", i, file_count) |
| 872 | else: |
| 873 | # Parallel parsing — store calls remain serial (SQLite single-writer). |
| 874 | # Executor kind auto-selected: process on Linux/macOS/Windows-TTY, |
| 875 | # thread on Windows-MCP-stdio to avoid pipe-handle inheritance |
| 876 | # deadlock (issues #46, #136). Override via CRG_PARSE_EXECUTOR env. |
| 877 | args_list = [(rel_path, str(repo_root)) for rel_path in files] |