Collect all parseable files in the repo, respecting ignore patterns.

Args:
    repo_root: Repository root directory.
    recurse_submodules: If True, include files from git submodules. When *None*, falls back to the ``CRG_RECURSE_SUBMODULES`` env var.
(
repo_root: Path,
recurse_submodules: bool | None = None,
)
| 665 | |
| 666 | |
def collect_all_files(
    repo_root: Path,
    recurse_submodules: bool | None = None,
) -> list[str]:
    """Collect all parseable files in the repo, respecting ignore patterns.

    Candidates come from ``get_all_tracked_files``; when that returns nothing,
    every regular file found by walking ``repo_root`` is considered instead.
    A candidate is kept only if it is not ignored, its path stays within
    conservative OS length limits, it is a regular file that is not a symlink,
    the parser detects a language for it, and it is not binary.

    Args:
        repo_root: Repository root directory.
        recurse_submodules: If True, include files from git submodules.
            When *None*, falls back to ``CRG_RECURSE_SUBMODULES`` env var.

    Returns:
        The surviving candidate paths, in candidate order. Fallback paths are
        relative to ``repo_root``.
    """
    ignore_patterns = _load_ignore_patterns(repo_root)
    parser = CodeParser(repo_root)
    files: list[str] = []

    # Prefer git ls-files for tracked files
    tracked = get_all_tracked_files(repo_root, recurse_submodules)
    if tracked:
        candidates = tracked
    else:
        # Fallback: walk directory
        candidates = [str(p.relative_to(repo_root)) for p in repo_root.rglob("*") if p.is_file()]

    for rel_path in candidates:
        if _should_ignore(rel_path, ignore_patterns):
            continue
        try:
            full_path = repo_root / rel_path
        except (OSError, ValueError):
            logger.debug("Skipping path that cannot be constructed: %s", rel_path)
            continue
        # Skip paths that would exceed OS filename limits (macOS: 255 bytes
        # per component, ~1024 total; Windows: 260 total).
        # File names holding undecodable bytes arrive as lone surrogates, and a
        # strict UTF-8 encode would raise UnicodeEncodeError and abort the whole
        # scan. "replace" never raises and yields one byte per surrogate, which
        # matches the single raw byte each escaped surrogate stands for.
        if len(str(full_path)) > 1000 or any(
            len(part.encode("utf-8", "replace")) > 255 for part in full_path.parts
        ):
            logger.debug("Skipping overlong path: %s", rel_path[:120])
            continue
        if not full_path.is_file():
            continue
        # is_file() follows symlinks, so links to regular files are dropped here.
        if full_path.is_symlink():
            continue
        if parser.detect_language(full_path) is None:
            continue
        if _is_binary(full_path):
            continue
        files.append(rel_path)

    return files
| 714 | |
| 715 | |
# Integer setting read once, at import time, from the ``CRG_DEPENDENT_HOPS``
# environment variable (default "2"). A non-integer value makes ``int()``
# raise ValueError here.
# NOTE(review): presumably caps how many hops of dependents are followed —
# confirm against the code that uses it.
_MAX_DEPENDENT_HOPS = int(os.environ.get("CRG_DEPENDENT_HOPS", "2"))