MCPcopy
hub / github.com/colbymchenry/codegraph / scanDirectoryWalk

Function scanDirectoryWalk

src/extraction/index.ts:989–1110  ·  view source on GitHub ↗

Filesystem walk fallback for non-git projects.

(
  rootDir: string,
  onProgress?: (current: number, file: string) => void
)

Source from the content-addressed store, hash-verified

987 * Filesystem walk fallback for non-git projects.
988 */
989function scanDirectoryWalk(
990 rootDir: string,
991 onProgress?: (current: number, file: string) => void
992): string[] {
993 const files: string[] = [];
994 let count = 0;
995 const visitedDirs = new Set<string>();
996 // Custom extension → language overrides from the project's codegraph.json.
997 const overrides = loadExtensionOverrides(rootDir);
998
999 // A .gitignore matcher scoped to the directory that declared it. Patterns in
1000 // a nested .gitignore are relative to that directory, so we keep the dir
1001 // alongside the matcher and test paths relative to it — mirroring how git
1002 // applies .gitignore files at every level.
1003 interface ScopedIgnore {
1004 dir: string;
1005 ig: Ignore;
1006 }
1007
// Builds the scoped matcher for the .gitignore located directly inside `dir`.
// Yields null when the directory has no .gitignore, or when
// readGitignorePatterns returns nothing usable for it.
const loadIgnore = (dir: string): ScopedIgnore | null => {
  const gitignoreFile = path.join(dir, '.gitignore');
  if (fs.existsSync(gitignoreFile)) {
    // Per the original note (issue #682): readGitignorePatterns is defensive —
    // a non-UTF-8 (DLP-encrypted) or uncompilable .gitignore is skipped/filtered
    // with a warning rather than thrown, so later `.ignores()` calls can't crash.
    const rules = readGitignorePatterns(gitignoreFile);
    if (rules) {
      // Keep the declaring directory next to the matcher: its patterns are
      // relative to that directory.
      return { dir, ig: ignore().add(rules) };
    }
  }
  return null;
};
1017
// Reports whether any of the scoped matchers ignores `fullPath`.
//
//   fullPath  path of the entry being tested
//   isDir     true when the entry is a directory
//   matchers  .gitignore matchers, each paired with the directory it applies to
//
// Each matcher is consulted with the path expressed relative to its own
// directory; matchers whose directory does not contain the path are skipped.
const isIgnored = (fullPath: string, isDir: boolean, matchers: ScopedIgnore[]): boolean => {
  for (const { dir, ig } of matchers) {
    let rel = normalizePath(path.relative(dir, fullPath));
    // Skip when the path is the matcher's directory itself (empty) or lies
    // outside it. Only a genuine parent segment means "outside": a bare
    // startsWith('..') would also reject entries whose *name* begins with two
    // dots (e.g. `..data`), leaving them unmatched by this .gitignore.
    // NOTE(review): assumes normalizePath yields '/'-separated paths — confirm.
    if (!rel || rel === '..' || rel.startsWith('../')) continue;
    if (isDir) rel += '/'; // dir-only rules (e.g. `build/`) only match with the slash
    if (ig.ignores(rel)) return true;
  }
  return false;
};
1027
1028 function walk(dir: string, matchers: ScopedIgnore[]): void {
1029 let realDir: string;
1030 try {
1031 realDir = fs.realpathSync(dir);
1032 } catch {
1033 logDebug('Skipping unresolvable directory', { dir });
1034 return;
1035 }
1036
1037 if (visitedDirs.has(realDir)) {
1038 logDebug('Skipping already-visited directory (symlink cycle)', { dir, realDir });
1039 return;
1040 }
1041 visitedDirs.add(realDir);
1042
1043 // This directory's own .gitignore (if present) applies to everything below it.
1044 // The root's .gitignore is already merged into the seeded base matcher (so a
1045 // negation there can override a built-in default), so skip it here.
1046 const own = dir === rootDir ? null : loadIgnore(dir);

Callers 2

scanDirectory · Function · 0.85
scanDirectoryAsync · Function · 0.85

Calls 4

loadExtensionOverrides · Function · 0.90
buildDefaultIgnore · Function · 0.85
loadExcludeMatcher · Function · 0.85
walk · Function · 0.70

Tested by

no test coverage detected