MCPcopy
hub / github.com/tirth8205/code-review-graph / CodeParser

Class CodeParser

code_review_graph/parser.py:798–6957  ·  view source on GitHub ↗

Parses source files using Tree-sitter and extracts structural information.

Source from the content-addressed store, hash-verified

796
797
798class CodeParser:
799 """Parses source files using Tree-sitter and extracts structural information."""
800
801 _MODULE_CACHE_MAX = 15_000 # Evict cache to cap memory on huge monorepos
802
def __init__(self, repo_root: Optional[Path] = None) -> None:
    """Initialise parser caches and the per-language lookup tables.

    Args:
        repo_root: Optional repository root. When given, custom languages
            are loaded for it via ``load_custom_languages``; when omitted,
            no custom languages are loaded and the built-in tables are
            used as-is.
    """
    self._parsers: dict[str, object] = {}
    self._module_file_cache: dict[str, Optional[str]] = {}
    self._export_symbol_cache: dict[str, Optional[str]] = {}
    self._tsconfig_resolver = TsconfigResolver()
    # Per-parse cache of Dart pubspec root lookups; see #87
    self._dart_pubspec_cache: dict[tuple[str, str], Optional[Path]] = {}

    # Config-driven custom languages (.code-review-graph/languages.toml).
    # By default every table below is the shared module-level constant
    # itself (no copy). Only a repo that actually defines custom languages
    # makes this instance switch to merged private copies, so other
    # CodeParser instances (multi-repo registry, worker processes for
    # other repos) are never affected. See #320.
    self._extension_map: dict[str, str] = EXTENSION_TO_LANGUAGE
    self._class_types: dict[str, list[str]] = _CLASS_TYPES
    self._function_types: dict[str, list[str]] = _FUNCTION_TYPES
    self._import_types: dict[str, list[str]] = _IMPORT_TYPES
    self._call_types: dict[str, list[str]] = _CALL_TYPES
    self._custom_languages: dict[str, CustomLanguage] = {}

    if repo_root is None:
        return

    self._custom_languages = load_custom_languages(
        Path(repo_root),
        builtin_extensions=EXTENSION_TO_LANGUAGE,
        builtin_languages=_builtin_language_names(),
    )
    if not self._custom_languages:
        # Nothing custom configured: keep sharing the built-in tables.
        return

    # Shallow-copy the top-level dicts so the additions below stay local
    # to this instance; the built-in value lists are still shared.
    self._extension_map = dict(EXTENSION_TO_LANGUAGE)
    self._class_types = dict(_CLASS_TYPES)
    self._function_types = dict(_FUNCTION_TYPES)
    self._import_types = dict(_IMPORT_TYPES)
    self._call_types = dict(_CALL_TYPES)

    for lang_def in self._custom_languages.values():
        lang_name = lang_def.name
        for extension in lang_def.extensions:
            self._extension_map[extension] = lang_name
        # Fresh lists, so later edits to these tables cannot leak back
        # into the loaded language definition.
        self._class_types[lang_name] = list(lang_def.class_node_types)
        self._function_types[lang_name] = list(lang_def.function_node_types)
        self._import_types[lang_name] = list(lang_def.import_node_types)
        self._call_types[lang_name] = list(lang_def.call_node_types)
840
def _get_parser(self, language: str):  # type: ignore[arg-type]
    """Return the Tree-sitter parser for ``language``, or ``None``.

    Parsers are created on first request and memoised in ``self._parsers``.
    A custom language is resolved to the grammar named by its definition;
    any other language name is used as the grammar name directly.

    A load failure (``LookupError``, ``ValueError`` or ``ImportError``) is
    logged at debug level and yields ``None``. The failure is not
    memoised, so a later call for the same language tries again.
    """
    try:
        return self._parsers[language]
    except KeyError:
        pass  # Not loaded yet; fall through and build it.

    # Custom languages map their name onto a packaged grammar.
    custom_def = self._custom_languages.get(language)
    grammar_name = language if custom_def is None else custom_def.grammar

    try:
        loaded = tslp.get_parser(grammar_name)  # type: ignore[arg-type]
    except (LookupError, ValueError, ImportError) as exc:
        # language not packaged, or grammar load failed
        logger.debug("tree-sitter parser unavailable for %s: %s", language, exc)
        return None

    self._parsers[language] = loaded
    return loaded
853
854 def detect_language(self, path: Path) -> Optional[str]:
855 """Map a file path to its language name.

Calls

no outgoing calls