Parses source files using Tree-sitter and extracts structural information.
| 796 | |
| 797 | |
| 798 | class CodeParser: |
| 799 | """Parses source files using Tree-sitter and extracts structural information.""" |
| 800 | |
| 801 | _MODULE_CACHE_MAX = 15_000 # Evict cache to cap memory on huge monorepos |
| 802 | |
def __init__(self, repo_root: Optional[Path] = None) -> None:
    """Create a parser with empty caches and per-instance language tables.

    Args:
        repo_root: Optional repository root. When given, custom language
            definitions are loaded for it via ``load_custom_languages``
            and merged into private copies of the built-in tables.
    """
    # Caches start out empty.
    self._parsers: dict[str, object] = {}
    self._module_file_cache: dict[str, Optional[str]] = {}
    self._export_symbol_cache: dict[str, Optional[str]] = {}
    self._tsconfig_resolver = TsconfigResolver()
    # Per-parse cache of Dart pubspec root lookups; see #87
    self._dart_pubspec_cache: dict[tuple[str, str], Optional[Path]] = {}

    # Config-driven custom languages (.code-review-graph/languages.toml).
    # By default every table is just a reference to the shared module-level
    # constant. Only a repo that actually defines custom languages pays for
    # merged copies, which keeps other CodeParser instances (multi-repo
    # registry, worker processes for other repos) unaffected. See #320.
    self._extension_map: dict[str, str] = EXTENSION_TO_LANGUAGE
    self._class_types: dict[str, list[str]] = _CLASS_TYPES
    self._function_types: dict[str, list[str]] = _FUNCTION_TYPES
    self._import_types: dict[str, list[str]] = _IMPORT_TYPES
    self._call_types: dict[str, list[str]] = _CALL_TYPES
    self._custom_languages: dict[str, CustomLanguage] = {}

    if repo_root is None:
        return

    self._custom_languages = load_custom_languages(
        Path(repo_root),
        builtin_extensions=EXTENSION_TO_LANGUAGE,
        builtin_languages=_builtin_language_names(),
    )
    if not self._custom_languages:
        return

    # Switch to private shallow copies before adding anything, so the
    # shared built-in dicts are never mutated.
    self._extension_map = extensions = dict(EXTENSION_TO_LANGUAGE)
    self._class_types = class_nodes = dict(_CLASS_TYPES)
    self._function_types = function_nodes = dict(_FUNCTION_TYPES)
    self._import_types = import_nodes = dict(_IMPORT_TYPES)
    self._call_types = call_nodes = dict(_CALL_TYPES)

    for definition in self._custom_languages.values():
        for suffix in definition.extensions:
            extensions[suffix] = definition.name
        # Each table gets its own list so later edits cannot leak back
        # into the custom language definition.
        class_nodes[definition.name] = list(definition.class_node_types)
        function_nodes[definition.name] = list(definition.function_node_types)
        import_nodes[definition.name] = list(definition.import_node_types)
        call_nodes[definition.name] = list(definition.call_node_types)
| 840 | |
def _get_parser(self, language: str):  # type: ignore[arg-type]
    """Return the tree-sitter parser for *language*, creating it on first use.

    Successfully created parsers are stored in ``self._parsers``. When the
    grammar cannot be loaded, the failure is logged at debug level and
    ``None`` is returned without caching anything, so a later call will
    try again.
    """
    # Fast path: parser was already built for this language.
    if language in self._parsers:
        return self._parsers[language]

    # Custom languages map their name onto a packaged grammar.
    custom_definition = self._custom_languages.get(language)
    if custom_definition is None:
        grammar_name = language
    else:
        grammar_name = custom_definition.grammar

    try:
        parser = tslp.get_parser(grammar_name)  # type: ignore[arg-type]
    except (LookupError, ValueError, ImportError) as exc:
        # language not packaged, or grammar load failed
        logger.debug("tree-sitter parser unavailable for %s: %s", language, exc)
        return None

    self._parsers[language] = parser
    return parser
| 853 | |
| 854 | def detect_language(self, path: Path) -> Optional[str]: |
| 855 | """Map a file path to its language name. |
no outgoing calls