Parse pre-read bytes and return the extracted nodes and edges. Parsing the caller-supplied bytes avoids re-reading the file from disk, which eliminates time-of-check/time-of-use (TOCTOU) gaps when the caller has already read the bytes (e.g. for hashing).
(self, path: Path, source: bytes)
| 944 | return self.parse_bytes(path, source) |
| 945 | |
| 946 | def parse_bytes(self, path: Path, source: bytes) -> tuple[list[NodeInfo], list[EdgeInfo]]: |
| 947 | """Parse pre-read bytes and return extracted nodes and edges. |
| 948 | |
| 949 | This avoids re-reading the file from disk, eliminating TOCTOU gaps |
| 950 | when the caller has already read the bytes (e.g. for hashing). |
| 951 | """ |
| 952 | language = self.detect_language(path) |
| 953 | if not language: |
| 954 | return [], [] |
| 955 | |
| 956 | # Vue SFCs: parse with vue parser, then delegate script blocks to JS/TS |
| 957 | if language == "vue": |
| 958 | return self._parse_vue(path, source) |
| 959 | |
| 960 | # Svelte SFCs: same approach as Vue — extract <script> blocks |
| 961 | if language == "svelte": |
| 962 | return self._parse_svelte(path, source) |
| 963 | |
| 964 | # Jupyter notebooks: extract code cells and parse as Python |
| 965 | if language == "notebook": |
| 966 | return self._parse_notebook(path, source) |
| 967 | |
| 968 | # Databricks .py notebook exports. The header is ALWAYS the very |
| 969 | # first line, but the file may have CRLF line endings on Windows |
| 970 | # (git's core.autocrlf=true default). Match the first line robustly |
| 971 | # after stripping any trailing ``\r`` so the detection works on both |
| 972 | # platforms. See issue #239. |
| 973 | if language == "python": |
| 974 | first_newline = source.find(b"\n") |
| 975 | first_line = ( |
| 976 | source[:first_newline].rstrip(b"\r") |
| 977 | if first_newline != -1 |
| 978 | else source.rstrip(b"\r") |
| 979 | ) |
| 980 | if first_line == b"# Databricks notebook source": |
| 981 | return self._parse_databricks_py_notebook(path, source) |
| 982 | |
| 983 | # ReScript: regex-based parser (no tree-sitter grammar bundled). |
| 984 | if language == "rescript": |
| 985 | return self._parse_rescript(path, source) |
| 986 | |
| 987 | # SQL: dedicated parser — tree-sitter for tables/views/functions + |
| 988 | # regex fallback for CREATE PROCEDURE (unsupported by the grammar). |
| 989 | if language == "sql": |
| 990 | return self._parse_sql(path, source) |
| 991 | |
| 992 | parser = self._get_parser(language) |
| 993 | if not parser: |
| 994 | return [], [] |
| 995 | |
| 996 | tree = parser.parse(source) |
| 997 | nodes: list[NodeInfo] = [] |
| 998 | edges: list[EdgeInfo] = [] |
| 999 | file_path_str = str(path) |
| 1000 | |
| 1001 | # File node |
| 1002 | test_file = _is_test_file(file_path_str) |
| 1003 | nodes.append(NodeInfo( |