MCPcopy
hub / github.com/tirth8205/code-review-graph / parse_bytes

Method parse_bytes

code_review_graph/parser.py:946–1045  ·  view source on GitHub ↗

Parse pre-read bytes and return extracted nodes and edges. This avoids re-reading the file from disk, eliminating TOCTOU gaps when the caller has already read the bytes (e.g. for hashing).

(self, path: Path, source: bytes)

Source from the content-addressed store, hash-verified

944 return self.parse_bytes(path, source)
945
946 def parse_bytes(self, path: Path, source: bytes) -> tuple[list[NodeInfo], list[EdgeInfo]]:
947 """Parse pre-read bytes and return extracted nodes and edges.
948
949 This avoids re-reading the file from disk, eliminating TOCTOU gaps
950 when the caller has already read the bytes (e.g. for hashing).
951 """
952 language = self.detect_language(path)
953 if not language:
954 return [], []
955
956 # Vue SFCs: parse with vue parser, then delegate script blocks to JS/TS
957 if language == "vue":
958 return self._parse_vue(path, source)
959
960 # Svelte SFCs: same approach as Vue — extract <script> blocks
961 if language == "svelte":
962 return self._parse_svelte(path, source)
963
964 # Jupyter notebooks: extract code cells and parse as Python
965 if language == "notebook":
966 return self._parse_notebook(path, source)
967
968 # Databricks .py notebook exports. The header is ALWAYS the very
969 # first line, but the file may have CRLF line endings on Windows
970 # (git's core.autocrlf=true default). Match the first line robustly
971 # after stripping any trailing ``\r`` so the detection works on both
972 # platforms. See issue #239.
973 if language == "python":
974 first_newline = source.find(b"\n")
975 first_line = (
976 source[:first_newline].rstrip(b"\r")
977 if first_newline != -1
978 else source.rstrip(b"\r")
979 )
980 if first_line == b"# Databricks notebook source":
981 return self._parse_databricks_py_notebook(path, source)
982
983 # ReScript: regex-based parser (no tree-sitter grammar bundled).
984 if language == "rescript":
985 return self._parse_rescript(path, source)
986
987 # SQL: dedicated parser — tree-sitter for tables/views/functions +
988 # regex fallback for CREATE PROCEDURE (unsupported by the grammar).
989 if language == "sql":
990 return self._parse_sql(path, source)
991
992 parser = self._get_parser(language)
993 if not parser:
994 return [], []
995
996 tree = parser.parse(source)
997 nodes: list[NodeInfo] = []
998 edges: list[EdgeInfo] = []
999 file_path_str = str(path)
1000
1001 # File node
1002 test_file = _is_test_file(file_path_str)
1003 nodes.append(NodeInfo(

Calls 15

detect_language — Method · 0.95
_parse_vue — Method · 0.95
_parse_svelte — Method · 0.95
_parse_notebook — Method · 0.95
_parse_rescript — Method · 0.95
_parse_sql — Method · 0.95
_get_parser — Method · 0.95
_collect_file_scope — Method · 0.95
_extract_from_tree — Method · 0.95
_resolve_call_targets — Method · 0.95
_qualify — Method · 0.95