MCPcopy
hub / github.com/tirth8205/code-review-graph / _parse_notebook

Method _parse_notebook

code_review_graph/parser.py:1288–1370  ·  view source on GitHub ↗

Parse a Jupyter notebook by extracting code cells.

(self, path: Path, source: bytes)

Source from the content-addressed store, hash-verified

1286 return all_nodes, all_edges
1287
1288 def _parse_notebook(
1289 self, path: Path, source: bytes,
1290 ) -> tuple[list[NodeInfo], list[EdgeInfo]]:
1291 """Parse a Jupyter notebook by extracting code cells."""
1292 try:
1293 nb = json.loads(source)
1294 except (json.JSONDecodeError, UnicodeDecodeError):
1295 return [], []
1296
1297 # Determine kernel language
1298 kernel_lang = (
1299 nb.get("metadata", {}).get("kernelspec", {}).get("language")
1300 or nb.get("metadata", {}).get("language_info", {}).get("name")
1301 or "python"
1302 ).lower()
1303
1304 # Only parse supported languages
1305 supported = {"python", "r"}
1306 if kernel_lang not in supported:
1307 return [], []
1308
1309 # Build CellInfo list from code cells
1310 cells: list[CellInfo] = []
1311 magic_lang_map = {
1312 "%python": "python",
1313 "%sql": "sql",
1314 "%r": "r",
1315 }
1316 skip_magics = {"%scala", "%md", "%sh"}
1317
1318 for cell_idx, cell in enumerate(nb.get("cells", [])):
1319 if cell.get("cell_type") != "code":
1320 continue
1321 lines = cell.get("source", [])
1322 if isinstance(lines, str):
1323 lines = lines.splitlines(keepends=True)
1324 if not lines:
1325 continue
1326
1327 # Check first line for language-switching magic
1328 first_line = lines[0].strip()
1329 cell_lang = kernel_lang
1330 cell_lines = lines
1331
1332 for magic, lang in magic_lang_map.items():
1333 if first_line == magic or first_line.startswith(magic + " "):
1334 cell_lang = lang
1335 cell_lines = lines[1:] # strip magic line
1336 break
1337 else:
1338 # Check for skip magics
1339 for skip in skip_magics:
1340 if first_line == skip or first_line.startswith(skip + " "):
1341 cell_lines = []
1342 break
1343
1344 # Filter %pip, ! lines from Python/R content (not SQL)
1345 if cell_lang in ("python", "r"):

Callers 1

parse_bytes — Method · 0.95

Calls 5

_parse_notebook_cells — Method · 0.95
CellInfo — Class · 0.85
NodeInfo — Class · 0.85
get — Method · 0.80
_is_test_file — Function · 0.70

Tested by

no test coverage detected