Parse a Jupyter notebook by extracting code cells.
(
self, path: Path, source: bytes,
)
| 1286 | return all_nodes, all_edges |
| 1287 | |
| 1288 | def _parse_notebook( |
| 1289 | self, path: Path, source: bytes, |
| 1290 | ) -> tuple[list[NodeInfo], list[EdgeInfo]]: |
| 1291 | """Parse a Jupyter notebook by extracting code cells.""" |
| 1292 | try: |
| 1293 | nb = json.loads(source) |
| 1294 | except (json.JSONDecodeError, UnicodeDecodeError): |
| 1295 | return [], [] |
| 1296 | |
| 1297 | # Determine kernel language |
| 1298 | kernel_lang = ( |
| 1299 | nb.get("metadata", {}).get("kernelspec", {}).get("language") |
| 1300 | or nb.get("metadata", {}).get("language_info", {}).get("name") |
| 1301 | or "python" |
| 1302 | ).lower() |
| 1303 | |
| 1304 | # Only parse supported languages |
| 1305 | supported = {"python", "r"} |
| 1306 | if kernel_lang not in supported: |
| 1307 | return [], [] |
| 1308 | |
| 1309 | # Build CellInfo list from code cells |
| 1310 | cells: list[CellInfo] = [] |
| 1311 | magic_lang_map = { |
| 1312 | "%python": "python", |
| 1313 | "%sql": "sql", |
| 1314 | "%r": "r", |
| 1315 | } |
| 1316 | skip_magics = {"%scala", "%md", "%sh"} |
| 1317 | |
| 1318 | for cell_idx, cell in enumerate(nb.get("cells", [])): |
| 1319 | if cell.get("cell_type") != "code": |
| 1320 | continue |
| 1321 | lines = cell.get("source", []) |
| 1322 | if isinstance(lines, str): |
| 1323 | lines = lines.splitlines(keepends=True) |
| 1324 | if not lines: |
| 1325 | continue |
| 1326 | |
| 1327 | # Check first line for language-switching magic |
| 1328 | first_line = lines[0].strip() |
| 1329 | cell_lang = kernel_lang |
| 1330 | cell_lines = lines |
| 1331 | |
| 1332 | for magic, lang in magic_lang_map.items(): |
| 1333 | if first_line == magic or first_line.startswith(magic + " "): |
| 1334 | cell_lang = lang |
| 1335 | cell_lines = lines[1:] # strip magic line |
| 1336 | break |
| 1337 | else: |
| 1338 | # Check for skip magics |
| 1339 | for skip in skip_magics: |
| 1340 | if first_line == skip or first_line.startswith(skip + " "): |
| 1341 | cell_lines = [] |
| 1342 | break |
| 1343 | |
| 1344 | # Filter %pip, ! lines from Python/R content (not SQL) |
| 1345 | if cell_lang in ("python", "r"): |
no test coverage detected