Parse notebook cells grouped by language.

Args:
    path: Notebook file path.
    cells: List of CellInfo with index, language, and source.
    default_language: Default language for the File node.
(
self,
path: Path,
cells: list[CellInfo],
default_language: str,
)
| 1370 | return self._parse_notebook_cells(path, cells, kernel_lang) |
| 1371 | |
| 1372 | def _parse_notebook_cells( |
| 1373 | self, |
| 1374 | path: Path, |
| 1375 | cells: list[CellInfo], |
| 1376 | default_language: str, |
| 1377 | ) -> tuple[list[NodeInfo], list[EdgeInfo]]: |
| 1378 | """Parse notebook cells grouped by language. |
| 1379 | |
| 1380 | Args: |
| 1381 | path: Notebook file path. |
| 1382 | cells: List of CellInfo with index, language, and source. |
| 1383 | default_language: Default language for the File node. |
| 1384 | """ |
| 1385 | file_path_str = str(path) |
| 1386 | test_file = _is_test_file(file_path_str) |
| 1387 | |
| 1388 | # Group cells by language |
| 1389 | lang_cells: dict[str, list[CellInfo]] = {} |
| 1390 | for cell in cells: |
| 1391 | lang_cells.setdefault(cell.language, []).append(cell) |
| 1392 | |
| 1393 | all_nodes: list[NodeInfo] = [] |
| 1394 | all_edges: list[EdgeInfo] = [] |
| 1395 | |
| 1396 | # Track offsets per language for cell_index tagging. |
| 1397 | # Each language group is parsed independently by Tree-sitter, |
| 1398 | # so line numbers restart at 1 for each group. |
| 1399 | all_cell_offsets: list[tuple[int, int, int]] = [] |
| 1400 | max_line = 1 |
| 1401 | |
| 1402 | for lang, lang_group in lang_cells.items(): |
| 1403 | if lang == "sql": |
| 1404 | # SQL: regex-based table extraction |
| 1405 | for cell in lang_group: |
| 1406 | for match in _SQL_TABLE_RE.finditer(cell.source): |
| 1407 | table_name = match.group(1).replace("`", "") |
| 1408 | all_edges.append(EdgeInfo( |
| 1409 | kind="IMPORTS_FROM", |
| 1410 | source=file_path_str, |
| 1411 | target=table_name, |
| 1412 | file_path=file_path_str, |
| 1413 | line=1, |
| 1414 | )) |
| 1415 | continue |
| 1416 | |
| 1417 | if lang not in ("python", "r"): |
| 1418 | continue |
| 1419 | |
| 1420 | ts_parser = self._get_parser(lang) |
| 1421 | if not ts_parser: |
| 1422 | continue |
| 1423 | |
| 1424 | # Concatenate cells of this language. |
| 1425 | # Line numbers start at 1 for each language group because |
| 1426 | # Tree-sitter parses each concatenation independently. |
| 1427 | code_chunks: list[str] = [] |
| 1428 | cell_offsets: list[tuple[int, int, int]] = [] |
| 1429 | current_line = 1 |
no test coverage detected