Extract classes, objects, functions, and imports from a .kt/.kts file.
(path: Path)
| 1586 | |
| 1587 | |
| 1588 | def extract_kotlin(path: Path) -> dict: |
| 1589 | """Extract classes, objects, functions, and imports from a .kt/.kts file.""" |
| 1590 | try: |
| 1591 | import tree_sitter_kotlin as tskotlin |
| 1592 | from tree_sitter import Language, Parser |
| 1593 | except ImportError: |
| 1594 | return {"nodes": [], "edges": [], "error": "tree-sitter-kotlin not installed"} |
| 1595 | |
| 1596 | try: |
| 1597 | language = Language(tskotlin.language()) |
| 1598 | parser = Parser(language) |
| 1599 | source = path.read_bytes() |
| 1600 | tree = parser.parse(source) |
| 1601 | root = tree.root_node |
| 1602 | except Exception as e: |
| 1603 | return {"nodes": [], "edges": [], "error": str(e)} |
| 1604 | |
| 1605 | stem = path.stem |
| 1606 | str_path = str(path) |
| 1607 | nodes: list[dict] = [] |
| 1608 | edges: list[dict] = [] |
| 1609 | seen_ids: set[str] = set() |
| 1610 | |
def add_node(nid: str, label: str, line: int) -> None:
    """Register a graph node once per id.

    Closure helper: it reads ``seen_ids``, ``nodes`` and ``str_path`` from
    the enclosing scope. A repeated ``nid`` is ignored, so the first
    label/line recorded for an id is the one that is kept.

    Args:
        nid: Node identifier used for de-duplication.
        label: Display label stored on the node.
        line: Line number rendered into ``source_location`` as ``L<line>``.
    """
    # Guard clause: an id that was already recorded is left untouched.
    if nid in seen_ids:
        return
    seen_ids.add(nid)
    record = dict(
        id=nid,
        label=label,
        file_type="code",
        source_file=str_path,
        source_location=f"L{line}",
    )
    nodes.append(record)
| 1621 | |
def add_edge_raw(
    src: str,
    tgt: str,
    relation: str,
    line: int,
    confidence: str = "EXTRACTED",
    weight: float = 1.0,
) -> None:
    """Append one edge record to the enclosing scope's ``edges`` list.

    Closure helper: it reads ``edges`` and ``str_path`` from the enclosing
    scope. Unlike node registration, no de-duplication is performed here;
    every call appends a new entry.

    Args:
        src: Id of the source node.
        tgt: Id of the target node.
        relation: Relation name stored on the edge.
        line: Line number rendered into ``source_location`` as ``L<line>``.
        confidence: Confidence tag stored on the edge.
        weight: Numeric weight stored on the edge.
    """
    edge = dict(
        source=src,
        target=tgt,
        relation=relation,
        confidence=confidence,
        source_file=str_path,
        source_location=f"L{line}",
        weight=weight,
    )
    edges.append(edge)
| 1632 | |
| 1633 | file_nid = _make_id(stem) |
| 1634 | add_node(file_nid, path.name, 1) |
| 1635 | |
| 1636 | function_bodies: list[tuple[str, object]] = [] |
| 1637 | |
| 1638 | def walk(node, parent_class_nid: str | None = None) -> None: |
| 1639 | t = node.type |
| 1640 | |
| 1641 | if t == "import_header": |
| 1642 | for child in node.children: |
| 1643 | if child.type == "identifier": |
| 1644 | raw = source[child.start_byte:child.end_byte].decode("utf-8", errors="replace") |
| 1645 | tgt_nid = _make_id(raw) |