Extract classes, interfaces, methods, constructors, and imports from a .java file.
(path: Path)
| 753 | |
| 754 | |
| 755 | def extract_java(path: Path) -> dict: |
| 756 | """Extract classes, interfaces, methods, constructors, and imports from a .java file.""" |
| 757 | try: |
| 758 | import tree_sitter_java as tsjava |
| 759 | from tree_sitter import Language, Parser |
| 760 | except ImportError: |
| 761 | return {"nodes": [], "edges": [], "error": "tree-sitter-java not installed"} |
| 762 | |
| 763 | try: |
| 764 | language = Language(tsjava.language()) |
| 765 | parser = Parser(language) |
| 766 | source = path.read_bytes() |
| 767 | tree = parser.parse(source) |
| 768 | root = tree.root_node |
| 769 | except Exception as e: |
| 770 | return {"nodes": [], "edges": [], "error": str(e)} |
| 771 | |
| 772 | stem = path.stem |
| 773 | str_path = str(path) |
| 774 | nodes: list[dict] = [] |
| 775 | edges: list[dict] = [] |
| 776 | seen_ids: set[str] = set() |
| 777 | |
def add_node(nid: str, label: str, line: int) -> None:
    """Record a graph node the first time its id is encountered.

    Later calls with an id already present in ``seen_ids`` are no-ops, so
    the first label and line registered for an id are the ones kept.

    Args:
        nid: Identifier stored under the ``"id"`` key.
        label: Display text stored under the ``"label"`` key.
        line: Line number, rendered as ``"L<line>"`` in ``source_location``.
    """
    # Guard clause: an id is only ever appended to ``nodes`` once.
    if nid in seen_ids:
        return
    seen_ids.add(nid)
    record = {
        "id": nid,
        "label": label,
        "file_type": "code",
        "source_file": str_path,
        "source_location": f"L{line}",
    }
    nodes.append(record)
| 788 | |
def add_edge_raw(src: str, tgt: str, relation: str, line: int, confidence: str = "EXTRACTED", weight: float = 1.0) -> None:
    """Append one edge record to ``edges``.

    No de-duplication is performed: every call adds a new entry.

    Args:
        src: Value stored under the ``"source"`` key.
        tgt: Value stored under the ``"target"`` key.
        relation: Value stored under the ``"relation"`` key.
        line: Line number, rendered as ``"L<line>"`` in ``source_location``.
        confidence: Value stored under ``"confidence"``; defaults to
            ``"EXTRACTED"``.
        weight: Value stored under ``"weight"``; defaults to ``1.0``.
    """
    location = f"L{line}"
    edge_record = {
        "source": src,
        "target": tgt,
        "relation": relation,
        "confidence": confidence,
        "source_file": str_path,
        "source_location": location,
        "weight": weight,
    }
    edges.append(edge_record)
| 799 | |
| 800 | file_nid = _make_id(stem) |
| 801 | add_node(file_nid, path.name, 1) |
| 802 | |
| 803 | function_bodies: list[tuple[str, object]] = [] |
| 804 | |
| 805 | def _walk_scoped_identifier(node) -> str: |
| 806 | """Reconstruct a dotted import path from nested scoped_identifier nodes.""" |
| 807 | parts: list[str] = [] |
| 808 | cur = node |
| 809 | while cur: |
| 810 | if cur.type == "scoped_identifier": |
| 811 | name_node = cur.child_by_field_name("name") |
| 812 | if name_node: |