Extract functions, methods, type declarations, and imports from a .go file.
(path: Path)
| 400 | |
| 401 | |
| 402 | def extract_go(path: Path) -> dict: |
| 403 | """Extract functions, methods, type declarations, and imports from a .go file.""" |
| 404 | try: |
| 405 | import tree_sitter_go as tsgo |
| 406 | from tree_sitter import Language, Parser |
| 407 | except ImportError: |
| 408 | return {"nodes": [], "edges": [], "error": "tree-sitter-go not installed"} |
| 409 | |
| 410 | try: |
| 411 | language = Language(tsgo.language()) |
| 412 | parser = Parser(language) |
| 413 | source = path.read_bytes() |
| 414 | tree = parser.parse(source) |
| 415 | root = tree.root_node |
| 416 | except Exception as e: |
| 417 | return {"nodes": [], "edges": [], "error": str(e)} |
| 418 | |
| 419 | stem = path.stem |
| 420 | str_path = str(path) |
| 421 | nodes: list[dict] = [] |
| 422 | edges: list[dict] = [] |
| 423 | seen_ids: set[str] = set() |
| 424 | |
def add_node(nid: str, label: str, line: int) -> None:
    """Register a code node, ignoring ids that were already registered.

    The first call for a given ``nid`` wins: later calls with the same id
    leave the stored label and location untouched.

    Args:
        nid: Identifier stored under the "id" key and tracked in ``seen_ids``.
        label: Text stored under the "label" key.
        line: Line number, stored as ``"L<line>"`` under "source_location".
    """
    # Guard clause: each id contributes at most one entry to ``nodes``.
    if nid in seen_ids:
        return
    seen_ids.add(nid)
    entry = {
        "id": nid,
        "label": label,
        "file_type": "code",
        "source_file": str_path,
        "source_location": f"L{line}",
    }
    nodes.append(entry)
| 435 | |
def add_edge_raw(
    src: str,
    tgt: str,
    relation: str,
    line: int,
    confidence: str = "EXTRACTED",
    weight: float = 1.0,
) -> None:
    """Append one edge record to ``edges``.

    No de-duplication or validation of the endpoints is performed here;
    every call adds a new record.

    Args:
        src: Value stored under the "source" key.
        tgt: Value stored under the "target" key.
        relation: Value stored under the "relation" key.
        line: Line number, stored as ``"L<line>"`` under "source_location".
        confidence: Value stored under the "confidence" key.
        weight: Value stored under the "weight" key.
    """
    location = f"L{line}"
    record = {
        "source": src,
        "target": tgt,
        "relation": relation,
        "confidence": confidence,
        "source_file": str_path,
        "source_location": location,
        "weight": weight,
    }
    edges.append(record)
| 446 | |
| 447 | file_nid = _make_id(stem) |
| 448 | add_node(file_nid, path.name, 1) |
| 449 | |
| 450 | function_bodies: list[tuple[str, object]] = [] |
| 451 | |
| 452 | def walk(node) -> None: |
| 453 | t = node.type |
| 454 | |
| 455 | if t == "function_declaration": |
| 456 | name_node = node.child_by_field_name("name") |
| 457 | if name_node: |
| 458 | func_name = source[name_node.start_byte:name_node.end_byte].decode("utf-8", errors="replace") |
| 459 | line = node.start_point[0] + 1 |