Extract functions and includes from a .c/.h file.
(path: Path)
| 923 | |
| 924 | |
| 925 | def extract_c(path: Path) -> dict: |
| 926 | """Extract functions and includes from a .c/.h file.""" |
| 927 | try: |
| 928 | import tree_sitter_c as tsc |
| 929 | from tree_sitter import Language, Parser |
| 930 | except ImportError: |
| 931 | return {"nodes": [], "edges": [], "error": "tree-sitter-c not installed"} |
| 932 | |
| 933 | try: |
| 934 | language = Language(tsc.language()) |
| 935 | parser = Parser(language) |
| 936 | source = path.read_bytes() |
| 937 | tree = parser.parse(source) |
| 938 | root = tree.root_node |
| 939 | except Exception as e: |
| 940 | return {"nodes": [], "edges": [], "error": str(e)} |
| 941 | |
| 942 | stem = path.stem |
| 943 | str_path = str(path) |
| 944 | nodes: list[dict] = [] |
| 945 | edges: list[dict] = [] |
| 946 | seen_ids: set[str] = set() |
| 947 | |
def add_node(nid: str, label: str, line: int) -> None:
    """Record a graph node the first time its id is seen.

    A call whose ``nid`` is already in ``seen_ids`` is a no-op, so the
    first registration of an id wins. New nodes are appended to ``nodes``
    tagged with this file's path and an ``L<line>`` source location.
    """
    # Guard clause: an id is only ever registered once.
    if nid in seen_ids:
        return
    seen_ids.add(nid)
    record = {
        "id": nid,
        "label": label,
        "file_type": "code",
        "source_file": str_path,
        "source_location": f"L{line}",
    }
    nodes.append(record)
| 958 | |
def add_edge_raw(
    src: str,
    tgt: str,
    relation: str,
    line: int,
    confidence: str = "EXTRACTED",
    weight: float = 1.0,
) -> None:
    """Append one edge record to ``edges``.

    No de-duplication is performed: every call adds a new entry. The edge
    is tagged with this file's path and an ``L<line>`` source location.
    """
    location = f"L{line}"
    edge = {
        "source": src,
        "target": tgt,
        "relation": relation,
        "confidence": confidence,
        "source_file": str_path,
        "source_location": location,
        "weight": weight,
    }
    edges.append(edge)
| 969 | |
| 970 | file_nid = _make_id(stem) |
| 971 | add_node(file_nid, path.name, 1) |
| 972 | |
| 973 | function_bodies: list[tuple[str, object]] = [] |
| 974 | |
| 975 | def _get_func_name_from_declarator(node) -> str | None: |
| 976 | """Recursively unwrap declarator to find the innermost identifier.""" |
| 977 | if node.type == "identifier": |
| 978 | return source[node.start_byte:node.end_byte].decode("utf-8", errors="replace") |
| 979 | decl = node.child_by_field_name("declarator") |
| 980 | if decl: |
| 981 | return _get_func_name_from_declarator(decl) |
| 982 | # fallback: search children for identifier |