MCPcopy
hub / github.com/safishamsi/graphify / extract_cpp

Function extract_cpp

graphify/extract.py:1076–1246  ·  view source on GitHub ↗

Extract functions, classes, and includes from a .cpp/.cc/.cxx/.hpp file.

(path: Path)

Source from the content-addressed store, hash-verified

1074
1075
1076 def extract_cpp(path: Path) -> dict:
1077 """Extract functions, classes, and includes from a .cpp/.cc/.cxx/.hpp file."""
1078 try:
1079 import tree_sitter_cpp as tscpp
1080 from tree_sitter import Language, Parser
1081 except ImportError:
1082 return {"nodes": [], "edges": [], "error": "tree-sitter-cpp not installed"}
1083
1084 try:
1085 language = Language(tscpp.language())
1086 parser = Parser(language)
1087 source = path.read_bytes()
1088 tree = parser.parse(source)
1089 root = tree.root_node
1090 except Exception as e:
1091 return {"nodes": [], "edges": [], "error": str(e)}
1092
1093 stem = path.stem
1094 str_path = str(path)
1095 nodes: list[dict] = []
1096 edges: list[dict] = []
1097 seen_ids: set[str] = set()
1098
1099 def add_node(nid: str, label: str, line: int) -> None:
1100 if nid not in seen_ids:
1101 seen_ids.add(nid)
1102 nodes.append({
1103 "id": nid,
1104 "label": label,
1105 "file_type": "code",
1106 "source_file": str_path,
1107 "source_location": f"L{line}",
1108 })
1109
1110 def add_edge_raw(src: str, tgt: str, relation: str, line: int, confidence: str = "EXTRACTED", weight: float = 1.0) -> None:
1111 edges.append({
1112 "source": src,
1113 "target": tgt,
1114 "relation": relation,
1115 "confidence": confidence,
1116 "source_file": str_path,
1117 "source_location": f"L{line}",
1118 "weight": weight,
1119 })
1120
1121 file_nid = _make_id(stem)
1122 add_node(file_nid, path.name, 1)
1123
1124 function_bodies: list[tuple[str, object]] = []
1125
1126 def _get_func_name_from_declarator(node) -> str | None:
1127 """Recursively unwrap declarator to find the innermost identifier."""
1128 if node.type == "identifier":
1129 return source[node.start_byte:node.end_byte].decode("utf-8", errors="replace")
1130 if node.type == "qualified_identifier":
1131 name_node = node.child_by_field_name("name")
1132 if name_node:
1133 return source[name_node.start_byte:name_node.end_byte].decode("utf-8", errors="replace")

Callers 5

test_cpp_no_error · Function · 0.90
test_cpp_finds_class · Function · 0.90
test_cpp_finds_methods · Function · 0.90
test_cpp_finds_includes · Function · 0.90
extract · Function · 0.85

Calls 4

_make_id · Function · 0.85
add_node · Function · 0.85
walk · Function · 0.85
walk_calls · Function · 0.85

Tested by 4

test_cpp_no_error · Function · 0.72
test_cpp_finds_class · Function · 0.72
test_cpp_finds_methods · Function · 0.72
test_cpp_finds_includes · Function · 0.72