Extract classes, interfaces, methods, namespaces, and usings from a .cs file.
(path: Path)
| 1409 | |
| 1410 | |
| 1411 | def extract_csharp(path: Path) -> dict: |
| 1412 | """Extract classes, interfaces, methods, namespaces, and usings from a .cs file.""" |
| 1413 | try: |
| 1414 | import tree_sitter_c_sharp as tscsharp |
| 1415 | from tree_sitter import Language, Parser |
| 1416 | except ImportError: |
| 1417 | return {"nodes": [], "edges": [], "error": "tree-sitter-c-sharp not installed"} |
| 1418 | |
| 1419 | try: |
| 1420 | language = Language(tscsharp.language()) |
| 1421 | parser = Parser(language) |
| 1422 | source = path.read_bytes() |
| 1423 | tree = parser.parse(source) |
| 1424 | root = tree.root_node |
| 1425 | except Exception as e: |
| 1426 | return {"nodes": [], "edges": [], "error": str(e)} |
| 1427 | |
| 1428 | stem = path.stem |
| 1429 | str_path = str(path) |
| 1430 | nodes: list[dict] = [] |
| 1431 | edges: list[dict] = [] |
| 1432 | seen_ids: set[str] = set() |
| 1433 | |
| 1434 | def add_node(nid: str, label: str, line: int) -> None: |
| 1435 | if nid not in seen_ids: |
| 1436 | seen_ids.add(nid) |
| 1437 | nodes.append({ |
| 1438 | "id": nid, |
| 1439 | "label": label, |
| 1440 | "file_type": "code", |
| 1441 | "source_file": str_path, |
| 1442 | "source_location": f"L{line}", |
| 1443 | }) |
| 1444 | |
| 1445 | def add_edge_raw(src: str, tgt: str, relation: str, line: int, confidence: str = "EXTRACTED", weight: float = 1.0) -> None: |
| 1446 | edges.append({ |
| 1447 | "source": src, |
| 1448 | "target": tgt, |
| 1449 | "relation": relation, |
| 1450 | "confidence": confidence, |
| 1451 | "source_file": str_path, |
| 1452 | "source_location": f"L{line}", |
| 1453 | "weight": weight, |
| 1454 | }) |
| 1455 | |
| 1456 | file_nid = _make_id(stem) |
| 1457 | add_node(file_nid, path.name, 1) |
| 1458 | |
| 1459 | function_bodies: list[tuple[str, object]] = [] |
| 1460 | |
| 1461 | def walk(node, parent_class_nid: str | None = None) -> None: |
| 1462 | t = node.type |
| 1463 | |
| 1464 | if t == "using_directive": |
| 1465 | # Extract the namespace name from the using directive |
| 1466 | for child in node.children: |
| 1467 | if child.type in ("qualified_name", "identifier", "name_equals"): |
| 1468 | raw = source[child.start_byte:child.end_byte].decode("utf-8", errors="replace") |