MCPcopy
hub / github.com/HKUDS/MiniRAG / xml_to_json

Function xml_to_json

minirag/utils.py:198–255  ·  view source on GitHub ↗
(xml_file)

Source from the content-addressed store, hash-verified

196
197
def xml_to_json(xml_file):
    """Convert a GraphML file into a plain dict of node and edge records.

    Args:
        xml_file: Anything ``ET.parse`` accepts (a path or a file object).

    Returns:
        A dict ``{"nodes": [...], "edges": [...]}`` on success, or ``None``
        if the XML cannot be parsed or any other error occurs (the error
        message is printed in both cases).

        Each node record has ``id``, ``entity_type`` (data key ``d0``),
        ``description`` (``d1``) and ``source_id`` (``d2``).
        Each edge record has ``source``, ``target``, ``weight`` (``d3``),
        ``description`` (``d4``), ``keywords`` (``d5``) and ``source_id``
        (``d6``).

        A missing ``<data>`` child yields ``""`` (``0.0`` for ``weight``).
        An empty ``d0`` or ``d3`` element is treated like a missing one
        instead of aborting the whole conversion; the other fields pass the
        element text through unchanged (``None`` for an empty element).
    """
    # Default GraphML namespace. Mapping it to the empty prefix lets the
    # unprefixed paths below match the namespaced elements.
    namespace = {"": "http://graphml.graphdrawing.org/xmlns"}

    def data_text(element, key, default=""):
        """Return the text of the <data key=...> child, or *default* if absent."""
        child = element.find(f"./data[@key='{key}']", namespace)
        return default if child is None else child.text

    try:
        tree = ET.parse(xml_file)
        root = tree.getroot()

        # Print the root element's tag and attributes to confirm the file
        # has been correctly loaded
        print(f"Root element: {root.tag}")
        print(f"Root attributes: {root.attrib}")

        data = {"nodes": [], "edges": []}

        for node in root.findall(".//node", namespace):
            # An empty <data key="d0"/> has text None; fall back to "" so
            # .strip() cannot raise and discard every other node and edge.
            entity_type = data_text(node, "d0") or ""
            data["nodes"].append(
                {
                    "id": node.get("id").strip('"'),
                    "entity_type": entity_type.strip('"'),
                    "description": data_text(node, "d1"),
                    "source_id": data_text(node, "d2"),
                }
            )

        for edge in root.findall(".//edge", namespace):
            # Missing or blank weight falls back to 0.0; a non-numeric value
            # still raises and is reported by the handler below.
            raw_weight = data_text(edge, "d3", default=None)
            has_weight = raw_weight is not None and raw_weight.strip() != ""
            data["edges"].append(
                {
                    "source": edge.get("source").strip('"'),
                    "target": edge.get("target").strip('"'),
                    "weight": float(raw_weight) if has_weight else 0.0,
                    "description": data_text(edge, "d4"),
                    "keywords": data_text(edge, "d5"),
                    "source_id": data_text(edge, "d6"),
                }
            )

        # Print the number of nodes and edges found
        print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges")

        return data
    except ET.ParseError as e:
        print(f"Error parsing XML file: {e}")
        return None
    except Exception as e:
        # Callers rely on a None return for any failure, so this stays a
        # catch-all boundary rather than propagating.
        print(f"An error occurred: {e}")
        return None

Callers 1

convert_xml_to_jsonFunction · 0.90

Calls 1

getMethod · 0.80

Tested by

no test coverage detected