| 1250 | |
| 1251 | |
| 1252 | def _test_CSVDataset_multiple(): |
| 1253 | with tempfile.TemporaryDirectory() as test_dir: |
| 1254 | # generate YAML/CSVs |
| 1255 | meta_yaml_path = os.path.join(test_dir, "meta.yaml") |
| 1256 | edges_csv_path_0 = os.path.join(test_dir, "test_edges_0.csv") |
| 1257 | edges_csv_path_1 = os.path.join(test_dir, "test_edges_1.csv") |
| 1258 | nodes_csv_path_0 = os.path.join(test_dir, "test_nodes_0.csv") |
| 1259 | nodes_csv_path_1 = os.path.join(test_dir, "test_nodes_1.csv") |
| 1260 | graph_csv_path = os.path.join(test_dir, "test_graph.csv") |
| 1261 | meta_yaml_data = { |
| 1262 | "version": "1.0.0", |
| 1263 | "dataset_name": "default_name", |
| 1264 | "node_data": [ |
| 1265 | { |
| 1266 | "file_name": os.path.basename(nodes_csv_path_0), |
| 1267 | "ntype": "user", |
| 1268 | }, |
| 1269 | { |
| 1270 | "file_name": os.path.basename(nodes_csv_path_1), |
| 1271 | "ntype": "item", |
| 1272 | }, |
| 1273 | ], |
| 1274 | "edge_data": [ |
| 1275 | { |
| 1276 | "file_name": os.path.basename(edges_csv_path_0), |
| 1277 | "etype": ["user", "follow", "user"], |
| 1278 | }, |
| 1279 | { |
| 1280 | "file_name": os.path.basename(edges_csv_path_1), |
| 1281 | "etype": ["user", "like", "item"], |
| 1282 | }, |
| 1283 | ], |
| 1284 | "graph_data": {"file_name": os.path.basename(graph_csv_path)}, |
| 1285 | } |
| 1286 | with open(meta_yaml_path, "w") as f: |
| 1287 | yaml.dump(meta_yaml_data, f, sort_keys=False) |
| 1288 | num_nodes = 100 |
| 1289 | num_edges = 500 |
| 1290 | num_graphs = 10 |
| 1291 | num_dims = 3 |
| 1292 | feat_ndata = np.random.rand(num_nodes * num_graphs, num_dims) |
| 1293 | label_ndata = np.random.randint(2, size=num_nodes * num_graphs) |
| 1294 | df = pd.DataFrame( |
| 1295 | { |
| 1296 | "node_id": np.hstack( |
| 1297 | [np.arange(num_nodes) for _ in range(num_graphs)] |
| 1298 | ), |
| 1299 | "label": label_ndata, |
| 1300 | "feat": [line.tolist() for line in feat_ndata], |
| 1301 | "graph_id": np.hstack( |
| 1302 | [np.full(num_nodes, i) for i in range(num_graphs)] |
| 1303 | ), |
| 1304 | } |
| 1305 | ) |
| 1306 | df.to_csv(nodes_csv_path_0, index=False) |
| 1307 | df.to_csv(nodes_csv_path_1, index=False) |
| 1308 | feat_edata = np.random.rand(num_edges * num_graphs, num_dims) |
| 1309 | label_edata = np.random.randint(2, size=num_edges * num_graphs) |