MCPcopy Index your code
hub / github.com/dmlc/dgl / main

Function main

examples/graphbolt/disk_based_feature/node_classification.py:399–532  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

397
398
399def main():
400 torch.set_float32_matmul_precision(args.precision)
401 if not torch.cuda.is_available():
402 args.mode = "cpu-cpu-cpu"
403 print(f"Training in {args.mode} mode.")
404 args.graph_device, args.feature_device, args.device = args.mode.split("-")
405 args.overlap_feature_fetch = args.feature_device == "pinned"
406 args.overlap_graph_fetch = args.graph_device == "pinned"
407
408 """
409 Load and preprocess on-disk dataset.
410 We inspect the in_memory field of the feature_data in the YAML file and modify
411 it to False. This will make sure the feature_data is loaded as DiskBasedFeature.
412 """
413 print("Loading data...")
414 disk_based_feature_keys = None
415 if args.cpu_cache_size_in_gigabytes > 0:
416 disk_based_feature_keys = [("node", None, "feat")]
417
418 dataset = gb.BuiltinDataset(args.dataset, root=args.root)
419 if disk_based_feature_keys is None:
420 disk_based_feature_keys = set()
421 for feature in dataset.yaml_data["feature_data"]:
422 feature_key = (feature["domain"], feature["type"], feature["name"])
423 # Set the in_memory setting to False without modifying YAML file.
424 if feature_key in disk_based_feature_keys:
425 feature["in_memory"] = False
426 dataset = dataset.load()
427
428 # Move the dataset to the selected storage.
429 graph = (
430 dataset.graph.pin_memory_()
431 if args.graph_device == "pinned"
432 else dataset.graph.to(args.graph_device)
433 )
434 features = (
435 dataset.feature.pin_memory_()
436 if args.feature_device == "pinned"
437 else dataset.feature.to(args.feature_device)
438 )
439
440 train_set = dataset.tasks[0].train_set
441 valid_set = dataset.tasks[0].validation_set
442 test_set = dataset.tasks[0].test_set
443 all_nodes_set = dataset.all_nodes_set
444 args.fanout = list(map(int, args.fanout.split(",")))
445 num_classes = dataset.tasks[0].metadata["num_classes"]
446
447 """
448 If the CPU cache size is greater than 0, we wrap the DiskBasedFeature to be
449 a CPUCachedFeature. This internally manages the CPU feature cache by the
450 specified cache replacement policy. This will reduce the amount of data
451 transferred during disk read operations for this feature.
452
453 Note: It is advised to set the CPU cache size to be at least 4 times the number
454 of sampled nodes in a mini-batch, otherwise the feature fetcher might get into
455 a deadlock, causing a hang.
456 """

Callers 1

Calls 9

load_state_dict (Method) · 0.80
create_dataloader (Function) · 0.70
SAGE (Class) · 0.70
train (Function) · 0.70
layerwise_infer (Function) · 0.70
load (Method) · 0.45
pin_memory_ (Method) · 0.45
to (Method) · 0.45
size (Method) · 0.45

Tested by

no test coverage detected