MCPcopy
hub / github.com/ultralytics/yolov5 / create_dataset_table

Method create_dataset_table

utils/loggers/wandb/wandb_utils.py:403–447  ·  view source on GitHub ↗

Create and return a W&B artifact containing a W&B Table of the dataset. Arguments: dataset -- instance of the LoadImagesAndLabels class used to iterate over the data to build the Table; class_to_id -- hash map that maps class ids to labels; name -- name of the artifact.

(self, dataset: LoadImagesAndLabels, class_to_id: Dict[int, str], name: str = 'dataset')

Source from the content-addressed store, hash-verified

401 self.val_table_path_map[data[3]] = data[0]
402
def create_dataset_table(self, dataset: LoadImagesAndLabels, class_to_id: Dict[int, str], name: str = 'dataset'):
    """
    Create and return W&B artifact containing W&B Table of the dataset.

    The image and label files are first added to the artifact under 'data/images' and 'data/labels'. A Table with
    one row per dataset item (row id, annotated image, class names present, image file name) is then added to the
    artifact under the key `name`.

    arguments:
    dataset -- instance of LoadImagesAndLabels class used to iterate over the data to build Table
    class_to_id -- hash map that maps class ids to labels
    name -- name of the artifact (also used as the name of the Table inside the artifact)

    returns:
    dataset artifact to be logged or used
    """
    # TODO: Explore multiprocessing to split this loop in parallel. This is essential for speeding up the logging
    artifact = wandb.Artifact(name=name, type="dataset")

    # When the dataset path is a single directory it is added wholesale; otherwise iterate the individual image files.
    if isinstance(dataset.path, str) and Path(dataset.path).is_dir():
        img_files = tqdm([dataset.path])
    else:
        img_files = tqdm(dataset.im_files)

    for img_file in img_files:
        if Path(img_file).is_dir():
            artifact.add_dir(img_file, name='data/images')
            # Replace the last occurrence of 'images' in the path with 'labels' to get the labels directory.
            labels_path = 'labels'.join(dataset.path.rsplit('images', 1))
            artifact.add_dir(labels_path, name='data/labels')
        else:
            artifact.add_file(img_file, name='data/images/' + Path(img_file).name)
            label_file = Path(img2label_paths([img_file])[0])
            if label_file.exists():  # an image whose label file is missing is still added, just without labels
                artifact.add_file(str(label_file), name='data/labels/' + label_file.name)

    table = wandb.Table(columns=["id", "train_image", "Classes", "name"])
    class_set = wandb.Classes([{'id': class_id, 'name': class_name} for class_id, class_name in class_to_id.items()])
    # Each dataset item unpacks to four values; only the labels and the image path are used here.
    for si, (_img, labels, paths, _shapes) in enumerate(tqdm(dataset)):
        box_data, img_classes = [], {}
        # The first column of every label row is dropped (presumably the image index within a batch -- confirm);
        # the remaining values are read as class id followed by the four box values.
        for cls, *xywh in labels[:, 1:].tolist():
            cls = int(cls)
            box_data.append({
                "position": {
                    "middle": [xywh[0], xywh[1]],
                    "width": xywh[2],
                    "height": xywh[3]},
                "class_id": cls,
                "box_caption": f"{class_to_id[cls]}"})
            img_classes[cls] = class_to_id[cls]  # dict keyed by class id keeps one entry per class in the image
        boxes = {"ground_truth": {"box_data": box_data, "class_labels": class_to_id}}  # inference-space
        table.add_data(si, wandb.Image(paths, classes=class_set, boxes=boxes), list(img_classes.values()),
                       Path(paths).name)
    artifact.add(table, name)
    return artifact
448
449 def log_training_progress(self, predn, path, names):
450 """

Callers 1

log_dataset_artifactMethod · 0.95

Calls 2

img2label_pathsFunction · 0.90
tolistMethod · 0.80

Tested by

no test coverage detected