MCPcopy
hub / github.com/microsoft/Cream / build_dataset

Function build_dataset

TinyViT/data/build.py:115–154  ·  view source on GitHub ↗
(is_train, config)

Source from the content-addressed store, hash-verified

113
114
def build_dataset(is_train, config):
    """Build the dataset selected by ``config.DATA.DATASET``.

    Args:
        is_train: Truthy to build the training split, falsy to build the
            evaluation split.
        config: Configuration object. This function reads
            ``DATA.DATASET`` and ``DATA.DATA_PATH``; for ImageNet-22k
            training it also reads ``DATA.FNAME_FORMAT`` and ``DATA.DEBUG``.
            The object must support ``defrost()`` / ``freeze()`` for the
            ImageNet-22k evaluation path.

    Returns:
        A ``(dataset, nb_classes)`` tuple.

    Raises:
        NotImplementedError: If ``config.DATA.DATASET`` is neither
            ``'imagenet'`` nor ``'imagenet22k'``.
    """
    transform = build_transform(is_train, config)
    dataset_tar_t = TimmDatasetTar

    if config.DATA.DATASET == 'imagenet':
        prefix = 'train' if is_train else 'val'
        # Prefer a single `<prefix>.tar` archive when one exists; otherwise
        # fall back to a plain image-folder layout under `<prefix>/`.
        data_dir = os.path.join(config.DATA.DATA_PATH, f'{prefix}.tar')
        if os.path.exists(data_dir):
            dataset = dataset_tar_t(data_dir, transform=transform)
        else:
            root = os.path.join(config.DATA.DATA_PATH, prefix)
            dataset = datasets.ImageFolder(root, transform=transform)
        nb_classes = 1000
    elif config.DATA.DATASET == 'imagenet22k':
        if is_train:
            dataset = IN22KDataset(data_root=config.DATA.DATA_PATH, transform=transform,
                                   fname_format=config.DATA.FNAME_FORMAT, debug=config.DATA.DEBUG)
            nb_classes = 21841
        else:
            # Evaluation for ImageNet-22k uses the ImageNet-1k validation set,
            # expected in a sibling folder of the 22k data:
            #
            #   datasets/
            #   ├── ImageNet-22k/   # the folder of IN-22k
            #   └── ImageNet/       # the folder of IN-1k
            old_data_path = config.DATA.DATA_PATH
            config.defrost()
            try:
                # Temporarily point the config at ImageNet-1k and reuse the
                # 'imagenet' branch through a recursive call.
                config.DATA.DATA_PATH = os.path.normpath(
                    os.path.join(old_data_path, '../ImageNet'))
                config.DATA.DATASET = 'imagenet'
                dataset, nb_classes = build_dataset(is_train=False, config=config)
            finally:
                # Always restore the caller's settings and re-freeze, even if
                # building the ImageNet-1k dataset raised; otherwise the
                # config would be left unfrozen and pointing at the wrong
                # dataset/path.
                config.DATA.DATA_PATH = old_data_path
                config.DATA.DATASET = 'imagenet22k'
                config.freeze()
    else:
        raise NotImplementedError("We only support ImageNet Now.")

    return dataset, nb_classes
155
156
157def build_transform(is_train, config):

Callers 1

build_loaderFunction · 0.70

Calls 2

IN22KDatasetClass · 0.85
build_transformFunction · 0.70

Tested by

no test coverage detected