Load Flickr1M dataset. Returns a list of images by a given tag from the Flickr1M dataset; it will download Flickr1M from `the official website <http://press.liacs.nl/mirflickr/mirdownload.html>`__ the first time you use it. Parameters ------------ tag : str or None Wh
(tag='sky', size=10, path="data", n_threads=50, printable=False)
| 1115 | |
| 1116 | |
| 1117 | def load_flickr1M_dataset(tag='sky', size=10, path="data", n_threads=50, printable=False): |
| 1118 | """Load Flickr1M dataset. |
| 1119 | |
| 1120 | Returns a list of images by a given tag from Flickr1M dataset, |
| 1121 | it will download Flickr1M from `the official website <http://press.liacs.nl/mirflickr/mirdownload.html>`__ |
| 1122 | at the first time you use it. |
| 1123 | |
| 1124 | Parameters |
| 1125 | ------------ |
| 1126 | tag : str or None |
| 1127 | What images to return. |
| 1128 | - If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search <https://www.flickr.com/search/>`__. |
| 1129 | - If you want to get all images, set to ``None``. |
| 1130 | |
| 1131 | size : int |
| 1132 | Integer from 1 to 10. 1 means 100k images ... 5 means 500k images, 10 means all 1 million images. Default is 10. |
| 1133 | path : str |
| 1134 | The path that the data is downloaded to, default is ``data/flickr1M/``. |
| 1135 | n_threads : int |
| 1136 | The number of threads used to read images. |
| 1137 | printable : boolean |
| 1138 | Whether to print information when reading images, default is ``False``. |
| 1139 | |
| 1140 | Examples |
| 1141 | ---------- |
| 1142 | Use 200k images |
| 1143 | |
| 1144 | >>> images = tl.files.load_flickr1M_dataset(tag='zebra', size=2) |
| 1145 | |
| 1146 | Use 1 Million images |
| 1147 | |
| 1148 | >>> images = tl.files.load_flickr1M_dataset(tag='zebra') |
| 1149 | |
| 1150 | """ |
| 1151 | path = os.path.join(path, 'flickr1M') |
| 1152 | logging.info("[Flickr1M] using {}% of images = {}".format(size * 10, size * 100000)) |
| 1153 | images_zip = [ |
| 1154 | 'images0.zip', 'images1.zip', 'images2.zip', 'images3.zip', 'images4.zip', 'images5.zip', 'images6.zip', |
| 1155 | 'images7.zip', 'images8.zip', 'images9.zip' |
| 1156 | ] |
| 1157 | tag_zip = 'tags.zip' |
| 1158 | url = 'http://press.liacs.nl/mirflickr/mirflickr1m/' |
| 1159 | |
| 1160 | # download dataset |
| 1161 | for image_zip in images_zip[0:size]: |
| 1162 | image_folder = image_zip.split(".")[0] |
| 1163 | # logging.info(path+"/"+image_folder) |
| 1164 | if folder_exists(os.path.join(path, image_folder)) is False: |
| 1165 | # logging.info(image_zip) |
| 1166 | logging.info("[Flickr1M] {} is missing in {}".format(image_folder, path)) |
| 1167 | maybe_download_and_extract(image_zip, path, url, extract=True) |
| 1168 | del_file(os.path.join(path, image_zip)) |
| 1169 | # os.system("mv {} {}".format(os.path.join(path, 'images'), os.path.join(path, image_folder))) |
| 1170 | shutil.move(os.path.join(path, 'images'), os.path.join(path, image_folder)) |
| 1171 | else: |
| 1172 | logging.info("[Flickr1M] {} exists in {}".format(image_folder, path)) |
| 1173 | |
| 1174 | # download tag |
nothing calls this directly
no test coverage detected
searching dependent graphs…