(args)
| 42 | |
| 43 | |
def main(args):
    """Gather image paths from the OpenImages shards, check them, and save them as JSON.

    The work runs in three stages:

    1. ``load_image_path`` is mapped over the shard names ``openimages_0001``
       through ``openimages_0047`` (one worker process per shard) and the
       per-shard results are flattened into a single list.
    2. ``check_image_path`` is mapped over contiguous chunks of that list and
       the per-chunk results are flattened again, preserving order.
    3. The resulting list is written to ``<data_path>/image_paths.json``.

    Args:
        args: Object exposing a ``data_path`` attribute; it is the directory
            handed to both workers and the directory the JSON file is
            written into.
    """
    data_path = args.data_path
    image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp']

    # Stage 1: one task (and one worker) per shard directory name.
    num_processes = 47
    work_list = [
        ('openimages_{:0>4}'.format(idx), data_path, image_extensions)
        for idx in range(1, 48)
    ]
    with mp.Pool(processes=num_processes) as pool:
        results = pool.map(load_image_path, work_list)
    image_paths = [image_path for sublist in results for image_path in sublist]
    print('image_paths is loaded')

    # Stage 2: split the paths into at most `num_processes` contiguous chunks.
    # The chunk size is rounded UP so the trailing remainder is not dropped
    # (floor division would lose len % num_processes paths, and every path
    # when there are fewer paths than workers).
    num_processes = max(mp.cpu_count() // 2, 4)
    total = len(image_paths)
    chunk_size = max(1, -(-total // num_processes))  # ceil division, never 0
    work_list = [
        (data_path, image_paths[start:start + chunk_size])
        for start in range(0, total, chunk_size)
    ]
    with mp.Pool(processes=num_processes) as pool:
        results = pool.map(check_image_path, work_list)
    # NOTE(review): presumably check_image_path returns only the paths that
    # pass its check -- confirm against its definition.
    valid_image_paths = [image_path for sublist in results for image_path in sublist]
    print('image_paths is checked')

    # Stage 3: persist the checked paths next to the data.
    output_json_file_path = os.path.join(data_path, 'image_paths.json')
    with open(output_json_file_path, 'w') as outfile:
        json.dump(valid_image_paths, outfile, indent=4)
    print(f"Image paths have been saved to {output_json_file_path}")
| 69 | |
| 70 | |
| 71 | if __name__ == "__main__": |
no test coverage detected