()
| 280 | |
| 281 | |
| 282 | def main(): |
| 283 | parser = get_parser() |
| 284 | args = parser.parse_args() |
| 285 | |
| 286 | logging.basicConfig( |
| 287 | format="%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s", |
| 288 | level=logging.INFO, |
| 289 | force=True, |
| 290 | ) |
| 291 | |
| 292 | logging.info(f"Calculating WER for {args.wav_path}") |
| 293 | |
| 294 | # 1. Prepare Data |
| 295 | logging.info("Reading test list...") |
| 296 | data_list = [] |
| 297 | samples = read_test_list(args.test_list) |
| 298 | for s in samples: |
| 299 | wav_path = str(Path(args.wav_path) / f"{s['id']}.{args.extension}") |
| 300 | if not os.path.exists(wav_path): |
| 301 | logging.warning(f"File missing: {wav_path}") |
| 302 | continue |
| 303 | data_list.append({"wav_path": wav_path, "truth_text": s["text"]}) |
| 304 | total_files = len(data_list) |
| 305 | logging.info(f"Total files: {total_files}.") |
| 306 | |
| 307 | # 2. Worker config |
| 308 | num_gpus = torch.cuda.device_count() |
| 309 | assert num_gpus > 0, "No GPU found. GPU is required." |
| 310 | total_workers = num_gpus * args.nj_per_gpu |
| 311 | |
| 312 | mp.set_start_method("spawn", force=True) |
| 313 | manager = mp.Manager() |
| 314 | rank_queue = manager.Queue() |
| 315 | |
| 316 | for _ in range(args.nj_per_gpu): |
| 317 | for rank in range(num_gpus): |
| 318 | rank_queue.put(rank) |
| 319 | |
| 320 | # 3. Scheduling: Split data into chunks for better load balancing |
| 321 | chunk_size = max(1, args.batch_size) |
| 322 | tasks = [] |
| 323 | for i in range(0, total_files, chunk_size): |
| 324 | tasks.append(data_list[i : i + chunk_size]) |
| 325 | |
| 326 | logging.info( |
| 327 | f"Split data into {len(tasks)} chunks (size ~{chunk_size}). " |
| 328 | f"Spawning {total_workers} workers." |
| 329 | ) |
| 330 | |
| 331 | # 4. Execution |
| 332 | results = [] |
| 333 | |
| 334 | with ProcessPoolExecutor( |
| 335 | max_workers=total_workers, |
| 336 | initializer=process_init, |
| 337 | initargs=(rank_queue, args.model_dir, args.lang), |
| 338 | ) as executor: |
| 339 |
no test coverage detected