MCPcopy
hub / github.com/k2-fsa/OmniVoice / main

Function main

omnivoice/eval/wer/minimax.py:372–592  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

370
371
372def main():
373 parser = get_parser()
374 args = parser.parse_args()
375
376 logging.basicConfig(
377 format="%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s",
378 level=logging.INFO,
379 force=True,
380 )
381
382 # 1. Prepare Data
383 logging.info("Reading test list...")
384 data_by_lang = defaultdict(list)
385 total_files = 0
386 wav_root = Path(args.wav_path)
387
388 samples = read_test_list(args.test_list)
389 for s in samples:
390 wav_path = str(wav_root / f"{s['id']}.{args.extension}")
391 if not os.path.exists(wav_path):
392 logging.warning(f"File missing: {wav_path}")
393 continue
394
395 lang_id = s.get("language_id") or "unknown"
396 lang_name = s.get("language_name") or "unknown"
397
398 item = {
399 "wav_path": wav_path,
400 "truth_text": s["text"],
401 "lang_id": lang_id,
402 "lang_name": lang_name,
403 }
404 if args.lang and s.get("language_id") != args.lang:
405 continue
406
407 data_by_lang[lang_name].append(item)
408 total_files += 1
409
410 logging.info(f"Total files: {total_files} in {len(data_by_lang)} languages.")
411
412 # 2. Worker config
413 num_gpus = torch.cuda.device_count()
414 assert num_gpus > 0, "No GPU found. GPU is required."
415 total_workers = num_gpus * args.nj_per_gpu
416
417 mp.set_start_method("spawn", force=True)
418 manager = mp.Manager()
419
420 # 3. Scheduling: Split data into Chinese (Paraformer) and non-Chinese (Whisper)
421 zh_items = []
422 non_zh_items = []
423 for lang_name, items in data_by_lang.items():
424 lang_id = items[0].get("lang_id", "") if items else ""
425 if lang_name == "Chinese" or (lang_id and lang_id.startswith("zh")):
426 zh_items.extend(items)
427 else:
428 non_zh_items.extend(items)
429

Callers 1

minimax.py · File · 0.70

Calls 6

read_test_list · Function · 0.90
log_metrics · Function · 0.90
update · Method · 0.80
close · Method · 0.80
get_parser · Function · 0.70
submit · Method · 0.45

Tested by

no test coverage detected