Function main

omnivoice/eval/wer/sensevoice.py:230–340 · view source on GitHub ↗

()

Source from the content-addressed store, hash-verified

228
229
230	def main():
231	parser = get_parser()
232	args = parser.parse_args()
233
234	logging.basicConfig(
235	format="%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s",
236	level=logging.INFO,
237	force=True,
238	)
239
240	logging.info("Reading test list and filtering for Cantonese (yue)...")
241	yue_items = []
242	wav_root = Path(args.wav_path)
243
244	samples = read_test_list(args.test_list)
245	for s in samples:
246	lang_id = s.get("language_id", "")
247	if lang_id != "yue":
248	continue
249
250	wav_path = str(wav_root / f"{s['id']}.{args.extension}")
251	if not os.path.exists(wav_path):
252	logging.warning(f"File missing: {wav_path}")
253	continue
254
255	yue_items.append(
256	{
257	"wav_path": wav_path,
258	"truth_text": s["text"],
259	"lang_id": "yue",
260	"lang_name": s.get("language_name", "Cantonese"),
261	}
262	)
263
264	logging.info(f"Total Cantonese files found: {len(yue_items)}.")
265	if len(yue_items) == 0:
266	logging.warning("No files to evaluate. Exiting.")
267	return
268
269	num_gpus = torch.cuda.device_count()
270	assert num_gpus > 0, "No GPU found. GPU is required."
271	total_workers = num_gpus * args.nj_per_gpu
272
273	mp.set_start_method("spawn", force=True)
274	manager = mp.Manager()
275
276	chunk_size = args.chunk_size
277	tasks = []
278	for i in range(0, len(yue_items), chunk_size):
279	tasks.append(yue_items[i : i + chunk_size])
280
281	results = []
282	rank_queue = manager.Queue()
283	for _ in range(args.nj_per_gpu):
284	for rank in range(num_gpus):
285	rank_queue.put(rank)
286
287	with ProcessPoolExecutor(

sensevoice.pyFile · 0.70

read_test_listFunction · 0.90

log_metricsFunction · 0.90

updateMethod · 0.80

closeMethod · 0.80

get_parserFunction · 0.70

submitMethod · 0.45

no test coverage detected