()
| 228 | |
| 229 | |
| 230 | def main(): |
| 231 | parser = get_parser() |
| 232 | args = parser.parse_args() |
| 233 | |
| 234 | logging.basicConfig( |
| 235 | format="%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s", |
| 236 | level=logging.INFO, |
| 237 | force=True, |
| 238 | ) |
| 239 | |
| 240 | logging.info("Reading test list and filtering for Cantonese (yue)...") |
| 241 | yue_items = [] |
| 242 | wav_root = Path(args.wav_path) |
| 243 | |
| 244 | samples = read_test_list(args.test_list) |
| 245 | for s in samples: |
| 246 | lang_id = s.get("language_id", "") |
| 247 | if lang_id != "yue": |
| 248 | continue |
| 249 | |
| 250 | wav_path = str(wav_root / f"{s['id']}.{args.extension}") |
| 251 | if not os.path.exists(wav_path): |
| 252 | logging.warning(f"File missing: {wav_path}") |
| 253 | continue |
| 254 | |
| 255 | yue_items.append( |
| 256 | { |
| 257 | "wav_path": wav_path, |
| 258 | "truth_text": s["text"], |
| 259 | "lang_id": "yue", |
| 260 | "lang_name": s.get("language_name", "Cantonese"), |
| 261 | } |
| 262 | ) |
| 263 | |
| 264 | logging.info(f"Total Cantonese files found: {len(yue_items)}.") |
| 265 | if len(yue_items) == 0: |
| 266 | logging.warning("No files to evaluate. Exiting.") |
| 267 | return |
| 268 | |
| 269 | num_gpus = torch.cuda.device_count() |
| 270 | assert num_gpus > 0, "No GPU found. GPU is required." |
| 271 | total_workers = num_gpus * args.nj_per_gpu |
| 272 | |
| 273 | mp.set_start_method("spawn", force=True) |
| 274 | manager = mp.Manager() |
| 275 | |
| 276 | chunk_size = args.chunk_size |
| 277 | tasks = [] |
| 278 | for i in range(0, len(yue_items), chunk_size): |
| 279 | tasks.append(yue_items[i : i + chunk_size]) |
| 280 | |
| 281 | results = [] |
| 282 | rank_queue = manager.Queue() |
| 283 | for _ in range(args.nj_per_gpu): |
| 284 | for rank in range(num_gpus): |
| 285 | rank_queue.put(rank) |
| 286 | |
| 287 | with ProcessPoolExecutor( |
no test coverage detected