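Read a JSONL test list file. Each line should be a JSON object. Only ``id`` and ``text`` are required; all other fields are optional (default to ``None``): id, text, ref_audio, ref_text, instruct, language_id, language_name, duration, speed. Note: ``language_name`` is only used by evaluation scripts (under ``omnivoice/eval/``) for grouping and reporting results. The model itself only consumes ``language_id``.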
(path)
| 27 | |
| 28 | |
def read_test_list(path):
    """Read a JSONL test list file.

    Each non-blank line should be a JSON object. Only ``id`` and ``text`` are
    required; all other fields are optional (default to ``None``):
        id, text, ref_audio, ref_text, instruct,
        language_id, language_name, duration, speed

    Note: ``language_name`` is only used by evaluation scripts (under
    ``omnivoice/eval/``) for grouping and reporting results. The model
    itself only consumes ``language_id``.

    Lines that cannot be used are skipped with a warning instead of aborting
    the whole read:
      * lines that are not valid JSON;
      * lines that are valid JSON but not an object (list, string, number,
        ``null``, ...).

    A record missing ``id`` or ``text`` is still returned (with ``None`` for
    the missing field) but a warning is logged, since those fields are
    documented as required.

    Args:
        path: Path (``str`` or ``Path``) of the JSONL file, read as UTF-8.

    Returns:
        A list of dicts, one per usable line, each holding exactly the nine
        keys listed above. Keys not in that list are dropped.
    """
    # Output keys, in the order they appear in each returned dict.
    fields = (
        "id",
        "text",
        "ref_audio",
        "ref_text",
        "language_id",
        "language_name",
        "duration",
        "speed",
        "instruct",
    )
    required = ("id", "text")

    path = Path(path)
    samples = []
    with path.open("r", encoding="utf-8") as f:
        for line_no, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                logging.warning(f"Skipping malformed JSON at line {line_no}: {line}")
                continue

            # Valid JSON is not necessarily an object; anything else has no
            # ``.get`` and would otherwise raise AttributeError below.
            if not isinstance(obj, dict):
                logging.warning(
                    f"Skipping non-object JSON at line {line_no}: {line}"
                )
                continue

            sample = {key: obj.get(key) for key in fields}

            missing = [key for key in required if sample[key] is None]
            if missing:
                logging.warning(
                    f"Missing required field(s) {missing} at line {line_no}"
                )

            samples.append(sample)
    return samples