()
| 4 | import time |
| 5 | |
def main():
    """Smoke-test SenseVoiceSmall with VAD and speaker models on CPU.

    Loads the model, transcribes ``example/zh.mp3`` from the model
    directory, and checks that the first result carries non-empty text, a
    non-empty ``timestamp`` entry and a non-empty ``sentence_info`` list
    whose entries each have ``spk``, ``start`` and ``end`` keys.

    Returns:
        1 as soon as any of the checks fails.
    """
    # Function-scope imports, matching how funasr is imported below.
    import os

    from funasr import AutoModel
    from funasr.utils.postprocess_utils import rich_transcription_postprocess

    print("[SenseVoice-SPK] Loading model...")
    t0 = time.time()
    model = AutoModel(
        model="iic/SenseVoiceSmall",
        vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
        # NOTE(review): presumably milliseconds (30 s) -- confirm against FunASR docs.
        vad_kwargs={"max_single_segment_time": 30000},
        spk_model="iic/speech_campplus_sv_zh-cn_16k-common",
        device="cpu",
        disable_update=True,
    )
    print("[SenseVoice-SPK] Model loaded in %.1fs" % (time.time() - t0))

    # Build the path with os.path.join so a trailing separator on
    # model_path cannot produce a doubled "/".
    wav_path = os.path.join(model.model_path, "example", "zh.mp3")

    print("[SenseVoice-SPK] Running inference...")
    t0 = time.time()
    res = model.generate(
        input=wav_path, cache={}, language="auto", use_itn=True,
        batch_size_s=60, merge_vad=True, merge_length_s=15,
    )
    print("[SenseVoice-SPK] Inference done in %.1fs" % (time.time() - t0))

    # Covers both None and an empty result list.
    if not res:
        print("[SenseVoice-SPK] FAILED - empty result")
        return 1

    result = res[0]
    keys = list(result.keys())
    print("[SenseVoice-SPK] Result keys: %s" % keys)

    # Verify text
    text = rich_transcription_postprocess(result.get("text", ""))
    print("[SenseVoice-SPK] Text: %s" % text)
    if not text:
        print("[SenseVoice-SPK] FAILED - empty text")
        return 1

    # Verify timestamp exists
    ts = result.get("timestamp", None)
    if ts is None or len(ts) == 0:
        print("[SenseVoice-SPK] FAILED - no timestamp")
        return 1
    print("[SenseVoice-SPK] Timestamp count: %d" % len(ts))

    # Verify sentence_info with speaker labels
    si = result.get("sentence_info", None)
    if si is None or len(si) == 0:
        print("[SenseVoice-SPK] FAILED - no sentence_info")
        return 1

    # Keys the per-sentence report line below reads unconditionally.
    required_keys = ("spk", "start", "end")

    print("[SenseVoice-SPK] sentence_info:")
    for s in si:
        # Report a missing speaker label / time bound the same way as the
        # other checks instead of dying with a bare KeyError.
        missing = [key for key in required_keys if key not in s]
        if missing:
            print("[SenseVoice-SPK] FAILED - sentence_info entry missing %s" % ", ".join(missing))
            return 1
        # Entries may carry their text under "text" or "sentence".
        sentence_text = rich_transcription_postprocess(s.get("text", s.get("sentence", "")))
        print(" spk=%d | [%d-%d] %s" % (s["spk"], s["start"], s["end"], sentence_text))
| 63 |
no test coverage detected
searching dependent graphs…