()
| 5 | import os |
| 6 | |
def _merge_vad_events(events):
    """Pair streaming VAD events into complete ``[start, end]`` segments.

    Each event is a two-item sequence in which ``-1`` marks the side that
    is not known yet:

    * ``[beg, -1]`` opens a segment,
    * ``[-1, end]`` closes the currently open segment,
    * ``[beg, end]`` is already complete and is kept as-is.

    An end event with no open start is ignored, a start followed by
    another start is replaced by the later one, and a start that never
    receives an end is dropped.

    Args:
        events: Iterable of two-item VAD events in arrival order.

    Returns:
        A list of complete segments in the order they were closed.
    """
    segments = []
    pending_start = None
    for event in events:
        beg, end = event[0], event[1]
        if beg >= 0 and end == -1:
            pending_start = beg
        elif beg == -1 and end >= 0:
            if pending_start is not None:
                segments.append([pending_start, end])
                pending_start = None
        elif beg >= 0 and end >= 0:
            segments.append(event)
    return segments


def main():
    """Run the FSMN VAD model chunk by chunk over its bundled example WAV.

    Loads the model on CPU, feeds ``example/vad_example.wav`` from the
    model directory to ``model.generate`` in 200 ms chunks, collects the
    emitted events, merges them into complete segments, and prints load
    time, inference time and event/segment counts.
    """
    # Imported locally so that merely importing this module does not
    # require the audio/model dependencies.
    import time

    import soundfile
    from funasr import AutoModel

    print("[FSMN-VAD-Streaming] Loading model...")
    t0 = time.time()
    model = AutoModel(
        model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
        device="cpu",
        disable_update=True,
        disable_pbar=True,
    )
    print("[FSMN-VAD-Streaming] Model loaded in %.1fs" % (time.time() - t0))

    wav_file = os.path.join(model.model_path, "example/vad_example.wav")
    speech, sample_rate = soundfile.read(wav_file)
    chunk_size = 200  # ms
    # Number of samples covered by one chunk_size-millisecond chunk.
    chunk_stride = int(chunk_size * sample_rate / 1000)

    # Ceiling division: a trailing partial chunk still counts as a chunk,
    # and empty audio yields zero chunks.
    total_chunk_num = (len(speech) - 1) // chunk_stride + 1
    print("[FSMN-VAD-Streaming] Audio: %.2fs, %d chunks of %dms" % (
        len(speech) / sample_rate, total_chunk_num, chunk_size))

    print("[FSMN-VAD-Streaming] Running streaming inference...")
    t0 = time.time()

    # The same dict is handed to every generate() call; presumably the
    # model keeps its streaming state in it between chunks.
    cache = {}
    all_events = []
    for i in range(total_chunk_num):
        speech_chunk = speech[i * chunk_stride:(i + 1) * chunk_stride]
        # Only the last chunk is flagged as final.
        is_final = i == total_chunk_num - 1
        res = model.generate(
            input=speech_chunk,
            cache=cache,
            is_final=is_final,
            chunk_size=chunk_size,
        )
        # Chunks that produce no event carry an empty "value".
        if res[0]["value"]:
            all_events.extend(res[0]["value"])

    print("[FSMN-VAD-Streaming] Inference done in %.1fs" % (time.time() - t0))

    # Parse streaming VAD events into complete segments
    # Streaming output: [beg, -1] = speech start, [-1, end] = speech end, [beg, end] = complete
    complete_segments = _merge_vad_events(all_events)

    print("[FSMN-VAD-Streaming] Raw events: %d, Complete segments: %d" % (
        len(all_events), len(complete_segments)))
no test coverage detected
searching dependent graphs…