MCPcopy Index your code
hub / github.com/modelscope/FunASR / main

Function main

tests_models/test_fsmn_vad_streaming.py:7–95  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

5import os
6
7def main():
8 import soundfile
9 from funasr import AutoModel
10
11 print("[FSMN-VAD-Streaming] Loading model...")
12 t0 = time.time()
13 model = AutoModel(
14 model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
15 device="cpu",
16 disable_update=True,
17 disable_pbar=True,
18 )
19 print("[FSMN-VAD-Streaming] Model loaded in %.1fs" % (time.time() - t0))
20
21 wav_file = os.path.join(model.model_path, "example/vad_example.wav")
22 speech, sample_rate = soundfile.read(wav_file)
23 chunk_size = 200 # ms
24 chunk_stride = int(chunk_size * sample_rate / 1000)
25
26 total_chunk_num = int((len(speech) - 1) / chunk_stride + 1)
27 print("[FSMN-VAD-Streaming] Audio: %.2fs, %d chunks of %dms" % (
28 len(speech) / sample_rate, total_chunk_num, chunk_size))
29
30 print("[FSMN-VAD-Streaming] Running streaming inference...")
31 t0 = time.time()
32
33 cache = {}
34 all_events = []
35 for i in range(total_chunk_num):
36 speech_chunk = speech[i * chunk_stride:(i + 1) * chunk_stride]
37 is_final = i == total_chunk_num - 1
38 res = model.generate(
39 input=speech_chunk,
40 cache=cache,
41 is_final=is_final,
42 chunk_size=chunk_size,
43 )
44 if res[0]["value"]:
45 all_events.extend(res[0]["value"])
46
47 print("[FSMN-VAD-Streaming] Inference done in %.1fs" % (time.time() - t0))
48
49 # Parse streaming VAD events into complete segments
50 # Streaming output: [beg, -1] = speech start, [-1, end] = speech end, [beg, end] = complete
51 complete_segments = []
52 pending_start = None
53 for event in all_events:
54 if event[0] >= 0 and event[1] == -1:
55 pending_start = event[0]
56 elif event[0] == -1 and event[1] >= 0:
57 if pending_start is not None:
58 complete_segments.append([pending_start, event[1]])
59 pending_start = None
60 elif event[0] >= 0 and event[1] >= 0:
61 complete_segments.append(event)
62
63 print("[FSMN-VAD-Streaming] Raw events: %d, Complete segments: %d" % (
64 len(all_events), len(complete_segments)))

Callers 1

Calls 3

generate · Method · 0.95
AutoModel · Class · 0.90
read · Method · 0.45

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…