MCPcopy
hub / github.com/chidiwilliams/buzz / chunk_iter

Method chunk_iter

buzz/transformers_whisper.py:34–57  ·  view source on GitHub ↗
(inputs, feature_extractor, chunk_len, stride_left, stride_right, dtype=None)

Source from the content-addressed store, hash-verified

32 # Copy of transformers `AutomaticSpeechRecognitionPipeline.chunk_iter` method with custom progress output
@staticmethod
def chunk_iter(inputs, feature_extractor, chunk_len, stride_left, stride_right, dtype=None):
    """Yield feature-extracted windows of ``inputs``, reporting progress on stderr.

    Windows are ``chunk_len`` items long and start every
    ``chunk_len - stride_left - stride_right`` items, so neighbouring
    windows overlap by the two strides. Before each window is processed,
    the start offset is written to ``sys.stderr`` as an integer percentage
    of the input length, followed by a newline.

    Args:
        inputs: Sliceable object exposing ``shape[0]``
            (presumably a 1-D array of audio samples; confirm against caller).
        feature_extractor: Callable with a ``sampling_rate`` attribute. It is
            invoked as ``feature_extractor(window, sampling_rate=...,
            return_tensors="pt")`` and its result is unpacked into the
            yielded dict.
        chunk_len: Length of each window, in items of ``inputs``.
        stride_left: Overlap reported on the left of every window but the
            first.
        stride_right: Overlap reported on the right of every window but the
            last.
        dtype: When not ``None``, the extractor output is converted with
            ``.to(dtype=dtype)``.

    Yields:
        dict: ``"is_last"`` (bool), ``"stride"`` as
        ``(window_length, left, right)``, plus every entry produced by the
        feature extractor.
    """
    total = inputs.shape[0]
    hop = chunk_len - stride_left - stride_right

    for window_start in range(0, total, hop):
        # Buzz will print progress to stderr
        progress = int((window_start / total) * 100)
        sys.stderr.write(f"{progress}%\n")

        window_end = window_start + chunk_len
        window = inputs[window_start:window_end]

        features = feature_extractor(
            window,
            sampling_rate=feature_extractor.sampling_rate,
            return_tensors="pt",
        )
        if dtype is not None:
            features = features.to(dtype=dtype)

        # The first window has nothing to its left to overlap with.
        if window_start == 0:
            left_overlap = 0
        else:
            left_overlap = stride_left

        # The window that reaches the end of the input has nothing to its right.
        is_last = total <= window_end
        if is_last:
            right_overlap = 0
        else:
            right_overlap = stride_right

        # Only the final window can be shorter than ``chunk_len``.
        window_len = window.shape[0]

        # Skip a window that consists solely of left overlap.
        if left_overlap < window_len:
            yield {
                "is_last": is_last,
                "stride": (window_len, left_overlap, right_overlap),
                **features,
            }

        if is_last:
            return
58
59 # Copy of transformers `AutomaticSpeechRecognitionPipeline.preprocess` method with call to custom `chunk_iter`
60 def preprocess(self, inputs, chunk_length_s=0, stride_length_s=None):

Callers 1

preprocess (Method) · 0.95

Calls 1

write (Method) · 0.80

Tested by

no test coverage detected