Method chunk_iter

buzz/transformers_whisper.py:34–57 · view source on GitHub ↗

(inputs, feature_extractor, chunk_len, stride_left, stride_right, dtype=None)

Source from the content-addressed store, hash-verified

32	# Copy of transformers `AutomaticSpeechRecognitionPipeline.chunk_iter` method with custom progress output
33	@staticmethod
34	def chunk_iter(inputs, feature_extractor, chunk_len, stride_left, stride_right, dtype=None):
35	inputs_len = inputs.shape[0]
36	step = chunk_len - stride_left - stride_right
37	for chunk_start_idx in range(0, inputs_len, step):
38
39	# Buzz will print progress to stderr
40	progress = int((chunk_start_idx / inputs_len) * 100)
41	sys.stderr.write(f"{progress}%\n")
42
43	chunk_end_idx = chunk_start_idx + chunk_len
44	chunk = inputs[chunk_start_idx:chunk_end_idx]
45	processed = feature_extractor(chunk, sampling_rate=feature_extractor.sampling_rate, return_tensors="pt")
46	if dtype is not None:
47	processed = processed.to(dtype=dtype)
48	_stride_left = 0 if chunk_start_idx == 0 else stride_left
49	is_last = chunk_end_idx >= inputs_len
50	_stride_right = 0 if is_last else stride_right
51
52	chunk_len = chunk.shape[0]
53	stride = (chunk_len, _stride_left, _stride_right)
54	if chunk.shape[0] > _stride_left:
55	yield {"is_last": is_last, "stride": stride, **processed}
56	if is_last:
57	break
58
59	# Copy of transformers `AutomaticSpeechRecognitionPipeline.preprocess` method with call to custom `chunk_iter`
60	def preprocess(self, inputs, chunk_length_s=0, stride_length_s=None):

preprocessMethod · 0.95

writeMethod · 0.80

no test coverage detected