| 32 | # Copy of transformers `AutomaticSpeechRecognitionPipeline.chunk_iter` method with custom progress output |
@staticmethod
def chunk_iter(inputs, feature_extractor, chunk_len, stride_left, stride_right, dtype=None):
    """Yield feature-extracted, overlapping windows of ``inputs``.

    Consecutive windows start ``chunk_len - stride_left - stride_right``
    items apart along the first axis of ``inputs``. Before each window is
    processed, a progress line of the form ``"<percent>%"`` is written to
    ``sys.stderr``.

    Args:
        inputs: Sliceable object exposing ``shape[0]`` as its length.
        feature_extractor: Callable with a ``sampling_rate`` attribute; it is
            invoked as ``feature_extractor(window, sampling_rate=...,
            return_tensors="pt")`` and its result is unpacked with ``**``
            into each yielded dict.
        chunk_len: Length of the first window.
        stride_left: Left stride reported for every window except the first.
        stride_right: Right stride reported for every window except the last.
        dtype: When not ``None``, the extractor output is converted with
            ``.to(dtype=dtype)``.

    Yields:
        dict: ``"is_last"``, ``"stride"`` as ``(window_length, left, right)``,
        followed by the entries of the extractor output.
    """
    total = inputs.shape[0]
    hop = chunk_len - stride_left - stride_right
    # Length used to slice the next window; refreshed from the actual slice
    # after every iteration.
    window = chunk_len

    for start in range(0, total, hop):
        # Progress is reported on stderr, one percentage line per window.
        sys.stderr.write(f"{int((start / total) * 100)}%\n")

        stop = start + window
        piece = inputs[start:stop]
        features = feature_extractor(
            piece,
            sampling_rate=feature_extractor.sampling_rate,
            return_tensors="pt",
        )
        if dtype is not None:
            features = features.to(dtype=dtype)

        final = stop >= total
        # The first window has no left stride; the last has no right stride.
        left = stride_left if start != 0 else 0
        right = stride_right if not final else 0

        window = piece.shape[0]
        # Skip a window that holds nothing beyond its left stride.
        if window > left:
            yield {"is_last": final, "stride": (window, left, right), **features}
        if final:
            break
| 58 | |
| 59 | # Copy of transformers `AutomaticSpeechRecognitionPipeline.preprocess` method with call to custom `chunk_iter` |
| 60 | def preprocess(self, inputs, chunk_length_s=0, stride_length_s=None): |