MCPcopy
hub / github.com/chidiwilliams/buzz / preprocess

Method preprocess

buzz/transformers_whisper.py:60–180  ·  view source on GitHub ↗
(self, inputs, chunk_length_s=0, stride_length_s=None)

Source from the content-addressed store, hash-verified

58
59 # Copy of transformers `AutomaticSpeechRecognitionPipeline.preprocess` method with call to custom `chunk_iter`
60 def preprocess(self, inputs, chunk_length_s=0, stride_length_s=None):
61 if isinstance(inputs, str):
62 if inputs.startswith("http://") or inputs.startswith("https://"):
63 # We need to actually check for a real protocol, otherwise it's impossible to use a local file
64 # like http_huggingface_co.png
65 inputs = requests.get(inputs).content
66 else:
67 with open(inputs, "rb") as f:
68 inputs = f.read()
69
70 if isinstance(inputs, bytes):
71 inputs = ffmpeg_read(inputs, self.feature_extractor.sampling_rate)
72
73 stride = None
74 extra = {}
75 if isinstance(inputs, dict):
76 stride = inputs.pop("stride", None)
77 # Accepting `"array"` which is the key defined in `datasets` for
78 # better integration
79 if not ("sampling_rate" in inputs and ("raw" in inputs or "array" in inputs)):
80 raise ValueError(
81 "When passing a dictionary to AutomaticSpeechRecognitionPipeline, the dict needs to contain a "
82 '"raw" key containing the numpy array representing the audio and a "sampling_rate" key, '
83 "containing the sampling_rate associated with that array"
84 )
85
86 _inputs = inputs.pop("raw", None)
87 if _inputs is None:
88 # Remove path which will not be used from `datasets`.
89 inputs.pop("path", None)
90 _inputs = inputs.pop("array", None)
91 in_sampling_rate = inputs.pop("sampling_rate")
92 extra = inputs
93 inputs = _inputs
94 if in_sampling_rate != self.feature_extractor.sampling_rate:
95 if is_torchaudio_available():
96 from torchaudio import functional as F
97 else:
98 raise ImportError(
99 "torchaudio is required to resample audio samples in AutomaticSpeechRecognitionPipeline. "
100 "The torchaudio package can be installed through: `pip install torchaudio`."
101 )
102
103 inputs = F.resample(
104 torch.from_numpy(inputs), in_sampling_rate, self.feature_extractor.sampling_rate
105 ).numpy()
106 ratio = self.feature_extractor.sampling_rate / in_sampling_rate
107 else:
108 ratio = 1
109 if stride is not None:
110 if stride[0] + stride[1] > inputs.shape[0]:
111 raise ValueError("Stride is too large for input")
112
113 # Stride needs to get the chunk length here, it's going to get
114 # swallowed by the `feature_extractor` later, and then batching
115 # can add extra data in the inputs, so we need to keep track
116 # of the original length in the stride so we can cut properly.
117 stride = (inputs.shape[0], int(round(stride[0] * ratio)), int(round(stride[1] * ratio)))

Callers

nothing calls this directly

Calls 2

chunk_iter · Method · 0.95
get · Method · 0.45

Tested by

no test coverage detected