hub / github.com/chidiwilliams/buzz / preprocess

Method preprocess

buzz/transformers_whisper.py:60–180 · view source on GitHub ↗

(self, inputs, chunk_length_s=0, stride_length_s=None)

Source from the content-addressed store, hash-verified

58
59	# Copy of transformers `AutomaticSpeechRecognitionPipeline.preprocess` method with call to custom `chunk_iter`
60	def preprocess(self, inputs, chunk_length_s=0, stride_length_s=None):
61	if isinstance(inputs, str):
62	if inputs.startswith("http://") or inputs.startswith("https://"):
63	# We need to actually check for a real protocol, otherwise it's impossible to use a local file
64	# like http_huggingface_co.png
65	inputs = requests.get(inputs).content
66	else:
67	with open(inputs, "rb") as f:
68	inputs = f.read()
69
70	if isinstance(inputs, bytes):
71	inputs = ffmpeg_read(inputs, self.feature_extractor.sampling_rate)
72
73	stride = None
74	extra = {}
75	if isinstance(inputs, dict):
76	stride = inputs.pop("stride", None)
77	# Accepting `"array"` which is the key defined in `datasets` for
78	# better integration
79	if not ("sampling_rate" in inputs and ("raw" in inputs or "array" in inputs)):
80	raise ValueError(
81	"When passing a dictionary to AutomaticSpeechRecognitionPipeline, the dict needs to contain a "
82	'"raw" key containing the numpy array representing the audio and a "sampling_rate" key, '
83	"containing the sampling_rate associated with that array"
84	)
85
86	_inputs = inputs.pop("raw", None)
87	if _inputs is None:
88	# Remove path which will not be used from `datasets`.
89	inputs.pop("path", None)
90	_inputs = inputs.pop("array", None)
91	in_sampling_rate = inputs.pop("sampling_rate")
92	extra = inputs
93	inputs = _inputs
94	if in_sampling_rate != self.feature_extractor.sampling_rate:
95	if is_torchaudio_available():
96	from torchaudio import functional as F
97	else:
98	raise ImportError(
99	"torchaudio is required to resample audio samples in AutomaticSpeechRecognitionPipeline. "
100	"The torchaudio package can be installed through: `pip install torchaudio`."
101	)
102
103	inputs = F.resample(
104	torch.from_numpy(inputs), in_sampling_rate, self.feature_extractor.sampling_rate
105	).numpy()
106	ratio = self.feature_extractor.sampling_rate / in_sampling_rate
107	else:
108	ratio = 1
109	if stride is not None:
110	if stride[0] + stride[1] > inputs.shape[0]:
111	raise ValueError("Stride is too large for input")
112
113	# Stride needs to get the chunk length here, it's going to get
114	# swallowed by the `feature_extractor` later, and then batching
115	# can add extra data in the inputs, so we need to keep track
116	# of the original length in the stride so we can cut properly.
117	stride = (inputs.shape[0], int(round(stride[0] * ratio)), int(round(stride[1] * ratio)))

Callers

nothing calls this directly

Calls 2

chunk_iter · Method · 0.95

get · Method · 0.45

Tested by

no test coverage detected