hub / github.com/mli/autocut / transcribe

Method transcribe

autocut/whisper_model.py:179–268 · view source on GitHub ↗

(
        self,
        input: srt,
        audio: np.ndarray,
        speech_array_indices: List[SPEECH_ARRAY_INDEX],
        lang: LANG,
        prompt: str,
    )

Source from the content-addressed store, hash-verified

177	self.whisper_model = partial(openai.Audio.transcribe, model=model_name)
178
179	def transcribe(
180	self,
181	input: srt,
182	audio: np.ndarray,
183	speech_array_indices: List[SPEECH_ARRAY_INDEX],
184	lang: LANG,
185	prompt: str,
186	) -> List[srt.Subtitle]:
187	res = []
188	name, _ = os.path.splitext(input)
189	raw_audio = AudioSegment.from_file(input)
190	ms_bytes = len(raw_audio[:1].raw_data)
191	audios: List[
192	TypedDict(
193	"AudioInfo", {"input": str, "audio": AudioSegment, "start_ms": float}
194	)
195	] = []
196
197	i = 0
198	for index in speech_array_indices:
199	start = int(index["start"]) / self.sample_rate * 1000
200	end = int(index["end"]) / self.sample_rate * 1000
201	audio_seg = raw_audio[start:end]
202	if len(audio_seg.raw_data) < self.split_audio_bytes:
203	temp_file = f"{name}_temp_{i}.wav"
204	audios.append(
205	{"input": temp_file, "audio": audio_seg, "start_ms": start}
206	)
207	else:
208	logging.info(
209	f"Long audio with a size({len(audio_seg.raw_data)} bytes) greater than 25M({25 * 2 ** 20} bytes) "
210	"will be segmented"
211	"due to Openai's API restrictions on files smaller than 25M"
212	)
213	split_num = len(audio_seg.raw_data) // self.split_audio_bytes + 1
214	for j in range(split_num):
215	temp_file = f"{name}_{i}_temp_{j}.wav"
216	split_audio = audio_seg[
217	j
218	* self.split_audio_bytes
219	// ms_bytes : (j + 1)
220	* self.split_audio_bytes
221	// ms_bytes
222	]
223	audios.append(
224	{
225	"input": temp_file,
226	"audio": split_audio,
227	"start_ms": start + j * self.split_audio_bytes // ms_bytes,
228	}
229	)
230	i += 1
231
232	if len(audios) > 1:
233	from multiprocessing import Pool
234
235	pbar = tqdm(total=len(audios))
236

Callers 5

_transcribe · Method · 0.45

transcribe · Method · 0.45

Calls 1

_transcribe · Method · 0.95

Tested by

no test coverage detected