MCP · copy
hub / github.com/mli/autocut / transcribe

Method transcribe

autocut/whisper_model.py:179–268  ·  view source on GitHub ↗
(
        self,
        input: srt,
        audio: np.ndarray,
        speech_array_indices: List[SPEECH_ARRAY_INDEX],
        lang: LANG,
        prompt: str,
    )

Source from the content-addressed store, hash-verified

177 self.whisper_model = partial(openai.Audio.transcribe, model=model_name)
178
179 def transcribe(
180 self,
181 input: srt,
182 audio: np.ndarray,
183 speech_array_indices: List[SPEECH_ARRAY_INDEX],
184 lang: LANG,
185 prompt: str,
186 ) -> List[srt.Subtitle]:
187 res = []
188 name, _ = os.path.splitext(input)
189 raw_audio = AudioSegment.from_file(input)
190 ms_bytes = len(raw_audio[:1].raw_data)
191 audios: List[
192 TypedDict(
193 "AudioInfo", {"input": str, "audio": AudioSegment, "start_ms": float}
194 )
195 ] = []
196
197 i = 0
198 for index in speech_array_indices:
199 start = int(index["start"]) / self.sample_rate * 1000
200 end = int(index["end"]) / self.sample_rate * 1000
201 audio_seg = raw_audio[start:end]
202 if len(audio_seg.raw_data) < self.split_audio_bytes:
203 temp_file = f"{name}_temp_{i}.wav"
204 audios.append(
205 {"input": temp_file, "audio": audio_seg, "start_ms": start}
206 )
207 else:
208 logging.info(
209 f"Long audio with a size({len(audio_seg.raw_data)} bytes) greater than 25M({25 * 2 ** 20} bytes) "
210 "will be segmented"
211 "due to Openai's API restrictions on files smaller than 25M"
212 )
213 split_num = len(audio_seg.raw_data) // self.split_audio_bytes + 1
214 for j in range(split_num):
215 temp_file = f"{name}_{i}_temp_{j}.wav"
216 split_audio = audio_seg[
217 j
218 * self.split_audio_bytes
219 // ms_bytes : (j + 1)
220 * self.split_audio_bytes
221 // ms_bytes
222 ]
223 audios.append(
224 {
225 "input": temp_file,
226 "audio": split_audio,
227 "start_ms": start + j * self.split_audio_bytes // ms_bytes,
228 }
229 )
230 i += 1
231
232 if len(audios) > 1:
233 from multiprocessing import Pool
234
235 pbar = tqdm(total=len(audios))
236

Callers 5

_transcribe · Method · 0.45
_transcribe · Method · 0.45
_transcribe · Method · 0.45
transcribe · Method · 0.45
transcribe · Method · 0.45

Calls 1

_transcribe · Method · 0.95

Tested by

no test coverage detected