Processes the segments from Whisper and updates the transcript. Uses helper methods to account for differences between backends. Args: segments (list): List of segments returned by the transcriber. duration (float): Duration of the current audio chunk.
(self, segments, duration)
| 373 | ] |
| 374 | |
| 375 | def update_segments(self, segments, duration): |
| 376 | """ |
| 377 | Processes the segments from Whisper and updates the transcript. |
| 378 | Uses helper methods to account for differences between backends. |
| 379 | |
| 380 | Args: |
| 381 | segments (list): List of segments returned by the transcriber. |
| 382 | duration (float): Duration of the current audio chunk. |
| 383 | |
| 384 | Returns: |
| 385 | dict or None: The last processed segment (if any). |
| 386 | """ |
| 387 | offset = None |
| 388 | self.current_out = '' |
| 389 | last_segment = None |
| 390 | |
| 391 | # Process complete segments only if there are more than one |
| 392 | # and if the last segment's no_speech_prob is below the threshold. |
| 393 | if len(segments) > 1 and self.get_segment_no_speech_prob(segments[-1]) <= self.no_speech_thresh: |
| 394 | for s in segments[:-1]: |
| 395 | text_ = s.text |
| 396 | self.text.append(text_) |
| 397 | with self.lock: |
| 398 | start = self.timestamp_offset + self.get_segment_start(s) |
| 399 | end = self.timestamp_offset + min(duration, self.get_segment_end(s)) |
| 400 | if start >= end: |
| 401 | continue |
| 402 | if self.get_segment_no_speech_prob(s) > self.no_speech_thresh: |
| 403 | continue |
| 404 | speaker = self._identify_speaker(s) |
| 405 | words = self._extract_words(s, self.timestamp_offset) |
| 406 | completed_segment = self.format_segment(start, end, text_, completed=True, speaker=speaker, words=words) |
| 407 | self.transcript.append(completed_segment) |
| 408 | |
| 409 | if self.translation_queue: |
| 410 | try: |
| 411 | self.translation_queue.put(completed_segment.copy(), timeout=0.1) |
| 412 | except queue.Full: |
| 413 | logging.warning("Translation queue is full, skipping segment") |
| 414 | offset = min(duration, self.get_segment_end(s)) |
| 415 | |
| 416 | # Process the last segment if its no_speech_prob is acceptable. |
| 417 | if self.get_segment_no_speech_prob(segments[-1]) <= self.no_speech_thresh: |
| 418 | self.current_out += segments[-1].text |
| 419 | words = self._extract_words(segments[-1], self.timestamp_offset) |
| 420 | with self.lock: |
| 421 | last_segment = self.format_segment( |
| 422 | self.timestamp_offset + self.get_segment_start(segments[-1]), |
| 423 | self.timestamp_offset + min(duration, self.get_segment_end(segments[-1])), |
| 424 | self.current_out, |
| 425 | completed=False, |
| 426 | words=words |
| 427 | ) |
| 428 | |
| 429 | # Handle repeated output logic. |
| 430 | if self.current_out.strip() == self.prev_out.strip() and self.current_out != '': |
| 431 | self.same_output_count += 1 |
| 432 |