MCPcopy
hub / github.com/collabora/WhisperLive / update_segments

Method update_segments

whisper_live/backend/base.py:375–475  ·  view source on GitHub ↗

Processes the segments from Whisper and updates the transcript. Uses helper methods to account for differences between backends. Args: segments (list): List of segments returned by the transcriber. duration (float): Duration of the current audio chunk.

(self, segments, duration)

Source from the content-addressed store, hash-verified

373 ]
374
375 def update_segments(self, segments, duration):
376 """
377 Processes the segments from Whisper and updates the transcript.
378 Uses helper methods to account for differences between backends.
379
380 Args:
381 segments (list): List of segments returned by the transcriber.
382 duration (float): Duration of the current audio chunk.
383
384 Returns:
385 dict or None: The last processed segment (if any).
386 """
387 offset = None
388 self.current_out = ''
389 last_segment = None
390
391 # Process complete segments only if there are more than one
392 # and if the last segment's no_speech_prob is below the threshold.
393 if len(segments) > 1 and self.get_segment_no_speech_prob(segments[-1]) <= self.no_speech_thresh:
394 for s in segments[:-1]:
395 text_ = s.text
396 self.text.append(text_)
397 with self.lock:
398 start = self.timestamp_offset + self.get_segment_start(s)
399 end = self.timestamp_offset + min(duration, self.get_segment_end(s))
400 if start >= end:
401 continue
402 if self.get_segment_no_speech_prob(s) > self.no_speech_thresh:
403 continue
404 speaker = self._identify_speaker(s)
405 words = self._extract_words(s, self.timestamp_offset)
406 completed_segment = self.format_segment(start, end, text_, completed=True, speaker=speaker, words=words)
407 self.transcript.append(completed_segment)
408
409 if self.translation_queue:
410 try:
411 self.translation_queue.put(completed_segment.copy(), timeout=0.1)
412 except queue.Full:
413 logging.warning("Translation queue is full, skipping segment")
414 offset = min(duration, self.get_segment_end(s))
415
416 # Process the last segment if its no_speech_prob is acceptable.
417 if self.get_segment_no_speech_prob(segments[-1]) <= self.no_speech_thresh:
418 self.current_out += segments[-1].text
419 words = self._extract_words(segments[-1], self.timestamp_offset)
420 with self.lock:
421 last_segment = self.format_segment(
422 self.timestamp_offset + self.get_segment_start(segments[-1]),
423 self.timestamp_offset + min(duration, self.get_segment_end(segments[-1])),
424 self.current_out,
425 completed=False,
426 words=words
427 )
428
429 # Handle repeated output logic.
430 if self.current_out.strip() == self.prev_out.strip() and self.current_out != '':
431 self.same_output_count += 1
432

Calls 7

get_segment_start · Method · 0.95
get_segment_end · Method · 0.95
_identify_speaker · Method · 0.95
_extract_words · Method · 0.95
format_segment · Method · 0.95
_trim_transcript · Method · 0.95