MCPcopy
hub / github.com/SesameAILabs/csm / _tokenize_segment

Method _tokenize_segment

generator.py:98–106  ·  view source on GitHub ↗

Returns: (seq_len, 33), (seq_len, 33)

(self, segment: Segment)

Source from the content-addressed store, hash-verified

96 return torch.cat(frame_tokens, dim=0), torch.cat(frame_masks, dim=0)
97
def _tokenize_segment(self, segment: Segment) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Tokenize the text and audio of one segment and join them, text first.

    The text part is produced from ``segment.text`` and ``segment.speaker``,
    the audio part from ``segment.audio``; both token tensors and both mask
    tensors are concatenated along dim 0.

    Returns:
        (seq_len, 33), (seq_len, 33)
    """
    txt_ids, txt_mask = self._tokenize_text_segment(segment.text, segment.speaker)
    aud_ids, aud_mask = self._tokenize_audio(segment.audio)

    # Text frames precede audio frames in both the token and mask outputs.
    merged_ids = torch.cat((txt_ids, aud_ids), dim=0)
    merged_mask = torch.cat((txt_mask, aud_mask), dim=0)
    return merged_ids, merged_mask
107
108 @torch.inference_mode()
109 def generate(

Callers 1

generate · Method · 0.95

Calls 2

_tokenize_audio · Method · 0.95

Tested by

no test coverage detected