(self, value)
| 107 | return audio_values.cpu().float().numpy() |
| 108 | |
def put(self, value):
    """Append newly generated tokens to the cache and emit finalized audio.

    The first call stores ``value`` as the token cache unchanged; every later
    call appends ``value[:, None]`` along the last dimension. Whenever the
    cached length along that dimension is a multiple of ``self.play_steps``,
    the whole cache is passed through ``self.apply_delay_pattern_mask`` and
    the part of the result that has not been emitted yet, minus the trailing
    ``self.stride`` elements, is handed to ``self.on_finalized_audio``.

    Args:
        value: Token tensor whose first dimension is
            ``batch_size * self.decoder.num_codebooks``.
            NOTE(review): presumably 2-D on the first call and 1-D afterwards
            (it is stored as-is first, then indexed with ``[:, None]``) --
            confirm against the caller.

    Raises:
        ValueError: If the inferred batch size is greater than 1.
    """
    batch_size = value.shape[0] // self.decoder.num_codebooks
    if batch_size > 1:
        raise ValueError("ParlerTTSStreamer only supports batch size 1")

    if self.token_cache is None:
        self.token_cache = value
    else:
        self.token_cache = torch.concatenate([self.token_cache, value[:, None]], dim=-1)

    if self.token_cache.shape[-1] % self.play_steps == 0:
        audio_values = self.apply_delay_pattern_mask(self.token_cache)
        # Use an explicit end index rather than ``-self.stride``: with a
        # stride of 0, ``[start:-0]`` means ``[start:0]`` and is empty, so the
        # chunk would be dropped while ``to_yield`` still advanced past it.
        # Clamping at 0 keeps the "nothing to emit yet" case (stride longer
        # than the audio) from turning into a negative, from-the-end index.
        finalized_end = max(len(audio_values) - self.stride, 0)
        self.on_finalized_audio(audio_values[self.to_yield : finalized_end])
        # Equivalent to the former ``to_yield += len - to_yield - stride``.
        self.to_yield = finalized_end
| 123 | |
| 124 | def end(self): |
| 125 | """Flushes any remaining cache and appends the stop symbol.""" |
no test coverage detected