MCPcopy Index your code
hub / github.com/Standard-Intelligence/hertz-dev / process_audio

Method process_audio

inference_server.py:95–121  ·  view source on GitHub ↗
(self, audio_data)

Source from the content-addressed store, hash-verified

93 print_colored(f"Initialized prompt buffer with {len(self.prompt_buffer)} chunks", "grey")
94
async def process_audio(self, audio_data):
    """Return the next chunk of output audio for one chunk of input audio.

    While ``self.chunks_until_live`` is positive, the incoming audio is
    ignored and the next entry of ``self.prompt_buffer`` is returned
    instead. Once the counter reaches zero, every call runs the model on
    the incoming audio and returns the model's output.

    Args:
        audio_data: Incoming audio for this step. It is passed to
            ``T.from_numpy``, so it is presumably a NumPy array
            (TODO confirm dtype/length against the caller).

    Returns:
        In replay mode, the stored entry from ``self.prompt_buffer``.
        In live mode, the model output converted with
        ``.float().cpu().numpy()``.
    """
    # Local import so this block does not depend on the file's import header.
    import asyncio

    if self.chunks_until_live > 0:
        print_colored(f"Serving from prompt buffer, {self.chunks_until_live} chunks left", "grey")
        # Index counts up from the start of the buffer as the counter runs
        # down. NOTE(review): the factor 8 looks like chunks-per-second;
        # confirm against where prompt_buffer is built.
        chunk = self.prompt_buffer[int(self.replay_seconds * 8) - self.chunks_until_live]
        self.chunks_until_live -= 1

        if self.chunks_until_live == 0:
            print_colored("Switching to live processing mode", "green")

        # Pace the replay without blocking the event loop: the previous
        # time.sleep() call froze every other coroutine for the duration.
        await asyncio.sleep(0.05)
        return chunk

    # Shape the incoming samples to (1, 1, N), then stack them with the
    # model's previous output along dim 1.
    audio_tensor = T.from_numpy(audio_data).to(device)
    audio_tensor = audio_tensor.reshape(1, 1, -1)
    audio_tensor = T.cat([audio_tensor, self.next_model_audio], dim=1)

    with T.autocast(device_type=device, dtype=T.bfloat16), T.inference_mode():
        curr_model_audio = self.model.next_audio_from_audio(
            audio_tensor,
            temps=TEMPS
        )
    print(f"Recorded audio shape {self.recorded_audio.shape}, audio tensor shape {audio_tensor.shape}")
    # Append this step's stacked audio to the running recording, kept on CPU.
    self.recorded_audio = T.cat([self.recorded_audio.cpu(), audio_tensor.squeeze(0).cpu()], dim=-1)

    # The model's output is fed back in as the second entry on the next call.
    self.next_model_audio = curr_model_audio

    return curr_model_audio.float().cpu().numpy()
122
123 def cleanup(self):
124 print_colored("Cleaning up audio processor...", "blue")

Callers 1

websocket_endpoint · Function · 0.45

Calls 2

print_colored · Function · 0.90
next_audio_from_audio · Method · 0.80

Tested by

no test coverage detected