(
text,
tokens,
accepted,
prompt_size,
prompt_tps,
n,
tic,
finish_reason=None,
)
| 411 | |
| 412 | |
def _make_response(
    text,
    tokens,
    accepted,
    prompt_size,
    prompt_tps,
    n,
    tic,
    finish_reason=None,
):
    """Assemble a ``GenerationResponse`` from the given generation state.

    ``text``, ``tokens``, ``accepted``, ``prompt_size``, ``prompt_tps``,
    ``n`` and ``finish_reason`` are forwarded to ``GenerationResponse``
    unchanged, in positional order. Two extra values are computed here and
    inserted between ``n`` and ``finish_reason``:

    * ``n`` divided by the seconds elapsed since ``tic`` (a
      ``time.perf_counter()`` reading taken by the caller) -- presumably the
      generation throughput; confirm against ``GenerationResponse``.
    * ``mx.get_peak_memory()`` divided by ``1e9``.

    Raises:
        ZeroDivisionError: if no time has elapsed since ``tic``.
    """
    # Read the clock before querying memory so the order of side effects
    # matches the order in which the positional arguments are passed.
    elapsed = time.perf_counter() - tic
    rate = n / elapsed
    peak_mem = mx.get_peak_memory() / 1e9

    return GenerationResponse(
        text,
        tokens,
        accepted,
        prompt_size,
        prompt_tps,
        n,
        rate,
        peak_mem,
        finish_reason,
    )
| 427 | |
| 428 | |
| 429 | def stream_generate( |
no test coverage detected