(self)
| 271 | return ret[0] |
| 272 | |
def step(self) -> List[str]:
    """Run one schedule/generate/update iteration.

    Asks the request handler for a batch, converts it to model inputs
    with ``prepare_input``, runs the ``step_`` coroutine to completion
    and records the produced tokens on the request handler.

    Note:
        ``step_`` is driven with ``asyncio.run``, so this method blocks
        until the coroutine finishes and cannot be called from code that
        is already running inside an event loop.

    Returns:
        Whatever ``self.request_handler.update()`` reports as finished
        after the new tokens have been appended.
    """
    scheduled_batch = self.request_handler.schedule()
    token_ids, meta_data = self.prepare_input(scheduled_batch)

    # TODO: padding_id is only needed to build attn_mask; drop it once
    # the nopad version is supported.
    generated = asyncio.run(self.step_(token_ids, meta_data))

    # Feed the freshly generated tokens back into the request handler as
    # an int tensor, then let it report the sequences that finished.
    self.request_handler.append_next_tokens(
        torch.tensor(generated, dtype=torch.int)
    )
    return self.request_handler.update()
| 285 | |
| 286 | def kill_workers(self): |
| 287 | """ |
nothing calls this directly
no test coverage detected