Low-level API to the executor. Returns a "future" GenerationResult that can be waited on.
(self, request: GenerationRequest)
| 605 | raise RequestError(str(e)) from e |
| 606 | |
def submit(self, request: GenerationRequest) -> GenerationResult:
    """Submit a request to the executor (low-level API).

    Calls ``self.start()``, makes sure the request carries a client id,
    registers a ``GenerationResult`` under that id in ``self._results`` and
    enqueues the request via ``self._enqueue_request``.

    Args:
        request: The generation request to enqueue. When ``request.id`` is
            ``None``, an id is obtained from ``self._get_next_client_id()``
            and stored on the request with ``request.set_id``.

    Returns:
        A "future" ``GenerationResult`` that can be waited on.

    Raises:
        RuntimeError: If ``self.rank`` is not 0.
    """
    self.start()

    if self.rank != 0:
        raise RuntimeError(
            "Only rank 0 can submit requests.\n"
            "To fix this, ensure that the llm.generate(...) method is "
            "guarded with the `if __name__ == '__main__':` block.")

    # Reuse the caller-supplied id; otherwise allocate one and record it
    # on the request itself.
    client_id = request.id
    if client_id is None:
        client_id = self._get_next_client_id()
        request.set_id(client_id)

    logprob_params = self._get_logprob_params(request)

    future = GenerationResult(
        request,
        background_error_handler=self._handle_background_error,
        executor=self,
        disaggregated_params=request.disaggregated_params,
        logprob_params=logprob_params,
    )

    # Register the result before enqueueing the request.
    # NOTE(review): presumably so that responses arriving right after the
    # enqueue can already be matched to it -- confirm before reordering.
    self._results[client_id] = future

    # request_id returned from backend is necessary for the abort_request method.
    self._client_id_to_request_id[client_id] = self._enqueue_request(request)

    self._handle_background_error()

    return future
| 640 | |
| 641 | def shutdown(self): |
| 642 | if self.doing_shutdown: |
no test coverage detected