Generate completion for the request. The request should be a JSON object with the following fields: - prompt: the prompt to use for the generation. - streaming: whether to stream the results or not. - other fields: the sampling parameters (See `SamplingParams` for details).
(self, request: Request)
| 52 | return Response(status_code=200) |
| 53 | |
async def generate(self, request: Request) -> Response:
    ''' Generate completion for the request.

    The request should be a JSON object with the following fields:
    - prompt: the prompt to use for the generation (defaults to "").
    - streaming: whether to stream the results or not (defaults to False).
    - other fields: the sampling parameters (See `SamplingParams` for details).

    Returns:
        A `StreamingResponse` yielding UTF-8 encoded text diffs when
        `streaming` is truthy; otherwise a `JSONResponse` of the form
        `{"text": <generated text>}`.
        A 400 `JSONResponse` is returned when the sampling parameters are
        rejected or a `RequestError` is raised; a 500 `JSONResponse` is
        returned after a `CppExecutorError` has triggered server shutdown.
    '''
    request_dict = await request.json()

    prompt = request_dict.pop("prompt", "")
    streaming = request_dict.pop("streaming", False)

    # Everything left in the payload is forwarded as sampling parameters.
    # Unknown or invalid fields are a client error, not a server fault.
    try:
        sampling_params = SamplingParams(**request_dict)
    except (TypeError, ValueError) as e:
        return JSONResponse(content=str(e),
                            status_code=HTTPStatus.BAD_REQUEST)

    try:
        promise = self.llm.generate_async(prompt,
                                          streaming=streaming,
                                          sampling_params=sampling_params)

        if streaming:

            async def stream_results() -> AsyncGenerator[bytes, None]:
                async for output in promise:
                    yield output.outputs[0].text_diff.encode("utf-8")

            # NOTE: the generator body only runs once the response is being
            # sent, i.e. after this method has returned, so exceptions
            # raised mid-stream are not seen by the handlers below.
            return StreamingResponse(stream_results())

        # Non-streaming case: wait for completion, then return the full text.
        await promise.aresult()
        return JSONResponse({"text": promise.outputs[0].text})
    except RequestError as e:
        return JSONResponse(content=str(e),
                            status_code=HTTPStatus.BAD_REQUEST)
    except CppExecutorError:
        # If internal executor error is raised, shutdown the server
        signal.raise_signal(signal.SIGINT)
        # Still answer the in-flight request explicitly instead of falling
        # through and returning None.
        return JSONResponse(
            content="Internal executor error; server is shutting down.",
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR)
| 90 | |
| 91 | async def __call__(self, host, port): |
| 92 | config = uvicorn.Config(self.app, |
no test coverage detected