MCPcopy
hub / github.com/NVIDIA/TensorRT-LLM / generate

Method generate

examples/apps/fastapi_server.py:54–89  ·  view source on GitHub ↗

Generate completion for the request. The request should be a JSON object with the following fields: - prompt: the prompt to use for the generation. - streaming: whether to stream the results or not. - other fields: the sampling parameters (see `SamplingParams` for details).

(self, request: Request)

Source from the content-addressed store, hash-verified

52 return Response(status_code=200)
53
async def generate(self, request: Request) -> Response:
    ''' Generate completion for the request.

    The request should be a JSON object with the following fields:
    - prompt: the prompt to use for the generation (defaults to "").
    - streaming: whether to stream the results or not (defaults to False).
    - other fields: the sampling parameters (See `SamplingParams` for details).

    Returns:
        A `StreamingResponse` of UTF-8 encoded text diffs when `streaming`
        is set, otherwise a `JSONResponse` of the form `{"text": ...}`.
        A malformed request or a `RequestError` yields a 400 response. A
        `CppExecutorError` raises SIGINT to shut the server down and
        yields a 500 response.
    '''
    # Reject bodies that are not valid JSON with a 400 instead of letting
    # the decode error escape the handler (JSONDecodeError and
    # UnicodeDecodeError are both ValueError subclasses).
    try:
        request_dict = await request.json()
    except ValueError as e:
        return JSONResponse(content=f"Invalid JSON body: {e}",
                            status_code=HTTPStatus.BAD_REQUEST)

    # Valid JSON may still be a list/string/number/null, none of which
    # support the keyed `pop` calls below.
    if not isinstance(request_dict, dict):
        return JSONResponse(content="Request body must be a JSON object",
                            status_code=HTTPStatus.BAD_REQUEST)

    prompt = request_dict.pop("prompt", "")
    streaming = request_dict.pop("streaming", False)

    # Every remaining field is forwarded verbatim as a sampling parameter,
    # so unknown or ill-typed fields are a client error, not a server one.
    try:
        sampling_params = SamplingParams(**request_dict)
    except (TypeError, ValueError) as e:
        return JSONResponse(content=str(e),
                            status_code=HTTPStatus.BAD_REQUEST)

    try:
        promise = self.llm.generate_async(prompt,
                                          streaming=streaming,
                                          sampling_params=sampling_params)

        async def stream_results() -> AsyncGenerator[bytes, None]:
            # Emit only the newly generated text of the first output on
            # each iteration.
            async for output in promise:
                yield output.outputs[0].text_diff.encode("utf-8")

        if streaming:
            return StreamingResponse(stream_results())

        # Non-streaming case
        await promise.aresult()
        return JSONResponse({"text": promise.outputs[0].text})
    except RequestError as e:
        return JSONResponse(content=str(e),
                            status_code=HTTPStatus.BAD_REQUEST)
    except CppExecutorError:
        # If internal executor error is raised, shutdown the server
        signal.raise_signal(signal.SIGINT)
        # Still answer this request explicitly rather than falling off the
        # end of the handler and returning None.
        return JSONResponse(
            content="Internal executor error; server is shutting down",
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR)
90
91 async def __call__(self, host, port):
92 config = uvicorn.Config(self.app,

Callers 15

mainFunction · 0.45
mainFunction · 0.45
__call__Method · 0.45
eval_trt_llmFunction · 0.45
eval_hfFunction · 0.45
mainFunction · 0.45
mainFunction · 0.45
mainFunction · 0.45
mainFunction · 0.45
run_MTPFunction · 0.45
run_Eagle3Function · 0.45
run_ngramFunction · 0.45

Calls 4

SamplingParamsClass · 0.85
popMethod · 0.80
generate_asyncMethod · 0.45
aresultMethod · 0.45

Tested by

no test coverage detected