Function generate

examples/tutorial/opt/inference/opt_server.py:36–62 · view source on GitHub ↗

(request: Request, body: GenerationTaskReq)

Source from the content-addressed store, hash-verified

@openapi.body(GenerationTaskReq)
@validate(json=GenerationTaskReq)
async def generate(request: Request, body: GenerationTaskReq):
    """Serve one text-generation request, answering from the cache when possible.

    The request is logged, then a lookup is attempted in the module-level
    ``cache`` under the key ``(body.prompt, body.max_tokens)``. On a hit, one
    of the cached outputs is picked at random. Otherwise the prompt is
    tokenized, the sampling settings from ``body`` are attached, the job is
    submitted to the module-level ``engine`` and its result is awaited,
    decoded, and (when a cache is configured) stored under the same key.

    Args:
        request: Incoming HTTP request; only used for the log line.
        body: Validated request payload providing ``prompt``, ``max_tokens``,
            ``top_k``, ``top_p`` and ``temperature``.

    Returns:
        A JSON response ``{"text": ...}`` on success, or a JSON response
        ``{"detail": ...}`` with status 406 when ``QueueFullError`` is raised
        while the job is being processed.
    """
    logger.info(f'{request.ip}:{request.port} - "{request.method} {request.path}" - {body}')
    cache_key = (body.prompt, body.max_tokens)

    try:
        if cache is None:
            # No cache configured: take the same path as a lookup miss.
            raise MissCacheError()
        # NOTE(review): presumably cache.get raises MissCacheError on a miss
        # and returns a non-empty sequence on a hit -- confirm against the
        # cache implementation.
        text = random.choice(cache.get(cache_key))
        logger.info("Cache hit")
    except MissCacheError:
        model_inputs = tokenizer(body.prompt, truncation=True, max_length=512)
        # Attach the generation settings to the tokenized inputs, in the same
        # order as the request fields are declared here.
        for field in ("max_tokens", "top_k", "top_p", "temperature"):
            model_inputs[field] = getattr(body, field)
        try:
            # The identity of the request body doubles as the job id used to
            # pair the submit call with the matching wait call.
            request_id = id(body)
            engine.submit(request_id, model_inputs)
            generated = await engine.wait(request_id)
            assert isinstance(generated, Tensor)
            text = tokenizer.decode(generated, skip_special_tokens=True)
            if cache is not None:
                cache.add(cache_key, text)
        except QueueFullError as err:
            return json({"detail": err.args[0]}, status=406)

    return json({"text": text})
63
64
65	@app.after_server_stop

nothing calls this directly

MissCacheError (Class) · 0.90

tokenizer (Function) · 0.85

info (Method) · 0.45

get (Method) · 0.45

wait (Method) · 0.45

decode (Method) · 0.45

add (Method) · 0.45

no test coverage detected

searching dependent graphs…