@openapi.body(GenerationTaskReq)
@validate(json=GenerationTaskReq)
async def generate(request: Request, body: GenerationTaskReq):
    """Serve a text-generation request, answering from the cache when possible.

    The cache key is ``(body.prompt, body.max_tokens)``; the sampling
    parameters (``top_k``, ``top_p``, ``temperature``) are forwarded to the
    engine but are not part of the key. On a cache hit one of the stored
    outputs is picked at random. On a miss, or when no cache is configured,
    the prompt is tokenized (truncated to 512 tokens), submitted to the
    engine, decoded, and added to the cache if there is one.

    Returns:
        A JSON response ``{"text": <output>}`` on success, or
        ``{"detail": <message>}`` with status 406 when ``QueueFullError`` is
        raised while submitting / waiting / decoding.
    """
    logger.info(f'{request.ip}:{request.port} - "{request.method} {request.path}" - {body}')
    key = (body.prompt, body.max_tokens)

    # Fast path: only consult the cache when one is configured, and keep the
    # ``try`` limited to the lookup so nothing else runs inside the handler.
    if cache is not None:
        try:
            outputs = cache.get(key)
        except MissCacheError:
            # Expected on a miss: fall through to generation below.
            pass
        else:
            output = random.choice(outputs)
            logger.info("Cache hit")
            return json({"text": output})

    # Slow path: runs outside any ``except`` block so that engine/tokenizer
    # failures are not reported as chained onto an unrelated MissCacheError.
    inputs = tokenizer(body.prompt, truncation=True, max_length=512)
    inputs["max_tokens"] = body.max_tokens
    inputs["top_k"] = body.top_k
    inputs["top_p"] = body.top_p
    inputs["temperature"] = body.temperature
    try:
        # id(body) is unique for as long as ``body`` is alive, i.e. for the
        # duration of this request, so it is used as the engine task id.
        uid = id(body)
        engine.submit(uid, inputs)
        output = await engine.wait(uid)
        assert isinstance(output, Tensor)
        output = tokenizer.decode(output, skip_special_tokens=True)
        if cache is not None:
            cache.add(key, output)
    except QueueFullError as e:
        return json({"detail": e.args[0]}, status=406)

    return json({"text": output})
| 63 | |
| 64 | |
| 65 | @app.after_server_stop |