(request: CompletionRequest)
| 542 | |
| 543 | @app.post("/v1/completions", dependencies=[Depends(check_api_key)]) |
| 544 | async def create_completion(request: CompletionRequest): |
| 545 | error_check_ret = await check_model(request) |
| 546 | if error_check_ret is not None: |
| 547 | return error_check_ret |
| 548 | error_check_ret = check_requests(request) |
| 549 | if error_check_ret is not None: |
| 550 | return error_check_ret |
| 551 | |
| 552 | request.prompt = process_input(request.model, request.prompt) |
| 553 | |
| 554 | worker_addr = await get_worker_address(request.model) |
| 555 | for text in request.prompt: |
| 556 | max_tokens, error_check_ret = await check_length( |
| 557 | request, text, request.max_tokens, worker_addr |
| 558 | ) |
| 559 | if error_check_ret is not None: |
| 560 | return error_check_ret |
| 561 | |
| 562 | if isinstance(max_tokens, int) and max_tokens < request.max_tokens: |
| 563 | request.max_tokens = max_tokens |
| 564 | |
| 565 | if request.stream: |
| 566 | generator = generate_completion_stream_generator( |
| 567 | request, request.n, worker_addr |
| 568 | ) |
| 569 | return StreamingResponse(generator, media_type="text/event-stream") |
| 570 | else: |
| 571 | text_completions = [] |
| 572 | for text in request.prompt: |
| 573 | gen_params = await get_gen_params( |
| 574 | request.model, |
| 575 | worker_addr, |
| 576 | text, |
| 577 | temperature=request.temperature, |
| 578 | top_p=request.top_p, |
| 579 | top_k=request.top_k, |
| 580 | frequency_penalty=request.frequency_penalty, |
| 581 | presence_penalty=request.presence_penalty, |
| 582 | max_tokens=request.max_tokens, |
| 583 | logprobs=request.logprobs, |
| 584 | echo=request.echo, |
| 585 | stop=request.stop, |
| 586 | best_of=request.best_of, |
| 587 | use_beam_search=request.use_beam_search, |
| 588 | ) |
| 589 | for i in range(request.n): |
| 590 | content = asyncio.create_task( |
| 591 | generate_completion(gen_params, worker_addr) |
| 592 | ) |
| 593 | text_completions.append(content) |
| 594 | |
| 595 | try: |
| 596 | all_tasks = await asyncio.gather(*text_completions) |
| 597 | except Exception as e: |
| 598 | return create_error_response(ErrorCode.INTERNAL_ERROR, str(e)) |
| 599 | |
| 600 | choices = [] |
| 601 | usage = UsageInfo() |
nothing calls this directly
no test coverage detected
searching dependent graphs…