hub / github.com/lm-sys/FastChat / create_completion

Function create_completion

fastchat/serve/openai_api_server.py:544–619 · view source on GitHub ↗

(request: CompletionRequest)

Source from the content-addressed store, hash-verified

542
543	@app.post("/v1/completions", dependencies=[Depends(check_api_key)])
544	async def create_completion(request: CompletionRequest):
545	error_check_ret = await check_model(request)
546	if error_check_ret is not None:
547	return error_check_ret
548	error_check_ret = check_requests(request)
549	if error_check_ret is not None:
550	return error_check_ret
551
552	request.prompt = process_input(request.model, request.prompt)
553
554	worker_addr = await get_worker_address(request.model)
555	for text in request.prompt:
556	max_tokens, error_check_ret = await check_length(
557	request, text, request.max_tokens, worker_addr
558	)
559	if error_check_ret is not None:
560	return error_check_ret
561
562	if isinstance(max_tokens, int) and max_tokens < request.max_tokens:
563	request.max_tokens = max_tokens
564
565	if request.stream:
566	generator = generate_completion_stream_generator(
567	request, request.n, worker_addr
568	)
569	return StreamingResponse(generator, media_type="text/event-stream")
570	else:
571	text_completions = []
572	for text in request.prompt:
573	gen_params = await get_gen_params(
574	request.model,
575	worker_addr,
576	text,
577	temperature=request.temperature,
578	top_p=request.top_p,
579	top_k=request.top_k,
580	frequency_penalty=request.frequency_penalty,
581	presence_penalty=request.presence_penalty,
582	max_tokens=request.max_tokens,
583	logprobs=request.logprobs,
584	echo=request.echo,
585	stop=request.stop,
586	best_of=request.best_of,
587	use_beam_search=request.use_beam_search,
588	)
589	for i in range(request.n):
590	content = asyncio.create_task(
591	generate_completion(gen_params, worker_addr)
592	)
593	text_completions.append(content)
594
595	try:
596	all_tasks = await asyncio.gather(*text_completions)
597	except Exception as e:
598	return create_error_response(ErrorCode.INTERNAL_ERROR, str(e))
599
600	choices = []
601	usage = UsageInfo()

Callers

nothing calls this directly

Calls 14

UsageInfo (Class) · 0.90

CompletionResponseChoice (Class) · 0.90

CompletionResponse (Class) · 0.90

check_model (Function) · 0.85

check_requests (Function) · 0.85

process_input (Function) · 0.85

check_length (Function) · 0.85

generate_completion_stream_generator (Function) · 0.85

get_gen_params (Function) · 0.85

generate_completion (Function) · 0.85

create_error_response (Function) · 0.85

create_openai_logprobs (Function) · 0.85

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…