Function generate_streaming_response

optillm/server.py:551–578 · view source on GitHub ↗

(final_response, model)

Source from the content-addressed store, hash-verified

549	return responses, total_tokens
550
def generate_streaming_response(final_response, model):
    """Yield an already-complete response as a server-sent-event stream.

    The response is not streamed token by token: each completion is sent as
    one ``chat.completion.chunk`` event that carries the assistant role, the
    full content and ``finish_reason == "stop"`` together, and the stream is
    then closed with a ``data: [DONE]`` event.

    Args:
        final_response: Either a single completion, or a ``list`` of
            completions. A list produces one event per item, with the item's
            position used as the choice ``index``; anything else produces a
            single event with index 0. An empty list produces only the
            terminating ``[DONE]`` event.
        model: Value echoed in the ``model`` field of every chunk.

    Yields:
        str: ``"data: <json>\\n\\n"`` for each completion, followed by
        ``"data: [DONE]\\n\\n"``.
    """
    # One id / creation time shared by every chunk of this stream. The id is
    # derived from the wall clock in milliseconds, the timestamp in seconds.
    response_id = f"chatcmpl-{int(time.time()*1000)}"
    created = int(time.time())

    def _format_chunk(content, index):
        # Key order is kept stable so the serialized event text is the same
        # for the single-response and the list case.
        return "data: " + json.dumps({
            "id": response_id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": model,
            "choices": [{
                "delta": {"role": "assistant", "content": content},
                "index": index,
                "finish_reason": "stop",
            }],
        }) + "\n\n"

    # Treat a lone response as a one-element batch so both shapes share the
    # same emission path (a non-list value therefore gets index 0).
    if isinstance(final_response, list):
        responses = final_response
    else:
        responses = [final_response]

    for index, response in enumerate(responses):
        yield _format_chunk(response, index)

    # Yield the final message to indicate the stream has ended
    yield "data: [DONE]\n\n"
579
580	def extract_contents(response_obj):
581	contents = []

proxyFunction · 0.85

no outgoing calls

no test coverage detected