hub / github.com/1rgs/claude-code-proxy / handle_streaming

Function handle_streaming

server.py:922–1211 · view source on GitHub ↗

Handle streaming responses from LiteLLM and convert to Anthropic format.

(response_generator, original_request: MessagesRequest)

Source retrieved from the content-addressed store (hash-verified)

920
921
922	async def handle_streaming(response_generator, original_request: MessagesRequest):
923	"""Handle streaming responses from LiteLLM and convert to Anthropic format."""
924	try:
925	# Send message_start event
926	message_id = f"msg_{uuid.uuid4().hex[:24]}" # Format similar to Anthropic's IDs
927
928	message_data = {
929	"type": "message_start",
930	"message": {
931	"id": message_id,
932	"type": "message",
933	"role": "assistant",
934	"model": original_request.model,
935	"content": [],
936	"stop_reason": None,
937	"stop_sequence": None,
938	"usage": {
939	"input_tokens": 0,
940	"cache_creation_input_tokens": 0,
941	"cache_read_input_tokens": 0,
942	"output_tokens": 0,
943	},
944	},
945	}
946	yield f"event: message_start\ndata: {json.dumps(message_data)}\n\n"
947
948	# Content block index for the first text block
949	yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': 0, 'content_block': {'type': 'text', 'text': ''}})}\n\n"
950
951	# Send a ping to keep the connection alive (Anthropic does this)
952	yield f"event: ping\ndata: {json.dumps({'type': 'ping'})}\n\n"
953
954	tool_index = None
955	current_tool_call = None
956	tool_content = ""
957	accumulated_text = "" # Track accumulated text content
958	text_sent = False # Track if we've sent any text content
959	text_block_closed = False # Track if text block is closed
960	input_tokens = 0
961	output_tokens = 0
962	has_sent_stop_reason = False
963	last_tool_index = 0
964
965	# Process each chunk
966	async for chunk in response_generator:
967	try:
968	# Check if this is the end of the response with usage data
969	if hasattr(chunk, "usage") and chunk.usage is not None:
970	if hasattr(chunk.usage, "prompt_tokens"):
971	input_tokens = chunk.usage.prompt_tokens
972	if hasattr(chunk.usage, "completion_tokens"):
973	output_tokens = chunk.usage.completion_tokens
974
975	# Handle text content
976	if hasattr(chunk, "choices") and len(chunk.choices) > 0:
977	choice = chunk.choices[0]
978
979	# Get the delta from the choice

Callers 1

create_message · Function · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected