MCPcopy
hub / github.com/1rgs/claude-code-proxy / handle_streaming

Function handle_streaming

server.py:922–1211  ·  view source on GitHub ↗

Handle streaming responses from LiteLLM and convert to Anthropic format.

(response_generator, original_request: MessagesRequest)

Source from the content-addressed store, hash-verified

920
921
922async def handle_streaming(response_generator, original_request: MessagesRequest):
923 """Handle streaming responses from LiteLLM and convert to Anthropic format."""
924 try:
925 # Send message_start event
926 message_id = f"msg_{uuid.uuid4().hex[:24]}" # Format similar to Anthropic's IDs
927
928 message_data = {
929 "type": "message_start",
930 "message": {
931 "id": message_id,
932 "type": "message",
933 "role": "assistant",
934 "model": original_request.model,
935 "content": [],
936 "stop_reason": None,
937 "stop_sequence": None,
938 "usage": {
939 "input_tokens": 0,
940 "cache_creation_input_tokens": 0,
941 "cache_read_input_tokens": 0,
942 "output_tokens": 0,
943 },
944 },
945 }
946 yield f"event: message_start\ndata: {json.dumps(message_data)}\n\n"
947
948 # Content block index for the first text block
949 yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': 0, 'content_block': {'type': 'text', 'text': ''}})}\n\n"
950
951 # Send a ping to keep the connection alive (Anthropic does this)
952 yield f"event: ping\ndata: {json.dumps({'type': 'ping'})}\n\n"
953
954 tool_index = None
955 current_tool_call = None
956 tool_content = ""
957 accumulated_text = "" # Track accumulated text content
958 text_sent = False # Track if we've sent any text content
959 text_block_closed = False # Track if text block is closed
960 input_tokens = 0
961 output_tokens = 0
962 has_sent_stop_reason = False
963 last_tool_index = 0
964
965 # Process each chunk
966 async for chunk in response_generator:
967 try:
968 # Check if this is the end of the response with usage data
969 if hasattr(chunk, "usage") and chunk.usage is not None:
970 if hasattr(chunk.usage, "prompt_tokens"):
971 input_tokens = chunk.usage.prompt_tokens
972 if hasattr(chunk.usage, "completion_tokens"):
973 output_tokens = chunk.usage.completion_tokens
974
975 # Handle text content
976 if hasattr(chunk, "choices") and len(chunk.choices) > 0:
977 choice = chunk.choices[0]
978
979 # Get the delta from the choice

Callers 1

create_message · Function · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected