Handle streaming responses from LiteLLM and convert them to Anthropic's streaming event format.
handle_streaming(response_generator, original_request: MessagesRequest)
| 920 | |
| 921 | |
| 922 | async def handle_streaming(response_generator, original_request: MessagesRequest): |
| 923 | """Handle streaming responses from LiteLLM and convert to Anthropic format.""" |
| 924 | try: |
| 925 | # Send message_start event |
| 926 | message_id = f"msg_{uuid.uuid4().hex[:24]}" # Format similar to Anthropic's IDs |
| 927 | |
| 928 | message_data = { |
| 929 | "type": "message_start", |
| 930 | "message": { |
| 931 | "id": message_id, |
| 932 | "type": "message", |
| 933 | "role": "assistant", |
| 934 | "model": original_request.model, |
| 935 | "content": [], |
| 936 | "stop_reason": None, |
| 937 | "stop_sequence": None, |
| 938 | "usage": { |
| 939 | "input_tokens": 0, |
| 940 | "cache_creation_input_tokens": 0, |
| 941 | "cache_read_input_tokens": 0, |
| 942 | "output_tokens": 0, |
| 943 | }, |
| 944 | }, |
| 945 | } |
| 946 | yield f"event: message_start\ndata: {json.dumps(message_data)}\n\n" |
| 947 | |
| 948 | # Content block index for the first text block |
| 949 | yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': 0, 'content_block': {'type': 'text', 'text': ''}})}\n\n" |
| 950 | |
| 951 | # Send a ping to keep the connection alive (Anthropic does this) |
| 952 | yield f"event: ping\ndata: {json.dumps({'type': 'ping'})}\n\n" |
| 953 | |
| 954 | tool_index = None |
| 955 | current_tool_call = None |
| 956 | tool_content = "" |
| 957 | accumulated_text = "" # Track accumulated text content |
| 958 | text_sent = False # Track if we've sent any text content |
| 959 | text_block_closed = False # Track if text block is closed |
| 960 | input_tokens = 0 |
| 961 | output_tokens = 0 |
| 962 | has_sent_stop_reason = False |
| 963 | last_tool_index = 0 |
| 964 | |
| 965 | # Process each chunk |
| 966 | async for chunk in response_generator: |
| 967 | try: |
| 968 | # Check if this is the end of the response with usage data |
| 969 | if hasattr(chunk, "usage") and chunk.usage is not None: |
| 970 | if hasattr(chunk.usage, "prompt_tokens"): |
| 971 | input_tokens = chunk.usage.prompt_tokens |
| 972 | if hasattr(chunk.usage, "completion_tokens"): |
| 973 | output_tokens = chunk.usage.completion_tokens |
| 974 | |
| 975 | # Handle text content |
| 976 | if hasattr(chunk, "choices") and len(chunk.choices) > 0: |
| 977 | choice = chunk.choices[0] |
| 978 | |
| 979 | # Get the delta from the choice |