| 549 | return responses, total_tokens |
| 550 | |
def generate_streaming_response(final_response, model):
    """Yield ``final_response`` as OpenAI-style streaming chunks.

    Each yielded string is one ``data: <json>`` line followed by a blank
    line. One ``chat.completion.chunk`` payload is emitted per response,
    and the stream is closed with a ``data: [DONE]`` line.

    Args:
        final_response: Either a list of response contents (one chunk is
            produced per element, numbered by its position in the list)
            or a single response content (one chunk with index 0).
        model: Value placed in the ``model`` field of every chunk.

    Yields:
        str: The serialized chunk lines, then the ``[DONE]`` terminator.
    """
    # Every chunk of this stream carries the same id and creation time;
    # the id is derived from the current time in milliseconds.
    response_id = f"chatcmpl-{int(time.time()*1000)}"
    created = int(time.time())

    # Treat a lone response as a one-item batch so both input shapes
    # flow through the same emission loop.
    if isinstance(final_response, list):
        answers = final_response
    else:
        answers = [final_response]

    for position, answer in enumerate(answers):
        # Each chunk is self-contained: it names the assistant role,
        # carries the full content, and is already marked as finished.
        choice = {
            "delta": {"role": "assistant", "content": answer},
            "index": position,
            "finish_reason": "stop",
        }
        chunk = {
            "id": response_id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": model,
            "choices": [choice],
        }
        payload = json.dumps(chunk)
        yield "data: " + payload + "\n\n"

    # Sentinel line telling the consumer that the stream has ended.
    yield "data: [DONE]\n\n"
| 579 | |
| 580 | def extract_contents(response_obj): |
| 581 | contents = [] |