(self, body: bytes)
| 520 | return text or "", tool_calls |
| 521 | |
| 522 | def handle_chat(self, body: bytes): |
| 523 | req = json.loads(body) |
| 524 | model_name, model_id, think_mode, err = self._resolve_model( |
| 525 | req.get("model", CONFIG["default_model"])) |
| 526 | if err: |
| 527 | self.send_json({"error": {"message": err}}, 400) |
| 528 | return |
| 529 | |
| 530 | tools = req.get("tools") |
| 531 | prompt = messages_to_prompt(req.get("messages", []), tools) |
| 532 | if not prompt.strip(): |
| 533 | self.send_json({"error": {"message": "empty prompt"}}, 400) |
| 534 | return |
| 535 | |
| 536 | stream = req.get("stream", False) |
| 537 | cid = f"chatcmpl-{uuid.uuid4().hex[:12]}" |
| 538 | |
| 539 | if stream and not tools: |
| 540 | # True streaming: forward chunks as they arrive |
| 541 | try: |
| 542 | self.send_response(200) |
| 543 | self.send_header("Content-Type", "text/event-stream") |
| 544 | self.send_header("Cache-Control", "no-cache") |
| 545 | self.send_header("Access-Control-Allow-Origin", "*") |
| 546 | self.end_headers() |
| 547 | for delta_text in gemini_stream_generate_iter(prompt, model_id, think_mode): |
| 548 | chunk = {"id": cid, "object": "chat.completion.chunk", "created": int(time.time()), |
| 549 | "model": model_name, "choices": [{"index": 0, "delta": {"content": delta_text}, "finish_reason": None}]} |
| 550 | self.wfile.write(f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n".encode()) |
| 551 | self.wfile.flush() |
| 552 | # Final chunk |
| 553 | chunk = {"id": cid, "object": "chat.completion.chunk", "created": int(time.time()), |
| 554 | "model": model_name, "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]} |
| 555 | self.wfile.write(f"data: {json.dumps(chunk)}\n\n".encode()) |
| 556 | self.wfile.write(b"data: [DONE]\n\n") |
| 557 | self.wfile.flush() |
| 558 | except (BrokenPipeError, ConnectionResetError): |
| 559 | pass |
| 560 | except Exception as e: |
| 561 | log(f"Stream error: {e}") |
| 562 | return |
| 563 | |
| 564 | # Non-streaming (or tool calling which needs full response) |
| 565 | try: |
| 566 | text, tool_calls = self._call_gemini(prompt, model_id, think_mode, tools) |
| 567 | except Exception as e: |
| 568 | self.send_json({"error": {"message": f"upstream error: {e}"}}, 502) |
| 569 | return |
| 570 | |
| 571 | msg = {"role": "assistant", "content": text or None} |
| 572 | if tool_calls: |
| 573 | msg["tool_calls"] = tool_calls |
| 574 | finish = "tool_calls" if tool_calls else "stop" |
| 575 | |
| 576 | if stream: |
| 577 | # Stream mode with tools: send as single chunk (need full parse for tool_calls) |
| 578 | self.send_response(200) |
| 579 | self.send_header("Content-Type", "text/event-stream") |
no test coverage detected