流式调用LLM,逐步返回响应内容 Args: system_prompt: 系统提示词 user_prompt: 用户提示词 **kwargs: 额外参数(temperature, top_p等) Yields: 响应文本块(str)
(self, system_prompt: str, user_prompt: str, **kwargs)
| 87 | return "" |
| 88 | |
def stream_invoke(self, system_prompt: str, user_prompt: str, **kwargs) -> Generator[str, None, None]:
    """
    Stream a chat completion from the LLM, yielding text fragments as they arrive.

    The current local time is prepended to the user prompt before the
    request is sent, so the model always sees a time prefix even when
    ``user_prompt`` is empty.

    Args:
        system_prompt: Content of the system message.
        user_prompt: Content of the user message. When empty, only the
            time prefix is sent as the user message.
        **kwargs: Optional request parameters. Only ``temperature``,
            ``top_p``, ``presence_penalty`` and ``frequency_penalty`` are
            forwarded, and only when their value is not None. ``timeout``
            overrides ``self.timeout`` for this call. Any other key is
            ignored.

    Yields:
        Each non-empty ``delta.content`` string of the first choice of
        every streamed chunk.

    Raises:
        Exception: Any error raised while creating or consuming the stream
            is logged and re-raised unchanged.
    """
    current_time = datetime.now().strftime("%Y年%m月%d日%H时%M分")
    time_prefix = f"今天的实际时间是{current_time}"
    user_prompt = f"{time_prefix}\n{user_prompt}" if user_prompt else time_prefix
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    allowed_keys = {"temperature", "top_p", "presence_penalty", "frequency_penalty"}
    extra_params = {key: value for key, value in kwargs.items() if key in allowed_keys and value is not None}
    # Streaming is mandatory for this method regardless of caller input.
    extra_params["stream"] = True

    # Read (rather than pop) the timeout: kwargs is not used afterwards,
    # so there is no reason to mutate it.
    timeout = kwargs.get("timeout", self.timeout)

    stream = None
    try:
        stream = self.client.chat.completions.create(
            model=self.model_name,
            messages=messages,
            timeout=timeout,
            **extra_params,
        )

        for chunk in stream:
            # Some chunks carry no choices; skip them.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            if delta and delta.content:
                yield delta.content
    except Exception as e:
        logger.error("流式请求失败: %s", e)
        # Bare raise keeps the original traceback intact.
        raise
    finally:
        # Release the underlying response deterministically, including when
        # the consumer stops iterating early (GeneratorExit) or an error
        # interrupts the stream. Guarded so stream objects without a
        # close() method keep working.
        close = getattr(stream, "close", None)
        if callable(close):
            close()
| 135 | |
| 136 | @with_retry(LLM_RETRY_CONFIG) |
| 137 | def stream_invoke_to_string(self, system_prompt: str, user_prompt: str, **kwargs) -> str: |
no test coverage detected