| 37 | return self.generate_with_usage(prompt, temperature).text |
| 38 | |
def generate_with_usage(self, prompt: str, temperature: float = 0.0) -> LLMResult:
    """Send *prompt* as a single user message and return the reply with token usage.

    The request goes through ``litellm.completion`` using ``self._model_id``
    as the model, with the completion capped at 2048 tokens.

    Credentials are read from the environment on every call:

    * model ids starting with ``_WATSONX_PREFIX`` use ``WATSONX_APIKEY`` and
      ``WATSONX_PROJECT_ID`` (both required) plus ``WATSONX_URL`` (optional,
      only forwarded when set and non-empty);
    * every other model id uses ``LITELLM_API_KEY`` and ``LITELLM_BASE_URL``
      (both required).

    Args:
        prompt: Text sent as the content of the single ``user`` message.
        temperature: Sampling temperature forwarded to the completion call.

    Returns:
        An ``LLMResult`` holding the first choice's message text and the
        prompt/completion token counts. The text is ``""`` when the provider
        returns no content; each token count is ``0`` when the response has
        no usage information for it.

    Raises:
        KeyError: If a required environment variable is not set.
    """
    # Imported inside the method, so litellm is only loaded when a call is made.
    import litellm

    kwargs: dict = {
        "model": self._model_id,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": temperature,
        "max_tokens": 2048,
    }

    if self._model_id.startswith(_WATSONX_PREFIX):
        kwargs["api_key"] = os.environ["WATSONX_APIKEY"]
        kwargs["project_id"] = os.environ["WATSONX_PROJECT_ID"]
        # The endpoint URL is optional; omit api_base when it is unset or empty.
        if url := os.environ.get("WATSONX_URL"):
            kwargs["api_base"] = url
    else:
        kwargs["api_key"] = os.environ["LITELLM_API_KEY"]
        kwargs["api_base"] = os.environ["LITELLM_BASE_URL"]

    response = litellm.completion(**kwargs)
    usage = getattr(response, "usage", None)

    # In the OpenAI-style response format the message content is nullable
    # (e.g. a reply carrying no text); normalise it so ``text`` is always a str.
    content = response.choices[0].message.content or ""

    return LLMResult(
        text=content,
        # ``usage`` or its counters may be missing or None; report 0 then.
        input_tokens=int(getattr(usage, "prompt_tokens", 0) or 0),
        output_tokens=int(getattr(usage, "completion_tokens", 0) or 0),
    )