Client for interacting with OpenAI-compatible vision-language models. Args: config: Model configuration.
| 39 | |
| 40 | |
| 41 | class ModelClient: |
| 42 | """ |
| 43 | Client for interacting with OpenAI-compatible vision-language models. |
| 44 | |
| 45 | Args: |
| 46 | config: Model configuration. |
| 47 | """ |
| 48 | |
def __init__(self, config: ModelConfig | None = None):
    """
    Store the model configuration and build the OpenAI client.

    Args:
        config: Model configuration. When omitted (or falsy), a default
            ``ModelConfig()`` is created and used instead.
    """
    # Fall back to a default configuration when none was supplied.
    if not config:
        config = ModelConfig()
    self.config = config

    # The client is pointed at the endpoint and credentials from the config.
    self.client = OpenAI(
        base_url=config.base_url,
        api_key=config.api_key,
    )
| 52 | |
| 53 | def request(self, messages: list[dict[str, Any]]) -> ModelResponse: |
| 54 | """ |
| 55 | Send a request to the model. |
| 56 | |
| 57 | Args: |
| 58 | messages: List of message dictionaries in OpenAI format. |
| 59 | |
| 60 | Returns: |
| 61 | ModelResponse containing thinking and action. |
| 62 | |
| 63 | Raises: |
| 64 | ValueError: If the response cannot be parsed. |
| 65 | """ |
| 66 | # Start timing |
| 67 | start_time = time.time() |
| 68 | time_to_first_token = None |
| 69 | time_to_thinking_end = None |
| 70 | |
| 71 | stream = self.client.chat.completions.create( |
| 72 | messages=messages, |
| 73 | model=self.config.model_name, |
| 74 | max_tokens=self.config.max_tokens, |
| 75 | temperature=self.config.temperature, |
| 76 | top_p=self.config.top_p, |
| 77 | frequency_penalty=self.config.frequency_penalty, |
| 78 | extra_body=self.config.extra_body, |
| 79 | stream=True, |
| 80 | ) |
| 81 | |
| 82 | raw_content = "" |
| 83 | buffer = "" # Buffer to hold content that might be part of a marker |
| 84 | action_markers = ["finish(message=", "do(action="] |
| 85 | in_action_phase = False # Track if we've entered the action phase |
| 86 | first_token_received = False |
| 87 | |
| 88 | for chunk in stream: |
| 89 | if len(chunk.choices) == 0: |
| 90 | continue |
| 91 | if chunk.choices[0].delta.content is not None: |
| 92 | content = chunk.choices[0].delta.content |
| 93 | raw_content += content |
| 94 | |
| 95 | # Record time to first token |
| 96 | if not first_token_received: |
| 97 | time_to_first_token = time.time() - start_time |
| 98 | first_token_received = True |