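Generate model response using batch inference. This method queues the request for batch processing and waits for the result. The actual inference is performed asynchronously in a background thread. Args: input: List of chat messages forming the conversation; tools: Available tools for function calling (if supported); tool_choice: Tool selection strategy; config: Generation configuration. Returns: ModelOutput containing the generated response.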
(
self,
input: List[EvalChatMessage],
tools: List[ToolInfo],
tool_choice: ToolChoice,
config: GenerateConfig,
)
| 102 | self.engine = TransformersEngine(self.model, template=self.template, max_batch_size=self.max_batch_size) |
| 103 | |
def generate(
    self,
    input: List[EvalChatMessage],
    tools: List[ToolInfo],
    tool_choice: ToolChoice,
    config: GenerateConfig,
) -> ModelOutput:
    """
    Submit one request to the shared batch worker and wait for its answer.

    The conversation and generation settings are converted to ms-swift
    format, wrapped together with this instance's engine, and placed on the
    module-level ``batch_queue``. The calling thread then blocks on a future
    until it is resolved; inference itself happens in the background thread
    that runs ``_process_batches``.

    Args:
        input: List of chat messages forming the conversation.
        tools: Available tools for function calling (if supported).
        tool_choice: Tool selection strategy. Not referenced in this
            method's body.
        config: Generation configuration. ``config.batch_size`` is
            forwarded with the request.

    Returns:
        ModelOutput containing the generated response.

    Note:
        The wait on the future has no timeout, so this call blocks until
        the worker resolves it. Presumably ``Future`` is
        ``concurrent.futures.Future``, in which case an exception set on
        the future by the worker is re-raised here - confirm against the
        file's imports.
    """
    global batch_thread

    # Lazily start the single background worker on first use.
    # NOTE(review): this check-then-assign is not guarded by a lock in this
    # block; if generate() can be entered concurrently before the worker
    # exists, more than one worker could be started - confirm with callers.
    if batch_thread is None:
        worker = Thread(target=_process_batches, daemon=True)
        batch_thread = worker
        worker.start()

    # Translate the EvalScope request and settings into ms-swift format,
    # then bundle them with the engine that should serve the request.
    payload = BatchInferInput(
        ms_input=convert_request(input, tools),
        ms_config=convert_config(config),
        batch_size=config.batch_size,
        engine=self.engine,
    )

    # The worker delivers the outcome through this future.
    pending = Future[ModelOutput]()
    batch_queue.put(_QueueItem(input=payload, future=pending))

    # Block the caller until the result is available.
    output = pending.result()
    return output
| 148 | |
| 149 | |
| 150 | def _process_batches() -> None: |
nothing calls this directly
no test coverage detected