(config *config.ModelConfig, input *schema.OpenAIRequest)
| 286 | } |
| 287 | |
| 288 | func mergeOpenAIRequestAndModelConfig(config *config.ModelConfig, input *schema.OpenAIRequest) error { |
| 289 | if input.Echo { |
| 290 | config.Echo = input.Echo |
| 291 | } |
| 292 | if input.TopK != nil { |
| 293 | config.TopK = input.TopK |
| 294 | } |
| 295 | if input.TopP != nil { |
| 296 | config.TopP = input.TopP |
| 297 | } |
| 298 | if input.MinP != nil { |
| 299 | config.MinP = input.MinP |
| 300 | } |
| 301 | |
| 302 | if input.Backend != "" { |
| 303 | config.Backend = input.Backend |
| 304 | } |
| 305 | |
| 306 | if input.ClipSkip != 0 { |
| 307 | config.Diffusers.ClipSkip = input.ClipSkip |
| 308 | } |
| 309 | |
| 310 | if input.NegativePromptScale != 0 { |
| 311 | config.NegativePromptScale = input.NegativePromptScale |
| 312 | } |
| 313 | |
| 314 | if input.NegativePrompt != "" { |
| 315 | config.NegativePrompt = input.NegativePrompt |
| 316 | } |
| 317 | |
| 318 | if input.RopeFreqBase != 0 { |
| 319 | config.RopeFreqBase = input.RopeFreqBase |
| 320 | } |
| 321 | |
| 322 | if input.RopeFreqScale != 0 { |
| 323 | config.RopeFreqScale = input.RopeFreqScale |
| 324 | } |
| 325 | |
| 326 | if input.Grammar != "" { |
| 327 | config.Grammar = input.Grammar |
| 328 | } |
| 329 | |
| 330 | if input.Temperature != nil { |
| 331 | config.Temperature = input.Temperature |
| 332 | } |
| 333 | |
| 334 | // Resolve the effective reasoning effort (request overrides the model config |
| 335 | // default), store it so gRPCPredictOpts forwards it to the backend as the |
| 336 | // reasoning_effort chat_template_kwarg (what gpt-oss / LFM2.5 read), and map |
| 337 | // it onto the enable_thinking toggle. "none" disables thinking (the #10072 |
| 338 | // use case); a level enables it unless the config already disabled reasoning |
| 339 | // (an operator's explicit disable wins over a request asking to think). |
| 340 | config.ApplyReasoningEffort(input.ReasoningEffort) |
| 341 | |
| 342 | // Forward the client's request metadata so chat-template kwargs set per-request |
| 343 | // (enable_thinking, reasoning_effort, preserve_thinking, ...) reach the backend |
| 344 | // and override the model's reasoning-config defaults. See gRPCPredictOpts. |
| 345 | if len(input.Metadata) > 0 { |
no test coverage detected