( config: LlmConfig, messages: ChatMessage[], overrides?: RequestOverrides, )
| 384 | } |
| 385 | |
| 386 | function buildOpenAiCompatibleBody( |
| 387 | config: LlmConfig, |
| 388 | messages: ChatMessage[], |
| 389 | overrides?: RequestOverrides, |
| 390 | ): Record<string, unknown> { |
| 391 | assertBigModelImageSupport(config, messages) |
| 392 | const reasoning = effectiveReasoning(config, overrides) |
| 393 | const body: Record<string, unknown> = buildOpenAiBody(messages, stripWireAgnosticOverrides(overrides)) |
| 394 | adaptOpenAiStrictCompletionBody(config, body) |
| 395 | adaptKimiBody(config, body) |
| 396 | adaptXiaomiMimoBody(config, body, reasoning) |
| 397 | |
| 398 | if (isDeepSeekEndpoint(config)) { |
| 399 | // DeepSeek V4 thinking mode. `thinking.type=disabled` is the most |
| 400 | // important path for ingestion/rewrite tasks: it prevents the model |
| 401 | // from spending the whole response on `reasoning_content` with no |
| 402 | // final `content`. |
| 403 | if (supportsDeepSeekThinkingParam(config)) { |
| 404 | if (reasoning.mode === "off") { |
| 405 | body.thinking = { type: "disabled" } |
| 406 | } else if (reasoning.mode !== "auto") { |
| 407 | body.thinking = { type: "enabled" } |
| 408 | if (reasoning.mode === "high" || reasoning.mode === "max") { |
| 409 | body.reasoning_effort = reasoning.mode |
| 410 | } |
| 411 | } |
| 412 | } |
| 413 | return body |
| 414 | } |
| 415 | |
| 416 | if (config.provider === "ollama") { |
| 417 | // Ollama's OpenAI-compatible /v1/chat/completions maps reasoning |
| 418 | // control onto `reasoning_effort` ("high"|"medium"|"low"|"none"; |
| 419 | // "none" disables thinking). This is the only lever that stops a |
| 420 | // thinking-capable model — or a non-thinking one Ollama wraps with a |
| 421 | // thinking template — from spending its entire token budget on |
| 422 | // chain-of-thought and ending the stream with an empty `content`, |
| 423 | // which surfaces to the user as the "produced N chars of reasoning, |
| 424 | // but no actual response content" diagnostic. Until this, callers' |
| 425 | // `reasoning: { mode: "off" }` (every structured ingest call) was |
| 426 | // silently dropped on the Ollama path. Non-thinking models (gemma, |
| 427 | // llama) ignore the field harmlessly. "max" has no Ollama analogue, |
| 428 | // so it maps to the strongest supported level, "high". |
| 429 | // See docs.ollama.com/api/openai-compatibility. |
| 430 | if (reasoning.mode === "off") { |
| 431 | body.reasoning_effort = "none" |
| 432 | } else if ( |
| 433 | reasoning.mode === "low" || |
| 434 | reasoning.mode === "medium" || |
| 435 | reasoning.mode === "high" |
| 436 | ) { |
| 437 | body.reasoning_effort = reasoning.mode |
| 438 | } else if (reasoning.mode === "max") { |
| 439 | body.reasoning_effort = "high" |
| 440 | } |
| 441 | return body |
| 442 | } |
| 443 |
no test coverage detected