MCPcopy
hub / github.com/nashsu/llm_wiki / buildOpenAiCompatibleBody

Function buildOpenAiCompatibleBody

src/lib/llm-providers.ts:386–455  ·  view source on GitHub ↗
(
  config: LlmConfig,
  messages: ChatMessage[],
  overrides?: RequestOverrides,
)

Source from the content-addressed store, hash-verified

384}
385
386function buildOpenAiCompatibleBody(
387 config: LlmConfig,
388 messages: ChatMessage[],
389 overrides?: RequestOverrides,
390): Record<string, unknown> {
391 assertBigModelImageSupport(config, messages)
392 const reasoning = effectiveReasoning(config, overrides)
393 const body: Record<string, unknown> = buildOpenAiBody(messages, stripWireAgnosticOverrides(overrides))
394 adaptOpenAiStrictCompletionBody(config, body)
395 adaptKimiBody(config, body)
396 adaptXiaomiMimoBody(config, body, reasoning)
397
398 if (isDeepSeekEndpoint(config)) {
399 // DeepSeek V4 thinking mode. `thinking.type=disabled` is the most
400 // important path for ingestion/rewrite tasks: it prevents the model
401 // from spending the whole response on `reasoning_content` with no
402 // final `content`.
403 if (supportsDeepSeekThinkingParam(config)) {
404 if (reasoning.mode === "off") {
405 body.thinking = { type: "disabled" }
406 } else if (reasoning.mode !== "auto") {
407 body.thinking = { type: "enabled" }
408 if (reasoning.mode === "high" || reasoning.mode === "max") {
409 body.reasoning_effort = reasoning.mode
410 }
411 }
412 }
413 return body
414 }
415
416 if (config.provider === "ollama") {
417 // Ollama's OpenAI-compatible /v1/chat/completions maps reasoning
418 // control onto `reasoning_effort` ("high"|"medium"|"low"|"none";
419 // "none" disables thinking). This is the only lever that stops a
420 // thinking-capable model — or a non-thinking one Ollama wraps with a
421 // thinking template — from spending its entire token budget on
422 // chain-of-thought and ending the stream with an empty `content`,
423 // which surfaces to the user as the "produced N chars of reasoning,
424 // but no actual response content" diagnostic. Until this, callers'
425 // `reasoning: { mode: "off" }` (every structured ingest call) was
426 // silently dropped on the Ollama path. Non-thinking models (gemma,
427 // llama) ignore the field harmlessly. "max" has no Ollama analogue,
428 // so it maps to the strongest supported level, "high".
429 // See docs.ollama.com/api/openai-compatibility.
430 if (reasoning.mode === "off") {
431 body.reasoning_effort = "none"
432 } else if (
433 reasoning.mode === "low" ||
434 reasoning.mode === "medium" ||
435 reasoning.mode === "high"
436 ) {
437 body.reasoning_effort = reasoning.mode
438 } else if (reasoning.mode === "max") {
439 body.reasoning_effort = "high"
440 }
441 return body
442 }
443

Callers 1

getProviderConfig · Function · 0.85

Calls 10

effectiveReasoning · Function · 0.85
buildOpenAiBody · Function · 0.85
adaptKimiBody · Function · 0.85
adaptXiaomiMimoBody · Function · 0.85
isDeepSeekEndpoint · Function · 0.85
isQwenThinkingModel · Function · 0.85

Tested by

no test coverage detected