Make a chat completion request (non-streaming, for tool use).
@param {object} ctx - Shared context { config, conversationHistory, memoryStore, skillManager, pluginLoader, currentTaskType, tokenTracker, sessionStore, getAllTools, _fullscreenRef }
(ctx)
| 15 | * @param {object} ctx - Shared context { config, conversationHistory, memoryStore, skillManager, pluginLoader, currentTaskType, tokenTracker, sessionStore, getAllTools, _fullscreenRef } |
| 16 | */ |
| 17 | async function chatCompletion(ctx) { |
| 18 | const { config, conversationHistory, tokenTracker, sessionStore } = ctx; |
| 19 | const target = config.activeModelTarget || getModelTarget(config, 'default'); |
| 20 | const requestConfig = withModelTarget(config, target); |
| 21 | const baseUrl = target.baseUrl; |
| 22 | |
| 23 | const systemMsg = { |
| 24 | role: 'system', |
| 25 | content: buildSystemPrompt(ctx), |
| 26 | }; |
| 27 | |
| 28 | try { |
| 29 | const { extractImages, formatImagesForAPI, modelSupportsVision } = require('../src/session/images'); |
| 30 | const processedMessages = conversationHistory.map(msg => { |
| 31 | if (msg.role !== 'user' || typeof msg.content !== 'string') return msg; |
| 32 | const images = extractImages(msg.content, process.cwd()); |
| 33 | if (images.length === 0 || !modelSupportsVision(target.model)) return msg; |
| 34 | return { ...msg, content: [{ type: 'text', text: msg.content }, ...formatImagesForAPI(images)] }; |
| 35 | }); |
| 36 | |
| 37 | const _tools = ctx.getAllTools(config); |
| 38 | // Collapse any mid-conversation system messages into a single leading one |
| 39 | // so strict chat templates (Qwen3/Qwen3.5 under llama.cpp --jinja) don't |
| 40 | // 400 with "System message must be at the beginning." See issue #62. |
| 41 | const { consolidateSystemMessages } = require('../src/session/message_normalizer'); |
| 42 | const body = { |
| 43 | model: target.model, |
| 44 | messages: consolidateSystemMessages([systemMsg, ...processedMessages]), |
| 45 | temperature: 0.1, |
| 46 | max_tokens: 4096, |
| 47 | }; |
| 48 | // Only include tools when there are tools to send — some endpoints (OpenWebUI) |
| 49 | // error on an empty tools array rather than treating it as "no tools". |
| 50 | if (_tools && _tools.length > 0) { |
| 51 | body.tools = _tools; |
| 52 | } |
| 53 | |
| 54 | const headers = buildAuthHeaders(requestConfig); |
| 55 | |
| 56 | const response = await fetch(`${baseUrl}/chat/completions`, { |
| 57 | method: 'POST', |
| 58 | headers, |
| 59 | body: JSON.stringify(body), |
| 60 | }); |
| 61 | |
| 62 | if (!response.ok) { |
| 63 | const err = await response.text(); |
| 64 | if (response.status >= 400 && response.status < 500) { |
| 65 | await new Promise(r => setTimeout(r, 2000)); |
| 66 | try { |
| 67 | const retry = await fetch(`${baseUrl}/chat/completions`, { method: 'POST', headers, body: JSON.stringify(body) }); |
| 68 | if (retry.ok) return await retry.json(); |
| 69 | } catch {} |
| 70 | } |
| 71 | // Redact the error response — providers sometimes echo the request |
| 72 | // back, including the Authorization header value, when responding |
| 73 | // with 401/403. Never print raw provider errors verbatim. |
| 74 | console.log(` \x1b[31m✗ API error ${response.status}: ${redactString(err.slice(0, 200))}\x1b[0m`); |
Nothing calls `chatCompletion` directly.
No test coverage was detected for `chatCompletion`.