(c config.ModelConfig, modelPath string)
| 398 | } |
| 399 | |
| 400 | func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions { |
| 401 | promptCachePath := "" |
| 402 | if c.PromptCachePath != "" { |
| 403 | p := filepath.Join(modelPath, c.PromptCachePath) |
| 404 | err := os.MkdirAll(filepath.Dir(p), 0750) |
| 405 | if err == nil { |
| 406 | promptCachePath = p |
| 407 | } else { |
| 408 | xlog.Error("error creating prompt cache folder", "error", err, "promptCachePath", promptCachePath) |
| 409 | } |
| 410 | } |
| 411 | |
| 412 | // TopK may be nil after SetDefaults for backends that don't use llama.cpp's |
| 413 | // top_k=40 default (issue #6632, e.g. mlx). proto3 int32 can't be unset, so |
| 414 | // send 0 — the value mlx actually wants (top-k disabled). |
| 415 | var topK int32 |
| 416 | if c.TopK != nil { |
| 417 | topK = int32(*c.TopK) |
| 418 | } |
| 419 | |
| 420 | pbOpts := &pb.PredictOptions{ |
| 421 | Temperature: float32(*c.Temperature), |
| 422 | TopP: float32(*c.TopP), |
| 423 | NDraft: c.NDraft, |
| 424 | TopK: topK, |
| 425 | MinP: float32(*c.MinP), |
| 426 | Tokens: int32(*c.Maxtokens), |
| 427 | Threads: int32(*c.Threads), |
| 428 | PromptCacheAll: *c.PromptCacheAll, |
| 429 | PromptCacheRO: c.PromptCacheRO, |
| 430 | PromptCachePath: promptCachePath, |
| 431 | F16KV: *c.F16, |
| 432 | DebugMode: *c.Debug, |
| 433 | Grammar: c.Grammar, |
| 434 | NegativePromptScale: c.NegativePromptScale, |
| 435 | RopeFreqBase: c.RopeFreqBase, |
| 436 | RopeFreqScale: c.RopeFreqScale, |
| 437 | NegativePrompt: c.NegativePrompt, |
| 438 | Mirostat: int32(*c.LLMConfig.Mirostat), |
| 439 | MirostatETA: float32(*c.LLMConfig.MirostatETA), |
| 440 | MirostatTAU: float32(*c.LLMConfig.MirostatTAU), |
| 441 | Debug: *c.Debug, |
| 442 | StopPrompts: c.StopWords, |
| 443 | Repeat: int32(c.RepeatLastN), |
| 444 | FrequencyPenalty: float32(c.FrequencyPenalty), |
| 445 | PresencePenalty: float32(c.PresencePenalty), |
| 446 | Penalty: float32(c.RepeatPenalty), |
| 447 | NKeep: int32(c.Keep), |
| 448 | Batch: int32(c.Batch), |
| 449 | IgnoreEOS: c.IgnoreEOS, |
| 450 | Seed: getSeed(c), |
| 451 | MLock: *c.MMlock, |
| 452 | MMap: *c.MMap, |
| 453 | MainGPU: c.MainGPU, |
| 454 | TensorSplit: c.TensorSplit, |
| 455 | TailFreeSamplingZ: float32(*c.TFZ), |
| 456 | TypicalP: float32(*c.TypicalP), |
| 457 | } |
no test coverage detected