MCPcopy
hub / github.com/mudler/LocalAI / gRPCPredictOpts

Function gRPCPredictOpts

core/backend/options.go:400–496  ·  view source on GitHub ↗
(c config.ModelConfig, modelPath string)

Source from the content-addressed store, hash-verified

398}
399
400func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions {
401 promptCachePath := ""
402 if c.PromptCachePath != "" {
403 p := filepath.Join(modelPath, c.PromptCachePath)
404 err := os.MkdirAll(filepath.Dir(p), 0750)
405 if err == nil {
406 promptCachePath = p
407 } else {
408 xlog.Error("error creating prompt cache folder", "error", err, "promptCachePath", promptCachePath)
409 }
410 }
411
412 // TopK may be nil after SetDefaults for backends that don't use llama.cpp's
413 // top_k=40 default (issue #6632, e.g. mlx). proto3 int32 can't be unset, so
414 // send 0 — the value mlx actually wants (top-k disabled).
415 var topK int32
416 if c.TopK != nil {
417 topK = int32(*c.TopK)
418 }
419
420 pbOpts := &pb.PredictOptions{
421 Temperature: float32(*c.Temperature),
422 TopP: float32(*c.TopP),
423 NDraft: c.NDraft,
424 TopK: topK,
425 MinP: float32(*c.MinP),
426 Tokens: int32(*c.Maxtokens),
427 Threads: int32(*c.Threads),
428 PromptCacheAll: *c.PromptCacheAll,
429 PromptCacheRO: c.PromptCacheRO,
430 PromptCachePath: promptCachePath,
431 F16KV: *c.F16,
432 DebugMode: *c.Debug,
433 Grammar: c.Grammar,
434 NegativePromptScale: c.NegativePromptScale,
435 RopeFreqBase: c.RopeFreqBase,
436 RopeFreqScale: c.RopeFreqScale,
437 NegativePrompt: c.NegativePrompt,
438 Mirostat: int32(*c.LLMConfig.Mirostat),
439 MirostatETA: float32(*c.LLMConfig.MirostatETA),
440 MirostatTAU: float32(*c.LLMConfig.MirostatTAU),
441 Debug: *c.Debug,
442 StopPrompts: c.StopWords,
443 Repeat: int32(c.RepeatLastN),
444 FrequencyPenalty: float32(c.FrequencyPenalty),
445 PresencePenalty: float32(c.PresencePenalty),
446 Penalty: float32(c.RepeatPenalty),
447 NKeep: int32(c.Keep),
448 Batch: int32(c.Batch),
449 IgnoreEOS: c.IgnoreEOS,
450 Seed: getSeed(c),
451 MLock: *c.MMlock,
452 MMap: *c.MMap,
453 MainGPU: c.MainGPU,
454 TensorSplit: c.TensorSplit,
455 TailFreeSamplingZ: float32(*c.TFZ),
456 TypicalP: float32(*c.TypicalP),
457 }

Callers 5

FaceEmbedFunction · 0.85
ModelTokenizeFunction · 0.85
ModelInferenceFunction · 0.85
ModelEmbeddingFunction · 0.85

Calls 3

getSeedFunction · 0.85
ErrorMethod · 0.45

Tested by

no test coverage detected