hub / github.com/mudler/LocalAI / gRPCPredictOpts

Function gRPCPredictOpts

core/backend/options.go:400–496 · view source on GitHub ↗

(c config.ModelConfig, modelPath string)

Source from the content-addressed store, hash-verified

398	}
399
400	func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions {
401	promptCachePath := ""
402	if c.PromptCachePath != "" {
403	p := filepath.Join(modelPath, c.PromptCachePath)
404	err := os.MkdirAll(filepath.Dir(p), 0750)
405	if err == nil {
406	promptCachePath = p
407	} else {
408	xlog.Error("error creating prompt cache folder", "error", err, "promptCachePath", promptCachePath)
409	}
410	}
411
412	// TopK may be nil after SetDefaults for backends that don't use llama.cpp's
413	// top_k=40 default (issue #6632, e.g. mlx). proto3 int32 can't be unset, so
414	// send 0 — the value mlx actually wants (top-k disabled).
415	var topK int32
416	if c.TopK != nil {
417	topK = int32(*c.TopK)
418	}
419
420	pbOpts := &pb.PredictOptions{
421	Temperature: float32(*c.Temperature),
422	TopP: float32(*c.TopP),
423	NDraft: c.NDraft,
424	TopK: topK,
425	MinP: float32(*c.MinP),
426	Tokens: int32(*c.Maxtokens),
427	Threads: int32(*c.Threads),
428	PromptCacheAll: *c.PromptCacheAll,
429	PromptCacheRO: c.PromptCacheRO,
430	PromptCachePath: promptCachePath,
431	F16KV: *c.F16,
432	DebugMode: *c.Debug,
433	Grammar: c.Grammar,
434	NegativePromptScale: c.NegativePromptScale,
435	RopeFreqBase: c.RopeFreqBase,
436	RopeFreqScale: c.RopeFreqScale,
437	NegativePrompt: c.NegativePrompt,
438	Mirostat: int32(*c.LLMConfig.Mirostat),
439	MirostatETA: float32(*c.LLMConfig.MirostatETA),
440	MirostatTAU: float32(*c.LLMConfig.MirostatTAU),
441	Debug: *c.Debug,
442	StopPrompts: c.StopWords,
443	Repeat: int32(c.RepeatLastN),
444	FrequencyPenalty: float32(c.FrequencyPenalty),
445	PresencePenalty: float32(c.PresencePenalty),
446	Penalty: float32(c.RepeatPenalty),
447	NKeep: int32(c.Keep),
448	Batch: int32(c.Batch),
449	IgnoreEOS: c.IgnoreEOS,
450	Seed: getSeed(c),
451	MLock: *c.MMlock,
452	MMap: *c.MMap,
453	MainGPU: c.MainGPU,
454	TensorSplit: c.TensorSplit,
455	TailFreeSamplingZ: float32(*c.TFZ),
456	TypicalP: float32(*c.TypicalP),
457	}

Callers 5

options_internal_test.go (File) · 0.85

FaceEmbed (Function) · 0.85

ModelTokenize (Function) · 0.85

ModelInference (Function) · 0.85

ModelEmbedding (Function) · 0.85

Calls 3

getSeed (Function) · 0.85

ResolveChatTemplateKwargs (Method) · 0.80

Error (Method) · 0.45

Tested by

no test coverage detected