hub / github.com/mudler/LocalAI / ModelInference

Function ModelInference

core/backend/llm.go:74–424 · view source on GitHub ↗

(ctx context.Context, s string, messages schema.Messages, images, videos, audios []string, loader *model.ModelLoader, c *config.ModelConfig, cl *config.ModelConfigLoader, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool, tools string, toolChoice string, logprobs *int, topLogprobs *int, logitBias map[string]float64, metadata map[string]string)

Source from the content-addressed store, hash-verified

72	var ModelInferenceFunc = ModelInference
73
74	func ModelInference(ctx context.Context, s string, messages schema.Messages, images, videos, audios []string, loader *model.ModelLoader, c *config.ModelConfig, cl *config.ModelConfigLoader, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool, tools string, toolChoice string, logprobs *int, topLogprobs *int, logitBias map[string]float64, metadata map[string]string) (func() (LLMResponse, error), error) {
75	modelFile := c.Model
76
77	// Check if the modelFile exists, if it doesn't try to load it from the gallery
78	if o.AutoloadGalleries { // experimental
79	modelNames, err := galleryop.ListModels(cl, loader, nil, galleryop.SKIP_ALWAYS)
80	if err != nil {
81	return nil, err
82	}
83	modelName := c.Name
84	if modelName == "" {
85	modelName = c.Model
86	}
87	if !slices.Contains(modelNames, modelName) {
88	utils.ResetDownloadTimers()
89	// if we failed to load the model, we try to download it
90	err := gallery.InstallModelFromGallery(ctx, o.Galleries, o.BackendGalleries, o.SystemState, loader, modelName, gallery.GalleryModel{}, utils.DisplayDownloadFunction, o.EnforcePredownloadScans, o.AutoloadBackendGalleries, o.RequireBackendIntegrity)
91	if err != nil {
92	xlog.Error("failed to install model from gallery", "error", err, "model", modelFile)
93	//return nil, err
94	}
95	}
96	}
97
98	// Make the rendered prompt's prefix chain available to the distributed router
99	// for prefix-cache-aware node selection. No-op in single-process mode. The
100	// model id MUST match the id ModelOptions feeds to model.WithModelID, so both
101	// use the shared config.ModelConfig.ModelID() helper (Name with a fallback to
102	// Model) or the chain salt and the tracking key would diverge.
103	//
104	// s is empty for UseTokenizerTemplate models (the backend tokenizes the
105	// structured messages itself), so fall back to a prefix-stable serialization
106	// of the messages - otherwise prefix routing would silently degrade to
107	// round-robin for the bulk of modern chat models.
108	chainSource := s
109	if chainSource == "" {
110	chainSource = messagesPrefixSource(messages)
111	}
112	ctx = distributedhdr.MaybeWithPrefixChain(ctx, c.ModelID(), chainSource)
113
114	opts := ModelOptions(*c, o, model.WithContext(ctx))
115	inferenceModel, err := loader.Load(opts...)
116	if err != nil {
117	recordModelLoadFailure(o, c.Name, c.Backend, err, map[string]any{"model_file": modelFile})
118	return nil, err
119	}
120
121	// Probe the backend for model-scoped metadata after LoadModel succeeds.
122	// Two signals are captured: thinking-mode detection (only meaningful when the
123	// tokenizer template path is active) and the multimodal media marker (needed
124	// by custom chat templates so markers line up with what mtmd expects).
125	// We probe whenever any of those slots is still empty.
126	shouldProbeThinking := needsThinkingProbe(c)
127	needsMarkerProbe := c.MediaMarker == ""
128	if shouldProbeThinking || needsMarkerProbe {
129	modelOpts := grpcModelOpts(*c, o.SystemState.Model.ModelsPath)
130	config.DetectThinkingSupportFromBackend(ctx, c, inferenceModel, modelOpts)
131	// Update the config in the loader so it persists for future requests

Callers 1

PredictMethod · 0.92

Calls 15

ListModelsFunction · 0.92

ResetDownloadTimersFunction · 0.92

InstallModelFromGalleryFunction · 0.92

MaybeWithPrefixChainFunction · 0.92

WithContextFunction · 0.92

DetectThinkingSupportFromBackendFunction · 0.92

InitBackendTracingIfEnabledFunction · 0.92

TruncateToBytesFunction · 0.92

RecordBackendTraceFunction · 0.92

GenerateLLMSummaryFunction · 0.92

messagesPrefixSourceFunction · 0.85

ModelOptionsFunction · 0.85

Tested by

no test coverage detected