MCPcopy
hub / github.com/mudler/LocalAI / ModelInference

Function ModelInference

core/backend/llm.go:74–424  ·  view source on GitHub ↗
(ctx context.Context, s string, messages schema.Messages, images, videos, audios []string, loader *model.ModelLoader, c *config.ModelConfig, cl *config.ModelConfigLoader, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool, tools string, toolChoice string, logprobs *int, topLogprobs *int, logitBias map[string]float64, metadata map[string]string)

Source from the content-addressed store, hash-verified

72var ModelInferenceFunc = ModelInference
73
74func ModelInference(ctx context.Context, s string, messages schema.Messages, images, videos, audios []string, loader *model.ModelLoader, c *config.ModelConfig, cl *config.ModelConfigLoader, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool, tools string, toolChoice string, logprobs *int, topLogprobs *int, logitBias map[string]float64, metadata map[string]string) (func() (LLMResponse, error), error) {
75 modelFile := c.Model
76
77 // Check if the modelFile exists, if it doesn't try to load it from the gallery
78 if o.AutoloadGalleries { // experimental
79 modelNames, err := galleryop.ListModels(cl, loader, nil, galleryop.SKIP_ALWAYS)
80 if err != nil {
81 return nil, err
82 }
83 modelName := c.Name
84 if modelName == "" {
85 modelName = c.Model
86 }
87 if !slices.Contains(modelNames, modelName) {
88 utils.ResetDownloadTimers()
89 // if we failed to load the model, we try to download it
90 err := gallery.InstallModelFromGallery(ctx, o.Galleries, o.BackendGalleries, o.SystemState, loader, modelName, gallery.GalleryModel{}, utils.DisplayDownloadFunction, o.EnforcePredownloadScans, o.AutoloadBackendGalleries, o.RequireBackendIntegrity)
91 if err != nil {
92 xlog.Error("failed to install model from gallery", "error", err, "model", modelFile)
93 //return nil, err
94 }
95 }
96 }
97
98 // Make the rendered prompt's prefix chain available to the distributed router
99 // for prefix-cache-aware node selection. No-op in single-process mode. The
100 // model id MUST match the id ModelOptions feeds to model.WithModelID, so both
101 // use the shared config.ModelConfig.ModelID() helper (Name with a fallback to
102 // Model) or the chain salt and the tracking key would diverge.
103 //
104 // s is empty for UseTokenizerTemplate models (the backend tokenizes the
105 // structured messages itself), so fall back to a prefix-stable serialization
106 // of the messages - otherwise prefix routing would silently degrade to
107 // round-robin for the bulk of modern chat models.
108 chainSource := s
109 if chainSource == "" {
110 chainSource = messagesPrefixSource(messages)
111 }
112 ctx = distributedhdr.MaybeWithPrefixChain(ctx, c.ModelID(), chainSource)
113
114 opts := ModelOptions(*c, o, model.WithContext(ctx))
115 inferenceModel, err := loader.Load(opts...)
116 if err != nil {
117 recordModelLoadFailure(o, c.Name, c.Backend, err, map[string]any{"model_file": modelFile})
118 return nil, err
119 }
120
121 // Probe the backend for model-scoped metadata after LoadModel succeeds.
122 // Two signals are captured: thinking-mode detection (only meaningful when the
123 // tokenizer template path is active) and the multimodal media marker (needed
124 // by custom chat templates so markers line up with what mtmd expects).
125 // We probe whenever any of those slots is still empty.
126 shouldProbeThinking := needsThinkingProbe(c)
127 needsMarkerProbe := c.MediaMarker == ""
128 if shouldProbeThinking || needsMarkerProbe {
129 modelOpts := grpcModelOpts(*c, o.SystemState.Model.ModelsPath)
130 config.DetectThinkingSupportFromBackend(ctx, c, inferenceModel, modelOpts)
131 // Update the config in the loader so it persists for future requests

Callers 1

Predict (Method) · 0.92

Calls 15

ListModels (Function) · 0.92
ResetDownloadTimers (Function) · 0.92
InstallModelFromGallery (Function) · 0.92
MaybeWithPrefixChain (Function) · 0.92
WithContext (Function) · 0.92
TruncateToBytes (Function) · 0.92
RecordBackendTrace (Function) · 0.92
GenerateLLMSummary (Function) · 0.92
messagesPrefixSource (Function) · 0.85
ModelOptions (Function) · 0.85

Tested by

no test coverage detected