MCPcopy
hub / github.com/mudler/LocalAI / Load

Method Load

pkg/model/initializers.go:346–466  ·  view source on GitHub ↗
(opts ...Option)

Source from the content-addressed store, hash-verified

344}
345
346func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
347 o := NewOptions(opts...)
348
349 ml.mu.Lock()
350 distributed := ml.modelRouter != nil
351 ml.mu.Unlock()
352
353 // In distributed mode, SmartRouter must run per inference request so
354 // PickBestReplica (core/services/nodes/replicapicker.go) picks the
355 // least-loaded replica each time. Bypass the local cache and the local
356 // LRU / concurrency-group watchdog enforcement: both are scoped to the
357 // in-process Model store, which in distributed mode only holds stubs for
358 // remote replicas. SmartRouter handles cluster-wide eviction
359 // (evictLRUAndFreeNode) and concurrency-group anti-affinity
360 // (narrowByGroupAntiAffinity) at the scheduler layer.
361 //
362 // TODO(distributed-cache): see LoadModel for the rotating-replica-cache
363 // integration point that would let hot paths skip the per-request DB
364 // round-trip without giving up the shared PickBestReplica policy.
365 if distributed {
366 client, err := ml.backendLoader(opts...)
367 if err != nil {
368 return nil, err
369 }
370 if m := ml.CheckIsLoaded(o.modelID); m != nil && m.Process() == nil {
371 client = newConnectionEvictingClient(client, o.modelID, func() {
372 if err := ml.ShutdownModel(o.modelID); err != nil {
373 xlog.Warn("Failed to shut down remote model after connection error", "model", o.modelID, "error", err)
374 }
375 })
376 }
377 return client, nil
378 }
379
380 // Return earlier if we have a model already loaded
381 // (avoid looping through all the backends)
382 if m := ml.CheckIsLoaded(o.modelID); m != nil {
383 xlog.Debug("Model already loaded", "model", o.modelID)
384 // Update last used time for LRU tracking
385 ml.updateModelLastUsed(m)
386 client := m.GRPC(o.parallelRequests, ml.wd)
387 // Wrap remote models so connection errors during inference trigger eviction
388 if m.Process() == nil {
389 client = newConnectionEvictingClient(client, o.modelID, func() {
390 ml.ShutdownModel(o.modelID)
391 })
392 }
393 return client, nil
394 }
395
396 // Evict any loaded model that shares a concurrency group with the
397 // requested one before applying the global LRU cap — group eviction may
398 // already make room, and otherwise LRU might evict an unrelated model
399 // only for the group check to immediately evict another.
400 ml.enforceGroupExclusivity(o.modelID)
401
402 // Enforce LRU limit before loading a new model
403 ml.enforceLRULimit()

Callers

nothing calls this directly

Calls 15

backendLoader (Method) · 0.95
CheckIsLoaded (Method) · 0.95
ShutdownModel (Method) · 0.95
updateModelLastUsed (Method) · 0.95
enforceLRULimit (Method) · 0.95
NewOptions (Function) · 0.85
WithBackendString (Function) · 0.85
Process (Method) · 0.80
GRPC (Method) · 0.80

Tested by

no test coverage detected