Method Load

pkg/model/initializers.go:346–466 · view source on GitHub ↗

(opts ...Option)

Source from the content-addressed store, hash-verified

344	}
345
346	func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
347	o := NewOptions(opts...)
348
349	ml.mu.Lock()
350	distributed := ml.modelRouter != nil
351	ml.mu.Unlock()
352
353	// In distributed mode, SmartRouter must run per inference request so
354	// PickBestReplica (core/services/nodes/replicapicker.go) picks the
355	// least-loaded replica each time. Bypass the local cache and the local
356	// LRU / concurrency-group watchdog enforcement: both are scoped to the
357	// in-process Model store, which in distributed mode only holds stubs for
358	// remote replicas. SmartRouter handles cluster-wide eviction
359	// (evictLRUAndFreeNode) and concurrency-group anti-affinity
360	// (narrowByGroupAntiAffinity) at the scheduler layer.
361	//
362	// TODO(distributed-cache): see LoadModel for the rotating-replica-cache
363	// integration point that would let hot paths skip the per-request DB
364	// round-trip without giving up the shared PickBestReplica policy.
365	if distributed {
366	client, err := ml.backendLoader(opts...)
367	if err != nil {
368	return nil, err
369	}
370	if m := ml.CheckIsLoaded(o.modelID); m != nil && m.Process() == nil {
371	client = newConnectionEvictingClient(client, o.modelID, func() {
372	if err := ml.ShutdownModel(o.modelID); err != nil {
373	xlog.Warn("Failed to shut down remote model after connection error", "model", o.modelID, "error", err)
374	}
375	})
376	}
377	return client, nil
378	}
379
380	// Return earlier if we have a model already loaded
381	// (avoid looping through all the backends)
382	if m := ml.CheckIsLoaded(o.modelID); m != nil {
383	xlog.Debug("Model already loaded", "model", o.modelID)
384	// Update last used time for LRU tracking
385	ml.updateModelLastUsed(m)
386	client := m.GRPC(o.parallelRequests, ml.wd)
387	// Wrap remote models so connection errors during inference trigger eviction
388	if m.Process() == nil {
389	client = newConnectionEvictingClient(client, o.modelID, func() {
390	ml.ShutdownModel(o.modelID)
391	})
392	}
393	return client, nil
394	}
395
396	// Evict any loaded model that shares a concurrency group with the
397	// requested one before applying the global LRU cap — group eviction may
398	// already make room, and otherwise LRU might evict an unrelated model
399	// only for the group check to immediately evict another.
400	ml.enforceGroupExclusivity(o.modelID)
401
402	// Enforce LRU limit before loading a new model
403	ml.enforceLRULimit()

Callers

nothing calls this directly

Calls 15

backendLoader · Method · 0.95

CheckIsLoaded · Method · 0.95

ShutdownModel · Method · 0.95

updateModelLastUsed · Method · 0.95

enforceGroupExclusivity · Method · 0.95

enforceLRULimit · Method · 0.95

GetAllExternalBackends · Method · 0.95

NewOptions · Function · 0.85

newConnectionEvictingClient · Function · 0.85

WithBackendString · Function · 0.85

Process · Method · 0.80

GRPC · Method · 0.80

Tested by

no test coverage detected