MCPcopy
hub / github.com/mudler/LocalAI / estimateModelVRAM

Method estimateModelVRAM

core/services/nodes/router.go:862–900  ·  view source on GitHub ↗

estimateModelVRAM estimates the VRAM required for a model using the unified estimator.

(ctx context.Context, opts *pb.ModelOptions)

Source from the content-addressed store, hash-verified

860
861// estimateModelVRAM estimates the VRAM required for a model using the unified estimator.
862func (r *SmartRouter) estimateModelVRAM(ctx context.Context, opts *pb.ModelOptions) uint64 {
863 estCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
864 defer cancel()
865
866 ctxSize := uint32(opts.ContextSize)
867 if ctxSize == 0 {
868 ctxSize = 8192
869 }
870
871 input := vram.ModelEstimateInput{
872 Options: vram.EstimateOptions{
873 GPULayers: int(opts.NGPULayers),
874 },
875 }
876
877 // Try model file as a local file for GGUF metadata estimation
878 if opts.ModelFile != "" {
879 if _, err := os.Stat(opts.ModelFile); err == nil {
880 input.Files = append(input.Files, vram.FileInput{URI: opts.ModelFile, Size: 0})
881 }
882 }
883
884 // Try HF repo from model name (e.g. "org/model")
885 if opts.Model != "" {
886 if repoID, ok := vram.ExtractHFRepoID(opts.Model); ok {
887 input.HFRepo = repoID
888 }
889 }
890
891 if len(input.Files) == 0 && input.HFRepo == "" && input.Size == "" {
892 return 0
893 }
894
895 result, err := vram.EstimateModelMultiContext(estCtx, input, []uint32{ctxSize})
896 if err != nil {
897 return 0
898 }
899 return result.VRAMForContext(ctxSize)
900}
901
902// installBackendOnNode sends a NATS backend.install request-reply to the node
903// and returns the gRPC address. Concurrent identical calls (same nodeID +

Callers 1

scheduleNewModel (Method) · 0.95

Calls 3

ExtractHFRepoID (Function) · 0.92
VRAMForContext (Method) · 0.80

Tested by

no test coverage detected