Method estimateModelVRAM

core/services/nodes/router.go:862–900 · view source on GitHub ↗

estimateModelVRAM estimates the VRAM required for a model using the unified estimator.

(ctx context.Context, opts *pb.ModelOptions)

Source from the content-addressed store, hash-verified

860
861	// estimateModelVRAM estimates the VRAM required for a model using the unified estimator.
862	func (r SmartRouter) estimateModelVRAM(ctx context.Context, opts pb.ModelOptions) uint64 {
863	estCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
864	defer cancel()
865
866	ctxSize := uint32(opts.ContextSize)
867	if ctxSize == 0 {
868	ctxSize = 8192
869	}
870
871	input := vram.ModelEstimateInput{
872	Options: vram.EstimateOptions{
873	GPULayers: int(opts.NGPULayers),
874	},
875	}
876
877	// Try model file as a local file for GGUF metadata estimation
878	if opts.ModelFile != "" {
879	if _, err := os.Stat(opts.ModelFile); err == nil {
880	input.Files = append(input.Files, vram.FileInput{URI: opts.ModelFile, Size: 0})
881	}
882	}
883
884	// Try HF repo from model name (e.g. "org/model")
885	if opts.Model != "" {
886	if repoID, ok := vram.ExtractHFRepoID(opts.Model); ok {
887	input.HFRepo = repoID
888	}
889	}
890
891	if len(input.Files) == 0 && input.HFRepo == "" && input.Size == "" {
892	return 0
893	}
894
895	result, err := vram.EstimateModelMultiContext(estCtx, input, []uint32{ctxSize})
896	if err != nil {
897	return 0
898	}
899	return result.VRAMForContext(ctxSize)
900	}
901
902	// installBackendOnNode sends a NATS backend.install request-reply to the node
903	// and returns the gRPC address. Concurrent identical calls (same nodeID +

scheduleNewModelMethod · 0.95

ExtractHFRepoIDFunction · 0.92

EstimateModelMultiContextFunction · 0.92

VRAMForContextMethod · 0.80

no test coverage detected