estimateModelVRAM estimates the VRAM required for a model using the unified estimator.
(ctx context.Context, opts *pb.ModelOptions)
| 860 | |
| 861 | // estimateModelVRAM estimates the VRAM required for a model using the unified estimator. |
| 862 | func (r *SmartRouter) estimateModelVRAM(ctx context.Context, opts *pb.ModelOptions) uint64 { |
| 863 | estCtx, cancel := context.WithTimeout(ctx, 10*time.Second) |
| 864 | defer cancel() |
| 865 | |
| 866 | ctxSize := uint32(opts.ContextSize) |
| 867 | if ctxSize == 0 { |
| 868 | ctxSize = 8192 |
| 869 | } |
| 870 | |
| 871 | input := vram.ModelEstimateInput{ |
| 872 | Options: vram.EstimateOptions{ |
| 873 | GPULayers: int(opts.NGPULayers), |
| 874 | }, |
| 875 | } |
| 876 | |
| 877 | // Try model file as a local file for GGUF metadata estimation |
| 878 | if opts.ModelFile != "" { |
| 879 | if _, err := os.Stat(opts.ModelFile); err == nil { |
| 880 | input.Files = append(input.Files, vram.FileInput{URI: opts.ModelFile, Size: 0}) |
| 881 | } |
| 882 | } |
| 883 | |
| 884 | // Try HF repo from model name (e.g. "org/model") |
| 885 | if opts.Model != "" { |
| 886 | if repoID, ok := vram.ExtractHFRepoID(opts.Model); ok { |
| 887 | input.HFRepo = repoID |
| 888 | } |
| 889 | } |
| 890 | |
| 891 | if len(input.Files) == 0 && input.HFRepo == "" && input.Size == "" { |
| 892 | return 0 |
| 893 | } |
| 894 | |
| 895 | result, err := vram.EstimateModelMultiContext(estCtx, input, []uint32{ctxSize}) |
| 896 | if err != nil { |
| 897 | return 0 |
| 898 | } |
| 899 | return result.VRAMForContext(ctxSize) |
| 900 | } |
| 901 | |
| 902 | // installBackendOnNode sends a NATS backend.install request-reply to the node |
| 903 | // and returns the gRPC address. Concurrent identical calls (same nodeID + |
no test coverage detected