EstimateModelMultiContext provides a unified VRAM estimation entry point that returns estimates at multiple context sizes. It tries (in order): 1. Direct file-based estimation (GGUF metadata or file size heuristic) 2. ParseSizeString from Size field 3. HuggingFace repo file listing 4. Zero result
(ctx context.Context, input ModelEstimateInput, contextSizes []uint32)
| 263 | // 3. HuggingFace repo file listing |
| 264 | // 4. Zero result |
| 265 | func EstimateModelMultiContext(ctx context.Context, input ModelEstimateInput, contextSizes []uint32) (MultiContextEstimate, error) { |
| 266 | if len(contextSizes) == 0 { |
| 267 | contextSizes = []uint32{8192} |
| 268 | } |
| 269 | |
| 270 | // 1. Try direct file estimation |
| 271 | if len(input.Files) > 0 { |
| 272 | result, err := EstimateMultiContext(ctx, input.Files, contextSizes, input.Options, DefaultCachedSizeResolver(), DefaultCachedGGUFReader()) |
| 273 | if err != nil { |
| 274 | xlog.Debug("VRAM estimation from files failed", "error", err) |
| 275 | } |
| 276 | if err == nil && result.SizeBytes > 0 { |
| 277 | return result, nil |
| 278 | } |
| 279 | } |
| 280 | |
| 281 | // 2. Try size string |
| 282 | if input.Size != "" { |
| 283 | if sizeBytes, err := ParseSizeString(input.Size); err != nil { |
| 284 | xlog.Debug("VRAM estimation from size string failed", "error", err, "size", input.Size) |
| 285 | } else if sizeBytes > 0 { |
| 286 | return MultiContextEstimate{ |
| 287 | SizeBytes: sizeBytes, |
| 288 | SizeDisplay: FormatBytes(sizeBytes), |
| 289 | Estimates: buildEstimates(modelProfile{sizeBytes: sizeBytes}, contextSizes, EstimateOptions{}), |
| 290 | }, nil |
| 291 | } |
| 292 | } |
| 293 | |
| 294 | // 3. Try HF repo |
| 295 | hfRepo := input.HFRepo |
| 296 | if repoID, ok := ExtractHFRepoID(hfRepo); ok { |
| 297 | hfRepo = repoID |
| 298 | } |
| 299 | if hfRepo != "" { |
| 300 | totalBytes, err := hfRepoWeightSize(ctx, hfRepo) |
| 301 | if err != nil { |
| 302 | xlog.Debug("VRAM estimation from HF repo failed", "error", err, "repo", hfRepo) |
| 303 | } |
| 304 | if err == nil && totalBytes > 0 { |
| 305 | return MultiContextEstimate{ |
| 306 | SizeBytes: totalBytes, |
| 307 | SizeDisplay: FormatBytes(totalBytes), |
| 308 | Estimates: buildEstimates(modelProfile{sizeBytes: totalBytes}, contextSizes, EstimateOptions{}), |
| 309 | }, nil |
| 310 | } |
| 311 | } |
| 312 | |
| 313 | // 4. No estimation possible |
| 314 | return MultiContextEstimate{}, nil |
| 315 | } |
no test coverage detected