CalculateBPW calculates the best BPW for a given memory and context constraint
(modelID string, memory float64, context int, kvCacheQuant KVCacheQuantisation, quantType string, ollamaModelInfo *OllamaModelInfo)
| 629 | |
| 630 | // CalculateBPW calculates the best BPW for a given memory and context constraint |
| 631 | func CalculateBPW(modelID string, memory float64, context int, kvCacheQuant KVCacheQuantisation, quantType string, ollamaModelInfo *OllamaModelInfo) (interface{}, error) { |
| 632 | logging.DebugLogger.Println("Calculating BPW...") |
| 633 | |
| 634 | switch quantType { |
| 635 | case "exl2": |
| 636 | for _, bpw := range EXL2Options { |
| 637 | vram, err := CalculateVRAM(modelID, bpw, context, kvCacheQuant, ollamaModelInfo) |
| 638 | if err != nil { |
| 639 | return nil, err |
| 640 | } |
| 641 | if vram < memory { |
| 642 | return bpw, nil |
| 643 | } |
| 644 | } |
| 645 | case "gguf": |
| 646 | for name, bpw := range GGUFMapping { |
| 647 | vram, err := CalculateVRAM(modelID, bpw, context, kvCacheQuant, ollamaModelInfo) |
| 648 | if err != nil { |
| 649 | return nil, err |
| 650 | } |
| 651 | if vram < memory { |
| 652 | return name, nil |
| 653 | } |
| 654 | } |
| 655 | default: |
| 656 | return nil, fmt.Errorf("invalid quantisation type: %s", quantType) |
| 657 | } |
| 658 | |
| 659 | return nil, fmt.Errorf("no suitable BPW found for the given memory constraint") |
| 660 | } |
| 661 | |
| 662 | // ParseBPWOrQuant takes a string and returns a float64 BPW value |
| 663 | func ParseBPWOrQuant(input string) (float64, error) { |
nothing calls this directly
no test coverage detected