GetBPWValues calculates the three BPW values (model, LM head and KV cache) from the requested BPW and the selected KV cache quantisation
(bpw float64, kvCacheQuant KVCacheQuantisation)
| 457 | |
| 458 | // GetBPWValues calculates the BPW values based on the input |
| 459 | func GetBPWValues(bpw float64, kvCacheQuant KVCacheQuantisation) BPWValues { |
| 460 | logging.DebugLogger.Println("Calculating BPW values...") |
| 461 | var lmHeadBPW, kvCacheBPW float64 |
| 462 | |
| 463 | if bpw > 6.0 { |
| 464 | lmHeadBPW = 8.0 |
| 465 | } else { |
| 466 | lmHeadBPW = 6.0 |
| 467 | } |
| 468 | |
| 469 | switch kvCacheQuant { |
| 470 | case KVCacheFP16: |
| 471 | kvCacheBPW = 16 |
| 472 | case KVCacheQ8_0: |
| 473 | kvCacheBPW = 8 |
| 474 | case KVCacheQ4_0: |
| 475 | kvCacheBPW = 4 |
| 476 | default: |
| 477 | kvCacheBPW = 16 // Default to fp16 if not specified |
| 478 | } |
| 479 | |
| 480 | return BPWValues{ |
| 481 | BPW: bpw, |
| 482 | LMHeadBPW: lmHeadBPW, |
| 483 | KVCacheBPW: kvCacheBPW, |
| 484 | } |
| 485 | } |
| 486 | |
| 487 | // CalculateVRAM calculates the VRAM usage for a given model and configuration |
| 488 | func CalculateVRAM(modelID string, bpw float64, context int, kvCacheQuant KVCacheQuantisation, ollamaModelInfo *OllamaModelInfo) (float64, error) { |