getNVIDIAGPUMemory queries NVIDIA GPUs using nvidia-smi
()
| 442 | |
| 443 | // getNVIDIAGPUMemory queries per-GPU memory totals (total/used/free) via nvidia-smi; it returns nil when nvidia-smi is not on PATH or the command fails. |
| 444 | func getNVIDIAGPUMemory() []GPUMemoryInfo { |
| 445 | // Check if nvidia-smi is available |
| 446 | if _, err := exec.LookPath("nvidia-smi"); err != nil { |
| 447 | return nil |
| 448 | } |
| 449 | |
| 450 | cmd := exec.Command("nvidia-smi", |
| 451 | "--query-gpu=index,name,memory.total,memory.used,memory.free", |
| 452 | "--format=csv,noheader,nounits") |
| 453 | |
| 454 | var stdout, stderr bytes.Buffer |
| 455 | cmd.Stdout = &stdout |
| 456 | cmd.Stderr = &stderr |
| 457 | |
| 458 | if err := cmd.Run(); err != nil { |
| 459 | xlog.Debug("nvidia-smi failed", "error", err, "stderr", stderr.String()) |
| 460 | return nil |
| 461 | } |
| 462 | |
| 463 | var gpus []GPUMemoryInfo |
| 464 | lines := strings.Split(strings.TrimSpace(stdout.String()), "\n") |
| 465 | |
| 466 | for _, line := range lines { |
| 467 | if line == "" { |
| 468 | continue |
| 469 | } |
| 470 | |
| 471 | parts := strings.Split(line, ", ") |
| 472 | if len(parts) < 5 { |
| 473 | continue |
| 474 | } |
| 475 | |
| 476 | idx, _ := strconv.Atoi(strings.TrimSpace(parts[0])) |
| 477 | name := strings.TrimSpace(parts[1]) |
| 478 | totalStr := strings.TrimSpace(parts[2]) |
| 479 | usedStr := strings.TrimSpace(parts[3]) |
| 480 | freeStr := strings.TrimSpace(parts[4]) |
| 481 | |
| 482 | var totalBytes, usedBytes, freeBytes uint64 |
| 483 | var usagePercent float64 |
| 484 | |
| 485 | // Check if memory values are N/A (unified memory devices like GB10) |
| 486 | isNA := totalStr == "[N/A]" || usedStr == "[N/A]" || freeStr == "[N/A]" |
| 487 | |
| 488 | if isNA && isUnifiedMemoryDevice(name) { |
| 489 | // Unified memory device - fall back to system RAM |
| 490 | sysInfo, err := GetSystemRAMInfo() |
| 491 | if err != nil { |
| 492 | xlog.Debug("failed to get system RAM for unified memory device", "error", err, "device", name) |
| 493 | // Still add the GPU but with zero memory info |
| 494 | gpus = append(gpus, GPUMemoryInfo{ |
| 495 | Index: idx, |
| 496 | Name: name, |
| 497 | Vendor: VendorNVIDIA, |
| 498 | TotalVRAM: 0, |
| 499 | UsedVRAM: 0, |
| 500 | FreeVRAM: 0, |
| 501 | UsagePercent: 0, |
no test coverage detected