getAMDGPUMemory queries AMD GPUs using rocm-smi
()
| 581 | |
| 582 | // getAMDGPUMemory queries AMD GPUs using rocm-smi |
| 583 | func getAMDGPUMemory() []GPUMemoryInfo { |
| 584 | // Check if rocm-smi is available |
| 585 | if _, err := exec.LookPath("rocm-smi"); err != nil { |
| 586 | return nil |
| 587 | } |
| 588 | |
| 589 | // Try CSV format first |
| 590 | cmd := exec.Command("rocm-smi", "--showmeminfo", "vram", "--csv") |
| 591 | |
| 592 | var stdout, stderr bytes.Buffer |
| 593 | cmd.Stdout = &stdout |
| 594 | cmd.Stderr = &stderr |
| 595 | |
| 596 | if err := cmd.Run(); err != nil { |
| 597 | xlog.Debug("rocm-smi failed", "error", err, "stderr", stderr.String()) |
| 598 | return nil |
| 599 | } |
| 600 | |
| 601 | var gpus []GPUMemoryInfo |
| 602 | lines := strings.Split(strings.TrimSpace(stdout.String()), "\n") |
| 603 | |
| 604 | // Skip the header line (index 0) and any empty lines |
| 605 | for i, line := range lines { |
| 606 | if i == 0 || line == "" { |
| 607 | continue |
| 608 | } |
| 609 | |
| 610 | parts := strings.Split(line, ",") |
| 611 | if len(parts) < 3 { |
| 612 | continue |
| 613 | } |
| 614 | |
| 615 | // Parse GPU index from first column (usually "GPU[0]" format) |
| 616 | idxStr := strings.TrimSpace(parts[0]) |
| 617 | idx := 0 |
| 618 | if strings.HasPrefix(idxStr, "GPU[") { |
| 619 | idxStr = strings.TrimPrefix(idxStr, "GPU[") |
| 620 | idxStr = strings.TrimSuffix(idxStr, "]") |
| 621 | idx, _ = strconv.Atoi(idxStr) |
| 622 | } |
| 623 | |
| 624 | // Parse memory values: column 1 is read as total, column 2 as used (in bytes or MB depending on rocm-smi version) |
| 625 | usedBytes, _ := strconv.ParseUint(strings.TrimSpace(parts[2]), 10, 64) |
| 626 | totalBytes, _ := strconv.ParseUint(strings.TrimSpace(parts[1]), 10, 64) |
| 627 | |
| 628 | // Heuristic: a total below 1,000,000 is assumed to be in MB rather than bytes, so scale both values to bytes |
| 629 | if totalBytes < 1000000 { |
| 630 | usedBytes *= 1024 * 1024 |
| 631 | totalBytes *= 1024 * 1024 |
| 632 | } |
| 633 | |
| 634 | freeBytes := uint64(0) |
| 635 | if totalBytes > usedBytes { |
| 636 | freeBytes = totalBytes - usedBytes |
| 637 | } |
| 638 | |
| 639 | usagePercent := 0.0 |
| 640 | if totalBytes > 0 { |
no test coverage detected