MCPcopy
hub / github.com/determined-ai/determined / Detect

Function Detect

agent/internal/detect/detect.go:19–111  ·  view source on GitHub ↗

Detects the devices available to the agent. If artificial devices are configured, those are preferred; otherwise, CUDA, ROCm, CPU (or no) devices are detected based on the configured slot type.

(slotType, agentID, visibleGPUs string, artificialSlots int)

Source from the content-addressed store, hash-verified

17// Detect the devices available. If artificial devices are configured, prefers those, otherwise,
18// we detect cuda, rocm, cpu (or no) devices based on the configured slot type.
19func Detect(slotType, agentID, visibleGPUs string, artificialSlots int) ([]device.Device, error) {
20 // Log detected nvidia version.
21 v, err := getNvidiaVersion()
22 if err != nil {
23 return nil, fmt.Errorf("failed to get nvidia version: %w", err)
24 } else if v != "" {
25 log.Infof("Nvidia driver version: %s", v)
26 }
27
28 // Log detected rocm version.
29 v, err = getRocmVersion()
30 if err != nil {
31 return nil, fmt.Errorf("failed to get rocm version: %w", err)
32 } else if v != "" {
33 log.Infof("Rocm driver version: %s", v)
34 }
35
36 // Detect devices available to the agent.
37 var detected []device.Device
38 switch {
39 case artificialSlots > 0:
40 // Generate random UUIDs consistent across agent restarts as long as
41 // agentID is the same.
42 rnd, sErr := randFromString(agentID)
43 if sErr != nil {
44 return nil, sErr
45 }
46
47 for i := 0; i < artificialSlots; i++ {
48 u, rErr := uuid.NewRandomFromReader(rnd)
49 if rErr != nil {
50 return nil, rErr
51 }
52 id := u.String()
53 detected = append(detected, device.Device{
54 ID: device.ID(i), Brand: "Artificial", UUID: id, Type: device.CPU,
55 })
56 }
57 case slotType == "none":
58 detected = []device.Device{}
59 case slotType == "cuda" || slotType == "gpu":
60 // Support "gpu" for backwards compatibility.
61 detected, err = detectCudaGPUs(visibleGPUs)
62 if err != nil {
63 return nil, errors.Wrap(
64 err,
65 "error while gathering GPU info through nvidia-smi command",
66 )
67 }
68 case slotType == "rocm":
69 detected, err = detectRocmGPUs(visibleGPUs)
70 if err != nil {
71 return nil, errors.Wrap(err, "error while gathering GPU info through rocm-smi command")
72 }
73 case slotType == "cpu":
74 detected, err = detectCPUs()
75 if err != nil {
76 return nil, err

Callers 1

runMethod · 0.92

Calls 12

IDTypeAlias · 0.92
getNvidiaVersionFunction · 0.85
getRocmVersionFunction · 0.85
randFromStringFunction · 0.85
detectCudaGPUsFunction · 0.85
detectRocmGPUsFunction · 0.85
detectCPUsFunction · 0.85
InfofMethod · 0.80
appendFunction · 0.50
ErrorfMethod · 0.45
StringMethod · 0.45
InfoMethod · 0.45

Tested by

no test coverage detected