MCPcopy
hub / github.com/google/gvisor / toggleCudaProcs

Function toggleCudaProcs

pkg/sentry/control/state_cuda.go:321–449  ·  view source on GitHub ↗
(sctx context.Context, k *kernel.Kernel, cudaCheckpointPath string, cudaProcs []*kernel.ThreadGroup, timeline *timing.Timeline, sequential bool)

Source from the content-addressed store, hash-verified

319}
320
321func toggleCudaProcs(sctx context.Context, k *kernel.Kernel, cudaCheckpointPath string, cudaProcs []*kernel.ThreadGroup, timeline *timing.Timeline, sequential bool) error {
322 start := time.Now()
323
324 // Open /dev/null once for the stdin of all cuda-checkpoint processes.
325 nullVD := k.VFS().NewAnonVirtualDentry("null")
326 defer nullVD.DecRef(sctx)
327 nullFD, err := memdev.NewNullFD(sctx, nullVD.Mount(), nullVD.Dentry(), vfs.OpenOptions{})
328 if err != nil {
329 log.Warningf("Failed to open /dev/null for cuda-checkpoint stdin: %v", err)
330 } else {
331 defer nullFD.DecRef(sctx)
332 }
333
334 // Call cuda-checkpoint for each CUDA PID.
335 ckptTimerNames := make([]string, len(cudaProcs))
336 for i, cudaProc := range cudaProcs {
337 ckptTimerNames[i] = fmt.Sprintf("cuda-ckpt %s", cudaProc.ID())
338 }
339 var ckptTimings []*timing.Lease
340 if !sequential {
341 ckptTimelines := timeline.MultiFork(ckptTimerNames)
342 ckptTimings = make([]*timing.Lease, len(cudaProcs))
343 for i := range cudaProcs {
344 ckptTimings[i] = ckptTimelines[i].Lease()
345 }
346 }
347 defer func() {
348 for _, t := range ckptTimings {
349 t.End()
350 }
351 }()
352 proc := &Proc{Kernel: k}
353 ckptProcs := make(map[*kernel.ThreadGroup]checkpointProc)
354 var errs []error
355 for i, cudaProc := range cudaProcs {
356 var ckptTiming *timing.Lease
357 if sequential {
358 ckptTiming = timeline.Fork(ckptTimerNames[i]).Lease()
359 } else {
360 ckptTiming = ckptTimings[i]
361 }
362 ckptProc, cleanup, err := invokeCudaCheckpoint(sctx, k, proc, cudaCheckpointPath, cudaProc, "--toggle", nullFD)
363 if err != nil {
364 ckptTiming.Reached("invoke error")
365 errs = append(errs, err)
366 break
367 }
368 if ckptProc.tg == nil {
369 ckptTiming.Reached("tg nil")
370 continue
371 }
372 ckptProcs[cudaProc] = ckptProc
373 ckptTimeline := ckptTiming.Transfer()
374 if sequential {
375 ckptProc.tg.WaitExited()
376 if status := ckptProc.tg.ExitStatus(); status != 0 {
377 ckptTimeline.Reached("exec error")
378 }

Callers 2

preSaveCuda · Function · 0.85
postResumeCuda · Function · 0.85

Calls 15

Reached · Method · 0.95
Transfer · Method · 0.95
NewNullFD · Function · 0.92
Warningf · Function · 0.92
IsLogging · Function · 0.92
Debugf · Function · 0.92
Infof · Function · 0.92
invokeCudaCheckpoint · Function · 0.85
delete · Struct · 0.85
NewAnonVirtualDentry · Method · 0.80
VFS · Method · 0.80
Lease · Method · 0.80

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…