getUsernsFD returns the file descriptor of a pinnable user namespace.
(uidMaps, gidMaps []syscall.SysProcIDMap)
| 28 | |
| 29 | // getUsernsFD returns the file descriptor of a pinnable user namespace. |
| 30 | func getUsernsFD(uidMaps, gidMaps []syscall.SysProcIDMap) (_ *os.File, retErr error) { |
| 31 | if !sys.SupportsPidFD() { |
| 32 | return nil, fmt.Errorf("kernel doesn't support pidfd") |
| 33 | |
| 34 | } |
| 35 | |
| 36 | var pidfd int |
| 37 | |
| 38 | proc, err := os.StartProcess("/proc/self/exe", []string{"containerd[getUsernsFD]"}, &os.ProcAttr{ |
| 39 | Sys: &syscall.SysProcAttr{ |
| 40 | Cloneflags: unix.CLONE_NEWUSER, |
| 41 | UidMappings: uidMaps, |
| 42 | GidMappings: gidMaps, |
| 43 | // NOTE: This is a re-exec, but it's not heavy because the subprocess |
| 44 | // will be in PTRACE_TRACEME mode before performing execve. |
| 45 | Ptrace: true, |
| 46 | Pdeathsig: syscall.SIGKILL, |
| 47 | PidFD: &pidfd, |
| 48 | }, |
| 49 | }) |
| 50 | if err != nil { |
| 51 | return nil, fmt.Errorf("failed to start noop process for unshare: %w", err) |
| 52 | } |
| 53 | |
| 54 | if pidfd == -1 { |
| 55 | proc.Kill() |
| 56 | proc.Wait() |
| 57 | return nil, fmt.Errorf("failed to prevent pid reused issue because pidfd isn't supported") |
| 58 | } |
| 59 | |
| 60 | pidFD := os.NewFile(uintptr(pidfd), "pidfd") |
| 61 | defer func() { |
| 62 | unix.PidfdSendSignal(int(pidFD.Fd()), unix.SIGKILL, nil, 0) |
| 63 | |
| 64 | pidfdWaitid(pidFD) |
| 65 | |
| 66 | pidFD.Close() |
| 67 | }() |
| 68 | |
| 69 | // NOTE: |
| 70 | // |
| 71 | // The usernsFD will hold the userns reference in kernel. Even if the |
| 72 | // child process is reaped, the usernsFD is still valid. |
| 73 | usernsFD, err := os.Open(fmt.Sprintf("/proc/%d/ns/user", proc.Pid)) |
| 74 | if err != nil { |
| 75 | return nil, fmt.Errorf("failed to get userns file descriptor for /proc/%d/user/ns: %w", proc.Pid, err) |
| 76 | } |
| 77 | defer func() { |
| 78 | if retErr != nil { |
| 79 | usernsFD.Close() |
| 80 | } |
| 81 | }() |
| 82 | |
| 83 | // Ensure the child process is still alive. If the err is ESRCH, we |
| 84 | // should return error because we can't guarantee the usernsFD and |
| 85 | // u[g]idmapFile are valid. It's safe to return error and retry. |
| 86 | if err := unix.PidfdSendSignal(int(pidFD.Fd()), 0, nil, 0); err != nil { |
| 87 | return nil, fmt.Errorf("failed to ensure child process is alive: %w", err) |
searching dependent graphs…