startProcessWithUsernsLocked expects the OS thread to be locked already. It does the following: 1. calls setresuid() to switch to the targetHostUID user, so that further user namespace creations are attributed to it; 2. sets the thread's effective capabilities back to the original user's, to allow writing to /proc/<pid>/uid_map as well as to create …
(targetHostUID int, unshareFlags uintptr, uidMaps, gidMaps []syscall.SysProcIDMap)
| 157 | // killed, and the thread is *not* setresuid()'ed back to the original user. In this case the |
| 158 | // thread should be killed off by the caller to avoid using a thread in a bad state |
| 159 | func startProcessWithUsernsLocked(targetHostUID int, unshareFlags uintptr, uidMaps, gidMaps []syscall.SysProcIDMap) (int, int, error) { |
| 160 | originalEUID := os.Geteuid() |
| 161 | |
| 162 | originalCaps, err := getCurrentCaps() |
| 163 | if err != nil { |
| 164 | return -1, -1, fmt.Errorf("failed to read current capabilities: %w", err) |
| 165 | } |
| 166 | |
| 167 | if _, _, errno := syscall.RawSyscall(unix.SYS_SETRESUID, ^uintptr(0), uintptr(targetHostUID), ^uintptr(0)); errno != 0 { |
| 168 | return -1, -1, fmt.Errorf("failed to set effective UID: %w", errno) |
| 169 | } |
| 170 | |
| 171 | err = setCurrentCaps(originalCaps) |
| 172 | if err != nil { |
| 173 | return -1, -1, fmt.Errorf("failed to restore capabilities: %w", err) |
| 174 | } |
| 175 | |
| 176 | var pidfd int |
| 177 | proc, err := os.StartProcess("/proc/self/exe", []string{"UnshareAfterEnterUserns"}, &os.ProcAttr{ |
| 178 | Sys: &syscall.SysProcAttr{ |
| 179 | // clone new user namespace first and then unshare |
| 180 | Cloneflags: unix.CLONE_NEWUSER, |
| 181 | Unshareflags: unshareFlags, |
| 182 | UidMappings: uidMaps, |
| 183 | GidMappings: gidMaps, |
| 184 | GidMappingsEnableSetgroups: true, |
| 185 | // NOTE: It's reexec but it's not heavy because subprocess |
| 186 | // be in PTRACE_TRACEME mode before performing execve. |
| 187 | Ptrace: true, |
| 188 | Pdeathsig: syscall.SIGKILL, |
| 189 | PidFD: &pidfd, |
| 190 | }, |
| 191 | }) |
| 192 | if err != nil { |
| 193 | return -1, -1, fmt.Errorf("failed to start noop process for unshare: %w", err) |
| 194 | } |
| 195 | |
| 196 | if pidfd == -1 { |
| 197 | proc.Kill() |
| 198 | proc.Wait() |
| 199 | return -1, -1, fmt.Errorf("kernel doesn't support CLONE_PIDFD") |
| 200 | } |
| 201 | |
| 202 | if _, _, errno := syscall.RawSyscall(unix.SYS_SETRESUID, ^uintptr(0), uintptr(originalEUID), ^uintptr(0)); errno != 0 { |
| 203 | proc.Kill() |
| 204 | proc.Wait() |
| 205 | unix.Close(pidfd) |
| 206 | return -1, -1, fmt.Errorf("failed to restore UID: %w", errno) |
| 207 | } |
| 208 | |
| 209 | return proc.Pid, pidfd, nil |
| 210 | } |
| 211 | |
| 212 | func pidfdWaitid(pidfd int) error { |
| 213 | return IgnoringEINTR(func() error { |
no test coverage detected
searching dependent graphs…