TestSSHCheckModeSessionLossReDelegates reproduces the failure in https://github.com/juanfont/headscale/issues/3305 with a real client: an SSH connection in check mode is pending a verdict when the control plane restarts, which drops the in-memory auth cache, so the auth session the client is still polling for no longer exists.
(t *testing.T)
| 1270 | // a fresh check rather than dead-ending the now-defunct auth_id — and once that |
| 1271 | // fresh check is approved the SSH connection completes. |
| 1272 | func TestSSHCheckModeSessionLossReDelegates(t *testing.T) { |
| 1273 | IntegrationSkip(t) |
| 1274 | |
| 1275 | scenario := sshScenario(t, sshCheckPolicy(), "ssh-sessionloss", 1) |
| 1276 | defer scenario.ShutdownAssertNoPanics(t) |
| 1277 | |
| 1278 | allClients, err := scenario.ListTailscaleClients() |
| 1279 | requireNoErrListClients(t, err) |
| 1280 | |
| 1281 | user1Clients, err := scenario.ListTailscaleClients("user1") |
| 1282 | requireNoErrListClients(t, err) |
| 1283 | |
| 1284 | headscale, err := scenario.Headscale() |
| 1285 | require.NoError(t, err) |
| 1286 | |
| 1287 | err = scenario.WaitForTailscaleSync() |
| 1288 | requireNoErrSync(t, err) |
| 1289 | |
| 1290 | _, err = scenario.ListTailscaleClientsFQDNs() |
| 1291 | requireNoErrListFQDN(t, err) |
| 1292 | |
| 1293 | for _, client := range user1Clients { |
| 1294 | for _, peer := range allClients { |
| 1295 | if client.Hostname() == peer.Hostname() { |
| 1296 | continue |
| 1297 | } |
| 1298 | |
| 1299 | // Start SSH — blocks waiting for the check verdict while the |
| 1300 | // pending auth session sits in the control plane's cache. Allow a |
| 1301 | // generous window: the flow spans a full control-plane restart. |
| 1302 | sshResult := doSSHCheckWithTimeout(t, client, peer, 120*time.Second) |
| 1303 | |
| 1304 | firstAuthID := findSSHCheckAuthID(t, headscale) |
| 1305 | |
| 1306 | // Restart the control plane: the in-memory auth cache is dropped |
| 1307 | // (the on-disk database and keys persist), so the auth_id the |
| 1308 | // client is still polling for no longer exists. |
| 1309 | err := headscale.Restart() |
| 1310 | require.NoError(t, err, "restarting headscale should succeed") |
| 1311 | |
| 1312 | err = scenario.WaitForTailscaleSync() |
| 1313 | requireNoErrSync(t, err) |
| 1314 | |
| 1315 | // The client keeps polling the now-missing auth_id; with the fix the |
| 1316 | // server re-delegates a fresh session instead of returning an error |
| 1317 | // the client cannot recover from. A new auth_id only appears if the |
| 1318 | // re-delegation happened. |
| 1319 | secondAuthID := findNewSSHCheckAuthID(t, headscale, firstAuthID) |
| 1320 | require.NotEqual(t, firstAuthID, secondAuthID, |
| 1321 | "a lost session under an active check must re-delegate with a new auth_id") |
| 1322 | |
| 1323 | // Approve the re-delegated session; the SSH connection must now |
| 1324 | // complete instead of hanging until it times out. |
| 1325 | _, err = headscale.Execute( |
| 1326 | []string{ |
| 1327 | "headscale", "auth", "approve", |
| 1328 | "--auth-id", secondAuthID, |
| 1329 | }, |
nothing calls this directly
no test coverage detected