RestartAndWait closes the current session (if any) so the watcher reconnects, then blocks until the next successful reconnect, ctx cancellation, supervisor shutdown (Stop or Failed), or timeout. RestartAndWait does NOT recover from a terminal state (Stopped/Failed): callers that want "restart even if terminal" semantics must call Start when terminal.
(ctx context.Context, timeout time.Duration)
| 456 | // call Start when terminal. The Toolset.Restart wrappers in pkg/tools/mcp |
| 457 | // and pkg/tools/builtin do exactly that. |
| 458 | func (s *Supervisor) RestartAndWait(ctx context.Context, timeout time.Duration) error { |
| 459 | s.mu.Lock() |
| 460 | if s.stopping { |
| 461 | s.mu.Unlock() |
| 462 | return ErrNotStarted |
| 463 | } |
| 464 | restartCh := s.restarted |
| 465 | doneCh := s.done |
| 466 | state := s.tracker.State() |
| 467 | sess := s.session |
| 468 | s.forceRestart = true |
| 469 | s.mu.Unlock() |
| 470 | |
| 471 | // Only force-close if currently usable. If the watcher already detected |
| 472 | // the disconnect, closing now would race with tryRestart. |
| 473 | if state.IsUsable() && sess != nil { |
| 474 | _ = sess.Close(context.WithoutCancel(ctx)) |
| 475 | } |
| 476 | |
| 477 | select { |
| 478 | case <-restartCh: |
| 479 | return nil |
| 480 | case <-doneCh: |
| 481 | // Stop or terminal Failed; surface the supervisor's last error. |
| 482 | if err := s.tracker.LastError(); err != nil { |
| 483 | return err |
| 484 | } |
| 485 | return ErrNotStarted |
| 486 | case <-ctx.Done(): |
| 487 | return ctx.Err() |
| 488 | case <-time.After(timeout): |
| 489 | return errors.New("timed out waiting for supervisor reconnect") |
| 490 | } |
| 491 | } |
| 492 | |
| 493 | // signalDone closes the done channel if it is not already closed. Idempotent. |
| 494 | // Takes mu so concurrent Start can replace `done` without racing. |