fix(supervisor): make backoff sleep interruptible by Stop/Shutdown

Replace the bare sleep(delay).await in the Restart backoff arm with a
tokio::select! over the timer and cmd_rx. Stop/Shutdown are now handled
immediately during backoff (Stop → Stopped, Shutdown → clean exit);
Start/Restart/Reconfigure skip the remaining delay and retry at once
(Reconfigure first applies the new config and resets the backoff and
retry window). A closed command channel makes the task return.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-25 12:31:32 +02:00
parent 3e4ad79137
commit b366df0482
+57 -6
View File
@@ -220,14 +220,65 @@ impl<S: Spawner> SupervisorTask<S> {
let delay = self.backoff.next();
sleep(delay).await;
/// What to do once the backoff wait ends, as decided by the `tokio::select!`
/// that races the delay timer against `cmd_rx`.
enum Action {
/// The delay elapsed, or a Start/Restart/Reconfigure command arrived:
/// call `do_start` right away.
RetryNow,
/// A Stop command arrived: clear `started_at` and move to `ServerState::Stopped`
/// without attempting a restart.
Cancel,
/// `cmd_rx.recv()` yielded `None`: return from the task.
Exit,
}
match self.do_start().await {
Ok(c) => child = Some(c),
Err(err) => {
warn!(name = %self.cfg.name, error = %err, "restart spawn failed");
self.set_state(ServerState::Failed);
let mut delay_fut = std::pin::pin!(sleep(delay));
let action = tokio::select! {
_ = &mut delay_fut => Action::RetryNow,
cmd = self.cmd_rx.recv() => match cmd {
None => Action::Exit,
Some(SupervisorCmd::Stop { ack }) => {
let _ = ack.send(StopAck::NotRunning);
Action::Cancel
}
Some(SupervisorCmd::Shutdown { ack }) => {
let _ = ack.send(());
return;
}
Some(SupervisorCmd::Start { ack }) => {
let _ = ack.send(StartAck::Started);
Action::RetryNow
}
Some(SupervisorCmd::Restart { ack }) => {
let _ = ack.send(());
Action::RetryNow
}
Some(SupervisorCmd::Reconfigure { new, ack }) => {
self.cfg = new;
self.backoff = Backoff::new(
self.cfg.restart.backoff_initial,
self.cfg.restart.backoff_max,
);
self.retry_window = RetryWindow::new(
Duration::from_secs(60),
self.cfg.restart.max_retries_per_minute,
);
let _ = ack.send(());
Action::RetryNow
}
},
};
match action {
Action::RetryNow => {
match self.do_start().await {
Ok(c) => child = Some(c),
Err(err) => {
warn!(name = %self.cfg.name, error = %err, "restart spawn failed");
self.set_state(ServerState::Failed);
}
}
}
Action::Cancel => {
self.started_at = None;
self.set_state(ServerState::Stopped);
}
Action::Exit => return,
}
}
}