fix(supervisor): make backoff sleep interruptible by Stop/Shutdown
Replace the bare `sleep(delay).await` in the Restart backoff arm with a `tokio::select!` over the backoff timer and `cmd_rx`. Stop and Shutdown are now handled immediately during backoff (Stop → state becomes Stopped, Shutdown → clean exit), and a closed command channel also ends the task; Start, Restart, and Reconfigure skip the remaining delay and retry at once.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -220,14 +220,65 @@ impl<S: Spawner> SupervisorTask<S> {
|
||||
|
||||
let delay = self.backoff.next();
|
||||
|
||||
sleep(delay).await;
|
||||
/// Outcome of waiting out the restart backoff: records what ended the wait so
/// the `match action` that follows the `select!` can act on it.
enum Action {
|
||||
/// The backoff timer elapsed, or a Start/Restart/Reconfigure command
/// arrived; the caller goes on to call `do_start()` again.
RetryNow,
|
||||
/// A Stop command arrived during backoff; the caller clears `started_at`
/// and sets the state to `ServerState::Stopped` instead of retrying.
Cancel,
|
||||
/// `cmd_rx.recv()` yielded `None`; the caller returns from the task.
Exit,
|
||||
}
|
||||
|
||||
match self.do_start().await {
|
||||
Ok(c) => child = Some(c),
|
||||
Err(err) => {
|
||||
warn!(name = %self.cfg.name, error = %err, "restart spawn failed");
|
||||
self.set_state(ServerState::Failed);
|
||||
let mut delay_fut = std::pin::pin!(sleep(delay));
|
||||
|
||||
let action = tokio::select! {
|
||||
_ = &mut delay_fut => Action::RetryNow,
|
||||
cmd = self.cmd_rx.recv() => match cmd {
|
||||
None => Action::Exit,
|
||||
Some(SupervisorCmd::Stop { ack }) => {
|
||||
let _ = ack.send(StopAck::NotRunning);
|
||||
Action::Cancel
|
||||
}
|
||||
Some(SupervisorCmd::Shutdown { ack }) => {
|
||||
let _ = ack.send(());
|
||||
return;
|
||||
}
|
||||
Some(SupervisorCmd::Start { ack }) => {
|
||||
let _ = ack.send(StartAck::Started);
|
||||
Action::RetryNow
|
||||
}
|
||||
Some(SupervisorCmd::Restart { ack }) => {
|
||||
let _ = ack.send(());
|
||||
Action::RetryNow
|
||||
}
|
||||
Some(SupervisorCmd::Reconfigure { new, ack }) => {
|
||||
self.cfg = new;
|
||||
self.backoff = Backoff::new(
|
||||
self.cfg.restart.backoff_initial,
|
||||
self.cfg.restart.backoff_max,
|
||||
);
|
||||
self.retry_window = RetryWindow::new(
|
||||
Duration::from_secs(60),
|
||||
self.cfg.restart.max_retries_per_minute,
|
||||
);
|
||||
let _ = ack.send(());
|
||||
Action::RetryNow
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
match action {
|
||||
Action::RetryNow => {
|
||||
match self.do_start().await {
|
||||
Ok(c) => child = Some(c),
|
||||
Err(err) => {
|
||||
warn!(name = %self.cfg.name, error = %err, "restart spawn failed");
|
||||
self.set_state(ServerState::Failed);
|
||||
}
|
||||
}
|
||||
}
|
||||
Action::Cancel => {
|
||||
self.started_at = None;
|
||||
self.set_state(ServerState::Stopped);
|
||||
}
|
||||
Action::Exit => return,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user