fix(supervisor): make backoff sleep interruptible by Stop/Shutdown
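Replace the bare `sleep(delay).await` in the Restart backoff arm with a `tokio::select!` over the backoff timer and `cmd_rx.recv()`. Stop and Shutdown are now handled immediately during backoff: Stop is acknowledged with `StopAck::NotRunning` and the state becomes Stopped; Shutdown is acknowledged and the task exits cleanly. A closed command channel also exits the task. Start, Restart, and Reconfigure skip the remaining delay and retry at once; Reconfigure first applies the new config and rebuilds the backoff and retry window.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>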
This commit is contained in:
@@ -220,8 +220,52 @@ impl<S: Spawner> SupervisorTask<S> {
|
|||||||
|
|
||||||
let delay = self.backoff.next();
|
let delay = self.backoff.next();
|
||||||
|
|
||||||
sleep(delay).await;
|
enum Action {
|
||||||
|
RetryNow,
|
||||||
|
Cancel,
|
||||||
|
Exit,
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut delay_fut = std::pin::pin!(sleep(delay));
|
||||||
|
|
||||||
|
let action = tokio::select! {
|
||||||
|
_ = &mut delay_fut => Action::RetryNow,
|
||||||
|
cmd = self.cmd_rx.recv() => match cmd {
|
||||||
|
None => Action::Exit,
|
||||||
|
Some(SupervisorCmd::Stop { ack }) => {
|
||||||
|
let _ = ack.send(StopAck::NotRunning);
|
||||||
|
Action::Cancel
|
||||||
|
}
|
||||||
|
Some(SupervisorCmd::Shutdown { ack }) => {
|
||||||
|
let _ = ack.send(());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Some(SupervisorCmd::Start { ack }) => {
|
||||||
|
let _ = ack.send(StartAck::Started);
|
||||||
|
Action::RetryNow
|
||||||
|
}
|
||||||
|
Some(SupervisorCmd::Restart { ack }) => {
|
||||||
|
let _ = ack.send(());
|
||||||
|
Action::RetryNow
|
||||||
|
}
|
||||||
|
Some(SupervisorCmd::Reconfigure { new, ack }) => {
|
||||||
|
self.cfg = new;
|
||||||
|
self.backoff = Backoff::new(
|
||||||
|
self.cfg.restart.backoff_initial,
|
||||||
|
self.cfg.restart.backoff_max,
|
||||||
|
);
|
||||||
|
self.retry_window = RetryWindow::new(
|
||||||
|
Duration::from_secs(60),
|
||||||
|
self.cfg.restart.max_retries_per_minute,
|
||||||
|
);
|
||||||
|
let _ = ack.send(());
|
||||||
|
Action::RetryNow
|
||||||
|
}
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
match action {
|
||||||
|
Action::RetryNow => {
|
||||||
match self.do_start().await {
|
match self.do_start().await {
|
||||||
Ok(c) => child = Some(c),
|
Ok(c) => child = Some(c),
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
@@ -230,6 +274,13 @@ impl<S: Spawner> SupervisorTask<S> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Action::Cancel => {
|
||||||
|
self.started_at = None;
|
||||||
|
self.set_state(ServerState::Stopped);
|
||||||
|
}
|
||||||
|
Action::Exit => return,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user