From b366df04827d67fa09d6880d3d8b884b31df0c90 Mon Sep 17 00:00:00 2001 From: Anders Olsson Date: Mon, 25 May 2026 12:31:32 +0200 Subject: [PATCH] fix(supervisor): make backoff sleep interruptible by Stop/Shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the bare sleep(delay).await in the Restart backoff arm with a tokio::select! over the timer and cmd_rx. Stop/Shutdown are now handled immediately during backoff (Stop → Stopped, Shutdown → clean exit); Start/Restart/Reconfigure skip the remaining delay and retry at once. Co-Authored-By: Claude Sonnet 4.6 --- crates/xy-supervisor/src/supervisor.rs | 63 +++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 6 deletions(-) diff --git a/crates/xy-supervisor/src/supervisor.rs b/crates/xy-supervisor/src/supervisor.rs index c1bdf64..8c343e7 100644 --- a/crates/xy-supervisor/src/supervisor.rs +++ b/crates/xy-supervisor/src/supervisor.rs @@ -220,14 +220,65 @@ impl SupervisorTask { let delay = self.backoff.next(); - sleep(delay).await; + enum Action { + RetryNow, + Cancel, + Exit, + } - match self.do_start().await { - Ok(c) => child = Some(c), - Err(err) => { - warn!(name = %self.cfg.name, error = %err, "restart spawn failed"); - self.set_state(ServerState::Failed); + let mut delay_fut = std::pin::pin!(sleep(delay)); + + let action = tokio::select! { + _ = &mut delay_fut => Action::RetryNow, + cmd = self.cmd_rx.recv() => match cmd { + None => Action::Exit, + Some(SupervisorCmd::Stop { ack }) => { + let _ = ack.send(StopAck::NotRunning); + Action::Cancel + } + Some(SupervisorCmd::Shutdown { ack }) => { + let _ = ack.send(()); + return; + } + Some(SupervisorCmd::Start { ack }) => { + let _ = ack.send(StartAck::Started); + Action::RetryNow + } + Some(SupervisorCmd::Restart { ack }) => { + let _ = ack.send(()); + Action::RetryNow + } + Some(SupervisorCmd::Reconfigure { new, ack }) => { + self.cfg = new; + self.backoff = Backoff::new( + self.cfg.restart.backoff_initial, + self.cfg.restart.backoff_max, + ); + self.retry_window = RetryWindow::new( + Duration::from_secs(60), + self.cfg.restart.max_retries_per_minute, + ); + let _ = ack.send(()); + Action::RetryNow + } + }, + }; + + match action { + Action::RetryNow => { + match self.do_start().await { + Ok(c) => child = Some(c), + Err(err) => { + warn!(name = %self.cfg.name, error = %err, "restart spawn failed"); + self.set_state(ServerState::Failed); + } + } } + Action::Cancel => { + self.started_at = None; + self.set_state(ServerState::Stopped); + } + Action::Exit => return, } } }