fix(supervisor): publish full status (pid, port, uptime, restart_count, last_exit) via watch channel

Replace watch::Receiver<ServerState> on SupervisorHandle with watch::Receiver<Status>,
a richer snapshot type that carries pid, port, uptime_secs, restart_count and last_exit.
SupervisorTask maintains current_pid and publishes a fresh Status on every state
transition (uptime_secs is sampled at publish time, so it does not advance between
transitions); handlers.rs reads the full Status so list/status no longer return
zeroed/None fields.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-25 12:30:56 +02:00
parent ae6ed1cf0a
commit 3e4ad79137
4 changed files with 80 additions and 36 deletions
+2 -1
View File
@@ -13,5 +13,6 @@ pub use logs::{LogSink, RecordedLine, RingBuffer, RotatingLogWriter};
pub use policy::{RestartDecision, decide}; pub use policy::{RestartDecision, decide};
pub use retry_window::RetryWindow; pub use retry_window::RetryWindow;
pub use supervisor::{ pub use supervisor::{
RealSpawner, Spawner, StartAck, StopAck, SupervisorCmd, SupervisorHandle, SupervisorTask, RealSpawner, Spawner, StartAck, Status, StopAck, SupervisorCmd, SupervisorHandle,
SupervisorTask,
}; };
+49 -19
View File
@@ -42,11 +42,21 @@ pub enum StopAck {
NotRunning, NotRunning,
} }
/// Snapshot of a supervised server, published over the supervisor's `watch` channel.
///
/// The supervisor task sends a fresh value each time it changes state, so every field
/// describes the moment of publication, not the moment a receiver reads it.
#[derive(Debug, Clone)]
pub struct Status {
/// Lifecycle state at the time of publication.
pub state: ServerState,
/// PID of the spawned child; `None` before the first start and after the child has
/// exited or been stopped.
pub pid: Option<u32>,
/// Port copied from the server's configuration (`cfg.port`).
pub port: u16,
/// Whole seconds elapsed since the child's recorded start time, sampled when this
/// snapshot was published; `None` when no start time is recorded.
///
/// NOTE(review): this value does not advance between publications, so a reader of a
/// long-running server sees the uptime as of the last state transition.
pub uptime_secs: Option<u64>,
/// Counter maintained by the supervisor task with a saturating increment when a child
/// is spawned. NOTE(review): confirm whether the initial start is counted.
pub restart_count: u32,
/// Exit code recorded for the most recent child exit; `None` if none has been recorded.
/// Presumably also `None` when the child ended without an exit code — confirm.
pub last_exit: Option<i32>,
}
#[derive(Clone)] #[derive(Clone)]
pub struct SupervisorHandle { pub struct SupervisorHandle {
pub name: String, pub name: String,
pub tx: mpsc::Sender<SupervisorCmd>, pub tx: mpsc::Sender<SupervisorCmd>,
pub state: watch::Receiver<ServerState>, pub status: watch::Receiver<Status>,
pub log_sink: LogSink, pub log_sink: LogSink,
} }
@@ -60,13 +70,14 @@ pub struct SupervisorTask<S: Spawner> {
cfg: ServerConfig, cfg: ServerConfig,
log_sink: LogSink, log_sink: LogSink,
spawner: S, spawner: S,
state_tx: watch::Sender<ServerState>, status_tx: watch::Sender<Status>,
cmd_rx: mpsc::Receiver<SupervisorCmd>, cmd_rx: mpsc::Receiver<SupervisorCmd>,
backoff: Backoff, backoff: Backoff,
retry_window: RetryWindow, retry_window: RetryWindow,
restart_count: u32, restart_count: u32,
last_exit: Option<i32>, last_exit: Option<i32>,
started_at: Option<Instant>, started_at: Option<Instant>,
current_pid: Option<u32>,
} }
impl<S: Spawner> SupervisorTask<S> { impl<S: Spawner> SupervisorTask<S> {
@@ -74,7 +85,7 @@ impl<S: Spawner> SupervisorTask<S> {
cfg: ServerConfig, cfg: ServerConfig,
log_sink: LogSink, log_sink: LogSink,
spawner: S, spawner: S,
state_tx: watch::Sender<ServerState>, status_tx: watch::Sender<Status>,
cmd_rx: mpsc::Receiver<SupervisorCmd>, cmd_rx: mpsc::Receiver<SupervisorCmd>,
) -> Self { ) -> Self {
let backoff = Backoff::new(cfg.restart.backoff_initial, cfg.restart.backoff_max); let backoff = Backoff::new(cfg.restart.backoff_initial, cfg.restart.backoff_max);
@@ -85,18 +96,28 @@ impl<S: Spawner> SupervisorTask<S> {
cfg, cfg,
log_sink, log_sink,
spawner, spawner,
state_tx, status_tx,
cmd_rx, cmd_rx,
backoff, backoff,
retry_window, retry_window,
restart_count: 0, restart_count: 0,
last_exit: None, last_exit: None,
started_at: None, started_at: None,
current_pid: None,
} }
} }
fn set_state(&self, s: ServerState) { fn set_state(&mut self, s: ServerState) {
let _ = self.state_tx.send(s); let uptime_secs = self.started_at.map(|t| t.elapsed().as_secs());
let _ = self.status_tx.send(Status {
state: s,
pid: self.current_pid,
port: self.cfg.port,
uptime_secs,
restart_count: self.restart_count,
last_exit: self.last_exit,
});
} }
pub async fn run(mut self) { pub async fn run(mut self) {
@@ -174,6 +195,7 @@ impl<S: Spawner> SupervisorTask<S> {
child = None; child = None;
self.last_exit = code; self.last_exit = code;
self.current_pid = None;
let now = Instant::now(); let now = Instant::now();
@@ -221,6 +243,7 @@ impl<S: Spawner> SupervisorTask<S> {
self.restart_count = self.restart_count.saturating_add(1); self.restart_count = self.restart_count.saturating_add(1);
self.started_at = Some(Instant::now()); self.started_at = Some(Instant::now());
self.current_pid = Some(c.pid());
self.backoff.reset(); self.backoff.reset();
self.set_state(ServerState::Running); self.set_state(ServerState::Running);
@@ -244,6 +267,7 @@ impl<S: Spawner> SupervisorTask<S> {
} }
} }
self.current_pid = None;
self.started_at = None; self.started_at = None;
self.set_state(ServerState::Stopped); self.set_state(ServerState::Stopped);
} }
@@ -317,34 +341,40 @@ mod tests {
LogSink::new(name.to_string(), writer, 1024) LogSink::new(name.to_string(), writer, 1024)
} }
async fn wait_for(rx: &mut watch::Receiver<ServerState>, want: ServerState) { fn initial_status(cfg: &ServerConfig) -> Status {
Status {
state: ServerState::Stopped,
pid: None,
port: cfg.port,
uptime_secs: None,
restart_count: 0,
last_exit: None,
}
}
async fn wait_for(rx: &mut watch::Receiver<Status>, want: ServerState) {
let deadline = tokio::time::Instant::now() + Duration::from_secs(2); let deadline = tokio::time::Instant::now() + Duration::from_secs(2);
loop { loop {
if *rx.borrow() == want { if rx.borrow().state == want {
return; return;
} }
tokio::select! { tokio::select! {
_ = rx.changed() => {} _ = rx.changed() => {}
_ = tokio::time::sleep_until(deadline) => panic!("never reached {want:?}, last={:?}", *rx.borrow()), _ = tokio::time::sleep_until(deadline) => panic!("never reached {want:?}, last={:?}", rx.borrow().state),
} }
} }
} }
#[tokio::test] #[tokio::test]
async fn start_runs_to_running_and_stop_to_stopped() { async fn start_runs_to_running_and_stop_to_stopped() {
let cfg = cfg("x", RestartPolicy::Never, 5);
let (mock, mut ctl) = MockChild::new(1); let (mock, mut ctl) = MockChild::new(1);
let queue = Arc::new(Mutex::new(vec![mock])); let queue = Arc::new(Mutex::new(vec![mock]));
let spawner = QueueSpawner { queue }; let spawner = QueueSpawner { queue };
let (state_tx, mut state_rx) = watch::channel(ServerState::Stopped); let (status_tx, mut status_rx) = watch::channel(initial_status(&cfg));
let (cmd_tx, cmd_rx) = mpsc::channel(8); let (cmd_tx, cmd_rx) = mpsc::channel(8);
let task = SupervisorTask::new( let task = SupervisorTask::new(cfg, sink("x"), spawner, status_tx, cmd_rx);
cfg("x", RestartPolicy::Never, 5),
sink("x"),
spawner,
state_tx,
cmd_rx,
);
let h = tokio::spawn(task.run()); let h = tokio::spawn(task.run());
let (ack_tx, ack_rx) = oneshot::channel(); let (ack_tx, ack_rx) = oneshot::channel();
@@ -353,10 +383,10 @@ mod tests {
.await .await
.unwrap(); .unwrap();
assert_eq!(ack_rx.await.unwrap(), StartAck::Started); assert_eq!(ack_rx.await.unwrap(), StartAck::Started);
wait_for(&mut state_rx, ServerState::Running).await; wait_for(&mut status_rx, ServerState::Running).await;
ctl.exit_tx.take().unwrap().send(Some(0)).unwrap(); ctl.exit_tx.take().unwrap().send(Some(0)).unwrap();
wait_for(&mut state_rx, ServerState::Stopped).await; wait_for(&mut status_rx, ServerState::Stopped).await;
let (ack_tx, ack_rx) = oneshot::channel(); let (ack_tx, ack_rx) = oneshot::channel();
cmd_tx cmd_tx
+16 -12
View File
@@ -163,14 +163,16 @@ async fn list(reg: &Registry) -> Result<Vec<ServerSummary>, ApiError> {
let mut out = Vec::new(); let mut out = Vec::new();
for (name, entry) in reg.snapshot().await { for (name, entry) in reg.snapshot().await {
let s = entry.handle.status.borrow();
out.push(ServerSummary { out.push(ServerSummary {
name, name,
state: *entry.handle.state.borrow(), state: s.state,
pid: None, pid: s.pid,
port: 0, port: s.port,
uptime_secs: None, uptime_secs: s.uptime_secs,
restart_count: 0, restart_count: s.restart_count,
last_exit: None, last_exit: s.last_exit,
}); });
} }
@@ -185,15 +187,17 @@ async fn status(reg: &Registry, name: &str) -> Result<StatusDetail, ApiError> {
)); ));
}; };
let s = entry.handle.status.borrow();
Ok(StatusDetail { Ok(StatusDetail {
summary: ServerSummary { summary: ServerSummary {
name: entry.handle.name.clone(), name: entry.handle.name.clone(),
state: *entry.handle.state.borrow(), state: s.state,
pid: None, pid: s.pid,
port: 0, port: s.port,
uptime_secs: None, uptime_secs: s.uptime_secs,
restart_count: 0, restart_count: s.restart_count,
last_exit: None, last_exit: s.last_exit,
}, },
recent_transitions: Vec::new(), recent_transitions: Vec::new(),
}) })
+13 -4
View File
@@ -8,7 +8,7 @@ use xy_ipc::{Connection, bind};
use xy_protocol::{ServerConfig, ServerState, kdl_parse::load_all_configs}; use xy_protocol::{ServerConfig, ServerState, kdl_parse::load_all_configs};
use xy_supervisor::{ use xy_supervisor::{
logs::{LogSink, RotatingLogWriter}, logs::{LogSink, RotatingLogWriter},
supervisor::{RealSpawner, SupervisorCmd, SupervisorHandle, SupervisorTask}, supervisor::{RealSpawner, Status, SupervisorCmd, SupervisorHandle, SupervisorTask},
}; };
pub mod handlers; pub mod handlers;
@@ -39,19 +39,28 @@ pub fn spawn_supervisor(paths: &Paths, cfg: ServerConfig) -> Result<SupervisorHa
let sink = LogSink::new(cfg.name.clone(), writer, RING_BUFFER_BYTES); let sink = LogSink::new(cfg.name.clone(), writer, RING_BUFFER_BYTES);
let (state_tx, state_rx) = watch::channel(ServerState::Stopped); let initial_status = Status {
state: ServerState::Stopped,
pid: None,
port: cfg.port,
uptime_secs: None,
restart_count: 0,
last_exit: None,
};
let (status_tx, status_rx) = watch::channel(initial_status);
let (cmd_tx, cmd_rx) = mpsc::channel(16); let (cmd_tx, cmd_rx) = mpsc::channel(16);
let name = cfg.name.clone(); let name = cfg.name.clone();
let task = SupervisorTask::new(cfg, sink.clone(), RealSpawner, state_tx, cmd_rx); let task = SupervisorTask::new(cfg, sink.clone(), RealSpawner, status_tx, cmd_rx);
tokio::spawn(task.run()); tokio::spawn(task.run());
Ok(SupervisorHandle { Ok(SupervisorHandle {
name, name,
tx: cmd_tx, tx: cmd_tx,
state: state_rx, status: status_rx,
log_sink: sink, log_sink: sink,
}) })
} }