fix(supervisor): publish full status (pid, port, uptime, restart_count, last_exit) via watch channel

Replace watch::Receiver<ServerState> on SupervisorHandle with watch::Receiver<Status>,
a richer snapshot type that carries pid, port, uptime_secs, restart_count and last_exit.
SupervisorTask maintains current_pid and publishes a fresh Status on every state
transition; handlers.rs reads the full Status so list/status no longer return
zeroed/None fields.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-25 12:30:56 +02:00
parent ae6ed1cf0a
commit 3e4ad79137
4 changed files with 80 additions and 36 deletions
+2 -1
View File
@@ -13,5 +13,6 @@ pub use logs::{LogSink, RecordedLine, RingBuffer, RotatingLogWriter};
pub use policy::{RestartDecision, decide};
pub use retry_window::RetryWindow;
pub use supervisor::{
RealSpawner, Spawner, StartAck, StopAck, SupervisorCmd, SupervisorHandle, SupervisorTask,
RealSpawner, Spawner, StartAck, Status, StopAck, SupervisorCmd, SupervisorHandle,
SupervisorTask,
};
+49 -19
View File
@@ -42,11 +42,21 @@ pub enum StopAck {
NotRunning,
}
#[derive(Debug, Clone)]
pub struct Status {
pub state: ServerState,
pub pid: Option<u32>,
pub port: u16,
pub uptime_secs: Option<u64>,
pub restart_count: u32,
pub last_exit: Option<i32>,
}
#[derive(Clone)]
pub struct SupervisorHandle {
pub name: String,
pub tx: mpsc::Sender<SupervisorCmd>,
pub state: watch::Receiver<ServerState>,
pub status: watch::Receiver<Status>,
pub log_sink: LogSink,
}
@@ -60,13 +70,14 @@ pub struct SupervisorTask<S: Spawner> {
cfg: ServerConfig,
log_sink: LogSink,
spawner: S,
state_tx: watch::Sender<ServerState>,
status_tx: watch::Sender<Status>,
cmd_rx: mpsc::Receiver<SupervisorCmd>,
backoff: Backoff,
retry_window: RetryWindow,
restart_count: u32,
last_exit: Option<i32>,
started_at: Option<Instant>,
current_pid: Option<u32>,
}
impl<S: Spawner> SupervisorTask<S> {
@@ -74,7 +85,7 @@ impl<S: Spawner> SupervisorTask<S> {
cfg: ServerConfig,
log_sink: LogSink,
spawner: S,
state_tx: watch::Sender<ServerState>,
status_tx: watch::Sender<Status>,
cmd_rx: mpsc::Receiver<SupervisorCmd>,
) -> Self {
let backoff = Backoff::new(cfg.restart.backoff_initial, cfg.restart.backoff_max);
@@ -85,18 +96,28 @@ impl<S: Spawner> SupervisorTask<S> {
cfg,
log_sink,
spawner,
state_tx,
status_tx,
cmd_rx,
backoff,
retry_window,
restart_count: 0,
last_exit: None,
started_at: None,
current_pid: None,
}
}
fn set_state(&self, s: ServerState) {
let _ = self.state_tx.send(s);
fn set_state(&mut self, s: ServerState) {
let uptime_secs = self.started_at.map(|t| t.elapsed().as_secs());
let _ = self.status_tx.send(Status {
state: s,
pid: self.current_pid,
port: self.cfg.port,
uptime_secs,
restart_count: self.restart_count,
last_exit: self.last_exit,
});
}
pub async fn run(mut self) {
@@ -174,6 +195,7 @@ impl<S: Spawner> SupervisorTask<S> {
child = None;
self.last_exit = code;
self.current_pid = None;
let now = Instant::now();
@@ -221,6 +243,7 @@ impl<S: Spawner> SupervisorTask<S> {
self.restart_count = self.restart_count.saturating_add(1);
self.started_at = Some(Instant::now());
self.current_pid = Some(c.pid());
self.backoff.reset();
self.set_state(ServerState::Running);
@@ -244,6 +267,7 @@ impl<S: Spawner> SupervisorTask<S> {
}
}
self.current_pid = None;
self.started_at = None;
self.set_state(ServerState::Stopped);
}
@@ -317,34 +341,40 @@ mod tests {
LogSink::new(name.to_string(), writer, 1024)
}
async fn wait_for(rx: &mut watch::Receiver<ServerState>, want: ServerState) {
fn initial_status(cfg: &ServerConfig) -> Status {
Status {
state: ServerState::Stopped,
pid: None,
port: cfg.port,
uptime_secs: None,
restart_count: 0,
last_exit: None,
}
}
async fn wait_for(rx: &mut watch::Receiver<Status>, want: ServerState) {
let deadline = tokio::time::Instant::now() + Duration::from_secs(2);
loop {
if *rx.borrow() == want {
if rx.borrow().state == want {
return;
}
tokio::select! {
_ = rx.changed() => {}
_ = tokio::time::sleep_until(deadline) => panic!("never reached {want:?}, last={:?}", *rx.borrow()),
_ = tokio::time::sleep_until(deadline) => panic!("never reached {want:?}, last={:?}", rx.borrow().state),
}
}
}
#[tokio::test]
async fn start_runs_to_running_and_stop_to_stopped() {
let cfg = cfg("x", RestartPolicy::Never, 5);
let (mock, mut ctl) = MockChild::new(1);
let queue = Arc::new(Mutex::new(vec![mock]));
let spawner = QueueSpawner { queue };
let (state_tx, mut state_rx) = watch::channel(ServerState::Stopped);
let (status_tx, mut status_rx) = watch::channel(initial_status(&cfg));
let (cmd_tx, cmd_rx) = mpsc::channel(8);
let task = SupervisorTask::new(
cfg("x", RestartPolicy::Never, 5),
sink("x"),
spawner,
state_tx,
cmd_rx,
);
let task = SupervisorTask::new(cfg, sink("x"), spawner, status_tx, cmd_rx);
let h = tokio::spawn(task.run());
let (ack_tx, ack_rx) = oneshot::channel();
@@ -353,10 +383,10 @@ mod tests {
.await
.unwrap();
assert_eq!(ack_rx.await.unwrap(), StartAck::Started);
wait_for(&mut state_rx, ServerState::Running).await;
wait_for(&mut status_rx, ServerState::Running).await;
ctl.exit_tx.take().unwrap().send(Some(0)).unwrap();
wait_for(&mut state_rx, ServerState::Stopped).await;
wait_for(&mut status_rx, ServerState::Stopped).await;
let (ack_tx, ack_rx) = oneshot::channel();
cmd_tx
+16 -12
View File
@@ -163,14 +163,16 @@ async fn list(reg: &Registry) -> Result<Vec<ServerSummary>, ApiError> {
let mut out = Vec::new();
for (name, entry) in reg.snapshot().await {
let s = entry.handle.status.borrow();
out.push(ServerSummary {
name,
state: *entry.handle.state.borrow(),
pid: None,
port: 0,
uptime_secs: None,
restart_count: 0,
last_exit: None,
state: s.state,
pid: s.pid,
port: s.port,
uptime_secs: s.uptime_secs,
restart_count: s.restart_count,
last_exit: s.last_exit,
});
}
@@ -185,15 +187,17 @@ async fn status(reg: &Registry, name: &str) -> Result<StatusDetail, ApiError> {
));
};
let s = entry.handle.status.borrow();
Ok(StatusDetail {
summary: ServerSummary {
name: entry.handle.name.clone(),
state: *entry.handle.state.borrow(),
pid: None,
port: 0,
uptime_secs: None,
restart_count: 0,
last_exit: None,
state: s.state,
pid: s.pid,
port: s.port,
uptime_secs: s.uptime_secs,
restart_count: s.restart_count,
last_exit: s.last_exit,
},
recent_transitions: Vec::new(),
})
+13 -4
View File
@@ -8,7 +8,7 @@ use xy_ipc::{Connection, bind};
use xy_protocol::{ServerConfig, ServerState, kdl_parse::load_all_configs};
use xy_supervisor::{
logs::{LogSink, RotatingLogWriter},
supervisor::{RealSpawner, SupervisorCmd, SupervisorHandle, SupervisorTask},
supervisor::{RealSpawner, Status, SupervisorCmd, SupervisorHandle, SupervisorTask},
};
pub mod handlers;
@@ -39,19 +39,28 @@ pub fn spawn_supervisor(paths: &Paths, cfg: ServerConfig) -> Result<SupervisorHa
let sink = LogSink::new(cfg.name.clone(), writer, RING_BUFFER_BYTES);
let (state_tx, state_rx) = watch::channel(ServerState::Stopped);
let initial_status = Status {
state: ServerState::Stopped,
pid: None,
port: cfg.port,
uptime_secs: None,
restart_count: 0,
last_exit: None,
};
let (status_tx, status_rx) = watch::channel(initial_status);
let (cmd_tx, cmd_rx) = mpsc::channel(16);
let name = cfg.name.clone();
let task = SupervisorTask::new(cfg, sink.clone(), RealSpawner, state_tx, cmd_rx);
let task = SupervisorTask::new(cfg, sink.clone(), RealSpawner, status_tx, cmd_rx);
tokio::spawn(task.run());
Ok(SupervisorHandle {
name,
tx: cmd_tx,
state: state_rx,
status: status_rx,
log_sink: sink,
})
}