From 40ffd9b06cec6593e5838886c8bc10af5084f083 Mon Sep 17 00:00:00 2001 From: Anders Olsson Date: Fri, 5 Jun 2026 18:16:54 +0200 Subject: [PATCH] fix(pidfile): replace stale pidfile after unclean shutdown PidFile::acquire used create_new(true) with Drop-based cleanup, so a pidfile surviving power loss or SIGKILL made the daemon refuse to start until the file was deleted by hand. On AlreadyExists, read the recorded PID and probe it with kill(pid, 0): ESRCH (or unparseable content) means stale, so remove the file and retry the atomic create. A live PID keeps the refusal and now names the holding process. The retry loop is bounded to stay race-safe against a concurrent starter. Closes #1 Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/xy/src/pidfile.rs | 94 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 1 deletion(-) diff --git a/crates/xy/src/pidfile.rs b/crates/xy/src/pidfile.rs index a7ff1dc..d013b04 100644 --- a/crates/xy/src/pidfile.rs +++ b/crates/xy/src/pidfile.rs @@ -1,8 +1,10 @@ use std::fs::{File, OpenOptions}; -use std::io::Write; +use std::io::{ErrorKind, Write}; use std::os::unix::fs::OpenOptionsExt; use std::path::{Path, PathBuf}; +use nix::{errno::Errno, sys::signal, unistd::Pid}; + #[derive(Debug)] pub struct PidFile { path: PathBuf, @@ -11,6 +13,32 @@ pub struct PidFile { impl PidFile { pub fn acquire(path: &Path) -> std::io::Result { + for _ in 0..3 { + match Self::create(path) { + Err(err) if err.kind() == ErrorKind::AlreadyExists => match read_live_pid(path)? { + Some(pid) => { + return Err(std::io::Error::new( + ErrorKind::AlreadyExists, + format!("pidfile {} held by live process {pid}", path.display()), + )); + } + None => match std::fs::remove_file(path) { + Ok(()) => continue, + Err(err) if err.kind() == ErrorKind::NotFound => continue, + Err(err) => return Err(err), + }, + }, + result => return result, + } + } + + Err(std::io::Error::new( + ErrorKind::AlreadyExists, + format!("pidfile {} keeps reappearing", path.display()), + )) + } + + fn create(path: &Path) -> std::io::Result { let mut f = OpenOptions::new() .write(true) .create_new(true) @@ -24,6 +52,23 @@ impl PidFile { } } +fn read_live_pid(path: &Path) -> std::io::Result> { + let content = match std::fs::read_to_string(path) { + Ok(content) => content, + Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None), + Err(err) => return Err(err), + }; + + let Ok(pid) = content.trim().parse::() else { + return Ok(None); + }; + + match signal::kill(Pid::from_raw(pid as i32), None) { + Err(Errno::ESRCH) => Ok(None), + _ => Ok(Some(pid)), + } +} + impl Drop for PidFile { fn drop(&mut self) { let _ = std::fs::remove_file(&self.path); @@ -54,4 +99,51 @@ mod tests { } assert!(!p.exists()); } + + fn dead_pid() -> u32 { + let mut child = std::process::Command::new("true").spawn().unwrap(); + + child.wait().unwrap(); + + child.id() + } + + #[test] + fn acquire_replaces_stale_pidfile() { + let dir = tempdir().unwrap(); + let p = dir.path().join("x.pid"); + + std::fs::write(&p, format!("{}\n", dead_pid())).unwrap(); + + let _g = PidFile::acquire(&p).unwrap(); + let content = std::fs::read_to_string(&p).unwrap(); + + assert_eq!(content.trim(), std::process::id().to_string()); + } + + #[test] + fn acquire_replaces_garbage_pidfile() { + let dir = tempdir().unwrap(); + let p = dir.path().join("x.pid"); + + std::fs::write(&p, "not a pid\n").unwrap(); + + let _g = PidFile::acquire(&p).unwrap(); + let content = std::fs::read_to_string(&p).unwrap(); + + assert_eq!(content.trim(), std::process::id().to_string()); + } + + #[test] + fn acquire_fails_when_pid_alive() { + let dir = tempdir().unwrap(); + let p = dir.path().join("x.pid"); + + std::fs::write(&p, format!("{}\n", std::process::id())).unwrap(); + + let err = PidFile::acquire(&p).unwrap_err(); + + assert_eq!(err.kind(), std::io::ErrorKind::AlreadyExists); + assert!(p.exists()); + } }