fix(pidfile): replace stale pidfile after unclean shutdown

PidFile::acquire used create_new(true) with Drop-based cleanup, so a
pidfile surviving power loss or SIGKILL made the daemon refuse to start
until the file was deleted by hand.

On AlreadyExists, read the recorded PID and probe it with kill(pid, 0):
ESRCH (or unparseable content) means stale, so remove the file and
retry the atomic create. A live PID keeps the refusal and now names the
holding process. The retry loop is bounded to stay race-safe against a
concurrent starter.

Closes #1

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-05 18:16:54 +02:00
parent 00c7e7e812
commit 40ffd9b06c
+93 -1
View File
@@ -1,8 +1,10 @@
use std::fs::{File, OpenOptions};
use std::io::Write;
use std::io::{ErrorKind, Write};
use std::os::unix::fs::OpenOptionsExt;
use std::path::{Path, PathBuf};
use nix::{errno::Errno, sys::signal, unistd::Pid};
#[derive(Debug)]
pub struct PidFile {
path: PathBuf,
@@ -11,6 +13,32 @@ pub struct PidFile {
impl PidFile {
pub fn acquire(path: &Path) -> std::io::Result<Self> {
for _ in 0..3 {
match Self::create(path) {
Err(err) if err.kind() == ErrorKind::AlreadyExists => match read_live_pid(path)? {
Some(pid) => {
return Err(std::io::Error::new(
ErrorKind::AlreadyExists,
format!("pidfile {} held by live process {pid}", path.display()),
));
}
None => match std::fs::remove_file(path) {
Ok(()) => continue,
Err(err) if err.kind() == ErrorKind::NotFound => continue,
Err(err) => return Err(err),
},
},
result => return result,
}
}
Err(std::io::Error::new(
ErrorKind::AlreadyExists,
format!("pidfile {} keeps reappearing", path.display()),
))
}
fn create(path: &Path) -> std::io::Result<Self> {
let mut f = OpenOptions::new()
.write(true)
.create_new(true)
@@ -24,6 +52,23 @@ impl PidFile {
}
}
fn read_live_pid(path: &Path) -> std::io::Result<Option<u32>> {
let content = match std::fs::read_to_string(path) {
Ok(content) => content,
Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None),
Err(err) => return Err(err),
};
let Ok(pid) = content.trim().parse::<u32>() else {
return Ok(None);
};
match signal::kill(Pid::from_raw(pid as i32), None) {
Err(Errno::ESRCH) => Ok(None),
_ => Ok(Some(pid)),
}
}
impl Drop for PidFile {
fn drop(&mut self) {
let _ = std::fs::remove_file(&self.path);
@@ -54,4 +99,51 @@ mod tests {
}
assert!(!p.exists());
}
fn dead_pid() -> u32 {
let mut child = std::process::Command::new("true").spawn().unwrap();
child.wait().unwrap();
child.id()
}
#[test]
fn acquire_replaces_stale_pidfile() {
let dir = tempdir().unwrap();
let p = dir.path().join("x.pid");
std::fs::write(&p, format!("{}\n", dead_pid())).unwrap();
let _g = PidFile::acquire(&p).unwrap();
let content = std::fs::read_to_string(&p).unwrap();
assert_eq!(content.trim(), std::process::id().to_string());
}
#[test]
fn acquire_replaces_garbage_pidfile() {
let dir = tempdir().unwrap();
let p = dir.path().join("x.pid");
std::fs::write(&p, "not a pid\n").unwrap();
let _g = PidFile::acquire(&p).unwrap();
let content = std::fs::read_to_string(&p).unwrap();
assert_eq!(content.trim(), std::process::id().to_string());
}
#[test]
fn acquire_fails_when_pid_alive() {
let dir = tempdir().unwrap();
let p = dir.path().join("x.pid");
std::fs::write(&p, format!("{}\n", std::process::id())).unwrap();
let err = PidFile::acquire(&p).unwrap_err();
assert_eq!(err.kind(), std::io::ErrorKind::AlreadyExists);
assert!(p.exists());
}
}