fix(pidfile): replace stale pidfile after unclean shutdown
PidFile::acquire used create_new(true) with Drop-based cleanup, so a pidfile surviving power loss or SIGKILL made the daemon refuse to start until the file was deleted by hand. On AlreadyExists, read the recorded PID and probe it with kill(pid, 0): ESRCH (or unparseable content) means stale, so remove the file and retry the atomic create. A live PID keeps the refusal and now names the holding process. The retry loop is bounded to stay race-safe against a concurrent starter. Closes #1 Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,8 +1,10 @@
|
|||||||
use std::fs::{File, OpenOptions};
|
use std::fs::{File, OpenOptions};
|
||||||
use std::io::Write;
|
use std::io::{ErrorKind, Write};
|
||||||
use std::os::unix::fs::OpenOptionsExt;
|
use std::os::unix::fs::OpenOptionsExt;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
use nix::{errno::Errno, sys::signal, unistd::Pid};
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct PidFile {
|
pub struct PidFile {
|
||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
@@ -11,6 +13,32 @@ pub struct PidFile {
|
|||||||
|
|
||||||
impl PidFile {
|
impl PidFile {
|
||||||
pub fn acquire(path: &Path) -> std::io::Result<Self> {
|
pub fn acquire(path: &Path) -> std::io::Result<Self> {
|
||||||
|
for _ in 0..3 {
|
||||||
|
match Self::create(path) {
|
||||||
|
Err(err) if err.kind() == ErrorKind::AlreadyExists => match read_live_pid(path)? {
|
||||||
|
Some(pid) => {
|
||||||
|
return Err(std::io::Error::new(
|
||||||
|
ErrorKind::AlreadyExists,
|
||||||
|
format!("pidfile {} held by live process {pid}", path.display()),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
None => match std::fs::remove_file(path) {
|
||||||
|
Ok(()) => continue,
|
||||||
|
Err(err) if err.kind() == ErrorKind::NotFound => continue,
|
||||||
|
Err(err) => return Err(err),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
result => return result,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(std::io::Error::new(
|
||||||
|
ErrorKind::AlreadyExists,
|
||||||
|
format!("pidfile {} keeps reappearing", path.display()),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn create(path: &Path) -> std::io::Result<Self> {
|
||||||
let mut f = OpenOptions::new()
|
let mut f = OpenOptions::new()
|
||||||
.write(true)
|
.write(true)
|
||||||
.create_new(true)
|
.create_new(true)
|
||||||
@@ -24,6 +52,23 @@ impl PidFile {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn read_live_pid(path: &Path) -> std::io::Result<Option<u32>> {
|
||||||
|
let content = match std::fs::read_to_string(path) {
|
||||||
|
Ok(content) => content,
|
||||||
|
Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None),
|
||||||
|
Err(err) => return Err(err),
|
||||||
|
};
|
||||||
|
|
||||||
|
let Ok(pid) = content.trim().parse::<u32>() else {
|
||||||
|
return Ok(None);
|
||||||
|
};
|
||||||
|
|
||||||
|
match signal::kill(Pid::from_raw(pid as i32), None) {
|
||||||
|
Err(Errno::ESRCH) => Ok(None),
|
||||||
|
_ => Ok(Some(pid)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Drop for PidFile {
|
impl Drop for PidFile {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
let _ = std::fs::remove_file(&self.path);
|
let _ = std::fs::remove_file(&self.path);
|
||||||
@@ -54,4 +99,51 @@ mod tests {
|
|||||||
}
|
}
|
||||||
assert!(!p.exists());
|
assert!(!p.exists());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn dead_pid() -> u32 {
|
||||||
|
let mut child = std::process::Command::new("true").spawn().unwrap();
|
||||||
|
|
||||||
|
child.wait().unwrap();
|
||||||
|
|
||||||
|
child.id()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn acquire_replaces_stale_pidfile() {
|
||||||
|
let dir = tempdir().unwrap();
|
||||||
|
let p = dir.path().join("x.pid");
|
||||||
|
|
||||||
|
std::fs::write(&p, format!("{}\n", dead_pid())).unwrap();
|
||||||
|
|
||||||
|
let _g = PidFile::acquire(&p).unwrap();
|
||||||
|
let content = std::fs::read_to_string(&p).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(content.trim(), std::process::id().to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn acquire_replaces_garbage_pidfile() {
|
||||||
|
let dir = tempdir().unwrap();
|
||||||
|
let p = dir.path().join("x.pid");
|
||||||
|
|
||||||
|
std::fs::write(&p, "not a pid\n").unwrap();
|
||||||
|
|
||||||
|
let _g = PidFile::acquire(&p).unwrap();
|
||||||
|
let content = std::fs::read_to_string(&p).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(content.trim(), std::process::id().to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn acquire_fails_when_pid_alive() {
|
||||||
|
let dir = tempdir().unwrap();
|
||||||
|
let p = dir.path().join("x.pid");
|
||||||
|
|
||||||
|
std::fs::write(&p, format!("{}\n", std::process::id())).unwrap();
|
||||||
|
|
||||||
|
let err = PidFile::acquire(&p).unwrap_err();
|
||||||
|
|
||||||
|
assert_eq!(err.kind(), std::io::ErrorKind::AlreadyExists);
|
||||||
|
assert!(p.exists());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user