From b0d2c247df786595141999a15809f04ac76f6655 Mon Sep 17 00:00:00 2001 From: Anders Olsson Date: Thu, 4 Jun 2026 21:41:29 +0200 Subject: [PATCH] docs(plans): tier 4 hardening batch 1 (#1 #2 #21) Co-Authored-By: Claude Opus 4.8 (1M context) --- .../2026-06-04-tier4-hardening-batch1.md | 148 ++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 docs/superpowers/plans/2026-06-04-tier4-hardening-batch1.md diff --git a/docs/superpowers/plans/2026-06-04-tier4-hardening-batch1.md b/docs/superpowers/plans/2026-06-04-tier4-hardening-batch1.md new file mode 100644 index 0000000..4805471 --- /dev/null +++ b/docs/superpowers/plans/2026-06-04-tier4-hardening-batch1.md @@ -0,0 +1,148 @@ +# Tier 4 Hardening — Batch 1 (#1, #2, #21) Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: superpowers:subagent-driven-development. Steps use `- [ ]`. + +**Goal:** The mechanical, well-specified hardening items — graceful HTTP shutdown (#1), configurable DB pool size (#2), and audit logging for vocabulary/term/authority creation (#21). (The design-heavy Tier 4 items #20/#5/#7 are handled separately.) + +**Tech Stack:** Rust (axum 0.8, sqlx, tokio, anyhow). Backend-only. + +**Conventions:** nightly fmt; clippy `-D warnings`; no codename. Test infra: `DATABASE_URL=postgres://postgres:postgres@localhost:5442/cms_dev`, `MEILI_URL=http://localhost:7700`, `MEILI_MASTER_KEY=masterKey` (`#[sqlx::test]` provisions its own DB). + +--- + +## Task 1: #1 — graceful shutdown + +**Files:** `crates/server/src/lib.rs`, `crates/server/Cargo.toml` (tokio `signal` feature if missing). + +- [ ] **Step 1: Ensure tokio `signal` feature.** Check `crates/server/Cargo.toml`'s `tokio` dependency features include `"signal"`. If the workspace `tokio` is `features = ["full"]` it's already included; otherwise add `"signal"` (and `"macros"`/`"rt-multi-thread"` if not already). Verify with `cargo build -p server`. + +- [ ] **Step 2: Add a shutdown-signal future** in `crates/server/src/lib.rs` (above `serve`): +```rust +/// Resolves when the process receives SIGINT (Ctrl-C) or SIGTERM, so the server can +/// drain in-flight requests before exiting. +async fn shutdown_signal() { + let ctrl_c = async { + tokio::signal::ctrl_c() + .await + .expect("install Ctrl-C handler"); + }; + + #[cfg(unix)] + let terminate = async { + tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate()) + .expect("install SIGTERM handler") + .recv() + .await; + }; + + #[cfg(not(unix))] + let terminate = std::future::pending::<()>(); + + tokio::select! { + _ = ctrl_c => {}, + _ = terminate => {}, + } + + tracing::info!("shutdown signal received; draining"); +} +``` + +- [ ] **Step 3: Wire it into `serve`.** Change the `axum::serve(...)` call: +```rust + axum::serve(listener, app) + .with_graceful_shutdown(shutdown_signal()) + .await + .context("running the HTTP server")?; +``` + +- [ ] **Step 4: Verify.** `cargo +nightly fmt`; `cargo clippy -p server --all-targets`; `DATABASE_URL=postgres://postgres:postgres@localhost:5442/cms_dev cargo test -p server` (the existing `serve.rs` smoke test still passes — it aborts the handle, which is unaffected). Commit: +```bash +git add crates/server +git commit -m "feat(server): graceful shutdown on SIGINT/SIGTERM (#1)" +``` + +--- + +## Task 2: #2 — configurable DB pool size + +**Files:** `crates/db/src/lib.rs`, `crates/server/src/config.rs`, `crates/server/src/lib.rs`. + +`Db::connect` currently hardcodes `.max_connections(5)`. + +- [ ] **Step 1: Parameterize `Db::connect`.** In `crates/db/src/lib.rs`: +```rust + /// Connect to the database at `database_url`, opening a connection pool with at most + /// `max_connections` connections. + pub async fn connect(database_url: &str, max_connections: u32) -> Result { + let pool = PgPoolOptions::new() + .max_connections(max_connections) + .connect(database_url) + .await?; + + Ok(Self { pool }) + } +``` + +- [ ] **Step 2: Add the config knob.** In `crates/server/src/config.rs`, add a field to `Config`: +```rust + /// Maximum size of the PostgreSQL connection pool. + #[arg(long = "db-max-connections", env = "DB_MAX_CONNECTIONS", default_value_t = 5)] + pub db_max_connections: u32, +``` + +- [ ] **Step 3: Thread it through the two `Db::connect` call sites** in `crates/server/src/lib.rs`: + - In `run`: `Db::connect(&config.database_url, config.db_max_connections)`. + - In `create_user` (the CLI one-shot — it has only `database_url: &str`, no `Config`): pass a small fixed default, `Db::connect(database_url, 2)` (a one-shot CLI needs minimal connections), and add a brief comment. + +- [ ] **Step 4: Verify.** `cargo +nightly fmt`; `cargo clippy --workspace --all-targets`; `DATABASE_URL=postgres://postgres:postgres@localhost:5442/cms_dev cargo test -p server`. Confirm `cargo run -p server -- --help` shows the new `--db-max-connections` flag (optional). Commit: +```bash +git add crates/db crates/server +git commit -m "feat(server): configurable DB pool size via --db-max-connections/DB_MAX_CONNECTIONS (#2)" +``` + +--- + +## Task 3: #21 — audit vocabulary/term/authority creation + +**Files:** `crates/db/src/vocab.rs`, `crates/db/src/authority.rs`, `crates/api/src/admin_vocab.rs`, `crates/api/src/admin_authorities.rs`; Test in `crates/api/tests/admin_catalog.rs`. + +The three admin create paths (`create_vocabulary`, `add_term`, `create_authority`) take no `AuditActor` and write no audit entry. The catalogue object writes do — **`db::catalog::create_object` is the template**: it takes `actor: AuditActor` and calls `audit::record(&mut *conn, &NewAuditEvent { actor, action: AuditAction::Created, entity_type, entity_id, ... })` inside the same transaction. READ `create_object` (`crates/db/src/catalog.rs`) and `audit::record` / `NewAuditEvent` (`crates/db/src/audit.rs`, `domain::NewAuditEvent`) first to copy the exact shape. + +- [ ] **Step 1: Add `actor` + audit to the db functions.** Each must run the insert **and** the audit record in one transaction (so they're atomic), mirroring `create_object`: + - `db::vocab::create_vocabulary` — currently `(executor: E, key: &str)`. Change to `(conn: &mut sqlx::PgConnection, actor: AuditActor, key: &str)` (tx-connection like `add_term`), insert the vocabulary, then `audit::record(&mut *conn, &NewAuditEvent { actor, action: Created, entity_type: "vocabulary", entity_id: , ... })`. Return the `Vocabulary` as before. + - `db::vocab::add_term` — currently `(conn: &mut PgConnection, new: &NewTerm)`. Add `actor: AuditActor`; after inserting the term, record an audit entry (`entity_type: "term"`, `entity_id: `). + - `db::authority::create_authority` — add `actor: AuditActor`; record (`entity_type: "authority"`, `entity_id: `). + Match `create_object`'s `NewAuditEvent` field names exactly (e.g. `changes`/`metadata` may be empty/None — copy whatever `create_object` passes for a creation with no field diff). + +- [ ] **Step 2: Thread the actor through the handlers.** In `crates/api/src/admin_vocab.rs` (`create_vocabulary`, `add_term`) and `crates/api/src/admin_authorities.rs` (`create_authority`): + - Change `_auth: Authorized` → `auth: Authorized`. + - Build the actor as the object handlers do: `AuditActor::User(auth.user.id.to_uuid())`. To avoid duplicating the helper, either make `admin_objects::actor` `pub(crate)` and import it, or inline `AuditActor::User(auth.user.id.to_uuid())` at each site (it's a one-liner — pick the cleaner option; if you make the helper shared, take `&AuthUser`). + - `create_vocabulary` handler currently calls `db::vocab::create_vocabulary(state.db.pool(), &req.key)` on the **pool** — change it to open a transaction (`let mut tx = state.db.pool().begin().await...`), call the new `create_vocabulary(&mut tx, actor, &req.key)`, then `tx.commit()` (like `add_term`'s handler already does). `add_term`/`create_authority` handlers already use a tx — just pass the actor. + +- [ ] **Step 3: Test** — add to `crates/api/tests/admin_catalog.rs` (it already seeds an editor + logs in). After creating a vocabulary (or term/authority) via the API, assert an audit row exists attributing the user. Use `db::audit::history_for` (or a direct `SELECT` on `audit_log`) to find the entry — read the file for how existing tests inspect audit rows (the object tests likely already do this; mirror them). Minimal: create a vocabulary, then query `audit_log` for `entity_type='vocabulary'` with the created id and assert `actor_kind='user'` + the right `actor_id`. Name it e.g. `creating_a_vocabulary_writes_an_audit_entry`. + +- [ ] **Step 4: Verify.** `cargo +nightly fmt`; `cargo clippy --workspace --all-targets`; `DATABASE_URL=postgres://postgres:postgres@localhost:5442/cms_dev MEILI_URL=http://localhost:7700 MEILI_MASTER_KEY=masterKey cargo test -p api -p db`. All green. Commit: +```bash +git add crates/db crates/api +git commit -m "feat: audit vocabulary/term/authority creation, attributing the acting user (#21)" +``` + +--- + +## Task 4: Verification + +- [ ] **Step 1:** `DATABASE_URL=postgres://postgres:postgres@localhost:5442/cms_dev MEILI_URL=http://localhost:7700 MEILI_MASTER_KEY=masterKey cargo test --workspace` — all green. +- [ ] **Step 2:** `cargo clippy --workspace --all-targets` and `cargo +nightly fmt --check` — clean. +- [ ] **Step 3:** `git grep -in 'biggus\|dickus' -- crates` → none. +- [ ] **Step 4:** Confirm `Cargo.lock` is committed if any dependency/feature changed (e.g. tokio `signal` feature does not add a new lockfile entry, but verify `git status` is clean after the commits — no dangling `M Cargo.lock`). + +--- + +## Self-Review (completed) +- **Spec coverage:** #1 (graceful shutdown) → T1; #2 (configurable pool) → T2; #21 (audit 3 admin creates) → T3. ✓ +- **Placeholder scan:** none — concrete code for #1/#2; #21 points at `create_object`/`audit::record` as the exact template to mirror (the audit-event field names live there and must match, so copying beats guessing). +- **Type consistency:** `Db::connect(url, max: u32)` updated at both call sites (run + create_user); `db_max_connections: u32` matches `max_connections(u32)`; the three db create fns gain `actor: AuditActor` and the handlers pass `AuditActor::User(auth.user.id.to_uuid())` consistently with `admin_objects::actor`. + +## Notes +- #21 keeps within the current audit model (`AuditAction::Created` + non-null `entity_type`/`entity_id`) — no schema change needed (the auth-event model extension is the separate #7). +- Watch the `Cargo.lock`: if the tokio `signal` feature pulls a new transitive crate, stage the root `Cargo.lock` in the same commit (don't leave it dangling).