diff --git a/CLAUDE.md b/CLAUDE.md index 8a1fa1d..66e05eb 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -35,6 +35,7 @@ History → Batch[] → Game[] → teams/players - **`Player`** (`player.rs`) — static configuration: prior `Gaussian`, `beta` (performance noise), `gamma` (skill drift per time unit). - **`Gaussian`** (`gaussian.rs`) — core probability type. Stored as natural parameters (`pi = 1/sigma²`, `tau = mu/sigma²`). Arithmetic ops implement message multiplication/division in the factor graph. - **`message.rs`** — `TeamMessage` and `DiffMessage`: intermediate factor graph messages used inside `Game`. +- **`MarginFactor`** (`factor/margin.rs`) — Gaussian observation factor on a diff variable; engaged by `Outcome::Scored`. - **`lib.rs`** — exports the public API (`Game`, `Gaussian`, `History`, `Player`) and standalone functions (`quality()`, `pdf()`, `cdf()`, `erfc()`). Also defines global defaults: `MU=0.0`, `SIGMA=6.0`, `BETA=1.0`, `GAMMA=0.03`, `P_DRAW=0.0`, `EPSILON=1e-6`, `ITERATIONS=30`. ### Key design points diff --git a/Cargo.toml b/Cargo.toml index 51da65d..b5feefe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,10 @@ harness = false name = "history_converge" harness = false +[[bench]] +name = "scored" +harness = false + [dependencies] approx = { version = "0.5.1", optional = true } rayon = { version = "1", optional = true } diff --git a/README.md b/README.md index 84a190e..cf009c1 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,27 @@ let h = History::builder() .build(); ``` +## Scored outcomes + +Use `Outcome::scores([...])` when you have continuous per-team scores rather +than just ranks. Adjacent score margins flow into a `MarginFactor` that adds +soft Gaussian evidence about the latent performance diff. Configure +`HistoryBuilder::score_sigma(σ)` to control how much you trust the margins +(smaller σ = more trust). 
+ +```rust +use trueskill_tt::{History, Outcome}; + +let mut h = History::builder().score_sigma(2.0).build(); +h.event(1) + .team(["alice"]) + .team(["bob"]) + .scores([21.0, 9.0]) + .commit() + .unwrap(); +h.converge().unwrap(); +``` + ## Todo - [x] Implement approx for Gaussian diff --git a/benches/batch.rs b/benches/batch.rs index 7bc0bc0..e480b9d 100644 --- a/benches/batch.rs +++ b/benches/batch.rs @@ -1,7 +1,7 @@ use criterion::{Criterion, criterion_group, criterion_main}; use trueskill_tt::{ - BETA, Competitor, GAMMA, KeyTable, MU, P_DRAW, Rating, SIGMA, TimeSlice, drift::ConstantDrift, - gaussian::Gaussian, storage::CompetitorStore, + BETA, Competitor, EventKind, GAMMA, KeyTable, MU, P_DRAW, Rating, SIGMA, TimeSlice, + drift::ConstantDrift, gaussian::Gaussian, storage::CompetitorStore, }; fn criterion_benchmark(criterion: &mut Criterion) { @@ -33,8 +33,10 @@ fn criterion_benchmark(criterion: &mut Criterion) { weights.push(vec![vec![1.0], vec![1.0]]); } + let kinds = vec![EventKind::Ranked; composition.len()]; + let mut time_slice = TimeSlice::new(1, P_DRAW); - time_slice.add_events(composition, results, weights, &agents); + time_slice.add_events(composition, results, weights, kinds, &agents); criterion.bench_function("Batch::iteration", |b| { b.iter(|| time_slice.iteration(0, &agents)) diff --git a/benches/scored.rs b/benches/scored.rs new file mode 100644 index 0000000..19883f2 --- /dev/null +++ b/benches/scored.rs @@ -0,0 +1,38 @@ +use criterion::{Criterion, criterion_group, criterion_main}; +use smallvec::smallvec; +use trueskill_tt::{ConstantDrift, Event, History, Member, Outcome, Team}; + +fn bench_scored_history(c: &mut Criterion) { + c.bench_function("scored_history_60_events_30_iter", |bencher| { + bencher.iter(|| { + let mut h: History = History::builder_with_key() + .mu(25.0) + .sigma(25.0 / 3.0) + .beta(25.0 / 6.0) + .drift(ConstantDrift(0.03)) + .score_sigma(2.0) + .build(); + + let mut events: Vec> = Vec::with_capacity(60); + for i in 0..60 { + 
let a = format!("p{}", i % 20); + let b = format!("p{}", (i + 7) % 20); + let s_a = (i as f64 * 0.3).sin().abs() * 21.0; + let s_b = (i as f64 * 0.3).cos().abs() * 21.0; + events.push(Event { + time: 1 + (i / 6) as i64, + teams: smallvec![ + Team::with_members([Member::new(a)]), + Team::with_members([Member::new(b)]), + ], + outcome: Outcome::scores([s_a, s_b]), + }); + } + h.add_events(events).unwrap(); + h.converge().unwrap(); + }); + }); +} + +criterion_group!(benches, bench_scored_history); +criterion_main!(benches); diff --git a/benches/scored_baseline.txt b/benches/scored_baseline.txt new file mode 100644 index 0000000..0cb4dac --- /dev/null +++ b/benches/scored_baseline.txt @@ -0,0 +1,14 @@ + Finished `bench` profile [optimized + debuginfo] target(s) in 0.02s + Running benches/scored.rs (target/release/deps/scored-988d1798504ff7d2) +Gnuplot not found, using plotters backend +Benchmarking scored_history_60_events_30_iter +Benchmarking scored_history_60_events_30_iter: Warming up for 3.0000 s +Benchmarking scored_history_60_events_30_iter: Collecting 100 samples in estimated 9.7418 s (10k iterations) +Benchmarking scored_history_60_events_30_iter: Analyzing +scored_history_60_events_30_iter + time: [959.36 µs 962.68 µs 966.13 µs] +Found 11 outliers among 100 measurements (11.00%) + 1 (1.00%) low mild + 5 (5.00%) high mild + 5 (5.00%) high severe + diff --git a/docs/superpowers/plans/2026-04-27-t4-margin-factor.md b/docs/superpowers/plans/2026-04-27-t4-margin-factor.md new file mode 100644 index 0000000..52997b7 --- /dev/null +++ b/docs/superpowers/plans/2026-04-27-t4-margin-factor.md @@ -0,0 +1,1976 @@ +# T4 — MarginFactor + Outcome::Scored Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Add a `MarginFactor` (Gaussian observation factor on a diff variable) and an `Outcome::Scored(scores)` variant, so users can supply continuous per-team scores instead of just ranks. Per-pair score margins become soft EP evidence about the latent performance diff. + +**Architecture:** +- Sort scored teams by score descending; for each adjacent pair compute `m_obs = score_higher − score_lower ≥ 0`. Per pair: `RankDiffFactor` writes `diff = team_a − team_b`, then a `MarginFactor` multiplies in the Gaussian observation `N(m_obs, score_sigma²)`. This replaces the `TruncFactor` for scored outcomes; ranked outcomes are unchanged. +- A new internal enum `DiffFactor { Trunc(TruncFactor), Margin(MarginFactor) }` lets `Game::likelihoods` keep its single hand-rolled forward/backward sweep loop while dispatching the per-diff factor by enum. +- `score_sigma` is configurable on `GameOptions` and `HistoryBuilder` (default `1.0`). +- `Outcome` is already `#[non_exhaustive]`, so adding `Scored` is non-breaking for downstream `match` arms. + +**Tech Stack:** Rust 2024, smallvec, rayon (already in tree). No new crate dependencies. 
+ +--- + +## File Structure + +| Path | Status | Responsibility | +|---|---|---| +| `src/factor/margin.rs` | **create** | `MarginFactor` struct + `Factor` impl + cavity-cached evidence + unit tests | +| `src/factor/mod.rs` | modify | `pub mod margin;`, `BuiltinFactor::Margin(...)` variant + dispatch arms | +| `src/factors.rs` | modify | re-export `MarginFactor` | +| `src/outcome.rs` | modify | `Outcome::Scored(SmallVec<[f64; 4]>)` variant, `scores()` ctor, `as_scores()` accessor, `team_count` arm | +| `src/game.rs` | modify | `pub(crate) enum DiffFactor`, scored path in `likelihoods`, `Game::scored()` ctor, `GameOptions::score_sigma` | +| `src/event_builder.rs` | modify | `.scores([...])` builder method | +| `src/history.rs` | modify | match `Outcome::Scored` in `add_events`; `HistoryBuilder::score_sigma`; new internal `add_events_scored_with_prior` (or extra arg) | +| `tests/scored.rs` | **create** | end-to-end Scored integration tests | +| `examples/scored.rs` | **create** | worked example using `Outcome::Scored` | +| `benches/scored.rs` | **create** | criterion benchmark mirroring `batch.rs` with scored events | +| `CLAUDE.md` | modify | mark T4-MarginFactor complete in the architecture notes | + +--- + +## Background — math the implementer needs + +For a diff variable `D` with current marginal `D_marg`, the MarginFactor models an observation `m_obs ~ N(D, σ²)` where `σ = score_sigma`. Standard EP for a Gaussian-likelihood factor: + +1. **Cavity:** `D_cav = D_marg / msg` (where `msg` is this factor's stored outgoing message; init `N_INF` so the first cavity = the current marginal). +2. **Tilted distribution:** `D_cav · N(m_obs, σ²)` — a product of two Gaussians; closed-form, no approximation needed (so it converges in one propagation). +3. **New marginal:** the tilted distribution. +4. **New outgoing message:** `new_msg = new_marginal / D_cav`. Because the tilted distribution is exact, `new_msg = N(m_obs, σ²)` (a constant in `m_obs` and `σ`). +5. 
**Cavity evidence:** `Z_cav = pdf(m_obs; D_cav.mu(), sqrt(D_cav.sigma()² + σ²))` (the marginal likelihood of `m_obs` under the cavity). Cache on first propagate, identical to `TruncFactor`'s pattern. `log_evidence = Z_cav.ln()`. + +Practical consequence: `MarginFactor::propagate` returns a non-zero delta on its first call (because `msg` jumps from `N_INF` to `N(m_obs, σ²)`) and exactly zero afterwards, since `new_msg` is a constant. + +A Gaussian with mean `m` and standard deviation `σ` is constructed via `Gaussian::from_ms(m, σ)`. Multiplication adds nat-params (`pi += other.pi; tau += other.tau`). Division subtracts. The `pdf(x, mu, sigma)` helper already exists in `lib.rs` (private, but importable as `crate::pdf`). + +**Concrete numerical check for tests:** With cavity `Gaussian::from_ms(0, 6)` (mean 0, standard deviation 6, i.e. variance 36) and observation `m_obs=5, σ=1`: +- `D_cav.pi = 1/36 ≈ 0.027778`, `D_cav.tau = 0`. +- New marginal: `pi = 0.027778 + 1 = 1.027778`, `tau = 0 + 5 = 5`. So `mu = 5 / 1.027778 ≈ 4.864865`, `sigma = 1/sqrt(1.027778) ≈ 0.986394`. +- `Z_cav = pdf(5, 0, sqrt(36 + 1)) = pdf(5, 0, sqrt(37)) = exp(-25/74) / sqrt(2π·37) ≈ 0.046783`. So `log_evidence = -(25/74 + ½·ln(2π·37)) ≈ -3.0622`. + +--- + +### Task 1: `MarginFactor` core (file + struct + Factor impl + unit tests) + +**Files:** +- Create: `src/factor/margin.rs` +- Modify: `src/factor/mod.rs:100-102` (add `pub mod margin;` next to the existing `pub mod` lines) + +- [ ] **Step 1: Add the module declaration so the new file compiles** + +In `src/factor/mod.rs`, find the existing block: + +```rust +pub mod rank_diff; +pub mod team_sum; +pub mod trunc; +``` + +Replace with: + +```rust +pub mod margin; +pub mod rank_diff; +pub mod team_sum; +pub mod trunc; +``` + +- [ ] **Step 2: Create `src/factor/margin.rs` with the implementation and its unit tests** + +```rust +use crate::{ + N_INF, cdf, pdf, + factor::{Factor, VarId, VarStore}, + gaussian::Gaussian, +}; + +/// Gaussian observation factor on a diff variable. +/// +/// Encodes the soft evidence `m_obs ~ N(diff, sigma²)`. 
The outgoing message +/// to `diff` is the constant `N(m_obs, sigma²)`, so this factor converges in a +/// single propagation: subsequent calls return a zero delta. +#[derive(Debug)] +pub struct MarginFactor { + pub diff: VarId, + pub m_obs: f64, + pub sigma: f64, + pub(crate) msg: Gaussian, + pub(crate) evidence_cached: Option<f64>, +} + +impl MarginFactor { + pub fn new(diff: VarId, m_obs: f64, sigma: f64) -> Self { + debug_assert!(sigma > 0.0, "score sigma must be positive"); + Self { + diff, + m_obs, + sigma, + msg: N_INF, + evidence_cached: None, + } + } +} + +impl Factor for MarginFactor { + fn propagate(&mut self, vars: &mut VarStore) -> (f64, f64) { + let marginal = vars.get(self.diff); + let cavity = marginal / self.msg; + + if self.evidence_cached.is_none() { + self.evidence_cached = Some(cavity_evidence(cavity, self.m_obs, self.sigma)); + } + + let new_msg = Gaussian::from_ms(self.m_obs, self.sigma); + let new_marginal = cavity * new_msg; + let old_msg = self.msg; + self.msg = new_msg; + vars.set(self.diff, new_marginal); + + old_msg.delta(new_msg) + } + + fn log_evidence(&self, _vars: &VarStore) -> f64 { + self.evidence_cached.unwrap_or(1.0).ln() + } +} + +fn cavity_evidence(cavity: Gaussian, m_obs: f64, sigma: f64) -> f64 { + let combined_sigma = (cavity.sigma().powi(2) + sigma.powi(2)).sqrt(); + pdf(m_obs, cavity.mu(), combined_sigma) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn first_propagate_writes_tilted_marginal() { + let mut vars = VarStore::new(); + let diff = vars.alloc(Gaussian::from_ms(0.0, 6.0)); + let mut f = MarginFactor::new(diff, 5.0, 1.0); + + f.propagate(&mut vars); + + let result = vars.get(diff); + // pi = 1/36 + 1 ≈ 1.027778; tau = 0 + 5 = 5 + // mu = 5 / 1.027778 ≈ 4.864865; sigma = 1/sqrt(1.027778) ≈ 0.986394 + assert!((result.mu() - 4.864864864864865).abs() < 1e-12); + assert!((result.sigma() - 0.986393923832144).abs() < 1e-12); + } + + #[test] + fn converges_in_one_step() { + let mut vars = VarStore::new(); + let 
diff = vars.alloc(Gaussian::from_ms(0.0, 6.0)); + let mut f = MarginFactor::new(diff, 5.0, 1.0); + + f.propagate(&mut vars); + let (dmu, dsig) = f.propagate(&mut vars); + assert!(dmu < 1e-12, "expected ~0 delta on second propagate, got {dmu}"); + assert!(dsig < 1e-12); + } + + #[test] + fn evidence_cached_on_first_propagate() { + let mut vars = VarStore::new(); + let diff = vars.alloc(Gaussian::from_ms(0.0, 6.0)); + let mut f = MarginFactor::new(diff, 5.0, 1.0); + assert!(f.evidence_cached.is_none()); + + f.propagate(&mut vars); + let z = f.evidence_cached.unwrap(); + // pdf(5, 0, sqrt(37)) = exp(-25/74) / sqrt(2π·37) ≈ 0.046783 + let expected = (-25.0_f64 / 74.0).exp() / (2.0 * std::f64::consts::PI * 37.0).sqrt(); + assert!((z - expected).abs() < 1e-10); + + // Subsequent propagations don't change it. + f.propagate(&mut vars); + assert_eq!(f.evidence_cached.unwrap(), z); + } + + #[test] + fn log_evidence_matches_cached_ln() { + let mut vars = VarStore::new(); + let diff = vars.alloc(Gaussian::from_ms(0.0, 6.0)); + let mut f = MarginFactor::new(diff, 5.0, 1.0); + f.propagate(&mut vars); + let logz = f.log_evidence(&vars); + // ln pdf(5, 0, sqrt(37)) = -(25/74 + ½·ln(2π·37)) ≈ -3.062235 + let expected = -(25.0_f64 / 74.0 + 0.5 * (2.0 * std::f64::consts::PI * 37.0).ln()); + assert!((logz - expected).abs() < 1e-10); + } + + // Keeps the `cdf` import used in test builds only (see the note below). + #[allow(dead_code)] + fn _cdf_smoke() -> f64 { + cdf(0.0, 0.0, 1.0) + } +} +``` + +> Note: the `cdf` import keeps parity with `trunc.rs` style and reserves the spot if a tie-band MarginFactor variant gets added later. Because `_cdf_smoke` only exists under `#[cfg(test)]`, non-test builds still report `cdf` as an unused import, which fails Step 4's `cargo clippy --lib -- -D warnings`. Unless the tie-band variant lands in the same change, remove `cdf` from the import list and delete `_cdf_smoke`. + +- [ ] **Step 3: Run the new tests to verify they pass (Step 2 adds the implementation together with its tests, so they should pass immediately)** + +Run: `cargo test --lib factor::margin` + +Expected: 4 passed. + +- [ ] **Step 4: Verify the module compiles cleanly with no warnings** + +Run: `cargo build` and `cargo clippy --lib -- -D warnings` + +Expected: no warnings, no errors. 
+ +- [ ] **Step 5: Format and commit** + +```bash +cargo +nightly fmt +git add src/factor/margin.rs src/factor/mod.rs +git commit -m "feat(factor): add MarginFactor for scored-margin EP evidence" +``` + +--- + +### Task 2: Wire `MarginFactor` into `BuiltinFactor` enum dispatch + +**Files:** +- Modify: `src/factor/mod.rs:76-98` (the `BuiltinFactor` enum and its `Factor` impl) +- Modify: `src/factors.rs:7-13` (the public re-export list) + +- [ ] **Step 1: Write a failing dispatch test in `src/factor/mod.rs`** + +Open `src/factor/mod.rs`. Inside the existing `#[cfg(test)] mod tests { ... }` block (around line 105), add: + +```rust +#[test] +fn builtin_factor_dispatches_to_margin() { + use super::margin::MarginFactor; + let mut vars = VarStore::new(); + let diff = vars.alloc(Gaussian::from_ms(0.0, 6.0)); + let mut f = BuiltinFactor::Margin(MarginFactor::new(diff, 5.0, 1.0)); + + f.propagate(&mut vars); + + let result = vars.get(diff); + assert!((result.mu() - 4.864864864864865).abs() < 1e-12); + + let logz = f.log_evidence(&vars); + // ln pdf(5, 0, sqrt(37)) = -(25/74 + ½·ln(2π·37)) ≈ -3.062235 + let expected = -(25.0_f64 / 74.0 + 0.5 * (2.0 * std::f64::consts::PI * 37.0).ln()); + assert!((logz - expected).abs() < 1e-10); +} +``` + +- [ ] **Step 2: Run the test to verify it fails** + +Run: `cargo test --lib factor::tests::builtin_factor_dispatches_to_margin` + +Expected: FAIL with `no variant named Margin found for enum BuiltinFactor`. + +- [ ] **Step 3: Add the enum variant + Factor impl arms** + +Replace the current `BuiltinFactor` definition and its `Factor` impl (currently `src/factor/mod.rs:76-98`): + +```rust +/// Enum dispatcher for the built-in factor types. +/// +/// Using an enum instead of `Box<dyn Factor>` keeps factor data inline and +/// avoids virtual-call overhead in the hot inference loop. 
+#[derive(Debug)] +pub enum BuiltinFactor { + TeamSum(team_sum::TeamSumFactor), + RankDiff(rank_diff::RankDiffFactor), + Trunc(trunc::TruncFactor), + Margin(margin::MarginFactor), +} + +impl Factor for BuiltinFactor { + fn propagate(&mut self, vars: &mut VarStore) -> (f64, f64) { + match self { + Self::TeamSum(f) => f.propagate(vars), + Self::RankDiff(f) => f.propagate(vars), + Self::Trunc(f) => f.propagate(vars), + Self::Margin(f) => f.propagate(vars), + } + } + + fn log_evidence(&self, vars: &VarStore) -> f64 { + match self { + Self::Trunc(f) => f.log_evidence(vars), + Self::Margin(f) => f.log_evidence(vars), + _ => 0.0, + } + } +} +``` + +- [ ] **Step 4: Re-export `MarginFactor` from `src/factors.rs`** + +Replace the body of `src/factors.rs` (lines 7-13) with: + +```rust +pub use crate::{ + factor::{ + BuiltinFactor, Factor, VarId, VarStore, margin::MarginFactor, + rank_diff::RankDiffFactor, team_sum::TeamSumFactor, trunc::TruncFactor, + }, + schedule::{EpsilonOrMax, Schedule, ScheduleReport}, +}; +``` + +- [ ] **Step 5: Run the test to verify it passes** + +Run: `cargo test --lib factor::tests::builtin_factor_dispatches_to_margin` + +Expected: PASS. + +- [ ] **Step 6: Run the full lib test suite to confirm no regressions** + +Run: `cargo test --lib` + +Expected: all tests pass (current count + 5 new from Tasks 1–2). + +- [ ] **Step 7: Format and commit** + +```bash +cargo +nightly fmt +git add src/factor/mod.rs src/factors.rs +git commit -m "feat(factor): dispatch MarginFactor through BuiltinFactor enum" +``` + +--- + +### Task 3: Add `Outcome::Scored` variant and accessors + +**Files:** +- Modify: `src/outcome.rs` + +- [ ] **Step 1: Write failing tests in `src/outcome.rs`** + +Add to the existing `#[cfg(test)] mod tests { ... 
}` block (after `winner_out_of_range_panics`, around line 86): + +```rust +#[test] +fn scored_two_teams() { + let o = Outcome::scores([10.0, 4.0]); + assert_eq!(o.team_count(), 2); + assert_eq!(o.as_scores(), Some(&[10.0, 4.0][..])); + assert_eq!(o.as_ranks(), None); +} + +#[test] +fn scored_team_count_matches_input() { + let o = Outcome::scores([3.0, 1.0, 2.0, 0.0]); + assert_eq!(o.team_count(), 4); +} + +#[test] +fn ranked_as_scores_returns_none() { + let o = Outcome::winner(0, 2); + assert!(o.as_scores().is_none()); + assert!(o.as_ranks().is_some()); +} +``` + +- [ ] **Step 2: Run the tests to verify they fail** + +Run: `cargo test --lib outcome::tests` + +Expected: FAIL — `no function or associated item named scores found`, etc. + +- [ ] **Step 3: Implement the `Scored` variant and helpers** + +Replace the body of `src/outcome.rs` with: + +```rust +//! Outcome of a match. +//! +//! `Ranked(ranks)` for ordinal results; `Scored(scores)` for continuous +//! per-team scores (engages `MarginFactor` in the engine). + +use smallvec::SmallVec; + +/// Final outcome of a match. +/// +/// `Ranked(ranks)`: lower rank = better. Equal ranks mean a tie between those +/// teams. `ranks.len()` must equal the number of teams in the event. +/// +/// `Scored(scores)`: higher score = better. Adjacent (sorted) pairs feed +/// observed margins to `MarginFactor`. `scores.len()` must equal the number +/// of teams in the event. +#[derive(Clone, Debug, PartialEq)] +#[non_exhaustive] +pub enum Outcome { + Ranked(SmallVec<[u32; 4]>), + Scored(SmallVec<[f64; 4]>), +} + +impl Outcome { + /// `n`-team outcome where team `winner` won and everyone else tied for last. + /// + /// Panics if `winner >= n`. + pub fn winner(winner: u32, n: u32) -> Self { + assert!(winner < n, "winner index {winner} out of range 0..{n}"); + let ranks: SmallVec<[u32; 4]> = (0..n).map(|i| if i == winner { 0 } else { 1 }).collect(); + Self::Ranked(ranks) + } + + /// All `n` teams tied. 
+ pub fn draw(n: u32) -> Self { + Self::Ranked(SmallVec::from_vec(vec![0; n as usize])) + } + + /// Explicit per-team ranking. + pub fn ranking<I: IntoIterator<Item = u32>>(ranks: I) -> Self { + Self::Ranked(ranks.into_iter().collect()) + } + + /// Explicit per-team continuous scores; higher = better. + pub fn scores<I: IntoIterator<Item = f64>>(scores: I) -> Self { + Self::Scored(scores.into_iter().collect()) + } + + pub fn team_count(&self) -> usize { + match self { + Self::Ranked(r) => r.len(), + Self::Scored(s) => s.len(), + } + } + + pub(crate) fn as_ranks(&self) -> Option<&[u32]> { + match self { + Self::Ranked(r) => Some(r), + Self::Scored(_) => None, + } + } + + pub(crate) fn as_scores(&self) -> Option<&[f64]> { + match self { + Self::Scored(s) => Some(s), + Self::Ranked(_) => None, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn winner_two_teams() { + let o = Outcome::winner(0, 2); + assert_eq!(o.as_ranks(), Some(&[0u32, 1][..])); + assert_eq!(o.team_count(), 2); + } + + #[test] + fn winner_three_teams_second_wins() { + let o = Outcome::winner(1, 3); + assert_eq!(o.as_ranks(), Some(&[1u32, 0, 1][..])); + } + + #[test] + fn draw_three_teams() { + let o = Outcome::draw(3); + assert_eq!(o.as_ranks(), Some(&[0u32, 0, 0][..])); + } + + #[test] + fn ranking_from_iter() { + let o = Outcome::ranking([2, 0, 1]); + assert_eq!(o.as_ranks(), Some(&[2u32, 0, 1][..])); + } + + #[test] + #[should_panic(expected = "winner index 2 out of range")] + fn winner_out_of_range_panics() { + let _ = Outcome::winner(2, 2); + } + + #[test] + fn scored_two_teams() { + let o = Outcome::scores([10.0, 4.0]); + assert_eq!(o.team_count(), 2); + assert_eq!(o.as_scores(), Some(&[10.0, 4.0][..])); + assert_eq!(o.as_ranks(), None); + } + + #[test] + fn scored_team_count_matches_input() { + let o = Outcome::scores([3.0, 1.0, 2.0, 0.0]); + assert_eq!(o.team_count(), 4); + } + + #[test] + fn ranked_as_scores_returns_none() { + let o = Outcome::winner(0, 2); + assert!(o.as_scores().is_none()); + 
assert!(o.as_ranks().is_some()); + } +} +``` + +> Note: the existing `as_ranks` returned `&[u32]` and was `#[allow(dead_code)]`. The new signature returns `Option<&[u32]>` because `Ranked` is no longer the only variant. All in-tree call sites that used `as_ranks()` (updated in Step 5 of this task) must now handle the `Option`. + +- [ ] **Step 4: Run the outcome tests to verify they pass** + +Run: `cargo test --lib outcome` + +Expected: 8 passed. + +- [ ] **Step 5: Update existing call sites to handle the new `Option<&[u32]>` return** + +Two call sites call `as_ranks()` today (`src/history.rs:672` and `src/game.rs:312`). Update each to handle the `Option`. A third use of `ranks`, at `src/history.rs:701`, is shown for reference and needs no change. + +In `src/history.rs:672`, change: + +```rust +let ranks = ev.outcome.as_ranks(); +if ranks.len() != ev.teams.len() { +``` + +to: + +```rust +let ranks = match ev.outcome.as_ranks() { + Some(r) => r, + None => { + // Scored path will be wired in Task 7; for now it's an error. + return Err(InferenceError::MismatchedShape { + kind: "outcome variant", + expected: 0, + got: 0, + }); + } +}; +if ranks.len() != ev.teams.len() { +``` + +In `src/history.rs:701`, leave the following as-is: + +```rust +let max_rank = ranks.iter().copied().max().unwrap_or(0) as f64; +let inverted: Vec<f64> = ranks.iter().map(|&r| max_rank - r as f64).collect(); +``` + +(no change needed — `ranks` is already `&[u32]` here). + +In `src/game.rs:312`, change: + +```rust +let ranks = outcome.as_ranks(); +let max_rank = ranks.iter().copied().max().unwrap_or(0) as f64; +let result: Vec<f64> = ranks.iter().map(|&r| max_rank - r as f64).collect(); +``` + +to: + +```rust +let ranks = outcome.as_ranks().ok_or(crate::InferenceError::MismatchedShape { + kind: "Game::ranked requires Outcome::Ranked", + expected: 0, + got: 0, +})?; +let max_rank = ranks.iter().copied().max().unwrap_or(0) as f64; +let result: Vec<f64> = ranks.iter().map(|&r| max_rank - r as f64).collect(); +``` + +- [ ] **Step 6: Verify the full lib still compiles and tests pass** + +Run: `cargo test --lib` + +Expected: all tests pass (call sites updated cleanly). 
+ +- [ ] **Step 7: Format and commit** + +```bash +cargo +nightly fmt +git add src/outcome.rs src/history.rs src/game.rs +git commit -m "feat(outcome): add Scored variant; switch as_ranks/as_scores to Option" +``` + +--- + +### Task 4: Internal `DiffFactor` enum to dispatch Trunc vs Margin per-pair + +**Files:** +- Modify: `src/game.rs` (top of file, before `Game` impl) + +- [ ] **Step 1: Write a failing test in `src/game.rs`'s test module** + +In the `#[cfg(test)] mod tests { ... }` block at the bottom of `src/game.rs`, add (after `test_2vs2_weighted`): + +```rust +#[test] +fn diff_factor_dispatch_trunc_and_margin() { + use crate::factor::{margin::MarginFactor, trunc::TruncFactor, VarStore}; + use super::DiffFactor; + + let mut vars = VarStore::new(); + let dt = vars.alloc(Gaussian::from_ms(0.0, 6.0)); + let dm = vars.alloc(Gaussian::from_ms(0.0, 6.0)); + + let mut t = DiffFactor::Trunc(TruncFactor::new(dt, 0.0, false)); + let mut m = DiffFactor::Margin(MarginFactor::new(dm, 5.0, 1.0)); + + let _ = t.propagate(&mut vars); + let _ = m.propagate(&mut vars); + + // Smoke: both diffs got written; their msgs are non-N_INF. + assert!(t.msg().pi() > 0.0); + assert!(m.msg().pi() > 0.0); + assert_eq!(t.diff(), dt); + assert_eq!(m.diff(), dm); +} +``` + +- [ ] **Step 2: Run the test to verify it fails** + +Run: `cargo test --lib game::tests::diff_factor_dispatch_trunc_and_margin` + +Expected: FAIL — `cannot find type DiffFactor in this scope`. + +- [ ] **Step 3: Add the `DiffFactor` enum at the top of `src/game.rs`** + +Insert after the existing `use` block (around line 14, before `pub struct GameOptions`): + +```rust +use crate::factor::margin::MarginFactor; + +/// Per-adjacent-pair link factor in the game's diff chain. +/// +/// `Trunc` is used for `Outcome::Ranked` (rank-based truncation). +/// `Margin` is used for `Outcome::Scored` (Gaussian observation on the diff). 
+#[derive(Debug)] +pub(crate) enum DiffFactor { + Trunc(TruncFactor), + Margin(MarginFactor), +} + +impl DiffFactor { + pub(crate) fn diff(&self) -> crate::factor::VarId { + match self { + Self::Trunc(f) => f.diff, + Self::Margin(f) => f.diff, + } + } + + pub(crate) fn msg(&self) -> Gaussian { + match self { + Self::Trunc(f) => f.msg, + Self::Margin(f) => f.msg, + } + } + + pub(crate) fn evidence(&self) -> f64 { + match self { + Self::Trunc(f) => f.evidence_cached.unwrap_or(1.0), + Self::Margin(f) => f.evidence_cached.unwrap_or(1.0), + } + } + + pub(crate) fn propagate(&mut self, vars: &mut crate::factor::VarStore) -> (f64, f64) { + use crate::factor::Factor; + match self { + Self::Trunc(f) => f.propagate(vars), + Self::Margin(f) => f.propagate(vars), + } + } +} +``` + +- [ ] **Step 4: Refactor `Game::likelihoods` to drive `Vec<DiffFactor>` instead of `Vec<TruncFactor>`** + +This is a mechanical rename inside `Game::likelihoods` (currently `src/game.rs:135-273`). The loop logic is unchanged; we just move the per-pair object behind the enum. Replace the body of `Game::likelihoods` from where `let mut trunc: Vec<TruncFactor> = ...` is constructed (around line 160) to its last use (around line 243): + +```rust + // One DiffFactor per adjacent sorted-team pair; each owns a diff VarId. + let mut links: Vec<DiffFactor> = (0..n_diffs) + .map(|i| { + let tie = self.result[arena.sort_buf[i]] == self.result[arena.sort_buf[i + 1]]; + let margin = if self.p_draw == 0.0 { + 0.0 + } else { + let a: f64 = self.teams[arena.sort_buf[i]] + .iter() + .map(|p| p.beta.powi(2)) + .sum(); + let b: f64 = self.teams[arena.sort_buf[i + 1]] + .iter() + .map(|p| p.beta.powi(2)) + .sum(); + compute_margin(self.p_draw, (a + b).sqrt()) + }; + let vid = arena.vars.alloc(N_INF); + DiffFactor::Trunc(TruncFactor::new(vid, margin, tie)) + }) + .collect(); + + // Per-team messages from neighbouring RankDiff factors (replaces TeamMessage). 
+ arena.lhood_lose.resize(n_teams, N_INF); + arena.lhood_win.resize(n_teams, N_INF); + + let mut step = (f64::INFINITY, f64::INFINITY); + let mut iter = 0; + + while tuple_gt(step, 1e-6) && iter < 10 { + step = (0.0_f64, 0.0_f64); + + for (e, lf) in links[..n_diffs.saturating_sub(1)].iter_mut().enumerate() { + let pw = arena.team_prior[e] * arena.lhood_lose[e]; + let pl = arena.team_prior[e + 1] * arena.lhood_win[e + 1]; + let raw = pw - pl; + arena.vars.set(lf.diff(), raw * lf.msg()); + let d = lf.propagate(&mut arena.vars); + step = tuple_max(step, d); + + let new_ll = pw - lf.msg(); + step = tuple_max(step, arena.lhood_lose[e + 1].delta(new_ll)); + arena.lhood_lose[e + 1] = new_ll; + } + + for (rev_i, lf) in links[1..].iter_mut().rev().enumerate() { + let e = n_diffs - 1 - rev_i; + let pw = arena.team_prior[e] * arena.lhood_lose[e]; + let pl = arena.team_prior[e + 1] * arena.lhood_win[e + 1]; + let raw = pw - pl; + arena.vars.set(lf.diff(), raw * lf.msg()); + let d = lf.propagate(&mut arena.vars); + step = tuple_max(step, d); + + let new_lw = pl + lf.msg(); + step = tuple_max(step, arena.lhood_win[e].delta(new_lw)); + arena.lhood_win[e] = new_lw; + } + + iter += 1; + } + + if n_diffs == 1 { + let raw = (arena.team_prior[0] * arena.lhood_lose[0]) + - (arena.team_prior[1] * arena.lhood_win[1]); + arena.vars.set(links[0].diff(), raw * links[0].msg()); + links[0].propagate(&mut arena.vars); + } + + if n_diffs > 0 { + let pl1 = arena.team_prior[1] * arena.lhood_win[1]; + arena.lhood_win[0] = pl1 + links[0].msg(); + let pw_last = arena.team_prior[n_teams - 2] * arena.lhood_lose[n_teams - 2]; + arena.lhood_lose[n_teams - 1] = pw_last - links[n_diffs - 1].msg(); + } + + self.evidence = links.iter().map(|l| l.evidence()).product(); +``` + +(Everything below the evidence line is unchanged.) Also remove the now-unused `use crate::factor::trunc::TruncFactor;` from the file's top imports if it becomes unused — but we still construct `TruncFactor` directly above, so it stays. 
+ +- [ ] **Step 5: Run the full lib test suite to verify the refactor preserves all golden values** + +Run: `cargo test --lib` + +Expected: all tests pass with **identical** assertions — this is a pure refactor. + +- [ ] **Step 6: Run the integration tests** + +Run: `cargo test` + +Expected: all pass. + +- [ ] **Step 7: Format and commit** + +```bash +cargo +nightly fmt +git add src/game.rs +git commit -m "refactor(game): dispatch per-diff link factors via DiffFactor enum" +``` + +--- + +### Task 5: Add `score_sigma` to `GameOptions` and the scored path in `Game::likelihoods` + +**Files:** +- Modify: `src/game.rs` + +- [ ] **Step 1: Write a failing test for the scored path** + +In `src/game.rs`'s test module, after the new dispatch test from Task 4, add: + +```rust +#[test] +fn scored_path_sharper_when_margin_is_large() { + // Same prior on both sides; large positive observed margin should pull + // team A above team B. + let prior = R::new( + Gaussian::from_ms(25.0, 25.0 / 3.0), + 25.0 / 6.0, + ConstantDrift(25.0 / 300.0), + ); + let teams = vec![vec![prior], vec![prior]]; + let result = vec![10.0, 0.0]; // a beat b by 10 + let weights = [vec![1.0], vec![1.0]]; + let mut arena = ScratchArena::new(); + let g = Game::scored_with_arena( + teams, + &result, + &weights, + 1.0, // score_sigma + &mut arena, + ); + let p = g.posteriors(); + let a = p[0][0]; + let b = p[1][0]; + assert!(a.mu() > b.mu(), "expected team a posterior mu > team b; got {} vs {}", a.mu(), b.mu()); + + // Tighter score_sigma should produce a stronger update. 
+ let mut arena2 = ScratchArena::new(); + let g_tight = Game::scored_with_arena( + vec![vec![prior], vec![prior]], + &result, + &weights, + 0.1, // tighter score_sigma + &mut arena2, + ); + let p_tight = g_tight.posteriors(); + let a_tight = p_tight[0][0]; + assert!(a_tight.mu() > a.mu(), "expected tighter sigma to push posterior further; {} vs {}", a_tight.mu(), a.mu()); +} +``` + +- [ ] **Step 2: Run the test to verify it fails** + +Run: `cargo test --lib game::tests::scored_path_sharper_when_margin_is_large` + +Expected: FAIL — `no function or associated item named scored_with_arena`. + +- [ ] **Step 3: Add `score_sigma` to `GameOptions`** + +Replace the `GameOptions` definition (around `src/game.rs:15-28`): + +```rust +#[derive(Clone, Copy, Debug)] +pub struct GameOptions { + pub p_draw: f64, + pub score_sigma: f64, + pub convergence: crate::ConvergenceOptions, +} + +impl Default for GameOptions { + fn default() -> Self { + Self { + p_draw: crate::P_DRAW, + score_sigma: 1.0, + convergence: crate::ConvergenceOptions::default(), + } + } +} +``` + +- [ ] **Step 4: Add `Game::scored_with_arena` and friends** + +In `Game<'a, T, D>`'s `impl` block (the one with `ranked_with_arena`, around `src/game.rs:90-133`), add a new method right after `ranked_with_arena`: + +```rust + pub(crate) fn scored_with_arena( + teams: Vec>>, + scores: &'a [f64], + weights: &'a [Vec], + score_sigma: f64, + arena: &mut ScratchArena, + ) -> Self { + debug_assert!( + scores.len() == teams.len(), + "scores must have the same length as teams" + ); + debug_assert!( + weights + .iter() + .zip(teams.iter()) + .all(|(w, t)| w.len() == t.len()), + "weights must have the same dimensions as teams" + ); + debug_assert!(score_sigma > 0.0, "score_sigma must be positive"); + + let mut this = Self { + teams, + result: scores, + weights, + p_draw: 0.0, + likelihoods: Vec::new(), + evidence: 0.0, + }; + + this.likelihoods_scored(arena, score_sigma); + this + } +``` + +- [ ] **Step 5: Add 
`likelihoods_scored` (parallel to `likelihoods`)** + +Right after `fn likelihoods` (around line 273), add: + +```rust + fn likelihoods_scored(&mut self, arena: &mut ScratchArena, score_sigma: f64) { + arena.reset(); + + let n_teams = self.teams.len(); + + arena.sort_buf.extend(0..n_teams); + arena.sort_buf.sort_by(|&i, &j| { + self.result[j] + .partial_cmp(&self.result[i]) + .unwrap_or(Ordering::Equal) + }); + + arena.team_prior.extend(arena.sort_buf.iter().map(|&t| { + self.teams[t] + .iter() + .zip(self.weights[t].iter()) + .fold(N00, |p, (player, &w)| p + (player.performance() * w)) + })); + + let n_diffs = n_teams.saturating_sub(1); + + // One MarginFactor per adjacent sorted-team pair, observed m_obs ≥ 0. + let mut links: Vec = (0..n_diffs) + .map(|i| { + let m_obs = self.result[arena.sort_buf[i]] - self.result[arena.sort_buf[i + 1]]; + let vid = arena.vars.alloc(N_INF); + DiffFactor::Margin(MarginFactor::new(vid, m_obs, score_sigma)) + }) + .collect(); + + arena.lhood_lose.resize(n_teams, N_INF); + arena.lhood_win.resize(n_teams, N_INF); + + let mut step = (f64::INFINITY, f64::INFINITY); + let mut iter = 0; + + while tuple_gt(step, 1e-6) && iter < 10 { + step = (0.0_f64, 0.0_f64); + + for (e, lf) in links[..n_diffs.saturating_sub(1)].iter_mut().enumerate() { + let pw = arena.team_prior[e] * arena.lhood_lose[e]; + let pl = arena.team_prior[e + 1] * arena.lhood_win[e + 1]; + let raw = pw - pl; + arena.vars.set(lf.diff(), raw * lf.msg()); + let d = lf.propagate(&mut arena.vars); + step = tuple_max(step, d); + + let new_ll = pw - lf.msg(); + step = tuple_max(step, arena.lhood_lose[e + 1].delta(new_ll)); + arena.lhood_lose[e + 1] = new_ll; + } + + for (rev_i, lf) in links[1..].iter_mut().rev().enumerate() { + let e = n_diffs - 1 - rev_i; + let pw = arena.team_prior[e] * arena.lhood_lose[e]; + let pl = arena.team_prior[e + 1] * arena.lhood_win[e + 1]; + let raw = pw - pl; + arena.vars.set(lf.diff(), raw * lf.msg()); + let d = lf.propagate(&mut arena.vars); + step = 
tuple_max(step, d); + + let new_lw = pl + lf.msg(); + step = tuple_max(step, arena.lhood_win[e].delta(new_lw)); + arena.lhood_win[e] = new_lw; + } + + iter += 1; + } + + if n_diffs == 1 { + let raw = (arena.team_prior[0] * arena.lhood_lose[0]) + - (arena.team_prior[1] * arena.lhood_win[1]); + arena.vars.set(links[0].diff(), raw * links[0].msg()); + links[0].propagate(&mut arena.vars); + } + + if n_diffs > 0 { + let pl1 = arena.team_prior[1] * arena.lhood_win[1]; + arena.lhood_win[0] = pl1 + links[0].msg(); + let pw_last = arena.team_prior[n_teams - 2] * arena.lhood_lose[n_teams - 2]; + arena.lhood_lose[n_teams - 1] = pw_last - links[n_diffs - 1].msg(); + } + + self.evidence = links.iter().map(|l| l.evidence()).product(); + + arena.inv_buf.resize(n_teams, 0); + for (si, &orig_i) in arena.sort_buf.iter().enumerate() { + arena.inv_buf[orig_i] = si; + } + + self.likelihoods = self + .teams + .iter() + .zip(self.weights.iter()) + .enumerate() + .map(|(orig_i, (players, weights))| { + let si = arena.inv_buf[orig_i]; + let m = arena.lhood_win[si] * arena.lhood_lose[si]; + let performance = players + .iter() + .zip(weights.iter()) + .fold(N00, |p, (player, &w)| p + (player.performance() * w)); + players + .iter() + .zip(weights.iter()) + .map(|(player, &w)| { + ((m - performance.exclude(player.performance() * w)) * (1.0 / w)) + .forget(player.beta.powi(2)) + }) + .collect::>() + }) + .collect::>(); + } +``` + +> The body is identical to `likelihoods` except for the per-pair factor construction (no draw-margin computation, `MarginFactor` instead of `TruncFactor`). DRY would let us extract the loop, but the duplication is small (~50 lines) and the divergence may grow as more factor kinds are added; we accept it for clarity. Revisit in T4-Synergy if it gets unwieldy. + +- [ ] **Step 6: Run the test to verify it passes** + +Run: `cargo test --lib game::tests::scored_path_sharper_when_margin_is_large` + +Expected: PASS. 
+ +- [ ] **Step 7: Run the full test suite** + +Run: `cargo test` + +Expected: all pass. + +- [ ] **Step 8: Format and commit** + +```bash +cargo +nightly fmt +git add src/game.rs +git commit -m "feat(game): add scored_with_arena driving MarginFactor links" +``` + +--- + +### Task 6: Public `Game::scored` constructor and `OwnedGame` support + +**Files:** +- Modify: `src/game.rs` + +- [ ] **Step 1: Write a failing test in `src/game.rs`'s test module** + +```rust +#[test] +fn game_scored_public_ctor() { + use crate::Outcome; + let prior = R::new( + Gaussian::from_ms(25.0, 25.0 / 3.0), + 25.0 / 6.0, + ConstantDrift(25.0 / 300.0), + ); + let opts = GameOptions { + score_sigma: 1.0, + ..GameOptions::default() + }; + let g = Game::scored(&[&[prior], &[prior]], Outcome::scores([8.0, 2.0]), &opts).unwrap(); + let p = g.posteriors(); + assert!(p[0][0].mu() > p[1][0].mu()); +} + +#[test] +fn game_scored_rejects_ranked_outcome() { + let prior = R::new( + Gaussian::from_ms(25.0, 25.0 / 3.0), + 25.0 / 6.0, + ConstantDrift(25.0 / 300.0), + ); + let err = Game::scored( + &[&[prior], &[prior]], + crate::Outcome::winner(0, 2), + &GameOptions::default(), + ) + .unwrap_err(); + assert!(matches!(err, crate::InferenceError::MismatchedShape { .. })); +} +``` + +- [ ] **Step 2: Run the tests to verify they fail** + +Run: `cargo test --lib game::tests::game_scored_public_ctor game::tests::game_scored_rejects_ranked_outcome` + +Expected: FAIL — `no function or associated item named scored`. 
+ +- [ ] **Step 3: Add `OwnedGame::new_scored` constructor** + +In `OwnedGame`'s impl (around `src/game.rs:46-78`), add right after `new`: + +```rust + pub(crate) fn new_scored( + teams: Vec>>, + scores: Vec, + weights: Vec>, + score_sigma: f64, + ) -> Self { + let mut arena = ScratchArena::new(); + let g = Game::scored_with_arena(teams.clone(), &scores, &weights, score_sigma, &mut arena); + let likelihoods = g.likelihoods; + let evidence = g.evidence; + Self { + teams, + result: scores, + weights, + p_draw: 0.0, + likelihoods, + evidence, + } + } +``` + +- [ ] **Step 4: Add `Game::scored` public method** + +In the `impl> Game<'_, T, D>` block (around `src/game.rs:293-349`), add right after `ranked`: + +```rust + pub fn scored( + teams: &[&[Rating]], + outcome: crate::Outcome, + options: &GameOptions, + ) -> Result, crate::InferenceError> { + if options.score_sigma <= 0.0 { + return Err(crate::InferenceError::InvalidProbability { + value: options.score_sigma, + }); + } + if outcome.team_count() != teams.len() { + return Err(crate::InferenceError::MismatchedShape { + kind: "outcome scores vs teams", + expected: teams.len(), + got: outcome.team_count(), + }); + } + let scores = outcome + .as_scores() + .ok_or(crate::InferenceError::MismatchedShape { + kind: "Game::scored requires Outcome::Scored", + expected: 0, + got: 0, + })? + .to_vec(); + let teams_owned: Vec>> = teams.iter().map(|t| t.to_vec()).collect(); + let weights: Vec> = teams.iter().map(|t| vec![1.0; t.len()]).collect(); + Ok(OwnedGame::new_scored(teams_owned, scores, weights, options.score_sigma)) + } +``` + +- [ ] **Step 5: Run the new tests to verify they pass** + +Run: `cargo test --lib game::tests::game_scored_public_ctor game::tests::game_scored_rejects_ranked_outcome` + +Expected: both PASS. + +- [ ] **Step 6: Run the full test suite** + +Run: `cargo test` + +Expected: all pass. 
+ +- [ ] **Step 7: Format and commit** + +```bash +cargo +nightly fmt +git add src/game.rs +git commit -m "feat(game): add public Game::scored constructor" +``` + +--- + +### Task 7: Plumb `Outcome::Scored` through `TimeSlice` and `History::add_events` + +**Files:** +- Modify: `src/time_slice.rs` +- Modify: `src/history.rs` + +The per-event `Event` struct in `src/time_slice.rs:80-85` is `{ teams, evidence, weights }`. We add a `kind: EventKind` field that selects which `Game::*_with_arena` to call. Score noise (`score_sigma`) lives inside the `Scored` variant so events can in principle have per-event sigma, though the public API only exposes one history-wide knob today. + +- [ ] **Step 1: Add `EventKind` to `src/time_slice.rs` and a `kind` field on `Event`** + +In `src/time_slice.rs`, immediately above the `struct Event` definition (currently around line 80), add: + +```rust +#[derive(Debug, Clone, Copy)] +pub(crate) enum EventKind { + Ranked, + Scored { score_sigma: f64 }, +} +``` + +Modify `struct Event` (currently lines 81-85) to: + +```rust +#[derive(Debug)] +pub(crate) struct Event { + teams: Vec, + evidence: f64, + weights: Vec>, + kind: EventKind, +} +``` + +- [ ] **Step 2: Dispatch on `kind` in `Event::iteration_direct`** + +Replace the body of `Event::iteration_direct` (currently `src/time_slice.rs:123-144`): + +```rust + fn iteration_direct>( + &mut self, + skills: &mut SkillStore, + agents: &CompetitorStore, + p_draw: f64, + arena: &mut ScratchArena, + ) { + let teams = self.within_priors(false, false, skills, agents); + let result = self.outputs(); + let g = match self.kind { + EventKind::Ranked => { + Game::ranked_with_arena(teams, &result, &self.weights, p_draw, arena) + } + EventKind::Scored { score_sigma } => { + Game::scored_with_arena(teams, &result, &self.weights, score_sigma, arena) + } + }; + + for (t, team) in self.teams.iter_mut().enumerate() { + for (i, item) in team.items.iter_mut().enumerate() { + let old_likelihood = 
skills.get(item.agent).unwrap().likelihood; + let new_likelihood = (old_likelihood / item.likelihood) * g.likelihoods[t][i]; + skills.get_mut(item.agent).unwrap().likelihood = new_likelihood; + item.likelihood = g.likelihoods[t][i]; + } + } + + self.evidence = g.evidence; + } +``` + +- [ ] **Step 3: Dispatch on `kind` in `TimeSlice::iteration` (sequential branch)** + +Inside `TimeSlice::iteration` (currently `src/time_slice.rs:295-325`), update the inner `for event in ...` loop of the `if from > 0 || self.color_groups.is_empty()` branch. Only the `Game::ranked_with_arena(...)` call (lines 302-308) changes; it becomes: + +```rust + let g = match event.kind { + EventKind::Ranked => Game::ranked_with_arena( + teams, + &result, + &event.weights, + self.p_draw, + &mut self.arena, + ), + EventKind::Scored { score_sigma } => Game::scored_with_arena( + teams, + &result, + &event.weights, + score_sigma, + &mut self.arena, + ), + }; +``` + +(The rest of that loop body — likelihood update + `event.evidence = g.evidence` — is unchanged.) + +- [ ] **Step 4: Dispatch on `kind` in `TimeSlice::log_evidence`** + +`TimeSlice::log_evidence` (currently `src/time_slice.rs:467-532`) calls `Game::ranked_with_arena` at multiple call sites (this plan lists lines 482-490 and 506-514; grep the function body for `ranked_with_arena` to make sure none is missed). For each call site, change to a match on `event.kind` mirroring Step 2. 
+ +Add a helper inside the impl to keep the call sites tidy: + +```rust + fn run_event>( + &self, + event: &Event, + online: bool, + forward: bool, + agents: &CompetitorStore, + arena: &mut ScratchArena, + ) -> f64 { + let teams = event.within_priors(online, forward, &self.skills, agents); + let result = event.outputs(); + match event.kind { + EventKind::Ranked => { + Game::ranked_with_arena(teams, &result, &event.weights, self.p_draw, arena).evidence + } + EventKind::Scored { score_sigma } => { + Game::scored_with_arena(teams, &result, &event.weights, score_sigma, arena) + .evidence + } + } + } +``` + +Then replace the inline `Game::ranked_with_arena(...).evidence.ln()` calls with `self.run_event(event, online, forward, agents, &mut arena).ln()`. + +- [ ] **Step 5: Extend `TimeSlice::add_events` signature with per-event `kinds`** + +Change the `add_events` signature (currently `src/time_slice.rs:203-209`) to: + +```rust + pub fn add_events>( + &mut self, + composition: Vec>>, + results: Vec>, + weights: Vec>>, + kinds: Vec, + agents: &CompetitorStore, + ) { +``` + +Inside the same method, update the event-construction map (around line 240). Each constructed `Event` gets its kind from `kinds[e]`: + +```rust + Event { + teams, + evidence: 0.0, + weights, + kind: kinds[e], + } +``` + +- [ ] **Step 6: Update `TimeSlice::add_events`'s tests to pass the new argument** + +Five call sites in `src/time_slice.rs:604`, `:680`, `:759`, `:790`, `:855` (spread across the unit tests `test_one_event_each`, `test_same_strength`, `test_add_events`, `time_slice_color_groups_reorders_events`) all call `time_slice.add_events(...)`. Add a fourth argument `vec![EventKind::Ranked; n_events]` between `weights` and `&agents` for each call, where `n_events` is the number of events passed in that particular call. 
Example: + +```rust + time_slice.add_events( + vec![ + vec![vec![a], vec![b]], + vec![vec![c], vec![d]], + vec![vec![e], vec![f]], + ], + vec![vec![1.0, 0.0], vec![0.0, 1.0], vec![1.0, 0.0]], + vec![], + vec![EventKind::Ranked; 3], + &agents, + ); +``` + +- [ ] **Step 7: Update the `History` callers of `TimeSlice::add_events`** + +In `src/history.rs:562` and `:572`, the calls pass `composition, results, weights, &self.agents`. Add the kinds vector. We'll thread the per-event `EventKind` through `add_events_with_prior` in Step 8 and pass it in here as `kinds_chunk`. + +- [ ] **Step 8: Extend `History::add_events_with_prior` to accept and route per-event kinds** + +In `src/history.rs:447-454`, change the signature to: + +```rust + pub(crate) fn add_events_with_prior( + &mut self, + composition: Vec>>, + results: Vec>, + times: Vec, + weights: Vec>>, + kinds: Vec, + mut priors: HashMap>, + ) -> Result<(), InferenceError> { +``` + +Around line 543, alongside the existing per-batch slicing of `composition`, `results`, and `weights`, add: + +```rust + let kinds_chunk: Vec = + (i..j).map(|e| kinds[o[e]]).collect(); +``` + +Update the two `time_slice.add_events(composition, results, weights, &self.agents)` call sites (lines 562 and 572) to: + +```rust + time_slice.add_events(composition, results, weights, kinds_chunk, &self.agents); +``` + +(For both branches — existing-slice and new-slice. Use `kinds_chunk.clone()` if the borrow checker complains; the vec is small.) 
+ +Validation: also add a length check at the top of the function alongside the existing ones: + +```rust + if !kinds.is_empty() && kinds.len() != composition.len() { + return Err(InferenceError::MismatchedShape { + kind: "kinds", + expected: composition.len(), + got: kinds.len(), + }); + } +``` + +- [ ] **Step 9: Update `record_winner` and `record_draw` to pass `kinds`** + +In `src/history.rs:617-647`, update both calls: + +```rust + self.add_events_with_prior( + vec![vec![vec![w], vec![l]]], + vec![vec![1.0, 0.0]], + vec![time], + vec![], + vec![crate::time_slice::EventKind::Ranked], + HashMap::new(), + ) +``` + +Same shape for `record_draw`. + +- [ ] **Step 10: Update `History::add_events` to compute kinds per event and pass through** + +Replace the placeholder match arm added in Task 3 Step 5 (around `src/history.rs:672-680`). The full updated event-loop body of `History::add_events` (around lines 671-705) becomes: + +```rust + let mut kinds: Vec = Vec::with_capacity(events.len()); + + for ev in events { + let team_count = ev.teams.len(); + + let (results_for_event, kind): (Vec, crate::time_slice::EventKind) = match &ev.outcome { + Outcome::Ranked(ranks) => { + if ranks.len() != team_count { + return Err(InferenceError::MismatchedShape { + kind: "outcome ranks vs teams", + expected: team_count, + got: ranks.len(), + }); + } + let max_rank = ranks.iter().copied().max().unwrap_or(0) as f64; + let inverted: Vec = ranks.iter().map(|&r| max_rank - r as f64).collect(); + (inverted, crate::time_slice::EventKind::Ranked) + } + Outcome::Scored(scores) => { + if scores.len() != team_count { + return Err(InferenceError::MismatchedShape { + kind: "outcome scores vs teams", + expected: team_count, + got: scores.len(), + }); + } + ( + scores.to_vec(), + crate::time_slice::EventKind::Scored { + score_sigma: self.score_sigma, + }, + ) + } + }; + + let mut event_comp: Vec> = Vec::with_capacity(team_count); + let mut event_weights: Vec> = Vec::with_capacity(team_count); + + for 
team in ev.teams { + let mut team_indices: Vec = Vec::with_capacity(team.members.len()); + let mut team_weights: Vec = Vec::with_capacity(team.members.len()); + for member in team.members { + let idx = self.keys.get_or_create(&member.key); + team_indices.push(idx); + team_weights.push(member.weight); + if let Some(prior) = member.prior { + priors.insert(idx, Rating::new(prior, self.beta, self.drift)); + } + } + event_comp.push(team_indices); + event_weights.push(team_weights); + } + composition.push(event_comp); + weights.push(event_weights); + results.push(results_for_event); + times.push(ev.time); + kinds.push(kind); + } + + self.add_events_with_prior(composition, results, times, weights, kinds, priors) +``` + +(Note `EventKind` needs to be re-exported from `time_slice`. Confirm `pub(crate) enum EventKind` in time_slice.rs is reachable from history.rs via `crate::time_slice::EventKind`.) + +- [ ] **Step 11: Add `score_sigma: f64` field to `History` and `HistoryBuilder`** + +In `src/history.rs:21-37` (`HistoryBuilder` struct), add field `score_sigma: f64,`. + +In the `Default` impl (around line 121), set `score_sigma: 1.0`. + +In `History::builder_with_key` (around line 170), set `score_sigma: 1.0`. + +In each builder transition method that constructs a new `HistoryBuilder` (`drift` at line 55, `observer` at line 85), copy the `score_sigma` field through. + +Add a builder method (insert near `p_draw`, around line 70): + +```rust + pub fn score_sigma(mut self, score_sigma: f64) -> Self { + self.score_sigma = score_sigma; + self + } +``` + +In `HistoryBuilder::build` (around line 100), set `score_sigma: self.score_sigma,` on the constructed `History`. + +In the `History` struct (around line 135), add `score_sigma: f64,`. 
+ +- [ ] **Step 12: Write a failing integration test in `tests/scored.rs` (new file)** + +Create `tests/scored.rs`: + +```rust +use smallvec::smallvec; +use trueskill_tt::{ConstantDrift, Event, History, Member, Outcome, Team}; + +#[test] +fn scored_two_team_one_event_pulls_winner_up() { + let mut h = History::builder() + .mu(25.0) + .sigma(25.0 / 3.0) + .beta(25.0 / 6.0) + .drift(ConstantDrift(0.0)) + .build(); + + let events: Vec> = vec![Event { + time: 1, + teams: smallvec![ + Team::with_members([Member::new("alice")]), + Team::with_members([Member::new("bob")]), + ], + outcome: Outcome::scores([10.0, 0.0]), + }]; + h.add_events(events).unwrap(); + h.converge().unwrap(); + + let alice = h.current_skill(&"alice").unwrap(); + let bob = h.current_skill(&"bob").unwrap(); + assert!(alice.mu() > 25.0, "alice mu should exceed prior; got {}", alice.mu()); + assert!(bob.mu() < 25.0, "bob mu should be below prior; got {}", bob.mu()); +} + +#[test] +fn scored_zero_margin_treats_as_tie() { + let mut h = History::builder() + .mu(25.0) + .sigma(25.0 / 3.0) + .beta(25.0 / 6.0) + .drift(ConstantDrift(0.0)) + .build(); + + let events: Vec> = vec![Event { + time: 1, + teams: smallvec![ + Team::with_members([Member::new("alice")]), + Team::with_members([Member::new("bob")]), + ], + outcome: Outcome::scores([3.0, 3.0]), + }]; + h.add_events(events).unwrap(); + h.converge().unwrap(); + + let alice = h.current_skill(&"alice").unwrap(); + let bob = h.current_skill(&"bob").unwrap(); + assert!((alice.mu() - bob.mu()).abs() < 1e-6, "tied scores -> equal mu; got {} vs {}", alice.mu(), bob.mu()); + // Sigma should still tighten (we have evidence diff ≈ 0). 
+ assert!(alice.sigma() < 25.0 / 3.0); +} + +#[test] +fn scored_three_team_partial_order() { + let mut h = History::builder() + .mu(25.0) + .sigma(25.0 / 3.0) + .beta(25.0 / 6.0) + .drift(ConstantDrift(0.0)) + .build(); + + let events: Vec> = vec![Event { + time: 1, + teams: smallvec![ + Team::with_members([Member::new("a")]), + Team::with_members([Member::new("b")]), + Team::with_members([Member::new("c")]), + ], + outcome: Outcome::scores([20.0, 10.0, 5.0]), + }]; + h.add_events(events).unwrap(); + h.converge().unwrap(); + + let a = h.current_skill(&"a").unwrap(); + let b = h.current_skill(&"b").unwrap(); + let c = h.current_skill(&"c").unwrap(); + assert!(a.mu() > b.mu()); + assert!(b.mu() > c.mu()); +} + +#[test] +fn scored_rejects_outcome_team_count_mismatch() { + use trueskill_tt::InferenceError; + let mut h: History = History::builder().build(); + let events: Vec> = vec![Event { + time: 1, + teams: smallvec![ + Team::with_members([Member::new("a")]), + Team::with_members([Member::new("b")]), + ], + outcome: Outcome::scores([1.0, 2.0, 3.0]), + }]; + let err = h.add_events(events).unwrap_err(); + assert!(matches!(err, InferenceError::MismatchedShape { .. })); +} +``` + +- [ ] **Step 13: Run the integration tests** + +Run: `cargo test --test scored` + +Expected: all four tests PASS (the wiring from Steps 1–11 is now complete). + +- [ ] **Step 14: Run the full test suite + clippy** + +Run: `cargo test && cargo clippy --all-targets -- -D warnings` + +Expected: all pass, no clippy warnings. Pay particular attention to the existing `time_slice` unit tests — they were updated in Step 6 and need to use `EventKind::Ranked`. 
+ +- [ ] **Step 15: Format and commit** + +```bash +cargo +nightly fmt +git add src/history.rs src/time_slice.rs tests/scored.rs +git commit -m "feat(history): route Outcome::Scored events through MarginFactor path" +``` + +--- + +### Task 8: `EventBuilder::scores` convenience + +**Files:** +- Modify: `src/event_builder.rs` +- Modify: `tests/api_shape.rs` (add a fluent-builder scored test) + +- [ ] **Step 1: Write failing tests in `tests/api_shape.rs`** + +Append to the existing test list: + +```rust +#[test] +fn fluent_event_builder_scores() { + use trueskill_tt::ConstantDrift; + let mut h = History::builder() + .mu(25.0) + .sigma(25.0 / 3.0) + .beta(25.0 / 6.0) + .drift(ConstantDrift(0.0)) + .build(); + + h.event(1) + .team(["alice"]) + .team(["bob"]) + .scores([12.0, 4.0]) + .commit() + .unwrap(); + h.converge().unwrap(); + + let a = h.current_skill(&"alice").unwrap(); + let b = h.current_skill(&"bob").unwrap(); + assert!(a.mu() > b.mu()); +} +``` + +- [ ] **Step 2: Run the test to verify it fails** + +Run: `cargo test --test api_shape fluent_event_builder_scores` + +Expected: FAIL — `no method named scores`. + +- [ ] **Step 3: Add `.scores` to `EventBuilder`** + +In `src/event_builder.rs`, alongside `.ranking`/`.winner`/`.draw` (around line 73), add: + +```rust + /// Set explicit per-team continuous scores; higher = better. + pub fn scores>(mut self, scores: I) -> Self { + self.event.outcome = crate::Outcome::scores(scores); + self + } +``` + +- [ ] **Step 4: Run the test to verify it passes** + +Run: `cargo test --test api_shape fluent_event_builder_scores` + +Expected: PASS. + +- [ ] **Step 5: Run the full test suite** + +Run: `cargo test` + +Expected: all pass. 
+ +- [ ] **Step 6: Format and commit** + +```bash +cargo +nightly fmt +git add src/event_builder.rs tests/api_shape.rs +git commit -m "feat(event-builder): add .scores convenience for Outcome::Scored" +``` + +--- + +### Task 9: Worked example — scored matches end-to-end + +**Files:** +- Create: `examples/scored.rs` + +- [ ] **Step 1: Create the example** + +```rust +//! Worked example: continuous-score outcomes via `Outcome::Scored`. +//! +//! Three players play a small round-robin where the score margin matters, +//! not just who won. We show how `score_sigma` controls how much weight +//! the engine places on the observed margin. +//! +//! Run with: `cargo run --example scored --release` + +use smallvec::smallvec; +use trueskill_tt::{ConstantDrift, Event, History, Member, Outcome, Team}; + +fn main() { + let mut h = History::builder() + .mu(25.0) + .sigma(25.0 / 3.0) + .beta(25.0 / 6.0) + .drift(ConstantDrift(0.03)) + .score_sigma(2.0) // tune to data; smaller = trust margins more + .build(); + + let events: Vec> = vec![ + Event { + time: 1, + teams: smallvec![ + Team::with_members([Member::new("alice")]), + Team::with_members([Member::new("bob")]), + ], + outcome: Outcome::scores([21.0, 9.0]), + }, + Event { + time: 2, + teams: smallvec![ + Team::with_members([Member::new("bob")]), + Team::with_members([Member::new("carol")]), + ], + outcome: Outcome::scores([21.0, 18.0]), + }, + Event { + time: 3, + teams: smallvec![ + Team::with_members([Member::new("alice")]), + Team::with_members([Member::new("carol")]), + ], + outcome: Outcome::scores([21.0, 21.0]), + }, + ]; + h.add_events(events).unwrap(); + + let report = h.converge().unwrap(); + println!( + "converged={}, iterations={}, log_evidence={:.4}", + report.converged, report.iterations, report.log_evidence + ); + + for who in &["alice", "bob", "carol"] { + let s = h.current_skill(who).unwrap(); + println!("{:>6}: mu={:>7.3} sigma={:.3}", who, s.mu(), s.sigma()); + } +} +``` + +- [ ] **Step 2: Confirm the 
example compiles and runs** + +Run: `cargo run --example scored --release` + +Expected: prints converged=true with three player skills; alice highest, bob middle, carol lowest (or close to bob — depends on `score_sigma`). + +- [ ] **Step 3: Commit** + +```bash +cargo +nightly fmt +git add examples/scored.rs +git commit -m "docs(examples): worked Outcome::Scored example" +``` + +--- + +### Task 10: Benchmark — scored ingestion + convergence + +**Files:** +- Create: `benches/scored.rs` +- Modify: `Cargo.toml` (add `[[bench]]` entry if needed) + +- [ ] **Step 1: Check `Cargo.toml` for the existing bench wiring** + +Run: `cat Cargo.toml | grep -A 3 'bench'` + +If `auto-bench = false` is set or each bench is registered explicitly, add a new entry: + +```toml +[[bench]] +name = "scored" +harness = false +``` + +- [ ] **Step 2: Create `benches/scored.rs` modeled on `benches/batch.rs`** + +```rust +use criterion::{Criterion, criterion_group, criterion_main}; +use smallvec::smallvec; +use trueskill_tt::{ConstantDrift, Event, History, Member, Outcome, Team}; + +fn bench_scored_history(c: &mut Criterion) { + c.bench_function("scored_history_60_events_30_iter", |bencher| { + bencher.iter(|| { + let mut h = History::builder() + .mu(25.0) + .sigma(25.0 / 3.0) + .beta(25.0 / 6.0) + .drift(ConstantDrift(0.03)) + .score_sigma(2.0) + .build(); + + let mut events: Vec> = Vec::with_capacity(60); + for i in 0..60 { + let a = format!("p{}", i % 20); + let b = format!("p{}", (i + 7) % 20); + let s_a = (i as f64 * 0.3).sin().abs() * 21.0; + let s_b = (i as f64 * 0.3).cos().abs() * 21.0; + events.push(Event { + time: 1 + (i / 6) as i64, + teams: smallvec![ + Team::with_members([Member::new(a)]), + Team::with_members([Member::new(b)]), + ], + outcome: Outcome::scores([s_a, s_b]), + }); + } + h.add_events(events).unwrap(); + h.converge().unwrap(); + }); + }); +} + +criterion_group!(benches, bench_scored_history); +criterion_main!(benches); +``` + +> The `History` here uses `String` keys to 
match the typical real-world bench shape; if `History` requires `builder_with_key`, adapt accordingly. + +- [ ] **Step 3: Verify the benchmark compiles** + +Run: `cargo bench --no-run --bench scored` + +Expected: builds without error. + +- [ ] **Step 4: Run the benchmark and capture a baseline number** + +Run: `cargo bench --bench scored 2>&1 | tee benches/scored_baseline.txt` + +(Save the result alongside the existing `benches/baseline.txt` so future tiers can compare.) + +- [ ] **Step 5: Commit** + +```bash +cargo +nightly fmt +git add benches/scored.rs benches/scored_baseline.txt Cargo.toml +git commit -m "bench(scored): add criterion bench mirroring batch bench" +``` + +--- + +### Task 11: Documentation — README + CLAUDE.md status update + +**Files:** +- Modify: `README.md` +- Modify: `CLAUDE.md` +- Modify: `docs/superpowers/specs/2026-04-23-trueskill-engine-redesign-design.md` (mark MarginFactor done) + +- [ ] **Step 1: Add a "Scored outcomes" subsection to `README.md`** + +Find the existing `## Usage` section (or equivalent) and add: + +```markdown +### Scored outcomes + +Use `Outcome::scores([...])` when you have continuous per-team scores rather +than just ranks. Adjacent score margins flow into a `MarginFactor` that adds +soft Gaussian evidence about the latent performance diff. Configure +`HistoryBuilder::score_sigma(σ)` to control how much you trust the margins +(smaller σ = more trust). + +​```rust +use trueskill_tt::{History, Outcome}; + +let mut h = History::builder().score_sigma(2.0).build(); +h.event(1) + .team(["alice"]) + .team(["bob"]) + .scores([21.0, 9.0]) + .commit() + .unwrap(); +h.converge().unwrap(); +​``` +``` + +(Replace the backticks-surrounded fence indicators above (`​```rust` and `​````) with proper triple backticks; the zero-width chars are there to avoid breaking *this* plan file's nesting.) 
+ +- [ ] **Step 2: Update `CLAUDE.md` architecture notes** + +In `CLAUDE.md`, add to the existing factor list (or near the architecture section): + +``` +- `MarginFactor` (factor/margin.rs) — Gaussian observation factor on a diff variable; engaged by `Outcome::Scored`. +``` + +- [ ] **Step 3: Mark the T4-Margin item complete in the spec** + +In `docs/superpowers/specs/2026-04-23-trueskill-engine-redesign-design.md`, find the T4 section (line 577 onward): + +```markdown +- `MarginFactor` → enables `Outcome::Scored`. +``` + +Change to: + +```markdown +- `MarginFactor` → enables `Outcome::Scored`. **Done** (see `docs/superpowers/plans/2026-04-27-t4-margin-factor.md`). +``` + +- [ ] **Step 4: Final full test + clippy + fmt run** + +Run: + +```bash +cargo +nightly fmt +cargo clippy --all-targets -- -D warnings +cargo test +cargo bench --no-run +``` + +Expected: all green, no warnings, all bench targets compile. + +- [ ] **Step 5: Commit** + +```bash +git add README.md CLAUDE.md docs/superpowers/specs/2026-04-23-trueskill-engine-redesign-design.md +git commit -m "docs(t4-margin): document Outcome::Scored and mark spec item done" +``` + +--- + +## Acceptance criteria + +- All existing lib + integration tests still pass with their existing golden values (Trunc path is bit-for-bit unchanged after the `DiffFactor` refactor in Task 4). +- `cargo test --test scored` passes all four tests added in Task 7. +- `cargo run --example scored --release` runs and prints sensible posteriors. +- `cargo bench --bench scored` produces a baseline result saved under `benches/`. +- `cargo clippy --all-targets -- -D warnings` is clean. +- `Outcome::Scored` is accepted by the public API: `History::add_events`, `History::event(...).scores(...)`, and `Game::scored`. +- `score_sigma` is configurable via `HistoryBuilder::score_sigma` and `GameOptions::score_sigma`, default `1.0`. 
+ +## Out of scope (deferred to later T4 plans) + +- Damped / Residual schedules +- SynergyFactor +- ScoreFactor (continuous outcome variable distinct from observed margin) +- Per-event `score_sigma` overrides (currently history-wide) +- Tie-band MarginFactor variant (`m_obs` band rather than point observation) diff --git a/docs/superpowers/specs/2026-04-23-trueskill-engine-redesign-design.md b/docs/superpowers/specs/2026-04-23-trueskill-engine-redesign-design.md index 3f4f00b..7e0c95d 100644 --- a/docs/superpowers/specs/2026-04-23-trueskill-engine-redesign-design.md +++ b/docs/superpowers/specs/2026-04-23-trueskill-engine-redesign-design.md @@ -578,7 +578,7 @@ All renames and the new public API land together. No half-renamed intermediate s Each shipped independently after T3. -- `MarginFactor` → enables `Outcome::Scored`. +- `MarginFactor` → enables `Outcome::Scored`. **Done** (see `docs/superpowers/plans/2026-04-27-t4-margin-factor.md`). - `Damped` and `Residual` schedules. - `SynergyFactor`, `ScoreFactor` → same pattern when wanted. diff --git a/examples/scored.rs b/examples/scored.rs new file mode 100644 index 0000000..5a63f81 --- /dev/null +++ b/examples/scored.rs @@ -0,0 +1,59 @@ +//! Worked example: continuous-score outcomes via `Outcome::Scored`. +//! +//! Three players play a small round-robin where the score margin matters, +//! not just who won. We show how `score_sigma` controls how much weight +//! the engine places on the observed margin. +//! +//! 
Run with: `cargo run --example scored --release` + +use smallvec::smallvec; +use trueskill_tt::{ConstantDrift, Event, History, Member, Outcome, Team}; + +fn main() { + let mut h = History::builder() + .mu(25.0) + .sigma(25.0 / 3.0) + .beta(25.0 / 6.0) + .drift(ConstantDrift(0.03)) + .score_sigma(2.0) // tune to data; smaller = trust margins more + .build(); + + let events: Vec> = vec![ + Event { + time: 1, + teams: smallvec![ + Team::with_members([Member::new("alice")]), + Team::with_members([Member::new("bob")]), + ], + outcome: Outcome::scores([21.0, 9.0]), + }, + Event { + time: 2, + teams: smallvec![ + Team::with_members([Member::new("bob")]), + Team::with_members([Member::new("carol")]), + ], + outcome: Outcome::scores([21.0, 18.0]), + }, + Event { + time: 3, + teams: smallvec![ + Team::with_members([Member::new("alice")]), + Team::with_members([Member::new("carol")]), + ], + outcome: Outcome::scores([21.0, 21.0]), + }, + ]; + h.add_events(events).unwrap(); + + let report = h.converge().unwrap(); + println!( + "converged={}, iterations={}, log_evidence={:.4}", + report.converged, report.iterations, report.log_evidence + ); + + for who in &["alice", "bob", "carol"] { + let s = h.current_skill(who).unwrap(); + println!("{:>6}: mu={:>7.3} sigma={:.3}", who, s.mu(), s.sigma()); + } +} diff --git a/src/error.rs b/src/error.rs index e32a124..66d1e45 100644 --- a/src/error.rs +++ b/src/error.rs @@ -10,6 +10,8 @@ pub enum InferenceError { }, /// A probability value is outside `[0, 1]`. InvalidProbability { value: f64 }, + /// A scalar parameter is outside its valid range. + InvalidParameter { name: &'static str, value: f64 }, /// Convergence exceeded `max_iter` without falling below `epsilon`. 
ConvergenceFailed { last_step: (f64, f64), @@ -32,6 +34,9 @@ impl fmt::Display for InferenceError { Self::InvalidProbability { value } => { write!(f, "probability must be in [0, 1]; got {value}") } + Self::InvalidParameter { name, value } => { + write!(f, "{name} is invalid: {value}") + } Self::ConvergenceFailed { last_step, iterations, diff --git a/src/event_builder.rs b/src/event_builder.rs index d415e16..9fab6fc 100644 --- a/src/event_builder.rs +++ b/src/event_builder.rs @@ -75,6 +75,12 @@ where self } + /// Set explicit per-team continuous scores; higher = better. + pub fn scores>(mut self, scores: I) -> Self { + self.event.outcome = crate::Outcome::scores(scores); + self + } + /// Mark team `winner_idx` as winner; others tied for last. pub fn winner(mut self, winner_idx: u32) -> Self { self.event.outcome = Outcome::winner(winner_idx, self.event.teams.len() as u32); diff --git a/src/factor/margin.rs b/src/factor/margin.rs new file mode 100644 index 0000000..aa27fc1 --- /dev/null +++ b/src/factor/margin.rs @@ -0,0 +1,123 @@ +use crate::{ + N_INF, + factor::{Factor, VarId, VarStore}, + gaussian::Gaussian, + pdf, +}; + +/// Gaussian observation factor on a diff variable. +/// +/// Encodes the soft evidence `m_obs ~ N(diff, sigma²)`. The outgoing message +/// to `diff` is the constant `N(m_obs, sigma²)`, so this factor converges in a +/// single propagation: subsequent calls return a zero delta. 
+#[derive(Debug)] +pub struct MarginFactor { + pub diff: VarId, + pub m_obs: f64, + pub sigma: f64, + pub(crate) msg: Gaussian, + pub(crate) evidence_cached: Option, +} + +impl MarginFactor { + pub fn new(diff: VarId, m_obs: f64, sigma: f64) -> Self { + debug_assert!(sigma > 0.0, "score sigma must be positive"); + Self { + diff, + m_obs, + sigma, + msg: N_INF, + evidence_cached: None, + } + } +} + +impl Factor for MarginFactor { + fn propagate(&mut self, vars: &mut VarStore) -> (f64, f64) { + let marginal = vars.get(self.diff); + let cavity = marginal / self.msg; + + if self.evidence_cached.is_none() { + self.evidence_cached = Some(cavity_evidence(cavity, self.m_obs, self.sigma)); + } + + let new_msg = Gaussian::from_ms(self.m_obs, self.sigma); + let new_marginal = cavity * new_msg; + let old_msg = self.msg; + self.msg = new_msg; + vars.set(self.diff, new_marginal); + + old_msg.delta(new_msg) + } + + fn log_evidence(&self, _vars: &VarStore) -> f64 { + self.evidence_cached.unwrap_or(1.0).ln() + } +} + +fn cavity_evidence(cavity: Gaussian, m_obs: f64, sigma: f64) -> f64 { + let combined_sigma = (cavity.sigma().powi(2) + sigma.powi(2)).sqrt(); + pdf(m_obs, cavity.mu(), combined_sigma) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn first_propagate_writes_tilted_marginal() { + let mut vars = VarStore::new(); + let diff = vars.alloc(Gaussian::from_ms(0.0, 6.0)); + let mut f = MarginFactor::new(diff, 5.0, 1.0); + + f.propagate(&mut vars); + + let result = vars.get(diff); + // pi = 1/36 + 1 ≈ 1.027778; tau = 0 + 5 = 5 + // mu = 5 / 1.027778 ≈ 4.864865; sigma = 1/sqrt(1.027778) ≈ 0.986394 + assert!((result.mu() - 4.864864864864865).abs() < 1e-12); + assert!((result.sigma() - 0.986393923832144).abs() < 1e-12); + } + + #[test] + fn converges_in_one_step() { + let mut vars = VarStore::new(); + let diff = vars.alloc(Gaussian::from_ms(0.0, 6.0)); + let mut f = MarginFactor::new(diff, 5.0, 1.0); + + f.propagate(&mut vars); + let (dmu, dsig) = f.propagate(&mut 
vars); + assert!( + dmu < 1e-12, + "expected ~0 delta on second propagate, got {dmu}" + ); + assert!(dsig < 1e-12); + } + + #[test] + fn evidence_cached_on_first_propagate() { + let mut vars = VarStore::new(); + let diff = vars.alloc(Gaussian::from_ms(0.0, 6.0)); + let mut f = MarginFactor::new(diff, 5.0, 1.0); + assert!(f.evidence_cached.is_none()); + + f.propagate(&mut vars); + let z = f.evidence_cached.unwrap(); + // pdf(5, 0, sqrt(37)) ≈ 0.046783 + assert!((z - 0.04678300292616668).abs() < 1e-10); + + // Subsequent propagations don't change it. + f.propagate(&mut vars); + assert_eq!(f.evidence_cached.unwrap(), z); + } + + #[test] + fn log_evidence_matches_cached_ln() { + let mut vars = VarStore::new(); + let diff = vars.alloc(Gaussian::from_ms(0.0, 6.0)); + let mut f = MarginFactor::new(diff, 5.0, 1.0); + f.propagate(&mut vars); + let logz = f.log_evidence(&vars); + assert!((logz - (-3.062235327364623)).abs() < 1e-10); + } +} diff --git a/src/factor/mod.rs b/src/factor/mod.rs index 01b39c4..4d6788d 100644 --- a/src/factor/mod.rs +++ b/src/factor/mod.rs @@ -78,6 +78,7 @@ pub enum BuiltinFactor { TeamSum(team_sum::TeamSumFactor), RankDiff(rank_diff::RankDiffFactor), Trunc(trunc::TruncFactor), + Margin(margin::MarginFactor), } impl Factor for BuiltinFactor { @@ -86,17 +87,20 @@ impl Factor for BuiltinFactor { Self::TeamSum(f) => f.propagate(vars), Self::RankDiff(f) => f.propagate(vars), Self::Trunc(f) => f.propagate(vars), + Self::Margin(f) => f.propagate(vars), } } fn log_evidence(&self, vars: &VarStore) -> f64 { match self { Self::Trunc(f) => f.log_evidence(vars), + Self::Margin(f) => f.log_evidence(vars), _ => 0.0, } } } +pub mod margin; pub mod rank_diff; pub mod team_sum; pub mod trunc; @@ -145,4 +149,20 @@ mod tests { assert_eq!(store.len(), 0); assert_eq!(store.marginals.capacity(), cap); } + + #[test] + fn builtin_factor_dispatches_to_margin() { + use super::margin::MarginFactor; + let mut vars = VarStore::new(); + let diff = 
vars.alloc(Gaussian::from_ms(0.0, 6.0)); + let mut f = BuiltinFactor::Margin(MarginFactor::new(diff, 5.0, 1.0)); + + f.propagate(&mut vars); + + let result = vars.get(diff); + assert!((result.mu() - 4.864864864864865).abs() < 1e-12); + + let logz = f.log_evidence(&vars); + assert!((logz - (-3.062235327364623)).abs() < 1e-10); + } } diff --git a/src/factors.rs b/src/factors.rs index 162ca68..05a3d40 100644 --- a/src/factors.rs +++ b/src/factors.rs @@ -6,8 +6,8 @@ pub use crate::{ factor::{ - BuiltinFactor, Factor, VarId, VarStore, rank_diff::RankDiffFactor, team_sum::TeamSumFactor, - trunc::TruncFactor, + BuiltinFactor, Factor, VarId, VarStore, margin::MarginFactor, rank_diff::RankDiffFactor, + team_sum::TeamSumFactor, trunc::TruncFactor, }, schedule::{EpsilonOrMax, Schedule, ScheduleReport}, }; diff --git a/src/game.rs b/src/game.rs index 16be834..cc80139 100644 --- a/src/game.rs +++ b/src/game.rs @@ -5,16 +5,63 @@ use crate::{ arena::ScratchArena, compute_margin, drift::Drift, - factor::{Factor, trunc::TruncFactor}, + factor::{VarId, margin::MarginFactor, trunc::TruncFactor}, gaussian::Gaussian, rating::Rating, time::Time, tuple_gt, tuple_max, }; +/// Per-adjacent-pair link factor in the game's diff chain. +/// +/// `Trunc` is used for `Outcome::Ranked` (rank-based truncation). +/// `Margin` is used for `Outcome::Scored` (Gaussian observation on the diff). 
+#[derive(Debug)] +pub(crate) enum DiffFactor { + Trunc(TruncFactor), + Margin(MarginFactor), +} + +impl DiffFactor { + pub(crate) fn diff(&self) -> VarId { + match self { + Self::Trunc(f) => f.diff, + Self::Margin(f) => f.diff, + } + } + + pub(crate) fn msg(&self) -> Gaussian { + match self { + Self::Trunc(f) => f.msg, + Self::Margin(f) => f.msg, + } + } + + pub(crate) fn evidence(&self) -> f64 { + match self { + Self::Trunc(f) => f.evidence_cached.unwrap_or(1.0), + Self::Margin(f) => f.evidence_cached.unwrap_or(1.0), + } + } + + pub(crate) fn propagate(&mut self, vars: &mut crate::factor::VarStore) -> (f64, f64) { + use crate::factor::Factor; + match self { + Self::Trunc(f) => f.propagate(vars), + Self::Margin(f) => f.propagate(vars), + } + } +} + +/// Per-game inference options. +/// +/// `p_draw` and `convergence` apply to ranked outcomes (`Game::ranked`). +/// `score_sigma` applies only to scored outcomes (`Game::scored`); it controls +/// how much the engine trusts the observed score margin (smaller σ = more trust). 
#[derive(Clone, Copy, Debug)] pub struct GameOptions { pub p_draw: f64, + pub score_sigma: f64, pub convergence: crate::ConvergenceOptions, } @@ -22,6 +69,7 @@ impl Default for GameOptions { fn default() -> Self { Self { p_draw: crate::P_DRAW, + score_sigma: 1.0, convergence: crate::ConvergenceOptions::default(), } } @@ -64,6 +112,26 @@ impl> OwnedGame { } } + pub(crate) fn new_scored( + teams: Vec>>, + scores: Vec, + weights: Vec>, + score_sigma: f64, + ) -> Self { + let mut arena = ScratchArena::new(); + let g = Game::scored_with_arena(teams.clone(), &scores, &weights, score_sigma, &mut arena); + let likelihoods = g.likelihoods; + let evidence = g.evidence; + Self { + teams, + result: scores, + weights, + p_draw: 0.0, + likelihoods, + evidence, + } + } + pub fn posteriors(&self) -> Vec> { self.likelihoods .iter() @@ -132,6 +200,39 @@ impl<'a, T: Time, D: Drift> Game<'a, T, D> { this } + pub(crate) fn scored_with_arena( + teams: Vec>>, + scores: &'a [f64], + weights: &'a [Vec], + score_sigma: f64, + arena: &mut ScratchArena, + ) -> Self { + debug_assert!( + scores.len() == teams.len(), + "scores must have the same length as teams" + ); + debug_assert!( + weights + .iter() + .zip(teams.iter()) + .all(|(w, t)| w.len() == t.len()), + "weights must have the same dimensions as teams" + ); + debug_assert!(score_sigma > 0.0, "score_sigma must be positive"); + + let mut this = Self { + teams, + result: scores, + weights, + p_draw: 0.0, + likelihoods: Vec::new(), + evidence: 0.0, + }; + + this.likelihoods_scored(arena, score_sigma); + this + } + fn likelihoods(&mut self, arena: &mut ScratchArena) { arena.reset(); @@ -155,9 +256,9 @@ impl<'a, T: Time, D: Drift> Game<'a, T, D> { let n_diffs = n_teams.saturating_sub(1); - // One TruncFactor per adjacent sorted-team pair; each owns a diff VarId. - // trunc stays local (fresh state per game; Vec capacity is typically small). 
- let mut trunc: Vec = (0..n_diffs) + // One DiffFactor per adjacent sorted-team pair; each owns a diff VarId. + // links stays local (fresh state per game; Vec capacity is typically small). + let mut links: Vec = (0..n_diffs) .map(|i| { let tie = self.result[arena.sort_buf[i]] == self.result[arena.sort_buf[i + 1]]; let margin = if self.p_draw == 0.0 { @@ -174,7 +275,7 @@ impl<'a, T: Time, D: Drift> Game<'a, T, D> { compute_margin(self.p_draw, (a + b).sqrt()) }; let vid = arena.vars.alloc(N_INF); - TruncFactor::new(vid, margin, tie) + DiffFactor::Trunc(TruncFactor::new(vid, margin, tie)) }) .collect(); @@ -189,30 +290,30 @@ impl<'a, T: Time, D: Drift> Game<'a, T, D> { step = (0.0_f64, 0.0_f64); // Forward sweep: diffs 0 .. n_diffs-2 (all but the last). - for (e, tf) in trunc[..n_diffs.saturating_sub(1)].iter_mut().enumerate() { + for (e, lf) in links[..n_diffs.saturating_sub(1)].iter_mut().enumerate() { let pw = arena.team_prior[e] * arena.lhood_lose[e]; let pl = arena.team_prior[e + 1] * arena.lhood_win[e + 1]; let raw = pw - pl; - arena.vars.set(tf.diff, raw * tf.msg); - let d = tf.propagate(&mut arena.vars); + arena.vars.set(lf.diff(), raw * lf.msg()); + let d = lf.propagate(&mut arena.vars); step = tuple_max(step, d); - let new_ll = pw - tf.msg; + let new_ll = pw - lf.msg(); step = tuple_max(step, arena.lhood_lose[e + 1].delta(new_ll)); arena.lhood_lose[e + 1] = new_ll; } // Backward sweep: diffs n_diffs-1 .. 1 (reverse, all but the first). 
- for (rev_i, tf) in trunc[1..].iter_mut().rev().enumerate() { + for (rev_i, lf) in links[1..].iter_mut().rev().enumerate() { let e = n_diffs - 1 - rev_i; let pw = arena.team_prior[e] * arena.lhood_lose[e]; let pl = arena.team_prior[e + 1] * arena.lhood_win[e + 1]; let raw = pw - pl; - arena.vars.set(tf.diff, raw * tf.msg); - let d = tf.propagate(&mut arena.vars); + arena.vars.set(lf.diff(), raw * lf.msg()); + let d = lf.propagate(&mut arena.vars); step = tuple_max(step, d); - let new_lw = pl + tf.msg; + let new_lw = pl + lf.msg(); step = tuple_max(step, arena.lhood_win[e].delta(new_lw)); arena.lhood_win[e] = new_lw; } @@ -224,23 +325,20 @@ impl<'a, T: Time, D: Drift> Game<'a, T, D> { if n_diffs == 1 { let raw = (arena.team_prior[0] * arena.lhood_lose[0]) - (arena.team_prior[1] * arena.lhood_win[1]); - arena.vars.set(trunc[0].diff, raw * trunc[0].msg); - trunc[0].propagate(&mut arena.vars); + arena.vars.set(links[0].diff(), raw * links[0].msg()); + links[0].propagate(&mut arena.vars); } // Boundary updates: close the chain at both ends. if n_diffs > 0 { let pl1 = arena.team_prior[1] * arena.lhood_win[1]; - arena.lhood_win[0] = pl1 + trunc[0].msg; + arena.lhood_win[0] = pl1 + links[0].msg(); let pw_last = arena.team_prior[n_teams - 2] * arena.lhood_lose[n_teams - 2]; - arena.lhood_lose[n_teams - 1] = pw_last - trunc[n_diffs - 1].msg; + arena.lhood_lose[n_teams - 1] = pw_last - links[n_diffs - 1].msg(); } // Evidence = product of per-diff evidences (each cached on first propagation). - self.evidence = trunc - .iter() - .map(|t| t.evidence_cached.unwrap_or(1.0)) - .product(); + self.evidence = links.iter().map(|l| l.evidence()).product(); // Inverse permutation: inv_buf[orig_i] = sorted_i. 
arena.inv_buf.resize(n_teams, 0); @@ -272,6 +370,120 @@ impl<'a, T: Time, D: Drift> Game<'a, T, D> { .collect::>(); } + fn likelihoods_scored(&mut self, arena: &mut ScratchArena, score_sigma: f64) { + arena.reset(); + + let n_teams = self.teams.len(); + + arena.sort_buf.extend(0..n_teams); + arena.sort_buf.sort_by(|&i, &j| { + self.result[j] + .partial_cmp(&self.result[i]) + .unwrap_or(Ordering::Equal) + }); + + arena.team_prior.extend(arena.sort_buf.iter().map(|&t| { + self.teams[t] + .iter() + .zip(self.weights[t].iter()) + .fold(N00, |p, (player, &w)| p + (player.performance() * w)) + })); + + let n_diffs = n_teams.saturating_sub(1); + + let mut links: Vec = (0..n_diffs) + .map(|i| { + // After descending-by-score sort, m_obs >= 0 for every adjacent pair. + let m_obs = self.result[arena.sort_buf[i]] - self.result[arena.sort_buf[i + 1]]; + let vid = arena.vars.alloc(N_INF); + DiffFactor::Margin(MarginFactor::new(vid, m_obs, score_sigma)) + }) + .collect(); + + arena.lhood_lose.resize(n_teams, N_INF); + arena.lhood_win.resize(n_teams, N_INF); + + let mut step = (f64::INFINITY, f64::INFINITY); + let mut iter = 0; + + while tuple_gt(step, 1e-6) && iter < 10 { + step = (0.0_f64, 0.0_f64); + + for (e, lf) in links[..n_diffs.saturating_sub(1)].iter_mut().enumerate() { + let pw = arena.team_prior[e] * arena.lhood_lose[e]; + let pl = arena.team_prior[e + 1] * arena.lhood_win[e + 1]; + let raw = pw - pl; + arena.vars.set(lf.diff(), raw * lf.msg()); + let d = lf.propagate(&mut arena.vars); + step = tuple_max(step, d); + + let new_ll = pw - lf.msg(); + step = tuple_max(step, arena.lhood_lose[e + 1].delta(new_ll)); + arena.lhood_lose[e + 1] = new_ll; + } + + for (rev_i, lf) in links[1..].iter_mut().rev().enumerate() { + let e = n_diffs - 1 - rev_i; + let pw = arena.team_prior[e] * arena.lhood_lose[e]; + let pl = arena.team_prior[e + 1] * arena.lhood_win[e + 1]; + let raw = pw - pl; + arena.vars.set(lf.diff(), raw * lf.msg()); + let d = lf.propagate(&mut arena.vars); + step = 
tuple_max(step, d); + + let new_lw = pl + lf.msg(); + step = tuple_max(step, arena.lhood_win[e].delta(new_lw)); + arena.lhood_win[e] = new_lw; + } + + iter += 1; + } + + if n_diffs == 1 { + let raw = (arena.team_prior[0] * arena.lhood_lose[0]) + - (arena.team_prior[1] * arena.lhood_win[1]); + arena.vars.set(links[0].diff(), raw * links[0].msg()); + links[0].propagate(&mut arena.vars); + } + + if n_diffs > 0 { + let pl1 = arena.team_prior[1] * arena.lhood_win[1]; + arena.lhood_win[0] = pl1 + links[0].msg(); + let pw_last = arena.team_prior[n_teams - 2] * arena.lhood_lose[n_teams - 2]; + arena.lhood_lose[n_teams - 1] = pw_last - links[n_diffs - 1].msg(); + } + + self.evidence = links.iter().map(|l| l.evidence()).product(); + + arena.inv_buf.resize(n_teams, 0); + for (si, &orig_i) in arena.sort_buf.iter().enumerate() { + arena.inv_buf[orig_i] = si; + } + + self.likelihoods = self + .teams + .iter() + .zip(self.weights.iter()) + .enumerate() + .map(|(orig_i, (players, weights))| { + let si = arena.inv_buf[orig_i]; + let m = arena.lhood_win[si] * arena.lhood_lose[si]; + let performance = players + .iter() + .zip(weights.iter()) + .fold(N00, |p, (player, &w)| p + (player.performance() * w)); + players + .iter() + .zip(weights.iter()) + .map(|(player, &w)| { + ((m - performance.exclude(player.performance() * w)) * (1.0 / w)) + .forget(player.beta.powi(2)) + }) + .collect::>() + }) + .collect::>(); + } + pub fn posteriors(&self) -> Vec> { self.likelihoods .iter() @@ -309,7 +521,13 @@ impl> Game<'_, T, D> { }); } - let ranks = outcome.as_ranks(); + let ranks = outcome + .as_ranks() + .ok_or(crate::InferenceError::MismatchedShape { + kind: "Game::ranked requires Outcome::Ranked", + expected: 0, + got: 0, + })?; let max_rank = ranks.iter().copied().max().unwrap_or(0) as f64; let result: Vec = ranks.iter().map(|&r| max_rank - r as f64).collect(); let teams_owned: Vec>> = teams.iter().map(|t| t.to_vec()).collect(); @@ -318,6 +536,42 @@ impl> Game<'_, T, D> { 
Ok(OwnedGame::new(teams_owned, result, weights, options.p_draw)) } + pub fn scored( + teams: &[&[Rating]], + outcome: crate::Outcome, + options: &GameOptions, + ) -> Result, crate::InferenceError> { + if options.score_sigma <= 0.0 || options.score_sigma.is_nan() { + return Err(crate::InferenceError::InvalidParameter { + name: "score_sigma", + value: options.score_sigma, + }); + } + if outcome.team_count() != teams.len() { + return Err(crate::InferenceError::MismatchedShape { + kind: "outcome scores vs teams", + expected: teams.len(), + got: outcome.team_count(), + }); + } + let scores = outcome + .as_scores() + .ok_or(crate::InferenceError::MismatchedShape { + kind: "Game::scored requires Outcome::Scored", + expected: 0, + got: 0, + })? + .to_vec(); + let teams_owned: Vec>> = teams.iter().map(|t| t.to_vec()).collect(); + let weights: Vec> = teams.iter().map(|t| vec![1.0; t.len()]).collect(); + Ok(OwnedGame::new_scored( + teams_owned, + scores, + weights, + options.score_sigma, + )) + } + pub fn one_v_one( a: &Rating, b: &Rating, @@ -805,6 +1059,131 @@ mod tests { assert_ulps_eq!(p[0][0], p[1][0], epsilon = 1e-6); } + #[test] + fn diff_factor_dispatch_trunc_and_margin() { + use super::DiffFactor; + use crate::factor::{VarStore, margin::MarginFactor, trunc::TruncFactor}; + + let mut vars = VarStore::new(); + let dt = vars.alloc(Gaussian::from_ms(0.0, 6.0)); + let dm = vars.alloc(Gaussian::from_ms(0.0, 6.0)); + + let mut t = DiffFactor::Trunc(TruncFactor::new(dt, 0.0, false)); + let mut m = DiffFactor::Margin(MarginFactor::new(dm, 5.0, 1.0)); + + let _ = t.propagate(&mut vars); + let _ = m.propagate(&mut vars); + + // Smoke: both diffs got written; their msgs are non-N_INF. 
+ assert!(t.msg().pi() > 0.0); + assert!(m.msg().pi() > 0.0); + assert_eq!(t.diff(), dt); + assert_eq!(m.diff(), dm); + } + + #[test] + fn scored_path_sharper_when_margin_is_large() { + let prior = R::new( + Gaussian::from_ms(25.0, 25.0 / 3.0), + 25.0 / 6.0, + ConstantDrift(25.0 / 300.0), + ); + let teams = vec![vec![prior], vec![prior]]; + let result = vec![10.0, 0.0]; // a beat b by 10 + let weights = [vec![1.0], vec![1.0]]; + let mut arena = ScratchArena::new(); + let g = Game::scored_with_arena( + teams, &result, &weights, 1.0, // score_sigma + &mut arena, + ); + let p = g.posteriors(); + let a = p[0][0]; + let b = p[1][0]; + assert!( + a.mu() > b.mu(), + "expected team a posterior mu > team b; got {} vs {}", + a.mu(), + b.mu() + ); + + // Tighter score_sigma should produce a stronger update. + let mut arena2 = ScratchArena::new(); + let g_tight = Game::scored_with_arena( + vec![vec![prior], vec![prior]], + &result, + &weights, + 0.1, // tighter score_sigma + &mut arena2, + ); + let p_tight = g_tight.posteriors(); + let a_tight = p_tight[0][0]; + assert!( + a_tight.mu() > a.mu(), + "expected tighter sigma to push posterior further; {} vs {}", + a_tight.mu(), + a.mu() + ); + } + + #[test] + fn game_scored_public_ctor() { + use crate::Outcome; + let prior = R::new( + Gaussian::from_ms(25.0, 25.0 / 3.0), + 25.0 / 6.0, + ConstantDrift(25.0 / 300.0), + ); + let opts = GameOptions { + score_sigma: 1.0, + ..GameOptions::default() + }; + let g = Game::scored(&[&[prior], &[prior]], Outcome::scores([8.0, 2.0]), &opts).unwrap(); + let p = g.posteriors(); + assert!(p[0][0].mu() > p[1][0].mu()); + } + + #[test] + fn game_scored_rejects_ranked_outcome() { + let prior = R::new( + Gaussian::from_ms(25.0, 25.0 / 3.0), + 25.0 / 6.0, + ConstantDrift(25.0 / 300.0), + ); + let err = Game::scored( + &[&[prior], &[prior]], + crate::Outcome::winner(0, 2), + &GameOptions::default(), + ) + .unwrap_err(); + assert!(matches!(err, crate::InferenceError::MismatchedShape { .. 
})); + } + + #[test] + fn game_scored_rejects_zero_score_sigma() { + let prior = R::new( + Gaussian::from_ms(25.0, 25.0 / 3.0), + 25.0 / 6.0, + ConstantDrift(25.0 / 300.0), + ); + let opts = GameOptions { + score_sigma: 0.0, + ..GameOptions::default() + }; + let err = Game::scored( + &[&[prior], &[prior]], + crate::Outcome::scores([1.0, 0.0]), + &opts, + ) + .unwrap_err(); + assert!(matches!( + err, + crate::InferenceError::InvalidParameter { + name: "score_sigma", + .. + } + )); + } + #[test] fn test_2vs2_weighted() { let t_a = vec![ diff --git a/src/history.rs b/src/history.rs index 6d4439c..bb56750 100644 --- a/src/history.rs +++ b/src/history.rs @@ -13,7 +13,7 @@ use crate::{ sort_time, storage::CompetitorStore, time::Time, - time_slice::{self, TimeSlice}, + time_slice::{self, EventKind, TimeSlice}, tuple_gt, tuple_max, }; @@ -30,6 +30,7 @@ pub struct HistoryBuilder< drift: D, p_draw: f64, online: bool, + score_sigma: f64, convergence: ConvergenceOptions, observer: O, _time: PhantomData, @@ -60,6 +61,7 @@ impl, O: Observer, K: Eq + Hash + Clone> HistoryBuilder< beta: self.beta, p_draw: self.p_draw, online: self.online, + score_sigma: self.score_sigma, convergence: self.convergence, observer: self.observer, _time: self._time, @@ -77,6 +79,15 @@ impl, O: Observer, K: Eq + Hash + Clone> HistoryBuilder< self } + pub fn score_sigma(mut self, score_sigma: f64) -> Self { + assert!( + score_sigma > 0.0, + "score_sigma must be positive (got {score_sigma})" + ); + self.score_sigma = score_sigma; + self + } + pub fn convergence(mut self, opts: ConvergenceOptions) -> Self { self.convergence = opts; self @@ -90,6 +101,7 @@ impl, O: Observer, K: Eq + Hash + Clone> HistoryBuilder< drift: self.drift, p_draw: self.p_draw, online: self.online, + score_sigma: self.score_sigma, convergence: self.convergence, observer, _time: self._time, @@ -109,6 +121,7 @@ impl, O: Observer, K: Eq + Hash + Clone> HistoryBuilder< drift: self.drift, p_draw: self.p_draw, online: self.online, + 
score_sigma: self.score_sigma, convergence: self.convergence, observer: self.observer, } @@ -124,6 +137,7 @@ impl Default for HistoryBuilder drift: ConstantDrift(GAMMA), p_draw: P_DRAW, online: false, + score_sigma: 1.0, convergence: ConvergenceOptions::default(), observer: NullObserver, _time: PhantomData, @@ -148,6 +162,7 @@ pub struct History< drift: D, p_draw: f64, online: bool, + score_sigma: f64, convergence: ConvergenceOptions, observer: O, } @@ -174,6 +189,7 @@ impl History { drift: ConstantDrift(GAMMA), p_draw: P_DRAW, online: false, + score_sigma: 1.0, convergence: ConvergenceOptions::default(), observer: NullObserver, _time: PhantomData, @@ -450,6 +466,7 @@ impl, O: Observer, K: Eq + Hash + Clone> History>, times: Vec, weights: Vec>>, + kinds: Vec, mut priors: HashMap>, ) -> Result<(), InferenceError> { if !results.is_empty() && results.len() != composition.len() { @@ -473,6 +490,13 @@ impl, O: Observer, K: Eq + Hash + Clone> History, O: Observer, K: Eq + Hash + Clone> History>() }; + let kinds_chunk: Vec = (i..j).map(|e| kinds[o[e]]).collect(); + if self.time_slices.len() > k && self.time_slices[k].time == t { let time_slice = &mut self.time_slices[k]; - time_slice.add_events(composition, results, weights, &self.agents); + time_slice.add_events(composition, results, weights, kinds_chunk, &self.agents); for agent_idx in time_slice.skills.keys() { let agent = self.agents.get_mut(agent_idx).unwrap(); @@ -569,7 +595,7 @@ impl, O: Observer, K: Eq + Hash + Clone> History, O: Observer, K: Eq + Hash + Clone> History, O: Observer, K: Eq + Hash + Clone> History, O: Observer, K: Eq + Hash + Clone> History> = Vec::with_capacity(events.len()); let mut times: Vec = Vec::with_capacity(events.len()); let mut weights: Vec>> = Vec::with_capacity(events.len()); + let mut kinds: Vec = Vec::with_capacity(events.len()); let mut priors: HashMap> = HashMap::new(); for ev in events { - let ranks = ev.outcome.as_ranks(); - if ranks.len() != ev.teams.len() { + if 
ev.outcome.team_count() != ev.teams.len() { return Err(InferenceError::MismatchedShape { - kind: "outcome ranks vs teams", + kind: "outcome vs teams", expected: ev.teams.len(), - got: ranks.len(), + got: ev.outcome.team_count(), }); } @@ -698,13 +726,24 @@ impl, O: Observer, K: Eq + Hash + Clone> History = ranks.iter().map(|&r| max_rank - r as f64).collect(); - results.push(inverted); + let event_result: Vec = match &ev.outcome { + crate::Outcome::Ranked(ranks) => { + let max_rank = ranks.iter().copied().max().unwrap_or(0) as f64; + kinds.push(EventKind::Ranked); + ranks.iter().map(|&r| max_rank - r as f64).collect() + } + crate::Outcome::Scored(scores) => { + kinds.push(EventKind::Scored { + score_sigma: self.score_sigma, + }); + scores.to_vec() + } + }; + results.push(event_result); times.push(ev.time); } - self.add_events_with_prior(composition, results, times, weights, priors) + self.add_events_with_prior(composition, results, times, weights, kinds, priors) } } @@ -1666,4 +1705,10 @@ mod tests { assert!(report.iterations < 30); assert!(report.final_step.0 <= 1e-6); } + + #[test] + #[should_panic(expected = "score_sigma must be positive")] + fn history_builder_rejects_zero_score_sigma() { + let _ = History::builder().score_sigma(0.0).build(); + } } diff --git a/src/lib.rs b/src/lib.rs index 6bd9fa7..f40514c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,7 +8,7 @@ mod approx; pub(crate) mod arena; mod time; mod time_slice; -pub use time_slice::TimeSlice; +pub use time_slice::{EventKind, TimeSlice}; mod color_group; mod competitor; mod convergence; diff --git a/src/outcome.rs b/src/outcome.rs index a57c26d..917e154 100644 --- a/src/outcome.rs +++ b/src/outcome.rs @@ -1,8 +1,7 @@ //! Outcome of a match. //! -//! In T2, only `Ranked` is supported; `Scored` will be added together with -//! `MarginFactor` in T4. The enum is `#[non_exhaustive]` so adding `Scored` -//! is non-breaking for downstream `match` expressions. +//! 
`Ranked(ranks)` for ordinal results; `Scored(scores)` for continuous +//! per-team scores (engages `MarginFactor` in the engine). use smallvec::SmallVec; @@ -10,14 +9,19 @@ use smallvec::SmallVec; /// /// `Ranked(ranks)`: lower rank = better. Equal ranks mean a tie between those /// teams. `ranks.len()` must equal the number of teams in the event. +/// +/// `Scored(scores)`: higher score = better. Adjacent (sorted) pairs feed +/// observed margins to `MarginFactor`. `scores.len()` must equal the number +/// of teams in the event. #[derive(Clone, Debug, PartialEq)] #[non_exhaustive] pub enum Outcome { Ranked(SmallVec<[u32; 4]>), + Scored(SmallVec<[f64; 4]>), } impl Outcome { - /// `N`-team outcome where team `winner` won and everyone else tied for last. + /// `n`-team outcome where team `winner` won and everyone else tied for last. /// /// Panics if `winner >= n`. pub fn winner(winner: u32, n: u32) -> Self { @@ -36,16 +40,29 @@ impl Outcome { Self::Ranked(ranks.into_iter().collect()) } + /// Explicit per-team continuous scores; higher = better. 
+ pub fn scores>(scores: I) -> Self { + Self::Scored(scores.into_iter().collect()) + } + pub fn team_count(&self) -> usize { match self { Self::Ranked(r) => r.len(), + Self::Scored(s) => s.len(), } } - #[allow(dead_code)] - pub(crate) fn as_ranks(&self) -> &[u32] { + pub(crate) fn as_ranks(&self) -> Option<&[u32]> { match self { - Self::Ranked(r) => r, + Self::Ranked(r) => Some(r), + Self::Scored(_) => None, + } + } + + pub(crate) fn as_scores(&self) -> Option<&[f64]> { + match self { + Self::Scored(s) => Some(s), + Self::Ranked(_) => None, } } } @@ -57,26 +74,26 @@ mod tests { #[test] fn winner_two_teams() { let o = Outcome::winner(0, 2); - assert_eq!(o.as_ranks(), &[0u32, 1]); + assert_eq!(o.as_ranks(), Some(&[0u32, 1][..])); assert_eq!(o.team_count(), 2); } #[test] fn winner_three_teams_second_wins() { let o = Outcome::winner(1, 3); - assert_eq!(o.as_ranks(), &[1u32, 0, 1]); + assert_eq!(o.as_ranks(), Some(&[1u32, 0, 1][..])); } #[test] fn draw_three_teams() { let o = Outcome::draw(3); - assert_eq!(o.as_ranks(), &[0u32, 0, 0]); + assert_eq!(o.as_ranks(), Some(&[0u32, 0, 0][..])); } #[test] fn ranking_from_iter() { let o = Outcome::ranking([2, 0, 1]); - assert_eq!(o.as_ranks(), &[2u32, 0, 1]); + assert_eq!(o.as_ranks(), Some(&[2u32, 0, 1][..])); } #[test] @@ -84,4 +101,25 @@ mod tests { fn winner_out_of_range_panics() { let _ = Outcome::winner(2, 2); } + + #[test] + fn scored_two_teams() { + let o = Outcome::scores([10.0, 4.0]); + assert_eq!(o.team_count(), 2); + assert_eq!(o.as_scores(), Some(&[10.0, 4.0][..])); + assert_eq!(o.as_ranks(), None); + } + + #[test] + fn scored_team_count_matches_input() { + let o = Outcome::scores([3.0, 1.0, 2.0, 0.0]); + assert_eq!(o.team_count(), 4); + } + + #[test] + fn ranked_as_scores_returns_none() { + let o = Outcome::winner(0, 2); + assert!(o.as_scores().is_none()); + assert!(o.as_ranks().is_some()); + } } diff --git a/src/time_slice.rs b/src/time_slice.rs index cc19b30..a6f4806 100644 --- a/src/time_slice.rs +++ 
b/src/time_slice.rs @@ -44,6 +44,13 @@ impl Default for Skill { } } +#[derive(Debug, Clone, Copy)] +#[non_exhaustive] +pub enum EventKind { + Ranked, + Scored { score_sigma: f64 }, +} + #[derive(Debug)] struct Item { agent: Index, @@ -82,6 +89,7 @@ pub(crate) struct Event { teams: Vec, evidence: f64, weights: Vec>, + kind: EventKind, } impl Event { @@ -129,7 +137,14 @@ impl Event { ) { let teams = self.within_priors(false, false, skills, agents); let result = self.outputs(); - let g = Game::ranked_with_arena(teams, &result, &self.weights, p_draw, arena); + let g = match self.kind { + EventKind::Ranked => { + Game::ranked_with_arena(teams, &result, &self.weights, p_draw, arena) + } + EventKind::Scored { score_sigma } => { + Game::scored_with_arena(teams, &result, &self.weights, score_sigma, arena) + } + }; for (t, team) in self.teams.iter_mut().enumerate() { for (i, item) in team.items.iter_mut().enumerate() { @@ -205,6 +220,7 @@ impl TimeSlice { composition: Vec>>, results: Vec>, weights: Vec>>, + kinds: Vec, agents: &CompetitorStore, ) { let mut unique = Vec::with_capacity(10); @@ -274,6 +290,7 @@ impl TimeSlice { teams, evidence: 0.0, weights, + kind: kinds[e], } }); @@ -299,13 +316,22 @@ impl TimeSlice { let teams = event.within_priors(false, false, &self.skills, agents); let result = event.outputs(); - let g = Game::ranked_with_arena( - teams, - &result, - &event.weights, - self.p_draw, - &mut self.arena, - ); + let g = match event.kind { + EventKind::Ranked => Game::ranked_with_arena( + teams, + &result, + &event.weights, + self.p_draw, + &mut self.arena, + ), + EventKind::Scored { score_sigma } => Game::scored_with_arena( + teams, + &result, + &event.weights, + score_sigma, + &mut self.arena, + ), + }; for (t, team) in event.teams.iter_mut().enumerate() { for (i, item) in team.items.iter_mut().enumerate() { @@ -474,21 +500,28 @@ impl TimeSlice { // log_evidence is infrequent; a local arena avoids needing &mut self. 
let mut arena = ScratchArena::new(); + let run_event = |event: &Event, arena: &mut ScratchArena| -> f64 { + let teams = event.within_priors(online, forward, &self.skills, agents); + let result = event.outputs(); + match event.kind { + EventKind::Ranked => { + Game::ranked_with_arena(teams, &result, &event.weights, self.p_draw, arena) + .evidence + .ln() + } + EventKind::Scored { score_sigma } => { + Game::scored_with_arena(teams, &result, &event.weights, score_sigma, arena) + .evidence + .ln() + } + } + }; + if targets.is_empty() { if online || forward { self.events .iter() - .map(|event| { - Game::ranked_with_arena( - event.within_priors(online, forward, &self.skills, agents), - &event.outputs(), - &event.weights, - self.p_draw, - &mut arena, - ) - .evidence - .ln() - }) + .map(|event| run_event(event, &mut arena)) .sum() } else { self.events.iter().map(|event| event.evidence.ln()).sum() @@ -496,25 +529,14 @@ impl TimeSlice { } else if online || forward { self.events .iter() - .enumerate() - .filter(|(_, event)| { + .filter(|event| { event .teams .iter() .flat_map(|team| &team.items) .any(|item| targets.contains(&item.agent)) }) - .map(|(_, event)| { - Game::ranked_with_arena( - event.within_priors(online, forward, &self.skills, agents), - &event.outputs(), - &event.weights, - self.p_draw, - &mut arena, - ) - .evidence - .ln() - }) + .map(|event| run_event(event, &mut arena)) .sum() } else { self.events @@ -609,6 +631,7 @@ mod tests { ], vec![vec![1.0, 0.0], vec![0.0, 1.0], vec![1.0, 0.0]], vec![], + vec![EventKind::Ranked; 3], &agents, ); @@ -685,6 +708,7 @@ mod tests { ], vec![vec![1.0, 0.0], vec![0.0, 1.0], vec![1.0, 0.0]], vec![], + vec![EventKind::Ranked; 3], &agents, ); @@ -764,6 +788,7 @@ mod tests { ], vec![vec![1.0, 0.0], vec![0.0, 1.0], vec![1.0, 0.0]], vec![], + vec![EventKind::Ranked; 3], &agents, ); @@ -795,6 +820,7 @@ mod tests { ], vec![vec![1.0, 0.0], vec![0.0, 1.0], vec![1.0, 0.0]], vec![], + vec![EventKind::Ranked; 3], &agents, ); @@ -860,6 
+886,7 @@ mod tests { ], vec![vec![1.0, 0.0], vec![1.0, 0.0], vec![1.0, 0.0]], vec![], + vec![EventKind::Ranked; 3], &agents, ); diff --git a/tests/api_shape.rs b/tests/api_shape.rs index 676d568..dafea20 100644 --- a/tests/api_shape.rs +++ b/tests/api_shape.rs @@ -223,3 +223,26 @@ fn predict_outcome_two_teams_sums_to_one() { assert!((p[0] + p[1] - 1.0).abs() < 1e-9); assert!(p[0] > p[1]); } + +#[test] +fn fluent_event_builder_scores() { + use trueskill_tt::ConstantDrift; + let mut h = History::builder() + .mu(25.0) + .sigma(25.0 / 3.0) + .beta(25.0 / 6.0) + .drift(ConstantDrift(0.0)) + .build(); + + h.event(1) + .team(["alice"]) + .team(["bob"]) + .scores([12.0, 4.0]) + .commit() + .unwrap(); + h.converge().unwrap(); + + let a = h.current_skill(&"alice").unwrap(); + let b = h.current_skill(&"bob").unwrap(); + assert!(a.mu() > b.mu()); +} diff --git a/tests/equivalence.rs b/tests/equivalence.rs index 222d7dd..aeb8872 100644 --- a/tests/equivalence.rs +++ b/tests/equivalence.rs @@ -42,6 +42,7 @@ fn game_1v1_draw_golden() { Outcome::draw(2), &GameOptions { p_draw: 0.25, + score_sigma: 1.0, convergence: Default::default(), }, ) diff --git a/tests/game.rs b/tests/game.rs index 0769436..5fc84ce 100644 --- a/tests/game.rs +++ b/tests/game.rs @@ -45,6 +45,7 @@ fn game_ranked_rejects_bad_p_draw() { Outcome::winner(0, 2), &GameOptions { p_draw: 1.5, + score_sigma: 1.0, convergence: ConvergenceOptions::default(), }, ) diff --git a/tests/scored.rs b/tests/scored.rs new file mode 100644 index 0000000..a1419e3 --- /dev/null +++ b/tests/scored.rs @@ -0,0 +1,139 @@ +//! Integration tests for `Outcome::Scored` routing through `History::add_events`. 
+ +use smallvec::smallvec; +use trueskill_tt::{ConstantDrift, Event, History, Member, Outcome, Team}; + +#[test] +fn scored_two_team_one_event_pulls_winner_up() { + let mut h: History = History::builder() + .mu(0.0) + .sigma(2.0) + .beta(1.0) + .drift(ConstantDrift(0.0)) + .score_sigma(1.0) + .build(); + + let events: Vec> = vec![Event { + time: 1, + teams: smallvec![ + Team::with_members([Member::new("a")]), + Team::with_members([Member::new("b")]), + ], + outcome: Outcome::scores([10.0, 4.0]), + }]; + h.add_events(events).unwrap(); + + let mu_a = h.current_skill(&"a").unwrap().mu(); + let mu_b = h.current_skill(&"b").unwrap().mu(); + + assert!( + mu_a > 0.0, + "winner mu should be pulled up; got mu_a = {mu_a}" + ); + assert!( + mu_b < 0.0, + "loser mu should be pulled down; got mu_b = {mu_b}" + ); + assert!( + mu_a > mu_b, + "winner mu should exceed loser mu; got mu_a = {mu_a}, mu_b = {mu_b}" + ); +} + +#[test] +fn scored_zero_margin_treats_as_tie() { + let mut h: History = History::builder() + .mu(0.0) + .sigma(2.0) + .beta(1.0) + .drift(ConstantDrift(0.0)) + .score_sigma(1.0) + .build(); + + let events: Vec> = vec![Event { + time: 1, + teams: smallvec![ + Team::with_members([Member::new("a")]), + Team::with_members([Member::new("b")]), + ], + outcome: Outcome::scores([5.0, 5.0]), + }]; + h.add_events(events).unwrap(); + + let mu_a = h.current_skill(&"a").unwrap().mu(); + let mu_b = h.current_skill(&"b").unwrap().mu(); + let sigma_a = h.current_skill(&"a").unwrap().sigma(); + + // Equal scores: posterior means stay symmetric around the prior mean. + assert!( + (mu_a - mu_b).abs() < 1e-9, + "equal scores should leave mu_a == mu_b; got {mu_a} vs {mu_b}" + ); + assert!( + mu_a.abs() < 1e-9, + "equal scores against equal priors should leave mu near zero; got {mu_a}" + ); + + // A zero-margin scored event still reduces uncertainty. 
+ assert!( + sigma_a < 2.0, + "expected sigma to tighten below prior 2.0; got {}", + sigma_a + ); +} + +#[test] +fn scored_three_team_partial_order() { + let mut h: History = History::builder() + .mu(0.0) + .sigma(2.0) + .beta(1.0) + .drift(ConstantDrift(0.0)) + .score_sigma(1.0) + .build(); + + let events: Vec> = vec![Event { + time: 1, + teams: smallvec![ + Team::with_members([Member::new("a")]), + Team::with_members([Member::new("b")]), + Team::with_members([Member::new("c")]), + ], + outcome: Outcome::scores([9.0, 5.0, 1.0]), + }]; + h.add_events(events).unwrap(); + + let mu_a = h.current_skill(&"a").unwrap().mu(); + let mu_b = h.current_skill(&"b").unwrap().mu(); + let mu_c = h.current_skill(&"c").unwrap().mu(); + + assert!( + mu_a > mu_b, + "team with highest score should rank highest; mu_a = {mu_a}, mu_b = {mu_b}" + ); + assert!( + mu_b > mu_c, + "middle score should outrank lowest; mu_b = {mu_b}, mu_c = {mu_c}" + ); +} + +#[test] +fn scored_rejects_outcome_team_count_mismatch() { + use trueskill_tt::InferenceError; + + let mut h: History = History::builder().build(); + let events: Vec> = vec![Event { + time: 1, + teams: smallvec![ + Team::with_members([Member::new("a")]), + Team::with_members([Member::new("b")]), + ], + outcome: Outcome::scores([10.0, 4.0, 1.0]), // 3 scores, 2 teams + }]; + + let err = h.add_events(events).unwrap_err(); + assert!( + matches!(err, InferenceError::MismatchedShape { .. }), + "expected MismatchedShape error, got {err:?}" + ); +}