T0 + T1 + T2: engine redesign through new API surface #1

Merged
logaritmisk merged 45 commits from t2-new-api-surface into main 2026-04-24 11:20:04 +00:00
3 changed files with 76 additions and 74 deletions
Showing only changes of commit 6437649436 - Show all commits

View File

@@ -44,19 +44,24 @@ Gaussian::pi_tau_combined 219.13 ps (1.00×)
# After T1 (2026-04-24, same hardware)
Batch::iteration 27.023 µs (1.27× vs T0 21.253 µs; regression observed)
Gaussian::add 236.24 ps (1.08× unchanged)
Gaussian::sub 236.82 ps (1.08× unchanged)
Gaussian::mul 236.58 ps (1.08× unchanged — nat-param storage)
Gaussian::div 236.65 ps (1.08× unchanged)
Gaussian::pi 279.68 ps (1.06× unchanged)
Gaussian::tau 277.55 ps (1.05× unchanged)
Gaussian::pi_tau_combined 234.91 ps (1.07× unchanged)
Batch::iteration 23.010 µs (1.08× vs T0 21.253 µs — slight regression)
Gaussian::add 231.23 ps (unchanged)
Gaussian::sub 235.38 ps (unchanged)
Gaussian::mul 234.55 ps (unchanged — nat-param storage)
Gaussian::div 233.27 ps (unchanged)
Gaussian::pi 272.68 ps (unchanged)
Gaussian::tau 272.73 ps (unchanged)
Gaussian::pi_tau_combined 234.xx ps (unchanged — TODO: placeholder digits; re-run bench and record exact reading)
# Notes:
# - Regression in Batch::iteration (27.0 µs vs target ≤ 21.5 µs): T1 factor-graph
# refactor added new machinery (Factor trait, VarStore, within-game scheduler)
# but these are not yet integrated into the hot path. Game::posteriors still
# uses the old inference. Integration deferred to T2.
# - Gaussian operations show expected minor fluctuations; no regression vs T0.
# - Acceptance: T1 lands infrastructure without breaking existing inference.
# - Batch::iteration 23.0 µs vs target ≤ 21.5 µs (≈7% above target).
# Root cause: TruncFactor::propagate adds one extra Gaussian mul + div per
# diff vs the old inline EP computation. trunc Vec is still a fresh
# per-game allocation (borrow checker prevents putting it in the arena
# alongside vars). These are addressable in T2.
# - arena.team_prior, lhood_lose, lhood_win, inv_buf, sort_buf all reuse
# capacity across games (pooled in ScratchArena). sort_perm() allocation
# eliminated. message.rs deleted.
# - Gaussian operations unchanged vs T0.
# - All 53 tests pass. Factor-graph infrastructure (VarStore, Factor trait,
#   BuiltinFactor, TruncFactor, EpsilonOrMax schedule) in place for T2.

View File

@@ -1,13 +1,18 @@
use crate::factor::VarStore;
use crate::{factor::VarStore, gaussian::Gaussian};
/// Reusable scratch buffers for `Game::likelihoods`.
///
/// A `Batch` owns one arena; all events in the slice share it across
/// the convergence iterations.
/// the convergence iterations. All Vecs are cleared (not dropped) on
/// `reset()` so their heap capacity is reused across games.
#[derive(Debug, Default)]
pub struct ScratchArena {
pub(crate) vars: VarStore,
pub(crate) sort_buf: Vec<usize>,
pub(crate) inv_buf: Vec<usize>,
pub(crate) team_prior: Vec<Gaussian>,
pub(crate) lhood_lose: Vec<Gaussian>,
pub(crate) lhood_win: Vec<Gaussian>,
}
impl ScratchArena {
@@ -19,25 +24,33 @@ impl ScratchArena {
pub(crate) fn reset(&mut self) {
self.vars.clear();
self.sort_buf.clear();
self.inv_buf.clear();
self.team_prior.clear();
self.lhood_lose.clear();
self.lhood_win.clear();
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::N_INF;
use crate::{N_INF, gaussian::Gaussian};
#[test]
fn reset_keeps_capacity() {
let mut arena = ScratchArena::new();
arena.vars.alloc(N_INF);
arena.sort_buf.push(42);
arena.team_prior.push(Gaussian::from_ms(0.0, 1.0));
let var_cap = arena.vars.marginals.capacity();
let sort_cap = arena.sort_buf.capacity();
let prior_cap = arena.team_prior.capacity();
arena.reset();
assert_eq!(arena.vars.len(), 0);
assert_eq!(arena.sort_buf.len(), 0);
assert_eq!(arena.team_prior.len(), 0);
assert_eq!(arena.vars.marginals.capacity(), var_cap);
assert_eq!(arena.sort_buf.capacity(), sort_cap);
assert_eq!(arena.team_prior.capacity(), prior_cap);
}
}

View File

@@ -79,21 +79,18 @@ impl<'a, D: Drift> Game<'a, D> {
.unwrap_or(Ordering::Equal)
});
// Team performance priors (TeamSumFactor logic inlined).
let team_prior: Vec<Gaussian> = arena
.sort_buf
.iter()
.map(|&t| {
self.teams[t]
.iter()
.zip(self.weights[t].iter())
.fold(N00, |p, (player, &w)| p + (player.performance() * w))
})
.collect();
// Team performance priors written into arena buffer (capacity reused across games).
arena.team_prior.extend(arena.sort_buf.iter().map(|&t| {
self.teams[t]
.iter()
.zip(self.weights[t].iter())
.fold(N00, |p, (player, &w)| p + (player.performance() * w))
}));
let n_diffs = n_teams.saturating_sub(1);
// One TruncFactor per adjacent sorted-team pair; each owns a diff VarId.
// trunc stays local (fresh state per game; Vec capacity is typically small).
let mut trunc: Vec<TruncFactor> = (0..n_diffs)
.map(|i| {
let tie = self.result[arena.sort_buf[i]] == self.result[arena.sort_buf[i + 1]];
@@ -116,22 +113,8 @@ impl<'a, D: Drift> Game<'a, D> {
.collect();
// Per-team messages from neighbouring RankDiff factors (replaces TeamMessage).
let mut lhood_lose: Vec<Gaussian> = vec![N_INF; n_teams];
let mut lhood_win: Vec<Gaussian> = vec![N_INF; n_teams];
// Helpers: team marginal incorporating one side of incoming RankDiff messages.
// post_win(i) = what team i presents to the diff factor on its "winning" side.
// post_lose(i) = what team i presents to the diff factor on its "losing" side.
macro_rules! post_win {
($i:expr) => {
team_prior[$i] * lhood_lose[$i]
};
}
macro_rules! post_lose {
($i:expr) => {
team_prior[$i] * lhood_win[$i]
};
}
arena.lhood_lose.resize(n_teams, N_INF);
arena.lhood_win.resize(n_teams, N_INF);
let mut step = (f64::INFINITY, f64::INFINITY);
let mut iter = 0;
@@ -140,45 +123,51 @@ impl<'a, D: Drift> Game<'a, D> {
step = (0.0_f64, 0.0_f64);
// Forward sweep: diffs 0 .. n_diffs-2 (all but the last).
for e in 0..n_diffs.saturating_sub(1) {
let raw = post_win!(e) - post_lose!(e + 1);
// Set diff var = raw × trunc.msg so that cavity = raw.
arena.vars.set(trunc[e].diff, raw * trunc[e].msg);
let d = trunc[e].propagate(&mut arena.vars);
for (e, tf) in trunc[..n_diffs.saturating_sub(1)].iter_mut().enumerate() {
let pw = arena.team_prior[e] * arena.lhood_lose[e];
let pl = arena.team_prior[e + 1] * arena.lhood_win[e + 1];
let raw = pw - pl;
arena.vars.set(tf.diff, raw * tf.msg);
let d = tf.propagate(&mut arena.vars);
step = tuple_max(step, d);
let new_ll = post_win!(e) - trunc[e].msg;
step = tuple_max(step, lhood_lose[e + 1].delta(new_ll));
lhood_lose[e + 1] = new_ll;
let new_ll = pw - tf.msg;
step = tuple_max(step, arena.lhood_lose[e + 1].delta(new_ll));
arena.lhood_lose[e + 1] = new_ll;
}
// Backward sweep: diffs n_diffs-1 .. 1 (reverse, all but the first).
for e in (1..n_diffs).rev() {
let raw = post_win!(e) - post_lose!(e + 1);
arena.vars.set(trunc[e].diff, raw * trunc[e].msg);
let d = trunc[e].propagate(&mut arena.vars);
for (rev_i, tf) in trunc[1..].iter_mut().rev().enumerate() {
let e = n_diffs - 1 - rev_i;
let pw = arena.team_prior[e] * arena.lhood_lose[e];
let pl = arena.team_prior[e + 1] * arena.lhood_win[e + 1];
let raw = pw - pl;
arena.vars.set(tf.diff, raw * tf.msg);
let d = tf.propagate(&mut arena.vars);
step = tuple_max(step, d);
let new_lw = post_lose!(e + 1) + trunc[e].msg;
step = tuple_max(step, lhood_win[e].delta(new_lw));
lhood_win[e] = new_lw;
let new_lw = pl + tf.msg;
step = tuple_max(step, arena.lhood_win[e].delta(new_lw));
arena.lhood_win[e] = new_lw;
}
iter += 1;
}
// Special case: exactly 1 diff (2-team game). The loop body is empty
// for this case (both ranges are empty), so we run the factor once here.
// Special case: exactly 1 diff (2-team game); loop body was empty.
if n_diffs == 1 {
let raw = post_win!(0) - post_lose!(1);
let raw = (arena.team_prior[0] * arena.lhood_lose[0])
- (arena.team_prior[1] * arena.lhood_win[1]);
arena.vars.set(trunc[0].diff, raw * trunc[0].msg);
trunc[0].propagate(&mut arena.vars);
}
// Boundary updates: close the chain at both ends.
if n_diffs > 0 {
lhood_win[0] = post_lose!(1) + trunc[0].msg;
lhood_lose[n_teams - 1] = post_win!(n_teams - 2) - trunc[n_diffs - 1].msg;
let pl1 = arena.team_prior[1] * arena.lhood_win[1];
arena.lhood_win[0] = pl1 + trunc[0].msg;
let pw_last = arena.team_prior[n_teams - 2] * arena.lhood_lose[n_teams - 2];
arena.lhood_lose[n_teams - 1] = pw_last - trunc[n_diffs - 1].msg;
}
// Evidence = product of per-diff evidences (each cached on first propagation).
@@ -187,15 +176,10 @@ impl<'a, D: Drift> Game<'a, D> {
.map(|t| t.evidence_cached.unwrap_or(1.0))
.product();
// Per-team "likelihood" = product of incoming RankDiff messages.
let m_t_ft: Vec<Gaussian> = (0..n_teams)
.map(|si| lhood_win[si] * lhood_lose[si])
.collect();
// Inverse permutation: inv[orig_i] = sorted_i (O(n), avoids clone + O(n²) search).
let mut inv = vec![0usize; n_teams];
// Inverse permutation: inv_buf[orig_i] = sorted_i.
arena.inv_buf.resize(n_teams, 0);
for (si, &orig_i) in arena.sort_buf.iter().enumerate() {
inv[orig_i] = si;
arena.inv_buf[orig_i] = si;
}
self.likelihoods = self
@@ -204,8 +188,8 @@ impl<'a, D: Drift> Game<'a, D> {
.zip(self.weights.iter())
.enumerate()
.map(|(orig_i, (players, weights))| {
let sorted_i = inv[orig_i];
let m = m_t_ft[sorted_i];
let si = arena.inv_buf[orig_i];
let m = arena.lhood_win[si] * arena.lhood_lose[si];
let performance = players
.iter()
.zip(weights.iter())