Files
trueskill-tt/tests/game.rs
Anders Olsson 8b53cacd64 T4 (MarginFactor): scored outcomes via Gaussian-margin EP evidence
Adds soft Gaussian-observation evidence on the per-pair diff variable,
enabling continuous score margins as a richer alternative to ranks.

Public API:
- `Outcome::Scored([scores])` (non-breaking enum extension under
  `#[non_exhaustive]`).
- `Game::scored(teams, outcome, options)` constructor parallel to
  `Game::ranked`.
- `EventBuilder::scores([...])` fluent helper.
- `HistoryBuilder::score_sigma(σ)` knob (default 1.0, validated > 0).
- `GameOptions::score_sigma`.
- `EventKind` re-exported from `lib.rs` (annotated `#[non_exhaustive]`).
- New `InferenceError::InvalidParameter { name, value }` variant.

Internals:
- `MarginFactor` (`factor/margin.rs`): Gaussian observation factor that
  closes in one EP step; cavity-cached log-evidence mirrors `TruncFactor`.
- `BuiltinFactor::Margin` dispatch arm.
- `DiffFactor` enum in `game.rs` lets `Game::likelihoods` and the new
  `likelihoods_scored` share the per-pair link abstraction.
- Per-event `EventKind { Ranked, Scored { score_sigma } }` routed through
  `TimeSlice::add_events`, `iteration_direct`, and `log_evidence`.

Tests: 88 lib + 27 integration (4 new in `tests/scored.rs`); existing
goldens byte-identical.  Bench: `benches/scored.rs` baseline ~960µs for
60 events × 20-player pool with default convergence.

Plan: docs/superpowers/plans/2026-04-27-t4-margin-factor.md
Spec item marked Done.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 08:47:36 +02:00

98 lines
2.5 KiB
Rust

use trueskill_tt::{
ConstantDrift, ConvergenceOptions, Game, GameOptions, Gaussian, InferenceError, Outcome, Rating,
};
/// Alias for the rating type these tests exercise: `i64` timestamps with a
/// constant-drift dynamics model.
type R = Rating<i64, ConstantDrift>;

/// Builds the shared test prior: mean 25, sigma 25/3, beta 25/6, and a
/// constant drift of 25/300 per time unit.
fn default_rating() -> R {
    let mu = 25.0;
    R::new(
        Gaussian::from_ms(mu, mu / 3.0),
        mu / 6.0,
        ConstantDrift(mu / 300.0),
    )
}
/// A decisive 1v1 between identical priors: the winner's mean rises, the
/// loser's falls, and by symmetry both posteriors end with the same sigma.
#[test]
fn game_ranked_1v1_golden() {
    let p1 = default_rating();
    let p2 = default_rating();
    let game = Game::<i64, _>::ranked(
        &[&[p1], &[p2]],
        Outcome::winner(0, 2),
        &GameOptions::default(),
    )
    .unwrap();
    let posts = game.posteriors();
    assert!(posts[0][0].mu() > 25.0, "winner's mean should increase");
    assert!(posts[1][0].mu() < 25.0, "loser's mean should decrease");
    let sigma_gap = (posts[0][0].sigma() - posts[1][0].sigma()).abs();
    assert!(sigma_gap < 1e-6, "symmetric priors should yield symmetric sigmas");
}
/// The `one_v_one` convenience wrapper should move the ratings the same
/// direction as the full ranked API: winner up, loser down.
#[test]
fn game_one_v_one_shortcut() {
    let challenger = default_rating();
    let opponent = default_rating();
    let outcome = Outcome::winner(0, 2);
    let (c_post, o_post) = Game::<i64, _>::one_v_one(&challenger, &opponent, outcome).unwrap();
    assert!(c_post.mu() > 25.0);
    assert!(o_post.mu() < 25.0);
}
/// A draw probability outside [0, 1] must be rejected with
/// `InvalidProbability` before any inference runs.
#[test]
fn game_ranked_rejects_bad_p_draw() {
    let player = R::new(Gaussian::default(), 1.0, ConstantDrift(0.0));
    // Only `p_draw` is out of range; the other options are valid.
    let bad_options = GameOptions {
        p_draw: 1.5,
        score_sigma: 1.0,
        convergence: ConvergenceOptions::default(),
    };
    let result = Game::<i64, _>::ranked(
        &[&[player], &[player]],
        Outcome::winner(0, 2),
        &bad_options,
    );
    assert!(matches!(
        result.unwrap_err(),
        InferenceError::InvalidProbability { .. }
    ));
}
/// Supplying more rank entries than teams is a shape error
/// (`MismatchedShape`), not a silent truncation.
#[test]
fn game_ranked_rejects_mismatched_ranks() {
    let player = R::new(Gaussian::default(), 1.0, ConstantDrift(0.0));
    // Two teams, but three ranks.
    let result = Game::<i64, _>::ranked(
        &[&[player], &[player]],
        Outcome::ranking([0, 1, 2]),
        &GameOptions::default(),
    );
    assert!(matches!(
        result.unwrap_err(),
        InferenceError::MismatchedShape { .. }
    ));
}
/// Three-player free-for-all finishing 0 > 1 > 2: the posterior means must
/// reproduce the finishing order.
#[test]
fn game_free_for_all_three_players() {
    let first = default_rating();
    let second = default_rating();
    let third = default_rating();
    let game = Game::<i64, _>::free_for_all(
        &[&first, &second, &third],
        Outcome::ranking([0, 1, 2]),
        &GameOptions::default(),
    )
    .unwrap();
    let posts = game.posteriors();
    assert_eq!(posts.len(), 3, "one posterior team per player");
    assert!(posts[0][0].mu() > posts[1][0].mu());
    assert!(posts[1][0].mu() > posts[2][0].mu());
}
/// The log-evidence of a decisive 1v1 between identical priors is a
/// log-probability: finite and strictly negative.
#[test]
fn game_log_evidence_is_finite() {
    let a = default_rating();
    let b = default_rating();
    let g = Game::<i64, _>::ranked(
        &[&[a], &[b]],
        Outcome::winner(0, 2),
        &GameOptions::default(),
    )
    .unwrap();
    // Bind once rather than calling `log_evidence()` per assertion — avoids
    // recomputing and guarantees both checks observe the same value.
    let log_evidence = g.log_evidence();
    assert!(log_evidence.is_finite());
    assert!(
        log_evidence < 0.0,
        "evidence is a probability, so its log must be negative"
    );
}