T4 (MarginFactor): scored outcomes via Gaussian-margin EP evidence
Adds soft Gaussian-observation evidence on the per-pair diff variable,
enabling continuous score margins as a richer alternative to ranks.
Public API:
- `Outcome::Scored([scores])` (non-breaking enum extension under
`#[non_exhaustive]`).
- `Game::scored(teams, outcome, options)` constructor parallel to
`Game::ranked`.
- `EventBuilder::scores([...])` fluent helper.
- `HistoryBuilder::score_sigma(σ)` knob (default 1.0, validated > 0).
- `GameOptions::score_sigma`.
- `EventKind` re-exported from `lib.rs` (annotated `#[non_exhaustive]`).
- New `InferenceError::InvalidParameter { name, value }` variant.
Internals:
- `MarginFactor` (`factor/margin.rs`): Gaussian observation factor that
closes in one EP step; cavity-cached log-evidence mirrors `TruncFactor`.
- `BuiltinFactor::Margin` dispatch arm.
- `DiffFactor` enum in `game.rs` lets `Game::likelihoods` and the new
`likelihoods_scored` share the per-pair link abstraction.
- Per-event `EventKind { Ranked, Scored { score_sigma } }` routed through
`TimeSlice::add_events`, `iteration_direct`, and `log_evidence`.
Tests: 88 lib + 27 integration (4 new in `tests/scored.rs`); existing
goldens byte-identical. Bench: `benches/scored.rs` baseline ~960µs for
60 events × 20-player pool with default convergence.
Plan: docs/superpowers/plans/2026-04-27-t4-margin-factor.md
Spec item marked Done.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
use criterion::{Criterion, criterion_group, criterion_main};
|
||||
use trueskill_tt::{
|
||||
BETA, Competitor, GAMMA, KeyTable, MU, P_DRAW, Rating, SIGMA, TimeSlice, drift::ConstantDrift,
|
||||
gaussian::Gaussian, storage::CompetitorStore,
|
||||
BETA, Competitor, EventKind, GAMMA, KeyTable, MU, P_DRAW, Rating, SIGMA, TimeSlice,
|
||||
drift::ConstantDrift, gaussian::Gaussian, storage::CompetitorStore,
|
||||
};
|
||||
|
||||
fn criterion_benchmark(criterion: &mut Criterion) {
|
||||
@@ -33,8 +33,10 @@ fn criterion_benchmark(criterion: &mut Criterion) {
|
||||
weights.push(vec![vec![1.0], vec![1.0]]);
|
||||
}
|
||||
|
||||
let kinds = vec![EventKind::Ranked; composition.len()];
|
||||
|
||||
let mut time_slice = TimeSlice::new(1, P_DRAW);
|
||||
time_slice.add_events(composition, results, weights, &agents);
|
||||
time_slice.add_events(composition, results, weights, kinds, &agents);
|
||||
|
||||
criterion.bench_function("Batch::iteration", |b| {
|
||||
b.iter(|| time_slice.iteration(0, &agents))
|
||||
|
||||
38
benches/scored.rs
Normal file
38
benches/scored.rs
Normal file
@@ -0,0 +1,38 @@
|
||||
use criterion::{Criterion, criterion_group, criterion_main};
|
||||
use smallvec::smallvec;
|
||||
use trueskill_tt::{ConstantDrift, Event, History, Member, Outcome, Team};
|
||||
|
||||
fn bench_scored_history(c: &mut Criterion) {
|
||||
c.bench_function("scored_history_60_events_30_iter", |bencher| {
|
||||
bencher.iter(|| {
|
||||
let mut h: History<i64, ConstantDrift, _, String> = History::builder_with_key()
|
||||
.mu(25.0)
|
||||
.sigma(25.0 / 3.0)
|
||||
.beta(25.0 / 6.0)
|
||||
.drift(ConstantDrift(0.03))
|
||||
.score_sigma(2.0)
|
||||
.build();
|
||||
|
||||
let mut events: Vec<Event<i64, String>> = Vec::with_capacity(60);
|
||||
for i in 0..60 {
|
||||
let a = format!("p{}", i % 20);
|
||||
let b = format!("p{}", (i + 7) % 20);
|
||||
let s_a = (i as f64 * 0.3).sin().abs() * 21.0;
|
||||
let s_b = (i as f64 * 0.3).cos().abs() * 21.0;
|
||||
events.push(Event {
|
||||
time: 1 + (i / 6) as i64,
|
||||
teams: smallvec![
|
||||
Team::with_members([Member::new(a)]),
|
||||
Team::with_members([Member::new(b)]),
|
||||
],
|
||||
outcome: Outcome::scores([s_a, s_b]),
|
||||
});
|
||||
}
|
||||
h.add_events(events).unwrap();
|
||||
h.converge().unwrap();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Criterion harness wiring: `criterion_group!` registers `bench_scored_history`
// under the group name `benches`, and `criterion_main!` is handed that group to
// produce the benchmark binary's entry point.
criterion_group!(benches, bench_scored_history);
|
||||
criterion_main!(benches);
|
||||
14
benches/scored_baseline.txt
Normal file
14
benches/scored_baseline.txt
Normal file
@@ -0,0 +1,14 @@
|
||||
Finished `bench` profile [optimized + debuginfo] target(s) in 0.02s
|
||||
Running benches/scored.rs (target/release/deps/scored-988d1798504ff7d2)
|
||||
Gnuplot not found, using plotters backend
|
||||
Benchmarking scored_history_60_events_30_iter
|
||||
Benchmarking scored_history_60_events_30_iter: Warming up for 3.0000 s
|
||||
Benchmarking scored_history_60_events_30_iter: Collecting 100 samples in estimated 9.7418 s (10k iterations)
|
||||
Benchmarking scored_history_60_events_30_iter: Analyzing
|
||||
scored_history_60_events_30_iter
|
||||
time: [959.36 µs 962.68 µs 966.13 µs]
|
||||
Found 11 outliers among 100 measurements (11.00%)
|
||||
1 (1.00%) low mild
|
||||
5 (5.00%) high mild
|
||||
5 (5.00%) high severe
|
||||
|
||||
Reference in New Issue
Block a user