diff --git a/src/time_slice.rs b/src/time_slice.rs index cb1eed2..988d072 100644 --- a/src/time_slice.rs +++ b/src/time_slice.rs @@ -85,13 +85,13 @@ pub(crate) struct Event { } /// Output of a single event's inference pass — ready to apply back to shared state. +/// +/// Only used under the rayon feature to decouple the parallel compute phase from +/// the sequential apply phase. Without rayon the direct-write path is used instead. +#[cfg(feature = "rayon")] struct EventOutput { - /// New per-team/per-item likelihoods (same shape as `event.teams`). likelihoods: Vec>, evidence: f64, - /// (agent index, new skill likelihood) pairs for the sequential apply step - /// that updates `SkillStore`. Computed while holding `&SkillStore` so the - /// caller only needs `&mut SkillStore` when writing back. skill_updates: Vec<(Index, Gaussian)>, } @@ -130,6 +130,10 @@ impl Event { /// Compute the inference update for this event, returning an `EventOutput` /// that describes the mutations to apply. Takes only shared references so /// it can run inside a parallel closure. + /// + /// Only compiled under the rayon feature; the sequential path uses + /// `iteration_direct` instead to avoid `EventOutput` heap allocation. + #[cfg(feature = "rayon")] fn compute>( &self, skills: &SkillStore, @@ -141,7 +145,6 @@ impl Event { let result = self.outputs(); let g = Game::ranked_with_arena(teams, &result, &self.weights, p_draw, &mut arena); - // Pre-compute new skill likelihoods while we still hold &skills. let mut skill_updates: Vec<(Index, Gaussian)> = Vec::new(); for (t, team) in self.teams.iter().enumerate() { for (i, item) in team.items.iter().enumerate() { @@ -163,6 +166,7 @@ impl Event { /// Apply an `EventOutput` back onto this event's mutable item likelihoods /// and evidence. The `SkillStore` updates are applied separately by the /// caller to avoid conflicting borrows. + #[cfg(feature = "rayon")] fn apply_output(&mut self, output: &EventOutput) { self.evidence = output.evidence; for (t, team) in self.teams.iter_mut().enumerate() { @@ -171,6 +175,33 @@ impl Event { } } } + + /// Direct in-loop update: mutates self and `skills` inline with no + /// intermediate allocation. Used by the sequential (no rayon) sweep path + /// to match T2 performance. + #[cfg(not(feature = "rayon"))] + fn iteration_direct>( + &mut self, + skills: &mut SkillStore, + agents: &CompetitorStore, + p_draw: f64, + arena: &mut ScratchArena, + ) { + let teams = self.within_priors(false, false, skills, agents); + let result = self.outputs(); + let g = Game::ranked_with_arena(teams, &result, &self.weights, p_draw, arena); + + for (t, team) in self.teams.iter_mut().enumerate() { + for (i, item) in team.items.iter_mut().enumerate() { + let old_likelihood = skills.get(item.agent).unwrap().likelihood; + let new_likelihood = (old_likelihood / item.likelihood) * g.likelihoods[t][i]; + skills.get_mut(item.agent).unwrap().likelihood = new_likelihood; + item.likelihood = g.likelihoods[t][i]; + } + } + + self.evidence = g.evidence; + } } #[derive(Debug)] @@ -355,40 +386,24 @@ impl TimeSlice { /// Full event sweep using the color-group partition. Colors are processed /// sequentially; within each color the inner loop is parallel under rayon. + #[cfg(feature = "rayon")] fn sweep_color_groups>(&mut self, agents: &CompetitorStore) { - // We need &self.skills (immutable) and &mut self.events (mutable) at the - // same time. Rust allows this because they are distinct struct fields. - // The parallel closure captures &self.skills and &self.p_draw by shared - // ref; it returns owned EventOutput values that we apply sequentially. + use rayon::prelude::*; + for color_idx in 0..self.color_groups.groups.len() { if self.color_groups.groups[color_idx].is_empty() { continue; } let range = self.color_groups.color_range(color_idx); - // Compute phase — parallel under rayon, sequential otherwise. - // Borrows: &self.skills and &agents are shared refs captured by the closure; - // &mut self.events[range] is the mutable slice for par_iter_mut. let p_draw = self.p_draw; let skills: &SkillStore = &self.skills; - #[cfg(feature = "rayon")] - let outputs: Vec = { - use rayon::prelude::*; - self.events[range.clone()] - .par_iter() - .map(|ev| ev.compute(skills, agents, p_draw)) - .collect() - }; - - #[cfg(not(feature = "rayon"))] let outputs: Vec = self.events[range.clone()] - .iter() + .par_iter() .map(|ev| ev.compute(skills, agents, p_draw)) .collect(); - // Apply phase — sequential: write skill likelihoods back to self.skills, - // then update per-event item likelihoods and evidence. for (ev, output) in self.events[range].iter_mut().zip(outputs.iter()) { for &(agent, new_skill_lhood) in &output.skill_updates { self.skills.get_mut(agent).unwrap().likelihood = new_skill_lhood; @@ -398,6 +413,27 @@ impl TimeSlice { } } + /// Full event sweep using the color-group partition, sequential direct-write path. + /// Events within each color group are updated inline — no EventOutput allocation — + /// matching the T2 performance profile. + #[cfg(not(feature = "rayon"))] + fn sweep_color_groups>(&mut self, agents: &CompetitorStore) { + for color_idx in 0..self.color_groups.groups.len() { + if self.color_groups.groups[color_idx].is_empty() { + continue; + } + let range = self.color_groups.color_range(color_idx); + + // Borrow self.events as a mutable slice for this color range. + // self.skills and self.arena are separate fields — disjoint borrows are + // allowed within a single method body. + let p_draw = self.p_draw; + for ev in &mut self.events[range] { + ev.iteration_direct(&mut self.skills, agents, p_draw, &mut self.arena); + } + } + } + #[allow(dead_code)] pub(crate) fn convergence>(&mut self, agents: &CompetitorStore) -> usize { let epsilon = 1e-6;