diff --git a/Cargo.toml b/Cargo.toml index 37bd60c..129ceb1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,4 +5,13 @@ authors = ["Anders Olsson "] edition = "2018" [dependencies] -ndarray = "0.13" \ No newline at end of file +expm = "0.1" +ndarray = "0.13" +ndarray-linalg = { version = "0.12" } +cblas = "0.2" +lapacke = "0.2" +statrs = "0.10" +ordered-float = "1.0" +rand = "0.6" +rand_xoshiro = "0.1" +openblas-src = { version = "0.8", features = ["static"] } \ No newline at end of file diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..8190413 --- /dev/null +++ b/build.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +export LIBRARY_PATH="$LIBRARY_PATH:/usr/local/gfortran/lib" + +cargo run --example basic diff --git a/examples/basic.rs b/examples/basic.rs index a35e7b4..f452799 100644 --- a/examples/basic.rs +++ b/examples/basic.rs @@ -1,3 +1,5 @@ +extern crate openblas_src; + use kickscore as ks; fn main() { diff --git a/src/condest.rs b/src/condest.rs new file mode 100644 index 0000000..3539662 --- /dev/null +++ b/src/condest.rs @@ -0,0 +1,973 @@ +//! This crate implements the matrix 1-norm estimator by [Higham and Tisseur]. +//! +//! [Higham and Tisseur]: http://eprints.ma.man.ac.uk/321/1/covered/MIMS_ep2006_145.pdf +use std::cmp; +use std::collections::BTreeSet; +use std::slice; + +use ndarray::{prelude::*, s, ArrayBase, Data, DataMut, Dimension, Ix1, Ix2}; +use ordered_float::NotNan; +use rand::{thread_rng, Rng, SeedableRng}; +use rand_xoshiro::Xoshiro256StarStar; + +pub struct Normest1 { + n: usize, + t: usize, + rng: Xoshiro256StarStar, + x_matrix: Array2, + y_matrix: Array2, + z_matrix: Array2, + w_vector: Array1, + sign_matrix: Array2, + sign_matrix_old: Array2, + column_is_parallel: Vec, + indices: Vec, + indices_history: BTreeSet, + h: Vec>, +} + +/// A trait to generalize over 1-norm estimates of a matrix `A`, matrix powers `A^m`, +/// or matrix products `A1 * A2 * ... * An`. +/// +/// In the 1-norm estimator, one repeatedly constructs a matrix-matrix product between some n×n +/// matrix X and some other n×t matrix Y. If one wanted to estimate the 1-norm of a matrix m times +/// itself, X^m, it might thus be computationally less expensive to repeatedly apply +/// X * ( * ( X ... ( X * Y ) rather than to calculate Z = X^m = X * X * ... * X and then apply Z * +/// Y. In the first case, one has several matrix-matrix multiplications with complexity O(m*n*n*t), +/// while in the latter case one has O(m*n*n*n) (plus one more O(n*n*t)). +/// +/// So in case of t << n, it is cheaper to repeatedly apply matrix multiplication to the smaller +/// matrix on the RHS, rather than to construct one definite matrix on the LHS. Of course, this is +/// modified by the number of iterations needed when performing the norm estimate, sustained +/// performance of the matrix multiplication method used, etc. +/// +/// It is at the designation of the user to check what is more efficient: to pass in one definite +/// matrix or choose the alternative route described here. +trait LinearOperator { + fn multiply_matrix( + &self, + b: &mut ArrayBase, + c: &mut ArrayBase, + transpose: bool, + ) where + S: DataMut; +} + +impl LinearOperator for ArrayBase +where + S1: Data, +{ + fn multiply_matrix( + &self, + b: &mut ArrayBase, + c: &mut ArrayBase, + transpose: bool, + ) where + S2: DataMut, + { + let (n_rows, n_cols) = self.dim(); + assert_eq!( + n_rows, n_cols, + "Number of rows and columns does not match: `self` has to be a square matrix" + ); + let n = n_rows; + + let (b_n, b_t) = b.dim(); + let (c_n, c_t) = b.dim(); + + assert_eq!( + n, b_n, + "Number of rows of b not equal to number of rows of `self`." + ); + assert_eq!( + n, c_n, + "Number of rows of c not equal to number of rows of `self`." + ); + + assert_eq!( + b_t, c_t, + "Number of columns of b not equal to number of columns of c." + ); + + let t = b_t; + + let (a_slice, a_layout) = + as_slice_with_layout(self).expect("Matrix `self` not contiguous."); + let (b_slice, b_layout) = as_slice_with_layout(b).expect("Matrix `b` not contiguous."); + let (c_slice, c_layout) = as_slice_with_layout_mut(c).expect("Matrix `c` not contiguous."); + + assert_eq!(a_layout, b_layout); + assert_eq!(a_layout, c_layout); + + let layout = a_layout; + + let a_transpose = if transpose { + cblas::Transpose::Ordinary + } else { + cblas::Transpose::None + }; + + unsafe { + cblas::dgemm( + layout, + a_transpose, + cblas::Transpose::None, + n as i32, + t as i32, + n as i32, + 1.0, + a_slice, + n as i32, + b_slice, + t as i32, + 0.0, + c_slice, + t as i32, + ) + } + } +} + +impl LinearOperator for [&ArrayBase] +where + S1: Data, +{ + fn multiply_matrix( + &self, + b: &mut ArrayBase, + c: &mut ArrayBase, + transpose: bool, + ) where + S2: DataMut, + { + if self.len() > 0 { + let mut reversed; + let mut forward; + + // TODO: Investigate, if an enum instead of a trait object might be more performant. + // This probably doesn't matter for large matrices, but could have a measurable impact + // on small ones. + let a_iter: &mut dyn DoubleEndedIterator = if transpose { + reversed = self.iter().rev(); + &mut reversed + } else { + forward = self.iter(); + &mut forward + }; + let a = a_iter.next().unwrap(); // Ok because of if condition + a.multiply_matrix(b, c, transpose); + + // NOTE: The swap in the loop body makes use of the fact that in all instances where + // `multiply_matrix` is used, the values potentially stored in `b` are not required + // anymore. + for a in a_iter { + std::mem::swap(b, c); + a.multiply_matrix(b, c, transpose); + } + } + } +} + +impl LinearOperator for (&ArrayBase, usize) +where + S1: Data, +{ + fn multiply_matrix( + &self, + b: &mut ArrayBase, + c: &mut ArrayBase, + transpose: bool, + ) where + S2: DataMut, + { + let a = self.0; + let m = self.1; + if m > 0 { + a.multiply_matrix(b, c, transpose); + for _ in 1..m { + std::mem::swap(b, c); + self.0.multiply_matrix(b, c, transpose); + } + } + } +} + +impl Normest1 { + pub fn new(n: usize, t: usize) -> Self { + assert!( + t <= n, + "Cannot have more iteration columns t than columns in the matrix." + ); + let rng = + Xoshiro256StarStar::from_rng(&mut thread_rng()).expect("Rng initialization failed."); + let x_matrix = unsafe { Array2::::uninitialized((n, t)) }; + let y_matrix = unsafe { Array2::::uninitialized((n, t)) }; + let z_matrix = unsafe { Array2::::uninitialized((n, t)) }; + + let w_vector = unsafe { Array1::uninitialized(n) }; + + let sign_matrix = unsafe { Array2::::uninitialized((n, t)) }; + let sign_matrix_old = unsafe { Array2::::uninitialized((n, t)) }; + + let column_is_parallel = vec![false; t]; + + let indices = (0..n).collect(); + let indices_history = BTreeSet::new(); + + let h = vec![unsafe { NotNan::unchecked_new(0.0) }; n]; + + Normest1 { + n, + t, + rng, + x_matrix, + y_matrix, + z_matrix, + w_vector, + sign_matrix, + sign_matrix_old, + column_is_parallel, + indices, + indices_history, + h, + } + } + + fn calculate(&mut self, a_linear_operator: &L, itmax: usize) -> f64 + where + L: LinearOperator + ?Sized, + { + assert!(itmax > 1, "normest1 is undefined for iterations itmax < 2"); + + // Explicitly empty the index history; all other quantities will be overwritten at some + // point. + self.indices_history.clear(); + + let n = self.n; + let t = self.t; + + let sample = [-1., 1.0]; + + // “We now explain our choice of starting matrix. We take the first column of X to be the + // vector of 1s, which is the starting vector used in Algorithm 2.1. This has the advantage + // that for a matrix with nonnegative elements the algorithm converges with an exact estimate + // on the second iteration, and such matrices arise in applications, for example as a + // stochastic matrix or as the inverse of an M -matrix.” + // + // “The remaining columns are chosen as rand {− 1 , 1 } , with a check for and correction of + // parallel columns, exactly as for S in the body of the algorithm. We choose random vectors + // because it is difficult to argue for any particular fixed vectors and because randomness + // lessens the importance of counterexamples (see the comments in the next section).” + { + let rng_mut = &mut self.rng; + self.x_matrix + .mapv_inplace(|_| sample[rng_mut.gen_range(0, sample.len())]); + self.x_matrix.column_mut(0).fill(1.); + } + + // Resample the x_matrix to make sure no columns are parallel + find_parallel_columns_in( + &self.x_matrix, + &mut self.y_matrix, + &mut self.column_is_parallel, + ); + for (i, is_parallel) in self.column_is_parallel.iter().enumerate() { + if *is_parallel { + resample_column(&mut self.x_matrix, i, &mut self.rng, &sample); + } + } + + // Set all columns to unit vectors + self.x_matrix.mapv_inplace(|x| x / n as f64); + + let mut estimate = 0.0; + let mut best_index = 0; + + 'optimization_loop: for k in 0..itmax { + // Y = A X + a_linear_operator.multiply_matrix(&mut self.x_matrix, &mut self.y_matrix, false); + + // est = max{‖Y(:,j)‖₁ : j = 1:t} + let (max_norm_index, max_norm) = matrix_onenorm_with_index(&self.y_matrix); + + // if est > est_old or k=2 + if max_norm > estimate || k == 1 { + // ind_best = indⱼ where est = ‖Y(:,j)‖₁, w = Y(:, ind_best) + estimate = max_norm; + best_index = self.indices[max_norm_index]; + self.w_vector.assign(&self.y_matrix.column(max_norm_index)); + } else if k > 1 && max_norm <= estimate { + break 'optimization_loop; + } + + if k >= itmax { + break 'optimization_loop; + } + + // S = sign(Y) + assign_signum_of_array(&self.y_matrix, &mut self.sign_matrix); + + // TODO: Combine the test checking for parallelity between _all_ columns between S + // and S_old with the “if t > 1” test below. + // + // > If every column of S is parallel to a column of Sold, goto (6), end + // + // NOTE: We are reusing `y_matrix` here as a temporary value. + if are_all_columns_parallel_between( + &self.sign_matrix_old, + &self.sign_matrix, + &mut self.y_matrix, + ) { + break 'optimization_loop; + } + + // FIXME: Is an explicit if condition here necessary? + if t > 1 { + // > Ensure that no column of S is parallel to another column of S + // > or to a column of Sold by replacing columns of S by rand{-1,+1} + // + // NOTE: We are reusing `y_matrix` here as a temporary value. + resample_parallel_columns( + &mut self.sign_matrix, + &self.sign_matrix_old, + &mut self.y_matrix, + &mut self.column_is_parallel, + &mut self.rng, + &sample, + ); + } + + // > est_old = est, Sold = S + // NOTE: Other than in the original algorithm, we store the sign matrix at this point + // already. This way, we can reuse the sign matrix as additional workspace which is + // useful when performing matrix multiplication with A^m or A1 A2 ... An (see the + // description of the LinearOperator trait for explanation). + // + // NOTE: We don't “save” the old estimate, because we are using max_norm as another name + // for the new estimate instead of overwriting/reusing est. + self.sign_matrix_old.assign(&self.sign_matrix); + + // Z = A^T S + a_linear_operator.multiply_matrix(&mut self.sign_matrix, &mut self.z_matrix, true); + + // hᵢ= ‖Z(i,:)‖_∞ + let mut max_h = 0.0; + for (row, h_element) in self.z_matrix.genrows().into_iter().zip(self.h.iter_mut()) { + let h = vector_maxnorm(&row); + max_h = if h > max_h { h } else { max_h }; + // Convert f64 to NotNan for using sort_unstable_by below + *h_element = h.into(); + } + + // TODO: This test for equality needs an approximate equality test instead. + if k > 0 && max_h == self.h[best_index].into() { + break 'optimization_loop; + } + + // > Sort h so that h_1 >= ... >= h_n and re-order correspondingly. + // NOTE: h itself doesn't need to be reordered. Only the order of + // the indices is relevant. + { + let h_ref = &self.h; + self.indices + .sort_unstable_by(|i, j| h_ref[*j].cmp(&h_ref[*i])); + } + + self.x_matrix.fill(0.0); + if t > 1 { + // > Replace ind(1:t) by the first t indices in ind(1:n) that are not in ind_hist. + // + // > X(:, j) = e_ind_j, j = 1:t + // + // > ind_hist = [ind_hist ind(1:t)] + // + // NOTE: It's not actually needed to operate on the `indices` vector. What's important + // is that the history of indices, `indices_history`, gets updated with visited indices, + // and that each column of `x_matrix` is assigned that unit vector that is defined by the + // respective index. + // + // If so many indices have already been used that `n_cols - indices_history.len() < t` + // (which means that we have less than `t` unused indices remaining), we have to use a few + // historical indices when filling up the columns in `x_matrix`. For that, we put the + // historical indices after the fresh indices, but otherwise keep the order induced by `h` + // above. + let fresh_indices = cmp::min(t, n - self.indices_history.len()); + if fresh_indices == 0 { + break 'optimization_loop; + } + let mut current_column_fresh = 0; + let mut current_column_historical = fresh_indices; + let mut index_iterator = self.indices.iter(); + + let mut all_first_t_in_history = true; + // First, iterate over the first t sorted indices. + for i in (&mut index_iterator).take(t) { + if !self.indices_history.contains(i) { + all_first_t_in_history = false; + self.x_matrix[(*i, current_column_fresh)] = 1.0; + current_column_fresh += 1; + self.indices_history.insert(*i); + } else if current_column_historical < t { + self.x_matrix[(*i, current_column_historical)] = 1.0; + current_column_historical += 1; + } + } + + // > if ind(1:t) is contained in ind_hist, goto (6), end + if all_first_t_in_history { + break 'optimization_loop; + } + + // Iterate over the remaining indices + 'fill_x: for i in index_iterator { + if current_column_fresh >= t { + break 'fill_x; + } + if !self.indices_history.contains(i) { + self.x_matrix[(*i, current_column_fresh)] = 1.0; + current_column_fresh += 1; + self.indices_history.insert(*i); + } else if current_column_historical < t { + self.x_matrix[(*i, current_column_historical)] = 1.0; + current_column_historical += 1; + } + } + } + } + + estimate + } + + /// Estimate the 1-norm of matrix `a` using up to `itmax` iterations. + pub fn normest1(&mut self, a: &ArrayBase, itmax: usize) -> f64 + where + S: Data, + { + self.calculate(a, itmax) + } + + /// Estimate the 1-norm of a marix `a` to the power `m` up to `itmax` iterations. + pub fn normest1_pow(&mut self, a: &ArrayBase, m: usize, itmax: usize) -> f64 + where + S: Data, + { + self.calculate(&(a, m), itmax) + } + + /// Estimate the 1-norm of a product of matrices `a1 a2 ... an` up to `itmax` iterations. + pub fn normest1_prod(&mut self, aprod: &[&ArrayBase], itmax: usize) -> f64 + where + S: Data, + { + self.calculate(aprod, itmax) + } +} + +/// Estimates the 1-norm of matrix `a`. +/// +/// The parameter `t` is the number of vectors that have to fulfill some bound. See [Higham, +/// Tisseur] for more information. `itmax` is the maximum number of sweeps permitted. +/// +/// **NOTE:** This function allocates on every call. If you want to repeatedly estimate the +/// 1-norm on matrices of the same size, construct a [`Normest1`] first, and call its methods. +/// +/// [Higham, Tisseur]: http://eprints.ma.man.ac.uk/321/1/covered/MIMS_ep2006_145.pdf +/// [`Normest1`]: struct.Normest1.html +pub fn normest1(a_matrix: &Array2, t: usize, itmax: usize) -> f64 { + // Assume the matrix is square and take the columns as n. If it's not square, the assertion in + // normest.calculate will fail. + let n = a_matrix.dim().1; + let mut normest1 = Normest1::new(n, t); + normest1.normest1(a_matrix, itmax) +} + +/// Estimates the 1-norm of a matrix `a` to the power `m`, `a^m`. +/// +/// The parameter `t` is the number of vectors that have to fulfill some bound. See [Higham, +/// Tisseur] for more information. `itmax` is the maximum number of sweeps permitted. +/// +/// **NOTE:** This function allocates on every call. If you want to repeatedly estimate the +/// 1-norm on matrices of the same size, construct a [`Normest1`] first, and call its methods. +/// +/// [Higham, Tisseur]: http://eprints.ma.man.ac.uk/321/1/covered/MIMS_ep2006_145.pdf +pub fn normest1_pow(a_matrix: &Array2, m: usize, t: usize, itmax: usize) -> f64 { + // Assume the matrix is square and take the columns as n. If it's not square, the assertion in + // normest.calculate will fail. + let n = a_matrix.dim().1; + let mut normest1 = Normest1::new(n, t); + normest1.normest1_pow(a_matrix, m, itmax) +} + +/// Estimates the 1-norm of a product of matrices `a1`, `a2`, ..., `an` passed in as a slice of +/// references. +/// +/// The parameter `t` is the number of vectors that have to fulfill some bound. See [Higham, +/// Tisseur] for more information. `itmax` is the maximum number of sweeps permitted. +/// +/// **NOTE:** This function allocates on every call. If you want to repeatedly estimate the +/// 1-norm on matrices of the same size, construct a [`Normest1`] first, and call its methods. +/// +/// [Higham, Tisseur]: http://eprints.ma.man.ac.uk/321/1/covered/MIMS_ep2006_145.pdf +pub fn normest1_prod(a_matrices: &[&Array2], t: usize, itmax: usize) -> f64 { + assert!(a_matrices.len() > 0); + let n = a_matrices[0].dim().1; + let mut normest1 = Normest1::new(n, t); + normest1.normest1_prod(a_matrices, itmax) +} + +/// Assigns the sign of matrix `a` to matrix `b`. +/// +/// Panics if matrices `a` and `b` have different shape and strides, or if either underlying array is +/// non-contiguous. This is to make sure that the iteration order over the matrices is the same. +fn assign_signum_of_array(a: &ArrayBase, b: &mut ArrayBase) +where + S1: Data, + S2: DataMut, + D: Dimension, +{ + assert_eq!(a.strides(), b.strides()); + let (a_slice, a_layout) = as_slice_with_layout(a).expect("Matrix `a` is not contiguous."); + let (b_slice, b_layout) = as_slice_with_layout_mut(b).expect("Matrix `b` is not contiguous."); + assert_eq!(a_layout, b_layout); + + signum_of_slice(a_slice, b_slice); +} + +fn signum_of_slice(source: &[f64], destination: &mut [f64]) { + for (s, d) in source.iter().zip(destination) { + *d = s.signum(); + } +} + +/// Calculate the onenorm of a vector (an `ArrayBase` with dimension `Ix1`). +fn vector_onenorm(a: &ArrayBase) -> f64 +where + S: Data, +{ + let stride = a.strides()[0]; + assert!(stride >= 0); + let stride = stride as usize; + let n_elements = a.len(); + let a_slice = { + let a = a.as_ptr(); + let total_len = n_elements * stride; + unsafe { slice::from_raw_parts(a, total_len) } + }; + + unsafe { cblas::dasum(n_elements as i32, a_slice, stride as i32) } +} + +/// Calculate the maximum norm of a vector (an `ArrayBase` with dimension `Ix1`). +fn vector_maxnorm(a: &ArrayBase) -> f64 +where + S: Data, +{ + let stride = a.strides()[0]; + assert!(stride >= 0); + let stride = stride as usize; + let n_elements = a.len(); + let a_slice = { + let a = a.as_ptr(); + let total_len = n_elements * stride; + unsafe { slice::from_raw_parts(a, total_len) } + }; + + let idx = unsafe { cblas::idamax(n_elements as i32, a_slice, stride as i32) as usize }; + f64::abs(a[idx]) +} + +// /// Calculate the onenorm of a matrix (an `ArrayBase` with dimension `Ix2`). +// fn matrix_onenorm(a: &ArrayBase) -> f64 +// where S: Data, +// { +// let (n_rows, n_cols) = a.dim(); +// if let Some((a_slice, layout)) = as_slice_with_layout(a) { +// let layout = match layout { +// cblas::Layout::RowMajor => lapacke::Layout::RowMajor, +// cblas::Layout::ColumnMajor => lapacke::Layout::ColumnMajor, +// }; +// unsafe { +// lapacke::dlange( +// layout, +// b'1', +// n_rows as i32, +// n_cols as i32, +// a_slice, +// n_rows as i32, +// ) +// } +// // Fall through case for non-contiguous arrays. +// } else { +// a.gencolumns().into_iter() +// .fold(0.0, |max, column| { +// let onenorm = column.fold(0.0, |acc, element| { acc + f64::abs(*element) }); +// if onenorm > max { onenorm } else { max } +// }) +// } +// } + +/// Returns the one-norm of a matrix `a` together with the index of that column for +/// which the norm is maximal. +fn matrix_onenorm_with_index(a: &ArrayBase) -> (usize, f64) +where + S: Data, +{ + let mut max_norm = 0.0; + let mut max_norm_index = 0; + for (i, column) in a.gencolumns().into_iter().enumerate() { + let norm = vector_onenorm(&column); + if norm > max_norm { + max_norm = norm; + max_norm_index = i; + } + } + (max_norm_index, max_norm) +} + +/// Finds columns in the matrix `a` that are parallel to to some other column in `a`. +/// +/// Assumes that all entries of `a` are either +1 or -1. +/// +/// If column `j` of matrix `a` is parallel to some column `i`, `column_is_parallel[i]` is set to +/// `true`. The matrix `c` is used as an intermediate value for the matrix product `a^t * a`. +/// +/// This function does not reset `column_is_parallel` to `false`. Entries that are `true` will be +/// assumed to be parallel and not checked. +/// +/// Panics if arrays `a` and `c` don't have the same dimensions, or if the length of the slice +/// `column_is_parallel` is not equal to the number of columns in `a`. +fn find_parallel_columns_in( + a: &ArrayBase, + c: &mut ArrayBase, + column_is_parallel: &mut [bool], +) where + S1: Data, + S2: DataMut, +{ + let a_dim = a.dim(); + let c_dim = c.dim(); + assert_eq!(a_dim, c_dim); + + let (n_rows, n_cols) = a_dim; + + assert_eq!(column_is_parallel.len(), n_cols); + { + let (a_slice, a_layout) = as_slice_with_layout(a).expect("Matrix `a` is not contiguous."); + let (c_slice, c_layout) = + as_slice_with_layout_mut(c).expect("Matrix `c` is not contiguous."); + assert_eq!(a_layout, c_layout); + let layout = a_layout; + + // NOTE: When calling the wrapped Fortran dsyrk subroutine with row major layout, + // cblas::*syrk changes `'U'` to `'L'` (`Upper` to `Lower`), and `'O'` to `'N'` (`Ordinary` + // to `None`). Different from `cblas::*gemm`, however, it does not automatically make sure + // that the other arguments are changed to make sense in a routine expecting column major + // order (in `cblas::*gemm`, this happens by flipping the matrices `a` and `b` as + // arguments). + // + // So while `cblas::dsyrk` changes transposition and the position of where the results are + // written to, it passes the other arguments on to the Fortran routine as is. + // + // For example, in case matrix `a` is a 4x2 matrix in column major order, and we want to + // perform the operation `a^T a` on it (resulting in a symmetric 2x2 matrix), we would pass + // TRANS='T', N=2 (order of c), K=4 (number of rows because of 'T'), LDA=4 (max(1,k) + // because of 'T'), LDC=2. + // + // But if `a` is in row major order and we want to perform the same operation, we pass + // TRANS='T' (gets translated to 'N'), N=2, K=2 (number of columns, because we 'T' -> 'N'), + // LDA=2 (max(1,n) because of 'N'), LDC=2. + // + // In other words, because of row major order, the Fortran routine actually sees our 4x2 + // matrix as a 2x4 matrix, and if we want to calculate `a^T a`, `cblas::dsyrk` makes sure + // `'N'` is passed. + let (k, lda) = match layout { + cblas::Layout::ColumnMajor => (n_cols, n_rows), + cblas::Layout::RowMajor => (n_rows, n_cols), + }; + unsafe { + cblas::dsyrk( + layout, + cblas::Part::Upper, + cblas::Transpose::Ordinary, + n_cols as i32, + k as i32, + 1.0, + a_slice, + lda as i32, + 0.0, + c_slice, + n_cols as i32, + ); + } + } + + // c is upper triangular and contains all pair-wise vector products: + // + // x x x x x + // . x x x x + // . . x x x + // . . . x x + // . . . . x + + // Don't check more rows than we have columns + 'rows: for (i, row) in c.genrows().into_iter().enumerate().take(n_cols) { + // Skip if the column is already found to be parallel or if we are checking + // the last column + if column_is_parallel[i] || i >= n_cols - 1 { + continue 'rows; + } + for (j, element) in row.slice(s![i + 1..]).iter().enumerate() { + // Check if the vectors are parallel or anti-parallel + if f64::abs(*element) == n_rows as f64 { + column_is_parallel[i + j + 1] = true; + } + } + } +} + +/// Checks whether any columns of the matrix `a` are parallel to any columns of `b`. +/// +/// Assumes that we have parallelity only if all entries of two columns `a` and `b` are either +1 +/// or -1. +/// +/// `The matrix `c` is used as an intermediate value for the matrix product `a^t * b`. +/// +/// `column_is_parallel[j]` is set to `true` if column `j` of matrix `a` is parallel to some column +/// `i` of the matrix `b`, +/// +/// This function does not reset `column_is_parallel` to `false`. Entries that are `true` will be +/// assumed to be parallel and not checked. +/// +/// Panics if arrays `a`, `b`, and `c` don't have the same dimensions, or if the length of the slice +/// `column_is_parallel` is not equal to the number of columns in `a`. +fn find_parallel_columns_between( + a: &ArrayBase, + b: &ArrayBase, + c: &mut ArrayBase, + column_is_parallel: &mut [bool], +) where + S1: Data, + S2: Data, + S3: DataMut, +{ + let a_dim = a.dim(); + let b_dim = b.dim(); + let c_dim = c.dim(); + assert_eq!(a_dim, b_dim); + assert_eq!(a_dim, c_dim); + + let (n_rows, n_cols) = a_dim; + + assert_eq!(column_is_parallel.len(), n_cols); + + // Extra scope, because c_slice needs to be dropped after the dgemm + { + let (a_slice, a_layout) = as_slice_with_layout(&a).expect("Matrix `a` not contiguous."); + let (b_slice, b_layout) = as_slice_with_layout(&b).expect("Matrix `b` not contiguous."); + let (c_slice, c_layout) = as_slice_with_layout_mut(c).expect("Matrix `c` not contiguous."); + + assert_eq!(a_layout, b_layout); + assert_eq!(a_layout, c_layout); + + let layout = a_layout; + + unsafe { + cblas::dgemm( + layout, + cblas::Transpose::Ordinary, + cblas::Transpose::None, + n_cols as i32, + n_cols as i32, + n_rows as i32, + 1.0, + a_slice, + n_cols as i32, + b_slice, + n_cols as i32, + 0.0, + c_slice, + n_cols as i32, + ); + } + } + + // We are iterating over the rows because it's more memory efficient (for row-major array). In + // terms of logic there is no difference: we simply check if the current column of a (that's + // the outer loop) is parallel to any column of b (inner loop). By iterating via columns we would check if + // any column of a is parallel to the, in that case, current column of b. + // TODO: Implement for column major arrays. + 'rows: for (i, row) in c.genrows().into_iter().enumerate().take(n_cols) { + // Skip if the column is already found to be parallel the last column. + if column_is_parallel[i] { + continue 'rows; + } + for element in row { + if f64::abs(*element) == n_rows as f64 { + column_is_parallel[i] = true; + continue 'rows; + } + } + } +} + +/// Check if every column in `a` is parallel to some column in `b`. +/// +/// Assumes that we have parallelity only if all entries of two columns `a` and `b` are either +1 +/// or -1. +fn are_all_columns_parallel_between( + a: &ArrayBase, + b: &ArrayBase, + c: &mut ArrayBase, +) -> bool +where + S1: Data, + S2: DataMut, +{ + let a_dim = a.dim(); + let b_dim = b.dim(); + let c_dim = c.dim(); + assert_eq!(a_dim, b_dim); + assert_eq!(a_dim, c_dim); + + let (n_rows, n_cols) = a_dim; + + // Extra scope, because c_slice needs to be dropped after the dgemm + { + let (a_slice, a_layout) = as_slice_with_layout(&a).expect("Matrix `a` not contiguous."); + let (b_slice, b_layout) = as_slice_with_layout(&b).expect("Matrix `b` not contiguous."); + let (c_slice, c_layout) = as_slice_with_layout_mut(c).expect("Matrix `c` not contiguous."); + + assert_eq!(a_layout, b_layout); + assert_eq!(a_layout, c_layout); + + let layout = a_layout; + + unsafe { + cblas::dgemm( + layout, + cblas::Transpose::Ordinary, + cblas::Transpose::None, + n_cols as i32, + n_cols as i32, + n_rows as i32, + 1.0, + a_slice, + n_cols as i32, + b_slice, + n_cols as i32, + 0.0, + c_slice, + n_rows as i32, + ); + } + } + + // We are iterating over the rows because it's more memory efficient (for row-major array). In + // terms of logic there is no difference: we simply check if a specific column of a is parallel + // to any column of b. By iterating via columns we would check if any column of a is parallel + // to a specific column of b. + 'rows: for row in c.genrows() { + for element in row { + // If a parallel column was found, cut to the next one. + if f64::abs(*element) == n_rows as f64 { + continue 'rows; + } + } + // This return statement should only be reached if not a single column parallel to the + // current one was found. + return false; + } + true +} + +/// Find parallel columns in matrix `a` and columns in `a` that are parallel to any columns in +/// matrix `b`, and replace those with random vectors. Returns `true` if resampling has taken place. +fn resample_parallel_columns( + a: &mut ArrayBase, + b: &ArrayBase, + c: &mut ArrayBase, + column_is_parallel: &mut [bool], + rng: &mut R, + sample: &[f64], +) -> bool +where + S1: DataMut, + S2: Data, + S3: DataMut, + R: Rng, +{ + column_is_parallel.iter_mut().for_each(|x| { + *x = false; + }); + find_parallel_columns_in(a, c, column_is_parallel); + find_parallel_columns_between(a, b, c, column_is_parallel); + let mut has_resampled = false; + for (i, is_parallel) in column_is_parallel.into_iter().enumerate() { + if *is_parallel { + resample_column(a, i, rng, sample); + has_resampled = true; + } + } + has_resampled +} + +/// Resamples column `i` of matrix `a` with elements drawn from `sample` using `rng`. +/// +/// Panics if `i` exceeds the number of columns in `a`. +fn resample_column(a: &mut ArrayBase, i: usize, rng: &mut R, sample: &[f64]) +where + S: DataMut, + R: Rng, +{ + assert!( + i < a.dim().1, + "Trying to resample column with index exceeding matrix dimensions" + ); + assert!(sample.len() > 0); + a.column_mut(i) + .mapv_inplace(|_| sample[rng.gen_range(0, sample.len())]); +} + +/// Returns slice and layout underlying an array `a`. +fn as_slice_with_layout(a: &ArrayBase) -> Option<(&[T], cblas::Layout)> +where + S: Data, + D: Dimension, +{ + if let Some(a_slice) = a.as_slice() { + Some((a_slice, cblas::Layout::RowMajor)) + } else if let Some(a_slice) = a.as_slice_memory_order() { + Some((a_slice, cblas::Layout::ColumnMajor)) + } else { + None + } +} + +/// Returns mutable slice and layout underlying an array `a`. +fn as_slice_with_layout_mut(a: &mut ArrayBase) -> Option<(&mut [T], cblas::Layout)> +where + S: DataMut, + D: Dimension, +{ + if a.as_slice_mut().is_some() { + Some((a.as_slice_mut().unwrap(), cblas::Layout::RowMajor)) + } else if a.as_slice_memory_order_mut().is_some() { + Some(( + a.as_slice_memory_order_mut().unwrap(), + cblas::Layout::ColumnMajor, + )) + } else { + None + } + // XXX: The above is a workaround for Rust not having non-lexical lifetimes yet. + // More information here: + // http://smallcultfollowing.com/babysteps/blog/2016/04/27/non-lexical-lifetimes-introduction/#problem-case-3-conditional-control-flow-across-functions + // + // if let Some(slice) = a.as_slice_mut() { + // Some((slice, cblas::Layout::RowMajor)) + // } else if let Some(slice) = a.as_slice_memory_order_mut() { + // Some((slice, cblas::Layout::ColumnMajor)) + // } else { + // None + // } +} diff --git a/src/expm.rs b/src/expm.rs new file mode 100644 index 0000000..c3e5266 --- /dev/null +++ b/src/expm.rs @@ -0,0 +1,781 @@ +/// This crate contains `expm`, an implementation of Algorithm 6.1 by [Al-Mohy, Higham] in the Rust +/// programming language. It calculates the exponential of a matrix. See the linked paper for more +/// information. +/// +/// An important ingredient is `normest1`, Algorithm 2.4 in [Higham, Tisseur], which estimates +/// the 1-norm of a matrix. +/// +/// Furthermore, to fully understand the algorithm as described in the original paper, one has to +/// understand that the factor $\lvert C_{2m+1} \rvert$ arises during the Padé approximation of the +/// exponential function. The derivation is described in [Gautschi 2012], pp. 363--365, and the +/// factor reads: +/// +/// \begin{equation} +/// C_{n,m} = (-1)^n \frac{n!m!}{(n+m)!(n+m+1)!}, +/// \end{equation} +/// +/// or using only the diagonal elements, $m=n$: +/// +/// \begin{equation} +/// C_m = (-1)^m \frac{m!m!}{(2m)!(2m+1)!} +/// \end{equation} +/// +/// +/// [Al-Mohy, Higham]: http://eprints.ma.man.ac.uk/1300/1/covered/MIMS_ep2009_9.pdf +/// [Higham, Tisseur]: http://eprints.ma.man.ac.uk/321/1/covered/MIMS_ep2006_145.pdf +/// [Gautschi 2012]: https://doi.org/10.1007/978-0-8176-8259-0 +use ndarray::{self, prelude::*, Data, DataMut, Dimension, Zip}; + +use crate::condest::Normest1; + +// Can we calculate these at compile time? +const THETA_3: f64 = 1.495585217958292e-2; +const THETA_5: f64 = 2.539398330063230e-1; +const THETA_7: f64 = 9.504178996162932e-1; +const THETA_9: f64 = 2.097847961257068e0; +// const THETA_13: f64 = 5.371920351148152e0 // Alg 3.1 +const THETA_13: f64 = 4.25; // Alg 5.1 + +const PADE_COEFF_3: [f64; 4] = [120., 60., 12., 1.]; +const PADE_COEFF_5: [f64; 6] = [30240., 15120., 3360., 420., 30., 1.]; +const PADE_COEFF_7: [f64; 8] = [ + 17297280., 8648640., 1995840., 277200., 25200., 1512., 56., 1., +]; +const PADE_COEFF_9: [f64; 10] = [ + 17643225600., + 8821612800., + 2075673600., + 302702400., + 30270240., + 2162160., + 110880., + 3960., + 90., + 1., +]; +const PADE_COEFF_13: [f64; 14] = [ + 64764752532480000., + 32382376266240000., + 7771770303897600., + 1187353796428800., + 129060195264000., + 10559470521600., + 670442572800., + 33522128640., + 1323241920., + 40840800., + 960960., + 16380., + 182., + 1., +]; + +/// Calculates the of leading terms in the backward error function for the [m/m] Padé approximant +/// to the exponential function, i.e. it calculates: +/// +/// \begin{align} +/// C_{2m+1} &= \frac{(m!)^2}{(2m)!(2m+1)!} \\ +/// &= \frac{1}{\binom{2m}{m} (2m+1)!} +/// \end{align} +/// +/// NOTE: Depending on the notation used in the scientific papers, the coefficient `C` is, +/// confusingly, sometimes indexed `C_i` and sometimes `C_{2m+1}`. These essentially mean the same +/// thing and is due to the power series expansion of the backward error function: +/// +/// \begin{equation} +/// h(x) = \sum^\infty_{i=2m+1} C_i x^i +/// \end{equation} +fn pade_error_coefficient(m: u64) -> f64 { + use statrs::function::factorial::{binomial, factorial}; + + return 1.0 / (binomial(2 * m, m) * factorial(2 * m + 1)); +} + +#[allow(non_camel_case_types)] +struct PadeOrder_3; +#[allow(non_camel_case_types)] +struct PadeOrder_5; +#[allow(non_camel_case_types)] +struct PadeOrder_7; +#[allow(non_camel_case_types)] +struct PadeOrder_9; +#[allow(non_camel_case_types)] +struct PadeOrder_13; + +enum PadeOrders { + _3, + _5, + _7, + _9, + _13, +} + +trait PadeOrder { + const ORDER: u64; + + /// Return the coefficients arising in both the numerator as well as in the denominator of the + /// Padé approximant (they are the same, due to $p(x) = q(-x)$. + /// + /// TODO: This is a great usecase for const generics, returning &[u64; Self::ORDER] and + /// possibly calculating the values at compile time instead of hardcoding them. + /// Maybe possible once RFC 2000 lands? See the PR https://github.com/rust-lang/rust/pull/53645 + fn coefficients() -> &'static [f64]; + + fn calculate_pade_sums( + a: &ArrayBase, + a_powers: &[&ArrayBase], + u: &mut ArrayBase, + v: &mut ArrayBase, + work: &mut ArrayBase, + ) where + S1: Data, + S2: DataMut, + S3: DataMut; +} + +macro_rules! impl_padeorder { + ($($ty:ty, $m:literal, $const_coeff:ident),+) => { + +$( + +impl PadeOrder for $ty { + const ORDER: u64 = $m; + + fn coefficients() -> &'static [f64] { + &$const_coeff + } + + fn calculate_pade_sums( + a: &ArrayBase, + a_powers: &[&ArrayBase], + u: &mut ArrayBase, + v: &mut ArrayBase, + work: &mut ArrayBase, + ) + where S1: Data, + S2: DataMut, + S3: DataMut, + { + assert_eq!(a_powers.len(), ($m - 1)/2 + 1); + + let (n_rows, n_cols) = a.dim(); + assert_eq!(n_rows, n_cols, "Pade sum only defined for square matrices."); + let n = n_rows as i32; + + // Iterator to get 2 coefficients, c_{2i} and c_{2i+1}, and 1 matrix power at a time. + let mut iterator = Self::coefficients().chunks_exact(2).zip(a_powers.iter()); + + // First element from the iterator. + // + // NOTE: The unwrap() and unreachable!() are permissable because the assertion above + // ensures the validity. + // + // TODO: An optimization is probably to just set u and v to zero and only assign the + // coefficients to its diagonal, given that A_0 = A^0 = 1. + let (c_0, c_1, a_pow) = match iterator.next().unwrap() { + (&[c_0, c_1], a_pow) => (c_0, c_1, a_pow), + _ => unreachable!() + }; + + work.zip_mut_with(a_pow, |x, &y| *x = c_1 * y); + v.zip_mut_with(a_pow, |x, &y| *x = c_0 * y); + + // Rest of the iterator + while let Some(item) = iterator.next() { + let (c_2k, c_2k1, a_pow) = match item { + (&[c_2k, c_2k1], a_pow) => (c_2k, c_2k1, a_pow), + _ => unreachable!() + }; + + work.zip_mut_with(a_pow, |x, &y| *x = *x + c_2k1 * y); + v.zip_mut_with(a_pow, |x, &y| *x = *x + c_2k * y); + } + + let (a_slice, a_layout) = as_slice_with_layout(a).expect("Matrix `a` not contiguous."); + let (work_slice, _) = as_slice_with_layout(work).expect("Matrix `work` not contiguous."); + let (u_slice, u_layout) = as_slice_with_layout_mut(u).expect("Matrix `u` not contiguous."); + assert_eq!(a_layout, u_layout, "Memory layout mismatch between matrices; currently only row major matrices are supported."); + let layout = a_layout; + unsafe { + cblas::dgemm( + layout, + cblas::Transpose::None, + cblas::Transpose::None, + n, + n, + n, + 1.0, + a_slice, + n, + work_slice, + n, + 0.0, + u_slice, + n, + ) + } + } +} + +)+ +} +} + +impl_padeorder!( + PadeOrder_3, + 3, + PADE_COEFF_3, + PadeOrder_5, + 5, + PADE_COEFF_5, + PadeOrder_7, + 7, + PADE_COEFF_7, + PadeOrder_9, + 9, + PADE_COEFF_9 +); + +impl PadeOrder for PadeOrder_13 { + const ORDER: u64 = 13; + + fn coefficients() -> &'static [f64] { + &PADE_COEFF_13 + } + + fn calculate_pade_sums( + a: &ArrayBase, + a_powers: &[&ArrayBase], + u: &mut ArrayBase, + v: &mut ArrayBase, + work: &mut ArrayBase, + ) where + S1: Data, + S2: DataMut, + S3: DataMut, + { + assert_eq!(a_powers.len(), (13 - 1) / 2 + 1); + + let (n_rows, n_cols) = a.dim(); + assert_eq!(n_rows, n_cols, "Pade sum only defined for square matrices."); + let n = n_rows; + + let coefficients = Self::coefficients(); + + Zip::from(&mut *work) + .and(a_powers[0]) + .and(a_powers[1]) + .and(a_powers[2]) + .and(a_powers[3]) + .apply(|x, &a0, &a2, &a4, &a6| { + *x = *x + + coefficients[1] * a0 + + coefficients[3] * a2 + + coefficients[5] * a4 + + coefficients[7] * a6; + }); + + { + let (a_slice, a_layout) = as_slice_with_layout(a).expect("Matrix `a` not contiguous."); + let (work_slice, _) = + as_slice_with_layout(work).expect("Matrix `work` not contiguous."); + let (u_slice, u_layout) = + as_slice_with_layout_mut(u).expect("Matrix `u` not contiguous."); + assert_eq!(a_layout, u_layout, "Memory layout mismatch between matrices; currently only row major matrices are supported."); + let layout = a_layout; + unsafe { + cblas::dgemm( + layout, + cblas::Transpose::None, + cblas::Transpose::None, + n as i32, + n as i32, + n as i32, + 1.0, + a_slice, + n as i32, + work_slice, + n as i32, + 0.0, + u_slice, + n as i32, + ) + } + } + + Zip::from(&mut *work) + .and(a_powers[1]) + .and(a_powers[2]) + .and(a_powers[3]) + .apply(|x, &a2, &a4, &a6| { + *x = coefficients[8] * a2 + coefficients[10] * a4 + coefficients[12] * a6; + }); + + { + let (a6_slice, a6_layout) = + as_slice_with_layout(a_powers[3]).expect("Matrix `a6` not contiguous."); + let (work_slice, _) = + as_slice_with_layout(work).expect("Matrix `work` not contiguous."); + let (v_slice, v_layout) = + as_slice_with_layout_mut(v).expect("Matrix `v` not contiguous."); + assert_eq!(a6_layout, v_layout, "Memory layout mismatch between matrices; currently only row major matrices are supported."); + let layout = a6_layout; + unsafe { + cblas::dgemm( + layout, + cblas::Transpose::None, + cblas::Transpose::None, + n as i32, + n as i32, + n as i32, + 1.0, + a6_slice, + n as i32, + work_slice, + n as i32, + 0.0, + v_slice, + n as i32, + ) + } + } + + Zip::from(v) + .and(a_powers[0]) + .and(a_powers[1]) + .and(a_powers[2]) + .and(a_powers[3]) + .apply(|x, &a0, &a2, &a4, &a6| { + *x = *x + + coefficients[0] * a0 + + coefficients[2] * a2 + + coefficients[4] * a4 + + coefficients[6] * a6; + }) + } +} + +/// Storage for calculating the matrix exponential. +pub struct Expm { + n: usize, + itmax: usize, + eye: Array2, + a1: Array2, + a2: Array2, + a4: Array2, + a6: Array2, + a8: Array2, + a_abs: Array2, + u: Array2, + work: Array2, + pivot: Array1, + normest1: Normest1, + layout: cblas::Layout, +} + +impl Expm { + /// Allocates all space to calculate the matrix exponential for a square matrix of dimension + /// n×n. + pub fn new(n: usize) -> Self { + let eye = Array2::::eye(n); + let a1 = Array2::::zeros((n, n)); + let a2 = Array2::::zeros((n, n)); + let a4 = Array2::::zeros((n, n)); + let a6 = Array2::::zeros((n, n)); + let a8 = Array2::::zeros((n, n)); + let a_abs = Array2::::zeros((n, n)); + let u = Array2::::zeros((n, n)); + let work = Array2::::zeros((n, n)); + let pivot = Array1::::zeros(n); + let layout = cblas::Layout::RowMajor; + + // TODO: Investigate what an optimal value for t is when estimating the 1-norm. + // Python's SciPY uses t=2. Why? + let t = 2; + let itmax = 5; + + let normest1 = Normest1::new(n, t); + + Expm { + n, + itmax, + eye, + a1, + a2, + a4, + a6, + a8, + a_abs, + u, + work, + pivot, + normest1, + layout, + } + } + + /// Calculate the matrix exponential of the n×n matrix `a` storing the result in matrix `b`. + /// + /// NOTE: Panics if input matrices `a` and `b` don't have matching dimensions, are not square, + /// not in row-major order, or don't have the same dimension as the `Expm` object `expm` is + /// called on. + pub fn expm(&mut self, a: &ArrayBase, b: &mut ArrayBase) + where + S1: Data, + S2: DataMut, + { + assert_eq!( + a.dim(), + b.dim(), + "Input matrices `a` and `b` have to have matching dimensions." + ); + let (n_rows, n_cols) = a.dim(); + assert_eq!( + n_rows, n_cols, + "expm is only implemented for square matrices." + ); + assert_eq!( + n_rows, self.n, + "Dimension mismatch between matrix `a` and preconfigured `Expm` struct." + ); + + // Rename b to v to be in line with the nomenclature of the original paper. + let v = b; + + self.a1.assign(a); + + let n = self.n as i32; + + { + let (a_slice, a_layout) = + as_slice_with_layout(&self.a1).expect("Matrix `a` not contiguous."); + let (a2_slice, _) = + as_slice_with_layout_mut(&mut self.a2).expect("Matrix `a2` not contiguous."); + assert_eq!(a_layout, self.layout, "Memory layout mismatch between matrices; currently only row major matrices are supported."); + unsafe { + cblas::dgemm( + self.layout, + cblas::Transpose::None, + cblas::Transpose::None, + n, + n, + n, + 1.0, + a_slice, + n, + a_slice, + n, + 0.0, + a2_slice, + n as i32, + ) + } + } + + let d4_estimated = self + .normest1 + .normest1_pow(&self.a2, 2, self.itmax) + .powf(1.0 / 4.0); + let d6_estimated = self + .normest1 + .normest1_pow(&self.a2, 3, self.itmax) + .powf(1.0 / 6.0); + let eta_1 = d4_estimated.max(d6_estimated); + + if eta_1 <= THETA_3 && self.ell(3) == 0 { + println!("eta_1 condition"); + self.solve_via_pade(PadeOrders::_3, v); + return; + } + + { + let (a2_slice, _) = + as_slice_with_layout(&self.a2).expect("Matrix `a2` not contiguous."); + let (a4_slice, _) = + as_slice_with_layout_mut(&mut self.a4).expect("Matrix `a4` not contiguous."); + unsafe { + cblas::dgemm( + self.layout, + cblas::Transpose::None, + cblas::Transpose::None, + self.n as i32, + self.n as i32, + self.n as i32, + 1.0, + a2_slice, + n as i32, + a2_slice, + n as i32, + 0.0, + a4_slice, + n as i32, + ) + } + } + + let d4_precise = self.normest1.normest1(&self.a4, self.itmax).powf(1.0 / 4.0); + let eta_2 = d4_precise.max(d6_estimated); + + if eta_2 <= THETA_5 && self.ell(5) == 0 { + println!("eta_2 condition"); + self.solve_via_pade(PadeOrders::_5, v); + return; + } + + { + let (a2_slice, _) = + as_slice_with_layout(&self.a2).expect("Matrix `a2` not contiguous."); + let (a4_slice, _) = + as_slice_with_layout(&self.a4).expect("Matrix `a4` not contiguous."); + let (a6_slice, _) = + as_slice_with_layout_mut(&mut self.a6).expect("Matrix `a6` not contiguous."); + unsafe { + cblas::dgemm( + self.layout, + cblas::Transpose::None, + cblas::Transpose::None, + self.n as i32, + self.n as i32, + self.n as i32, + 1.0, + a2_slice, + n as i32, + a4_slice, + n as i32, + 0.0, + a6_slice, + n as i32, + ) + } + } + + let d6_precise = self.normest1.normest1(&self.a6, self.itmax).powf(1.0 / 6.0); + let d8_estimated = self.normest1.normest1_pow(&self.a4, 2, self.itmax); + let eta_3 = d6_precise.max(d8_estimated); + + if eta_3 <= THETA_7 && self.ell(7) == 0 { + println!("eta_3 (first) condition"); + self.solve_via_pade(PadeOrders::_7, v); + return; + } + + { + let (a4_slice, _) = + as_slice_with_layout(&self.a4).expect("Matrix `a4` not contiguous."); + let (a8_slice, _) = + as_slice_with_layout_mut(&mut self.a8).expect("Matrix `a8` not contiguous."); + unsafe { + cblas::dgemm( + self.layout, + cblas::Transpose::None, + cblas::Transpose::None, + self.n as i32, + self.n as i32, + self.n as i32, + 1.0, + a4_slice, + n as i32, + a4_slice, + n as i32, + 0.0, + a8_slice, + n as i32, + ) + } + } + + if eta_3 <= THETA_9 && self.ell(9) == 0 { + println!("eta_3 (second) condition"); + self.solve_via_pade(PadeOrders::_9, v); + return; + } + + let eta_4 = d8_estimated.max( + self.normest1 + .normest1_prod(&[&self.a4, &self.a6], self.itmax) + .powf(1.0 / 10.0), + ); + let eta_5 = eta_3.min(eta_4); + + use std::cmp; + use std::f64; + let mut s = cmp::max(f64::ceil(f64::log2(eta_5 / THETA_13)) as i32, 0); + self.a1.mapv_inplace(|x| x / 2f64.powi(s)); + s = s + self.ell(13); + self.a1.zip_mut_with(a, |x, &y| *x = y / 2f64.powi(s)); + self.a2.mapv_inplace(|x| x / 2f64.powi(2 * s)); + self.a4.mapv_inplace(|x| x / 2f64.powi(4 * s)); + self.a6.mapv_inplace(|x| x / 2f64.powi(6 * s)); + + self.solve_via_pade(PadeOrders::_13, v); + + // TODO: Call code fragment 2.1 in the paper if `a` is triangular, instead of the code below. + // + // NOTE: it's guaranteed that s >= 0 by its definition. + let (u_slice, _) = + as_slice_with_layout_mut(&mut self.u).expect("Matrix `u` not contiguous."); + + // NOTE: v initially contains r after `solve_via_pade`. + let (v_slice, _) = as_slice_with_layout_mut(v).expect("Matrix `v` not contiguous."); + for _ in 0..s { + unsafe { + cblas::dgemm( + self.layout, + cblas::Transpose::None, + cblas::Transpose::None, + self.n as i32, + self.n as i32, + self.n as i32, + 1.0, + v_slice, + n as i32, + v_slice, + n as i32, + 0.0, + u_slice, + n as i32, + ) + } + + u_slice.swap_with_slice(v_slice); + } + } + + /// A helper function (as it is called in the original paper) returning the + /// $\max(\lceil \log_2(\alpha/u) / 2m \rceil, 0)$, where + /// $\alpha = \lvert c_{2m+1}\rvert \texttt{normest}(\lvert A\rvert^{2m+1})/\lVertA\rVert_1$. + fn ell(&mut self, m: usize) -> i32 { + Zip::from(&mut self.a_abs) + .and(&self.a1) + .apply(|x, &y| *x = y.abs()); + + let c2m1 = pade_error_coefficient(m as u64); + + let norm_abs_a_2m1 = self + .normest1 + .normest1_pow(&self.a_abs, 2 * m + 1, self.itmax); + let norm_a = self.normest1.normest1(&self.a1, self.itmax); + let alpha = c2m1.abs() * norm_abs_a_2m1 / norm_a; + + // The unit roundoff, defined as half the machine epsilon. + let u = std::f64::EPSILON / 2.0; + + use std::cmp; + use std::f64; + + cmp::max(0, f64::ceil(f64::log2(alpha / u) / (2 * m) as f64) as i32) + } + + fn solve_via_pade(&mut self, pade_order: PadeOrders, v: &mut ArrayBase) + where + S: DataMut, + { + use PadeOrders::*; + + macro_rules! pade { + ($order:ty, [$(&$apow:expr),+]) => { + <$order as PadeOrder>::calculate_pade_sums(&self.a1, &[$(&$apow),+], &mut self.u, v, &mut self.work); + } + } + + match pade_order { + _3 => pade!(PadeOrder_3, [&self.eye, &self.a2]), + _5 => pade!(PadeOrder_5, [&self.eye, &self.a2, &self.a4]), + _7 => pade!(PadeOrder_7, [&self.eye, &self.a2, &self.a4, &self.a6]), + _9 => pade!( + PadeOrder_9, + [&self.eye, &self.a2, &self.a4, &self.a6, &self.a8] + ), + _13 => pade!(PadeOrder_13, [&self.eye, &self.a2, &self.a4, &self.a6]), + }; + + // Here we set v = p <- u + v and u = q <- -u + v, overwriting u and v via work. + self.work.assign(v); + + Zip::from(&mut *v).and(&self.u).apply(|x, &y| { + *x = *x + y; + }); + + Zip::from(&mut self.u).and(&self.work).apply(|x, &y| { + *x = -*x + y; + }); + + let (u_slice, _) = + as_slice_with_layout_mut(&mut self.u).expect("Matrix `u` not contiguous."); + let (v_slice, _) = as_slice_with_layout_mut(v).expect("Matrix `v` not contiguous."); + let (pivot_slice, _) = + as_slice_with_layout_mut(&mut self.pivot).expect("Vector `pivot` not contiguous."); + + let n = self.n as i32; + + let layout = { + match self.layout { + cblas::Layout::ColumnMajor => lapacke::Layout::ColumnMajor, + cblas::Layout::RowMajor => lapacke::Layout::RowMajor, + } + }; + + // FIXME: Handle the info for error management. + let _ = unsafe { lapacke::dgesv(layout, n, n, u_slice, n, pivot_slice, v_slice, n) }; + } +} + +/// Calculate the matrix exponential of the n×n matrix `a` storing the result in matrix `b`. +/// +/// NOTE: Panics if input matrices `a` and `b` don't have matching dimensions, are not square, +/// not in row-major order, or don't have the same dimension as the `Expm` object `expm` is +/// called on. +pub fn expm(a: &ArrayBase, b: &mut ArrayBase) +where + S1: Data, + S2: DataMut, +{ + let (n, _) = a.dim(); + + let mut expm = Expm::new(n); + expm.expm(a, b); +} + +/// Returns slice and layout underlying an array `a`. +fn as_slice_with_layout(a: &ArrayBase) -> Option<(&[T], cblas::Layout)> +where + S: Data, + D: Dimension, +{ + if let Some(a_slice) = a.as_slice() { + Some((a_slice, cblas::Layout::RowMajor)) + } else if let Some(a_slice) = a.as_slice_memory_order() { + Some((a_slice, cblas::Layout::ColumnMajor)) + } else { + None + } +} + +/// Returns mutable slice and layout underlying an array `a`. +fn as_slice_with_layout_mut(a: &mut ArrayBase) -> Option<(&mut [T], cblas::Layout)> +where + S: DataMut, + D: Dimension, +{ + if a.as_slice_mut().is_some() { + Some((a.as_slice_mut().unwrap(), cblas::Layout::RowMajor)) + } else if a.as_slice_memory_order_mut().is_some() { + Some(( + a.as_slice_memory_order_mut().unwrap(), + cblas::Layout::ColumnMajor, + )) + } else { + None + } + // XXX: The above is a workaround for Rust not having non-lexical lifetimes yet. + // More information here: + // http://smallcultfollowing.com/babysteps/blog/2016/04/27/non-lexical-lifetimes-introduction/#problem-case-3-conditional-control-flow-across-functions + // + // if let Some(slice) = a.as_slice_mut() { + // Some((slice, cblas::Layout::RowMajor)) + // } else if let Some(slice) = a.as_slice_memory_order_mut() { + // Some((slice, cblas::Layout::ColumnMajor)) + // } else { + // None + // } +} diff --git a/src/fitter.rs b/src/fitter.rs index fc464eb..1f83fd5 100644 --- a/src/fitter.rs +++ b/src/fitter.rs @@ -1,11 +1,21 @@ -mod batch; mod recursive; -pub use batch::BatchFitter; pub use recursive::RecursiveFitter; pub trait Fitter { fn add_sample(&mut self, t: f64) -> usize; fn allocate(&mut self); fn fit(&mut self); + + fn vs(&self, idx: usize) -> f64; + fn vs_mut(&mut self, idx: usize) -> &mut f64; + + fn ms(&self, idx: usize) -> f64; + fn ms_mut(&mut self, idx: usize) -> &mut f64; + + fn xs(&self, idx: usize) -> f64; + fn xs_mut(&mut self, idx: usize) -> &mut f64; + + fn ns(&self, idx: usize) -> f64; + fn ns_mut(&mut self, idx: usize) -> &mut f64; } diff --git a/src/fitter/batch.rs b/src/fitter/batch.rs deleted file mode 100644 index fbed8c5..0000000 --- a/src/fitter/batch.rs +++ /dev/null @@ -1,62 +0,0 @@ -use crate::kernel::Kernel; - -use super::Fitter; - -pub struct BatchFitter { - ts_new: Vec, - kernel: Box, - ts: Vec, - ms: Vec, - vs: Vec, - ns: Vec, - xs: Vec, - is_fitted: bool, -} - -impl BatchFitter { - pub fn new(kernel: Box) -> Self { - BatchFitter { - ts_new: Vec::new(), - kernel, - ts: Vec::new(), - ms: Vec::new(), - vs: Vec::new(), - ns: Vec::new(), - xs: Vec::new(), - is_fitted: true, - } - } -} - -impl Fitter for BatchFitter { - fn add_sample(&mut self, t: f64) -> usize { - let idx = self.ts.len() + self.ts_new.len(); - - self.ts_new.push(t); - self.is_fitted = false; - - idx - } - - fn allocate(&mut self) { - todo!(); - - let n_new = self.ts_new.len(); - // let zeros = - /* - n_new = len(self.ts_new) - zeros = np.zeros(n_new) - self.ts = np.concatenate((self.ts, self.ts_new)) - self.ms = np.concatenate((self.ms, zeros)) - self.vs = np.concatenate((self.vs, self.kernel.k_diag(self.ts_new))) - self.ns = np.concatenate((self.ns, zeros)) - self.xs = np.concatenate((self.xs, zeros)) - # Clear the list of pending samples. - self.ts_new = list() - */ - } - - fn fit(&mut self) { - todo!(); - } -} diff --git a/src/fitter/recursive.rs b/src/fitter/recursive.rs index 3902c23..afcae66 100644 --- a/src/fitter/recursive.rs +++ b/src/fitter/recursive.rs @@ -1,5 +1,3 @@ -use std::mem; - use ndarray::prelude::*; use ndarray::stack; @@ -18,8 +16,8 @@ pub struct RecursiveFitter { is_fitted: bool, h: Array1, i: ArrayD, - a: ArrayD, - q: ArrayD, + a: Vec>, + q: Vec>, m_p: ArrayD, p_p: ArrayD, m_f: ArrayD, @@ -44,8 +42,8 @@ impl RecursiveFitter { is_fitted: true, h, i: Array::eye(m).into_dyn(), - a: Array::zeros((0, m, m)).into_dyn(), - q: Array::zeros((0, m, m)).into_dyn(), + a: Vec::new(), + q: Vec::new(), m_p: Array::zeros((0, m)).into_dyn(), p_p: Array::zeros((0, m, m)).into_dyn(), m_f: Array::zeros((0, m)).into_dyn(), @@ -90,7 +88,7 @@ impl Fitter for RecursiveFitter { .collect::>(); let mean = Array::from_shape_vec((self.ts_new.len(), self.kernel.order()), mean) - .unwrap() + .expect("failed to create mean matrix") .into_dyn(); let cov = self @@ -109,7 +107,7 @@ impl Fitter for RecursiveFitter { (self.ts_new.len(), self.kernel.order(), self.kernel.order()), cov, ) - .unwrap() + .expect("failed to create cov matrix") .into_dyn(); self.m_p = stack![Axis(0), self.m_p, mean]; @@ -122,35 +120,58 @@ impl Fitter for RecursiveFitter { self.p_s = stack![Axis(0), self.p_s, cov]; // Compute the new transition and noise covariance matrices. - let m = self.kernel.order(); - - self.a = stack![Axis(0), self.a, Array::zeros((n_new, m, m)).into_dyn()]; - self.q = stack![Axis(0), self.q, Array::zeros((n_new, m, m)).into_dyn()]; - for i in (self.ts.len() - n_new)..self.ts.len() { if i == 0 { continue; } - self.a[i - 1] = self.kernel.transition(self.ts[i - 1], self.ts[1]); - // self.q[i - 1] = self.kernel.noise_cov(self.ts[i - 1], self.ts[1]); + self.a + .push(self.kernel.transition(self.ts[i - 1], self.ts[i])); + self.q + .push(self.kernel.noise_cov(self.ts[i - 1], self.ts[i])); } - todo!(); + let m = self.kernel.order(); - /* - for i in range(len(self.ts) - n_new, len(self.ts)): - if i == 0: - # Very first sample, no need to compute anything. - continue - self._A[i-1] = self.kernel.transition(self.ts[i-1], self.ts[i]) - self._Q[i-1] = self.kernel.noise_cov(self.ts[i-1], self.ts[i]) - # Clear the list of pending samples. - self.ts_new = list() - */ + self.a.push(Array2::zeros((m, m))); + self.q.push(Array2::zeros((m, m))); + + self.ts_new.clear(); } fn fit(&mut self) { todo!(); } + + fn vs(&self, idx: usize) -> f64 { + *&self.vs[idx] + } + + fn vs_mut(&mut self, idx: usize) -> &mut f64 { + &mut self.vs[idx] + } + + fn ms(&self, idx: usize) -> f64 { + *&self.ms[idx] + } + + fn ms_mut(&mut self, idx: usize) -> &mut f64 { + &mut self.ms[idx] + } + + fn xs(&self, idx: usize) -> f64 { + *&self.xs[idx] + } + + fn xs_mut(&mut self, idx: usize) -> &mut f64 { + &mut self.xs[idx] + } + + fn ns(&self, idx: usize) -> f64 { + *&self.ns[idx] + } + + fn ns_mut(&mut self, idx: usize) -> &mut f64 { + &mut self.ns[idx] + } } diff --git a/src/kernel.rs b/src/kernel.rs index 4fa2f78..eec4a5d 100644 --- a/src/kernel.rs +++ b/src/kernel.rs @@ -14,7 +14,38 @@ pub trait Kernel { fn state_mean(&self, t: f64) -> Array1; fn state_cov(&self, t: f64) -> Array2; fn measurement_vector(&self) -> Array1; - fn transition(&self, t0: f64, t1: f64) -> Array2; + fn feedback(&self) -> Array2; + + fn transition(&self, t0: f64, t1: f64) -> Array2 { + let f = self.feedback(); + + let a = f * (t1 - t0); + let mut b = Array2::::zeros(a.dim()); + + crate::expm::expm(&a, &mut b); + + b + } + + fn noise_cov(&self, t0: f64, t1: f64) -> Array2 { + /* + mat = self.noise_effect.dot(self.noise_density).dot(self.noise_effect.T) + #print(g) + print(mat) + Phi = np.vstack(( + np.hstack((self.feedback, mat)), + np.hstack((np.zeros_like(mat), -self.feedback.T)))) + print(Phi) + m = self.order + AB = np.dot(sp.linalg.expm(Phi * (t2 - t1)), np.eye(2*m, m, k=-m)) + print(AB) + return sp.linalg.solve(AB[m:,:].T, AB[:m,:].T) + */ + + // let mat = self.noise_effect() + + todo!(); + } } impl Kernel for Vec> { @@ -74,7 +105,57 @@ impl Kernel for Vec> { Array1::from(data) } - fn transition(&self, t0: f64, t1: f64) -> Array2 { - todo!(); + fn feedback(&self) -> Array2 { + let data = self + .iter() + .map(|kernel| kernel.feedback()) + .collect::>(); + + let dim = data + .iter() + .fold((0, 0), |(w, h), m| (w + m.ncols(), h + m.nrows())); + + let mut feedback = Array2::zeros(dim); + + let mut r_d = 0; + let mut c_d = 0; + + for m in data { + for ((r, c), v) in m.indexed_iter() { + feedback[(r + r_d, c + c_d)] = *v; + } + + r_d += m.nrows(); + c_d += m.ncols(); + } + + feedback + } + + fn noise_cov(&self, t0: f64, t1: f64) -> Array2 { + let data = self + .iter() + .map(|kernel| kernel.noise_cov(t0, t1)) + .collect::>(); + + let dim = data + .iter() + .fold((0, 0), |(w, h), m| (w + m.ncols(), h + m.nrows())); + + let mut cov = Array2::zeros(dim); + + let mut r_d = 0; + let mut c_d = 0; + + for m in data { + for ((r, c), v) in m.indexed_iter() { + cov[(r + r_d, c + c_d)] = *v; + } + + r_d += m.nrows(); + c_d += m.ncols(); + } + + cov } } diff --git a/src/kernel/constant.rs b/src/kernel/constant.rs index ff6db44..6f6e32d 100644 --- a/src/kernel/constant.rs +++ b/src/kernel/constant.rs @@ -33,7 +33,15 @@ impl Kernel for Constant { array![1.0] } + fn feedback(&self) -> Array2 { + array![[0.0]] + } + fn transition(&self, t0: f64, t1: f64) -> Array2 { - todo!(); + array![[1.0]] + } + + fn noise_cov(&self, t0: f64, t1: f64) -> Array2 { + array![[0.0]] } } diff --git a/src/kernel/exponential.rs b/src/kernel/exponential.rs index 3e2b9af..60391c2 100644 --- a/src/kernel/exponential.rs +++ b/src/kernel/exponential.rs @@ -34,7 +34,15 @@ impl Kernel for Exponential { array![1.0] } + fn feedback(&self) -> Array2 { + (-1.0 / self.l_scale) * array![[1.0]] + } + fn transition(&self, t0: f64, t1: f64) -> Array2 { - todo!(); + -(t1 - t0) / self.l_scale * array![[1.0]] + } + + fn noise_cov(&self, t0: f64, t1: f64) -> Array2 { + self.var * (1.0 - (-2.0 * (t1 - t0) / self.l_scale).exp()) * array![[1.0]] } } diff --git a/src/kernel/matern52.rs b/src/kernel/matern52.rs index 3c725be..e23e043 100644 --- a/src/kernel/matern52.rs +++ b/src/kernel/matern52.rs @@ -45,7 +45,56 @@ impl Kernel for Matern52 { array![1.0, 0.0, 0.0] } + fn feedback(&self) -> Array2 { + let a = self.lambda; + + array![ + [0.0, 1.0, 0.0], + [0.0, 0.0, 1.0], + [-a.powi(3), -3.0 * a.powi(2), -3.0 * a], + ] + } + fn transition(&self, t0: f64, t1: f64) -> Array2 { - todo!(); + let d = t1 - t0; + let a = self.lambda; + let da = d * a; + + let ba = array![ + [(da * da) / 2.0 + da + 1.0, d * (da + 1.0), d * d / 2.0], + [ + -(da * da * a) / 2.0, + -da * da + da + 1.0, + -(d / 2.0) * (da - 2.0) + ], + [ + (da * a * a / 2.0) * (da - 2.0), + (da * a) * (da - 3.0), + (da * da - 4.0 * da + 2.0) / 2.0 + ] + ]; + + (-da).exp() * ba + } + + fn noise_cov(&self, t0: f64, t1: f64) -> Array2 { + let d = t1 - t0; + let a = self.lambda; + let da = d * a; + + let c = (-2.0 * da).exp(); + + let x11 = -(1.0 / 3.0) + * (c * (2.0 * da.powi(4) + 4.0 * da.powi(3) + 6.0 * da * da + 6.0 * da + 3.0) - 3.0); + let x12 = c * (2.0 / 3.0) * a * da.powi(4); + let x13 = -(a * a / 3.0) + * (c * (2.0 * da.powi(4) - 4.0 * da.powi(3) - 2.0 * da * da - 2.0 * da - 1.0) + 1.0); + let x22 = -(a * a / 3.0) + * (c * (2.0 * da.powi(4) - 4.0 * da.powi(3) + 2.0 * da * da + 2.0 * da + 1.0) - 1.0); + let x23 = c * (2.0 / 3.0) * da * da * a.powi(3) * (da - 2.0).powi(2); + let x33 = -(a.powi(4) / 3.0) + * (c * (2.0 * da.powi(4) - 12.0 * da.powi(3) + 22.0 * da * da - 10.0 * da + 3.0) - 3.0); + + self.var * array![[x11, x12, x13], [x12, x22, x23], [x13, x23, x33]] } } diff --git a/src/lib.rs b/src/lib.rs index bcb0555..07487ee 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,10 +1,14 @@ // https://github.com/lucasmaystre/kickscore/tree/master/kickscore + +mod condest; +mod expm; mod fitter; mod item; pub mod kernel; mod model; pub mod observation; mod storage; +mod utils; pub use kernel::Kernel; pub use model::*; diff --git a/src/model.rs b/src/model.rs index e3f7dbd..61c19f1 100644 --- a/src/model.rs +++ b/src/model.rs @@ -93,8 +93,8 @@ impl BinaryModel { for obs in &mut self.observations { let diff = match method { - BinaryModelFitMethod::Ep => obs.ep_update(lr), - BinaryModelFitMethod::Kl => obs.kl_update(lr), + BinaryModelFitMethod::Ep => obs.ep_update(lr, &mut self.storage), + BinaryModelFitMethod::Kl => obs.kl_update(lr, &mut self.storage), }; if diff > max_diff { diff --git a/src/observation.rs b/src/observation.rs index 87ce49c..d1a4e1a 100644 --- a/src/observation.rs +++ b/src/observation.rs @@ -1,8 +1,12 @@ mod ordinal; +use crate::storage::Storage; + pub use ordinal::*; pub trait Observation { - fn ep_update(&mut self, lr: f64) -> f64; - fn kl_update(&mut self, lr: f64) -> f64; + fn match_moments(&self, mean_cav: f64, cov_cav: f64) -> (f64, f64, f64); + + fn ep_update(&mut self, lr: f64, storage: &mut Storage) -> f64; + fn kl_update(&mut self, lr: f64, storage: &mut Storage) -> f64; } diff --git a/src/observation/ordinal.rs b/src/observation/ordinal.rs index 96c7822..2af9d6a 100644 --- a/src/observation/ordinal.rs +++ b/src/observation/ordinal.rs @@ -1,7 +1,18 @@ use crate::storage::Storage; +use crate::utils::logphi; use super::Observation; +fn mm_probit_win(mean_cav: f64, cov_cav: f64) -> (f64, f64, f64) { + // Adapted from the GPML function `likErf.m`. + let z = mean_cav / (1.0 + cov_cav).sqrt(); + let (logpart, val) = logphi(z); + let dlogpart = val / (1.0 + cov_cav).sqrt(); // 1st derivative w.r.t. mean. + let d2logpart = -val * (z + val) / (1.0 + cov_cav); + + (logpart, dlogpart, d2logpart) +} + pub struct ProbitWinObservation { m: usize, items: Vec, @@ -10,7 +21,7 @@ pub struct ProbitWinObservation { ns_cav: Vec, xs_cav: Vec, t: f64, - logpart: usize, + logpart: f64, exp_ll: usize, margin: f64, } @@ -29,10 +40,10 @@ impl ProbitWinObservation { .iter() .map(|(id, _)| storage.get_item(*id).fitter.add_sample(t)) .collect(), - ns_cav: Vec::new(), - xs_cav: Vec::new(), + ns_cav: (0..elems.len()).map(|_| 0.0).collect(), + xs_cav: (0..elems.len()).map(|_| 0.0).collect(), t, - logpart: 0, + logpart: 0.0, exp_ll: 0, margin, } @@ -40,11 +51,63 @@ impl ProbitWinObservation { } impl Observation for ProbitWinObservation { - fn ep_update(&mut self, lr: f64) -> f64 { - todo!(); + fn match_moments(&self, mean_cav: f64, cov_cav: f64) -> (f64, f64, f64) { + mm_probit_win(mean_cav - self.margin, cov_cav) } - fn kl_update(&mut self, lr: f64) -> f64 { + fn ep_update(&mut self, lr: f64, storage: &mut Storage) -> f64 { + // Mean and variance of the cavity distribution in function space. + let mut f_mean_cav = 0.0; + let mut f_var_cav = 0.0; + + for i in 0..self.m { + let item = storage.item(self.items[i]); + let idx = self.indices[i]; + let coeff = self.coeffs[i]; + + // Compute the natural parameters of the cavity distribution. + let x_tot = 1.0 / item.fitter.vs(idx); + let n_tot = x_tot * item.fitter.ms(idx); + let x_cav = x_tot - item.fitter.xs(idx); + let n_cav = n_tot - item.fitter.ns(idx); + + self.xs_cav[i] = x_cav; + self.ns_cav[i] = n_cav; + + // Adjust the function-space cavity mean & variance. + f_mean_cav += coeff * n_cav / x_cav; + f_var_cav += coeff * coeff / x_cav; + } + + // Moment matching. + let (logpart, dlogpart, d2logpart) = self.match_moments(f_mean_cav, f_var_cav); + + for i in 0..self.m { + let item = storage.item_mut(self.items[i]); + let idx = self.indices[i]; + let coeff = self.coeffs[i]; + + let x_cav = self.xs_cav[i]; + let n_cav = self.ns_cav[i]; + + // Update the elements' parameters. + let denom = 1.0 + coeff * coeff * d2logpart / x_cav; + let x = -coeff * coeff * d2logpart / denom; + let n = coeff * (dlogpart - coeff * (n_cav / x_cav) * d2logpart) / denom; + + *item.fitter.xs_mut(idx) = (1.0 - lr) * item.fitter.xs(idx) + lr * x; + *item.fitter.ns_mut(idx) = (1.0 - lr) * item.fitter.ns(idx) + lr * n; + } + + let diff = (self.logpart - logpart).abs(); + + // Save log partition function value for the log-likelihood. + self.logpart = logpart; + + diff + } + + fn kl_update(&mut self, lr: f64, storage: &mut Storage) -> f64 { todo!(); } } @@ -60,11 +123,15 @@ impl LogitWinObservation { } impl Observation for LogitWinObservation { - fn ep_update(&mut self, lr: f64) -> f64 { + fn match_moments(&self, mean_cav: f64, cov_cav: f64) -> (f64, f64, f64) { todo!(); } - fn kl_update(&mut self, lr: f64) -> f64 { + fn ep_update(&mut self, lr: f64, storage: &mut Storage) -> f64 { + todo!(); + } + + fn kl_update(&mut self, lr: f64, storage: &mut Storage) -> f64 { todo!(); } } diff --git a/src/storage.rs b/src/storage.rs index 9eb6dac..8ff8404 100644 --- a/src/storage.rs +++ b/src/storage.rs @@ -34,6 +34,14 @@ impl Storage { &mut self.items[id] } + pub fn item(&self, id: usize) -> &Item { + &self.items[id] + } + + pub fn item_mut(&mut self, id: usize) -> &mut Item { + &mut self.items[id] + } + pub fn items_mut(&mut self) -> impl Iterator { self.items.iter_mut() } diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..eb283fe --- /dev/null +++ b/src/utils.rs @@ -0,0 +1,113 @@ +use std::f64::consts::PI; + +const CS: [f64; 14] = [ + 0.00048204, + -0.00142906, + 0.0013200243174, + 0.0009461589032, + -0.0045563339802, + 0.00556964649138, + 0.00125993961762116, + -0.01621575378835404, + 0.02629651521057465, + -0.001829764677455021, + 2.0 * (1.0 - PI / 3.0), + (4.0 - PI) / 3.0, + 1.0, + 1.0, +]; + +const RS: [f64; 5] = [ + 1.2753666447299659525, + 5.019049726784267463450, + 6.1602098531096305441, + 7.409740605964741794425, + 2.9788656263939928886, +]; + +const QS: [f64; 6] = [ + 2.260528520767326969592, + 9.3960340162350541504, + 12.048951927855129036034, + 17.081440747466004316, + 9.608965327192787870698, + 3.3690752069827527677, +]; + +/// Normal cumulative density function. +fn normcdf(x: f64) -> f64 { + // If X ~ N(0,1), returns P(X < x). + // erfc(-x / SQRT2) / 2.0 + + todo!(); +} + +/// Compute the log of the normal cumulative density function. +pub fn logphi(z: f64) -> (f64, f64) { + // Adapted from the GPML function `logphi.m`. + let SQRT2: f64 = 2.0f64.sqrt(); + let SQRT2PI: f64 = (2.0 * PI).sqrt(); + + if z * z < 0.0492 { + // First case: z close to zero. + let coef = -z / SQRT2PI; + let mut val = 0.0; + + for c in &CS { + val = coef * (c + val); + } + + let res = -2.0 * val - 2.0f64.ln(); + let dres = (-(z * z) / 2.0 - res).exp() / SQRT2PI; + + (res, dres) + } else if z < -11.3137 { + // Second case: z very small. + let mut num = 0.5641895835477550741; + + for r in &RS { + num = -z * num / SQRT2 + r; + } + + let mut den = 1.0; + + for q in &QS { + den = -z * den / SQRT2 + q; + } + + let res = (num / (2.0 * den)).ln() - (z * z) / 2.0; + let dres = (den / num).abs() * (2.0 / PI).sqrt(); + + (res, dres) + } else { + let res = normcdf(z).ln(); + let dres = (-(z * z) / 2.0 - res).exp() / SQRT2PI; + + (res, dres) + } + + /* + if z * z < 0.0492: + # First case: z close to zero. + coef = -z / SQRT2PI + val = 0 + for c in CS: + val = coef * (c + val) + res = -2 * val - log(2) + dres = exp(-(z * z) / 2 - res) / SQRT2PI + elif z < -11.3137: + # Second case: z very small. + num = 0.5641895835477550741 + for r in RS: + num = -z * num / SQRT2 + r + den = 1.0 + for q in QS: + den = -z * den / SQRT2 + q + res = log(num / (2 * den)) - (z * z) / 2 + dres = abs(den / num) * sqrt(2.0 / pi) + else: + res = log(normcdf(z)) + dres = exp(-(z * z) / 2 - res) / SQRT2PI + return res, dres + */ +}