// hpstat/src/pava.rs

use nalgebra::DVector;
/// Apply pool adjacent violators algorithm (PAVA) to fit monotonic (isotonic) regression
///
/// Ported from https://github.com/scikit-learn/scikit-learn/blob/9cb11110280a555fd881455d65a48694e1f6860d/sklearn/_isotonic.pyx#L11
/// Authors: Nelle Varoquaux, Andrew Tulloch, Antony Lee
/// An excellent implementation, kudos to the sklearn team!
///
/// BSD 3-Clause License
///
/// Copyright (c) 2007-2023 The scikit-learn developers.
/// All rights reserved.
///
/// Redistribution and use in source and binary forms, with or without
/// modification, are permitted provided that the following conditions are met:
///
/// * Redistributions of source code must retain the above copyright notice, this
/// list of conditions and the following disclaimer.
///
/// * Redistributions in binary form must reproduce the above copyright notice,
/// this list of conditions and the following disclaimer in the documentation
/// and/or other materials provided with the distribution.
///
/// * Neither the name of the copyright holder nor the names of its
/// contributors may be used to endorse or promote products derived from
/// this software without specific prior written permission.
///
/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
/// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
/// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
/// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
/// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
/// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
/// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
/// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
/// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
/// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
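///
/// # Example
///
/// A minimal usage sketch (an illustrative addition, not from the upstream sklearn
/// source); the expected output follows from pooling the violating prefix [3, 1, 2]
/// to its mean.
/// ```ignore
/// use nalgebra::DVector;
///
/// let y = DVector::from_vec(vec![3.0, 1.0, 2.0, 4.0]);
/// let w = DVector::from_vec(vec![1.0; 4]);
/// let fit = monotonic_regression_pava(y, w);
/// assert_eq!(fit.as_slice(), &[2.0, 2.0, 2.0, 4.0]);
/// ```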
pub fn monotonic_regression_pava(mut y: DVector<f64>, mut w: DVector<f64>) -> DVector<f64> {
    // target describes a list of blocks. At any time, if [i..j] (inclusive) is an
    // active block, then target[i] := j and target[j] := i.
    // For "active" indices (block starts):
    //   w[i] := sum{w_orig[j], j=[i..target[i]]}
    //   y[i] := sum{y_orig[j]*w_orig[j], j=[i..target[i]]} / w[i]
    let n = y.nrows();
    let mut target = DVector::from_iterator(n, 0..n);
    let mut i = 0;
    while i < n {
        let mut k = target[i] + 1;
        if k == n {
            break;
        }
        if y[i] < y[k] {
            i = k;
            continue;
        }
        let mut sum_wy = w[i] * y[i];
        let mut sum_w = w[i];
        loop {
            // We are within a decreasing subsequence
            let prev_y = y[k];
            sum_wy += w[k] * y[k];
            sum_w += w[k];
            k = target[k] + 1;
            if k == n || prev_y < y[k] {
                // Non-singleton decreasing subsequence is finished, update first entry
                y[i] = sum_wy / sum_w;
                w[i] = sum_w;
                target[i] = k - 1;
                target[k - 1] = i;
                if i > 0 {
                    // Backtrack if we can. This makes the algorithm single-pass
                    // and ensures O(n) complexity
                    i = target[i - 1];
                }
                // Otherwise, restart from the same point
                break;
            }
        }
    }
    // Reconstruct the solution
    let mut i = 0;
    while i < n {
        let k = target[i] + 1;
        let y_i = y[i];
        y.view_mut((i + 1, 0), (k - i - 1, 1)).fill(y_i); // y[i+1..k] = y[i]
        i = k;
    }
    y
}
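
// A minimal test sketch (an illustrative addition, not from the upstream sklearn
// port): checks that the fit is non-decreasing and that a violating pair is
// pooled to its weighted mean.
#[cfg(test)]
mod tests {
    use super::*;
    use nalgebra::DVector;

    #[test]
    fn pools_violators_to_weighted_mean() {
        let y = DVector::from_vec(vec![1.0, 3.0, 2.0]);
        let w = DVector::from_vec(vec![1.0, 1.0, 2.0]);
        let fit = monotonic_regression_pava(y, w);

        // The fitted values must be monotonically non-decreasing
        for i in 1..fit.nrows() {
            assert!(fit[i - 1] <= fit[i]);
        }

        // The violating block [3 (w=1), 2 (w=2)] pools to (3*1 + 2*2) / 3 = 7/3
        assert!((fit[0] - 1.0).abs() < 1e-12);
        assert!((fit[1] - 7.0 / 3.0).abs() < 1e-12);
        assert!((fit[2] - 7.0 / 3.0).abs() < 1e-12);
    }
}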