81% speedup Also resolves issue where confidence interval search would occasionally never terminate
711 lines
25 KiB
Rust
711 lines
25 KiB
Rust
// hpstat: High-performance statistics implementations
|
|
// Copyright © 2023 Lee Yingtong Li (RunasSudo)
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
const Z_97_5: f64 = 1.959964; // This is the limit of resolution for an f64
|
|
const CHI2_1DF_95: f64 = 3.8414588;
|
|
|
|
use std::fs::File;
|
|
use std::io::{self, BufReader};
|
|
|
|
use clap::{Args, ValueEnum};
|
|
use indicatif::{ParallelProgressIterator, ProgressBar, ProgressDrawTarget, ProgressStyle};
|
|
use nalgebra::{DMatrix, DVector, Matrix2xX};
|
|
use prettytable::{Table, format, row};
|
|
use rayon::prelude::*;
|
|
use serde::{Serialize, Deserialize};
|
|
|
|
use crate::csv::read_csv;
|
|
use crate::pava::monotonic_regression_pava;
|
|
use crate::term::UnconditionalTermLike;
|
|
|
|
#[derive(Args)]
|
|
pub struct TurnbullArgs {
|
|
/// Path to CSV input file containing the observations
|
|
#[arg()]
|
|
input: String,
|
|
|
|
/// Output format
|
|
#[arg(long, value_enum, default_value="text")]
|
|
output: OutputFormat,
|
|
|
|
/// Maximum number of iterations to attempt
|
|
#[arg(long, default_value="1000")]
|
|
max_iterations: u32,
|
|
|
|
/// Terminate algorithm when the absolute change in log-likelihood is less than this tolerance
|
|
#[arg(long, default_value="0.01")]
|
|
ll_tolerance: f64,
|
|
|
|
/// Method for computing standard error or survival probabilities
|
|
#[arg(long, value_enum, default_value="oim")]
|
|
se_method: SEMethod,
|
|
|
|
/// Threshold for dropping failure probability in --se-method oim-drop-zeros
|
|
#[arg(long, default_value="0.0001")]
|
|
zero_tolerance: f64,
|
|
|
|
/// Desired precision of confidence limits in --se-method likelihood-ratio
|
|
#[arg(long, default_value="0.01")]
|
|
ci_precision: f64,
|
|
}
|
|
|
|
#[derive(ValueEnum, Clone)]
|
|
enum OutputFormat {
|
|
Text,
|
|
Json
|
|
}
|
|
|
|
#[derive(ValueEnum, Clone)]
|
|
pub enum SEMethod {
|
|
None,
|
|
OIM,
|
|
OIMDropZeros,
|
|
LikelihoodRatio,
|
|
}
|
|
|
|
pub fn main(args: TurnbullArgs) {
|
|
// Read data
|
|
let data_times = read_data(&args.input);
|
|
|
|
// Fit regression
|
|
let progress_bar = ProgressBar::with_draw_target(Some(0), ProgressDrawTarget::term_like(Box::new(UnconditionalTermLike::stderr())));
|
|
let result = fit_turnbull(data_times, progress_bar, args.max_iterations, args.ll_tolerance, args.se_method, args.zero_tolerance, args.ci_precision);
|
|
|
|
// Display output
|
|
match args.output {
|
|
OutputFormat::Text => {
|
|
println!();
|
|
println!();
|
|
println!("LL = {:.5}", result.ll_model);
|
|
|
|
let mut summary = Table::new();
|
|
let format = format::FormatBuilder::new()
|
|
.separators(&[format::LinePosition::Top, format::LinePosition::Title, format::LinePosition::Bottom], format::LineSeparator::new('-', '+', '+', '+'))
|
|
.padding(2, 2)
|
|
.build();
|
|
summary.set_format(format);
|
|
|
|
if let Some(survival_prob_se) = &result.survival_prob_se {
|
|
// Standard errors available
|
|
summary.set_titles(row!["Time", c->"Surv. Prob.", c->"Std Err.", H2c->"(95% CI)"]);
|
|
summary.add_row(row![r->"0.000", r->"1.00000", "", "", ""]);
|
|
for (i, prob) in result.survival_prob.iter().enumerate() {
|
|
summary.add_row(row![
|
|
r->format!("{:.3}", result.failure_intervals[i].1),
|
|
r->format!("{:.5}", prob),
|
|
r->format!("{:.5}", survival_prob_se[i]),
|
|
r->format!("({:.5},", prob - Z_97_5 * survival_prob_se[i]),
|
|
format!("{:.5})", prob + Z_97_5 * survival_prob_se[i]),
|
|
]);
|
|
}
|
|
summary.add_row(row![r->format!("{:.3}", result.failure_intervals.last().unwrap().1), r->"0.00000", "", "", ""]);
|
|
} else if let Some(survival_prob_ci) = &result.survival_prob_ci {
|
|
// No standard errors, just print CIs
|
|
summary.set_titles(row!["Time", c->"Surv. Prob.", H2c->"(95% CI)"]);
|
|
summary.add_row(row![r->"0.000", r->"1.00000", "", ""]);
|
|
for (i, prob) in result.survival_prob.iter().enumerate() {
|
|
summary.add_row(row![
|
|
r->format!("{:.3}", result.failure_intervals[i].1),
|
|
r->format!("{:.5}", prob),
|
|
r->format!("({:.5},", survival_prob_ci[i].0),
|
|
format!("{:.5})", survival_prob_ci[i].1),
|
|
]);
|
|
}
|
|
summary.add_row(row![r->format!("{:.3}", result.failure_intervals.last().unwrap().1), r->"0.00000", "", ""]);
|
|
} else {
|
|
// No standard errors or CIs
|
|
summary.set_titles(row!["Time", c->"Surv. Prob."]);
|
|
summary.add_row(row![r->"0.000", r->"1.00000"]);
|
|
for (i, prob) in result.survival_prob.iter().enumerate() {
|
|
summary.add_row(row![
|
|
r->format!("{:.3}", result.failure_intervals[i].1),
|
|
r->format!("{:.5}", prob),
|
|
]);
|
|
}
|
|
summary.add_row(row![r->format!("{:.3}", result.failure_intervals.last().unwrap().1), r->"0.00000"]);
|
|
}
|
|
|
|
summary.printstd();
|
|
}
|
|
OutputFormat::Json => {
|
|
println!("{}", serde_json::to_string(&result).unwrap());
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn read_data(path: &str) -> Matrix2xX<f64> {
|
|
// Read CSV into memory
|
|
let (_headers, records) = match path {
|
|
"-" => read_csv(io::stdin().lock()),
|
|
_ => read_csv(BufReader::new(File::open(path).expect("IO error")))
|
|
};
|
|
|
|
// Read data into matrices
|
|
|
|
// Represent data_times as 2xX rather than Xx2 matrix to allow par_column_iter in code_times_as_indexes (no par_row_iter)
|
|
// Serendipitously, from_vec fills column-by-column
|
|
let data_times = Matrix2xX::from_vec(records);
|
|
|
|
// TODO: Fail on left time > right time
|
|
// TODO: Fail on left time < 0
|
|
|
|
return data_times;
|
|
}
|
|
|
|
struct TurnbullData {
|
|
data_time_interval_indexes: Vec<(usize, usize)>,
|
|
|
|
// Cached intermediate values
|
|
intervals: Vec<(f64, f64)>,
|
|
}
|
|
|
|
impl TurnbullData {
|
|
fn num_obs(&self) -> usize {
|
|
return self.data_time_interval_indexes.len();
|
|
}
|
|
|
|
fn num_intervals(&self) -> usize {
|
|
return self.intervals.len();
|
|
}
|
|
}
|
|
|
|
/// Constrains the survival probability at a particular time s[time_index] == survival_prob
|
|
struct Constraint {
|
|
time_index: usize,
|
|
survival_prob: f64,
|
|
}
|
|
|
|
pub fn fit_turnbull(data_times: Matrix2xX<f64>, progress_bar: ProgressBar, max_iterations: u32, ll_tolerance: f64, se_method: SEMethod, zero_tolerance: f64, ci_precision: f64) -> TurnbullResult {
|
|
// ----------------------
|
|
// Prepare for regression
|
|
|
|
// Get Turnbull intervals
|
|
let intervals = get_turnbull_intervals(&data_times);
|
|
|
|
// Recode times as indexes
|
|
let data_time_interval_indexes = code_times_as_indexes(&data_times, &intervals);
|
|
|
|
// Initialise p
|
|
// Faster to repeatedly index Vec than DVector, and we don't do any matrix arithmetic, so represent this as Vec
|
|
let p = vec![1.0 / intervals.len() as f64; intervals.len()];
|
|
|
|
let data = TurnbullData {
|
|
data_time_interval_indexes: data_time_interval_indexes,
|
|
intervals: intervals,
|
|
};
|
|
|
|
// ------------------------------------------
|
|
// Apply iterative algorithm to fit estimator
|
|
|
|
progress_bar.set_style(ProgressStyle::with_template("[{elapsed_precise}] {bar:40} {msg}").unwrap());
|
|
progress_bar.set_length(u64::MAX);
|
|
progress_bar.reset();
|
|
progress_bar.println("Running EM-ICM algorithm to fit Turnbull estimator");
|
|
|
|
let (p, ll) = fit_turnbull_estimator(&data, progress_bar.clone(), max_iterations, ll_tolerance, p, None);
|
|
|
|
// Get survival probabilities (1 - cumulative failure probability), excluding at t=0 (prob=1) and t=inf (prob=0)
|
|
let mut survival_prob: Vec<f64> = Vec::with_capacity(data.num_intervals() - 1);
|
|
let mut acc = 1.0;
|
|
for j in 0..(data.num_intervals() - 1) {
|
|
acc -= p[j];
|
|
survival_prob.push(acc);
|
|
}
|
|
|
|
// --------------------------------------------------
|
|
// Compute standard errors for survival probabilities
|
|
|
|
let mut survival_prob_se = None;
|
|
let mut survival_prob_ci = None;
|
|
|
|
match se_method {
|
|
SEMethod::None => {}
|
|
SEMethod::OIM => {
|
|
survival_prob_se = Some(survival_prob_oim_se(&data, &p, zero_tolerance, false));
|
|
}
|
|
SEMethod::OIMDropZeros => {
|
|
survival_prob_se = Some(survival_prob_oim_se(&data, &p, zero_tolerance, true));
|
|
}
|
|
SEMethod::LikelihoodRatio => {
|
|
let s = p_to_s(&p);
|
|
let oim_se = survival_prob_oim_se(&data, &p, zero_tolerance, true);
|
|
|
|
progress_bar.set_style(ProgressStyle::with_template("[{elapsed_precise}] {bar:40} CI {pos}/{len}").unwrap());
|
|
progress_bar.set_length(data.num_intervals() as u64 - 1);
|
|
progress_bar.reset();
|
|
progress_bar.println("Computing confidence intervals by likelihood ratio test");
|
|
|
|
let confidence_intervals = (1..data.num_intervals()).into_par_iter()
|
|
.map(|j| survival_prob_likelihood_ratio_ci(&data, ProgressBar::hidden(), max_iterations, ll_tolerance, ci_precision, &p, ll, &s, &oim_se, j))
|
|
.progress_with(progress_bar.clone())
|
|
.collect();
|
|
|
|
survival_prob_ci = Some(confidence_intervals);
|
|
}
|
|
}
|
|
|
|
return TurnbullResult {
|
|
failure_intervals: data.intervals,
|
|
failure_prob: p,
|
|
survival_prob: survival_prob,
|
|
survival_prob_se: survival_prob_se,
|
|
survival_prob_ci: survival_prob_ci,
|
|
ll_model: ll,
|
|
};
|
|
}
|
|
|
|
fn get_turnbull_intervals(data_times: &Matrix2xX<f64>) -> Vec<(f64, f64)> {
|
|
let mut all_time_points: Vec<(f64, bool)> = Vec::new(); // Vec of (time, is_left)
|
|
all_time_points.extend(data_times.row(1).iter().map(|t| (*t, false))); // So we have right bounds before left bounds when sorted - ensures correct behaviour since intervals are left-open
|
|
all_time_points.extend(data_times.row(0).iter().map(|t| (*t, true)));
|
|
all_time_points.dedup();
|
|
all_time_points.sort_by(|(t1, _), (t2, _)| t1.partial_cmp(t2).unwrap());
|
|
|
|
let mut intervals: Vec<(f64, f64)> = Vec::new();
|
|
for i in 1..all_time_points.len() {
|
|
if all_time_points[i - 1].1 == true && all_time_points[i].1 == false {
|
|
intervals.push((all_time_points[i - 1].0, all_time_points[i].0));
|
|
}
|
|
}
|
|
|
|
return intervals;
|
|
}
|
|
|
|
fn code_times_as_indexes(data_times: &Matrix2xX<f64>, intervals: &Vec<(f64, f64)>) -> Vec<(usize, usize)> {
|
|
return data_times.par_column_iter().map(|t| {
|
|
let tleft = t[0];
|
|
let tright = t[1];
|
|
|
|
// Left index is first interval >= observation left bound
|
|
let left_index = intervals.iter().enumerate().find(|(_i, (ileft, _))| *ileft >= tleft).unwrap().0;
|
|
|
|
// Right index is last interval <= observation right bound
|
|
let right_index = intervals.iter().enumerate().rev().find(|(_i, (_, iright))| *iright <= tright).unwrap().0;
|
|
|
|
(left_index, right_index)
|
|
}).collect();
|
|
}
|
|
|
|
fn fit_turnbull_estimator(data: &TurnbullData, progress_bar: ProgressBar, max_iterations: u32, ll_tolerance: f64, mut p: Vec<f64>, constraint: Option<Constraint>) -> (Vec<f64>, f64) {
|
|
// Pre-compute S, the survival probability at the start of each interval
|
|
let mut s = p_to_s(&p);
|
|
|
|
// Get likelihood for each observation
|
|
let likelihood_obs = get_likelihood_obs(data, &s);
|
|
let mut ll_model: f64 = likelihood_obs.iter().map(|l| l.ln()).sum();
|
|
|
|
let mut iteration = 1;
|
|
loop {
|
|
// -------
|
|
// EM step
|
|
|
|
let p_after_em = do_em_step(data, &p, &s, &constraint);
|
|
let s_after_em = p_to_s(&p_after_em);
|
|
|
|
let likelihood_obs_after_em = get_likelihood_obs(data, &s_after_em);
|
|
let ll_model_after_em: f64 = likelihood_obs_after_em.iter().map(|l| l.ln()).sum();
|
|
|
|
p = p_after_em;
|
|
s = s_after_em;
|
|
|
|
// --------
|
|
// ICM step
|
|
|
|
let (p_new, s_new, ll_model_new) = do_icm_step(data, &p, &s, ll_tolerance, &constraint, ll_model_after_em);
|
|
|
|
let ll_change = ll_model_new - ll_model;
|
|
let converged = ll_change <= ll_tolerance;
|
|
|
|
p = p_new;
|
|
s = s_new;
|
|
ll_model = ll_model_new;
|
|
|
|
// Estimate progress bar according to either the order of magnitude of the ll_change relative to tolerance, or iteration/max_iterations
|
|
let progress2 = (iteration as f64 / max_iterations as f64 * u64::MAX as f64) as u64;
|
|
let progress3 = ((-ll_change.log10()).max(0.0) / -ll_tolerance.log10() * u64::MAX as f64) as u64;
|
|
|
|
// Update progress bar
|
|
progress_bar.set_position(progress_bar.position().max(progress3.max(progress2)));
|
|
progress_bar.set_message(format!("Iteration {} (LL = {:.4}, ΔLL = {:.4})", iteration + 1, ll_model, ll_change));
|
|
|
|
if converged {
|
|
progress_bar.println(format!("Converged in {} iterations", iteration));
|
|
break;
|
|
}
|
|
|
|
iteration += 1;
|
|
if iteration > max_iterations {
|
|
panic!("Exceeded --max-iterations");
|
|
}
|
|
}
|
|
|
|
return (p, ll_model);
|
|
}
|
|
|
|
fn p_to_s(p: &Vec<f64>) -> Vec<f64> {
|
|
let mut s = Vec::with_capacity(p.len() + 1); // Survival probabilities
|
|
let mut survival = 1.0;
|
|
s.push(1.0);
|
|
for p_j in p.iter() {
|
|
survival -= p_j;
|
|
s.push(survival);
|
|
}
|
|
return s;
|
|
}
|
|
|
|
fn s_to_lambda(s: &Vec<f64>) -> Vec<f64> {
|
|
// S = 1 means Λ = -inf and S = 0 means Λ = inf so skip these
|
|
let mut lambda = Vec::with_capacity(s.len() - 2); // Cumulative hazard
|
|
for s_j in &s[1..(s.len() - 1)] {
|
|
lambda.push((-s_j.ln()).ln());
|
|
}
|
|
return lambda;
|
|
}
|
|
|
|
fn get_likelihood_obs(data: &TurnbullData, s: &Vec<f64>) -> Vec<f64> {
|
|
return data.data_time_interval_indexes
|
|
.par_iter()
|
|
.map(|(idx_left, idx_right)| s[*idx_left] - s[*idx_right + 1])
|
|
.collect(); // TODO: Return iterator directly
|
|
}
|
|
|
|
fn do_em_step(data: &TurnbullData, p: &Vec<f64>, s: &Vec<f64>, constraint: &Option<Constraint>) -> Vec<f64> {
|
|
// Compute contributions to m
|
|
let mut m_contrib = vec![0.0; data.num_intervals()];
|
|
for (idx_left, idx_right) in data.data_time_interval_indexes.iter() {
|
|
let contrib = 1.0 / (s[*idx_left] - s[*idx_right + 1]);
|
|
|
|
// Adds to m for the first interval in the observation
|
|
m_contrib[*idx_left] += contrib;
|
|
|
|
// Subtracts from m for the first interval beyond the observation
|
|
if *idx_right + 1 < data.num_intervals() {
|
|
m_contrib[*idx_right + 1] -= contrib;
|
|
}
|
|
}
|
|
|
|
// Compute m
|
|
let mut m = Vec::with_capacity(data.num_intervals());
|
|
let mut m_last = 0.0;
|
|
for m_contrib_j in m_contrib {
|
|
let m_next = m_last + m_contrib_j / (data.num_obs() as f64);
|
|
m.push(m_next);
|
|
m_last = m_next;
|
|
}
|
|
|
|
// Update p
|
|
// p := p * m
|
|
let mut p_new: Vec<f64> = p.par_iter().zip(m.into_par_iter()).map(|(p_j, m_j)| p_j * m_j).collect();
|
|
|
|
// Constrain if required
|
|
if let Some(c) = &constraint {
|
|
let cur_fail_prob: f64 = p_new[0..c.time_index].iter().copied().sum();
|
|
// Not sure why borrow checker thinks there is an unused borrow here...
|
|
let _ = &mut p_new[0..c.time_index].iter_mut().for_each(|x| *x *= (1.0 - c.survival_prob) / cur_fail_prob); // Desired failure probability over current failure probability
|
|
let _ = &mut p_new[c.time_index..].iter_mut().for_each(|x| *x *= c.survival_prob / (1.0 - cur_fail_prob));
|
|
}
|
|
|
|
return p_new;
|
|
}
|
|
|
|
fn do_icm_step(data: &TurnbullData, p: &Vec<f64>, s: &Vec<f64>, ll_tolerance: f64, constraint: &Option<Constraint>, ll_model: f64) -> (Vec<f64>, Vec<f64>, f64) {
|
|
// Compute Λ, the cumulative hazard
|
|
// Since Λ = -inf when survival is 1, and Λ = inf when survival is 0, these are omitted
|
|
// The entry at lambda[j] corresponds to the survival immediately before time point j + 1
|
|
let lambda = s_to_lambda(&s);
|
|
|
|
// Compute gradient and diagonal of Hessian
|
|
let mut gradient = vec![0.0; data.num_intervals() - 1];
|
|
let mut hessdiag = vec![0.0; data.num_intervals() - 1];
|
|
for (idx_left, idx_right) in data.data_time_interval_indexes.iter() {
|
|
let denom = s[*idx_left] - s[*idx_right + 1];
|
|
|
|
// Add to gradient[j] when j + 1 == idx_right + 1
|
|
// Add to hessdiag[j] when j + 1 == idx_right + 1
|
|
if *idx_right < gradient.len() {
|
|
let j = *idx_right;
|
|
gradient[j] += (-lambda[j].exp() + lambda[j]).exp() / denom;
|
|
|
|
let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
|
|
let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
|
|
hessdiag[j] += a - b;
|
|
}
|
|
|
|
// Subtract from gradient[j] when j + 1 == idx_left
|
|
// Add to hessdiag[j] when j + 1 == idx_left
|
|
if *idx_left > 0 {
|
|
let j = *idx_left - 1;
|
|
gradient[j] -= (-lambda[j].exp() + lambda[j]).exp() / denom;
|
|
|
|
let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
|
|
let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
|
|
hessdiag[j] += -a - b;
|
|
}
|
|
}
|
|
|
|
// Description in Anderson-Bergman (2017) is slightly misleading
|
|
// Since we are maximising the likelihood, the second derivatives will be negative
|
|
// And we will move in the direction of the gradient
|
|
// So there are a few more negative signs here than suggested
|
|
|
|
let weights = -DVector::from_vec(hessdiag.clone()) / 2.0;
|
|
let gradient_over_hessdiag = DVector::from_vec(gradient.par_iter().zip(hessdiag.par_iter()).map(|(g, h)| g / h).collect());
|
|
|
|
let mut s_new;
|
|
let mut p_new;
|
|
let mut ll_model_new: f64;
|
|
|
|
// Take as large a step as possible while the log-likelihood increases
|
|
let mut step_size_exponent: i32 = 0;
|
|
loop {
|
|
let step_size = 0.5_f64.powi(step_size_exponent);
|
|
let lambda_target = -gradient_over_hessdiag.clone() * step_size + DVector::from_vec(lambda.clone());
|
|
|
|
let lambda_new = monotonic_regression_pava(lambda_target, weights.clone());
|
|
|
|
// Convert Λ to S to p
|
|
s_new = Vec::with_capacity(data.num_intervals() + 1);
|
|
p_new = Vec::with_capacity(data.num_intervals());
|
|
|
|
let mut survival = 1.0;
|
|
s_new.push(1.0);
|
|
for lambda_j in lambda_new.iter() {
|
|
let next_survival = (-lambda_j.exp()).exp();
|
|
s_new.push(next_survival);
|
|
p_new.push(survival - next_survival);
|
|
survival = next_survival;
|
|
}
|
|
s_new.push(0.0);
|
|
p_new.push(survival);
|
|
|
|
let likelihood_obs_new = get_likelihood_obs(data, &s_new);
|
|
ll_model_new = likelihood_obs_new.iter().map(|l| l.ln()).sum();
|
|
|
|
// Constrain if required
|
|
if let Some(c) = constraint {
|
|
let cur_survival_prob = s_new[c.time_index];
|
|
let _ = &mut p_new[0..c.time_index].iter_mut().for_each(|x| *x *= (1.0 - c.survival_prob) / (1.0 - cur_survival_prob)); // Desired failure probability over current failure probability
|
|
let _ = &mut p_new[c.time_index..].iter_mut().for_each(|x| *x *= c.survival_prob / cur_survival_prob);
|
|
|
|
s_new = p_to_s(&p_new);
|
|
let likelihood_obs_new = get_likelihood_obs(data, &s_new);
|
|
ll_model_new = likelihood_obs_new.iter().map(|l| l.ln()).sum();
|
|
}
|
|
|
|
if ll_model_new > ll_model {
|
|
return (p_new, s_new, ll_model_new);
|
|
}
|
|
|
|
if ll_model - ll_model_new < ll_tolerance {
|
|
// LL decreased but by less than ll_tolerance
|
|
// This might happen because the EM algorithm already obtained the exact solution
|
|
return (p.clone(), s.clone(), ll_model);
|
|
}
|
|
|
|
step_size_exponent += 1;
|
|
|
|
if step_size_exponent > 10 {
|
|
// If ICM does not increase log-likelihood, then simply return the original estimate and retry EM again
|
|
// If neither increases the log-likelihood, then we will have have converged within fit_turnbull_estimator
|
|
return (p.clone(), s.clone(), ll_model);
|
|
}
|
|
}
|
|
}
|
|
|
|
fn survival_prob_oim_se(data: &TurnbullData, p: &Vec<f64>, zero_tolerance: f64, drop_zeros: bool) -> Vec<f64> {
|
|
let hessian = compute_hessian(&data, &p);
|
|
|
|
if drop_zeros {
|
|
// Drop rows/columns of Hessian corresponding to intervals with zero failure probability
|
|
let nonzero_intervals: Vec<usize> = (0..(data.num_intervals() - 1)).filter(|i| p[*i] > zero_tolerance).collect();
|
|
|
|
let mut hessian_nonzero: DMatrix<f64> = DMatrix::zeros(nonzero_intervals.len(), nonzero_intervals.len());
|
|
for (nonzero_index1, orig_index1) in nonzero_intervals.iter().enumerate() {
|
|
hessian_nonzero[(nonzero_index1, nonzero_index1)] = hessian[(*orig_index1, *orig_index1)];
|
|
for (nonzero_index2, orig_index2) in nonzero_intervals.iter().enumerate().take(nonzero_index1) {
|
|
hessian_nonzero[(nonzero_index1, nonzero_index2)] = hessian[(*orig_index1, *orig_index2)];
|
|
hessian_nonzero[(nonzero_index2, nonzero_index1)] = hessian[(*orig_index2, *orig_index1)];
|
|
}
|
|
}
|
|
|
|
let vcov = -hessian_nonzero.try_inverse().expect("Matrix not invertible");
|
|
let survival_prob_se_nonzero = vcov.diagonal().apply_into(|x| { *x = x.sqrt(); });
|
|
|
|
let mut se = vec![0.0; data.num_intervals() - 1];
|
|
let mut nonzero_index = 0;
|
|
for orig_index in 0..(data.num_intervals() - 1) {
|
|
if nonzero_intervals.contains(&orig_index) {
|
|
se[orig_index] = survival_prob_se_nonzero[nonzero_index];
|
|
nonzero_index += 1;
|
|
} else {
|
|
se[orig_index] = se[orig_index - 1];
|
|
}
|
|
}
|
|
return se;
|
|
} else {
|
|
// Compute covariance matrix as inverse of negative Hessian
|
|
let vcov = -hessian.try_inverse().expect("Matrix not invertible");
|
|
let se = vcov.diagonal().apply_into(|x| { *x = x.sqrt(); });
|
|
return se.data.as_vec().clone();
|
|
}
|
|
}
|
|
|
|
fn compute_hessian(data: &TurnbullData, p: &Vec<f64>) -> DMatrix<f64> {
|
|
let mut hessian: DMatrix<f64> = DMatrix::zeros(data.num_intervals() - 1, data.num_intervals() - 1);
|
|
|
|
for (idx_left, idx_right) in data.data_time_interval_indexes.iter() {
|
|
// Compute 1 / (Σ_j α_{i,j} p_j)
|
|
let mut one_over_hessian_denominator: f64 = p[*idx_left..(*idx_right + 1)].iter().sum();
|
|
one_over_hessian_denominator = one_over_hessian_denominator.powi(-2);
|
|
|
|
// The numerator of the log-likelihood is -(α_{i,h} - α_{i,h+1})(α_{i,k} - α_{i,k+1})
|
|
// This is nonzero only when α_{i,h} ≠ α_{i,h+1} AND α_{i,k} ≠ α_{i,k+1}
|
|
// Since each observation spans a continuous sequence of intervals, this is true only at two each of h and k at the boundaries of the observation
|
|
// h = last interval not involving the observation, h + 1 = first interval involving the observation, etc.
|
|
|
|
// if *idx_left > 0 { h1 = idx_left - 1; }
|
|
// if *idx_right < data.num_intervals() - 1 { h2 = *idx_right; }
|
|
|
|
if *idx_left > 0 {
|
|
let h1 = idx_left - 1;
|
|
|
|
// (h, k) = (h1, h1)
|
|
// numerator is -(0 - 1)(0 - 1) = -1
|
|
hessian[(h1, h1)] -= one_over_hessian_denominator;
|
|
}
|
|
|
|
if *idx_right < data.num_intervals() - 1 {
|
|
let h2 = *idx_right;
|
|
|
|
// (h, k) = (h2, h2)
|
|
// numerator is -(1 - 0)(1 - 0) = -1
|
|
hessian[(h2, h2)] -= one_over_hessian_denominator;
|
|
|
|
if *idx_left > 0 {
|
|
let h1 = idx_left - 1;
|
|
|
|
// (h, k) = (h1, h2)
|
|
// numerator is -(0 - 1)(1 - 0) = 1
|
|
hessian[(h1, h2)] += one_over_hessian_denominator;
|
|
|
|
// (h, k) = (h2, h1)
|
|
// numerator is -(1 - 0)(0 - 1) = 1
|
|
hessian[(h2, h1)] += one_over_hessian_denominator;
|
|
}
|
|
}
|
|
}
|
|
|
|
return hessian;
|
|
}
|
|
|
|
fn survival_prob_likelihood_ratio_ci(data: &TurnbullData, progress_bar: ProgressBar, max_iterations: u32, ll_tolerance: f64, ci_precision: f64, p: &Vec<f64>, ll_model: f64, s: &Vec<f64>, oim_se: &Vec<f64>, time_index: usize) -> (f64, f64) {
|
|
// Compute lower confidence limit
|
|
let mut ci_bound_lower = 0.0;
|
|
let mut ci_bound_upper = s[time_index];
|
|
let mut ci_estimate = s[time_index] - Z_97_5 * oim_se[time_index - 1];
|
|
if ci_estimate < 0.0 {
|
|
ci_estimate = (ci_bound_lower + ci_bound_upper) / 2.0;
|
|
}
|
|
|
|
let mut iteration = 1;
|
|
loop {
|
|
// Get starting guess, constrained at time_index
|
|
let mut p_test = p.clone();
|
|
let cur_survival_prob = s[time_index];
|
|
let _ = &mut p_test[0..time_index].iter_mut().for_each(|x| *x *= (1.0 - ci_estimate) / (1.0 - cur_survival_prob)); // Desired failure probability over current failure probability
|
|
let _ = &mut p_test[time_index..].iter_mut().for_each(|x| *x *= ci_estimate / cur_survival_prob);
|
|
|
|
let (_p, ll_test) = fit_turnbull_estimator(data, progress_bar.clone(), max_iterations, ll_tolerance, p_test, Some(Constraint { time_index: time_index, survival_prob: ci_estimate }));
|
|
let lr_statistic = 2.0 * (ll_model - ll_test);
|
|
|
|
if lr_statistic > CHI2_1DF_95 {
|
|
// CI is too wide
|
|
ci_bound_lower = ci_estimate;
|
|
} else {
|
|
// CI is too narrow
|
|
ci_bound_upper = ci_estimate;
|
|
}
|
|
|
|
ci_estimate = (ci_bound_lower + ci_bound_upper) / 2.0;
|
|
|
|
if ci_bound_upper - ci_bound_lower <= ci_precision {
|
|
// Desired precision has been reached
|
|
break;
|
|
}
|
|
|
|
iteration += 1;
|
|
if iteration > max_iterations {
|
|
panic!("Exceeded --max-iterations");
|
|
}
|
|
}
|
|
|
|
let ci_lower = ci_estimate;
|
|
|
|
// Compute upper confidence limit
|
|
ci_bound_lower = s[time_index];
|
|
ci_bound_upper = 1.0;
|
|
ci_estimate = s[time_index] + Z_97_5 * oim_se[time_index - 1];
|
|
if ci_estimate > 1.0 {
|
|
ci_estimate = (ci_bound_lower + ci_bound_upper) / 2.0;
|
|
}
|
|
|
|
let mut iteration = 1;
|
|
loop {
|
|
// Get starting guess, constrained at time_index
|
|
let mut p_test = p.clone();
|
|
let cur_survival_prob = s[time_index];
|
|
let _ = &mut p_test[0..time_index].iter_mut().for_each(|x| *x *= (1.0 - ci_estimate) / (1.0 - cur_survival_prob)); // Desired failure probability over current failure probability
|
|
let _ = &mut p_test[time_index..].iter_mut().for_each(|x| *x *= ci_estimate / cur_survival_prob);
|
|
|
|
let (_p, ll_test) = fit_turnbull_estimator(data, progress_bar.clone(), max_iterations, ll_tolerance, p_test, Some(Constraint { time_index: time_index, survival_prob: ci_estimate }));
|
|
let lr_statistic = 2.0 * (ll_model - ll_test);
|
|
|
|
if lr_statistic > CHI2_1DF_95 {
|
|
// CI is too wide
|
|
ci_bound_upper = ci_estimate;
|
|
} else {
|
|
// CI is too narrow
|
|
ci_bound_lower = ci_estimate;
|
|
}
|
|
|
|
ci_estimate = (ci_bound_lower + ci_bound_upper) / 2.0;
|
|
|
|
if ci_bound_upper - ci_bound_lower <= ci_precision {
|
|
// Desired precision has been reached
|
|
break;
|
|
}
|
|
|
|
iteration += 1;
|
|
if iteration > max_iterations {
|
|
panic!("Exceeded --max-iterations");
|
|
}
|
|
}
|
|
|
|
let ci_upper = ci_estimate;
|
|
|
|
return (ci_lower, ci_upper);
|
|
}
|
|
|
|
#[derive(Serialize, Deserialize)]
|
|
pub struct TurnbullResult {
|
|
pub failure_intervals: Vec<(f64, f64)>,
|
|
pub failure_prob: Vec<f64>,
|
|
pub survival_prob: Vec<f64>,
|
|
pub survival_prob_se: Option<Vec<f64>>,
|
|
pub survival_prob_ci: Option<Vec<(f64, f64)>>,
|
|
pub ll_model: f64,
|
|
}
|