From 250cfd879852c4ddada8f6e5c1d94c8af5365d00 Mon Sep 17 00:00:00 2001
From: RunasSudo
Date: Sat, 28 Oct 2023 22:48:59 +1100
Subject: [PATCH 1/5] turnbull: Initial implementation of EM-ICM algorithm

---
 src/turnbull.rs | 177 ++++++++++++++++++++++++++++++++++++------------
 1 file changed, 132 insertions(+), 45 deletions(-)

diff --git a/src/turnbull.rs b/src/turnbull.rs
index 9d0b49d..d685ce9 100644
--- a/src/turnbull.rs
+++ b/src/turnbull.rs
@@ -27,6 +27,7 @@ use prettytable::{Table, format, row};
 use rayon::prelude::*;
 use serde::{Serialize, Deserialize};
 
+use crate::pava::monotonic_regression_pava;
 use crate::term::UnconditionalTermLike;
 
 #[derive(Args)]
@@ -205,7 +206,7 @@ pub fn fit_turnbull(data_times: MatrixXx2<f64>, progress_bar: ProgressBar, max_i
     progress_bar.set_style(ProgressStyle::with_template("[{elapsed_precise}] {bar:40} {msg}").unwrap());
     progress_bar.set_length(u64::MAX);
     progress_bar.reset();
-    progress_bar.println("Running iterative algorithm to fit Turnbull estimator");
+    progress_bar.println("Running EM-ICM algorithm to fit Turnbull estimator");
 
     let (s, ll) = fit_turnbull_estimator(&mut data, progress_bar.clone(), max_iterations, ll_tolerance, s);
 
@@ -285,24 +286,144 @@ fn get_turnbull_intervals(data_times: &MatrixXx2<f64>) -> Vec<(f64, f64)> {
     return intervals;
 }
 
-fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, max_iterations: u32, ll_tolerance: f64, mut s: Vec<f64>) -> (Vec<f64>, f64) {
-    // Get likelihood for each observation (denominator of μ_ij)
-    let mut likelihood_obs = get_likelihood_obs(data, &s);
+fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, max_iterations: u32, ll_tolerance: f64, mut p: Vec<f64>) -> (Vec<f64>, f64) {
+    // Get likelihood for each observation
+    let mut likelihood_obs = get_likelihood_obs(data, &p);
     let mut ll_model: f64 = likelihood_obs.iter().map(|l| l.ln()).sum();
 
     let mut iteration = 1;
     loop {
-        // Compute π_j to update s
-        let pi = compute_pi(data, &s, likelihood_obs);
+        // -------
+        // EM step
 
-        let likelihood_obs_new = get_likelihood_obs(data, &pi);
-        let ll_model_new = likelihood_obs_new.iter().map(|l| l.ln()).sum();
+        // Pre-compute S, the survival probability at the start of each interval
+        let mut s = Vec::with_capacity(data.num_intervals() + 1);
+        let mut survival = 1.0;
+        s.push(1.0);
+        for p_j in p.iter() {
+            survival -= p_j;
+            s.push(survival);
+        }
+
+        // Update p
+        let mut p_new = Vec::with_capacity(data.num_intervals());
+        for j in 0..data.num_intervals() {
+            let tmp: f64 = data.data_time_interval_indexes.iter()
+                .filter(|(idx_left, idx_right)| j >= *idx_left && j <= *idx_right)
+                //.map(|(idx_left, idx_right)| 1.0 / p[*idx_left..(*idx_right + 1)].iter().sum::<f64>())
+                .map(|(idx_left, idx_right)| 1.0 / (s[*idx_left] - s[*idx_right + 1]))
+                .sum();
+
+            p_new.push(p[j] * tmp / (data.num_obs() as f64));
+        }
+
+        let likelihood_obs_after_em = get_likelihood_obs(data, &p_new);
+        let ll_model_after_em: f64 = likelihood_obs_after_em.iter().map(|l| l.ln()).sum();
+
+        p = p_new;
+
+        // --------
+        // ICM step
+
+        // Compute Λ
+        // S = 1 means Λ = -inf and S = 0 means Λ = inf so skip these
+        let mut lambda = Vec::with_capacity(data.num_intervals() - 1);
+        let mut survival: f64 = 1.0;
+        for j in 0..(data.num_intervals() - 1) {
+            survival -= p[j];
+            lambda.push((-survival.ln()).ln());
+        }
+
+        // Compute gradient
+        let mut gradient = DVector::zeros(data.num_intervals() - 1);
+        for j in 0..(data.num_intervals() - 1) {
+            let sum_right: f64 = data.data_time_interval_indexes.iter()
+                .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
+                .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / p[*idx_left..(*idx_right + 1)].iter().sum::<f64>())
+                .sum();
+
+            let sum_left: f64 = data.data_time_interval_indexes.iter()
+                .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
+                .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / p[*idx_left..(*idx_right + 1)].iter().sum::<f64>())
+                .sum();
+
+            gradient[j] = sum_right - sum_left;
+        }
+
+        // Compute diagonal of Hessian
+        let mut hessdiag = DVector::zeros(data.num_intervals() - 1);
+        for j in 0..(data.num_intervals() - 1) {
+            let sum_left: f64 = data.data_time_interval_indexes.iter()
+                .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
+                .map(|(idx_left, idx_right)| {
+                    let denom: f64 = p[*idx_left..(*idx_right + 1)].iter().sum();
+                    let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
+                    let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
+                    -a - b
+                })
+                .sum();
+
+            let sum_right: f64 = data.data_time_interval_indexes.iter()
+                .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
+                .map(|(idx_left, idx_right)| {
+                    let denom: f64 = p[*idx_left..(*idx_right + 1)].iter().sum();
+                    let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
+                    let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
+                    a - b
+                })
+                .sum();
+
+            hessdiag[j] = sum_left + sum_right;
+        }
+
+        // Description in Anderson-Bergman (2017) is slightly misleading
+        // Since we are maximising the likelihood, the second derivatives will be negative
+        // And we will move in the direction of the gradient
+        // So there are a few more negative signs here than suggested
+
+        let weights = -hessdiag.clone() / 2.0;
+
+        let mut p_new;
+        let mut ll_model_new: f64;
+
+        // Take as large a step as possible while the log-likelihood increases
+        let mut step_size_exponent: i32 = 0;
+        loop {
+            let step_size = 0.5_f64.powi(step_size_exponent);
+            let lambda_target = -gradient.component_div(&hessdiag) * step_size + DVector::from_vec(lambda.clone());
+
+            let lambda_new = monotonic_regression_pava(lambda_target, weights.clone());
+
+            // Convert Λ to S to p
+            p_new = Vec::with_capacity(data.num_intervals());
+
+            let mut survival = 1.0;
+            for lambda_j in lambda_new.iter() {
+                let next_survival = (-lambda_j.exp()).exp();
+                p_new.push(survival - next_survival);
+                survival = next_survival;
+            }
+            p_new.push(survival);
+
+            let likelihood_obs_new = get_likelihood_obs(data, &p_new);
+            ll_model_new = likelihood_obs_new.iter().map(|l| l.ln()).sum();
+
+            if ll_model_new > ll_model_after_em {
+                break;
+            }
+
+            step_size_exponent += 1;
+
+            if step_size_exponent > 10 {
+                panic!("ICM fails to increase log-likelihood");
+            }
+        }
 
         let ll_change = ll_model_new - ll_model;
         let converged = ll_change <= ll_tolerance;
 
-        s = pi;
-        likelihood_obs = likelihood_obs_new;
+        p = p_new;
+        //likelihood_obs = likelihood_obs_new;
         ll_model = ll_model_new;
 
         // Estimate progress bar according to either the order of magnitude of the ll_change relative to tolerance, or iteration/max_iterations
@@ -324,7 +445,7 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
         }
     }
 
-    return (s, ll_model);
+    return (p, ll_model);
 }
 
 fn get_likelihood_obs(data: &TurnbullData, s: &Vec<f64>) -> Vec<f64> {
     return data.data_time_interval_indexes
         .par_iter()
         .map(|(idx_left, idx_right)| s[*idx_left..(*idx_right + 1)].iter().sum())
         .collect();
 }
 
-fn compute_pi(data: &TurnbullData, s: &Vec<f64>, likelihood_obs: Vec<f64>) -> Vec<f64> {
-    /*
-    let mut pi: Vec<f64> = vec![0.0; data.num_intervals()];
-    for ((idx_left, idx_right), likelihood_obs_i) in data.data_time_interval_indexes.iter().zip(likelihood_obs.iter()) {
-        for j in *idx_left..(*idx_right + 1) {
-            pi[j] += s[j] / likelihood_obs_i / data.num_obs() as f64;
-        }
-    }
-    */
-
-    let pi = data.data_time_interval_indexes.par_iter().zip(likelihood_obs.par_iter())
-        .fold_with(
-            // Compute the contributions to pi[j] for each observation and sum them in parallel using fold_with
-            vec![0.0; data.num_intervals()],
-            |mut acc, ((idx_left, idx_right), likelihood_obs_i)| {
-                // Contributions to pi[j] for the i-th observation
-                for j in *idx_left..(*idx_right + 1) {
-                    acc[j] += s[j] / likelihood_obs_i / data.num_obs() as f64;
-                }
-                acc
-            }
-        )
-        .reduce(
-            // Reduce all the sub-sums from fold_with into the total sum
-            || vec![0.0; data.num_intervals()],
-            |mut acc, subsum| {
-                acc.iter_mut().zip(subsum.iter()).for_each(|(x, y)| *x += y);
-                acc
-            }
-        );
-
-    return pi;
-}
-
 fn compute_hessian(data: &TurnbullData, s: &Vec<f64>) -> DMatrix<f64> {
     let mut hessian: DMatrix<f64> = DMatrix::zeros(data.num_intervals() - 1, data.num_intervals() - 1);
 
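[Reviewer note, not part of the series] The EM update in PATCH 1/5 is the Turnbull self-consistency step: each interval's mass is rescaled by the average, over observations covering it, of the reciprocal of that observation's total mass. A minimal self-contained sketch of one EM pass (plain Rust; the slice-based signature and the example data are made up for illustration, with TurnbullData and the rayon parallelism omitted):

    // One EM pass over Turnbull intervals.
    // intervals[i] = (idx_left, idx_right) gives the first and last Turnbull
    // interval consistent with observation i; p[j] is the current mass of interval j.
    fn em_step(intervals: &[(usize, usize)], p: &[f64]) -> Vec<f64> {
        let n = intervals.len() as f64;
        (0..p.len())
            .map(|j| {
                // Sum 1/P_i over observations whose interval range covers j,
                // where P_i is the probability mass inside observation i
                let sum: f64 = intervals.iter()
                    .filter(|(l, r)| j >= *l && j <= *r)
                    .map(|(l, r)| 1.0 / p[*l..=*r].iter().sum::<f64>())
                    .sum();
                p[j] * sum / n
            })
            .collect()
    }

    fn main() {
        // Two observations: one pinned to interval 0, one spanning intervals 0..=1
        let intervals = [(0usize, 0usize), (0, 1)];
        let p = vec![0.5, 0.5];
        assert_eq!(em_step(&intervals, &p), vec![0.75, 0.25]);
    }

Iterating this update alone is the classical (slow) Turnbull algorithm; the ICM step exists to accelerate it.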
From 81b0b3f9b54066502531267996226fa73fdf3adb Mon Sep 17 00:00:00 2001
From: RunasSudo
Date: Sat, 28 Oct 2023 23:08:03 +1100
Subject: [PATCH 2/5] turnbull: Pre-compute survival probabilities

---
 src/turnbull.rs | 69 +++++++++++++++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 28 deletions(-)

diff --git a/src/turnbull.rs b/src/turnbull.rs
index d685ce9..d5b93f1 100644
--- a/src/turnbull.rs
+++ b/src/turnbull.rs
@@ -287,8 +287,11 @@ fn get_turnbull_intervals(data_times: &MatrixXx2<f64>) -> Vec<(f64, f64)> {
 }
 
 fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, max_iterations: u32, ll_tolerance: f64, mut p: Vec<f64>) -> (Vec<f64>, f64) {
+    // Pre-compute S, the survival probability at the start of each interval
+    let mut s = p_to_s(&p);
+
     // Get likelihood for each observation
-    let mut likelihood_obs = get_likelihood_obs(data, &p);
+    let mut likelihood_obs = get_likelihood_obs(data, &s);
     let mut ll_model: f64 = likelihood_obs.iter().map(|l| l.ln()).sum();
 
     let mut iteration = 1;
@@ -296,55 +299,41 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
         // -------
         // EM step
 
-        // Pre-compute S, the survival probability at the start of each interval
-        let mut s = Vec::with_capacity(data.num_intervals() + 1);
-        let mut survival = 1.0;
-        s.push(1.0);
-        for p_j in p.iter() {
-            survival -= p_j;
-            s.push(survival);
-        }
-
         // Update p
         let mut p_new = Vec::with_capacity(data.num_intervals());
         for j in 0..data.num_intervals() {
             let tmp: f64 = data.data_time_interval_indexes.iter()
                 .filter(|(idx_left, idx_right)| j >= *idx_left && j <= *idx_right)
-                //.map(|(idx_left, idx_right)| 1.0 / p[*idx_left..(*idx_right + 1)].iter().sum::<f64>())
                 .map(|(idx_left, idx_right)| 1.0 / (s[*idx_left] - s[*idx_right + 1]))
                 .sum();
 
             p_new.push(p[j] * tmp / (data.num_obs() as f64));
         }
 
-        let likelihood_obs_after_em = get_likelihood_obs(data, &p_new);
+        let mut s_new = p_to_s(&p_new);
+        let likelihood_obs_after_em = get_likelihood_obs(data, &s_new);
         let ll_model_after_em: f64 = likelihood_obs_after_em.iter().map(|l| l.ln()).sum();
 
         p = p_new;
+        s = s_new;
 
         // --------
         // ICM step
 
-        // Compute Λ
-        // S = 1 means Λ = -inf and S = 0 means Λ = inf so skip these
-        let mut lambda = Vec::with_capacity(data.num_intervals() - 1);
-        let mut survival: f64 = 1.0;
-        for j in 0..(data.num_intervals() - 1) {
-            survival -= p[j];
-            lambda.push((-survival.ln()).ln());
-        }
+        // Compute Λ, the cumulative hazard
+        let lambda = s_to_lambda(&s);
 
         // Compute gradient
         let mut gradient = DVector::zeros(data.num_intervals() - 1);
         for j in 0..(data.num_intervals() - 1) {
             let sum_right: f64 = data.data_time_interval_indexes.iter()
                 .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
-                .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / p[*idx_left..(*idx_right + 1)].iter().sum::<f64>())
+                .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / (s[*idx_left] - s[*idx_right + 1]))
                 .sum();
 
             let sum_left: f64 = data.data_time_interval_indexes.iter()
                 .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
-                .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / p[*idx_left..(*idx_right + 1)].iter().sum::<f64>())
+                .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / (s[*idx_left] - s[*idx_right + 1]))
                 .sum();
 
             gradient[j] = sum_right - sum_left;
@@ -356,7 +345,7 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
             let sum_left: f64 = data.data_time_interval_indexes.iter()
                 .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
                 .map(|(idx_left, idx_right)| {
-                    let denom: f64 = p[*idx_left..(*idx_right + 1)].iter().sum();
+                    let denom = s[*idx_left] - s[*idx_right + 1];
                     let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
                     let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
                     -a - b
                 })
                 .sum();
@@ -366,7 +355,7 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
             let sum_right: f64 = data.data_time_interval_indexes.iter()
                 .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
                 .map(|(idx_left, idx_right)| {
-                    let denom: f64 = p[*idx_left..(*idx_right + 1)].iter().sum();
+                    let denom = s[*idx_left] - s[*idx_right + 1];
                     let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
                     let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
                     a - b
                 })
                 .sum();
@@ -395,17 +384,21 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
             let lambda_new = monotonic_regression_pava(lambda_target, weights.clone());
 
             // Convert Λ to S to p
+            s_new = Vec::with_capacity(data.num_intervals() + 1);
             p_new = Vec::with_capacity(data.num_intervals());
 
             let mut survival = 1.0;
+            s_new.push(1.0);
             for lambda_j in lambda_new.iter() {
                 let next_survival = (-lambda_j.exp()).exp();
+                s_new.push(next_survival);
                 p_new.push(survival - next_survival);
                 survival = next_survival;
             }
+            s_new.push(0.0);
             p_new.push(survival);
 
-            let likelihood_obs_new = get_likelihood_obs(data, &p_new);
+            let likelihood_obs_new = get_likelihood_obs(data, &s_new);
             ll_model_new = likelihood_obs_new.iter().map(|l| l.ln()).sum();
 
             if ll_model_new > ll_model_after_em {
@@ -423,7 +416,7 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
         let ll_change = ll_model_new - ll_model;
         let converged = ll_change <= ll_tolerance;
 
         p = p_new;
-        //likelihood_obs = likelihood_obs_new;
+        s = s_new;
         ll_model = ll_model_new;
 
         // Estimate progress bar according to either the order of magnitude of the ll_change relative to tolerance, or iteration/max_iterations
@@ -448,11 +441,31 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
     return (p, ll_model);
 }
 
+fn p_to_s(p: &Vec<f64>) -> Vec<f64> {
+    let mut s = Vec::with_capacity(p.len() + 1); // Survival probabilities
+    let mut survival = 1.0;
+    s.push(1.0);
+    for p_j in p.iter() {
+        survival -= p_j;
+        s.push(survival);
+    }
+    return s;
+}
+
+fn s_to_lambda(s: &Vec<f64>) -> Vec<f64> {
+    // S = 1 means Λ = -inf and S = 0 means Λ = inf so skip these
+    let mut lambda = Vec::with_capacity(s.len() - 2); // Cumulative hazard
+    for s_j in &s[1..(s.len() - 1)] {
+        lambda.push((-s_j.ln()).ln());
+    }
+    return lambda;
+}
+
 fn get_likelihood_obs(data: &TurnbullData, s: &Vec<f64>) -> Vec<f64> {
     return data.data_time_interval_indexes
         .par_iter()
-        .map(|(idx_left, idx_right)| s[*idx_left..(*idx_right + 1)].iter().sum())
-        .collect();
+        .map(|(idx_left, idx_right)| s[*idx_left] - s[*idx_right + 1])
+        .collect(); // TODO: Return iterator directly
 }
 
 fn compute_hessian(data: &TurnbullData, s: &Vec<f64>) -> DMatrix<f64> {
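[Reviewer note, not part of the series] p_to_s and s_to_lambda in PATCH 2/5 are the two changes of variables the algorithm pivots on: S_j = 1 - Σ_{k<j} p_k, and the cloglog link Λ = ln(-ln S), inverted by S = exp(-exp Λ). A round-trip sketch using slices rather than &Vec (the function names here and the example masses are illustrative, not the patched code):

    fn p_to_s(p: &[f64]) -> Vec<f64> {
        // S starts at 1 and decreases by each interval's mass
        let mut s = vec![1.0];
        let mut survival = 1.0;
        for p_j in p {
            survival -= p_j;
            s.push(survival);
        }
        s
    }

    fn s_to_lambda(s: &[f64]) -> Vec<f64> {
        // Interior points only: S = 1 gives Λ = -inf, S = 0 gives Λ = +inf
        s[1..s.len() - 1].iter().map(|s_j| (-s_j.ln()).ln()).collect()
    }

    fn lambda_to_s(lambda: &[f64]) -> Vec<f64> {
        let mut s = vec![1.0];
        s.extend(lambda.iter().map(|l| (-l.exp()).exp()));
        s.push(0.0);
        s
    }

    fn main() {
        let p = [0.2, 0.3, 0.5];
        let s = p_to_s(&p);           // ~[1.0, 0.8, 0.5, 0.0]
        let lambda = s_to_lambda(&s); // [ln(-ln 0.8), ln(-ln 0.5)]
        let s_back = lambda_to_s(&lambda);
        assert!(s.iter().zip(&s_back).all(|(a, b)| (a - b).abs() < 1e-12));
    }

A non-decreasing Λ is exactly what makes S non-increasing and every p_j nonnegative, which is why the ICM step can enforce the constraints by projecting Λ onto monotone sequences with PAVA.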
From 37c904bf34af885e43e7170ec177367445ad4ec1 Mon Sep 17 00:00:00 2001
From: RunasSudo
Date: Sat, 28 Oct 2023 23:16:14 +1100
Subject: [PATCH 3/5] turnbull: Refactor for profiling

---
 src/turnbull.rs | 223 +++++++++++++++++++++++++-----------------------
 1 file changed, 117 insertions(+), 106 deletions(-)

diff --git a/src/turnbull.rs b/src/turnbull.rs
index d5b93f1..6760727 100644
--- a/src/turnbull.rs
+++ b/src/turnbull.rs
@@ -291,7 +291,7 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
     let mut s = p_to_s(&p);
 
     // Get likelihood for each observation
-    let mut likelihood_obs = get_likelihood_obs(data, &s);
+    let likelihood_obs = get_likelihood_obs(data, &s);
     let mut ll_model: f64 = likelihood_obs.iter().map(|l| l.ln()).sum();
 
     let mut iteration = 1;
@@ -299,118 +299,19 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
         // -------
         // EM step
 
-        // Update p
-        let mut p_new = Vec::with_capacity(data.num_intervals());
-        for j in 0..data.num_intervals() {
-            let tmp: f64 = data.data_time_interval_indexes.iter()
-                .filter(|(idx_left, idx_right)| j >= *idx_left && j <= *idx_right)
-                .map(|(idx_left, idx_right)| 1.0 / (s[*idx_left] - s[*idx_right + 1]))
-                .sum();
-
-            p_new.push(p[j] * tmp / (data.num_obs() as f64));
-        }
+        let p_after_em = do_em_step(data, &p, &s);
+        let s_after_em = p_to_s(&p_after_em);
 
-        let mut s_new = p_to_s(&p_new);
-        let likelihood_obs_after_em = get_likelihood_obs(data, &s_new);
+        let likelihood_obs_after_em = get_likelihood_obs(data, &s_after_em);
         let ll_model_after_em: f64 = likelihood_obs_after_em.iter().map(|l| l.ln()).sum();
 
-        p = p_new;
-        s = s_new;
+        p = p_after_em;
+        s = s_after_em;
 
         // --------
         // ICM step
 
-        // Compute Λ, the cumulative hazard
-        let lambda = s_to_lambda(&s);
-
-        // Compute gradient
-        let mut gradient = DVector::zeros(data.num_intervals() - 1);
-        for j in 0..(data.num_intervals() - 1) {
-            let sum_right: f64 = data.data_time_interval_indexes.iter()
-                .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
-                .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / (s[*idx_left] - s[*idx_right + 1]))
-                .sum();
-
-            let sum_left: f64 = data.data_time_interval_indexes.iter()
-                .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
-                .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / (s[*idx_left] - s[*idx_right + 1]))
-                .sum();
-
-            gradient[j] = sum_right - sum_left;
-        }
-
-        // Compute diagonal of Hessian
-        let mut hessdiag = DVector::zeros(data.num_intervals() - 1);
-        for j in 0..(data.num_intervals() - 1) {
-            let sum_left: f64 = data.data_time_interval_indexes.iter()
-                .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
-                .map(|(idx_left, idx_right)| {
-                    let denom = s[*idx_left] - s[*idx_right + 1];
-                    let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
-                    let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
-                    -a - b
-                })
-                .sum();
-
-            let sum_right: f64 = data.data_time_interval_indexes.iter()
-                .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
-                .map(|(idx_left, idx_right)| {
-                    let denom = s[*idx_left] - s[*idx_right + 1];
-                    let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
-                    let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
-                    a - b
-                })
-                .sum();
-
-            hessdiag[j] = sum_left + sum_right;
-        }
-
-        // Description in Anderson-Bergman (2017) is slightly misleading
-        // Since we are maximising the likelihood, the second derivatives will be negative
-        // And we will move in the direction of the gradient
-        // So there are a few more negative signs here than suggested
-
-        let weights = -hessdiag.clone() / 2.0;
-
-        let mut s_new;
-        let mut p_new;
-        let mut ll_model_new: f64;
-
-        // Take as large a step as possible while the log-likelihood increases
-        let mut step_size_exponent: i32 = 0;
-        loop {
-            let step_size = 0.5_f64.powi(step_size_exponent);
-            let lambda_target = -gradient.component_div(&hessdiag) * step_size + DVector::from_vec(lambda.clone());
-
-            let lambda_new = monotonic_regression_pava(lambda_target, weights.clone());
-
-            // Convert Λ to S to p
-            s_new = Vec::with_capacity(data.num_intervals() + 1);
-            p_new = Vec::with_capacity(data.num_intervals());
-
-            let mut survival = 1.0;
-            s_new.push(1.0);
-            for lambda_j in lambda_new.iter() {
-                let next_survival = (-lambda_j.exp()).exp();
-                s_new.push(next_survival);
-                p_new.push(survival - next_survival);
-                survival = next_survival;
-            }
-            s_new.push(0.0);
-            p_new.push(survival);
-
-            let likelihood_obs_new = get_likelihood_obs(data, &s_new);
-            ll_model_new = likelihood_obs_new.iter().map(|l| l.ln()).sum();
-
-            if ll_model_new > ll_model_after_em {
-                break;
-            }
-
-            step_size_exponent += 1;
-
-            if step_size_exponent > 10 {
-                panic!("ICM fails to increase log-likelihood");
-            }
-        }
+        let (p_new, s_new, ll_model_new) = do_icm_step(data, &p, &s, ll_model_after_em);
 
         let ll_change = ll_model_new - ll_model;
         let converged = ll_change <= ll_tolerance;
 
         p = p_new;
         s = s_new;
         ll_model = ll_model_new;
@@ -468,6 +369,116 @@ fn get_likelihood_obs(data: &TurnbullData, s: &Vec<f64>) -> Vec<f64> {
         .collect(); // TODO: Return iterator directly
 }
 
+fn do_em_step(data: &TurnbullData, p: &Vec<f64>, s: &Vec<f64>) -> Vec<f64> {
+    // Update p
+    let mut p_new = Vec::with_capacity(data.num_intervals());
+    for j in 0..data.num_intervals() {
+        let tmp: f64 = data.data_time_interval_indexes.iter()
+            .filter(|(idx_left, idx_right)| j >= *idx_left && j <= *idx_right)
+            .map(|(idx_left, idx_right)| 1.0 / (s[*idx_left] - s[*idx_right + 1]))
+            .sum();
+
+        p_new.push(p[j] * tmp / (data.num_obs() as f64));
+    }
+
+    return p_new;
+}
+
+fn do_icm_step(data: &TurnbullData, _p: &Vec<f64>, s: &Vec<f64>, ll_model: f64) -> (Vec<f64>, Vec<f64>, f64) {
+    // Compute Λ, the cumulative hazard
+    let lambda = s_to_lambda(&s);
+
+    // Compute gradient
+    let mut gradient = DVector::zeros(data.num_intervals() - 1);
+    for j in 0..(data.num_intervals() - 1) {
+        let sum_right: f64 = data.data_time_interval_indexes.iter()
+            .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
+            .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / (s[*idx_left] - s[*idx_right + 1]))
+            .sum();
+
+        let sum_left: f64 = data.data_time_interval_indexes.iter()
+            .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
+            .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / (s[*idx_left] - s[*idx_right + 1]))
+            .sum();
+
+        gradient[j] = sum_right - sum_left;
+    }
+
+    // Compute diagonal of Hessian
+    let mut hessdiag = DVector::zeros(data.num_intervals() - 1);
+    for j in 0..(data.num_intervals() - 1) {
+        let sum_left: f64 = data.data_time_interval_indexes.iter()
+            .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
+            .map(|(idx_left, idx_right)| {
+                let denom = s[*idx_left] - s[*idx_right + 1];
+                let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
+                let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
+                -a - b
+            })
+            .sum();
+
+        let sum_right: f64 = data.data_time_interval_indexes.iter()
+            .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
+            .map(|(idx_left, idx_right)| {
+                let denom = s[*idx_left] - s[*idx_right + 1];
+                let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
+                let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
+                a - b
+            })
+            .sum();
+
+        hessdiag[j] = sum_left + sum_right;
+    }
+
+    // Description in Anderson-Bergman (2017) is slightly misleading
+    // Since we are maximising the likelihood, the second derivatives will be negative
+    // And we will move in the direction of the gradient
+    // So there are a few more negative signs here than suggested
+
+    let weights = -hessdiag.clone() / 2.0;
+
+    let mut s_new;
+    let mut p_new;
+    let mut ll_model_new: f64;
+
+    // Take as large a step as possible while the log-likelihood increases
+    let mut step_size_exponent: i32 = 0;
+    loop {
+        let step_size = 0.5_f64.powi(step_size_exponent);
+        let lambda_target = -gradient.component_div(&hessdiag) * step_size + DVector::from_vec(lambda.clone());
+
+        let lambda_new = monotonic_regression_pava(lambda_target, weights.clone());
+
+        // Convert Λ to S to p
+        s_new = Vec::with_capacity(data.num_intervals() + 1);
+        p_new = Vec::with_capacity(data.num_intervals());
+
+        let mut survival = 1.0;
+        s_new.push(1.0);
+        for lambda_j in lambda_new.iter() {
+            let next_survival = (-lambda_j.exp()).exp();
+            s_new.push(next_survival);
+            p_new.push(survival - next_survival);
+            survival = next_survival;
+        }
+        s_new.push(0.0);
+        p_new.push(survival);
+
+        let likelihood_obs_new = get_likelihood_obs(data, &s_new);
+        ll_model_new = likelihood_obs_new.iter().map(|l| l.ln()).sum();
+
+        if ll_model_new > ll_model {
+            return (p_new, s_new, ll_model_new);
+        }
+
+        step_size_exponent += 1;
+
+        if step_size_exponent > 10 {
+            panic!("ICM fails to increase log-likelihood");
+        }
+    }
+}
+
 fn compute_hessian(data: &TurnbullData, s: &Vec<f64>) -> DMatrix<f64> {
     let mut hessian: DMatrix<f64> = DMatrix::zeros(data.num_intervals() - 1, data.num_intervals() - 1);
 
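[Reviewer note, not part of the series] The projection inside do_icm_step in PATCH 3/5 is monotonic_regression_pava from the crate's pava module, which this series does not show. For reference, a textbook weighted pool-adjacent-violators pass over plain slices — a sketch of the standard algorithm, not necessarily what crate::pava implements, and with a different (slice-based) signature — looks like:

    // Weighted isotonic regression: returns the non-decreasing sequence
    // minimising sum of w[i] * (fit[i] - y[i])^2
    fn pava(y: &[f64], w: &[f64]) -> Vec<f64> {
        // Each block holds (weighted mean, total weight, number of points pooled)
        let mut blocks: Vec<(f64, f64, usize)> = Vec::with_capacity(y.len());
        for (&y_i, &w_i) in y.iter().zip(w) {
            blocks.push((y_i, w_i, 1));
            // Pool the last two blocks while they violate monotonicity
            while blocks.len() > 1 {
                let (m2, w2, n2) = blocks[blocks.len() - 1];
                let (m1, w1, n1) = blocks[blocks.len() - 2];
                if m1 <= m2 {
                    break;
                }
                blocks.truncate(blocks.len() - 2);
                blocks.push(((m1 * w1 + m2 * w2) / (w1 + w2), w1 + w2, n1 + n2));
            }
        }
        let mut fit = Vec::with_capacity(y.len());
        for (m, _, n) in blocks {
            fit.extend(std::iter::repeat(m).take(n));
        }
        fit
    }

    fn main() {
        let y = [1.0, 3.0, 2.0, 4.0];
        let w = [1.0; 4];
        assert_eq!(pava(&y, &w), vec![1.0, 2.5, 2.5, 4.0]);
    }

do_icm_step weights the regression by -hessdiag/2, so coordinates where the log-likelihood is sharply curved move least when neighbouring blocks are pooled.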
From 85e3ee0dcdeedd683af85181d3a7d992b1485173 Mon Sep 17 00:00:00 2001
From: RunasSudo
Date: Sat, 28 Oct 2023 23:28:42 +1100
Subject: [PATCH 4/5] turnbull: Improve efficiency of EM step as suggested by
 Anderson-Bergman

---
 src/turnbull.rs | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/src/turnbull.rs b/src/turnbull.rs
index 6760727..7e31a02 100644
--- a/src/turnbull.rs
+++ b/src/turnbull.rs
@@ -370,17 +370,33 @@ fn get_likelihood_obs(data: &TurnbullData, s: &Vec<f64>) -> Vec<f64> {
 }
 
 fn do_em_step(data: &TurnbullData, p: &Vec<f64>, s: &Vec<f64>) -> Vec<f64> {
-    // Update p
-    let mut p_new = Vec::with_capacity(data.num_intervals());
-    for j in 0..data.num_intervals() {
-        let tmp: f64 = data.data_time_interval_indexes.iter()
-            .filter(|(idx_left, idx_right)| j >= *idx_left && j <= *idx_right)
-            .map(|(idx_left, idx_right)| 1.0 / (s[*idx_left] - s[*idx_right + 1]))
-            .sum();
+    // Compute contributions to m
+    let mut m_contrib = vec![0.0; data.num_intervals()];
+    for (idx_left, idx_right) in data.data_time_interval_indexes.iter() {
+        let contrib = 1.0 / (s[*idx_left] - s[*idx_right + 1]);
 
-        p_new.push(p[j] * tmp / (data.num_obs() as f64));
+        // Adds to m for the first interval in the observation
+        m_contrib[*idx_left] += contrib;
+
+        // Subtracts from m for the first interval beyond the observation
+        if *idx_right + 1 < data.num_intervals() {
+            m_contrib[*idx_right + 1] -= contrib;
+        }
     }
 
+    // Compute m
+    let mut m = Vec::with_capacity(data.num_intervals());
+    let mut m_last = 0.0;
+    for m_contrib_j in m_contrib {
+        let m_next = m_last + m_contrib_j / (data.num_obs() as f64);
+        m.push(m_next);
+        m_last = m_next;
+    }
+
+    // Update p
+    // p := p * m
+    let p_new = p.par_iter().zip(m.into_par_iter()).map(|(p_j, m_j)| p_j * m_j).collect();
+
     return p_new;
 }
 
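[Reviewer note, not part of the series] PATCH 4/5 replaces the O(num_intervals × num_obs) EM scan with a difference array plus prefix sum. The same idea in compact standalone form (slices instead of TurnbullData and no rayon; the extra sentinel slot here stands in for the patch's bounds check — illustration only):

    // intervals[i] = (idx_left, idx_right); s = p_to_s(p) has length p.len() + 1
    fn em_step_fast(intervals: &[(usize, usize)], p: &[f64], s: &[f64]) -> Vec<f64> {
        let n = intervals.len() as f64;
        // Difference array: +1/P_i at the first interval of observation i,
        // -1/P_i just past its last interval (sentinel slot absorbs r + 1 == p.len())
        let mut m_contrib = vec![0.0; p.len() + 1];
        for &(l, r) in intervals {
            let contrib = 1.0 / (s[l] - s[r + 1]);
            m_contrib[l] += contrib;
            m_contrib[r + 1] -= contrib;
        }
        // Prefix sum recovers m[j] = (1/n) * sum of 1/P_i over observations covering j
        let mut m = 0.0;
        p.iter()
            .zip(&m_contrib)
            .map(|(p_j, c)| {
                m += c / n;
                p_j * m
            })
            .collect()
    }

This makes each EM pass linear in num_obs + num_intervals, and it matches the quadratic version up to floating-point rounding.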
From e726fad99b16db77a9ba0f5465511b38143b9449 Mon Sep 17 00:00:00 2001
From: RunasSudo
Date: Sat, 28 Oct 2023 23:47:00 +1100
Subject: [PATCH 5/5] turnbull: Improve efficiency of ICM step as suggested by
 Anderson-Bergman

---
 src/turnbull.rs | 70 +++++++++++++++++++++----------------------------
 1 file changed, 30 insertions(+), 40 deletions(-)

diff --git a/src/turnbull.rs b/src/turnbull.rs
index 7e31a02..fd67669 100644
--- a/src/turnbull.rs
+++ b/src/turnbull.rs
@@ -402,48 +402,37 @@ fn do_em_step(data: &TurnbullData, p: &Vec<f64>, s: &Vec<f64>) -> Vec<f64> {
 
 fn do_icm_step(data: &TurnbullData, _p: &Vec<f64>, s: &Vec<f64>, ll_model: f64) -> (Vec<f64>, Vec<f64>, f64) {
     // Compute Λ, the cumulative hazard
+    // Since Λ = -inf when survival is 1, and Λ = inf when survival is 0, these are omitted
+    // The entry at lambda[j] corresponds to the survival immediately before time point j + 1
     let lambda = s_to_lambda(&s);
 
-    // Compute gradient
-    let mut gradient = DVector::zeros(data.num_intervals() - 1);
-    for j in 0..(data.num_intervals() - 1) {
-        let sum_right: f64 = data.data_time_interval_indexes.iter()
-            .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
-            .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / (s[*idx_left] - s[*idx_right + 1]))
-            .sum();
+    // Compute gradient and diagonal of Hessian
+    let mut gradient = vec![0.0; data.num_intervals() - 1];
+    let mut hessdiag = vec![0.0; data.num_intervals() - 1];
+    for (idx_left, idx_right) in data.data_time_interval_indexes.iter() {
+        let denom = s[*idx_left] - s[*idx_right + 1];
 
-        let sum_left: f64 = data.data_time_interval_indexes.iter()
-            .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
-            .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / (s[*idx_left] - s[*idx_right + 1]))
-            .sum();
+        // Add to gradient[j] when j + 1 == idx_right + 1
+        // Add to hessdiag[j] when j + 1 == idx_right + 1
+        if *idx_right < gradient.len() {
+            let j = *idx_right;
+            gradient[j] += (-lambda[j].exp() + lambda[j]).exp() / denom;
 
-        gradient[j] = sum_right - sum_left;
-    }
-
-    // Compute diagonal of Hessian
-    let mut hessdiag = DVector::zeros(data.num_intervals() - 1);
-    for j in 0..(data.num_intervals() - 1) {
-        let sum_left: f64 = data.data_time_interval_indexes.iter()
-            .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
-            .map(|(idx_left, idx_right)| {
-                let denom = s[*idx_left] - s[*idx_right + 1];
-                let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
-                let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
-                -a - b
-            })
-            .sum();
+            let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
+            let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
+            hessdiag[j] += a - b;
+        }
 
-        let sum_right: f64 = data.data_time_interval_indexes.iter()
-            .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
-            .map(|(idx_left, idx_right)| {
-                let denom = s[*idx_left] - s[*idx_right + 1];
-                let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
-                let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
-                a - b
-            })
-            .sum();
-
-        hessdiag[j] = sum_left + sum_right;
+        // Subtract from gradient[j] when j + 1 == idx_left
+        // Add to hessdiag[j] when j + 1 == idx_left
+        if *idx_left > 0 {
+            let j = *idx_left - 1;
+            gradient[j] -= (-lambda[j].exp() + lambda[j]).exp() / denom;
+
+            let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
+            let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
+            hessdiag[j] += -a - b;
+        }
     }
 
     // Description in Anderson-Bergman (2017) is slightly misleading
@@ -451,7 +440,8 @@ fn do_icm_step(data: &TurnbullData, _p: &Vec<f64>, s: &Vec<f64>, ll_model: f64)
     // And we will move in the direction of the gradient
     // So there are a few more negative signs here than suggested
 
-    let weights = -hessdiag.clone() / 2.0;
+    let weights = -DVector::from_vec(hessdiag.clone()) / 2.0;
+    let gradient_over_hessdiag = DVector::from_vec(gradient.par_iter().zip(hessdiag.par_iter()).map(|(g, h)| g / h).collect());
 
     let mut s_new;
     let mut p_new;
     let mut ll_model_new: f64;
 
     // Take as large a step as possible while the log-likelihood increases
     let mut step_size_exponent: i32 = 0;
     loop {
         let step_size = 0.5_f64.powi(step_size_exponent);
-        let lambda_target = -gradient.component_div(&hessdiag) * step_size + DVector::from_vec(lambda.clone());
+        let lambda_target = -gradient_over_hessdiag.clone() * step_size + DVector::from_vec(lambda.clone());
 
         let lambda_new = monotonic_regression_pava(lambda_target, weights.clone());
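[Reviewer note, not part of the series] The a, b and exp(λ - e^λ) terms accumulated in PATCH 5/5 come from differentiating an observation's contribution ln(S_L - S_R) with S = exp(-exp Λ). A quick numerical sanity check of those expressions for a single observation (all values here are arbitrary):

    fn main() {
        // One observation whose right-endpoint survival is driven by lambda,
        // with the left-endpoint survival held fixed at s_l
        let s_l = 0.9;
        let ll = |lambda: f64| (s_l - (-lambda.exp()).exp()).ln();

        let lambda = -0.3;
        let denom = s_l - (-lambda.exp()).exp();

        // Analytic first and second derivatives, in the form the patch accumulates
        let grad = (lambda - lambda.exp()).exp() / denom;
        let a = ((lambda - lambda.exp()).exp() * (1.0 - lambda.exp())) / denom;
        let b = (2.0 * lambda - 2.0 * lambda.exp()).exp() / denom.powi(2);
        let hess = a - b;

        // Central finite differences
        let h = 1e-4;
        let grad_fd = (ll(lambda + h) - ll(lambda - h)) / (2.0 * h);
        let hess_fd = (ll(lambda + h) - 2.0 * ll(lambda) + ll(lambda - h)) / (h * h);

        assert!((grad - grad_fd).abs() < 1e-6);
        assert!((hess - hess_fd).abs() < 1e-5);
    }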