From 250cfd879852c4ddada8f6e5c1d94c8af5365d00 Mon Sep 17 00:00:00 2001
From: RunasSudo
Date: Sat, 28 Oct 2023 22:48:59 +1100
Subject: [PATCH 1/5] turnbull: Initial implementation of EM-ICM algorithm

---
 src/turnbull.rs | 177 ++++++++++++++++++++++++++++++++++++------------
 1 file changed, 132 insertions(+), 45 deletions(-)

diff --git a/src/turnbull.rs b/src/turnbull.rs
index 9d0b49d..d685ce9 100644
--- a/src/turnbull.rs
+++ b/src/turnbull.rs
@@ -27,6 +27,7 @@ use prettytable::{Table, format, row};
 use rayon::prelude::*;
 use serde::{Serialize, Deserialize};
 
+use crate::pava::monotonic_regression_pava;
 use crate::term::UnconditionalTermLike;
 
 #[derive(Args)]
@@ -205,7 +206,7 @@ pub fn fit_turnbull(data_times: MatrixXx2<f64>, progress_bar: ProgressBar, max_i
     progress_bar.set_style(ProgressStyle::with_template("[{elapsed_precise}] {bar:40} {msg}").unwrap());
     progress_bar.set_length(u64::MAX);
     progress_bar.reset();
-    progress_bar.println("Running iterative algorithm to fit Turnbull estimator");
+    progress_bar.println("Running EM-ICM algorithm to fit Turnbull estimator");
 
     let (s, ll) = fit_turnbull_estimator(&mut data, progress_bar.clone(), max_iterations, ll_tolerance, s);
 
@@ -285,24 +286,144 @@ fn get_turnbull_intervals(data_times: &MatrixXx2<f64>) -> Vec<(f64, f64)> {
     return intervals;
 }
 
-fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, max_iterations: u32, ll_tolerance: f64, mut s: Vec<f64>) -> (Vec<f64>, f64) {
-    // Get likelihood for each observation (denominator of μ_ij)
-    let mut likelihood_obs = get_likelihood_obs(data, &s);
+fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, max_iterations: u32, ll_tolerance: f64, mut p: Vec<f64>) -> (Vec<f64>, f64) {
+    // Get likelihood for each observation
+    let mut likelihood_obs = get_likelihood_obs(data, &p);
     let mut ll_model: f64 = likelihood_obs.iter().map(|l| l.ln()).sum();
 
     let mut iteration = 1;
     loop {
-        // Compute π_j to update s
-        let pi = compute_pi(data, &s, likelihood_obs);
+        // -------
+        // EM step
 
-        let likelihood_obs_new = get_likelihood_obs(data, &pi);
-        let ll_model_new = likelihood_obs_new.iter().map(|l| l.ln()).sum();
+        // Pre-compute S, the survival probability at the start of each interval
+        let mut s = Vec::with_capacity(data.num_intervals() + 1);
+        let mut survival = 1.0;
+        s.push(1.0);
+        for p_j in p.iter() {
+            survival -= p_j;
+            s.push(survival);
+        }
+
+        // Update p
+        let mut p_new = Vec::with_capacity(data.num_intervals());
+        for j in 0..data.num_intervals() {
+            let tmp: f64 = data.data_time_interval_indexes.iter()
+                .filter(|(idx_left, idx_right)| j >= *idx_left && j <= *idx_right)
+                //.map(|(idx_left, idx_right)| 1.0 / p[*idx_left..(*idx_right + 1)].iter().sum::<f64>())
+                .map(|(idx_left, idx_right)| 1.0 / (s[*idx_left] - s[*idx_right + 1]))
+                .sum();
+
+            p_new.push(p[j] * tmp / (data.num_obs() as f64));
+        }
+
+        let likelihood_obs_after_em = get_likelihood_obs(data, &p_new);
+        let ll_model_after_em: f64 = likelihood_obs_after_em.iter().map(|l| l.ln()).sum();
+
+        p = p_new;
+
+        // --------
+        // ICM step
+
+        // Compute Λ
+        // S = 1 means Λ = -inf and S = 0 means Λ = inf so skip these
+        let mut lambda = Vec::with_capacity(data.num_intervals() - 1);
+        let mut survival: f64 = 1.0;
+        for j in 0..(data.num_intervals() - 1) {
+            survival -= p[j];
+            lambda.push((-survival.ln()).ln());
+        }
+
+        // Compute gradient
+        let mut gradient = DVector::zeros(data.num_intervals() - 1);
+        for j in 0..(data.num_intervals() - 1) {
+            let sum_right: f64 = data.data_time_interval_indexes.iter()
+                .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
+                .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / p[*idx_left..(*idx_right + 1)].iter().sum::<f64>())
+                .sum();
+
+            let sum_left: f64 = data.data_time_interval_indexes.iter()
+                .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
+                .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / p[*idx_left..(*idx_right + 1)].iter().sum::<f64>())
+                .sum();
+
+            gradient[j] = sum_right - sum_left;
+        }
+
+        // Compute diagonal of Hessian
+        let mut hessdiag = DVector::zeros(data.num_intervals() - 1);
+        for j in 0..(data.num_intervals() - 1) {
+            let sum_left: f64 = data.data_time_interval_indexes.iter()
+                .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
+                .map(|(idx_left, idx_right)| {
+                    let denom: f64 = p[*idx_left..(*idx_right + 1)].iter().sum();
+                    let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
+                    let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
+                    -a - b
+                })
+                .sum();
+
+            let sum_right: f64 = data.data_time_interval_indexes.iter()
+                .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
+                .map(|(idx_left, idx_right)| {
+                    let denom: f64 = p[*idx_left..(*idx_right + 1)].iter().sum();
+                    let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
+                    let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
+                    a - b
+                })
+                .sum();
+
+            hessdiag[j] = sum_left + sum_right;
+        }
+
+        // Description in Anderson-Bergman (2017) is slightly misleading
+        // Since we are maximising the likelihood, the second derivatives will be negative
+        // And we will move in the direction of the gradient
+        // So there are a few more negative signs here than suggested
+
+        let weights = -hessdiag.clone() / 2.0;
+
+        let mut p_new;
+        let mut ll_model_new: f64;
+
+        // Take as large a step as possible while the log-likelihood increases
+        let mut step_size_exponent: i32 = 0;
+        loop {
+            let step_size = 0.5_f64.powi(step_size_exponent);
+            let lambda_target = -gradient.component_div(&hessdiag) * step_size + DVector::from_vec(lambda.clone());
+
+            let lambda_new = monotonic_regression_pava(lambda_target, weights.clone());
+
+            // Convert Λ to S to p
+            p_new = Vec::with_capacity(data.num_intervals());
+
+            let mut survival = 1.0;
+            for lambda_j in lambda_new.iter() {
+                let next_survival = (-lambda_j.exp()).exp();
+                p_new.push(survival - next_survival);
+                survival = next_survival;
+            }
+            p_new.push(survival);
+
+            let likelihood_obs_new = get_likelihood_obs(data, &p_new);
+            ll_model_new = likelihood_obs_new.iter().map(|l| l.ln()).sum();
+
+            if ll_model_new > ll_model_after_em {
+                break;
+            }
+
+            step_size_exponent += 1;
+
+            if step_size_exponent > 10 {
+                panic!("ICM fails to increase log-likelihood");
+            }
+        }
 
         let ll_change = ll_model_new - ll_model;
         let converged = ll_change <= ll_tolerance;
 
-        s = pi;
-        likelihood_obs = likelihood_obs_new;
+        p = p_new;
+        //likelihood_obs = likelihood_obs_new;
         ll_model = ll_model_new;
 
         // Estimate progress bar according to either the order of magnitude of the ll_change relative to tolerance, or iteration/max_iterations
@@ -324,7 +445,7 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
         }
     }
 
-    return (s, ll_model);
+    return (p, ll_model);
 }
 
 fn get_likelihood_obs(data: &TurnbullData, s: &Vec<f64>) -> Vec<f64> {
     return data.data_time_interval_indexes
         .par_iter()
         .map(|(idx_left, idx_right)| s[*idx_left..(*idx_right + 1)].iter().sum())
         .collect();
 }
 
-fn compute_pi(data: &TurnbullData, s: &Vec<f64>, likelihood_obs: Vec<f64>) -> Vec<f64> {
-    /*
-    let mut pi: Vec<f64> = vec![0.0; data.num_intervals()];
-    for ((idx_left, idx_right), likelihood_obs_i) in data.data_time_interval_indexes.iter().zip(likelihood_obs.iter()) {
-        for j in *idx_left..(*idx_right + 1) {
-            pi[j] += s[j] / likelihood_obs_i / data.num_obs() as f64;
-        }
-    }
-    */
-
-    let pi = data.data_time_interval_indexes.par_iter().zip(likelihood_obs.par_iter())
-        .fold_with(
-            // Compute the contributions to pi[j] for each observation and sum them in parallel using fold_with
-            vec![0.0; data.num_intervals()],
-            |mut acc, ((idx_left, idx_right), likelihood_obs_i)| {
-                // Contributions to pi[j] for the i-th observation
-                for j in *idx_left..(*idx_right + 1) {
-                    acc[j] += s[j] / likelihood_obs_i / data.num_obs() as f64;
-                }
-                acc
-            }
-        )
-        .reduce(
-            // Reduce all the sub-sums from fold_with into the total sum
-            || vec![0.0; data.num_intervals()],
-            |mut acc, subsum| {
-                acc.iter_mut().zip(subsum.iter()).for_each(|(x, y)| *x += y);
-                acc
-            }
-        );
-
-    return pi;
-}
-
 fn compute_hessian(data: &TurnbullData, s: &Vec<f64>) -> DMatrix<f64> {
     let mut hessian: DMatrix<f64> = DMatrix::zeros(data.num_intervals() - 1, data.num_intervals() - 1);
 
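[Reviewer note, not part of the series] The EM update in PATCH 1/5 is the Turnbull self-consistency step: each interval's mass is rescaled by the average, over observations covering it, of the reciprocal of that observation's total mass. A minimal self-contained sketch of one EM pass (plain Rust; the slice-based signature and the example data are made up for illustration, with TurnbullData and the rayon parallelism omitted):

    // One EM pass over Turnbull intervals.
    // intervals[i] = (idx_left, idx_right) gives the first and last Turnbull
    // interval consistent with observation i; p[j] is the current mass of interval j.
    fn em_step(intervals: &[(usize, usize)], p: &[f64]) -> Vec<f64> {
        let n = intervals.len() as f64;
        (0..p.len())
            .map(|j| {
                // Sum 1/P_i over observations whose interval range covers j,
                // where P_i is the probability mass inside observation i
                let sum: f64 = intervals.iter()
                    .filter(|(l, r)| j >= *l && j <= *r)
                    .map(|(l, r)| 1.0 / p[*l..=*r].iter().sum::<f64>())
                    .sum();
                p[j] * sum / n
            })
            .collect()
    }

    fn main() {
        // Two observations: one pinned to interval 0, one spanning intervals 0..=1
        let intervals = [(0usize, 0usize), (0, 1)];
        let p = vec![0.5, 0.5];
        assert_eq!(em_step(&intervals, &p), vec![0.75, 0.25]);
    }

Iterating this update alone is the classical (slow) Turnbull algorithm; the ICM step exists to accelerate it.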
From 81b0b3f9b54066502531267996226fa73fdf3adb Mon Sep 17 00:00:00 2001
From: RunasSudo
Date: Sat, 28 Oct 2023 23:08:03 +1100
Subject: [PATCH 2/5] turnbull: Pre-compute survival probabilities

---
 src/turnbull.rs | 69 +++++++++++++++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 28 deletions(-)

diff --git a/src/turnbull.rs b/src/turnbull.rs
index d685ce9..d5b93f1 100644
--- a/src/turnbull.rs
+++ b/src/turnbull.rs
@@ -287,8 +287,11 @@ fn get_turnbull_intervals(data_times: &MatrixXx2<f64>) -> Vec<(f64, f64)> {
 }
 
 fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, max_iterations: u32, ll_tolerance: f64, mut p: Vec<f64>) -> (Vec<f64>, f64) {
+    // Pre-compute S, the survival probability at the start of each interval
+    let mut s = p_to_s(&p);
+
     // Get likelihood for each observation
-    let mut likelihood_obs = get_likelihood_obs(data, &p);
+    let mut likelihood_obs = get_likelihood_obs(data, &s);
     let mut ll_model: f64 = likelihood_obs.iter().map(|l| l.ln()).sum();
 
     let mut iteration = 1;
@@ -296,55 +299,41 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
         // -------
         // EM step
 
-        // Pre-compute S, the survival probability at the start of each interval
-        let mut s = Vec::with_capacity(data.num_intervals() + 1);
-        let mut survival = 1.0;
-        s.push(1.0);
-        for p_j in p.iter() {
-            survival -= p_j;
-            s.push(survival);
-        }
-
         // Update p
         let mut p_new = Vec::with_capacity(data.num_intervals());
         for j in 0..data.num_intervals() {
             let tmp: f64 = data.data_time_interval_indexes.iter()
                 .filter(|(idx_left, idx_right)| j >= *idx_left && j <= *idx_right)
-                //.map(|(idx_left, idx_right)| 1.0 / p[*idx_left..(*idx_right + 1)].iter().sum::<f64>())
                 .map(|(idx_left, idx_right)| 1.0 / (s[*idx_left] - s[*idx_right + 1]))
                 .sum();
 
             p_new.push(p[j] * tmp / (data.num_obs() as f64));
         }
 
-        let likelihood_obs_after_em = get_likelihood_obs(data, &p_new);
+        let mut s_new = p_to_s(&p_new);
+        let likelihood_obs_after_em = get_likelihood_obs(data, &s_new);
         let ll_model_after_em: f64 = likelihood_obs_after_em.iter().map(|l| l.ln()).sum();
 
         p = p_new;
+        s = s_new;
 
         // --------
         // ICM step
 
-        // Compute Λ
-        // S = 1 means Λ = -inf and S = 0 means Λ = inf so skip these
-        let mut lambda = Vec::with_capacity(data.num_intervals() - 1);
-        let mut survival: f64 = 1.0;
-        for j in 0..(data.num_intervals() - 1) {
-            survival -= p[j];
-            lambda.push((-survival.ln()).ln());
-        }
+        // Compute Λ, the cumulative hazard
+        let lambda = s_to_lambda(&s);
 
         // Compute gradient
         let mut gradient = DVector::zeros(data.num_intervals() - 1);
         for j in 0..(data.num_intervals() - 1) {
             let sum_right: f64 = data.data_time_interval_indexes.iter()
                 .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
-                .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / p[*idx_left..(*idx_right + 1)].iter().sum::<f64>())
+                .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / (s[*idx_left] - s[*idx_right + 1]))
                 .sum();
 
             let sum_left: f64 = data.data_time_interval_indexes.iter()
                 .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
-                .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / p[*idx_left..(*idx_right + 1)].iter().sum::<f64>())
+                .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / (s[*idx_left] - s[*idx_right + 1]))
                 .sum();
 
             gradient[j] = sum_right - sum_left;
@@ -356,7 +345,7 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
             let sum_left: f64 = data.data_time_interval_indexes.iter()
                 .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
                 .map(|(idx_left, idx_right)| {
-                    let denom: f64 = p[*idx_left..(*idx_right + 1)].iter().sum();
+                    let denom = s[*idx_left] - s[*idx_right + 1];
                     let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
                     let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
                     -a - b
                 })
                 .sum();
@@ -366,7 +355,7 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
             let sum_right: f64 = data.data_time_interval_indexes.iter()
                 .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
                 .map(|(idx_left, idx_right)| {
-                    let denom: f64 = p[*idx_left..(*idx_right + 1)].iter().sum();
+                    let denom = s[*idx_left] - s[*idx_right + 1];
                     let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
                     let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
                     a - b
                 })
                 .sum();
@@ -395,17 +384,21 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
             let lambda_new = monotonic_regression_pava(lambda_target, weights.clone());
 
             // Convert Λ to S to p
+            s_new = Vec::with_capacity(data.num_intervals() + 1);
             p_new = Vec::with_capacity(data.num_intervals());
 
             let mut survival = 1.0;
+            s_new.push(1.0);
             for lambda_j in lambda_new.iter() {
                 let next_survival = (-lambda_j.exp()).exp();
+                s_new.push(next_survival);
                 p_new.push(survival - next_survival);
                 survival = next_survival;
             }
+            s_new.push(0.0);
             p_new.push(survival);
 
-            let likelihood_obs_new = get_likelihood_obs(data, &p_new);
+            let likelihood_obs_new = get_likelihood_obs(data, &s_new);
             ll_model_new = likelihood_obs_new.iter().map(|l| l.ln()).sum();
 
             if ll_model_new > ll_model_after_em {
@@ -423,7 +416,7 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
         let ll_change = ll_model_new - ll_model;
         let converged = ll_change <= ll_tolerance;
 
         p = p_new;
-        //likelihood_obs = likelihood_obs_new;
+        s = s_new;
         ll_model = ll_model_new;
 
         // Estimate progress bar according to either the order of magnitude of the ll_change relative to tolerance, or iteration/max_iterations
@@ -448,11 +441,31 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
     return (p, ll_model);
 }
 
+fn p_to_s(p: &Vec<f64>) -> Vec<f64> {
+    let mut s = Vec::with_capacity(p.len() + 1); // Survival probabilities
+    let mut survival = 1.0;
+    s.push(1.0);
+    for p_j in p.iter() {
+        survival -= p_j;
+        s.push(survival);
+    }
+    return s;
+}
+
+fn s_to_lambda(s: &Vec<f64>) -> Vec<f64> {
+    // S = 1 means Λ = -inf and S = 0 means Λ = inf so skip these
+    let mut lambda = Vec::with_capacity(s.len() - 2); // Cumulative hazard
+    for s_j in &s[1..(s.len() - 1)] {
+        lambda.push((-s_j.ln()).ln());
+    }
+    return lambda;
+}
+
 fn get_likelihood_obs(data: &TurnbullData, s: &Vec<f64>) -> Vec<f64> {
     return data.data_time_interval_indexes
         .par_iter()
-        .map(|(idx_left, idx_right)| s[*idx_left..(*idx_right + 1)].iter().sum())
-        .collect();
+        .map(|(idx_left, idx_right)| s[*idx_left] - s[*idx_right + 1])
+        .collect(); // TODO: Return iterator directly
 }
 
 fn compute_hessian(data: &TurnbullData, s: &Vec<f64>) -> DMatrix<f64> {
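[Reviewer note, not part of the series] p_to_s and s_to_lambda in PATCH 2/5 are the two changes of variables the algorithm pivots on: S_j = 1 - Σ_{k<j} p_k, and the cloglog link Λ = ln(-ln S), inverted by S = exp(-exp Λ). A round-trip sketch using slices rather than &Vec (the function names here and the example masses are illustrative, not the patched code):

    fn p_to_s(p: &[f64]) -> Vec<f64> {
        // S starts at 1 and decreases by each interval's mass
        let mut s = vec![1.0];
        let mut survival = 1.0;
        for p_j in p {
            survival -= p_j;
            s.push(survival);
        }
        s
    }

    fn s_to_lambda(s: &[f64]) -> Vec<f64> {
        // Interior points only: S = 1 gives Λ = -inf, S = 0 gives Λ = +inf
        s[1..s.len() - 1].iter().map(|s_j| (-s_j.ln()).ln()).collect()
    }

    fn lambda_to_s(lambda: &[f64]) -> Vec<f64> {
        let mut s = vec![1.0];
        s.extend(lambda.iter().map(|l| (-l.exp()).exp()));
        s.push(0.0);
        s
    }

    fn main() {
        let p = [0.2, 0.3, 0.5];
        let s = p_to_s(&p);           // ~[1.0, 0.8, 0.5, 0.0]
        let lambda = s_to_lambda(&s); // [ln(-ln 0.8), ln(-ln 0.5)]
        let s_back = lambda_to_s(&lambda);
        assert!(s.iter().zip(&s_back).all(|(a, b)| (a - b).abs() < 1e-12));
    }

A non-decreasing Λ is exactly what makes S non-increasing and every p_j nonnegative, which is why the ICM step can enforce the constraints by projecting Λ onto monotone sequences with PAVA.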
From 37c904bf34af885e43e7170ec177367445ad4ec1 Mon Sep 17 00:00:00 2001
From: RunasSudo
Date: Sat, 28 Oct 2023 23:16:14 +1100
Subject: [PATCH 3/5] turnbull: Refactor for profiling

---
 src/turnbull.rs | 223 +++++++++++++++++++++++++-----------------------
 1 file changed, 117 insertions(+), 106 deletions(-)

diff --git a/src/turnbull.rs b/src/turnbull.rs
index d5b93f1..6760727 100644
--- a/src/turnbull.rs
+++ b/src/turnbull.rs
@@ -291,7 +291,7 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
     let mut s = p_to_s(&p);
 
     // Get likelihood for each observation
-    let mut likelihood_obs = get_likelihood_obs(data, &s);
+    let likelihood_obs = get_likelihood_obs(data, &s);
     let mut ll_model: f64 = likelihood_obs.iter().map(|l| l.ln()).sum();
 
     let mut iteration = 1;
@@ -299,118 +299,19 @@ fn fit_turnbull_estimator(data: &mut TurnbullData, progress_bar: ProgressBar, ma
         // -------
         // EM step
 
-        // Update p
-        let mut p_new = Vec::with_capacity(data.num_intervals());
-        for j in 0..data.num_intervals() {
-            let tmp: f64 = data.data_time_interval_indexes.iter()
-                .filter(|(idx_left, idx_right)| j >= *idx_left && j <= *idx_right)
-                .map(|(idx_left, idx_right)| 1.0 / (s[*idx_left] - s[*idx_right + 1]))
-                .sum();
-
-            p_new.push(p[j] * tmp / (data.num_obs() as f64));
-        }
+        let p_after_em = do_em_step(data, &p, &s);
+        let s_after_em = p_to_s(&p_after_em);
 
-        let mut s_new = p_to_s(&p_new);
-        let likelihood_obs_after_em = get_likelihood_obs(data, &s_new);
+        let likelihood_obs_after_em = get_likelihood_obs(data, &s_after_em);
         let ll_model_after_em: f64 = likelihood_obs_after_em.iter().map(|l| l.ln()).sum();
 
-        p = p_new;
-        s = s_new;
+        p = p_after_em;
+        s = s_after_em;
 
         // --------
         // ICM step
 
-        // Compute Λ, the cumulative hazard
-        let lambda = s_to_lambda(&s);
-
-        // Compute gradient
-        let mut gradient = DVector::zeros(data.num_intervals() - 1);
-        for j in 0..(data.num_intervals() - 1) {
-            let sum_right: f64 = data.data_time_interval_indexes.iter()
-                .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
-                .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / (s[*idx_left] - s[*idx_right + 1]))
-                .sum();
-
-            let sum_left: f64 = data.data_time_interval_indexes.iter()
-                .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
-                .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / (s[*idx_left] - s[*idx_right + 1]))
-                .sum();
-
-            gradient[j] = sum_right - sum_left;
-        }
-
-        // Compute diagonal of Hessian
-        let mut hessdiag = DVector::zeros(data.num_intervals() - 1);
-        for j in 0..(data.num_intervals() - 1) {
-            let sum_left: f64 = data.data_time_interval_indexes.iter()
-                .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
-                .map(|(idx_left, idx_right)| {
-                    let denom = s[*idx_left] - s[*idx_right + 1];
-                    let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
-                    let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
-                    -a - b
-                })
-                .sum();
-
-            let sum_right: f64 = data.data_time_interval_indexes.iter()
-                .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
-                .map(|(idx_left, idx_right)| {
-                    let denom = s[*idx_left] - s[*idx_right + 1];
-                    let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
-                    let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
-                    a - b
-                })
-                .sum();
-
-            hessdiag[j] = sum_left + sum_right;
-        }
-
-        // Description in Anderson-Bergman (2017) is slightly misleading
-        // Since we are maximising the likelihood, the second derivatives will be negative
-        // And we will move in the direction of the gradient
-        // So there are a few more negative signs here than suggested
-
-        let weights = -hessdiag.clone() / 2.0;
-
-        let mut s_new;
-        let mut p_new;
-        let mut ll_model_new: f64;
-
-        // Take as large a step as possible while the log-likelihood increases
-        let mut step_size_exponent: i32 = 0;
-        loop {
-            let step_size = 0.5_f64.powi(step_size_exponent);
-            let lambda_target = -gradient.component_div(&hessdiag) * step_size + DVector::from_vec(lambda.clone());
-
-            let lambda_new = monotonic_regression_pava(lambda_target, weights.clone());
-
-            // Convert Λ to S to p
-            s_new = Vec::with_capacity(data.num_intervals() + 1);
-            p_new = Vec::with_capacity(data.num_intervals());
-
-            let mut survival = 1.0;
-            s_new.push(1.0);
-            for lambda_j in lambda_new.iter() {
-                let next_survival = (-lambda_j.exp()).exp();
-                s_new.push(next_survival);
-                p_new.push(survival - next_survival);
-                survival = next_survival;
-            }
-            s_new.push(0.0);
-            p_new.push(survival);
-
-            let likelihood_obs_new = get_likelihood_obs(data, &s_new);
-            ll_model_new = likelihood_obs_new.iter().map(|l| l.ln()).sum();
-
-            if ll_model_new > ll_model_after_em {
-                break;
-            }
-
-            step_size_exponent += 1;
-
-            if step_size_exponent > 10 {
-                panic!("ICM fails to increase log-likelihood");
-            }
-        }
+        let (p_new, s_new, ll_model_new) = do_icm_step(data, &p, &s, ll_model_after_em);
 
         let ll_change = ll_model_new - ll_model;
         let converged = ll_change <= ll_tolerance;
 
         p = p_new;
         s = s_new;
         ll_model = ll_model_new;
@@ -468,6 +369,116 @@ fn get_likelihood_obs(data: &TurnbullData, s: &Vec<f64>) -> Vec<f64> {
         .collect(); // TODO: Return iterator directly
 }
 
+fn do_em_step(data: &TurnbullData, p: &Vec<f64>, s: &Vec<f64>) -> Vec<f64> {
+    // Update p
+    let mut p_new = Vec::with_capacity(data.num_intervals());
+    for j in 0..data.num_intervals() {
+        let tmp: f64 = data.data_time_interval_indexes.iter()
+            .filter(|(idx_left, idx_right)| j >= *idx_left && j <= *idx_right)
+            .map(|(idx_left, idx_right)| 1.0 / (s[*idx_left] - s[*idx_right + 1]))
+            .sum();
+
+        p_new.push(p[j] * tmp / (data.num_obs() as f64));
+    }
+
+    return p_new;
+}
+
+fn do_icm_step(data: &TurnbullData, _p: &Vec<f64>, s: &Vec<f64>, ll_model: f64) -> (Vec<f64>, Vec<f64>, f64) {
+    // Compute Λ, the cumulative hazard
+    let lambda = s_to_lambda(&s);
+
+    // Compute gradient
+    let mut gradient = DVector::zeros(data.num_intervals() - 1);
+    for j in 0..(data.num_intervals() - 1) {
+        let sum_right: f64 = data.data_time_interval_indexes.iter()
+            .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
+            .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / (s[*idx_left] - s[*idx_right + 1]))
+            .sum();
+
+        let sum_left: f64 = data.data_time_interval_indexes.iter()
+            .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
+            .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / (s[*idx_left] - s[*idx_right + 1]))
+            .sum();
+
+        gradient[j] = sum_right - sum_left;
+    }
+
+    // Compute diagonal of Hessian
+    let mut hessdiag = DVector::zeros(data.num_intervals() - 1);
+    for j in 0..(data.num_intervals() - 1) {
+        let sum_left: f64 = data.data_time_interval_indexes.iter()
+            .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
+            .map(|(idx_left, idx_right)| {
+                let denom = s[*idx_left] - s[*idx_right + 1];
+                let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
+                let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
+                -a - b
+            })
+            .sum();
+
+        let sum_right: f64 = data.data_time_interval_indexes.iter()
+            .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
+            .map(|(idx_left, idx_right)| {
+                let denom = s[*idx_left] - s[*idx_right + 1];
+                let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
+                let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
+                a - b
+            })
+            .sum();
+
+        hessdiag[j] = sum_left + sum_right;
+    }
+
+    // Description in Anderson-Bergman (2017) is slightly misleading
+    // Since we are maximising the likelihood, the second derivatives will be negative
+    // And we will move in the direction of the gradient
+    // So there are a few more negative signs here than suggested
+
+    let weights = -hessdiag.clone() / 2.0;
+
+    let mut s_new;
+    let mut p_new;
+    let mut ll_model_new: f64;
+
+    // Take as large a step as possible while the log-likelihood increases
+    let mut step_size_exponent: i32 = 0;
+    loop {
+        let step_size = 0.5_f64.powi(step_size_exponent);
+        let lambda_target = -gradient.component_div(&hessdiag) * step_size + DVector::from_vec(lambda.clone());
+
+        let lambda_new = monotonic_regression_pava(lambda_target, weights.clone());
+
+        // Convert Λ to S to p
+        s_new = Vec::with_capacity(data.num_intervals() + 1);
+        p_new = Vec::with_capacity(data.num_intervals());
+
+        let mut survival = 1.0;
+        s_new.push(1.0);
+        for lambda_j in lambda_new.iter() {
+            let next_survival = (-lambda_j.exp()).exp();
+            s_new.push(next_survival);
+            p_new.push(survival - next_survival);
+            survival = next_survival;
+        }
+        s_new.push(0.0);
+        p_new.push(survival);
+
+        let likelihood_obs_new = get_likelihood_obs(data, &s_new);
+        ll_model_new = likelihood_obs_new.iter().map(|l| l.ln()).sum();
+
+        if ll_model_new > ll_model {
+            return (p_new, s_new, ll_model_new);
+        }
+
+        step_size_exponent += 1;
+
+        if step_size_exponent > 10 {
+            panic!("ICM fails to increase log-likelihood");
+        }
+    }
+}
+
 fn compute_hessian(data: &TurnbullData, s: &Vec<f64>) -> DMatrix<f64> {
     let mut hessian: DMatrix<f64> = DMatrix::zeros(data.num_intervals() - 1, data.num_intervals() - 1);
 
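[Reviewer note, not part of the series] The projection inside do_icm_step in PATCH 3/5 is monotonic_regression_pava from the crate's pava module, which this series does not show. For reference, a textbook weighted pool-adjacent-violators pass over plain slices — a sketch of the standard algorithm, not necessarily what crate::pava implements, and with a different (slice-based) signature — looks like:

    // Weighted isotonic regression: returns the non-decreasing sequence
    // minimising sum of w[i] * (fit[i] - y[i])^2
    fn pava(y: &[f64], w: &[f64]) -> Vec<f64> {
        // Each block holds (weighted mean, total weight, number of points pooled)
        let mut blocks: Vec<(f64, f64, usize)> = Vec::with_capacity(y.len());
        for (&y_i, &w_i) in y.iter().zip(w) {
            blocks.push((y_i, w_i, 1));
            // Pool the last two blocks while they violate monotonicity
            while blocks.len() > 1 {
                let (m2, w2, n2) = blocks[blocks.len() - 1];
                let (m1, w1, n1) = blocks[blocks.len() - 2];
                if m1 <= m2 {
                    break;
                }
                blocks.truncate(blocks.len() - 2);
                blocks.push(((m1 * w1 + m2 * w2) / (w1 + w2), w1 + w2, n1 + n2));
            }
        }
        let mut fit = Vec::with_capacity(y.len());
        for (m, _, n) in blocks {
            fit.extend(std::iter::repeat(m).take(n));
        }
        fit
    }

    fn main() {
        let y = [1.0, 3.0, 2.0, 4.0];
        let w = [1.0; 4];
        assert_eq!(pava(&y, &w), vec![1.0, 2.5, 2.5, 4.0]);
    }

do_icm_step weights the regression by -hessdiag/2, so coordinates where the log-likelihood is sharply curved move least when neighbouring blocks are pooled.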
From 85e3ee0dcdeedd683af85181d3a7d992b1485173 Mon Sep 17 00:00:00 2001
From: RunasSudo
Date: Sat, 28 Oct 2023 23:28:42 +1100
Subject: [PATCH 4/5] turnbull: Improve efficiency of EM step as suggested by
 Anderson-Bergman

---
 src/turnbull.rs | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/src/turnbull.rs b/src/turnbull.rs
index 6760727..7e31a02 100644
--- a/src/turnbull.rs
+++ b/src/turnbull.rs
@@ -370,17 +370,33 @@ fn get_likelihood_obs(data: &TurnbullData, s: &Vec<f64>) -> Vec<f64> {
 }
 
 fn do_em_step(data: &TurnbullData, p: &Vec<f64>, s: &Vec<f64>) -> Vec<f64> {
-    // Update p
-    let mut p_new = Vec::with_capacity(data.num_intervals());
-    for j in 0..data.num_intervals() {
-        let tmp: f64 = data.data_time_interval_indexes.iter()
-            .filter(|(idx_left, idx_right)| j >= *idx_left && j <= *idx_right)
-            .map(|(idx_left, idx_right)| 1.0 / (s[*idx_left] - s[*idx_right + 1]))
-            .sum();
+    // Compute contributions to m
+    let mut m_contrib = vec![0.0; data.num_intervals()];
+    for (idx_left, idx_right) in data.data_time_interval_indexes.iter() {
+        let contrib = 1.0 / (s[*idx_left] - s[*idx_right + 1]);
 
-        p_new.push(p[j] * tmp / (data.num_obs() as f64));
+        // Adds to m for the first interval in the observation
+        m_contrib[*idx_left] += contrib;
+
+        // Subtracts from m for the first interval beyond the observation
+        if *idx_right + 1 < data.num_intervals() {
+            m_contrib[*idx_right + 1] -= contrib;
+        }
     }
 
+    // Compute m
+    let mut m = Vec::with_capacity(data.num_intervals());
+    let mut m_last = 0.0;
+    for m_contrib_j in m_contrib {
+        let m_next = m_last + m_contrib_j / (data.num_obs() as f64);
+        m.push(m_next);
+        m_last = m_next;
+    }
+
+    // Update p
+    // p := p * m
+    let p_new = p.par_iter().zip(m.into_par_iter()).map(|(p_j, m_j)| p_j * m_j).collect();
+
     return p_new;
 }
 
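[Reviewer note, not part of the series] PATCH 4/5 replaces the O(num_intervals × num_obs) EM scan with a difference array plus prefix sum. The same idea in compact standalone form (slices instead of TurnbullData and no rayon; the extra sentinel slot here stands in for the patch's bounds check — illustration only):

    // intervals[i] = (idx_left, idx_right); s = p_to_s(p) has length p.len() + 1
    fn em_step_fast(intervals: &[(usize, usize)], p: &[f64], s: &[f64]) -> Vec<f64> {
        let n = intervals.len() as f64;
        // Difference array: +1/P_i at the first interval of observation i,
        // -1/P_i just past its last interval (sentinel slot absorbs r + 1 == p.len())
        let mut m_contrib = vec![0.0; p.len() + 1];
        for &(l, r) in intervals {
            let contrib = 1.0 / (s[l] - s[r + 1]);
            m_contrib[l] += contrib;
            m_contrib[r + 1] -= contrib;
        }
        // Prefix sum recovers m[j] = (1/n) * sum of 1/P_i over observations covering j
        let mut m = 0.0;
        p.iter()
            .zip(&m_contrib)
            .map(|(p_j, c)| {
                m += c / n;
                p_j * m
            })
            .collect()
    }

This makes each EM pass linear in num_obs + num_intervals, and it matches the quadratic version up to floating-point rounding.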
From e726fad99b16db77a9ba0f5465511b38143b9449 Mon Sep 17 00:00:00 2001
From: RunasSudo
Date: Sat, 28 Oct 2023 23:47:00 +1100
Subject: [PATCH 5/5] turnbull: Improve efficiency of ICM step as suggested by
 Anderson-Bergman

---
 src/turnbull.rs | 70 +++++++++++++++++++++----------------------------
 1 file changed, 30 insertions(+), 40 deletions(-)

diff --git a/src/turnbull.rs b/src/turnbull.rs
index 7e31a02..fd67669 100644
--- a/src/turnbull.rs
+++ b/src/turnbull.rs
@@ -402,48 +402,37 @@ fn do_em_step(data: &TurnbullData, p: &Vec<f64>, s: &Vec<f64>) -> Vec<f64> {
 
 fn do_icm_step(data: &TurnbullData, _p: &Vec<f64>, s: &Vec<f64>, ll_model: f64) -> (Vec<f64>, Vec<f64>, f64) {
     // Compute Λ, the cumulative hazard
+    // Since Λ = -inf when survival is 1, and Λ = inf when survival is 0, these are omitted
+    // The entry at lambda[j] corresponds to the survival immediately before time point j + 1
     let lambda = s_to_lambda(&s);
 
-    // Compute gradient
-    let mut gradient = DVector::zeros(data.num_intervals() - 1);
-    for j in 0..(data.num_intervals() - 1) {
-        let sum_right: f64 = data.data_time_interval_indexes.iter()
-            .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
-            .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / (s[*idx_left] - s[*idx_right + 1]))
-            .sum();
+    // Compute gradient and diagonal of Hessian
+    let mut gradient = vec![0.0; data.num_intervals() - 1];
+    let mut hessdiag = vec![0.0; data.num_intervals() - 1];
+    for (idx_left, idx_right) in data.data_time_interval_indexes.iter() {
+        let denom = s[*idx_left] - s[*idx_right + 1];
 
-        let sum_left: f64 = data.data_time_interval_indexes.iter()
-            .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
-            .map(|(idx_left, idx_right)| (-lambda[j].exp() + lambda[j]).exp() / (s[*idx_left] - s[*idx_right + 1]))
-            .sum();
+        // Add to gradient[j] when j + 1 == idx_right + 1
+        // Add to hessdiag[j] when j + 1 == idx_right + 1
+        if *idx_right < gradient.len() {
+            let j = *idx_right;
+            gradient[j] += (-lambda[j].exp() + lambda[j]).exp() / denom;
 
-        gradient[j] = sum_right - sum_left;
-    }
-
-    // Compute diagonal of Hessian
-    let mut hessdiag = DVector::zeros(data.num_intervals() - 1);
-    for j in 0..(data.num_intervals() - 1) {
-        let sum_left: f64 = data.data_time_interval_indexes.iter()
-            .filter(|(idx_left, idx_right)| j + 1 == *idx_left)
-            .map(|(idx_left, idx_right)| {
-                let denom = s[*idx_left] - s[*idx_right + 1];
-                let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
-                let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
-                -a - b
-            })
-            .sum();
+            let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
+            let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
+            hessdiag[j] += a - b;
+        }
 
-        let sum_right: f64 = data.data_time_interval_indexes.iter()
-            .filter(|(idx_left, idx_right)| j + 1 == *idx_right + 1)
-            .map(|(idx_left, idx_right)| {
-                let denom = s[*idx_left] - s[*idx_right + 1];
-                let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
-                let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
-                a - b
-            })
-            .sum();
-
-        hessdiag[j] = sum_left + sum_right;
+        // Subtract from gradient[j] when j + 1 == idx_left
+        // Add to hessdiag[j] when j + 1 == idx_left
+        if *idx_left > 0 {
+            let j = *idx_left - 1;
+            gradient[j] -= (-lambda[j].exp() + lambda[j]).exp() / denom;
+
+            let a = ((lambda[j] - lambda[j].exp()).exp() * (1.0 - lambda[j].exp())) / denom;
+            let b = (2.0 * lambda[j] - 2.0 * lambda[j].exp()).exp() / denom.powi(2);
+            hessdiag[j] += -a - b;
+        }
     }
 
     // Description in Anderson-Bergman (2017) is slightly misleading
@@ -451,7 +440,8 @@ fn do_icm_step(data: &TurnbullData, _p: &Vec<f64>, s: &Vec<f64>, ll_model: f64)
     // And we will move in the direction of the gradient
     // So there are a few more negative signs here than suggested
 
-    let weights = -hessdiag.clone() / 2.0;
+    let weights = -DVector::from_vec(hessdiag.clone()) / 2.0;
+    let gradient_over_hessdiag = DVector::from_vec(gradient.par_iter().zip(hessdiag.par_iter()).map(|(g, h)| g / h).collect());
 
     let mut s_new;
     let mut p_new;
     let mut ll_model_new: f64;
 
     // Take as large a step as possible while the log-likelihood increases
     let mut step_size_exponent: i32 = 0;
     loop {
         let step_size = 0.5_f64.powi(step_size_exponent);
-        let lambda_target = -gradient.component_div(&hessdiag) * step_size + DVector::from_vec(lambda.clone());
+        let lambda_target = -gradient_over_hessdiag.clone() * step_size + DVector::from_vec(lambda.clone());
 
         let lambda_new = monotonic_regression_pava(lambda_target, weights.clone());
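[Reviewer note, not part of the series] The a, b and exp(λ - e^λ) terms accumulated in PATCH 5/5 come from differentiating an observation's contribution ln(S_L - S_R) with S = exp(-exp Λ). A quick numerical sanity check of those expressions for a single observation (all values here are arbitrary):

    fn main() {
        // One observation whose right-endpoint survival is driven by lambda,
        // with the left-endpoint survival held fixed at s_l
        let s_l = 0.9;
        let ll = |lambda: f64| (s_l - (-lambda.exp()).exp()).ln();

        let lambda = -0.3;
        let denom = s_l - (-lambda.exp()).exp();

        // Analytic first and second derivatives, in the form the patch accumulates
        let grad = (lambda - lambda.exp()).exp() / denom;
        let a = ((lambda - lambda.exp()).exp() * (1.0 - lambda.exp())) / denom;
        let b = (2.0 * lambda - 2.0 * lambda.exp()).exp() / denom.powi(2);
        let hess = a - b;

        // Central finite differences
        let h = 1e-4;
        let grad_fd = (ll(lambda + h) - ll(lambda - h)) / (2.0 * h);
        let hess_fd = (ll(lambda + h) - 2.0 * ll(lambda) + ll(lambda - h)) / (h * h);

        assert!((grad - grad_fd).abs() < 1e-6);
        assert!((hess - hess_fd).abs() < 1e-5);
    }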