turnbull: Allow dropping columns/rows of Hessian corresponding to intervals with zero failure probability

2023-10-15 02:39:04 +11:00 · 2023-10-15 02:39:04 +11:00 · dd24de5813
commit dd24de5813
parent 072d453d11
3 changed files with 58 additions and 8 deletions
--- a/docs/turnbull.tex
+++ b/docs/turnbull.tex
@ -1,7 +1,7 @@
 \documentclass[a4paper,12pt]{article}
 \usepackage[math-style=ISO, bold-style=ISO]{unicode-math}
-\setmainfont{TeX Gyre Termes}
+\setmainfont[RawFeature=-tlig]{TeX Gyre Termes}
 \setmathfont{TeX Gyre Termes Math}
 \usepackage{parskip}
@ -61,12 +61,14 @@
 	%
 	The sum of all $\nablasub{\hat{\symbf{F}}} \mathcal{L}_i$ yields the Hessian of the log-likelihood $\nablasub{\hat{\symbf{F}}} \mathcal{L}$.
-	The covariance matrix of $\hat{\symbf{F}}$ is given by the inverse of $-\nablasub{\hat{\symbf{F}}} \mathcal{L}$. The standard errors for each of $\hat{\symbf{F}}$ are the square roots of the diagonal elements of the covariance matrix, as required.
+	The covariance matrix of $\hat{\symbf{F}}$ is given by the inverse of $-\nablasub{\hat{\symbf{F}}} \mathcal{L}$. The standard errors for each of $\hat{\symbf{F}}$ are the square roots of the diagonal elements of the covariance matrix, as required. Alternatively, when \textit{--se-method oim-drop-zeros} is passed, columns/rows of $\nablasub{\hat{\symbf{F}}} \mathcal{L}$ corresponding with intervals where $\hat{s}_i = 0$ are dropped before the matrix is inverted, which enables greater numerical stability but whose theoretical justification is not well explored [2].
-	{\vspace{0.5cm}\scshape\centering References\par}
+	%{\vspace{0.5cm}\scshape\centering References\par}
 	{\pagebreak\scshape\centering References\par}
 	\begin{enumerate}
 		\item Turnbull BW. The empirical distribution function with arbitrarily grouped, censored and truncated data. \textit{Journal of the Royal Statistical Society, Series B (Methodological)}. 1976;38(3):290–5. \href{https://doi.org/10.1111/j.2517-6161.1976.tb01597.x}{doi: 10.1111\slash j.2517-6161.1976.tb01597.x}
 		\item Goodall RL, Dunn DT, Babiker AG. Interval-censored survival time data: confidence intervals for the non-parametric survivor function. \textit{Statistics in Medicine}. 2004;23(7):1131–45. \href{https://doi.org/10.1002/sim.1682}{doi: 10.1002\slash sim.1682}
 	\end{enumerate}
 \end{document}
--- a/src/turnbull.rs
+++ b/src/turnbull.rs
@ -45,6 +45,14 @@ pub struct TurnbullArgs {
 	/// Terminate algorithm when the absolute change in failure probability in each interval is less than this tolerance
 	#[arg(long, default_value="0.0001")]
 	fail_prob_tolerance: f64,
 	/// Method for computing standard error or survival probabilities
 	#[arg(long, value_enum, default_value="oim")]
 	se_method: SEMethod,
 	/// Threshold for dropping failure probability in --se-method oim-drop-zeros
 	#[arg(long, default_value="0.0001")]
 	zero_tolerance: f64,
 }
 #[derive(ValueEnum, Clone)]
@ -53,13 +61,19 @@ enum OutputFormat {
 	Json
 }
 #[derive(ValueEnum, Clone)]
 pub enum SEMethod {
 	OIM,
 	OIMDropZeros,
 }
 pub fn main(args: TurnbullArgs) {
 	// Read data
 	let data_times = read_data(&args.input);
 	// Fit regression
 	let progress_bar = ProgressBar::with_draw_target(Some(0), ProgressDrawTarget::term_like(Box::new(UnconditionalTermLike::stderr())));
-	let result = fit_turnbull(data_times, progress_bar, args.max_iterations, args.fail_prob_tolerance);
+	let result = fit_turnbull(data_times, progress_bar, args.max_iterations, args.fail_prob_tolerance, args.se_method, args.zero_tolerance);
 	// Display output
 	match args.output {
@ -153,7 +167,7 @@ impl TurnbullData {
 	}
 }
-pub fn fit_turnbull(data_times: MatrixXx2<f64>, progress_bar: ProgressBar, max_iterations: u32, fail_prob_tolerance: f64) -> TurnbullResult {
+pub fn fit_turnbull(data_times: MatrixXx2<f64>, progress_bar: ProgressBar, max_iterations: u32, fail_prob_tolerance: f64, se_method: SEMethod, zero_tolerance: f64) -> TurnbullResult {
 	// ----------------------
 	// Prepare for regression
@ -287,8 +301,42 @@ pub fn fit_turnbull(data_times: MatrixXx2<f64>, progress_bar: ProgressBar, max_i
 	}
 	progress_bar.finish();
-	let vcov = (-hessian).try_inverse().expect("Matrix not invertible");
+	let mut survival_prob_se: DVector<f64>;
-	let survival_prob_se = vcov.diagonal().apply_into(|x| { *x = x.sqrt(); });
+	
 	match se_method {
 		SEMethod::OIM => {
 			// Compute covariance matrix as inverse of negative Hessian
 			let vcov = -hessian.try_inverse().expect("Matrix not invertible");
 			survival_prob_se = vcov.diagonal().apply_into(|x| { *x = x.sqrt(); });
 		}
 		SEMethod::OIMDropZeros => {
 			// Drop rows/columns of Hessian corresponding to intervals with zero failure probability
 			let nonzero_intervals: Vec<usize> = (0..(data.num_intervals() - 1)).filter(|i| s[*i] > zero_tolerance).collect();
 			let mut hessian_nonzero: DMatrix<f64> = DMatrix::zeros(nonzero_intervals.len(), nonzero_intervals.len());
 			for (nonzero_index1, orig_index1) in nonzero_intervals.iter().enumerate() {
 				hessian_nonzero[(nonzero_index1, nonzero_index1)] = hessian[(*orig_index1, *orig_index1)];
 				for (nonzero_index2, orig_index2) in nonzero_intervals.iter().enumerate().take(nonzero_index1) {
 					hessian_nonzero[(nonzero_index1, nonzero_index2)] = hessian[(*orig_index1, *orig_index2)];
 					hessian_nonzero[(nonzero_index2, nonzero_index1)] = hessian[(*orig_index2, *orig_index1)];
 				}
 			}
 			let vcov = -hessian_nonzero.try_inverse().expect("Matrix not invertible");
 			let survival_prob_se_nonzero = vcov.diagonal().apply_into(|x| { *x = x.sqrt(); });
 			survival_prob_se = DVector::zeros(data.num_intervals() - 1);
 			let mut nonzero_index = 0;
 			for orig_index in 0..(data.num_intervals() - 1) {
 				if nonzero_intervals.contains(&orig_index) {
 					survival_prob_se[orig_index] = survival_prob_se_nonzero[nonzero_index];
 					nonzero_index += 1;
 				} else {
 					survival_prob_se[orig_index] = survival_prob_se[orig_index - 1];
 				}
 			}
 		}
 	}
 	return TurnbullResult {
 		failure_intervals: data.intervals,
--- a/tests/turnbull.rs
+++ b/tests/turnbull.rs
@ -26,7 +26,7 @@ fn test_turnbull_minitab() {
 	// Fit regression
 	let progress_bar = ProgressBar::hidden();
-	let result = turnbull::fit_turnbull(data_times, progress_bar, 500, 0.0001);
+	let result = turnbull::fit_turnbull(data_times, progress_bar, 500, 0.0001, turnbull::SEMethod::OIM, 0.0001);
 	assert_eq!(result.failure_intervals[0], (20000.0, 30000.0));
 	assert_eq!(result.failure_intervals[1], (30000.0, 40000.0));