diff --git a/docs/turnbull.tex b/docs/turnbull.tex index b290493..16fffbe 100644 --- a/docs/turnbull.tex +++ b/docs/turnbull.tex @@ -1,7 +1,7 @@ \documentclass[a4paper,12pt]{article} \usepackage[math-style=ISO, bold-style=ISO]{unicode-math} -\setmainfont{TeX Gyre Termes} +\setmainfont[RawFeature=-tlig]{TeX Gyre Termes} \setmathfont{TeX Gyre Termes Math} \usepackage{parskip} @@ -61,12 +61,14 @@ % The sum of all $\nablasub{\hat{\symbf{F}}} \mathcal{L}_i$ yields the Hessian of the log-likelihood $\nablasub{\hat{\symbf{F}}} \mathcal{L}$. - The covariance matrix of $\hat{\symbf{F}}$ is given by the inverse of $-\nablasub{\hat{\symbf{F}}} \mathcal{L}$. The standard errors for each of $\hat{\symbf{F}}$ are the square roots of the diagonal elements of the covariance matrix, as required. + The covariance matrix of $\hat{\symbf{F}}$ is given by the inverse of $-\nablasub{\hat{\symbf{F}}} \mathcal{L}$. The standard errors for each of $\hat{\symbf{F}}$ are the square roots of the diagonal elements of the covariance matrix, as required. Alternatively, when \textit{--se-method oim-drop-zeros} is passed, columns/rows of $\nablasub{\hat{\symbf{F}}} \mathcal{L}$ corresponding with intervals where $\hat{s}_i = 0$ are dropped before the matrix is inverted, which enables greater numerical stability but whose theoretical justification is not well explored [2]. - {\vspace{0.5cm}\scshape\centering References\par} + %{\vspace{0.5cm}\scshape\centering References\par} + {\pagebreak\scshape\centering References\par} \begin{enumerate} \item Turnbull BW. The empirical distribution function with arbitrarily grouped, censored and truncated data. \textit{Journal of the Royal Statistical Society, Series B (Methodological)}. 1976;38(3):290–5. \href{https://doi.org/10.1111/j.2517-6161.1976.tb01597.x}{doi: 10.1111\slash j.2517-6161.1976.tb01597.x} + \item Goodall RL, Dunn DT, Babiker AG. Interval-censored survival time data: confidence intervals for the non-parametric survivor function. \textit{Statistics in Medicine}. 2004;23(7):1131–45. \href{https://doi.org/10.1002/sim.1682}{doi: 10.1002\slash sim.1682} \end{enumerate} \end{document} diff --git a/src/turnbull.rs b/src/turnbull.rs index 6d6ac67..33f631b 100644 --- a/src/turnbull.rs +++ b/src/turnbull.rs @@ -45,6 +45,14 @@ pub struct TurnbullArgs { /// Terminate algorithm when the absolute change in failure probability in each interval is less than this tolerance #[arg(long, default_value="0.0001")] fail_prob_tolerance: f64, + + /// Method for computing standard error or survival probabilities + #[arg(long, value_enum, default_value="oim")] + se_method: SEMethod, + + /// Threshold for dropping failure probability in --se-method oim-drop-zeros + #[arg(long, default_value="0.0001")] + zero_tolerance: f64, } #[derive(ValueEnum, Clone)] @@ -53,13 +61,19 @@ enum OutputFormat { Json } +#[derive(ValueEnum, Clone)] +pub enum SEMethod { + OIM, + OIMDropZeros, +} + pub fn main(args: TurnbullArgs) { // Read data let data_times = read_data(&args.input); // Fit regression let progress_bar = ProgressBar::with_draw_target(Some(0), ProgressDrawTarget::term_like(Box::new(UnconditionalTermLike::stderr()))); - let result = fit_turnbull(data_times, progress_bar, args.max_iterations, args.fail_prob_tolerance); + let result = fit_turnbull(data_times, progress_bar, args.max_iterations, args.fail_prob_tolerance, args.se_method, args.zero_tolerance); // Display output match args.output { @@ -153,7 +167,7 @@ impl TurnbullData { } } -pub fn fit_turnbull(data_times: MatrixXx2, progress_bar: ProgressBar, max_iterations: u32, fail_prob_tolerance: f64) -> TurnbullResult { +pub fn fit_turnbull(data_times: MatrixXx2, progress_bar: ProgressBar, max_iterations: u32, fail_prob_tolerance: f64, se_method: SEMethod, zero_tolerance: f64) -> TurnbullResult { // ---------------------- // Prepare for regression @@ -287,8 +301,42 @@ pub fn fit_turnbull(data_times: MatrixXx2, progress_bar: ProgressBar, max_i } progress_bar.finish(); - let vcov = (-hessian).try_inverse().expect("Matrix not invertible"); - let survival_prob_se = vcov.diagonal().apply_into(|x| { *x = x.sqrt(); }); + let mut survival_prob_se: DVector; + + match se_method { + SEMethod::OIM => { + // Compute covariance matrix as inverse of negative Hessian + let vcov = -hessian.try_inverse().expect("Matrix not invertible"); + survival_prob_se = vcov.diagonal().apply_into(|x| { *x = x.sqrt(); }); + } + SEMethod::OIMDropZeros => { + // Drop rows/columns of Hessian corresponding to intervals with zero failure probability + let nonzero_intervals: Vec = (0..(data.num_intervals() - 1)).filter(|i| s[*i] > zero_tolerance).collect(); + + let mut hessian_nonzero: DMatrix = DMatrix::zeros(nonzero_intervals.len(), nonzero_intervals.len()); + for (nonzero_index1, orig_index1) in nonzero_intervals.iter().enumerate() { + hessian_nonzero[(nonzero_index1, nonzero_index1)] = hessian[(*orig_index1, *orig_index1)]; + for (nonzero_index2, orig_index2) in nonzero_intervals.iter().enumerate().take(nonzero_index1) { + hessian_nonzero[(nonzero_index1, nonzero_index2)] = hessian[(*orig_index1, *orig_index2)]; + hessian_nonzero[(nonzero_index2, nonzero_index1)] = hessian[(*orig_index2, *orig_index1)]; + } + } + + let vcov = -hessian_nonzero.try_inverse().expect("Matrix not invertible"); + let survival_prob_se_nonzero = vcov.diagonal().apply_into(|x| { *x = x.sqrt(); }); + + survival_prob_se = DVector::zeros(data.num_intervals() - 1); + let mut nonzero_index = 0; + for orig_index in 0..(data.num_intervals() - 1) { + if nonzero_intervals.contains(&orig_index) { + survival_prob_se[orig_index] = survival_prob_se_nonzero[nonzero_index]; + nonzero_index += 1; + } else { + survival_prob_se[orig_index] = survival_prob_se[orig_index - 1]; + } + } + } + } return TurnbullResult { failure_intervals: data.intervals, diff --git a/tests/turnbull.rs b/tests/turnbull.rs index 3963832..04f44d9 100644 --- a/tests/turnbull.rs +++ b/tests/turnbull.rs @@ -26,7 +26,7 @@ fn test_turnbull_minitab() { // Fit regression let progress_bar = ProgressBar::hidden(); - let result = turnbull::fit_turnbull(data_times, progress_bar, 500, 0.0001); + let result = turnbull::fit_turnbull(data_times, progress_bar, 500, 0.0001, turnbull::SEMethod::OIM, 0.0001); assert_eq!(result.failure_intervals[0], (20000.0, 30000.0)); assert_eq!(result.failure_intervals[1], (30000.0, 40000.0));