turnbull: Allow dropping columns/rows of Hessian corresponding to intervals with zero failure probability

This commit is contained in:
RunasSudo 2023-10-15 02:39:04 +11:00
parent 072d453d11
commit dd24de5813
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
3 changed files with 58 additions and 8 deletions

View File

@ -1,7 +1,7 @@
\documentclass[a4paper,12pt]{article} \documentclass[a4paper,12pt]{article}
\usepackage[math-style=ISO, bold-style=ISO]{unicode-math} \usepackage[math-style=ISO, bold-style=ISO]{unicode-math}
\setmainfont{TeX Gyre Termes} \setmainfont[RawFeature=-tlig]{TeX Gyre Termes}
\setmathfont{TeX Gyre Termes Math} \setmathfont{TeX Gyre Termes Math}
\usepackage{parskip} \usepackage{parskip}
@ -61,12 +61,14 @@
% %
The sum of all $\nablasub{\hat{\symbf{F}}} \mathcal{L}_i$ yields the Hessian of the log-likelihood $\nablasub{\hat{\symbf{F}}} \mathcal{L}$. The sum of all $\nablasub{\hat{\symbf{F}}} \mathcal{L}_i$ yields the Hessian of the log-likelihood $\nablasub{\hat{\symbf{F}}} \mathcal{L}$.
The covariance matrix of $\hat{\symbf{F}}$ is given by the inverse of $-\nablasub{\hat{\symbf{F}}} \mathcal{L}$. The standard errors for each of $\hat{\symbf{F}}$ are the square roots of the diagonal elements of the covariance matrix, as required. The covariance matrix of $\hat{\symbf{F}}$ is given by the inverse of $-\nablasub{\hat{\symbf{F}}} \mathcal{L}$. The standard errors for each of $\hat{\symbf{F}}$ are the square roots of the diagonal elements of the covariance matrix, as required. Alternatively, when \textit{--se-method oim-drop-zeros} is passed, columns/rows of $\nablasub{\hat{\symbf{F}}} \mathcal{L}$ corresponding with intervals where $\hat{s}_i = 0$ are dropped before the matrix is inverted, which enables greater numerical stability but whose theoretical justification is not well explored [2].
{\vspace{0.5cm}\scshape\centering References\par} %{\vspace{0.5cm}\scshape\centering References\par}
{\pagebreak\scshape\centering References\par}
\begin{enumerate} \begin{enumerate}
\item Turnbull BW. The empirical distribution function with arbitrarily grouped, censored and truncated data. \textit{Journal of the Royal Statistical Society, Series B (Methodological)}. 1976;38(3):290–5. \href{https://doi.org/10.1111/j.2517-6161.1976.tb01597.x}{doi: 10.1111\slash j.2517-6161.1976.tb01597.x} \item Turnbull BW. The empirical distribution function with arbitrarily grouped, censored and truncated data. \textit{Journal of the Royal Statistical Society, Series B (Methodological)}. 1976;38(3):290–5. \href{https://doi.org/10.1111/j.2517-6161.1976.tb01597.x}{doi: 10.1111\slash j.2517-6161.1976.tb01597.x}
\item Goodall RL, Dunn DT, Babiker AG. Interval-censored survival time data: confidence intervals for the non-parametric survivor function. \textit{Statistics in Medicine}. 2004;23(7):1131–45. \href{https://doi.org/10.1002/sim.1682}{doi: 10.1002\slash sim.1682}
\end{enumerate} \end{enumerate}
\end{document} \end{document}

View File

@ -45,6 +45,14 @@ pub struct TurnbullArgs {
/// Terminate algorithm when the absolute change in failure probability in each interval is less than this tolerance /// Terminate algorithm when the absolute change in failure probability in each interval is less than this tolerance
#[arg(long, default_value="0.0001")] #[arg(long, default_value="0.0001")]
fail_prob_tolerance: f64, fail_prob_tolerance: f64,
/// Method for computing standard error or survival probabilities
#[arg(long, value_enum, default_value="oim")]
se_method: SEMethod,
/// Threshold for dropping failure probability in --se-method oim-drop-zeros
#[arg(long, default_value="0.0001")]
zero_tolerance: f64,
} }
#[derive(ValueEnum, Clone)] #[derive(ValueEnum, Clone)]
@ -53,13 +61,19 @@ enum OutputFormat {
Json Json
} }
#[derive(ValueEnum, Clone)]
pub enum SEMethod {
OIM,
OIMDropZeros,
}
pub fn main(args: TurnbullArgs) { pub fn main(args: TurnbullArgs) {
// Read data // Read data
let data_times = read_data(&args.input); let data_times = read_data(&args.input);
// Fit regression // Fit regression
let progress_bar = ProgressBar::with_draw_target(Some(0), ProgressDrawTarget::term_like(Box::new(UnconditionalTermLike::stderr()))); let progress_bar = ProgressBar::with_draw_target(Some(0), ProgressDrawTarget::term_like(Box::new(UnconditionalTermLike::stderr())));
let result = fit_turnbull(data_times, progress_bar, args.max_iterations, args.fail_prob_tolerance); let result = fit_turnbull(data_times, progress_bar, args.max_iterations, args.fail_prob_tolerance, args.se_method, args.zero_tolerance);
// Display output // Display output
match args.output { match args.output {
@ -153,7 +167,7 @@ impl TurnbullData {
} }
} }
pub fn fit_turnbull(data_times: MatrixXx2<f64>, progress_bar: ProgressBar, max_iterations: u32, fail_prob_tolerance: f64) -> TurnbullResult { pub fn fit_turnbull(data_times: MatrixXx2<f64>, progress_bar: ProgressBar, max_iterations: u32, fail_prob_tolerance: f64, se_method: SEMethod, zero_tolerance: f64) -> TurnbullResult {
// ---------------------- // ----------------------
// Prepare for regression // Prepare for regression
@ -287,8 +301,42 @@ pub fn fit_turnbull(data_times: MatrixXx2<f64>, progress_bar: ProgressBar, max_i
} }
progress_bar.finish(); progress_bar.finish();
let vcov = (-hessian).try_inverse().expect("Matrix not invertible"); let mut survival_prob_se: DVector<f64>;
let survival_prob_se = vcov.diagonal().apply_into(|x| { *x = x.sqrt(); });
match se_method {
SEMethod::OIM => {
// Compute covariance matrix as inverse of negative Hessian
let vcov = -hessian.try_inverse().expect("Matrix not invertible");
survival_prob_se = vcov.diagonal().apply_into(|x| { *x = x.sqrt(); });
}
SEMethod::OIMDropZeros => {
// Drop rows/columns of Hessian corresponding to intervals with zero failure probability
let nonzero_intervals: Vec<usize> = (0..(data.num_intervals() - 1)).filter(|i| s[*i] > zero_tolerance).collect();
let mut hessian_nonzero: DMatrix<f64> = DMatrix::zeros(nonzero_intervals.len(), nonzero_intervals.len());
for (nonzero_index1, orig_index1) in nonzero_intervals.iter().enumerate() {
hessian_nonzero[(nonzero_index1, nonzero_index1)] = hessian[(*orig_index1, *orig_index1)];
for (nonzero_index2, orig_index2) in nonzero_intervals.iter().enumerate().take(nonzero_index1) {
hessian_nonzero[(nonzero_index1, nonzero_index2)] = hessian[(*orig_index1, *orig_index2)];
hessian_nonzero[(nonzero_index2, nonzero_index1)] = hessian[(*orig_index2, *orig_index1)];
}
}
let vcov = -hessian_nonzero.try_inverse().expect("Matrix not invertible");
let survival_prob_se_nonzero = vcov.diagonal().apply_into(|x| { *x = x.sqrt(); });
survival_prob_se = DVector::zeros(data.num_intervals() - 1);
let mut nonzero_index = 0;
for orig_index in 0..(data.num_intervals() - 1) {
if nonzero_intervals.contains(&orig_index) {
survival_prob_se[orig_index] = survival_prob_se_nonzero[nonzero_index];
nonzero_index += 1;
} else {
survival_prob_se[orig_index] = survival_prob_se[orig_index - 1];
}
}
}
}
return TurnbullResult { return TurnbullResult {
failure_intervals: data.intervals, failure_intervals: data.intervals,

View File

@ -26,7 +26,7 @@ fn test_turnbull_minitab() {
// Fit regression // Fit regression
let progress_bar = ProgressBar::hidden(); let progress_bar = ProgressBar::hidden();
let result = turnbull::fit_turnbull(data_times, progress_bar, 500, 0.0001); let result = turnbull::fit_turnbull(data_times, progress_bar, 500, 0.0001, turnbull::SEMethod::OIM, 0.0001);
assert_eq!(result.failure_intervals[0], (20000.0, 30000.0)); assert_eq!(result.failure_intervals[0], (20000.0, 30000.0));
assert_eq!(result.failure_intervals[1], (30000.0, 40000.0)); assert_eq!(result.failure_intervals[1], (30000.0, 40000.0));