% hpstat/docs/turnbull.tex

\documentclass[a4paper,12pt]{article}

\usepackage[math-style=ISO, bold-style=ISO]{unicode-math}
\setmainfont[RawFeature=-tlig]{TeX Gyre Termes}
\setmathfont{TeX Gyre Termes Math}

\usepackage{parskip}
\usepackage{microtype}
\usepackage[left=2cm,right=2cm,top=2cm,bottom=2cm]{geometry}
\frenchspacing
\setlength{\emergencystretch}{3em}

\usepackage[hidelinks]{hyperref}
\usepackage{mathtools}

\newcommand{\bbeta}{\kern -0.1em\symbf{β}}
\newcommand{\blambda}{\kern -0.1em\symbf{Λ}}
\newcommand{\nablasub}[1]{\nabla_{\kern -0.15em #1}}

\begin{document}
	{\centering\bfseries Supplemental documentation for hpstat \textit{turnbull} command\par}
	
	The hpstat \textit{turnbull} command implements Turnbull's nonparametric survival curve estimation for interval-censored observations [1]. This documentation discusses technical details of the implementation.
	
	Let $\hat{F}(t)$ be a maximum likelihood estimator for the cumulative distribution function for failure times. Turnbull [1] demonstrated that $\hat{F}(t)$ increases only on the set of what are now called ‘Turnbull intervals’, or ‘innermost intervals’, $(q_j, p_j]$ for $j = 1, 2, …, m$.
	
	Let $s_j$ be the probability of failure within the interval $(q_j, p_j]$. We seek a maximum likelihood estimator for the vector $\symbf{s} = (s_1, s_2, …, s_m)^\mathrm{T}$.
	
	Take the $i$-th observation, $1 ≤ i ≤ n$, whose failure time falls in $(L_i, R_i]$. Let $α_{i,j} = \mathrm{I}\left((q_j, p_j] \subseteq (L_i, R_i]\right)$.
	
	As discussed by Turnbull [1], noting that we consider only the case of no truncation, we commence with an arbitrary initial guess for $\hat{\symbf{s}}$, and iteratively apply:
	%
	\begin{align*}
		μ_{i,j}(\hat{\symbf{s}}) &= \frac{α_{i,j} \hat{s}_j}{\sum_{k=1}^m α_{i,k} \hat{s}_k} \\
		π_j(\hat{\symbf{s}}) &= \frac{\sum_{i=1}^n μ_{i,j}(\hat{\symbf{s}})}{n} \\
		\hat{s}_j &\leftarrow π_j(\hat{\symbf{s}}), \qquad \text{for all $j = 1, 2, …, m$}
	\end{align*}
	%
	Iterating until convergence yields the maximum likelihood estimator $\hat{\symbf{s}}$.
	
	Now let $\hat{F}_0 = 0 ≤ \hat{F}_1 ≤ \hat{F}_2 ≤ … ≤ \hat{F}_m = 1$ be the values of $\hat{F}(t)$ outside the Turnbull intervals, such that $\hat{s}_j = \hat{F}_j - \hat{F}_{j-1}$. We seek the standard errors of these $\hat{\symbf{F}} = (\hat{F}_1, \hat{F}_2, …, \hat{F}_{m-1})^\mathrm{T}$.
	%
	Note that the log-likelihood $\mathcal{L}_i$ for the $i$-th observation is:
	%
	\begin{align*}
		\mathcal{L}_i &= \log\left(\sum_{j=1}^m α_{i,j} \hat{s}_j\right) \\
		&= \log\left(\sum_{j=1}^m α_{i,j} (\hat{F}_j - \hat{F}_{j-1})\right)
	\end{align*}
	%
	Note the gradient $\nablasub{\hat{\symbf{F}}} \mathcal{L}_i$ is the vector whose $h$-th element is:
	%
	\begin{align*}
		\frac{\partial \mathcal{L}_i}{\partial \hat{F}_h} &= \frac{α_{i,h} - α_{i,h+1}}{\sum_{j=1}^m α_{i,j} (\hat{F}_j - \hat{F}_{j-1})}
	\end{align*}
	%
	And so the Hessian $\nabla^2_{\hat{\symbf{F}}} \mathcal{L}_i$ has $(h, k)$-th elements:
	%
	\begin{align*}
		\frac{\partial^2 \mathcal{L}_i}{\partial \hat{F}_h \, \partial \hat{F}_k} &= - \frac{( α_{i,h} - α_{i,h+1} ) ( α_{i,k} - α_{i,k+1} )}{\left( \sum_{j=1}^m α_{i,j} (\hat{F}_j - \hat{F}_{j-1}) \right)^2}
	\end{align*}
	%
	The sum of the Hessians $\nabla^2_{\hat{\symbf{F}}} \mathcal{L}_i$ over all observations $i = 1, 2, …, n$ yields the Hessian of the log-likelihood, $\nabla^2_{\hat{\symbf{F}}} \mathcal{L}$.
	
	The covariance matrix of $\hat{\symbf{F}}$ is given by the inverse of $-\nabla^2_{\hat{\symbf{F}}} \mathcal{L}$. The standard errors for each element of $\hat{\symbf{F}}$ are the square roots of the diagonal elements of the covariance matrix, as required. Alternatively, when \textit{--se-method oim-drop-zeros} is passed, columns/rows of $\nabla^2_{\hat{\symbf{F}}} \mathcal{L}$ corresponding to intervals where $\hat{s}_j = 0$ are dropped before the matrix is inverted. This improves numerical stability, but its theoretical justification is not well explored [2].
	
	%{\vspace{0.5cm}\scshape\centering References\par}
	{\pagebreak\scshape\centering References\par}
	
	\begin{enumerate}
		\item Turnbull BW. The empirical distribution function with arbitrarily grouped, censored and truncated data. \textit{Journal of the Royal Statistical Society, Series B (Methodological)}. 1976;38(3):290–5. \href{https://doi.org/10.1111/j.2517-6161.1976.tb01597.x}{doi: 10.1111\slash j.2517-6161.1976.tb01597.x}
		\item Goodall RL, Dunn DT, Babiker AG. Interval-censored survival time data: confidence intervals for the non-parametric survivor function. \textit{Statistics in Medicine}. 2004;23(7):1131–45. \href{https://doi.org/10.1002/sim.1682}{doi: 10.1002\slash sim.1682}
	\end{enumerate}
	
\end{document}
Add supplemental documentation for turnbull 2023-10-14 06:36:14 +11:00			`\documentclass[a4paper,12pt]{article}`

			`\usepackage[math-style=ISO, bold-style=ISO]{unicode-math}`
turnbull: Allow dropping columns/rows of Hessian corresponding to intervals with zero failure probability 2023-10-15 02:39:04 +11:00			`\setmainfont[RawFeature=-tlig]{TeX Gyre Termes}`
Add supplemental documentation for turnbull 2023-10-14 06:36:14 +11:00			`\setmathfont{TeX Gyre Termes Math}`

			`\usepackage{parskip}`
			`\usepackage{microtype}`
			`\usepackage[left=2cm,right=2cm,top=2cm,bottom=2cm]{geometry}`
			`\frenchspacing`
			`\setlength{\emergencystretch}{3em}`

			`\usepackage[hidelinks]{hyperref}`
			`\usepackage{mathtools}`

			`\newcommand{\bbeta}{\kern -0.1em\symbf{β}}`
			`\newcommand{\blambda}{\kern -0.1em\symbf{Λ}}`
			`\newcommand{\nablasub}[1]{\nabla_{\kern -0.15em #1}}`

			`\begin{document}`
			`{\centering\bfseries Supplemental documentation for hpstat \textit{turnbull} command\par}`

			`The hpstat \textit{turnbull} command implements Turnbull's nonparametric survival curve estimation for interval-censored observations [1]. This documentation discusses technical details of the implementation.`

			`Let $\hat{F}(t)$ be a maximum likelihood estimator for the cumulative distribution function for failure times. Turnbull [1] demonstrated that $\hat{F}(t)$ decreases only on the set of what are now called ‘Turnbull intervals’, or ‘innermost intervals’, $(q_j, p_j]$ for $j = 1, 2, …, m$.`

			`Let $s_j$ be the probability of failure within the interval $(q_j, p_j]$. We seek a maximum likelihood estimator for the vector $\symbf{s} = (s_1, s_2, …, s_m)^\mathrm{T}$.`

			`Take the $i$-th observation, $1 ≤ i ≤ n$ , whose failure time falls in $(L_i, R_i]$. Let $α_{i,j} = \mathrm{I}\left((q_j, p_j] \subseteq (L_i, R_i]\right)$.`

			`As discussed by Turnbull [1], noting that we consider only the case of no truncation, we commence with an arbitrary initial guess for $\hat{\symbf{s}}$, and iteratively apply:`
			`%`
			`\begin{align*}`
			`μ_{ij}(\hat{\symbf{s}}) &= \frac{α_{i,j} \hat{s}_j}{\sum_{k=1}^m α_{i,k} \hat{s}_k} \\`
			`π_j(\hat{\symbf{s}}) &= \frac{\sum_{i=1}^n μ_{ij}(\hat{\symbf{s}})}{n} \\`
			`\hat{s}_j &\leftarrow π_j(\hat{\symbf{s}}), \qquad \text{for all $j = 1, 2, …, m$}`
			`\end{align*}`
			`%`
			`This yields the maximum likelihood estimator $\hat{\symbf{s}}$.`

			`Now let $\hat{F}_0 = 0 ≤ \hat{F}_1 ≤ \hat{F}_2 ≤ … ≤ \hat{F}_m = 1$ be the values of $\hat{F}(t)$ outside the Turnbull intervals, such that $\hat{s}_j = \hat{F}_j - \hat{F}_{j-1}$. We seek the standard errors of these $\hat{\symbf{F}} = (\hat{F}_1, \hat{F}_2, …, \hat{F}_{m-1})^\mathrm{T}$.`
			`%`
			`Note that the log-likelihood $\mathcal{L}_i$ for the $i$-th observation is:`
			`%`
			`\begin{align*}`
			`\mathcal{L}_i &= \log\left(\sum_{j=1}^m α_{i,j} \hat{s}_j\right) \\`
			`&= \log\left(\sum_{j=1}^m α_{i,j} (\hat{F}_j - \hat{F}_{j-1})\right)`
			`\end{align*}`
			`%`
			`Note the gradient $\nablasub{\hat{\symbf{F}}} \mathcal{L}_i$ is the vector whose $h$-th element is:`
			`%`
			`\begin{align*}`
			`\frac{\partial \mathcal{L}_i}{\partial \hat{F}_h} &= \frac{α_{i,h} - α_{i,h+1}}{\sum_{j=1}^m α_{i,j} (\hat{F}_j - \hat{F}_{j-1})}`
			`\end{align*}`
			`%`
			`And so the Hessian $\nablasub{\hat{\symbf{F}}} \mathcal{L}_i$ has $(h, k)$-th elements:`
			`%`
			`\begin{align*}`
			`\frac{\partial \mathcal{L}_i}{\partial \hat{F}_h \partial \hat{F}_k} &= - \frac{( α_{i,h} - α_{i,h+1} ) ( α_{i,k} - α_{i,k+1} )}{\left( \sum_{j=1}^m α_{i,j} (\hat{F}_j - \hat{F}_{j-1}) \right)^2}`
			`\end{align*}`
			`%`
			`The sum of all $\nablasub{\hat{\symbf{F}}} \mathcal{L}_i$ yields the Hessian of the log-likelihood $\nablasub{\hat{\symbf{F}}} \mathcal{L}$.`

turnbull: Allow dropping columns/rows of Hessian corresponding to intervals with zero failure probability 2023-10-15 02:39:04 +11:00			The covariance matrix of $\hat{\symbf{F}}$ is given by the inverse of $-\nablasub{\hat{\symbf{F}}} \mathcal{L}$. The standard errors for each of $\hat{\symbf{F}}$ are the square roots of the diagonal elements of the covariance matrix, as required. Alternatively, when \textit{--se-method oim-drop-zeros} is passed, columns/rows of $\nablasub{\hat{\symbf{F}}} \mathcal{L}$ corresponding with intervals where $\hat{s}_i = 0$ are dropped before the matrix is inverted, which enables greater numerical stability but whose theoretical justification is not well explored [2].
Add supplemental documentation for turnbull 2023-10-14 06:36:14 +11:00
turnbull: Allow dropping columns/rows of Hessian corresponding to intervals with zero failure probability 2023-10-15 02:39:04 +11:00			`%{\vspace{0.5cm}\scshape\centering References\par}`
			`{\pagebreak\scshape\centering References\par}`
Add supplemental documentation for turnbull 2023-10-14 06:36:14 +11:00
			`\begin{enumerate}`
			`\item Turnbull BW. The empirical distribution function with arbitrarily grouped, censored and truncated data. \textit{Journal of the Royal Statistical Society, Series B (Methodological)}. 1976;38(3):290–5. \href{https://doi.org/10.1111/j.2517-6161.1976.tb01597.x}{doi: 10.1111\slash j.2517-6161.1976.tb01597.x}`
turnbull: Allow dropping columns/rows of Hessian corresponding to intervals with zero failure probability 2023-10-15 02:39:04 +11:00			`\item Goodall RL, Dunn DT, Babiker AG. Interval-censored survival time data: confidence intervals for the non-parametric survivor function. \textit{Statistics in Medicine}. 2004;23(7):1131–45. \href{https://doi.org/10.1002/sim.1682}{doi: 10.1002\slash sim.1682}`
Add supplemental documentation for turnbull 2023-10-14 06:36:14 +11:00			`\end{enumerate}`

			`\end{document}`