\documentclass[a4paper,12pt]{article}

\usepackage[math-style=ISO, bold-style=ISO]{unicode-math}
\setmainfont{TeX Gyre Termes}
\setmathfont{TeX Gyre Termes Math}

\usepackage{parskip}
\usepackage{microtype}
\usepackage[left=2cm,right=2cm,top=2cm,bottom=2cm]{geometry}
\frenchspacing
\setlength{\emergencystretch}{3em}

\usepackage{mathtools}

\begin{document}
	{\centering\bfseries Supplemental documentation for hpstat \textit{intcox} command\par}
	
	The hpstat \textit{intcox} command implements the Cox proportional hazards model for interval-censored data, using an expectation–maximization-type algorithm by Zeng, Mao \& Lin [1]. Some technical details omitted by Zeng, Mao \& Lin, and some simplifications made possible by \textit{intcox} supporting only time-independent covariates and a proportional hazards model, are described here.
	
	Zeng, Mao \& Lin describe the model in terms of a transformation $G(x) = -\log \int_0^∞ \exp(-xt) f(t) \ \mathrm{d}t$, where $f(t)$ is a density function. When $f(t)$ is the gamma density with unit mean and variance $r$, $G(x) = r^{-1} \log(1 + rx)$. When $r = 0$, $G(x) = x$ and this corresponds with the proportional hazards model, which is the only transformation supported by \textit{intcox}.
	
	In the E-step, the posterior means for latent variables $\hat{E}(W_{ik})$ and $\hat{E}(ξ_i)$ are calculated.
	
	For $\hat{E}(W_{ik})$, when $t_k ≤ L_i$, $\hat{E}(W_{ik}) = 0$, and when $L_i < t_k ≤ R_i$ for $R_i < ∞$:
	%
	$$\hat{E}(W_{ik}) = λ_k \exp(\symbf{β}^\mathrm{T} \symbf{Z}_{ik}) × \frac{\int_{ξ_i} ξ_i \{\exp(-ξ_i S_{i1}) - \exp(-ξ_i S_{i2})\} [1 - \exp\{-ξ_i (S_{i2} - S_{i1})\}]^{-1} f(ξ_i)\ \mathrm{d}ξ_i}{\exp\{-G(S_{i1})\} - \exp\{-G(S_{i2})\} }$$
	%
	Since, in the proportional hazards model, $G(x) = x$, and $f(\cdot)$ is the gamma density with unit mean and zero variance (i.e. a point mass at 1), $ξ_i = 1$ unconditionally, so the integral reduces to its integrand evaluated at $ξ_i = 1$. And since all covariates are time-independent:
	%
	\begin{align*}
		\hat{E}(W_{ik}) &= λ_k \exp(\symbf{β}^\mathrm{T} \symbf{Z}_i) × \frac{\{\exp(-S_{i1}) - \exp(-S_{i2})\} [1 - \exp\{-(S_{i2} - S_{i1})\}]^{-1}}{\exp(-S_{i1}) - \exp(-S_{i2}) } \\
		&= \frac{λ_k \exp(\symbf{β}^\mathrm{T} \symbf{Z}_i)}{1 - \exp(S_{i1} - S_{i2})}
	\end{align*}
	%
	In the M-step, Zeng, Mao \& Lin require the parameters $\symbf{β}$ to be updated by solving the following equation, whose left-hand side we denote $\symbf{h}(\symbf{β})$, using the one-step Newton–Raphson method:
	%
	$$\symbf{h}(\symbf{β}) = \sum_{i=1}^n \sum_{k=1}^m I(t_k ≤ R^*_i) \hat{E}(W_{ik}) \left\{ \symbf{Z}_{ik} - \frac{\sum_{j=1}^n I(t_k ≤ R^*_j) \hat{E}(ξ_j) \exp(\symbf{β}^\mathrm{T} \symbf{Z}_{jk}) \symbf{Z}_{jk}}{\sum_{j=1}^n I(t_k ≤ R^*_j) \hat{E}(ξ_j) \exp(\symbf{β}^\mathrm{T} \symbf{Z}_{jk})} \right\} = \mathbf{0}$$
	%
	Since $ξ_i = 1$ unconditionally, $\hat{E}(ξ_i) = 1$, and since all covariates are time-independent:
	%
	$$\symbf{h}(\symbf{β}) = \sum_{i=1}^n \sum_{k=1}^m I(t_k ≤ R^*_i) \hat{E}(W_{ik}) \left\{ \symbf{Z}_i - \frac{\sum_{j=1}^n I(t_k ≤ R^*_j) \exp(\symbf{β}^\mathrm{T} \symbf{Z}_j) \symbf{Z}_j}{\sum_{j=1}^n I(t_k ≤ R^*_j) \exp(\symbf{β}^\mathrm{T} \symbf{Z}_j)} \right\} = \mathbf{0}$$
	%
	For each $k$, let $s_0 = \sum_{j=1}^n I(t_k ≤ R^*_j) \exp(\symbf{β}^\mathrm{T} \symbf{Z}_j)$, and $\symbf{s}_1 = \sum_{j=1}^n I(t_k ≤ R^*_j) \exp(\symbf{β}^\mathrm{T} \symbf{Z}_j) \symbf{Z}_j$, such that we now require:
	%
	$$\symbf{h}(\symbf{β}) = \sum_{i=1}^n \sum_{k=1}^m I(t_k ≤ R^*_i) \hat{E}(W_{ik}) \left\{ \symbf{Z}_i - \frac{\symbf{s}_1}{s_0} \right\} = \mathbf{0}$$
	%
	Denote by $\symbf{S}_2$ the Jacobian of $\symbf{s}_1$, with respect to $\symbf{β}$. This is the square matrix whose $(a, b)$-th element is:
	%
	$$\frac{\partial}{\partial β_b} \sum_{j=1}^n I(t_k ≤ R^*_j) \exp(\symbf{β}^\mathrm{T} \symbf{Z}_j) Z_{ja} = \sum_{j=1}^n I(t_k ≤ R^*_j) \exp(\symbf{β}^\mathrm{T} \symbf{Z}_j) Z_{ja} Z_{jb}$$
	%
	In other words, the Jacobian of $\symbf{s}_1$ is:
	%
	$$\symbf{S}_2 = \sum_{j=1}^n I(t_k ≤ R^*_j) \exp(\symbf{β}^\mathrm{T} \symbf{Z}_j) \symbf{Z}_j \symbf{Z}_j^\mathrm{T}$$
	%
	Notice also that $\symbf{s}_1$ is the gradient of $s_0$ with respect to $\symbf{β}$. Applying the quotient rule to $\symbf{s}_1 / s_0$, the Jacobian of $\symbf{h}(\symbf{β})$, then, is:
	%
	\begin{align*}
		\mathbf{J}[\symbf{h}(\symbf{β})] &= \sum_{i=1}^n \sum_{k=1}^m I(t_k ≤ R^*_i) \hat{E}(W_{ik}) \left\{ -\frac{\symbf{S}_2 s_0 - \symbf{s}_1 \symbf{s}_1^\mathrm{T}}{(s_0)^2} \right\} \\
		&= \sum_{i=1}^n \sum_{k=1}^m I(t_k ≤ R^*_i) \hat{E}(W_{ik}) \left\{ \frac{\symbf{s}_1}{s_0} × \frac{\symbf{s}_1^\mathrm{T}}{s_0} - \frac{\symbf{S}_2}{s_0} \right\}
	\end{align*}
	%
	We then apply the Newton–Raphson method to $\symbf{h}(\symbf{β})$ as required by letting:
	%
	$$\symbf{β} := \symbf{β} - \left\{\mathbf{J}[\symbf{h}(\symbf{β})]\right\}^{-1} \symbf{h}(\symbf{β})$$
	
	{\centering\scshape References\par}
	
	\begin{enumerate}
		\item Zeng D, Mao L, Lin DY. Maximum likelihood estimation for semiparametric transformation models with interval-censored data. \textit{Biometrika}. 2016 Jun 1;103(2):253–71.
	\end{enumerate}
\end{document}