scipy-yli/yli/survival.py

#   scipy-yli: Helpful SciPy utilities and recipes
#   Copyright © 2022–2023  Lee Yingtong Li (RunasSudo)
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU Affero General Public License for more details.
#
#   You should have received a copy of the GNU Affero General Public License
#   along with this program.  If not, see <https://www.gnu.org/licenses/>.

import numpy as np
from scipy import stats
import statsmodels.api as sm

from .config import config
from .sig_tests import ChiSquaredResult
from .regress import RegressionResult, SingleTerm
from .utils import Estimate, check_nan, cols_for_formula, convert_pandas_nullable

from datetime import datetime
import weakref

def kaplanmeier(df, time, status, by=None, *, ci=True, transform_x=None, transform_y=None, nan_policy='warn'):
	"""
	Generate a Kaplan–Meier plot
	
	Uses the Python *matplotlib* library.
	
	One survival curve is drawn for the whole data set, or one curve per level of *by* if it is given.
	A legend is only drawn when stratifying, since otherwise there are no labelled curves to list.
	
	:param df: Data to generate plot for
	:type df: DataFrame
	:param time: Column in *df* for the time to event (numeric or timedelta)
	:type time: str
	:param status: Column in *df* for the status variable (True/False or 1/0)
	:type status: str
	:param by: Column in *df* to stratify by (categorical), or *None* for no stratification
	:type by: str
	:param ci: Whether to plot confidence intervals around the survival function
	:type ci: bool
	:param transform_x: Function to transform x axis by
	:type transform_x: callable
	:param transform_y: Function to transform y axis by
	:type transform_y: callable
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	
	:rtype: (Figure, Axes)
	"""
	
	import matplotlib.pyplot as plt
	
	# Decide once whether we are stratifying, so that the columns selected here and the
	# grouping performed below can never disagree (e.g. for a falsy column label such as 0)
	stratified = by is not None
	
	# Check for/clean NaNs
	columns = [time, status, by] if stratified else [time, status]
	df = check_nan(df[columns], nan_policy)
	
	# Convert timedelta to numeric
	df, time_units = survtime_to_numeric(df, time)
	
	fig, ax = plt.subplots()
	
	if stratified:
		# Group by independent variable
		groups = df.groupby(by)
		
		for group in groups.groups:
			subset = groups.get_group(group)
			handle = plot_survfunc_kaplanmeier(ax, subset[time], subset[status], ci, transform_x, transform_y)
			handle.set_label('{} = {}'.format(by, group))
	else:
		# No grouping
		plot_survfunc_kaplanmeier(ax, df[time], df[status], ci, transform_x, transform_y)
	
	if time_units:
		ax.set_xlabel('{} ({})'.format(time, time_units))
	else:
		ax.set_xlabel(time)
	ax.set_ylabel('Survival probability ({:.0%} CI)'.format(1-config.alpha) if ci else 'Survival probability')
	ax.set_xlim(left=0)
	ax.set_ylim(0, 1)
	
	# Only the per-group curves carry labels; without them there is nothing to put in a legend
	if stratified:
		ax.legend()
	
	return fig, ax

def plot_survfunc_kaplanmeier(ax, time, status, ci, transform_x=None, transform_y=None):
	"""
	Draw a single Kaplan–Meier survival curve, and optionally its confidence band, onto *ax*
	
	The survival function is estimated using *statsmodels* ``SurvfuncRight``.
	The confidence band is the estimate plus/minus the normal critical value for *config.alpha* times the standard error.
	
	:param ax: Axes to draw onto
	:type ax: Axes
	:param time: Times to event
	:type time: Series
	:param status: Status indicators corresponding to *time*
	:type status: Series
	:param ci: Whether to draw the confidence band
	:type ci: bool
	:param transform_x: Function applied to the x coordinates before drawing, or *None*
	:type transform_x: callable
	:param transform_y: Function applied to the y coordinates (curve and confidence limits) before drawing, or *None*
	:type transform_y: callable
	
	:return: Handle of the line drawn for the survival curve
	"""
	
	# Estimate the survival function
	sf = sm.SurvfuncRight(time, status)
	
	# Draw straight lines: repeat each point and offset x against y to form a step function
	xpoints = sf.surv_times.repeat(2)[1:]
	ypoints = sf.surv_prob.repeat(2)[:-1]
	
	# Apply the transforms *before* drawing, so the curve and the confidence band share the same coordinates
	if transform_x:
		xpoints = transform_x(xpoints)
	if transform_y:
		ypoints = transform_y(ypoints)
	
	handle = ax.plot(xpoints, ypoints)[0]
	
	if ci:
		zstar = -stats.norm.ppf(config.alpha/2)
		
		# Get confidence intervals
		ci0 = sf.surv_prob - zstar * sf.surv_prob_se
		ci1 = sf.surv_prob + zstar * sf.surv_prob_se
		
		# Plot confidence intervals, stepped in the same way as the curve
		ypoints0 = ci0.repeat(2)[:-1]
		ypoints1 = ci1.repeat(2)[:-1]
		
		if transform_y:
			ypoints0 = transform_y(ypoints0)
			ypoints1 = transform_y(ypoints1)
		
		# Underscore label keeps the band out of the automatically generated legend
		ax.fill_between(xpoints, ypoints0, ypoints1, alpha=0.3, label='_')
	
	return handle

def turnbull(df, time_left, time_right, by=None, *, transform_x=None, transform_y=None, nan_policy='warn'):
	"""
	Generate a Turnbull estimator plot, which extends the Kaplan–Meier estimator to interval-censored observations
	
	The intervals are assumed to be half-open intervals, (*left*, *right*]. *right* == *np.inf* implies the event was right-censored. Unlike :func:`yli.kaplanmeier`, times must be given as numeric dtypes and not as pandas timedelta.
	
	For ease of interpretation, the survival function is drawn as a step function at the midpoint of the estimate on each interval.
	
	A legend is only drawn when stratifying, since otherwise there are no labelled curves to list.
	
	Uses the Python *lifelines* and *matplotlib* libraries.
	
	:param df: Data to generate plot for
	:type df: DataFrame
	:param time_left: Column in *df* for the time to event, left interval endpoint (numeric)
	:type time_left: str
	:param time_right: Column in *df* for the time to event, right interval endpoint (numeric)
	:type time_right: str
	:param by: Column in *df* to stratify by (categorical), or *None* for no stratification
	:type by: str
	:param transform_x: Function to transform x axis by
	:type transform_x: callable
	:param transform_y: Function to transform y axis by
	:type transform_y: callable
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	
	:rtype: (Figure, Axes)
	"""
	
	import matplotlib.pyplot as plt
	
	# Decide once whether we are stratifying, so that the columns selected here and the
	# grouping performed below can never disagree (e.g. for a falsy column label such as 0)
	stratified = by is not None
	
	# Check for/clean NaNs
	columns = [time_left, time_right, by] if stratified else [time_left, time_right]
	df = check_nan(df[columns], nan_policy)
	
	fig, ax = plt.subplots()
	
	if stratified:
		# Group by independent variable
		groups = df.groupby(by)
		
		for group in groups.groups:
			subset = groups.get_group(group)
			handle = plot_survfunc_turnbull(ax, subset[time_left], subset[time_right], transform_x, transform_y)
			handle.set_label('{} = {}'.format(by, group))
	else:
		# No grouping
		plot_survfunc_turnbull(ax, df[time_left], df[time_right], transform_x, transform_y)
	
	ax.set_xlabel('Analysis time')
	ax.set_ylabel('Survival probability')
	ax.set_xlim(left=0)
	ax.set_ylim(0, 1)
	
	# Only the per-group curves carry labels; without them there is nothing to put in a legend
	if stratified:
		ax.legend()
	
	return fig, ax

def plot_survfunc_turnbull(ax, time_left, time_right, transform_x=None, transform_y=None):
	"""
	Draw a single Turnbull survival curve for interval-censored data onto *ax*
	
	The survival function is estimated using *lifelines* ``KaplanMeierFitter.fit_interval_censoring``.
	The curve is drawn at the midpoint of the upper and lower NPMLE estimates.
	
	:param ax: Axes to draw onto
	:type ax: Axes
	:param time_left: Left interval endpoints
	:type time_left: Series
	:param time_right: Right interval endpoints
	:type time_right: Series
	:param transform_x: Function applied to the x coordinates before drawing, or *None*
	:type transform_x: callable
	:param transform_y: Function applied to the y coordinates before drawing, or *None*
	:type transform_y: callable
	
	:return: Handle of the line drawn for the survival curve
	"""
	
	import lifelines
	
	# TODO: Support left == right => failure was exactly observed
	
	# Shift the left endpoints up by a tiny amount to make each interval half-open
	epsilon = 1e-10
	fitter = lifelines.KaplanMeierFitter()
	fitted = fitter.fit_interval_censoring(time_left + epsilon, time_right)
	
	# Midpoint between the upper and lower estimates at each time
	estimate = fitted.survival_function_
	midpoint = (estimate['NPMLE_estimate_upper'] + estimate['NPMLE_estimate_lower']) / 2
	
	# Repeat each point and offset x against y so that straight segments form a step function
	xs = estimate.index.to_numpy().repeat(2)[:-1]
	ys = midpoint.to_numpy().repeat(2)[1:]
	
	if transform_x:
		xs = transform_x(xs)
	if transform_y:
		ys = transform_y(ys)
	
	return ax.plot(xs, ys)[0]

def survtime_to_numeric(df, time):
	"""
	Convert pandas timedelta dtype to float64, auto-detecting the best time unit to display
	
	The largest unit (years, weeks, days, hours, minutes) which the maximum time exceeds is used, falling back to seconds.
	Timedelta columns of any resolution are accepted. Columns of any other dtype are left untouched.
	
	:param df: Data to check for pandas timedelta dtype
	:type df: DataFrame
	:param time: Column to check for pandas timedelta dtype
	:type time: str
	
	:return: (*df*, *time_units*)
		
		* **df** (*DataFrame*) – Data with pandas timedelta dtypes converted, which is *not* copied
		* **time_units** (*str*) – Human-readable description of the time unit, or *None* if not converted
	"""
	
	# dtype kind 'm' is timedelta64 at any resolution, not only nanoseconds
	if df[time].dtype.kind != 'm':
		return df, None
	
	seconds = df[time].dt.total_seconds()
	longest = seconds.max()
	
	# Candidate units from largest to smallest, with the number of seconds in each.
	# A unit is chosen when the longest time exceeds one of that unit, so every unit is reachable.
	candidates = (
		('years', 365.24*24*60*60),
		('weeks', 7*24*60*60),
		('days', 24*60*60),
		('hours', 60*60),
		('minutes', 60),
	)
	
	for time_units, unit_seconds in candidates:
		if longest > unit_seconds:
			df[time] = seconds / unit_seconds
			return df, time_units
	
	# Nothing longer than a minute (or no valid times at all)
	df[time] = seconds
	return df, 'seconds'

def logrank(df, time, status, by, nan_policy='warn'):
	"""
	Perform the log-rank test for equality of survival functions
	
	The test is delegated to *statsmodels* ``survdiff``. The reported degrees of freedom are
	one fewer than the number of distinct groups in *by*, i.e. 1 when comparing two groups.
	
	:param df: Data to perform the test on
	:type df: DataFrame
	:param time: Column in *df* for the time to event (numeric or timedelta)
	:type time: str
	:param status: Column in *df* for the status variable (True/False or 1/0)
	:type status: str
	:param by: Column in *df* to stratify by (categorical)
	:type by: str
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	
	:rtype: :class:`yli.sig_tests.ChiSquaredResult`
	"""
	
	# TODO: Example
	
	# Check for/clean NaNs
	df = check_nan(df[[time, status, by]], nan_policy)
	
	# Convert timedelta (dtype kind 'm', at any resolution) to numeric seconds
	if df[time].dtype.kind == 'm':
		df[time] = df[time].dt.total_seconds()
	
	statistic, pvalue = sm.duration.survdiff(df[time], df[status], df[by])
	
	# Comparing k groups gives a chi-squared statistic on k - 1 degrees of freedom
	dof = df[by].nunique() - 1
	
	return ChiSquaredResult(statistic=statistic, dof=dof, pvalue=pvalue)

# --------------------------------
# Interval-censored Cox regression

def cox_interval_censored(
	df, time_left, time_right, formula, *,
	bootstrap_samples=100,
	nan_policy='warn',
	bool_baselevels=False, exp=True,
):
	"""
	Fit an interval-censored Cox regression model
	
	The model is fitted in R by calling ``ic_sp`` from the *icenReg* package through *rpy2*.
	Coefficients, confidence intervals, the covariance matrix and the log-likelihood are read back from the fitted R model.
	p values are computed here as two-sided normal tests using standard errors taken from the diagonal of the covariance matrix.
	
	:param df: Data to perform regression on
	:type df: DataFrame
	:param time_left: Column in *df* for the left interval endpoint (must be float64)
	:type time_left: str
	:param time_right: Column in *df* for the right interval endpoint (must be float64)
	:type time_right: str
	:param formula: Right-hand side of the model formula, inserted after ``~`` in the R formula passed to ``ic_sp``
	:type formula: str
	:param bootstrap_samples: Passed to ``ic_sp`` as *bs_samples*
	:type bootstrap_samples: int
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	:param bool_baselevels: Not referenced in the body of this function
	:type bool_baselevels: bool
	:param exp: Forwarded unchanged to :class:`RegressionResult` (presumably whether to report exponentiated estimates – TODO confirm)
	:type exp: bool
	
	:raises NotImplementedError: If *time_left* or *time_right* is not of float64 dtype
	
	:rtype: :class:`yli.regress.RegressionResult`
	"""
	
	# Keep only a weak reference to the caller's DataFrame, to hand to the result object
	df_ref = weakref.ref(df)
	
	# Check for/clean NaNs in input columns
	columns = [time_left, time_right] + cols_for_formula(formula, df)
	
	df = df[columns]
	df = check_nan(df, nan_policy)
	
	# FIXME: Ensure numeric type for dependent variable
	#df[dep], dep_categories = as_numeric(df[dep])
	if df[time_left].dtype != 'float64' or df[time_right].dtype != 'float64':
		raise NotImplementedError('Time dtypes must be float64')
	
	# Convert pandas nullable types for independent variables
	df = convert_pandas_nullable(df)
	
	# ---------
	# Fit model
	
	# lifelines.CoxPHFitter doesn't do confidence intervals so we use R
	
	# Imported here rather than at module level, so rpy2 is only needed when this function is called
	import rpy2.robjects as ro
	import rpy2.robjects.packages
	import rpy2.robjects.pandas2ri
	
	# Convert bool to int otherwise rpy2 chokes
	df = df.replace({False: 0, True: 1})
	
	# Import icenReg
	ro.packages.importr('icenReg')
	
	with ro.conversion.localconverter(ro.default_converter + ro.pandas2ri.converter):
		with ro.local_context() as lc:
			# Convert DataFrame to R
			lc['df'] = df
			
			# Transfer other parameters to R
			lc['formula_'] = 'Surv({}, {}, type="interval2") ~ {}'.format(time_left, time_right, formula)
			lc['bootstrap_samples'] = bootstrap_samples
			
			# FIXME: Seed bootstrap RNG?
			
			# Fit the model
			ro.r('model <- ic_sp(as.formula(formula_), data=df, bs_samples=bootstrap_samples)')
			
			model = ro.r('model')
			# Hard to access attributes through rpy2, so evaluate R expressions to extract each one
			term_parameters = ro.r('model$coef')
			term_names = ro.r('names(model$coef)')
			term_cis = ro.r('confint(model)')
			cov_matrix = ro.r('model$var')
			llf = ro.r('model$llk')[0]
			
			# TODO: Handle categorical terms?
			terms = {}
			for i in range(len(term_parameters)):
				# These values not directly exposed so we must calculate them
				# Standard error from the diagonal of the covariance matrix
				se = np.sqrt(cov_matrix[i, i])
				# Two-sided p value for the coefficient under a normal distribution centred at 0 with scale se
				pvalue = 2 * stats.norm(loc=0, scale=se).cdf(-np.abs(term_parameters[i]))
				
				term = SingleTerm(term_names[i], Estimate(term_parameters[i], term_cis[i][0], term_cis[i][1]), pvalue)
				terms[term_names[i]] = term
			
			# NOTE(review): arguments are positional; their meaning is defined by RegressionResult in .regress – verify against that class
			result = RegressionResult(
				None, df_ref, '({}, {}]'.format(time_left, time_right), formula, nan_policy, None, None,
				model,
				'Interval-Censored Cox Regression', 'CoxIC', 'MLE',
				len(df), None, None, datetime.now(), 'Bootstrap',
				terms,
				llf, None,
				None, None, None,
				[],
				exp
			)
	
	return result
Implement yli.kaplanmeier 2023-02-25 17:15:22 +11:00			`# scipy-yli: Helpful SciPy utilities and recipes`
			`# Copyright © 2022–2023 Lee Yingtong Li (RunasSudo)`
			`#`
			`# This program is free software: you can redistribute it and/or modify`
			`# it under the terms of the GNU Affero General Public License as published by`
			`# the Free Software Foundation, either version 3 of the License, or`
			`# (at your option) any later version.`
			`#`
			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU Affero General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU Affero General Public License`
			`# along with this program. If not, see <https://www.gnu.org/licenses/>.`

Implement yli.cox_interval_censored 2023-03-05 02:11:12 +11:00			`import numpy as np`
Implement yli.kaplanmeier 2023-02-25 17:15:22 +11:00			`from scipy import stats`
			`import statsmodels.api as sm`

			`from .config import config`
Implement yli.logrank 2023-02-25 17:23:20 +11:00			`from .sig_tests import ChiSquaredResult`
Implement yli.cox_interval_censored 2023-03-05 02:11:12 +11:00			`from .regress import RegressionResult, SingleTerm`
			`from .utils import Estimate, check_nan, cols_for_formula, convert_pandas_nullable`

			`from datetime import datetime`
			`import weakref`
Implement yli.kaplanmeier 2023-02-25 17:15:22 +11:00
Implement yli.turnbull 2023-03-04 21:51:27 +11:00			`def kaplanmeier(df, time, status, by=None, *, ci=True, transform_x=None, transform_y=None, nan_policy='warn'):`
Add documentation for survival analysis 2023-02-26 00:05:10 +11:00			`"""`
			`Generate a Kaplan–Meier plot`

			`Uses the Python matplotlib library.`

			`:param df: Data to generate plot for`
			`:type df: DataFrame`
			`:param time: Column in df for the time to event (numeric or timedelta)`
			`:type time: str`
			`:param status: Column in df for the status variable (True/False or 1/0)`
			`:type status: str`
			`:param by: Column in df to stratify by (categorical)`
			`:type by: str`
			`:param ci: Whether to plot confidence intervals around the survival function`
			`:type ci: bool`
Implement yli.turnbull 2023-03-04 21:51:27 +11:00			`:param transform_x: Function to transform x axis by`
			`:type transform_x: callable`
			`:param transform_y: Function to transform y axis by`
			`:type transform_y: callable`
Add documentation for survival analysis 2023-02-26 00:05:10 +11:00			:param nan_policy: How to handle nan values (see :ref:`nan-handling`)
			`:type nan_policy: str`

			`:rtype: (Figure, Axes)`
			`"""`
Implement yli.kaplanmeier 2023-02-25 17:15:22 +11:00
			`import matplotlib.pyplot as plt`

			`# Check for/clean NaNs`
			`if by:`
			`df = check_nan(df[[time, status, by]], nan_policy)`
			`else:`
			`df = check_nan(df[[time, status]], nan_policy)`

Implement yli.turnbull 2023-03-04 21:51:27 +11:00			`# Covert timedelta to numeric`
			`df, time_units = survtime_to_numeric(df, time)`
Implement yli.kaplanmeier 2023-02-25 17:15:22 +11:00
			`fig, ax = plt.subplots()`

			`if by is not None:`
			`# Group by independent variable`
			`groups = df.groupby(by)`

			`for group in groups.groups:`
			`subset = groups.get_group(group)`
Implement yli.turnbull 2023-03-04 21:51:27 +11:00			`handle = plot_survfunc_kaplanmeier(ax, subset[time], subset[status], ci, transform_x, transform_y)`
Implement yli.kaplanmeier 2023-02-25 17:15:22 +11:00			`handle.set_label('{} = {}'.format(by, group))`
			`else:`
			`# No grouping`
Implement yli.turnbull 2023-03-04 21:51:27 +11:00			`plot_survfunc_kaplanmeier(ax, df[time], df[status], ci, transform_x, transform_y)`
Implement yli.kaplanmeier 2023-02-25 17:15:22 +11:00
			`if time_units:`
			`ax.set_xlabel('{} ({})'.format(time, time_units))`
			`else:`
			`ax.set_xlabel(time)`
			`ax.set_ylabel('Survival probability ({:.0%} CI)'.format(1-config.alpha) if ci else 'Survival probability')`
Implement yli.turnbull 2023-03-04 21:51:27 +11:00			`ax.set_xlim(left=0)`
Implement yli.kaplanmeier 2023-02-25 17:15:22 +11:00			`ax.set_ylim(0, 1)`
			`ax.legend()`

Add documentation for survival analysis 2023-02-26 00:05:10 +11:00			`return fig, ax`
Implement yli.kaplanmeier 2023-02-25 17:15:22 +11:00
Implement yli.turnbull 2023-03-04 21:51:27 +11:00			`def plot_survfunc_kaplanmeier(ax, time, status, ci, transform_x=None, transform_y=None):`
Implement yli.kaplanmeier 2023-02-25 17:15:22 +11:00			`# Estimate the survival function`
			`sf = sm.SurvfuncRight(time, status)`

			`# Draw straight lines`
			`xpoints = sf.surv_times.repeat(2)[1:]`
			`ypoints = sf.surv_prob.repeat(2)[:-1]`
			`handle = ax.plot(xpoints, ypoints)[0]`

Implement yli.turnbull 2023-03-04 21:51:27 +11:00			`if transform_x:`
			`xpoints = transform_x(xpoints)`
			`if transform_y:`
			`ypoints = transform_y(ypoints)`

Implement yli.kaplanmeier 2023-02-25 17:15:22 +11:00			`if ci:`
			`zstar = -stats.norm.ppf(config.alpha/2)`

			`# Get confidence intervals`
			`ci0 = sf.surv_prob - zstar * sf.surv_prob_se`
			`ci1 = sf.surv_prob + zstar * sf.surv_prob_se`

			`# Plot confidence intervals`
			`ypoints0 = ci0.repeat(2)[:-1]`
			`ypoints1 = ci1.repeat(2)[:-1]`

Implement yli.turnbull 2023-03-04 21:51:27 +11:00			`if transform_y:`
			`ypoints0 = transform_y(ypoints0)`
			`ypoints1 = transform_y(ypoints1)`

Implement yli.kaplanmeier 2023-02-25 17:15:22 +11:00			`ax.fill_between(xpoints, ypoints0, ypoints1, alpha=0.3, label='_')`

			`return handle`
Implement yli.logrank 2023-02-25 17:23:20 +11:00
Implement yli.turnbull 2023-03-04 21:51:27 +11:00			`def turnbull(df, time_left, time_right, by=None, *, transform_x=None, transform_y=None, nan_policy='warn'):`
			`"""`
			`Generate a Turnbull estimator plot, which extends the Kaplan–Meier estimator to interval-censored observations`

			The intervals are assumed to be half-open intervals, (left, right]. right == np.inf implies the event was right-censored. Unlike :func:`yli.kaplanmeier`, times must be given as numeric dtypes and not as pandas timedelta.

			`For ease of interpretation, the survival function is drawn as a step function at the midpoint of the estimate on each interval.`

			`Uses the Python lifelines and matplotlib libraries.`

			`:param df: Data to generate plot for`
			`:type df: DataFrame`
			`:param time_left: Column in df for the time to event, left interval endpoint (numeric)`
			`:type time_left: str`
			`:param time_right: Column in df for the time to event, right interval endpoint (numeric)`
			`:type time_right: str`
			`:param by: Column in df to stratify by (categorical)`
			`:type by: str`
			`:param transform_x: Function to transform x axis by`
			`:type transform_x: callable`
			`:param transform_y: Function to transform y axis by`
			`:type transform_y: callable`
			:param nan_policy: How to handle nan values (see :ref:`nan-handling`)
			`:type nan_policy: str`

			`:rtype: (Figure, Axes)`
			`"""`

			`import matplotlib.pyplot as plt`

			`# Check for/clean NaNs`
			`if by:`
			`df = check_nan(df[[time_left, time_right, by]], nan_policy)`
			`else:`
			`df = check_nan(df[[time_left, time_right]], nan_policy)`

			`fig, ax = plt.subplots()`

			`if by is not None:`
			`# Group by independent variable`
			`groups = df.groupby(by)`

			`for group in groups.groups:`
			`subset = groups.get_group(group)`
			`handle = plot_survfunc_turnbull(ax, subset[time_left], subset[time_right], transform_x, transform_y)`
			`handle.set_label('{} = {}'.format(by, group))`
			`else:`
			`# No grouping`
			`plot_survfunc_turnbull(ax, df[time_left], df[time_right], transform_x, transform_y)`

			`ax.set_xlabel('Analysis time')`
			`ax.set_ylabel('Survival probability')`
			`ax.set_xlim(left=0)`
			`ax.set_ylim(0, 1)`
			`ax.legend()`

			`return fig, ax`

			`def plot_survfunc_turnbull(ax, time_left, time_right, transform_x=None, transform_y=None):`
			`import lifelines`

			`EPSILON = 1e-10`

			`# TODO: Support left == right => failure was exactly observed`

			`followup_left = time_left + EPSILON # Add epsilon to make interval half-open`
			`followup_right = time_right`

			`# Estimate the survival function`
			`sf = lifelines.KaplanMeierFitter().fit_interval_censoring(followup_left, followup_right)`

			`# Draw straight lines`
			`xpoints = sf.survival_function_.index.to_numpy().repeat(2)[:-1]`
			`med = (sf.survival_function_['NPMLE_estimate_upper'] + sf.survival_function_['NPMLE_estimate_lower']) / 2`
			`ypoints = med.to_numpy().repeat(2)[1:]`

			`if transform_x:`
			`xpoints = transform_x(xpoints)`
			`if transform_y:`
			`ypoints = transform_y(ypoints)`

			`handle = ax.plot(xpoints, ypoints)[0]`

			`return handle`

			`def survtime_to_numeric(df, time):`
			`"""`
			`Convert pandas timedelta dtype to float64, auto-detecting the best time unit to display`

			`:param df: Data to check for pandas timedelta dtype`
			`:type df: DataFrame`
			`:param time: Column to check for pandas timedelta dtype`
			`:type df: DataFrame`

			`:return: (df, time_units)`

			`* df (DataFrame) – Data with pandas timedelta dtypes converted, which is not copied`
			`* time_units (str) – Human-readable description of the time unit, or None if not converted`
			`"""`

			`if df[time].dtype == '<m8[ns]':`
			`df[time] = df[time].dt.total_seconds()`

			`# Auto-detect best time units`
			`if df[time].max() > 365.24*24*60*60:`
			`df[time] = df[time] / (365.24*24*60*60)`
			`time_units = 'years'`
			`elif df[time].max() > 7*24*60*60 / 12:`
			`df[time] = df[time] / (7*24*60*60)`
			`time_units = 'weeks'`
			`elif df[time].max() > 24*60*60:`
			`df[time] = df[time] / (24*60*60)`
			`time_units = 'days'`
			`elif df[time].max() > 60*60:`
			`df[time] = df[time] / (60*60)`
			`time_units = 'hours'`
			`elif df[time].max() > 60:`
			`df[time] = df[time] / 60`
			`time_units = 'minutes'`
			`else:`
			`time_units = 'seconds'`

			`return df, time_units`
			`else:`
			`return df, None`

Implement yli.logrank 2023-02-25 17:23:20 +11:00			`def logrank(df, time, status, by, nan_policy='warn'):`
Add documentation for survival analysis 2023-02-26 00:05:10 +11:00			`"""`
			`Perform the log-rank test for equality of survival functions`

			`:param df: Data to perform the test on`
			`:type df: DataFrame`
			`:param time: Column in df for the time to event (numeric or timedelta)`
			`:type time: str`
			`:param status: Column in df for the status variable (True/False or 1/0)`
			`:type status: str`
			`:param by: Column in df to stratify by (categorical)`
			`:type by: str`
			:param nan_policy: How to handle nan values (see :ref:`nan-handling`)
			`:type nan_policy: str`

			:rtype: :class:`yli.sig_tests.ChiSquaredResult`
			`"""`

			`# TODO: Example`
Implement yli.logrank 2023-02-25 17:23:20 +11:00
			`# Check for/clean NaNs`
			`df = check_nan(df[[time, status, by]], nan_policy)`

			`if df[time].dtype == '<m8[ns]':`
			`df[time] = df[time].dt.total_seconds()`

			`statistic, pvalue = sm.duration.survdiff(df[time], df[status], df[by])`

			`return ChiSquaredResult(statistic=statistic, dof=1, pvalue=pvalue)`
Implement yli.cox_interval_censored 2023-03-05 02:11:12 +11:00
			`# --------------------------------`
			`# Interval-censored Cox regression`

			`def cox_interval_censored(`
			`df, time_left, time_right, formula, *,`
			`bootstrap_samples=100,`
			`nan_policy='warn',`
			`bool_baselevels=False, exp=True,`
			`):`
			`# TODO: Documentation`

			`df_ref = weakref.ref(df)`

			`# Check for/clean NaNs in input columns`
			`columns = [time_left, time_right] + cols_for_formula(formula, df)`

			`df = df[columns]`
			`df = check_nan(df, nan_policy)`

			`# FIXME: Ensure numeric type for dependent variable`
			`#df[dep], dep_categories = as_numeric(df[dep])`
			`if df[time_left].dtype != 'float64' or df[time_right].dtype != 'float64':`
			`raise NotImplementedError('Time dtypes must be float64')`

			`# Convert pandas nullable types for independent variables`
			`df = convert_pandas_nullable(df)`

			`# ---------`
			`# Fit model`

			`# lifelines.CoxPHFitter doesn't do confidence intervals so we use R`

			`import rpy2.robjects as ro`
			`import rpy2.robjects.packages`
			`import rpy2.robjects.pandas2ri`

			`# Convert bool to int otherwise rpy2 chokes`
			`df = df.replace({False: 0, True: 1})`

			`# Import icenReg`
			`ro.packages.importr('icenReg')`

			`with ro.conversion.localconverter(ro.default_converter + ro.pandas2ri.converter):`
			`with ro.local_context() as lc:`
			`# Convert DataFrame to R`
			`lc['df'] = df`

			`# Transfer other parameters to R`
			`lc['formula_'] = 'Surv({}, {}, type="interval2") ~ {}'.format(time_left, time_right, formula)`
			`lc['bootstrap_samples'] = bootstrap_samples`

			`# FIXME: Seed bootstrap RNG?`

			`# Fit the model`
			`ro.r('model <- ic_sp(as.formula(formula_), data=df, bs_samples=bootstrap_samples)')`

			`model = ro.r('model')`
			`# Hard to access attributes through rpy2`
			`term_parameters = ro.r('model$coef')`
			`term_names = ro.r('names(model$coef)')`
			`term_cis = ro.r('confint(model)')`
			`cov_matrix = ro.r('model$var')`
			`llf = ro.r('model$llk')[0]`

			`# TODO: Handle categorical terms?`
			`terms = {}`
			`for i in range(len(term_parameters)):`
			`# These values not directly exposed so we must calculate them`
			`se = np.sqrt(cov_matrix[i, i])`
			`pvalue = 2 * stats.norm(loc=0, scale=se).cdf(-np.abs(term_parameters[i]))`

			`term = SingleTerm(term_names[i], Estimate(term_parameters[i], term_cis[i][0], term_cis[i][1]), pvalue)`
			`terms[term_names[i]] = term`

			`result = RegressionResult(`
			`None, df_ref, '({}, {}]'.format(time_left, time_right), formula, nan_policy, None, None,`
			`model,`
			`'Interval-Censored Cox Regression', 'CoxIC', 'MLE',`
			`len(df), None, None, datetime.now(), 'Bootstrap',`
			`terms,`
			`llf, None,`
			`None, None, None,`
			`[],`
			`exp`
			`)`

			`return result`