Add documentation for survival analysis

2023-02-26 00:05:10 +11:00 · 2023-02-26 00:05:10 +11:00 · d359820f42
commit d359820f42
parent 18727cd950
7 changed files with 64 additions and 6 deletions
--- a/README.md
+++ b/README.md
@ -68,6 +68,9 @@ Relevant statistical functions are all directly available from the top-level *yl
 	* *PenalisedLogit*: Model for Firth penalised logistic regression
 	* *regress*: Fit arbitrary regression models
 	* *vif*: Compute the variance inflation factor for independent variables in regression
+* Survival analysis:
+	* *kaplanmeier*: Kaplan–Meier plot
+	* *logrank*: Log-rank test
 * Input/output:
 	* *pickle_write_compressed*, *pickle_read_compressed*: Pickle a pandas DataFrame and compress using LZMA
 	* *pickle_write_encrypted*, *pickle_read_encrypted*: Pickle a pandas DataFrame, compress using LZMA, and encrypt
--- a/docs/index.rst
+++ b/docs/index.rst
@ -9,6 +9,7 @@ scipy-yli API reference
 	descriptives.rst
 	sig_tests.rst
 	regress.rst
+	survival.rst
 	io.rst
 	distributions.rst
 	bayes_factors.rst
--- a/docs/sig_tests.rst
+++ b/docs/sig_tests.rst
@ -27,6 +27,9 @@ Result classes
 .. autoclass:: yli.sig_tests.BrunnerMunzelResult
 	:members:

+.. autoclass:: yli.sig_tests.ChiSquaredResult
+	:members:
+
 .. autoclass:: yli.sig_tests.FTestResult
 	:members:

--- a/docs/survival.rst
+++ b/docs/survival.rst
@ -0,0 +1,9 @@
+Survival analysis
+=================
+
+Functions
+---------
+
+.. autofunction:: yli.kaplanmeier
+
+.. autofunction:: yli.logrank
--- a/yli/regress.py
+++ b/yli/regress.py
@ -723,7 +723,7 @@ def regress(
 	:type formula: str
 	:param exposure: Column in *df* for the exposure variable (numeric, some models only)
 	:type exposure: str
-	:param status: Column in *df* for the status variable (time-to-event models only)
+	:param status: Column in *df* for the status variable (True/False or 1/0, time-to-event models only)
 	:type status: str
 	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
 	:type nan_policy: str
--- a/yli/sig_tests.py
+++ b/yli/sig_tests.py
@ -1,5 +1,5 @@
 #   scipy-yli: Helpful SciPy utilities and recipes
-#   Copyright © 2022  Lee Yingtong Li (RunasSudo)
+#   Copyright © 2022–2023  Lee Yingtong Li (RunasSudo)
 #
 #   This program is free software: you can redistribute it and/or modify
 #   it under the terms of the GNU Affero General Public License as published by
@ -488,7 +488,13 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont
 # Pearson chi-squared test

 class ChiSquaredResult:
-	# TODO: Documentation
+	"""
+	Result of a generic test with *χ*:sup:`2`-distributed test statistic
+	
+	See :func:`yli.logrank`.
+	
+	See also :class:`yli.regress.BrantResult`, :class:`yli.regress.LikelihoodRatioTestResult`, :class:`PearsonChiSquaredResult`.
+	"""
 	
 	def __init__(self, statistic, dof, pvalue):
 		#: *χ*:sup:`2` statistic (*float*)
--- a/yli/survival.py
+++ b/yli/survival.py
@ -22,7 +22,26 @@ from .sig_tests import ChiSquaredResult
 from .utils import check_nan

 def kaplanmeier(df, time, status, by=None, ci=True, nan_policy='warn'):
-	# TODO: Documentation
+	"""
+	Generate a Kaplan–Meier plot
+	
+	Uses the Python *matplotlib* library.
+	
+	:param df: Data to generate plot for
+	:type df: DataFrame
+	:param time: Column in *df* for the time to event (numeric or timedelta)
+	:type time: str
+	:param status: Column in *df* for the status variable (True/False or 1/0)
+	:type status: str
+	:param by: Column in *df* to stratify by (categorical, optional)
+	:type by: str
+	:param ci: Whether to plot confidence intervals around the survival function
+	:type ci: bool
+	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
+	:type nan_policy: str
+	
+	:rtype: (Figure, Axes)
+	"""
 	
 	import matplotlib.pyplot as plt
 	
@ -78,7 +97,7 @@ def kaplanmeier(df, time, status, by=None, ci=True, nan_policy='warn'):
 	ax.set_ylim(0, 1)
 	ax.legend()
 	
-	return ax
+	return fig, ax

 def plot_survfunc(ax, time, status, ci):
 	# Estimate the survival function
@ -105,7 +124,24 @@ def plot_survfunc(ax, time, status, ci):
 	return handle

 def logrank(df, time, status, by, nan_policy='warn'):
-	# TODO: Documentation
+	"""
+	Perform the log-rank test for equality of survival functions
+	
+	:param df: Data to perform the test on
+	:type df: DataFrame
+	:param time: Column in *df* for the time to event (numeric or timedelta)
+	:type time: str
+	:param status: Column in *df* for the status variable (True/False or 1/0)
+	:type status: str
+	:param by: Column in *df* to stratify by (categorical)
+	:type by: str
+	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
+	:type nan_policy: str
+	
+	:rtype: :class:`yli.sig_tests.ChiSquaredResult`
+	"""
+	
+	# TODO: Example
 	
 	# Check for/clean NaNs
 	df = check_nan(df[[time, status, by]], nan_policy)