Add documentation for survival analysis

This commit is contained in:
RunasSudo 2023-02-26 00:05:10 +11:00
parent 18727cd950
commit d359820f42
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
7 changed files with 64 additions and 6 deletions

View File

@ -68,6 +68,9 @@ Relevant statistical functions are all directly available from the top-level *yl
* *PenalisedLogit*: Model for Firth penalised logistic regression
* *regress*: Fit arbitrary regression models
* *vif*: Compute the variance inflation factor for independent variables in regression
* Survival analysis:
* *kaplanmeier*: Kaplan–Meier plot
* *logrank*: Log-rank test
* Input/output:
* *pickle_write_compressed*, *pickle_read_compressed*: Pickle a pandas DataFrame and compress using LZMA
* *pickle_write_encrypted*, *pickle_read_encrypted*: Pickle a pandas DataFrame, compress using LZMA, and encrypt

View File

@ -9,6 +9,7 @@ scipy-yli API reference
descriptives.rst
sig_tests.rst
regress.rst
survival.rst
io.rst
distributions.rst
bayes_factors.rst

View File

@ -27,6 +27,9 @@ Result classes
.. autoclass:: yli.sig_tests.BrunnerMunzelResult
:members:
.. autoclass:: yli.sig_tests.ChiSquaredResult
:members:
.. autoclass:: yli.sig_tests.FTestResult
:members:

9
docs/survival.rst Normal file
View File

@ -0,0 +1,9 @@
Survival analysis
=================
Functions
---------
.. autofunction:: yli.kaplanmeier
.. autofunction:: yli.logrank

View File

@ -723,7 +723,7 @@ def regress(
:type formula: str
:param exposure: Column in *df* for the exposure variable (numeric, some models only)
:type exposure: str
:param status: Column in *df* for the status variable (time-to-event models only)
:param status: Column in *df* for the status variable (True/False or 1/0, time-to-event models only)
:type status: str
:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
:type nan_policy: str

View File

@ -1,5 +1,5 @@
# scipy-yli: Helpful SciPy utilities and recipes
# Copyright © 2022 Lee Yingtong Li (RunasSudo)
# Copyright © 2022–2023 Lee Yingtong Li (RunasSudo)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
@ -488,7 +488,13 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont
# Pearson chi-squared test
class ChiSquaredResult:
# TODO: Documentation
"""
Result of a generic test with *χ*:sup:`2`-distributed test statistic
See :func:`yli.logrank`.
See also :class:`yli.regress.BrantResult`, :class:`yli.regress.LikelihoodRatioTestResult`, :class:`PearsonChiSquaredResult`.
"""
def __init__(self, statistic, dof, pvalue):
#: *χ*:sup:`2` statistic (*float*)

View File

@ -22,7 +22,26 @@ from .sig_tests import ChiSquaredResult
from .utils import check_nan
def kaplanmeier(df, time, status, by=None, ci=True, nan_policy='warn'):
# TODO: Documentation
"""
Generate a Kaplan–Meier plot
Uses the Python *matplotlib* library.
:param df: Data to generate plot for
:type df: DataFrame
:param time: Column in *df* for the time to event (numeric or timedelta)
:type time: str
:param status: Column in *df* for the status variable (True/False or 1/0)
:type status: str
:param by: Column in *df* to stratify by (categorical, optional)
:type by: str
:param ci: Whether to plot confidence intervals around the survival function
:type ci: bool
:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
:type nan_policy: str
:rtype: (Figure, Axes)
"""
import matplotlib.pyplot as plt
@ -78,7 +97,7 @@ def kaplanmeier(df, time, status, by=None, ci=True, nan_policy='warn'):
ax.set_ylim(0, 1)
ax.legend()
return ax
return fig, ax
def plot_survfunc(ax, time, status, ci):
# Estimate the survival function
@ -105,7 +124,24 @@ def plot_survfunc(ax, time, status, ci):
return handle
def logrank(df, time, status, by, nan_policy='warn'):
# TODO: Documentation
"""
Perform the log-rank test for equality of survival functions
:param df: Data to perform the test on
:type df: DataFrame
:param time: Column in *df* for the time to event (numeric or timedelta)
:type time: str
:param status: Column in *df* for the status variable (True/False or 1/0)
:type status: str
:param by: Column in *df* to stratify by (categorical)
:type by: str
:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
:type nan_policy: str
:rtype: :class:`yli.sig_tests.ChiSquaredResult`
"""
# TODO: Example
# Check for/clean NaNs
df = check_nan(df[[time, status, by]], nan_policy)