diff --git a/README.md b/README.md index 970256e..23162e2 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,9 @@ Relevant statistical functions are all directly available from the top-level *yl * *PenalisedLogit*: Model for Firth penalised logistic regression * *regress*: Fit arbitrary regression models * *vif*: Compute the variance inflation factor for independent variables in regression +* Survival analysis: + * *kaplanmeier*: Kaplan–Meier plot + * *logrank*: Log-rank test * Input/output: * *pickle_write_compressed*, *pickle_read_compressed*: Pickle a pandas DataFrame and compress using LZMA * *pickle_write_encrypted*, *pickle_read_encrypted*: Pickle a pandas DataFrame, compress using LZMA, and encrypt diff --git a/docs/index.rst b/docs/index.rst index 9dd97b7..d918ada 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,6 +9,7 @@ scipy-yli API reference descriptives.rst sig_tests.rst regress.rst + survival.rst io.rst distributions.rst bayes_factors.rst diff --git a/docs/sig_tests.rst b/docs/sig_tests.rst index 3e4dc8a..86b8865 100644 --- a/docs/sig_tests.rst +++ b/docs/sig_tests.rst @@ -27,6 +27,9 @@ Result classes .. autoclass:: yli.sig_tests.BrunnerMunzelResult :members: +.. autoclass:: yli.sig_tests.ChiSquaredResult + :members: + .. autoclass:: yli.sig_tests.FTestResult :members: diff --git a/docs/survival.rst b/docs/survival.rst new file mode 100644 index 0000000..219fe6b --- /dev/null +++ b/docs/survival.rst @@ -0,0 +1,9 @@ +Survival analysis +================= + +Functions +--------- + +.. autofunction:: yli.kaplanmeier + +.. autofunction:: yli.logrank diff --git a/yli/regress.py b/yli/regress.py index 0930922..89d2100 100644 --- a/yli/regress.py +++ b/yli/regress.py @@ -723,7 +723,7 @@ def regress( :type formula: str :param exposure: Column in *df* for the exposure variable (numeric, some models only) :type exposure: str - :param status: Column in *df* for the status variable (time-to-event models only) + :param status: Column in *df* for the status variable (True/False or 1/0, time-to-event models only) :type status: str :param nan_policy: How to handle *nan* values (see :ref:`nan-handling`) :type nan_policy: str diff --git a/yli/sig_tests.py b/yli/sig_tests.py index 59e7b6f..76950fd 100644 --- a/yli/sig_tests.py +++ b/yli/sig_tests.py @@ -1,5 +1,5 @@ # scipy-yli: Helpful SciPy utilities and recipes -# Copyright © 2022 Lee Yingtong Li (RunasSudo) +# Copyright © 2022–2023 Lee Yingtong Li (RunasSudo) # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -488,7 +488,13 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont # Pearson chi-squared test class ChiSquaredResult: - # TODO: Documentation + """ + Result of a generic test with *χ*:sup:`2`-distributed test statistic + + See :meth:`yli.logrank`. + + See also :class:`yli.regress.BrantResult`, :class:`yli.regress.LikelihoodRatioTestResult`, :class:`PearsonChiSquaredResult`. + """ def __init__(self, statistic, dof, pvalue): #: *χ*:sup:`2` statistic (*float*) diff --git a/yli/survival.py b/yli/survival.py index 7c02830..0b70e37 100644 --- a/yli/survival.py +++ b/yli/survival.py @@ -22,7 +22,26 @@ from .sig_tests import ChiSquaredResult from .utils import check_nan def kaplanmeier(df, time, status, by=None, ci=True, nan_policy='warn'): - # TODO: Documentation + """ + Generate a Kaplan–Meier plot + + Uses the Python *matplotlib* library. + + :param df: Data to generate plot for + :type df: DataFrame + :param time: Column in *df* for the time to event (numeric or timedelta) + :type time: str + :param status: Column in *df* for the status variable (True/False or 1/0) + :type status: str + :param by: Column in *df* to stratify by (categorical) + :type by: str + :param ci: Whether to plot confidence intervals around the survival function + :type ci: bool + :param nan_policy: How to handle *nan* values (see :ref:`nan-handling`) + :type nan_policy: str + + :rtype: (Figure, Axes) + """ import matplotlib.pyplot as plt @@ -78,7 +97,7 @@ def kaplanmeier(df, time, status, by=None, ci=True, nan_policy='warn'): ax.set_ylim(0, 1) ax.legend() - return ax + return fig, ax def plot_survfunc(ax, time, status, ci): # Estimate the survival function @@ -105,7 +124,24 @@ def plot_survfunc(ax, time, status, ci): return handle def logrank(df, time, status, by, nan_policy='warn'): - # TODO: Documentation + """ + Perform the log-rank test for equality of survival functions + + :param df: Data to perform the test on + :type df: DataFrame + :param time: Column in *df* for the time to event (numeric or timedelta) + :type time: str + :param status: Column in *df* for the status variable (True/False or 1/0) + :type status: str + :param by: Column in *df* to stratify by (categorical) + :type by: str + :param nan_policy: How to handle *nan* values (see :ref:`nan-handling`) + :type nan_policy: str + + :rtype: :class:`yli.sig_tests.ChiSquaredResult` + """ + + # TODO: Example # Check for/clean NaNs df = check_nan(df[[time, status, by]], nan_policy)