From 5633a191f1d0e9f87d07d36091c06a3e09220f44 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Sat, 3 Dec 2022 20:01:05 +1100 Subject: [PATCH] Update documentation --- docs/sig_tests.rst | 5 +++++ yli/sig_tests.py | 49 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/docs/sig_tests.rst b/docs/sig_tests.rst index 0d10675..3e4dc8a 100644 --- a/docs/sig_tests.rst +++ b/docs/sig_tests.rst @@ -14,6 +14,8 @@ Functions .. autofunction:: yli.pearsonr +.. autofunction:: yli.spearman + .. autofunction:: yli.ttest_ind Result classes @@ -38,5 +40,8 @@ Result classes .. autoclass:: yli.sig_tests.PearsonRResult :members: +.. autoclass:: yli.sig_tests.SpearmanResult + :members: + .. autoclass:: yli.sig_tests.TTestResult :members: diff --git a/yli/sig_tests.py b/yli/sig_tests.py index 1cafc77..65b835b 100644 --- a/yli/sig_tests.py +++ b/yli/sig_tests.py @@ -606,7 +606,7 @@ def chi2(df, dep, ind, *, nan_policy='warn'): The odds of *Stress* in the *Response* = *True* group are 1.33 times that in the *Response* = *False* group, with 95% confidence interval 1.11–1.60. - The risk of *Stress* in the *Response* = *True* group is 1.11 that in the *Response* = *False* group, with 95% confidence interval 1.03–1.18. + The risk of *Stress* in the *Response* = *True* group is 1.11 times that in the *Response* = *False* group, with 95% confidence interval 1.03–1.18. """ # Check for/clean NaNs @@ -649,13 +649,13 @@ def chi2(df, dep, ind, *, nan_policy='warn'): class PearsonRResult: """ - Result of Pearson correlation + Result of Pearson product-moment correlation See :func:`yli.pearsonr`. """ def __init__(self, statistic, pvalue): - #: Pearson *r* correlation statistic (*float*) + #: Pearson *r* correlation statistic (:class:`yli.utils.Estimate`) self.statistic = statistic #: *p* value for the *r* statistic (*float*) self.pvalue = pvalue @@ -679,7 +679,7 @@ class PearsonRResult: def pearsonr(df, dep, ind, *, nan_policy='warn'): """ - Compute the Pearson correlation coefficient (Pearson's *r*) + Compute the Pearson product-moment correlation coefficient (Pearson's *r*) :param df: Data to perform the test on :type df: DataFrame @@ -722,8 +722,16 @@ def pearsonr(df, dep, ind, *, nan_policy='warn'): # Spearman correlation class SpearmanResult: + """ + Result of Spearman rank correlation + + See :func:`yli.spearman`. + """ + def __init__(self, statistic, pvalue): + #: Spearman *ρ* correlation statistic (:class:`yli.utils.Estimate`) self.statistic = statistic + #: *p* value for the *ρ* statistic (*float*) self.pvalue = pvalue def __repr__(self): @@ -744,6 +752,39 @@ class SpearmanResult: return 'ρ ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, PValueStyle.RELATION)) def spearman(df, dep, ind, *, nan_policy='warn'): + """ + Compute the Spearman rank correlation coefficient (Spearman's *ρ*) + + The confidence interval for *ρ* is computed analogously to SciPy's *pearsonr*, using the Fisher transformation and normal approximation, without adjustment to variance. + + :param df: Data to perform the test on + :type df: DataFrame + :param dep: Column in *df* for the dependent variable (numerical) + :type dep: str + :param ind: Column in *df* for the independent variable (numerical) + :type ind: str + :param nan_policy: How to handle *nan* values (see :ref:`nan-handling`) + :type nan_policy: str + + :rtype: :class:`yli.sig_tests.SpearmanResult` + + **Example:** + + .. code-block:: + + df = pd.DataFrame({ + 'Profit': [2.5, 6.2, 3.1, ...], + 'Quality': [50, 57, 61, ...] + }) + yli.spearman(df, 'Profit', 'Quality') + + .. code-block:: text + + ρ (95% CI) = 0.87 (0.60–0.96); p < 0.001* + + The output states that the value of the Spearman correlation coefficient is 0.87, with 95% confidence interval 0.60–0.96, and the test is significant with *p* value < 0.001. + """ + # Check for/clean NaNs df = check_nan(df[[ind, dep]], nan_policy)