From 0f63b7eb404a957d561b3d57fe912961dc42c851 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Wed, 9 Nov 2022 22:08:27 +1100 Subject: [PATCH] Show descriptives in Mann-Whitney test output --- tests/test_mannwhitney.py | 6 ++++- yli/sig_tests.py | 50 +++++++++++++++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/tests/test_mannwhitney.py b/tests/test_mannwhitney.py index 31a75fd..ad695cc 100644 --- a/tests/test_mannwhitney.py +++ b/tests/test_mannwhitney.py @@ -33,7 +33,11 @@ def test_mannwhitney_ol6_6(): assert result.pvalue == approx(0.00007, abs=0.00001) - expected_summary = '''U = 6.0; p < 0.001* + expected_summary = ''' After Before +Median (IQR) 10.75 (10.55–10.95) 11.55 (11.20–11.83) +Median (range) 10.75 (11.00–12.10) 11.55 (11.00–12.10) + +U = 6.0; p < 0.001* r = 0.92, Before > After''' assert result.summary() == expected_summary diff --git a/yli/sig_tests.py b/yli/sig_tests.py index f559c7a..c24982e 100644 --- a/yli/sig_tests.py +++ b/yli/sig_tests.py @@ -23,7 +23,7 @@ import functools import warnings from .config import config -from .utils import Estimate, PValueStyle, as_2groups, check_nan, convert_pandas_nullable, fmt_p +from .utils import ConfidenceInterval, Estimate, PValueStyle, as_2groups, check_nan, convert_pandas_nullable, fmt_p # ---------------- # Student's t test @@ -42,6 +42,7 @@ class TTestResult: self.dof = dof #: *p* value for the *t* statistic (*float*) self.pvalue = pvalue + #: Name of the first group (*str*) self.group1 = group1 #: Name of the second group (*str*) @@ -54,6 +55,7 @@ class TTestResult: self.sd1 = sd1 #: Standard deviation of the second group (*float*) self.sd2 = sd2 + #: Absolute value of the mean difference (:class:`yli.utils.Estimate`) self.delta = delta #: Description of the direction of the effect (*str*) @@ -252,25 +254,58 @@ class MannWhitneyResult: See :func:`yli.mannwhitney`. 
""" - def __init__(self, statistic, pvalue, rank_biserial, direction, brunnermunzel=None): + def __init__(self, statistic, pvalue, group1, group2, med1, med2, iqr1, iqr2, range1, range2, rank_biserial, direction, brunnermunzel=None): #: Lesser of the two Mann–Whitney *U* statistics (*float*) self.statistic = statistic #: *p* value for the *U* statistic (*float*) self.pvalue = pvalue + + #: Name of the first group (*str*) + self.group1 = group1 + #: Name of the second group (*str*) + self.group2 = group2 + #: Median of the first group (*float*) + self.med1 = med1 + #: Median of the second group (*float*) + self.med2 = med2 + #: Interquartile range of the first group (:class:`yli.utils.ConfidenceInterval`) + self.iqr1 = iqr1 + #: Interquartile range of the second group (:class:`yli.utils.ConfidenceInterval`) + self.iqr2 = iqr2 + #: Range of the first group (:class:`yli.utils.ConfidenceInterval`) + self.range1 = range1 + #: Range of the second group (:class:`yli.utils.ConfidenceInterval`) + self.range2 = range2 + #: Absolute value of the rank-biserial correlation (*float*) self.rank_biserial = rank_biserial #: Description of the direction of the effect (*str*) self.direction = direction + #: :class:`BrunnerMunzelResult` on the same data, or *None* if N/A self.brunnermunzel = brunnermunzel + def _comparison_table(self, html): + """Return a table showing the medians/IQRs/ranges for each group""" + + table_data = { + self.group1: ['{:.2f} ({})'.format(self.med1, self.iqr1.summary()), '{:.2f} ({})'.format(self.med1, self.range1.summary())], + self.group2: ['{:.2f} ({})'.format(self.med2, self.iqr2.summary()), '{:.2f} ({})'.format(self.med2, self.range2.summary())], + } + table = pd.DataFrame(table_data, index=['Median (IQR)', 'Median (range)']) + + if html: + return table._repr_html_() + else: + return str(table) + def __repr__(self): if config.repr_is_summary: return self.summary() return super().__repr__() def _repr_html_(self): - line1 = 'U = {:.1f}; p {}
r = {:.2f}, {}'.format(self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML), self.rank_biserial, self.direction) + line1 = '{}
U = {:.1f}; p {}
r = {:.2f}, {}'.format(self._comparison_table(True), self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML), self.rank_biserial, self.direction) if self.brunnermunzel: return line1 + '
' + self.brunnermunzel._repr_html_() else: @@ -283,7 +318,7 @@ class MannWhitneyResult: :rtype: str """ - line1 = 'U = {:.1f}; p {}\nr = {:.2f}, {}'.format(self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION), self.rank_biserial, self.direction) + line1 = '{}\n\nU = {:.1f}; p {}\nr = {:.2f}, {}'.format(self._comparison_table(False), self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION), self.rank_biserial, self.direction) if self.brunnermunzel: return line1 + '\n' + self.brunnermunzel.summary() else: @@ -359,6 +394,10 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont .. code-block:: text + After Before + Median (IQR) 10.75 (10.55–10.95) 11.55 (11.20–11.83) + Median (range) 10.75 (11.00–12.10) 11.55 (11.00–12.10) + U = 6.0; p < 0.001* r = 0.92, Before > After @@ -398,7 +437,8 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont return MannWhitneyResult( statistic=min(u1, u2), pvalue=result.pvalue, - #med1=data1.median(), med2=data2.median(), + group1=group1, group2=group2, + med1=data1.median(), med2=data2.median(), iqr1=ConfidenceInterval(data1.quantile(0.25), data1.quantile(0.75)), iqr2=ConfidenceInterval(data2.quantile(0.25), data2.quantile(0.75)), range1=ConfidenceInterval(data1.min(), data1.max()), range2=ConfidenceInterval(data2.min(), data2.max()), rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2)) # ------------------------