From ce4df2eac14208c428e3ee462647cb6ca74dfc07 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Wed, 9 Nov 2022 22:25:17 +1100 Subject: [PATCH] Show independent/dependent variables in t test and Mann-Whitney output --- tests/test_mannwhitney.py | 3 ++- tests/test_ttest.py | 5 +++-- yli/sig_tests.py | 47 ++++++++++++++++++++++++++------------- 3 files changed, 36 insertions(+), 19 deletions(-) diff --git a/tests/test_mannwhitney.py b/tests/test_mannwhitney.py index ad695cc..4c87e66 100644 --- a/tests/test_mannwhitney.py +++ b/tests/test_mannwhitney.py @@ -33,7 +33,8 @@ def test_mannwhitney_ol6_6(): assert result.pvalue == approx(0.00007, abs=0.00001) - expected_summary = ''' After Before + expected_summary = '''Sample After Before +Oxygen Median (IQR) 10.75 (10.55–10.95) 11.55 (11.20–11.83) Median (range) 10.75 (11.00–12.10) 11.55 (11.00–12.10) diff --git a/tests/test_ttest.py b/tests/test_ttest.py index 46a211f..81f0f59 100644 --- a/tests/test_ttest.py +++ b/tests/test_ttest.py @@ -39,8 +39,9 @@ def test_ttest_ind_ol6_1(): assert result.delta.ci_lower == approx(0.272, abs=0.01) assert result.delta.ci_upper == approx(0.808, abs=0.01) - expected_summary = ''' Fresh Stored -μ (SD) 10.37 (0.32) 9.83 (0.24) + expected_summary = '''Type Fresh Stored +Potency +μ (SD) 10.37 (0.32) 9.83 (0.24) t(18) = 4.24; p < 0.001* Δμ (95% CI) = 0.54 (0.27–0.81), Fresh > Stored''' diff --git a/yli/sig_tests.py b/yli/sig_tests.py index 9fa6489..1644b9c 100644 --- a/yli/sig_tests.py +++ b/yli/sig_tests.py @@ -35,7 +35,7 @@ class TTestResult: See :func:`yli.ttest_ind`. """ - def __init__(self, statistic, dof, pvalue, group1, group2, mu1, mu2, sd1, sd2, delta, delta_direction): + def __init__(self, *, statistic, dof, pvalue, dep, ind, group1, group2, mu1, mu2, sd1, sd2, delta, delta_direction): #: *t* statistic (*float*) self.statistic = statistic #: Degrees of freedom of the *t* distribution (*int*) @@ -43,6 +43,11 @@ class TTestResult: #: *p* value for the *t* statistic (*float*) self.pvalue = pvalue + #: Name of the dependent variable (*str*) + self.dep = dep + #: Name of the independent variable (*str*) + self.ind = ind + #: Name of the first group (*str*) self.group1 = group1 #: Name of the second group (*str*) @@ -64,17 +69,17 @@ class TTestResult: def _comparison_table(self, html): """Return a table showing the means/SDs for each group""" - table_data = { - self.group1: '{:.2f} ({:.2f})'.format(self.mu1, self.sd1), - self.group2: '{:.2f} ({:.2f})'.format(self.mu2, self.sd2), - } + table_data = [[ + '{:.2f} ({:.2f})'.format(self.mu1, self.sd1), + '{:.2f} ({:.2f})'.format(self.mu2, self.sd2), + ]] if html: - table = pd.DataFrame(table_data, index=['\ue000 (SD)']) # U+E000 is in Private Use Area, mark μ symbol + table = pd.DataFrame(table_data, index=pd.Index(['\ue000 (SD)'], name=self.dep), columns=pd.Index([self.group1, self.group2], name=self.ind)) # U+E000 is in Private Use Area, mark μ symbol table_str = table._repr_html_() return table_str.replace('\ue000', 'μ') else: - table = pd.DataFrame(table_data, index=['μ (SD)']) + table = pd.DataFrame(table_data, index=pd.Index(['μ (SD)'], name=self.dep), columns=pd.Index([self.group1, self.group2], name=self.ind)) return str(table) def __repr__(self): @@ -120,8 +125,10 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'): yli.ttest_ind(df, 'Potency', 'Type') .. code-block:: text - Fresh Stored - μ (SD) 10.37 (0.32) 9.83 (0.24) + + Type Fresh Stored + Potency + μ (SD) 10.37 (0.32) 9.83 (0.24) t(18) = 4.24; p < 0.001* Δμ (95% CI) = 0.54 (0.27–0.81), Fresh > Stored @@ -156,6 +163,7 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'): return TTestResult( statistic=abs(statistic), dof=dof, pvalue=pvalue, + dep=dep, ind=ind, group1=group1, group2=group2, mu1=d1.mean, mu2=d2.mean, sd1=d1.std, sd2=d2.std, delta=Estimate(delta, ci0, ci1), delta_direction='{} > {}'.format(group1, group2)) @@ -254,12 +262,17 @@ class MannWhitneyResult: See :func:`yli.mannwhitney`. """ - def __init__(self, statistic, pvalue, group1, group2, med1, med2, iqr1, iqr2, range1, range2, rank_biserial, direction, brunnermunzel=None): + def __init__(self, *, statistic, pvalue, dep, ind, group1, group2, med1, med2, iqr1, iqr2, range1, range2, rank_biserial, direction, brunnermunzel=None): #: Lesser of the two Mann–Whitney *U* statistics (*float*) self.statistic = statistic #: *p* value for the *U* statistic (*float*) self.pvalue = pvalue + #: Name of the dependent variable (*str*) + self.dep = dep + #: Name of the independent variable (*str*) + self.ind = ind + #: Name of the first group (*str*) self.group1 = group1 #: Name of the second group (*str*) @@ -288,11 +301,11 @@ class MannWhitneyResult: def _comparison_table(self, html): """Return a table showing the medians/IQRs/ranges for each group""" - table_data = { - self.group1: ['{:.2f} ({})'.format(self.med1, self.iqr1.summary()), '{:.2f} ({})'.format(self.med1, self.range1.summary())], - self.group2: ['{:.2f} ({})'.format(self.med2, self.iqr2.summary()), '{:.2f} ({})'.format(self.med2, self.range2.summary())], - } - table = pd.DataFrame(table_data, index=['Median (IQR)', 'Median (range)']) + table_data = [ + ['{:.2f} ({})'.format(self.med1, self.iqr1.summary()), '{:.2f} ({})'.format(self.med2, self.iqr2.summary())], + ['{:.2f} ({})'.format(self.med1, self.range1.summary()), '{:.2f} ({})'.format(self.med2, self.range2.summary())], + ] + table = pd.DataFrame(table_data, index=pd.Index(['Median (IQR)', 'Median (range)'], name=self.dep), columns=pd.Index([self.group1, self.group2], name=self.ind)) if html: return table._repr_html_() @@ -394,7 +407,8 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont .. code-block:: text - After Before + Sample After Before + Oxygen Median (IQR) 10.75 (10.55–10.95) 11.55 (11.20–11.83) Median (range) 10.75 (11.00–12.10) 11.55 (11.00–12.10) @@ -437,6 +451,7 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont return MannWhitneyResult( statistic=min(u1, u2), pvalue=result.pvalue, + dep=dep, ind=ind, group1=group1, group2=group2, med1=data1.median(), med2=data2.median(), iqr1=Interval(data1.quantile(0.25), data1.quantile(0.75)), iqr2=Interval(data2.quantile(0.25), data2.quantile(0.75)), range1=Interval(data1.min(), data1.max()), range2=Interval(data2.min(), data2.max()), rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2))