Show independent/dependent variables in t test and Mann-Whitney output

This commit is contained in:
RunasSudo 2022-11-09 22:25:17 +11:00
parent 239a0646d2
commit ce4df2eac1
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
3 changed files with 36 additions and 19 deletions

View File

@ -33,7 +33,8 @@ def test_mannwhitney_ol6_6():
assert result.pvalue == approx(0.00007, abs=0.00001) assert result.pvalue == approx(0.00007, abs=0.00001)
expected_summary = ''' After Before expected_summary = '''Sample After Before
Oxygen
Median (IQR) 10.75 (10.55–10.95) 11.55 (11.20–11.83) Median (IQR) 10.75 (10.55–10.95) 11.55 (11.20–11.83)
Median (range) 10.75 (11.00–12.10) 11.55 (11.00–12.10) Median (range) 10.75 (11.00–12.10) 11.55 (11.00–12.10)

View File

@ -39,8 +39,9 @@ def test_ttest_ind_ol6_1():
assert result.delta.ci_lower == approx(0.272, abs=0.01) assert result.delta.ci_lower == approx(0.272, abs=0.01)
assert result.delta.ci_upper == approx(0.808, abs=0.01) assert result.delta.ci_upper == approx(0.808, abs=0.01)
expected_summary = ''' Fresh Stored expected_summary = '''Type Fresh Stored
μ (SD) 10.37 (0.32) 9.83 (0.24) Potency
μ (SD) 10.37 (0.32) 9.83 (0.24)
t(18) = 4.24; p < 0.001* t(18) = 4.24; p < 0.001*
Δμ (95% CI) = 0.54 (0.27–0.81), Fresh > Stored''' Δμ (95% CI) = 0.54 (0.27–0.81), Fresh > Stored'''

View File

@ -35,7 +35,7 @@ class TTestResult:
See :func:`yli.ttest_ind`. See :func:`yli.ttest_ind`.
""" """
def __init__(self, statistic, dof, pvalue, group1, group2, mu1, mu2, sd1, sd2, delta, delta_direction): def __init__(self, *, statistic, dof, pvalue, dep, ind, group1, group2, mu1, mu2, sd1, sd2, delta, delta_direction):
#: *t* statistic (*float*) #: *t* statistic (*float*)
self.statistic = statistic self.statistic = statistic
#: Degrees of freedom of the *t* distribution (*int*) #: Degrees of freedom of the *t* distribution (*int*)
@ -43,6 +43,11 @@ class TTestResult:
#: *p* value for the *t* statistic (*float*) #: *p* value for the *t* statistic (*float*)
self.pvalue = pvalue self.pvalue = pvalue
#: Name of the dependent variable (*str*)
self.dep = dep
#: Name of the independent variable (*str*)
self.ind = ind
#: Name of the first group (*str*) #: Name of the first group (*str*)
self.group1 = group1 self.group1 = group1
#: Name of the second group (*str*) #: Name of the second group (*str*)
@ -64,17 +69,17 @@ class TTestResult:
def _comparison_table(self, html): def _comparison_table(self, html):
"""Return a table showing the means/SDs for each group""" """Return a table showing the means/SDs for each group"""
table_data = { table_data = [[
self.group1: '{:.2f} ({:.2f})'.format(self.mu1, self.sd1), '{:.2f} ({:.2f})'.format(self.mu1, self.sd1),
self.group2: '{:.2f} ({:.2f})'.format(self.mu2, self.sd2), '{:.2f} ({:.2f})'.format(self.mu2, self.sd2),
} ]]
if html: if html:
table = pd.DataFrame(table_data, index=['\ue000 (SD)']) # U+E000 is in Private Use Area, mark μ symbol table = pd.DataFrame(table_data, index=pd.Index(['\ue000 (SD)'], name=self.dep), columns=pd.Index([self.group1, self.group2], name=self.ind)) # U+E000 is in Private Use Area, mark μ symbol
table_str = table._repr_html_() table_str = table._repr_html_()
return table_str.replace('\ue000', '<i>μ</i>') return table_str.replace('\ue000', '<i>μ</i>')
else: else:
table = pd.DataFrame(table_data, index=['μ (SD)']) table = pd.DataFrame(table_data, index=pd.Index(['μ (SD)'], name=self.dep), columns=pd.Index([self.group1, self.group2], name=self.ind))
return str(table) return str(table)
def __repr__(self): def __repr__(self):
@ -120,8 +125,10 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'):
yli.ttest_ind(df, 'Potency', 'Type') yli.ttest_ind(df, 'Potency', 'Type')
.. code-block:: text .. code-block:: text
Fresh Stored
μ (SD) 10.37 (0.32) 9.83 (0.24) Type Fresh Stored
Potency
μ (SD) 10.37 (0.32) 9.83 (0.24)
t(18) = 4.24; p < 0.001* t(18) = 4.24; p < 0.001*
Δμ (95% CI) = 0.54 (0.27–0.81), Fresh > Stored Δμ (95% CI) = 0.54 (0.27–0.81), Fresh > Stored
@ -156,6 +163,7 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'):
return TTestResult( return TTestResult(
statistic=abs(statistic), dof=dof, pvalue=pvalue, statistic=abs(statistic), dof=dof, pvalue=pvalue,
dep=dep, ind=ind,
group1=group1, group2=group2, mu1=d1.mean, mu2=d2.mean, sd1=d1.std, sd2=d2.std, group1=group1, group2=group2, mu1=d1.mean, mu2=d2.mean, sd1=d1.std, sd2=d2.std,
delta=Estimate(delta, ci0, ci1), delta=Estimate(delta, ci0, ci1),
delta_direction='{} > {}'.format(group1, group2)) delta_direction='{} > {}'.format(group1, group2))
@ -254,12 +262,17 @@ class MannWhitneyResult:
See :func:`yli.mannwhitney`. See :func:`yli.mannwhitney`.
""" """
def __init__(self, statistic, pvalue, group1, group2, med1, med2, iqr1, iqr2, range1, range2, rank_biserial, direction, brunnermunzel=None): def __init__(self, *, statistic, pvalue, dep, ind, group1, group2, med1, med2, iqr1, iqr2, range1, range2, rank_biserial, direction, brunnermunzel=None):
#: Lesser of the two Mann–Whitney *U* statistics (*float*) #: Lesser of the two Mann–Whitney *U* statistics (*float*)
self.statistic = statistic self.statistic = statistic
#: *p* value for the *U* statistic (*float*) #: *p* value for the *U* statistic (*float*)
self.pvalue = pvalue self.pvalue = pvalue
#: Name of the dependent variable (*str*)
self.dep = dep
#: Name of the independent variable (*str*)
self.ind = ind
#: Name of the first group (*str*) #: Name of the first group (*str*)
self.group1 = group1 self.group1 = group1
#: Name of the second group (*str*) #: Name of the second group (*str*)
@ -288,11 +301,11 @@ class MannWhitneyResult:
def _comparison_table(self, html): def _comparison_table(self, html):
"""Return a table showing the medians/IQRs/ranges for each group""" """Return a table showing the medians/IQRs/ranges for each group"""
table_data = { table_data = [
self.group1: ['{:.2f} ({})'.format(self.med1, self.iqr1.summary()), '{:.2f} ({})'.format(self.med1, self.range1.summary())], ['{:.2f} ({})'.format(self.med1, self.iqr1.summary()), '{:.2f} ({})'.format(self.med2, self.iqr2.summary())],
self.group2: ['{:.2f} ({})'.format(self.med2, self.iqr2.summary()), '{:.2f} ({})'.format(self.med2, self.range2.summary())], ['{:.2f} ({})'.format(self.med1, self.range1.summary()), '{:.2f} ({})'.format(self.med2, self.range2.summary())],
} ]
table = pd.DataFrame(table_data, index=['Median (IQR)', 'Median (range)']) table = pd.DataFrame(table_data, index=pd.Index(['Median (IQR)', 'Median (range)'], name=self.dep), columns=pd.Index([self.group1, self.group2], name=self.ind))
if html: if html:
return table._repr_html_() return table._repr_html_()
@ -394,7 +407,8 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont
.. code-block:: text .. code-block:: text
After Before Sample After Before
Oxygen
Median (IQR) 10.75 (10.55–10.95) 11.55 (11.20–11.83) Median (IQR) 10.75 (10.55–10.95) 11.55 (11.20–11.83)
Median (range) 10.75 (11.00–12.10) 11.55 (11.00–12.10) Median (range) 10.75 (11.00–12.10) 11.55 (11.00–12.10)
@ -437,6 +451,7 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont
return MannWhitneyResult( return MannWhitneyResult(
statistic=min(u1, u2), pvalue=result.pvalue, statistic=min(u1, u2), pvalue=result.pvalue,
dep=dep, ind=ind,
group1=group1, group2=group2, group1=group1, group2=group2,
med1=data1.median(), med2=data2.median(), iqr1=Interval(data1.quantile(0.25), data1.quantile(0.75)), iqr2=Interval(data2.quantile(0.25), data2.quantile(0.75)), range1=Interval(data1.min(), data1.max()), range2=Interval(data2.min(), data2.max()), med1=data1.median(), med2=data2.median(), iqr1=Interval(data1.quantile(0.25), data1.quantile(0.75)), iqr2=Interval(data2.quantile(0.25), data2.quantile(0.75)), range1=Interval(data1.min(), data1.max()), range2=Interval(data2.min(), data2.max()),
rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2)) rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2))