Show independent/dependent variables in t test and Mann-Whitney output

This commit is contained in:
RunasSudo 2022-11-09 22:25:17 +11:00
parent 239a0646d2
commit ce4df2eac1
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
3 changed files with 36 additions and 19 deletions

View File

@@ -33,7 +33,8 @@ def test_mannwhitney_ol6_6():
assert result.pvalue == approx(0.00007, abs=0.00001)
expected_summary = ''' After Before
expected_summary = '''Sample After Before
Oxygen
Median (IQR) 10.75 (10.55–10.95) 11.55 (11.20–11.83)
Median (range) 10.75 (11.00–12.10) 11.55 (11.00–12.10)

View File

@@ -39,8 +39,9 @@ def test_ttest_ind_ol6_1():
assert result.delta.ci_lower == approx(0.272, abs=0.01)
assert result.delta.ci_upper == approx(0.808, abs=0.01)
expected_summary = ''' Fresh Stored
μ (SD) 10.37 (0.32) 9.83 (0.24)
expected_summary = '''Type Fresh Stored
Potency
μ (SD) 10.37 (0.32) 9.83 (0.24)
t(18) = 4.24; p < 0.001*
Δμ (95% CI) = 0.54 (0.27–0.81), Fresh > Stored'''

View File

@@ -35,7 +35,7 @@ class TTestResult:
See :func:`yli.ttest_ind`.
"""
def __init__(self, statistic, dof, pvalue, group1, group2, mu1, mu2, sd1, sd2, delta, delta_direction):
def __init__(self, *, statistic, dof, pvalue, dep, ind, group1, group2, mu1, mu2, sd1, sd2, delta, delta_direction):
#: *t* statistic (*float*)
self.statistic = statistic
#: Degrees of freedom of the *t* distribution (*int*)
@@ -43,6 +43,11 @@ class TTestResult:
#: *p* value for the *t* statistic (*float*)
self.pvalue = pvalue
#: Name of the dependent variable (*str*)
self.dep = dep
#: Name of the independent variable (*str*)
self.ind = ind
#: Name of the first group (*str*)
self.group1 = group1
#: Name of the second group (*str*)
@@ -64,17 +69,17 @@ class TTestResult:
def _comparison_table(self, html):
"""Return a table showing the means/SDs for each group"""
table_data = {
self.group1: '{:.2f} ({:.2f})'.format(self.mu1, self.sd1),
self.group2: '{:.2f} ({:.2f})'.format(self.mu2, self.sd2),
}
table_data = [[
'{:.2f} ({:.2f})'.format(self.mu1, self.sd1),
'{:.2f} ({:.2f})'.format(self.mu2, self.sd2),
]]
if html:
table = pd.DataFrame(table_data, index=['\ue000 (SD)']) # U+E000 is in Private Use Area, mark μ symbol
table = pd.DataFrame(table_data, index=pd.Index(['\ue000 (SD)'], name=self.dep), columns=pd.Index([self.group1, self.group2], name=self.ind)) # U+E000 is in Private Use Area, mark μ symbol
table_str = table._repr_html_()
return table_str.replace('\ue000', '<i>μ</i>')
else:
table = pd.DataFrame(table_data, index=['μ (SD)'])
table = pd.DataFrame(table_data, index=pd.Index(['μ (SD)'], name=self.dep), columns=pd.Index([self.group1, self.group2], name=self.ind))
return str(table)
def __repr__(self):
@@ -120,8 +125,10 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'):
yli.ttest_ind(df, 'Potency', 'Type')
.. code-block:: text
Fresh Stored
μ (SD) 10.37 (0.32) 9.83 (0.24)
Type Fresh Stored
Potency
μ (SD) 10.37 (0.32) 9.83 (0.24)
t(18) = 4.24; p < 0.001*
Δμ (95% CI) = 0.54 (0.27–0.81), Fresh > Stored
@@ -156,6 +163,7 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'):
return TTestResult(
statistic=abs(statistic), dof=dof, pvalue=pvalue,
dep=dep, ind=ind,
group1=group1, group2=group2, mu1=d1.mean, mu2=d2.mean, sd1=d1.std, sd2=d2.std,
delta=Estimate(delta, ci0, ci1),
delta_direction='{} > {}'.format(group1, group2))
@@ -254,12 +262,17 @@ class MannWhitneyResult:
See :func:`yli.mannwhitney`.
"""
def __init__(self, statistic, pvalue, group1, group2, med1, med2, iqr1, iqr2, range1, range2, rank_biserial, direction, brunnermunzel=None):
def __init__(self, *, statistic, pvalue, dep, ind, group1, group2, med1, med2, iqr1, iqr2, range1, range2, rank_biserial, direction, brunnermunzel=None):
#: Lesser of the two Mann–Whitney *U* statistics (*float*)
self.statistic = statistic
#: *p* value for the *U* statistic (*float*)
self.pvalue = pvalue
#: Name of the dependent variable (*str*)
self.dep = dep
#: Name of the independent variable (*str*)
self.ind = ind
#: Name of the first group (*str*)
self.group1 = group1
#: Name of the second group (*str*)
@@ -288,11 +301,11 @@ class MannWhitneyResult:
def _comparison_table(self, html):
"""Return a table showing the medians/IQRs/ranges for each group"""
table_data = {
self.group1: ['{:.2f} ({})'.format(self.med1, self.iqr1.summary()), '{:.2f} ({})'.format(self.med1, self.range1.summary())],
self.group2: ['{:.2f} ({})'.format(self.med2, self.iqr2.summary()), '{:.2f} ({})'.format(self.med2, self.range2.summary())],
}
table = pd.DataFrame(table_data, index=['Median (IQR)', 'Median (range)'])
table_data = [
['{:.2f} ({})'.format(self.med1, self.iqr1.summary()), '{:.2f} ({})'.format(self.med2, self.iqr2.summary())],
['{:.2f} ({})'.format(self.med1, self.range1.summary()), '{:.2f} ({})'.format(self.med2, self.range2.summary())],
]
table = pd.DataFrame(table_data, index=pd.Index(['Median (IQR)', 'Median (range)'], name=self.dep), columns=pd.Index([self.group1, self.group2], name=self.ind))
if html:
return table._repr_html_()
@@ -394,7 +407,8 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont
.. code-block:: text
After Before
Sample After Before
Oxygen
Median (IQR) 10.75 (10.55–10.95) 11.55 (11.20–11.83)
Median (range) 10.75 (11.00–12.10) 11.55 (11.00–12.10)
@@ -437,6 +451,7 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont
return MannWhitneyResult(
statistic=min(u1, u2), pvalue=result.pvalue,
dep=dep, ind=ind,
group1=group1, group2=group2,
med1=data1.median(), med2=data2.median(), iqr1=Interval(data1.quantile(0.25), data1.quantile(0.75)), iqr2=Interval(data2.quantile(0.25), data2.quantile(0.75)), range1=Interval(data1.min(), data1.max()), range2=Interval(data2.min(), data2.max()),
rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2))