Show independent/dependent variables in t test and Mann-Whitney output

2022-11-09 22:25:17 +11:00 · 2022-11-09 22:25:17 +11:00 · ce4df2eac1
parent 239a0646d2
commit ce4df2eac1
3 changed files with 36 additions and 19 deletions
--- a/tests/test_mannwhitney.py
+++ b/tests/test_mannwhitney.py
@@ -33,7 +33,8 @@ def test_mannwhitney_ol6_6():
 	assert result.pvalue == approx(0.00007, abs=0.00001)
-	expected_summary = '''                              After               Before
+	expected_summary = '''Sample                        After               Before
+Oxygen                                                  
 Median (IQR)    10.75 (10.55–10.95)  11.55 (11.20–11.83)
 Median (range)  10.75 (11.00–12.10)  11.55 (11.00–12.10)
--- a/tests/test_ttest.py
+++ b/tests/test_ttest.py
@@ -39,8 +39,9 @@ def test_ttest_ind_ol6_1():
 	assert result.delta.ci_lower == approx(0.272, abs=0.01)
 	assert result.delta.ci_upper == approx(0.808, abs=0.01)
-	expected_summary = '''               Fresh       Stored
+	expected_summary = '''Type            Fresh       Stored
-μ (SD)  10.37 (0.32)  9.83 (0.24)
+Potency                           
+μ (SD)   10.37 (0.32)  9.83 (0.24)
 t(18) = 4.24; p < 0.001*
 Δμ (95% CI) = 0.54 (0.27–0.81), Fresh > Stored'''
--- a/yli/sig_tests.py
+++ b/yli/sig_tests.py
@@ -35,7 +35,7 @@ class TTestResult:
 	See :func:`yli.ttest_ind`.
 	"""
-	def __init__(self, statistic, dof, pvalue, group1, group2, mu1, mu2, sd1, sd2, delta, delta_direction):
+	def __init__(self, *, statistic, dof, pvalue, dep, ind, group1, group2, mu1, mu2, sd1, sd2, delta, delta_direction):
 		#: *t* statistic (*float*)
 		self.statistic = statistic
 		#: Degrees of freedom of the *t* distribution (*int*)
@@ -43,6 +43,11 @@ class TTestResult:
 		#: *p* value for the *t* statistic (*float*)
 		self.pvalue = pvalue
+		#: Name of the dependent variable (*str*)
+		self.dep = dep
+		#: Name of the independent variable (*str*)
+		self.ind = ind
 		#: Name of the first group (*str*)
 		self.group1 = group1
 		#: Name of the second group (*str*)
@@ -64,17 +69,17 @@ class TTestResult:
 	def _comparison_table(self, html):
 		"""Return a table showing the means/SDs for each group"""
-		table_data = {
+		table_data = [[
-			self.group1: '{:.2f} ({:.2f})'.format(self.mu1, self.sd1),
+			'{:.2f} ({:.2f})'.format(self.mu1, self.sd1),
-			self.group2: '{:.2f} ({:.2f})'.format(self.mu2, self.sd2),
+			'{:.2f} ({:.2f})'.format(self.mu2, self.sd2),
-		}
+		]]
 		if html:
-			table = pd.DataFrame(table_data, index=['\ue000 (SD)']) # U+E000 is in Private Use Area, mark μ symbol
+			table = pd.DataFrame(table_data, index=pd.Index(['\ue000 (SD)'], name=self.dep), columns=pd.Index([self.group1, self.group2], name=self.ind)) # U+E000 is in Private Use Area, mark μ symbol
 			table_str = table._repr_html_()
 			return table_str.replace('\ue000', '<i>μ</i>')
 		else:
-			table = pd.DataFrame(table_data, index=['μ (SD)'])
+			table = pd.DataFrame(table_data, index=pd.Index(['μ (SD)'], name=self.dep), columns=pd.Index([self.group1, self.group2], name=self.ind))
 			return str(table)
 	def __repr__(self):
@@ -120,8 +125,10 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'):
 		yli.ttest_ind(df, 'Potency', 'Type')
 	.. code-block:: text
-		               Fresh       Stored
+		
-		μ (SD)  10.37 (0.32)  9.83 (0.24)
+		Type            Fresh       Stored
+		Potency                           
+		μ (SD)   10.37 (0.32)  9.83 (0.24)
 		t(18) = 4.24; p < 0.001*
 		Δμ (95% CI) = 0.54 (0.27–0.81), Fresh > Stored
@@ -156,6 +163,7 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'):
 	return TTestResult(
 		statistic=abs(statistic), dof=dof, pvalue=pvalue,
+		dep=dep, ind=ind,
 		group1=group1, group2=group2, mu1=d1.mean, mu2=d2.mean, sd1=d1.std, sd2=d2.std,
 		delta=Estimate(delta, ci0, ci1),
 		delta_direction='{} > {}'.format(group1, group2))
@@ -254,12 +262,17 @@ class MannWhitneyResult:
 	See :func:`yli.mannwhitney`.
 	"""
-	def __init__(self, statistic, pvalue, group1, group2, med1, med2, iqr1, iqr2, range1, range2, rank_biserial, direction, brunnermunzel=None):
+	def __init__(self, *, statistic, pvalue, dep, ind, group1, group2, med1, med2, iqr1, iqr2, range1, range2, rank_biserial, direction, brunnermunzel=None):
 		#: Lesser of the two Mann–Whitney *U* statistics (*float*)
 		self.statistic = statistic
 		#: *p* value for the *U* statistic (*float*)
 		self.pvalue = pvalue
+		#: Name of the dependent variable (*str*)
+		self.dep = dep
+		#: Name of the independent variable (*str*)
+		self.ind = ind
 		#: Name of the first group (*str*)
 		self.group1 = group1
 		#: Name of the second group (*str*)
@@ -288,11 +301,11 @@ class MannWhitneyResult:
 	def _comparison_table(self, html):
 		"""Return a table showing the medians/IQRs/ranges for each group"""
-		table_data = {
+		table_data = [
-			self.group1: ['{:.2f} ({})'.format(self.med1, self.iqr1.summary()), '{:.2f} ({})'.format(self.med1, self.range1.summary())],
+			['{:.2f} ({})'.format(self.med1, self.iqr1.summary()), '{:.2f} ({})'.format(self.med2, self.iqr2.summary())],
-			self.group2: ['{:.2f} ({})'.format(self.med2, self.iqr2.summary()), '{:.2f} ({})'.format(self.med2, self.range2.summary())],
+			['{:.2f} ({})'.format(self.med1, self.range1.summary()), '{:.2f} ({})'.format(self.med2, self.range2.summary())],
-		}
+		]
-		table = pd.DataFrame(table_data, index=['Median (IQR)', 'Median (range)'])
+		table = pd.DataFrame(table_data, index=pd.Index(['Median (IQR)', 'Median (range)'], name=self.dep), columns=pd.Index([self.group1, self.group2], name=self.ind))
 		if html:
 			return table._repr_html_()
@@ -394,7 +407,8 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont
 	.. code-block:: text
-		                              After               Before
+		Sample                        After               Before
+		Oxygen                                                  
 		Median (IQR)    10.75 (10.55–10.95)  11.55 (11.20–11.83)
 		Median (range)  10.75 (11.00–12.10)  11.55 (11.00–12.10)
@@ -437,6 +451,7 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont
 	return MannWhitneyResult(
 		statistic=min(u1, u2), pvalue=result.pvalue,
+		dep=dep, ind=ind,
 		group1=group1, group2=group2,
 		med1=data1.median(), med2=data2.median(), iqr1=Interval(data1.quantile(0.25), data1.quantile(0.75)), iqr2=Interval(data2.quantile(0.25), data2.quantile(0.75)), range1=Interval(data1.min(), data1.max()), range2=Interval(data2.min(), data2.max()),
 		rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2))