Show descriptives in Mann-Whitney test output

2022-11-09 22:08:27 +11:00 · 2022-11-09 22:08:27 +11:00 · 0f63b7eb40
commit 0f63b7eb40
parent ea2bf53ace
2 changed files with 50 additions and 6 deletions
--- a/tests/test_mannwhitney.py
+++ b/tests/test_mannwhitney.py
@ -33,7 +33,11 @@ def test_mannwhitney_ol6_6():
 	
 	assert result.pvalue == approx(0.00007, abs=0.00001)
 	
-	expected_summary = '''U = 6.0; p < 0.001*
+	expected_summary = '''                              After               Before
+Median (IQR)    10.75 (10.55–10.95)  11.55 (11.20–11.83)
+Median (range)  10.75 (11.00–12.10)  11.55 (11.00–12.10)
+
+U = 6.0; p < 0.001*
 r = 0.92, Before > After'''
 	
 	assert result.summary() == expected_summary
--- a/yli/sig_tests.py
+++ b/yli/sig_tests.py
@ -23,7 +23,7 @@ import functools
 import warnings

 from .config import config
-from .utils import Estimate, PValueStyle, as_2groups, check_nan, convert_pandas_nullable, fmt_p
+from .utils import ConfidenceInterval, Estimate, PValueStyle, as_2groups, check_nan, convert_pandas_nullable, fmt_p

 # ----------------
 # Student's t test
@ -42,6 +42,7 @@ class TTestResult:
 		self.dof = dof
 		#: *p* value for the *t* statistic (*float*)
 		self.pvalue = pvalue
+		
 		#: Name of the first group (*str*)
 		self.group1 = group1
 		#: Name of the second group (*str*)
@ -54,6 +55,7 @@ class TTestResult:
 		self.sd1 = sd1
 		#: Standard deviation of the second group (*float*)
 		self.sd2 = sd2
+		
 		#: Absolute value of the mean difference (:class:`yli.utils.Estimate`)
 		self.delta = delta
 		#: Description of the direction of the effect (*str*)
@ -252,25 +254,58 @@ class MannWhitneyResult:
 	See :func:`yli.mannwhitney`.
 	"""
 	
-	def __init__(self, statistic, pvalue, rank_biserial, direction, brunnermunzel=None):
+	def __init__(self, statistic, pvalue, group1, group2, med1, med2, iqr1, iqr2, range1, range2, rank_biserial, direction, brunnermunzel=None):
 		#: Lesser of the two Mann–Whitney *U* statistics (*float*)
 		self.statistic = statistic
 		#: *p* value for the *U* statistic (*float*)
 		self.pvalue = pvalue
+		
+		#: Name of the first group (*str*)
+		self.group1 = group1
+		#: Name of the second group (*str*)
+		self.group2 = group2
+		#: Median of the first group (*float*)
+		self.med1 = med1
+		#: Median of the second group (*float*)
+		self.med2 = med2
+		#: Interquartile range of the first group (:class:`yli.utils.ConfidenceInterval`)
+		self.iqr1 = iqr1
+		#: Interquartile range of the second group (:class:`yli.utils.ConfidenceInterval`)
+		self.iqr2 = iqr2
+		#: Range of the first group (:class:`yli.utils.ConfidenceInterval`)
+		self.range1 = range1
+		#: Range of the second group (:class:`yli.utils.ConfidenceInterval`)
+		self.range2 = range2
+		
 		#: Absolute value of the rank-biserial correlation (*float*)
 		self.rank_biserial = rank_biserial
 		#: Description of the direction of the effect (*str*)
 		self.direction = direction
+		
 		#: :class:`BrunnerMunzelResult` on the same data, or *None* if N/A
 		self.brunnermunzel = brunnermunzel
 	
+	def _comparison_table(self, html):
+		"""Build the per-group descriptives table: median with IQR, and median with range"""
+		
+		def describe(median, interval):
+			return '{:.2f} ({})'.format(median, interval.summary())
+		
+		# One column per group; one row per measure of spread
+		columns = {}
+		columns[self.group1] = [describe(self.med1, self.iqr1), describe(self.med1, self.range1)]
+		columns[self.group2] = [describe(self.med2, self.iqr2), describe(self.med2, self.range2)]
+		descriptives = pd.DataFrame(columns, index=['Median (IQR)', 'Median (range)'])
+		
+		return descriptives._repr_html_() if html else str(descriptives)
+	
 	def __repr__(self):
 		if config.repr_is_summary:
 			return self.summary()
 		return super().__repr__()
 	
 	def _repr_html_(self):
-		line1 = '<i>U</i> = {:.1f}; <i>p</i> {}<br><i>r</i> = {:.2f}, {}'.format(self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML), self.rank_biserial, self.direction)
+		line1 = '{}<br><i>U</i> = {:.1f}; <i>p</i> {}<br><i>r</i> = {:.2f}, {}'.format(self._comparison_table(True), self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML), self.rank_biserial, self.direction)
 		if self.brunnermunzel:
 			return line1 + '<br>' + self.brunnermunzel._repr_html_()
 		else:
@ -283,7 +318,7 @@ class MannWhitneyResult:
 		:rtype: str
 		"""
 		
-		line1 = 'U = {:.1f}; p {}\nr = {:.2f}, {}'.format(self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION), self.rank_biserial, self.direction)
+		line1 = '{}\n\nU = {:.1f}; p {}\nr = {:.2f}, {}'.format(self._comparison_table(False), self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION), self.rank_biserial, self.direction)
 		if self.brunnermunzel:
 			return line1 + '\n' + self.brunnermunzel.summary()
 		else:
@ -359,6 +394,10 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont
 	
 	.. code-block:: text
 		
+		                              After               Before
+		Median (IQR)    10.75 (10.55–10.95)  11.55 (11.20–11.83)
+		Median (range)  10.75 (11.00–12.10)  11.55 (11.00–12.10)
+		
 		U = 6.0; p < 0.001*
 		r = 0.92, Before > After
 	
@ -398,7 +437,8 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont
 	
 	return MannWhitneyResult(
 		statistic=min(u1, u2), pvalue=result.pvalue,
-		#med1=data1.median(), med2=data2.median(),
+		group1=group1, group2=group2,
+		med1=data1.median(), med2=data2.median(), iqr1=ConfidenceInterval(data1.quantile(0.25), data1.quantile(0.75)), iqr2=ConfidenceInterval(data2.quantile(0.25), data2.quantile(0.75)), range1=ConfidenceInterval(data1.min(), data1.max()), range2=ConfidenceInterval(data2.min(), data2.max()),
 		rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2))

 # ------------------------