diff --git a/tests/test_mannwhitney.py b/tests/test_mannwhitney.py
new file mode 100644
index 0000000..4515367
--- /dev/null
+++ b/tests/test_mannwhitney.py
@@ -0,0 +1,34 @@
+# scipy-yli: Helpful SciPy utilities and recipes
+# Copyright © 2022 Lee Yingtong Li (RunasSudo)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+from pytest import approx
+
+import numpy as np
+import pandas as pd
+
+import yli
+
+def test_mannwhitney_ol6_6():
+ """Compare yli.mannwhitney for Ott & Longnecker (2016) example 6.6"""
+
+ df = pd.DataFrame({
+ 'Sample': ['Before'] * 12 + ['After'] * 12,
+ 'Oxygen': [11.0, 11.2, 11.2, 11.2, 11.4, 11.5, 11.6, 11.7, 11.8, 11.9, 11.9, 12.1, 10.2, 10.3, 10.4, 10.6, 10.6, 10.7, 10.8, 10.8, 10.9, 11.1, 11.1, 11.3]
+ })
+
+ result = yli.mannwhitney(df, 'Oxygen', 'Sample', method='asymptotic', alternative='less')
+
+ assert result.pvalue == approx(0.00007, abs=0.00001)
diff --git a/yli/__init__.py b/yli/__init__.py
index e7abd11..48f931a 100644
--- a/yli/__init__.py
+++ b/yli/__init__.py
@@ -15,7 +15,7 @@
# along with this program. If not, see .
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
-from .sig_tests import ttest_ind
+from .sig_tests import mannwhitney, ttest_ind
def reload_me():
import importlib
diff --git a/yli/sig_tests.py b/yli/sig_tests.py
index dcdfbc8..2dca8bd 100644
--- a/yli/sig_tests.py
+++ b/yli/sig_tests.py
@@ -71,3 +71,87 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'):
statistic=abs(statistic), dof=dof, pvalue=pvalue,
delta=abs(Estimate(delta, ci0, ci1)),
delta_direction=('{0} > {1}' if d1.mean > d2.mean else '{1} > {0}').format(group1, group2))
+
+# -----------------
+# Mann-Whitney test
+
+class MannWhitneyResult:
+ """
+ Result of a Mann-Whitney test
+
+ brunnermunzel: BrunnerMunzelResult on same data
+ """
+
+ def __init__(self, statistic, pvalue, rank_biserial, direction, brunnermunzel=None):
+ self.statistic = statistic
+ self.pvalue = pvalue
+ self.rank_biserial = rank_biserial
+ self.direction = direction
+ self.brunnermunzel = brunnermunzel
+
+ def _repr_html_(self):
+ line1 = 'U = {:.1f}; p {}
r = {:.2f}, {}'.format(self.statistic, fmt_p_html(self.pvalue), self.rank_biserial, self.direction)
+ if self.brunnermunzel:
+ return line1 + '
' + self.brunnermunzel._repr_html_()
+ else:
+ return line1
+
+ def summary(self):
+ line1 = 'U = {:.1f}; p {}\nr = {}, {}'.format(self.statistic, fmt_p_text(self.pvalue), self.rank_biserial, self.direction)
+ if self.brunnermunzel:
+ return line1 + '\n' + self.brunnermunzel.summary()
+ else:
+ return line1
+
+class BrunnerMunzelResult:
+ """Result of a Brunner-Munzel test"""
+
+ def __init__(self, statistic, pvalue):
+ self.statistic = statistic
+ self.pvalue = pvalue
+
+ def _repr_html_(self):
+ return 'W = {:.1f}; p {}'.format(self.statistic, fmt_p_html(self.pvalue))
+
+ def summary(self):
+ return 'W = {:.1f}; p {}'.format(self.statistic, fmt_p_text(self.pvalue))
+
+def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_continuity=False, alternative='two-sided', method='auto'):
+ """
+ Perform a Mann-Whitney test
+
+ brunnermunzel: Set to False to skip the Brunner-Munzel test
+ use_continuity, alternative, method: See scipy.stats.mannwhitneyu
+ """
+
+ # Check for/clean NaNs
+ df = check_nan(df[[ind, dep]], nan_policy)
+
+ # Ensure 2 groups for ind
+ group1, data1, group2, data2 = as_2groups(df, dep, ind)
+
+ # Do Mann-Whitney test
+ # Stata does not perform continuity correction
+ result = stats.mannwhitneyu(data1, data2, use_continuity=use_continuity, alternative=alternative, method=method)
+
+ u1 = result.statistic
+ u2 = len(data1) * len(data2) - u1
+ r = abs(2*u1 / (len(data1) * len(data2)) - 1) # rank-biserial
+
+ # If significant, perform a Brunner-Munzel test for our interest
+ if result.pvalue < 0.05 and brunnermunzel:
+ result_bm = stats.brunnermunzel(data1, data2)
+
+ if result_bm.pvalue >= 0.05:
+ warnings.warn('Mann-Whitney test is significant but Brunner-Munzel test is not. This could be due to a difference in shape, rather than location.')
+
+ return MannWhitneyResult(
+ statistic=min(u1, u2), pvalue=result.pvalue,
+ #med1=data1.median(), med2=data2.median(),
+ rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2),
+ brunnermunzel=BrunnerMunzelResult(statistic=result_bm.statistic, pvalue=result_bm.pvalue))
+
+ return MannWhitneyResult(
+ statistic=min(u1, u2), pvalue=result.pvalue,
+ #med1=data1.median(), med2=data2.median(),
+ rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2))