diff --git a/tests/test_mannwhitney.py b/tests/test_mannwhitney.py new file mode 100644 index 0000000..4515367 --- /dev/null +++ b/tests/test_mannwhitney.py @@ -0,0 +1,34 @@ +# scipy-yli: Helpful SciPy utilities and recipes +# Copyright © 2022 Lee Yingtong Li (RunasSudo) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from pytest import approx + +import numpy as np +import pandas as pd + +import yli + +def test_mannwhitney_ol6_6(): + """Compare yli.mannwhitney for Ott & Longnecker (2016) example 6.6""" + + df = pd.DataFrame({ + 'Sample': ['Before'] * 12 + ['After'] * 12, + 'Oxygen': [11.0, 11.2, 11.2, 11.2, 11.4, 11.5, 11.6, 11.7, 11.8, 11.9, 11.9, 12.1, 10.2, 10.3, 10.4, 10.6, 10.6, 10.7, 10.8, 10.8, 10.9, 11.1, 11.1, 11.3] + }) + + result = yli.mannwhitney(df, 'Oxygen', 'Sample', method='asymptotic', alternative='less') + + assert result.pvalue == approx(0.00007, abs=0.00001) diff --git a/yli/__init__.py b/yli/__init__.py index e7abd11..48f931a 100644 --- a/yli/__init__.py +++ b/yli/__init__.py @@ -15,7 +15,7 @@ # along with this program. If not, see . from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist -from .sig_tests import ttest_ind +from .sig_tests import mannwhitney, ttest_ind def reload_me(): import importlib diff --git a/yli/sig_tests.py b/yli/sig_tests.py index dcdfbc8..2dca8bd 100644 --- a/yli/sig_tests.py +++ b/yli/sig_tests.py @@ -71,3 +71,87 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'): statistic=abs(statistic), dof=dof, pvalue=pvalue, delta=abs(Estimate(delta, ci0, ci1)), delta_direction=('{0} > {1}' if d1.mean > d2.mean else '{1} > {0}').format(group1, group2)) + +# ----------------- +# Mann-Whitney test + +class MannWhitneyResult: + """ + Result of a Mann-Whitney test + + brunnermunzel: BrunnerMunzelResult on same data + """ + + def __init__(self, statistic, pvalue, rank_biserial, direction, brunnermunzel=None): + self.statistic = statistic + self.pvalue = pvalue + self.rank_biserial = rank_biserial + self.direction = direction + self.brunnermunzel = brunnermunzel + + def _repr_html_(self): + line1 = 'U = {:.1f}; p {}
r = {:.2f}, {}'.format(self.statistic, fmt_p_html(self.pvalue), self.rank_biserial, self.direction) + if self.brunnermunzel: + return line1 + '
' + self.brunnermunzel._repr_html_() + else: + return line1 + + def summary(self): + line1 = 'U = {:.1f}; p {}\nr = {}, {}'.format(self.statistic, fmt_p_text(self.pvalue), self.rank_biserial, self.direction) + if self.brunnermunzel: + return line1 + '\n' + self.brunnermunzel.summary() + else: + return line1 + +class BrunnerMunzelResult: + """Result of a Brunner-Munzel test""" + + def __init__(self, statistic, pvalue): + self.statistic = statistic + self.pvalue = pvalue + + def _repr_html_(self): + return 'W = {:.1f}; p {}'.format(self.statistic, fmt_p_html(self.pvalue)) + + def summary(self): + return 'W = {:.1f}; p {}'.format(self.statistic, fmt_p_text(self.pvalue)) + +def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_continuity=False, alternative='two-sided', method='auto'): + """ + Perform a Mann-Whitney test + + brunnermunzel: Set to False to skip the Brunner-Munzel test + use_continuity, alternative, method: See scipy.stats.mannwhitneyu + """ + + # Check for/clean NaNs + df = check_nan(df[[ind, dep]], nan_policy) + + # Ensure 2 groups for ind + group1, data1, group2, data2 = as_2groups(df, dep, ind) + + # Do Mann-Whitney test + # Stata does not perform continuity correction + result = stats.mannwhitneyu(data1, data2, use_continuity=use_continuity, alternative=alternative, method=method) + + u1 = result.statistic + u2 = len(data1) * len(data2) - u1 + r = abs(2*u1 / (len(data1) * len(data2)) - 1) # rank-biserial + + # If significant, perform a Brunner-Munzel test for our interest + if result.pvalue < 0.05 and brunnermunzel: + result_bm = stats.brunnermunzel(data1, data2) + + if result_bm.pvalue >= 0.05: + warnings.warn('Mann-Whitney test is significant but Brunner-Munzel test is not. This could be due to a difference in shape, rather than location.') + + return MannWhitneyResult( + statistic=min(u1, u2), pvalue=result.pvalue, + #med1=data1.median(), med2=data2.median(), + rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2), + brunnermunzel=BrunnerMunzelResult(statistic=result_bm.statistic, pvalue=result_bm.pvalue)) + + return MannWhitneyResult( + statistic=min(u1, u2), pvalue=result.pvalue, + #med1=data1.median(), med2=data2.median(), + rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2))