Implement mannwhitney
This commit is contained in:
parent
1fad506a5e
commit
edc82c1658
34
tests/test_mannwhitney.py
Normal file
34
tests/test_mannwhitney.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
# scipy-yli: Helpful SciPy utilities and recipes
|
||||||
|
# Copyright © 2022 Lee Yingtong Li (RunasSudo)
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
from pytest import approx
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
import yli
|
||||||
|
|
||||||
|
def test_mannwhitney_ol6_6():
|
||||||
|
"""Compare yli.mannwhitney for Ott & Longnecker (2016) example 6.6"""
|
||||||
|
|
||||||
|
df = pd.DataFrame({
|
||||||
|
'Sample': ['Before'] * 12 + ['After'] * 12,
|
||||||
|
'Oxygen': [11.0, 11.2, 11.2, 11.2, 11.4, 11.5, 11.6, 11.7, 11.8, 11.9, 11.9, 12.1, 10.2, 10.3, 10.4, 10.6, 10.6, 10.7, 10.8, 10.8, 10.9, 11.1, 11.1, 11.3]
|
||||||
|
})
|
||||||
|
|
||||||
|
result = yli.mannwhitney(df, 'Oxygen', 'Sample', method='asymptotic', alternative='less')
|
||||||
|
|
||||||
|
assert result.pvalue == approx(0.00007, abs=0.00001)
|
@ -15,7 +15,7 @@
|
|||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
|
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
|
||||||
from .sig_tests import ttest_ind
|
from .sig_tests import mannwhitney, ttest_ind
|
||||||
|
|
||||||
def reload_me():
|
def reload_me():
|
||||||
import importlib
|
import importlib
|
||||||
|
@ -71,3 +71,87 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'):
|
|||||||
statistic=abs(statistic), dof=dof, pvalue=pvalue,
|
statistic=abs(statistic), dof=dof, pvalue=pvalue,
|
||||||
delta=abs(Estimate(delta, ci0, ci1)),
|
delta=abs(Estimate(delta, ci0, ci1)),
|
||||||
delta_direction=('{0} > {1}' if d1.mean > d2.mean else '{1} > {0}').format(group1, group2))
|
delta_direction=('{0} > {1}' if d1.mean > d2.mean else '{1} > {0}').format(group1, group2))
|
||||||
|
|
||||||
|
# -----------------
|
||||||
|
# Mann-Whitney test
|
||||||
|
|
||||||
|
class MannWhitneyResult:
|
||||||
|
"""
|
||||||
|
Result of a Mann-Whitney test
|
||||||
|
|
||||||
|
brunnermunzel: BrunnerMunzelResult on same data
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, statistic, pvalue, rank_biserial, direction, brunnermunzel=None):
|
||||||
|
self.statistic = statistic
|
||||||
|
self.pvalue = pvalue
|
||||||
|
self.rank_biserial = rank_biserial
|
||||||
|
self.direction = direction
|
||||||
|
self.brunnermunzel = brunnermunzel
|
||||||
|
|
||||||
|
def _repr_html_(self):
|
||||||
|
line1 = '<i>U</i> = {:.1f}; <i>p</i> {}<br><i>r</i> = {:.2f}, {}'.format(self.statistic, fmt_p_html(self.pvalue), self.rank_biserial, self.direction)
|
||||||
|
if self.brunnermunzel:
|
||||||
|
return line1 + '<br>' + self.brunnermunzel._repr_html_()
|
||||||
|
else:
|
||||||
|
return line1
|
||||||
|
|
||||||
|
def summary(self):
|
||||||
|
line1 = 'U = {:.1f}; p {}\nr = {}, {}'.format(self.statistic, fmt_p_text(self.pvalue), self.rank_biserial, self.direction)
|
||||||
|
if self.brunnermunzel:
|
||||||
|
return line1 + '\n' + self.brunnermunzel.summary()
|
||||||
|
else:
|
||||||
|
return line1
|
||||||
|
|
||||||
|
class BrunnerMunzelResult:
|
||||||
|
"""Result of a Brunner-Munzel test"""
|
||||||
|
|
||||||
|
def __init__(self, statistic, pvalue):
|
||||||
|
self.statistic = statistic
|
||||||
|
self.pvalue = pvalue
|
||||||
|
|
||||||
|
def _repr_html_(self):
|
||||||
|
return '<i>W</i> = {:.1f}; <i>p</i> {}'.format(self.statistic, fmt_p_html(self.pvalue))
|
||||||
|
|
||||||
|
def summary(self):
|
||||||
|
return 'W = {:.1f}; p {}'.format(self.statistic, fmt_p_text(self.pvalue))
|
||||||
|
|
||||||
|
def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_continuity=False, alternative='two-sided', method='auto'):
|
||||||
|
"""
|
||||||
|
Perform a Mann-Whitney test
|
||||||
|
|
||||||
|
brunnermunzel: Set to False to skip the Brunner-Munzel test
|
||||||
|
use_continuity, alternative, method: See scipy.stats.mannwhitneyu
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Check for/clean NaNs
|
||||||
|
df = check_nan(df[[ind, dep]], nan_policy)
|
||||||
|
|
||||||
|
# Ensure 2 groups for ind
|
||||||
|
group1, data1, group2, data2 = as_2groups(df, dep, ind)
|
||||||
|
|
||||||
|
# Do Mann-Whitney test
|
||||||
|
# Stata does not perform continuity correction
|
||||||
|
result = stats.mannwhitneyu(data1, data2, use_continuity=use_continuity, alternative=alternative, method=method)
|
||||||
|
|
||||||
|
u1 = result.statistic
|
||||||
|
u2 = len(data1) * len(data2) - u1
|
||||||
|
r = abs(2*u1 / (len(data1) * len(data2)) - 1) # rank-biserial
|
||||||
|
|
||||||
|
# If significant, perform a Brunner-Munzel test for our interest
|
||||||
|
if result.pvalue < 0.05 and brunnermunzel:
|
||||||
|
result_bm = stats.brunnermunzel(data1, data2)
|
||||||
|
|
||||||
|
if result_bm.pvalue >= 0.05:
|
||||||
|
warnings.warn('Mann-Whitney test is significant but Brunner-Munzel test is not. This could be due to a difference in shape, rather than location.')
|
||||||
|
|
||||||
|
return MannWhitneyResult(
|
||||||
|
statistic=min(u1, u2), pvalue=result.pvalue,
|
||||||
|
#med1=data1.median(), med2=data2.median(),
|
||||||
|
rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2),
|
||||||
|
brunnermunzel=BrunnerMunzelResult(statistic=result_bm.statistic, pvalue=result_bm.pvalue))
|
||||||
|
|
||||||
|
return MannWhitneyResult(
|
||||||
|
statistic=min(u1, u2), pvalue=result.pvalue,
|
||||||
|
#med1=data1.median(), med2=data2.median(),
|
||||||
|
rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2))
|
||||||
|
Loading…
Reference in New Issue
Block a user