Implement anova_oneway
This commit is contained in:
parent
a753761675
commit
e8e97f6073
52
tests/test_anova.py
Normal file
52
tests/test_anova.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
# scipy-yli: Helpful SciPy utilities and recipes
|
||||||
|
# Copyright © 2022 Lee Yingtong Li (RunasSudo)
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
from pytest import approx
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import statsmodels.api as sm
|
||||||
|
|
||||||
|
import yli
|
||||||
|
|
||||||
|
def test_anova_oneway_ol8_2():
|
||||||
|
"""Compare yli.anova_oneway for Ott & Longnecker (2016) example 8.2"""
|
||||||
|
|
||||||
|
df = pd.DataFrame({
|
||||||
|
'Method': [1]*8 + [2]*7 + [3]*9,
|
||||||
|
'Score': [96, 79, 91, 85, 83, 91, 82, 87, 77, 76, 74, 73, 78, 71, 80, 66, 73, 69, 66, 77, 73, 71, 70, 74]
|
||||||
|
})
|
||||||
|
|
||||||
|
result = yli.anova_oneway(df, 'Score', 'Method')
|
||||||
|
|
||||||
|
assert result.statistic == approx(545.316/18.4366, rel=0.001)
|
||||||
|
assert result.dof1 == 2
|
||||||
|
assert result.dof2 == 21
|
||||||
|
assert result.pvalue < 0.001
|
||||||
|
|
||||||
|
def test_regress_ftest_ol8_2():
|
||||||
|
"""Compare ANOVA via yli.regress for Ott & Longnecker (2016) example 8.2"""
|
||||||
|
|
||||||
|
df = pd.DataFrame({
|
||||||
|
'Method': [1]*8 + [2]*7 + [3]*9,
|
||||||
|
'Score': [96, 79, 91, 85, 83, 91, 82, 87, 77, 76, 74, 73, 78, 71, 80, 66, 73, 69, 66, 77, 73, 71, 70, 74]
|
||||||
|
})
|
||||||
|
|
||||||
|
result = yli.regress(sm.OLS, df, 'Score', 'C(Method)').ftest()
|
||||||
|
|
||||||
|
assert result.statistic == approx(545.316/18.4366, rel=0.001)
|
||||||
|
assert result.dof1 == 2
|
||||||
|
assert result.dof2 == 21
|
||||||
|
assert result.pvalue < 0.001
|
@ -18,7 +18,7 @@ from .bayes_factors import bayesfactor_afbf
|
|||||||
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
|
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
|
||||||
from .fs import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
|
from .fs import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
|
||||||
from .regress import PenalisedLogit, regress, vif
|
from .regress import PenalisedLogit, regress, vif
|
||||||
from .sig_tests import chi2, mannwhitney, ttest_ind
|
from .sig_tests import anova_oneway, chi2, mannwhitney, pearsonr, ttest_ind
|
||||||
|
|
||||||
def reload_me():
|
def reload_me():
|
||||||
import importlib
|
import importlib
|
||||||
|
@ -27,6 +27,7 @@ from datetime import datetime
|
|||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from .bayes_factors import BayesFactor, bayesfactor_afbf
|
from .bayes_factors import BayesFactor, bayesfactor_afbf
|
||||||
|
from .sig_tests import FTestResult
|
||||||
from .utils import Estimate, check_nan, fmt_p
|
from .utils import Estimate, check_nan, fmt_p
|
||||||
|
|
||||||
def vif(df, formula=None, nan_policy='warn'):
|
def vif(df, formula=None, nan_policy='warn'):
|
||||||
@ -91,21 +92,6 @@ class LikelihoodRatioTestResult:
|
|||||||
def summary(self):
|
def summary(self):
|
||||||
return 'LR({}) = {:.2f}; p {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False))
|
return 'LR({}) = {:.2f}; p {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False))
|
||||||
|
|
||||||
class FTestResult:
|
|
||||||
"""Result of an F test for regression"""
|
|
||||||
|
|
||||||
def __init__(self, statistic, dof_model, dof_resid, pvalue):
|
|
||||||
self.statistic = statistic
|
|
||||||
self.dof_model = dof_model
|
|
||||||
self.dof_resid = dof_resid
|
|
||||||
self.pvalue = pvalue
|
|
||||||
|
|
||||||
def _repr_html_(self):
|
|
||||||
return '<i>F</i>({}, {}) = {:.2f}; <i>p</i> {}'.format(self.dof_model, self.dof_resid, self.statistic, fmt_p(self.pvalue, html=True))
|
|
||||||
|
|
||||||
def summary(self):
|
|
||||||
return 'F({}, {}) = {:.2f}; p {}'.format(self.dof_model, self.dof_resid, self.statistic, fmt_p(self.pvalue, html=False))
|
|
||||||
|
|
||||||
class RegressionResult:
|
class RegressionResult:
|
||||||
"""
|
"""
|
||||||
Result of a regression
|
Result of a regression
|
||||||
|
@ -73,6 +73,42 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'):
|
|||||||
delta=abs(Estimate(delta, ci0, ci1)),
|
delta=abs(Estimate(delta, ci0, ci1)),
|
||||||
delta_direction=('{0} > {1}' if d1.mean > d2.mean else '{1} > {0}').format(group1, group2))
|
delta_direction=('{0} > {1}' if d1.mean > d2.mean else '{1} > {0}').format(group1, group2))
|
||||||
|
|
||||||
|
# -------------
|
||||||
|
# One-way ANOVA
|
||||||
|
|
||||||
|
class FTestResult:
|
||||||
|
"""Result of an F test for ANOVA/regression"""
|
||||||
|
|
||||||
|
def __init__(self, statistic, dof1, dof2, pvalue):
|
||||||
|
self.statistic = statistic
|
||||||
|
self.dof1 = dof1
|
||||||
|
self.dof2 = dof2
|
||||||
|
self.pvalue = pvalue
|
||||||
|
|
||||||
|
def _repr_html_(self):
|
||||||
|
return '<i>F</i>({}, {}) = {:.2f}; <i>p</i> {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=True))
|
||||||
|
|
||||||
|
def summary(self):
|
||||||
|
return 'F({}, {}) = {:.2f}; p {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=False))
|
||||||
|
|
||||||
|
def anova_oneway(df, dep, ind, *, nan_policy='omit'):
|
||||||
|
"""Perform one-way ANOVA"""
|
||||||
|
|
||||||
|
# Check for/clean NaNs
|
||||||
|
df = check_nan(df[[ind, dep]], nan_policy)
|
||||||
|
|
||||||
|
# Group by independent variable
|
||||||
|
groups = df.groupby(ind)[dep]
|
||||||
|
|
||||||
|
# Perform one-way ANOVA
|
||||||
|
result = stats.f_oneway(*[groups.get_group(k) for k in groups.groups])
|
||||||
|
|
||||||
|
# See stats.f_oneway implementation
|
||||||
|
dfbn = len(groups.groups) - 1
|
||||||
|
dfwn = len(df) - len(groups.groups)
|
||||||
|
|
||||||
|
return FTestResult(result.statistic, dfbn, dfwn, result.pvalue)
|
||||||
|
|
||||||
# -----------------
|
# -----------------
|
||||||
# Mann-Whitney test
|
# Mann-Whitney test
|
||||||
|
|
||||||
@ -224,3 +260,31 @@ def chi2(df, dep, ind, *, nan_policy='warn'):
|
|||||||
result = stats.chi2_contingency(ct, correction=False)
|
result = stats.chi2_contingency(ct, correction=False)
|
||||||
|
|
||||||
return PearsonChiSquaredResult(ct=ct, statistic=result[0], dof=result[2], pvalue=result[1])
|
return PearsonChiSquaredResult(ct=ct, statistic=result[0], dof=result[2], pvalue=result[1])
|
||||||
|
|
||||||
|
# -------------------
|
||||||
|
# Pearson correlation
|
||||||
|
|
||||||
|
class PearsonRResult:
|
||||||
|
"""Result of Pearson correlation"""
|
||||||
|
|
||||||
|
def __init__(self, statistic, pvalue):
|
||||||
|
self.statistic = statistic
|
||||||
|
self.pvalue = pvalue
|
||||||
|
|
||||||
|
def _repr_html_(self):
|
||||||
|
return '<i>r</i> (95% CI) = {}; <i>p</i> {}'.format(self.statistic.summary(), fmt_p(self.pvalue, html=True))
|
||||||
|
|
||||||
|
def summary(self):
|
||||||
|
return 'r (95% CI) = {}; p {}'.format(self.statistic.summary(), fmt_p(self.pvalue, html=False))
|
||||||
|
|
||||||
|
def pearsonr(df, dep, ind, *, nan_policy='warn'):
|
||||||
|
"""Compute the Pearson correlation coefficient (Pearson's r)"""
|
||||||
|
|
||||||
|
# Check for/clean NaNs
|
||||||
|
df = check_nan(df[[ind, dep]], nan_policy)
|
||||||
|
|
||||||
|
# Compute Pearson's r
|
||||||
|
result = stats.pearsonr(df[ind], df[dep])
|
||||||
|
ci = result.confidence_interval()
|
||||||
|
|
||||||
|
return PearsonRResult(statistic=Estimate(result.statistic, ci.low, ci.high), pvalue=result.pvalue)
|
||||||
|
Loading…
Reference in New Issue
Block a user