Implement anova_oneway
This commit is contained in:
parent
a753761675
commit
e8e97f6073
52
tests/test_anova.py
Normal file
52
tests/test_anova.py
Normal file
@ -0,0 +1,52 @@
|
||||
# scipy-yli: Helpful SciPy utilities and recipes
|
||||
# Copyright © 2022 Lee Yingtong Li (RunasSudo)
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
from pytest import approx
|
||||
|
||||
import pandas as pd
|
||||
import statsmodels.api as sm
|
||||
|
||||
import yli
|
||||
|
||||
def test_anova_oneway_ol8_2():
|
||||
"""Compare yli.anova_oneway for Ott & Longnecker (2016) example 8.2"""
|
||||
|
||||
df = pd.DataFrame({
|
||||
'Method': [1]*8 + [2]*7 + [3]*9,
|
||||
'Score': [96, 79, 91, 85, 83, 91, 82, 87, 77, 76, 74, 73, 78, 71, 80, 66, 73, 69, 66, 77, 73, 71, 70, 74]
|
||||
})
|
||||
|
||||
result = yli.anova_oneway(df, 'Score', 'Method')
|
||||
|
||||
assert result.statistic == approx(545.316/18.4366, rel=0.001)
|
||||
assert result.dof1 == 2
|
||||
assert result.dof2 == 21
|
||||
assert result.pvalue < 0.001
|
||||
|
||||
def test_regress_ftest_ol8_2():
|
||||
"""Compare ANOVA via yli.regress for Ott & Longnecker (2016) example 8.2"""
|
||||
|
||||
df = pd.DataFrame({
|
||||
'Method': [1]*8 + [2]*7 + [3]*9,
|
||||
'Score': [96, 79, 91, 85, 83, 91, 82, 87, 77, 76, 74, 73, 78, 71, 80, 66, 73, 69, 66, 77, 73, 71, 70, 74]
|
||||
})
|
||||
|
||||
result = yli.regress(sm.OLS, df, 'Score', 'C(Method)').ftest()
|
||||
|
||||
assert result.statistic == approx(545.316/18.4366, rel=0.001)
|
||||
assert result.dof1 == 2
|
||||
assert result.dof2 == 21
|
||||
assert result.pvalue < 0.001
|
@ -18,7 +18,7 @@ from .bayes_factors import bayesfactor_afbf
|
||||
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
|
||||
from .fs import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
|
||||
from .regress import PenalisedLogit, regress, vif
|
||||
from .sig_tests import chi2, mannwhitney, ttest_ind
|
||||
from .sig_tests import anova_oneway, chi2, mannwhitney, pearsonr, ttest_ind
|
||||
|
||||
def reload_me():
|
||||
import importlib
|
||||
|
@ -27,6 +27,7 @@ from datetime import datetime
|
||||
import itertools
|
||||
|
||||
from .bayes_factors import BayesFactor, bayesfactor_afbf
|
||||
from .sig_tests import FTestResult
|
||||
from .utils import Estimate, check_nan, fmt_p
|
||||
|
||||
def vif(df, formula=None, nan_policy='warn'):
|
||||
@ -91,21 +92,6 @@ class LikelihoodRatioTestResult:
|
||||
def summary(self):
|
||||
return 'LR({}) = {:.2f}; p {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False))
|
||||
|
||||
class FTestResult:
|
||||
"""Result of an F test for regression"""
|
||||
|
||||
def __init__(self, statistic, dof_model, dof_resid, pvalue):
|
||||
self.statistic = statistic
|
||||
self.dof_model = dof_model
|
||||
self.dof_resid = dof_resid
|
||||
self.pvalue = pvalue
|
||||
|
||||
def _repr_html_(self):
|
||||
return '<i>F</i>({}, {}) = {:.2f}; <i>p</i> {}'.format(self.dof_model, self.dof_resid, self.statistic, fmt_p(self.pvalue, html=True))
|
||||
|
||||
def summary(self):
|
||||
return 'F({}, {}) = {:.2f}; p {}'.format(self.dof_model, self.dof_resid, self.statistic, fmt_p(self.pvalue, html=False))
|
||||
|
||||
class RegressionResult:
|
||||
"""
|
||||
Result of a regression
|
||||
|
@ -73,6 +73,42 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'):
|
||||
delta=abs(Estimate(delta, ci0, ci1)),
|
||||
delta_direction=('{0} > {1}' if d1.mean > d2.mean else '{1} > {0}').format(group1, group2))
|
||||
|
||||
# -------------
|
||||
# One-way ANOVA
|
||||
|
||||
class FTestResult:
|
||||
"""Result of an F test for ANOVA/regression"""
|
||||
|
||||
def __init__(self, statistic, dof1, dof2, pvalue):
|
||||
self.statistic = statistic
|
||||
self.dof1 = dof1
|
||||
self.dof2 = dof2
|
||||
self.pvalue = pvalue
|
||||
|
||||
def _repr_html_(self):
|
||||
return '<i>F</i>({}, {}) = {:.2f}; <i>p</i> {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=True))
|
||||
|
||||
def summary(self):
|
||||
return 'F({}, {}) = {:.2f}; p {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=False))
|
||||
|
||||
def anova_oneway(df, dep, ind, *, nan_policy='omit'):
|
||||
"""Perform one-way ANOVA"""
|
||||
|
||||
# Check for/clean NaNs
|
||||
df = check_nan(df[[ind, dep]], nan_policy)
|
||||
|
||||
# Group by independent variable
|
||||
groups = df.groupby(ind)[dep]
|
||||
|
||||
# Perform one-way ANOVA
|
||||
result = stats.f_oneway(*[groups.get_group(k) for k in groups.groups])
|
||||
|
||||
# See stats.f_oneway implementation
|
||||
dfbn = len(groups.groups) - 1
|
||||
dfwn = len(df) - len(groups.groups)
|
||||
|
||||
return FTestResult(result.statistic, dfbn, dfwn, result.pvalue)
|
||||
|
||||
# -----------------
|
||||
# Mann-Whitney test
|
||||
|
||||
@ -224,3 +260,31 @@ def chi2(df, dep, ind, *, nan_policy='warn'):
|
||||
result = stats.chi2_contingency(ct, correction=False)
|
||||
|
||||
return PearsonChiSquaredResult(ct=ct, statistic=result[0], dof=result[2], pvalue=result[1])
|
||||
|
||||
# -------------------
|
||||
# Pearson correlation
|
||||
|
||||
class PearsonRResult:
|
||||
"""Result of Pearson correlation"""
|
||||
|
||||
def __init__(self, statistic, pvalue):
|
||||
self.statistic = statistic
|
||||
self.pvalue = pvalue
|
||||
|
||||
def _repr_html_(self):
|
||||
return '<i>r</i> (95% CI) = {}; <i>p</i> {}'.format(self.statistic.summary(), fmt_p(self.pvalue, html=True))
|
||||
|
||||
def summary(self):
|
||||
return 'r (95% CI) = {}; p {}'.format(self.statistic.summary(), fmt_p(self.pvalue, html=False))
|
||||
|
||||
def pearsonr(df, dep, ind, *, nan_policy='warn'):
|
||||
"""Compute the Pearson correlation coefficient (Pearson's r)"""
|
||||
|
||||
# Check for/clean NaNs
|
||||
df = check_nan(df[[ind, dep]], nan_policy)
|
||||
|
||||
# Compute Pearson's r
|
||||
result = stats.pearsonr(df[ind], df[dep])
|
||||
ci = result.confidence_interval()
|
||||
|
||||
return PearsonRResult(statistic=Estimate(result.statistic, ci.low, ci.high), pvalue=result.pvalue)
|
||||
|
Loading…
Reference in New Issue
Block a user