Implement anova_oneway

2022-10-14 20:18:25 +11:00 · 2022-10-14 20:18:25 +11:00 · e8e97f6073
commit e8e97f6073
parent a753761675
4 changed files with 118 additions and 16 deletions
--- a/tests/test_anova.py
+++ b/tests/test_anova.py
@ -0,0 +1,52 @@
 #   scipy-yli: Helpful SciPy utilities and recipes
 #   Copyright © 2022  Lee Yingtong Li (RunasSudo)
 #
 #   This program is free software: you can redistribute it and/or modify
 #   it under the terms of the GNU Affero General Public License as published by
 #   the Free Software Foundation, either version 3 of the License, or
 #   (at your option) any later version.
 #
 #   This program is distributed in the hope that it will be useful,
 #   but WITHOUT ANY WARRANTY; without even the implied warranty of
 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 #   GNU Affero General Public License for more details.
 #
 #   You should have received a copy of the GNU Affero General Public License
 #   along with this program.  If not, see <https://www.gnu.org/licenses/>.
 from pytest import approx
 import pandas as pd
 import statsmodels.api as sm
 import yli
 def test_anova_oneway_ol8_2():
 	"""Compare yli.anova_oneway for Ott & Longnecker (2016) example 8.2"""
 	df = pd.DataFrame({
 		'Method': [1]*8 + [2]*7 + [3]*9,
 		'Score': [96, 79, 91, 85, 83, 91, 82, 87, 77, 76, 74, 73, 78, 71, 80, 66, 73, 69, 66, 77, 73, 71, 70, 74]
 	})
 	result = yli.anova_oneway(df, 'Score', 'Method')
 	assert result.statistic == approx(545.316/18.4366, rel=0.001)
 	assert result.dof1 == 2
 	assert result.dof2 == 21
 	assert result.pvalue < 0.001
 def test_regress_ftest_ol8_2():
 	"""Compare ANOVA via yli.regress for Ott & Longnecker (2016) example 8.2"""
 	df = pd.DataFrame({
 		'Method': [1]*8 + [2]*7 + [3]*9,
 		'Score': [96, 79, 91, 85, 83, 91, 82, 87, 77, 76, 74, 73, 78, 71, 80, 66, 73, 69, 66, 77, 73, 71, 70, 74]
 	})
 	result = yli.regress(sm.OLS, df, 'Score', 'C(Method)').ftest()
 	assert result.statistic == approx(545.316/18.4366, rel=0.001)
 	assert result.dof1 == 2
 	assert result.dof2 == 21
 	assert result.pvalue < 0.001
--- a/yli/init.py
+++ b/yli/init.py
@ -18,7 +18,7 @@ from .bayes_factors import bayesfactor_afbf
 from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
 from .fs import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
 from .regress import PenalisedLogit, regress, vif
-from .sig_tests import chi2, mannwhitney, ttest_ind
+from .sig_tests import anova_oneway, chi2, mannwhitney, pearsonr, ttest_ind
 def reload_me():
 	import importlib
--- a/yli/regress.py
+++ b/yli/regress.py
@ -27,6 +27,7 @@ from datetime import datetime
 import itertools
 from .bayes_factors import BayesFactor, bayesfactor_afbf
 from .sig_tests import FTestResult
 from .utils import Estimate, check_nan, fmt_p
 def vif(df, formula=None, nan_policy='warn'):
@ -91,21 +92,6 @@ class LikelihoodRatioTestResult:
 	def summary(self):
 		return 'LR({}) = {:.2f}; p {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False))
 class FTestResult:
 	"""Result of an F test for regression"""
 	def __init__(self, statistic, dof_model, dof_resid, pvalue):
 		self.statistic = statistic
 		self.dof_model = dof_model
 		self.dof_resid = dof_resid
 		self.pvalue = pvalue
 	def _repr_html_(self):
 		return '<i>F</i>({}, {}) = {:.2f}; <i>p</i> {}'.format(self.dof_model, self.dof_resid, self.statistic, fmt_p(self.pvalue, html=True))
 	def summary(self):
 		return 'F({}, {}) = {:.2f}; p {}'.format(self.dof_model, self.dof_resid, self.statistic, fmt_p(self.pvalue, html=False))
 class RegressionResult:
 	"""
 	Result of a regression
--- a/yli/sig_tests.py
+++ b/yli/sig_tests.py
@ -73,6 +73,42 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'):
 		delta=abs(Estimate(delta, ci0, ci1)),
 		delta_direction=('{0} > {1}' if d1.mean > d2.mean else '{1} > {0}').format(group1, group2))
 # -------------
 # One-way ANOVA
 class FTestResult:
 	"""Result of an F test for ANOVA/regression"""
 	def __init__(self, statistic, dof1, dof2, pvalue):
 		self.statistic = statistic
 		self.dof1 = dof1
 		self.dof2 = dof2
 		self.pvalue = pvalue
 	def _repr_html_(self):
 		return '<i>F</i>({}, {}) = {:.2f}; <i>p</i> {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=True))
 	def summary(self):
 		return 'F({}, {}) = {:.2f}; p {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=False))
 def anova_oneway(df, dep, ind, *, nan_policy='omit'):
 	"""Perform one-way ANOVA"""
 	# Check for/clean NaNs
 	df = check_nan(df[[ind, dep]], nan_policy)
 	# Group by independent variable
 	groups = df.groupby(ind)[dep]
 	# Perform one-way ANOVA
 	result = stats.f_oneway(*[groups.get_group(k) for k in groups.groups])
 	# See stats.f_oneway implementation
 	dfbn = len(groups.groups) - 1
 	dfwn = len(df) - len(groups.groups)
 	return FTestResult(result.statistic, dfbn, dfwn, result.pvalue)
 # -----------------
 # Mann-Whitney test
@ -224,3 +260,31 @@ def chi2(df, dep, ind, *, nan_policy='warn'):
 		result = stats.chi2_contingency(ct, correction=False)
 		return PearsonChiSquaredResult(ct=ct, statistic=result[0], dof=result[2], pvalue=result[1])
 # -------------------
 # Pearson correlation
 class PearsonRResult:
 	"""Result of Pearson correlation"""
 	def __init__(self, statistic, pvalue):
 		self.statistic = statistic
 		self.pvalue = pvalue
 	def _repr_html_(self):
 		return '<i>r</i> (95% CI) = {}; <i>p</i> {}'.format(self.statistic.summary(), fmt_p(self.pvalue, html=True))
 	def summary(self):
 		return 'r (95% CI) = {}; p {}'.format(self.statistic.summary(), fmt_p(self.pvalue, html=False))
 def pearsonr(df, dep, ind, *, nan_policy='warn'):
 	"""Compute the Pearson correlation coefficient (Pearson's r)"""
 	# Check for/clean NaNs
 	df = check_nan(df[[ind, dep]], nan_policy)
 	# Compute Pearson's r
 	result = stats.pearsonr(df[ind], df[dep])
 	ci = result.confidence_interval()
 	return PearsonRResult(statistic=Estimate(result.statistic, ci.low, ci.high), pvalue=result.pvalue)