diff --git a/tests/test_anova.py b/tests/test_anova.py
new file mode 100644
index 0000000..d533e0b
--- /dev/null
+++ b/tests/test_anova.py
@@ -0,0 +1,52 @@
+# scipy-yli: Helpful SciPy utilities and recipes
+# Copyright © 2022 Lee Yingtong Li (RunasSudo)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+from pytest import approx
+
+import pandas as pd
+import statsmodels.api as sm
+
+import yli
+
+def test_anova_oneway_ol8_2():
+ """Compare yli.anova_oneway for Ott & Longnecker (2016) example 8.2"""
+
+ df = pd.DataFrame({
+ 'Method': [1]*8 + [2]*7 + [3]*9,
+ 'Score': [96, 79, 91, 85, 83, 91, 82, 87, 77, 76, 74, 73, 78, 71, 80, 66, 73, 69, 66, 77, 73, 71, 70, 74]
+ })
+
+ result = yli.anova_oneway(df, 'Score', 'Method')
+
+ assert result.statistic == approx(545.316/18.4366, rel=0.001)
+ assert result.dof1 == 2
+ assert result.dof2 == 21
+ assert result.pvalue < 0.001
+
+def test_regress_ftest_ol8_2():
+ """Compare ANOVA via yli.regress for Ott & Longnecker (2016) example 8.2"""
+
+ df = pd.DataFrame({
+ 'Method': [1]*8 + [2]*7 + [3]*9,
+ 'Score': [96, 79, 91, 85, 83, 91, 82, 87, 77, 76, 74, 73, 78, 71, 80, 66, 73, 69, 66, 77, 73, 71, 70, 74]
+ })
+
+ result = yli.regress(sm.OLS, df, 'Score', 'C(Method)').ftest()
+
+ assert result.statistic == approx(545.316/18.4366, rel=0.001)
+ assert result.dof1 == 2
+ assert result.dof2 == 21
+ assert result.pvalue < 0.001
diff --git a/yli/__init__.py b/yli/__init__.py
index e92de2e..ee0d36a 100644
--- a/yli/__init__.py
+++ b/yli/__init__.py
@@ -18,7 +18,7 @@ from .bayes_factors import bayesfactor_afbf
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
from .fs import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
from .regress import PenalisedLogit, regress, vif
-from .sig_tests import chi2, mannwhitney, ttest_ind
+from .sig_tests import anova_oneway, chi2, mannwhitney, pearsonr, ttest_ind
def reload_me():
import importlib
diff --git a/yli/regress.py b/yli/regress.py
index 103cf5c..6d6dea4 100644
--- a/yli/regress.py
+++ b/yli/regress.py
@@ -27,6 +27,7 @@ from datetime import datetime
import itertools
from .bayes_factors import BayesFactor, bayesfactor_afbf
+from .sig_tests import FTestResult
from .utils import Estimate, check_nan, fmt_p
def vif(df, formula=None, nan_policy='warn'):
@@ -91,21 +92,6 @@ class LikelihoodRatioTestResult:
def summary(self):
return 'LR({}) = {:.2f}; p {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False))
-class FTestResult:
- """Result of an F test for regression"""
-
- def __init__(self, statistic, dof_model, dof_resid, pvalue):
- self.statistic = statistic
- self.dof_model = dof_model
- self.dof_resid = dof_resid
- self.pvalue = pvalue
-
- def _repr_html_(self):
- return 'F({}, {}) = {:.2f}; p {}'.format(self.dof_model, self.dof_resid, self.statistic, fmt_p(self.pvalue, html=True))
-
- def summary(self):
- return 'F({}, {}) = {:.2f}; p {}'.format(self.dof_model, self.dof_resid, self.statistic, fmt_p(self.pvalue, html=False))
-
class RegressionResult:
"""
Result of a regression
diff --git a/yli/sig_tests.py b/yli/sig_tests.py
index dba9cd1..ed4992e 100644
--- a/yli/sig_tests.py
+++ b/yli/sig_tests.py
@@ -73,6 +73,42 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'):
delta=abs(Estimate(delta, ci0, ci1)),
delta_direction=('{0} > {1}' if d1.mean > d2.mean else '{1} > {0}').format(group1, group2))
+# -------------
+# One-way ANOVA
+
+class FTestResult:
+ """Result of an F test for ANOVA/regression"""
+
+ def __init__(self, statistic, dof1, dof2, pvalue):
+ self.statistic = statistic
+ self.dof1 = dof1
+ self.dof2 = dof2
+ self.pvalue = pvalue
+
+ def _repr_html_(self):
+ return 'F({}, {}) = {:.2f}; p {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=True))
+
+ def summary(self):
+ return 'F({}, {}) = {:.2f}; p {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=False))
+
+def anova_oneway(df, dep, ind, *, nan_policy='omit'):
+ """Perform one-way ANOVA"""
+
+ # Check for/clean NaNs
+ df = check_nan(df[[ind, dep]], nan_policy)
+
+ # Group by independent variable
+ groups = df.groupby(ind)[dep]
+
+ # Perform one-way ANOVA
+ result = stats.f_oneway(*[groups.get_group(k) for k in groups.groups])
+
+ # See stats.f_oneway implementation
+ dfbn = len(groups.groups) - 1
+ dfwn = len(df) - len(groups.groups)
+
+ return FTestResult(result.statistic, dfbn, dfwn, result.pvalue)
+
# -----------------
# Mann-Whitney test
@@ -224,3 +260,31 @@ def chi2(df, dep, ind, *, nan_policy='warn'):
result = stats.chi2_contingency(ct, correction=False)
return PearsonChiSquaredResult(ct=ct, statistic=result[0], dof=result[2], pvalue=result[1])
+
+# -------------------
+# Pearson correlation
+
+class PearsonRResult:
+ """Result of Pearson correlation"""
+
+ def __init__(self, statistic, pvalue):
+ self.statistic = statistic
+ self.pvalue = pvalue
+
+ def _repr_html_(self):
+ return 'r (95% CI) = {}; p {}'.format(self.statistic.summary(), fmt_p(self.pvalue, html=True))
+
+ def summary(self):
+ return 'r (95% CI) = {}; p {}'.format(self.statistic.summary(), fmt_p(self.pvalue, html=False))
+
+def pearsonr(df, dep, ind, *, nan_policy='warn'):
+ """Compute the Pearson correlation coefficient (Pearson's r)"""
+
+ # Check for/clean NaNs
+ df = check_nan(df[[ind, dep]], nan_policy)
+
+ # Compute Pearson's r
+ result = stats.pearsonr(df[ind], df[dep])
+ ci = result.confidence_interval()
+
+ return PearsonRResult(statistic=Estimate(result.statistic, ci.low, ci.high), pvalue=result.pvalue)