From 526d361338da626bf603e2458581971fa13d9ce7 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Fri, 14 Oct 2022 14:09:29 +1100 Subject: [PATCH] Implement RegressionResult.bayesfactor_beta_zero --- tests/test_bayes_factors.py | 49 +++++++++++++++++++ yli/__init__.py | 1 + yli/bayes_factors.py | 93 +++++++++++++++++++++++++++++++++++++ yli/regress.py | 20 ++++++++ 4 files changed, 163 insertions(+) create mode 100644 tests/test_bayes_factors.py create mode 100644 yli/bayes_factors.py diff --git a/tests/test_bayes_factors.py b/tests/test_bayes_factors.py new file mode 100644 index 0000000..26582d0 --- /dev/null +++ b/tests/test_bayes_factors.py @@ -0,0 +1,49 @@ +# scipy-yli: Helpful SciPy utilities and recipes +# Copyright © 2022 Lee Yingtong Li (RunasSudo) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from pytest import approx + +import pandas as pd +import statsmodels.api as sm + +import yli + +def test_afbf_logit_beta_zero(): + """Compare RegressionResult.bayesfactor_beta_zero for Ott & Longnecker (2016) chapter 12.23 with R BFpack""" + + df = pd.DataFrame({ + 'Unhealthy': [False, False, False, False, False, False, False, True, False, False, False, True, False, False, False, False, False, False, True, False, True, False, False, False, False, False, True, False, False, True, False, False], + 'Fibrinogen': [2.52, 2.46, 2.29, 3.15, 2.88, 2.29, 2.99, 2.38, 2.56, 3.22, 2.35, 3.53, 2.65, 2.15, 3.32, 2.23, 2.19, 2.21, 5.06, 2.68, 2.09, 2.54, 2.18, 2.68, 3.41, 3.15, 3.35, 2.60, 2.28, 3.93, 2.60, 3.34], + 'GammaGlobulin': [38, 36, 36, 36, 30, 31, 36, 37, 31, 38, 29, 46, 46, 31, 35, 37, 33, 37, 37, 34, 44, 28, 31, 39, 37, 39, 32, 38, 36, 32, 41, 30] + }) + + result = yli.regress(sm.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin') + + # model <- glm(Unhealthy ~ Fibrinogen + GammaGlobulin, data=df, family=binomial()) + # bf_fit <- BF(model, hypothesis="Fibrinogen = 0") + # summary(bf_fit) + # bf_fit <- BF(model, hypothesis="GammaGlobulin = 0") + # summary(bf_fit) + + bf = result.bayesfactor_beta_zero('Fibrinogen') + assert bf.factor == approx(1.229, abs=0.001) + assert bf.num_desc == 'Fibrinogen ≠ 0' + assert bf.denom_desc == 'Fibrinogen = 0' + + bf = result.bayesfactor_beta_zero('GammaGlobulin') + assert bf.factor == approx(2.417, abs=0.001) + assert bf.num_desc == 'GammaGlobulin = 0' + assert bf.denom_desc == 'GammaGlobulin ≠ 0' diff --git a/yli/__init__.py b/yli/__init__.py index 783c431..3ce936f 100644 --- a/yli/__init__.py +++ b/yli/__init__.py @@ -14,6 +14,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +from .bayes_factors import BayesFactor, bayesfactor_afbf from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist from .fs import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted from .regress import PenalisedLogit, regress, vif diff --git a/yli/bayes_factors.py b/yli/bayes_factors.py new file mode 100644 index 0000000..c47ebdf --- /dev/null +++ b/yli/bayes_factors.py @@ -0,0 +1,93 @@ +# scipy-yli: Helpful SciPy utilities and recipes +# Copyright © 2022 Lee Yingtong Li (RunasSudo) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +class BayesFactor: + """ + A Bayes factor + """ + + def __init__(self, factor, num_symbol, num_desc, denom_symbol, denom_desc): + self.factor = factor + + self.num_symbol = num_symbol + self.num_desc = num_desc + self.denom_symbol = denom_symbol + self.denom_desc = denom_desc + + def _repr_html_(self): + return 'BF{0}{1} = {2:.2f}, {5}
H{0}: {3}
H{1}: {4}'.format(self.num_symbol, self.denom_symbol, self.factor, self.num_desc, self.denom_desc, self.interpret_lw(html=True)) + + def summary(self): + return 'BF{0}{1} = {2:.2f}, {5}\nH{0}: {3}\nH{1}: {4}'.format(self.num_symbol, self.denom_symbol, self.factor, self.num_desc, self.denom_desc, self.interpret_lw(html=False)) + + def invert(self): + """Invert the Bayes factor""" + + return BayesFactor(1 / self.factor, self.denom_symbol, self.denom_desc, self.num_symbol, self.num_desc) + + def interpret_lw(self, html): + """Interpret the Bayes factor according to the Lee & Wagenmakers classification scheme""" + + if self.factor == 1: + return 'Evidence favours neither hypothesis to the other' + + if self.factor < 1: + return self.invert().interpret_lw() + + if self.factor < 3: + level = 'Anecdotal' + elif self.factor < 10: + level = 'Moderate' + elif self.factor < 30: + level = 'Strong' + elif self.factor < 100: + level = 'Very strong' + else: + level = 'Extreme' + + if html: + return '{} evidence in favour of H{}'.format(level, self.num_symbol) + else: + return '{} evidence in favour of H{}'.format(level, self.num_symbol) + +def bayesfactor_afbf(params, cov, n, hypothesis): + """ + Compute an adjusted fractional Bayes factor for the hypothesis + Using R "BFpack" library + + See R documentation for BFpack.BF + + Returns Bayes factor for hypothesis vs its complement + """ + + import rpy2.robjects as ro + import rpy2.robjects.packages + import rpy2.robjects.pandas2ri + + # Import BFpack + ro.packages.importr('BFpack') + + with ro.conversion.localconverter(ro.default_converter + ro.pandas2ri.converter): + with ro.local_context() as lc: + lc['params'] = params + lc['cov'] = cov + lc['n'] = n + lc['hypothesis'] = hypothesis + + ro.r('bf_fit <- BF(params, Sigma=as.matrix(cov), n=n, hypothesis=hypothesis)') + bf_matrix = ro.r('bf_fit$BFmatrix_confirmatory') + + return BayesFactor(bf_matrix[0][1], '1', hypothesis, 'C', 'Complement') diff --git a/yli/regress.py b/yli/regress.py index 5ab7946..4d60464 100644 --- a/yli/regress.py +++ b/yli/regress.py @@ -26,6 +26,7 @@ from statsmodels.stats.outliers_influence import variance_inflation_factor from datetime import datetime import itertools +from .bayes_factors import BayesFactor, bayesfactor_afbf from .utils import Estimate, check_nan, fmt_p_html, fmt_p_text def vif(df, formula=None, nan_policy='warn'): @@ -171,6 +172,25 @@ class RegressionResult: pvalue = 1 - stats.f(self.dof_model, self.dof_resid).cdf(self.f_statistic) return FTestResult(self.f_statistic, self.dof_model, self.dof_resid, pvalue) + def bayesfactor_beta_zero(self, term): + """ + Compute Bayes factor testing the hypothesis that the given beta is 0 + Requires statsmodels regression + """ + + # Get parameters required for AFBF + params = pd.Series({term.replace('[', '_').replace(']', '_'): beta.point for term, beta in self.beta.items()}) + cov = self.raw_result.cov_params() + + # Compute BF matrix + bf01 = bayesfactor_afbf(params, cov, self.nobs, '{} = 0'.format(term.replace('[', '_').replace(']', '_'))) + bf01 = BayesFactor(bf01.factor, '0', '{} = 0'.format(term), '1', '{} ≠ 0'.format(term)) + + if bf01.factor >= 1: + return bf01 + else: + return bf01.invert() + def _header_table(self, html): """Return the entries for the header table"""