Implement RegressionResult.bayesfactor_beta_zero

This commit is contained in:
RunasSudo 2022-10-14 14:09:29 +11:00
parent bb534cb285
commit 526d361338
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
4 changed files with 163 additions and 0 deletions

View File

@ -0,0 +1,49 @@
# scipy-yli: Helpful SciPy utilities and recipes
# Copyright © 2022 Lee Yingtong Li (RunasSudo)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from pytest import approx
import pandas as pd
import statsmodels.api as sm
import yli
def test_afbf_logit_beta_zero():
"""Compare RegressionResult.bayesfactor_beta_zero for Ott & Longnecker (2016) chapter 12.23 with R BFpack"""
df = pd.DataFrame({
'Unhealthy': [False, False, False, False, False, False, False, True, False, False, False, True, False, False, False, False, False, False, True, False, True, False, False, False, False, False, True, False, False, True, False, False],
'Fibrinogen': [2.52, 2.46, 2.29, 3.15, 2.88, 2.29, 2.99, 2.38, 2.56, 3.22, 2.35, 3.53, 2.65, 2.15, 3.32, 2.23, 2.19, 2.21, 5.06, 2.68, 2.09, 2.54, 2.18, 2.68, 3.41, 3.15, 3.35, 2.60, 2.28, 3.93, 2.60, 3.34],
'GammaGlobulin': [38, 36, 36, 36, 30, 31, 36, 37, 31, 38, 29, 46, 46, 31, 35, 37, 33, 37, 37, 34, 44, 28, 31, 39, 37, 39, 32, 38, 36, 32, 41, 30]
})
result = yli.regress(sm.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin')
# model <- glm(Unhealthy ~ Fibrinogen + GammaGlobulin, data=df, family=binomial())
# bf_fit <- BF(model, hypothesis="Fibrinogen = 0")
# summary(bf_fit)
# bf_fit <- BF(model, hypothesis="GammaGlobulin = 0")
# summary(bf_fit)
bf = result.bayesfactor_beta_zero('Fibrinogen')
assert bf.factor == approx(1.229, abs=0.001)
assert bf.num_desc == 'Fibrinogen ≠ 0'
assert bf.denom_desc == 'Fibrinogen = 0'
bf = result.bayesfactor_beta_zero('GammaGlobulin')
assert bf.factor == approx(2.417, abs=0.001)
assert bf.num_desc == 'GammaGlobulin = 0'
assert bf.denom_desc == 'GammaGlobulin ≠ 0'

View File

@ -14,6 +14,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from .bayes_factors import BayesFactor, bayesfactor_afbf
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
from .fs import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
from .regress import PenalisedLogit, regress, vif

93
yli/bayes_factors.py Normal file
View File

@ -0,0 +1,93 @@
# scipy-yli: Helpful SciPy utilities and recipes
# Copyright © 2022 Lee Yingtong Li (RunasSudo)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
class BayesFactor:
"""
A Bayes factor
"""
def __init__(self, factor, num_symbol, num_desc, denom_symbol, denom_desc):
self.factor = factor
self.num_symbol = num_symbol
self.num_desc = num_desc
self.denom_symbol = denom_symbol
self.denom_desc = denom_desc
def _repr_html_(self):
return 'BF<sub>{0}{1}</sub> = {2:.2f}, {5}<br>H<sub>{0}</sub>: {3}<br>H<sub>{1}</sub>: {4}'.format(self.num_symbol, self.denom_symbol, self.factor, self.num_desc, self.denom_desc, self.interpret_lw(html=True))
def summary(self):
return 'BF{0}{1} = {2:.2f}, {5}\nH{0}: {3}\nH{1}: {4}'.format(self.num_symbol, self.denom_symbol, self.factor, self.num_desc, self.denom_desc, self.interpret_lw(html=False))
def invert(self):
"""Invert the Bayes factor"""
return BayesFactor(1 / self.factor, self.denom_symbol, self.denom_desc, self.num_symbol, self.num_desc)
def interpret_lw(self, html):
"""Interpret the Bayes factor according to the Lee & Wagenmakers classification scheme"""
if self.factor == 1:
return 'Evidence favours neither hypothesis to the other'
if self.factor < 1:
return self.invert().interpret_lw()
if self.factor < 3:
level = 'Anecdotal'
elif self.factor < 10:
level = 'Moderate'
elif self.factor < 30:
level = 'Strong'
elif self.factor < 100:
level = 'Very strong'
else:
level = 'Extreme'
if html:
return '{} evidence in favour of H<sub>{}</sub>'.format(level, self.num_symbol)
else:
return '{} evidence in favour of H{}'.format(level, self.num_symbol)
def bayesfactor_afbf(params, cov, n, hypothesis):
"""
Compute an adjusted fractional Bayes factor for the hypothesis
Using R "BFpack" library
See R documentation for BFpack.BF
Returns Bayes factor for hypothesis vs its complement
"""
import rpy2.robjects as ro
import rpy2.robjects.packages
import rpy2.robjects.pandas2ri
# Import BFpack
ro.packages.importr('BFpack')
with ro.conversion.localconverter(ro.default_converter + ro.pandas2ri.converter):
with ro.local_context() as lc:
lc['params'] = params
lc['cov'] = cov
lc['n'] = n
lc['hypothesis'] = hypothesis
ro.r('bf_fit <- BF(params, Sigma=as.matrix(cov), n=n, hypothesis=hypothesis)')
bf_matrix = ro.r('bf_fit$BFmatrix_confirmatory')
return BayesFactor(bf_matrix[0][1], '1', hypothesis, 'C', 'Complement')

View File

@ -26,6 +26,7 @@ from statsmodels.stats.outliers_influence import variance_inflation_factor
from datetime import datetime
import itertools
from .bayes_factors import BayesFactor, bayesfactor_afbf
from .utils import Estimate, check_nan, fmt_p_html, fmt_p_text
def vif(df, formula=None, nan_policy='warn'):
@ -171,6 +172,25 @@ class RegressionResult:
pvalue = 1 - stats.f(self.dof_model, self.dof_resid).cdf(self.f_statistic)
return FTestResult(self.f_statistic, self.dof_model, self.dof_resid, pvalue)
def bayesfactor_beta_zero(self, term):
"""
Compute Bayes factor testing the hypothesis that the given beta is 0
Requires statsmodels regression
"""
# Get parameters required for AFBF
params = pd.Series({term.replace('[', '_').replace(']', '_'): beta.point for term, beta in self.beta.items()})
cov = self.raw_result.cov_params()
# Compute BF matrix
bf01 = bayesfactor_afbf(params, cov, self.nobs, '{} = 0'.format(term.replace('[', '_').replace(']', '_')))
bf01 = BayesFactor(bf01.factor, '0', '{} = 0'.format(term), '1', '{} ≠ 0'.format(term))
if bf01.factor >= 1:
return bf01
else:
return bf01.invert()
def _header_table(self, html):
"""Return the entries for the header table"""