From 526d361338da626bf603e2458581971fa13d9ce7 Mon Sep 17 00:00:00 2001
From: RunasSudo <runassudo@yingtongli.me>
Date: Fri, 14 Oct 2022 14:09:29 +1100
Subject: [PATCH] Implement RegressionResult.bayesfactor_beta_zero

---
 tests/test_bayes_factors.py | 49 +++++++++++++++++++
 yli/__init__.py             |  1 +
 yli/bayes_factors.py        | 93 +++++++++++++++++++++++++++++++++++++
 yli/regress.py              | 20 ++++++++
 4 files changed, 163 insertions(+)
 create mode 100644 tests/test_bayes_factors.py
 create mode 100644 yli/bayes_factors.py

diff --git a/tests/test_bayes_factors.py b/tests/test_bayes_factors.py
new file mode 100644
index 0000000..26582d0
--- /dev/null
+++ b/tests/test_bayes_factors.py
@@ -0,0 +1,49 @@
+#   scipy-yli: Helpful SciPy utilities and recipes
+#   Copyright © 2022  Lee Yingtong Li (RunasSudo)
+#
+#   This program is free software: you can redistribute it and/or modify
+#   it under the terms of the GNU Affero General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU Affero General Public License for more details.
+#
+#   You should have received a copy of the GNU Affero General Public License
+#   along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+from pytest import approx
+
+import pandas as pd
+import statsmodels.api as sm
+
+import yli
+
+def test_afbf_logit_beta_zero():
+	"""Compare RegressionResult.bayesfactor_beta_zero for Ott & Longnecker (2016) chapter 12.23 with R BFpack"""
+	
+	df = pd.DataFrame({
+		'Unhealthy': [False, False, False, False, False, False, False, True, False, False, False, True, False, False, False, False, False, False, True, False, True, False, False, False, False, False, True, False, False, True, False, False],
+		'Fibrinogen': [2.52, 2.46, 2.29, 3.15, 2.88, 2.29, 2.99, 2.38, 2.56, 3.22, 2.35, 3.53, 2.65, 2.15, 3.32, 2.23, 2.19, 2.21, 5.06, 2.68, 2.09, 2.54, 2.18, 2.68, 3.41, 3.15, 3.35, 2.60, 2.28, 3.93, 2.60, 3.34],
+		'GammaGlobulin': [38, 36, 36, 36, 30, 31, 36, 37, 31, 38, 29, 46, 46, 31, 35, 37, 33, 37, 37, 34, 44, 28, 31, 39, 37, 39, 32, 38, 36, 32, 41, 30]
+	})
+	
+	result = yli.regress(sm.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin')
+	
+	# model <- glm(Unhealthy ~ Fibrinogen + GammaGlobulin, data=df, family=binomial())
+	# bf_fit <- BF(model, hypothesis="Fibrinogen = 0")
+	# summary(bf_fit)
+	# bf_fit <- BF(model, hypothesis="GammaGlobulin = 0")
+	# summary(bf_fit)
+	
+	bf = result.bayesfactor_beta_zero('Fibrinogen')
+	assert bf.factor == approx(1.229, abs=0.001)
+	assert bf.num_desc == 'Fibrinogen ≠ 0'
+	assert bf.denom_desc == 'Fibrinogen = 0'
+	
+	bf = result.bayesfactor_beta_zero('GammaGlobulin')
+	assert bf.factor == approx(2.417, abs=0.001)
+	assert bf.num_desc == 'GammaGlobulin = 0'
+	assert bf.denom_desc == 'GammaGlobulin ≠ 0'
diff --git a/yli/__init__.py b/yli/__init__.py
index 783c431..3ce936f 100644
--- a/yli/__init__.py
+++ b/yli/__init__.py
@@ -14,6 +14,7 @@
 #   You should have received a copy of the GNU Affero General Public License
 #   along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
+from .bayes_factors import BayesFactor, bayesfactor_afbf
 from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
 from .fs import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
 from .regress import PenalisedLogit, regress, vif
diff --git a/yli/bayes_factors.py b/yli/bayes_factors.py
new file mode 100644
index 0000000..c47ebdf
--- /dev/null
+++ b/yli/bayes_factors.py
@@ -0,0 +1,93 @@
+#   scipy-yli: Helpful SciPy utilities and recipes
+#   Copyright © 2022  Lee Yingtong Li (RunasSudo)
+#
+#   This program is free software: you can redistribute it and/or modify
+#   it under the terms of the GNU Affero General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU Affero General Public License for more details.
+#
+#   You should have received a copy of the GNU Affero General Public License
+#   along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+class BayesFactor:
+	"""
+	A Bayes factor
+	"""
+	
+	def __init__(self, factor, num_symbol, num_desc, denom_symbol, denom_desc):
+		self.factor = factor
+		
+		self.num_symbol = num_symbol
+		self.num_desc = num_desc
+		self.denom_symbol = denom_symbol
+		self.denom_desc = denom_desc
+	
+	def _repr_html_(self):
+		return 'BF<sub>{0}{1}</sub> = {2:.2f}, {5}<br>H<sub>{0}</sub>: {3}<br>H<sub>{1}</sub>: {4}'.format(self.num_symbol, self.denom_symbol, self.factor, self.num_desc, self.denom_desc, self.interpret_lw(html=True))
+	
+	def summary(self):
+		return 'BF{0}{1} = {2:.2f}, {5}\nH{0}: {3}\nH{1}: {4}'.format(self.num_symbol, self.denom_symbol, self.factor, self.num_desc, self.denom_desc, self.interpret_lw(html=False))
+	
+	def invert(self):
+		"""Invert the Bayes factor"""
+		
+		return BayesFactor(1 / self.factor, self.denom_symbol, self.denom_desc, self.num_symbol, self.num_desc)
+	
+	def interpret_lw(self, html):
+		"""Interpret the Bayes factor according to the Lee & Wagenmakers classification scheme"""
+		
+		if self.factor == 1:
+			return 'Evidence favours neither hypothesis to the other'
+		
+		if self.factor < 1:
+			return self.invert().interpret_lw()
+		
+		if self.factor < 3:
+			level = 'Anecdotal'
+		elif self.factor < 10:
+			level = 'Moderate'
+		elif self.factor < 30:
+			level = 'Strong'
+		elif self.factor < 100:
+			level = 'Very strong'
+		else:
+			level = 'Extreme'
+		
+		if html:
+			return '{} evidence in favour of H<sub>{}</sub>'.format(level, self.num_symbol)
+		else:
+			return '{} evidence in favour of H{}'.format(level, self.num_symbol)
+
+def bayesfactor_afbf(params, cov, n, hypothesis):
+	"""
+	Compute an adjusted fractional Bayes factor for the hypothesis
+	Using R "BFpack" library
+	
+	See R documentation for BFpack.BF
+	
+	Returns Bayes factor for hypothesis vs its complement
+	"""
+	
+	import rpy2.robjects as ro
+	import rpy2.robjects.packages
+	import rpy2.robjects.pandas2ri
+	
+	# Import BFpack
+	ro.packages.importr('BFpack')
+	
+	with ro.conversion.localconverter(ro.default_converter + ro.pandas2ri.converter):
+		with ro.local_context() as lc:
+			lc['params'] = params
+			lc['cov'] = cov
+			lc['n'] = n
+			lc['hypothesis'] = hypothesis
+			
+			ro.r('bf_fit <- BF(params, Sigma=as.matrix(cov), n=n, hypothesis=hypothesis)')
+			bf_matrix = ro.r('bf_fit$BFmatrix_confirmatory')
+			
+			return BayesFactor(bf_matrix[0][1], '1', hypothesis, 'C', 'Complement')
diff --git a/yli/regress.py b/yli/regress.py
index 5ab7946..4d60464 100644
--- a/yli/regress.py
+++ b/yli/regress.py
@@ -26,6 +26,7 @@ from statsmodels.stats.outliers_influence import variance_inflation_factor
 from datetime import datetime
 import itertools
 
+from .bayes_factors import BayesFactor, bayesfactor_afbf
 from .utils import Estimate, check_nan, fmt_p_html, fmt_p_text
 
 def vif(df, formula=None, nan_policy='warn'):
@@ -171,6 +172,25 @@ class RegressionResult:
 		pvalue = 1 - stats.f(self.dof_model, self.dof_resid).cdf(self.f_statistic)
 		return FTestResult(self.f_statistic, self.dof_model, self.dof_resid, pvalue)
 	
+	def bayesfactor_beta_zero(self, term):
+		"""
+		Compute Bayes factor testing the hypothesis that the given beta is 0
+		Requires statsmodels regression
+		"""
+		
+		# Get parameters required for AFBF
+		params = pd.Series({term.replace('[', '_').replace(']', '_'): beta.point for term, beta in self.beta.items()})
+		cov = self.raw_result.cov_params()
+		
+		# Compute BF matrix
+		bf01 = bayesfactor_afbf(params, cov, self.nobs, '{} = 0'.format(term.replace('[', '_').replace(']', '_')))
+		bf01 = BayesFactor(bf01.factor, '0', '{} = 0'.format(term), '1', '{} ≠ 0'.format(term))
+		
+		if bf01.factor >= 1:
+			return bf01
+		else:
+			return bf01.invert()
+	
 	def _header_table(self, html):
 		"""Return the entries for the header table"""