Implement hdi

This commit is contained in:
RunasSudo 2022-10-06 18:44:22 +11:00
parent e1fd73e095
commit d3842e1645
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
2 changed files with 68 additions and 1 deletions

42
tests/test_hdi.py Normal file
View File

@ -0,0 +1,42 @@
# scipy-yli: Helpful SciPy utilities and recipes
# Copyright © 2022 Lee Yingtong Li (RunasSudo)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import yli
import numpy as np
from scipy import stats
def test_hdi_beta_vs_r():
"""Compare yli.hdi for beta distribution with R binom.bayes"""
# Inference on a beta-binomial posterior
n, N = 12, 250
prior_a, prior_b = 1, 1
distribution = stats.beta(n + prior_a, N - n + prior_b)
# Compute HDI
hdi = yli.hdi(distribution, 0.95)
#> library(binom)
#> binom.bayes(12, 250, conf.level=0.95, type='highest', prior.shape1=1, prior.shape2=1)
# method x n shape1 shape2 mean lower upper sig
#1 bayes 12 250 13 239 0.0515873 0.02594348 0.0793006 0.05
expected = np.array([0.02594348, 0.0793006])
# Allow 1e-5 tolerance
diff = np.abs(np.array(hdi) - expected)
assert (diff < 1e-5).all()

View File

@ -16,7 +16,9 @@
import mpmath
import numpy as np
from scipy import stats
from scipy import optimize, stats
import collections
class betarat_gen(stats.rv_continuous):
"""
@ -90,3 +92,26 @@ class betarat_gen(stats.rv_continuous):
return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2)
beta_ratio = betarat_gen(name='beta_ratio', a=0)
ConfidenceInterval = collections.namedtuple('ConfidenceInterval', ['lower', 'upper'])
def hdi(distribution, level=0.95):
"""
Get the highest density interval for the distribution, e.g. for a Bayesian posterior, the highest posterior density interval (HPD/HDI)
"""
# For a given lower limit, we can compute the corresponding 95% interval
def interval_width(lower):
upper = distribution.ppf(distribution.cdf(lower) + level)
return upper - lower
# Find such interval which has the smallest width
# Use equal-tailed interval as initial guess
initial_guess = distribution.ppf((1-level)/2)
optimize_result = optimize.minimize(interval_width, initial_guess) # TODO: Allow customising parameters
lower_limit = optimize_result.x[0]
width = optimize_result.fun
upper_limit = lower_limit + width
return ConfidenceInterval(lower_limit, upper_limit)