diff --git a/tests/test_hdi.py b/tests/test_hdi.py new file mode 100644 index 0000000..42b775c --- /dev/null +++ b/tests/test_hdi.py @@ -0,0 +1,42 @@ +# scipy-yli: Helpful SciPy utilities and recipes +# Copyright © 2022 Lee Yingtong Li (RunasSudo) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import yli + +import numpy as np +from scipy import stats + +def test_hdi_beta_vs_r(): + """Compare yli.hdi for beta distribution with R binom.bayes""" + + # Inference on a beta-binomial posterior + n, N = 12, 250 + prior_a, prior_b = 1, 1 + + distribution = stats.beta(n + prior_a, N - n + prior_b) + + # Compute HDI + hdi = yli.hdi(distribution, 0.95) + + #> library(binom) + #> binom.bayes(12, 250, conf.level=0.95, type='highest', prior.shape1=1, prior.shape2=1) + # method x n shape1 shape2 mean lower upper sig + #1 bayes 12 250 13 239 0.0515873 0.02594348 0.0793006 0.05 + expected = np.array([0.02594348, 0.0793006]) + + # Allow 1e-5 tolerance + diff = np.abs(np.array(hdi) - expected) + assert (diff < 1e-5).all() diff --git a/yli/distributions.py b/yli/distributions.py index e9d7930..3a8f225 100644 --- a/yli/distributions.py +++ b/yli/distributions.py @@ -16,7 +16,9 @@ import mpmath import numpy as np -from scipy import stats +from scipy import optimize, stats + +import collections class betarat_gen(stats.rv_continuous): """ @@ -90,3 +92,26 @@ class betarat_gen(stats.rv_continuous): return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2) beta_ratio = betarat_gen(name='beta_ratio', a=0) + +ConfidenceInterval = collections.namedtuple('ConfidenceInterval', ['lower', 'upper']) + +def hdi(distribution, level=0.95): + """ + Get the highest density interval for the distribution, e.g. for a Bayesian posterior, the highest posterior density interval (HPD/HDI) + """ + + # For a given lower limit, we can compute the corresponding 95% interval + def interval_width(lower): + upper = distribution.ppf(distribution.cdf(lower) + level) + return upper - lower + + # Find such interval which has the smallest width + # Use equal-tailed interval as initial guess + initial_guess = distribution.ppf((1-level)/2) + optimize_result = optimize.minimize(interval_width, initial_guess) # TODO: Allow customising parameters + + lower_limit = optimize_result.x[0] + width = optimize_result.fun + upper_limit = lower_limit + width + + return ConfidenceInterval(lower_limit, upper_limit)