2022-10-05 20:08:04 +11:00
|
|
|
# scipy-yli: Helpful SciPy utilities and recipes
|
|
|
|
# Copyright © 2022 Lee Yingtong Li (RunasSudo)
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU Affero General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
import mpmath
|
|
|
|
import numpy as np
|
2022-10-06 18:44:22 +11:00
|
|
|
from scipy import optimize, stats
|
|
|
|
|
|
|
|
import collections
|
2022-10-05 20:08:04 +11:00
|
|
|
|
|
|
|
class betarat_gen(stats.rv_continuous):
|
|
|
|
"""
|
|
|
|
Ratio of 2 independent beta-distributed variables
|
|
|
|
Ratio of Beta(a1, b1) / Beta(a2, b2)
|
|
|
|
|
|
|
|
References:
|
|
|
|
1. Pham-Gia T. Distributions of the ratios of independent beta variables and applications. Communications in Statistics: Theory and Methods. 2000;29(12):2693–715. doi: 10.1080/03610920008832632
|
|
|
|
2. Weekend Editor. On the ratio of Beta-distributed random variables. Some Weekend Reading. 2021 Sep 13. https://www.someweekendreading.blog/beta-ratios/
|
|
|
|
"""
|
|
|
|
|
2022-10-05 21:50:16 +11:00
|
|
|
def from_scipy(self, beta1, beta2):
|
|
|
|
"""Construct a new beta_ratio distribution from two SciPy beta distributions"""
|
|
|
|
return self(*beta1.args, *beta2.args)
|
|
|
|
|
2022-10-05 20:08:04 +11:00
|
|
|
def _do_vectorized(self, func, x, a1, b1, a2, b2):
|
|
|
|
"""Helper function to call the implementation over potentially multiple values"""
|
|
|
|
|
|
|
|
x = np.atleast_1d(x)
|
|
|
|
result = np.zeros(x.size)
|
|
|
|
for i, (x_, a1_, b1_, a2_, b2_) in enumerate(zip(x, np.pad(a1, x.size, 'edge'), np.pad(b1, x.size, 'edge'), np.pad(a2, x.size, 'edge'), np.pad(b2, x.size, 'edge'))):
|
|
|
|
result[i] = func(x_, a1_, b1_, a2_, b2_)
|
|
|
|
return result
|
|
|
|
|
|
|
|
def _pdf_one(self, w, a1, b1, a2, b2):
|
|
|
|
"""PDF for the distribution, given by Pham-Gia"""
|
|
|
|
|
|
|
|
if w <= 0:
|
|
|
|
return 0
|
|
|
|
elif w < 1:
|
|
|
|
term1 = mpmath.beta(a1 + a2, b2) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2))
|
|
|
|
term2 = mpmath.power(w, a1 - 1)
|
|
|
|
term3 = mpmath.hyp2f1(a1 + a2, 1 - b1, a1 + a2 + b2, w)
|
|
|
|
else:
|
|
|
|
term1 = mpmath.beta(a1 + a2, b1) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2))
|
|
|
|
term2 = 1 / mpmath.power(w, a2 + 1)
|
|
|
|
term3 = mpmath.hyp2f1(a1 + a2, 1 - b2, a1 + a2 + b1, 1/w)
|
|
|
|
|
|
|
|
return float(term1 * term2 * term3)
|
|
|
|
|
|
|
|
def _pdf(self, w, a1, b1, a2, b2):
|
|
|
|
return self._do_vectorized(self._pdf_one, w, a1, b1, a2, b2)
|
|
|
|
|
|
|
|
def _cdf_one(self, w, a1, b1, a2, b2):
|
|
|
|
"""PDF for the distribution, given by Pham-Gia"""
|
|
|
|
|
|
|
|
if w <= 0:
|
|
|
|
return 0
|
|
|
|
elif w < 1:
|
|
|
|
term1 = mpmath.beta(a1 + a2, b2) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2))
|
|
|
|
term2 = mpmath.power(w, a1) / a1
|
|
|
|
term3 = mpmath.hyper([a1, a1 + a2, 1 - b1], [a1 + 1, a1 + a2 + b2], w)
|
|
|
|
return float(term1 * term2 * term3)
|
|
|
|
else:
|
|
|
|
term1 = mpmath.beta(a1 + a2, b1) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2))
|
|
|
|
term2 = 1 / (a2 * mpmath.power(w, a2))
|
|
|
|
term3 = mpmath.hyper([a2, a1 + a2, 1 - b2], [a2 + 1, a1 + a2 + b1], 1/w)
|
|
|
|
return 1 - float(term1 * term2 * term3)
|
|
|
|
|
|
|
|
def _cdf(self, w, a1, b1, a2, b2):
|
|
|
|
return self._do_vectorized(self._cdf_one, w, a1, b1, a2, b2)
|
|
|
|
|
|
|
|
def _munp_one(self, k, a1, b1, a2, b2):
|
|
|
|
"""Moments of the distribution, given by Weekend Editor"""
|
|
|
|
|
|
|
|
term1 = mpmath.rf(a1, k) / mpmath.rf(a1 + b1, k)
|
|
|
|
term2 = mpmath.rf(a2 + b2 - k, k) / mpmath.rf(a2 - k, k)
|
|
|
|
return float(term1 * term2)
|
|
|
|
|
|
|
|
def _munp(self, k, a1, b1, a2, b2):
|
|
|
|
return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2)
|
|
|
|
|
|
|
|
beta_ratio = betarat_gen(name='beta_ratio', a=0)
|
2022-10-06 18:44:22 +11:00
|
|
|
|
|
|
|
ConfidenceInterval = collections.namedtuple('ConfidenceInterval', ['lower', 'upper'])
|
|
|
|
|
|
|
|
def hdi(distribution, level=0.95):
|
|
|
|
"""
|
|
|
|
Get the highest density interval for the distribution, e.g. for a Bayesian posterior, the highest posterior density interval (HPD/HDI)
|
|
|
|
"""
|
|
|
|
|
|
|
|
# For a given lower limit, we can compute the corresponding 95% interval
|
|
|
|
def interval_width(lower):
|
|
|
|
upper = distribution.ppf(distribution.cdf(lower) + level)
|
|
|
|
return upper - lower
|
|
|
|
|
|
|
|
# Find such interval which has the smallest width
|
|
|
|
# Use equal-tailed interval as initial guess
|
|
|
|
initial_guess = distribution.ppf((1-level)/2)
|
|
|
|
optimize_result = optimize.minimize(interval_width, initial_guess) # TODO: Allow customising parameters
|
|
|
|
|
|
|
|
lower_limit = optimize_result.x[0]
|
|
|
|
width = optimize_result.fun
|
|
|
|
upper_limit = lower_limit + width
|
|
|
|
|
|
|
|
return ConfidenceInterval(lower_limit, upper_limit)
|