scipy-yli/yli/distributions.py

# scipy-yli: Helpful SciPy utilities and recipes
# Copyright © 2022 Lee Yingtong Li (RunasSudo)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import mpmath
import numpy as np
from scipy import optimize, stats
import collections

class betarat_gen(stats.rv_continuous):
	"""
	Ratio of 2 independent beta-distributed variables
	
	Ratio of Beta(a1, b1) / Beta(a2, b2)
	
	References:
	
	1. Pham-Gia T. Distributions of the ratios of independent beta variables and applications. Communications in Statistics: Theory and Methods. 2000;29(12):2693–2715. doi: 10.1080/03610920008832632
	2. Weekend Editor. On the ratio of Beta-distributed random variables. Some Weekend Reading. 2021 Sep 13. https://www.someweekendreading.blog/beta-ratios/
	"""
	
	def from_scipy(self, beta1, beta2):
		"""Construct a new beta_ratio distribution from two SciPy beta distributions"""
		return self(*beta1.args, *beta2.args)
	
	def _do_vectorized(self, func, x, a1, b1, a2, b2):
		"""Helper function to call the implementation over potentially multiple values"""
		x = np.atleast_1d(x)
		result = np.zeros(x.size)
		for i, (x_, a1_, b1_, a2_, b2_) in enumerate(zip(x, np.pad(a1, x.size, 'edge'), np.pad(b1, x.size, 'edge'), np.pad(a2, x.size, 'edge'), np.pad(b2, x.size, 'edge'))):
			result[i] = func(x_, a1_, b1_, a2_, b2_)
		return result
	
	def _pdf_one(self, w, a1, b1, a2, b2):
		"""PDF for the distribution, given by Pham-Gia"""
		if w <= 0:
			return 0
		elif w < 1:
			term1 = mpmath.beta(a1 + a2, b2) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2))
			term2 = mpmath.power(w, a1 - 1)
			term3 = mpmath.hyp2f1(a1 + a2, 1 - b1, a1 + a2 + b2, w)
		else:
			term1 = mpmath.beta(a1 + a2, b1) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2))
			term2 = 1 / mpmath.power(w, a2 + 1)
			term3 = mpmath.hyp2f1(a1 + a2, 1 - b2, a1 + a2 + b1, 1/w)
		return float(term1 * term2 * term3)
	
	def _pdf(self, w, a1, b1, a2, b2):
		return self._do_vectorized(self._pdf_one, w, a1, b1, a2, b2)
	
	def _cdf_one(self, w, a1, b1, a2, b2):
		"""CDF for the distribution, given by Pham-Gia"""
		if w <= 0:
			return 0
		elif w < 1:
			term1 = mpmath.beta(a1 + a2, b2) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2))
			term2 = mpmath.power(w, a1) / a1
			term3 = mpmath.hyper([a1, a1 + a2, 1 - b1], [a1 + 1, a1 + a2 + b2], w)
			return float(term1 * term2 * term3)
		else:
			term1 = mpmath.beta(a1 + a2, b1) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2))
			term2 = 1 / (a2 * mpmath.power(w, a2))
			term3 = mpmath.hyper([a2, a1 + a2, 1 - b2], [a2 + 1, a1 + a2 + b1], 1/w)
			return 1 - float(term1 * term2 * term3)
	
	def _cdf(self, w, a1, b1, a2, b2):
		return self._do_vectorized(self._cdf_one, w, a1, b1, a2, b2)
	
	def _munp_one(self, k, a1, b1, a2, b2):
		"""Moments of the distribution, given by Weekend Editor"""
		term1 = mpmath.rf(a1, k) / mpmath.rf(a1 + b1, k)
		term2 = mpmath.rf(a2 + b2 - k, k) / mpmath.rf(a2 - k, k)
		return float(term1 * term2)
	
	def _munp(self, k, a1, b1, a2, b2):
		return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2)

beta_ratio = betarat_gen(name='beta_ratio', a=0)
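
# Example usage for beta_ratio (a minimal sketch; the beta parameters here are arbitrary):
#
#     ratio = beta_ratio.from_scipy(stats.beta(12, 25), stats.beta(15, 30))
#     ratio.pdf(1.0)   # density of the ratio at w = 1
#     ratio.cdf(1.0)   # P(X1/X2 <= 1)
#     ratio.mean()     # moments computed from _munp by the rv_continuous machinery
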
ConfidenceInterval = collections.namedtuple('ConfidenceInterval', ['lower', 'upper'])

def hdi(distribution, level=0.95):
	"""
	Get the highest density interval for the distribution, e.g. for a Bayesian posterior, the highest posterior density interval (HPD/HDI)
	"""
	
	# For a given lower limit, compute the width of the interval which contains the required level of probability mass
	def interval_width(lower):
		upper = distribution.ppf(distribution.cdf(lower) + level)
		return upper - lower
	
	# Find the lower limit which minimises the interval width
	# Use the equal-tailed interval as the initial guess
	initial_guess = distribution.ppf((1 - level) / 2)
	optimize_result = optimize.minimize(interval_width, initial_guess)  # TODO: Allow customising parameters
	
	lower_limit = optimize_result.x[0]
	width = optimize_result.fun
	upper_limit = lower_limit + width
	
	return ConfidenceInterval(lower_limit, upper_limit)
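
# Example usage for hdi (a minimal sketch; the Beta(12, 25) posterior is arbitrary):
#
#     posterior = stats.beta(12, 25)
#     interval = hdi(posterior, level=0.95)
#     interval.lower, interval.upper   # bounds of the 95% highest density interval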