scipy-yli/yli/distributions.py

#   scipy-yli: Helpful SciPy utilities and recipes
#   Copyright © 2022  Lee Yingtong Li (RunasSudo)
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU Affero General Public License for more details.
#
#   You should have received a copy of the GNU Affero General Public License
#   along with this program.  If not, see <https://www.gnu.org/licenses/>.

import numpy as np
from scipy import integrate, optimize, stats

from .config import config
from .utils import ConfidenceInterval

class betarat_gen(stats.rv_continuous):
	"""
	Ratio of 2 independent beta-distributed variables
	
	This is a SciPy *stats.rv_continuous* distribution which takes 4 parameters, *a1*, *b1*, *a2* and *b2*,
	and gives the distribution of Beta(*a1*, *b1*) / Beta(*a2*, *b2*).
	
	**References:**
	
	1. Pham-Gia T. Distributions of the ratios of independent beta variables and applications. *Communications in Statistics: Theory and Methods*. 2000;29(12):2693–715. `doi: 10.1080/03610920008832632 <https://doi.org/10.1080/03610920008832632>`_
	
	2. Weekend Editor. On the ratio of Beta-distributed random variables. *Some Weekend Reading*. 2021 Sep 13. https://www.someweekendreading.blog/beta-ratios/
	"""
	
	# TODO: _rvs based on stats.beta
	
	def from_scipy(self, beta1, beta2):
		"""
		Construct a new *beta_ratio* distribution from two SciPy beta distributions
		
		:param beta1: Distribution for the numerator of the ratio
		:type beta1: frozen SciPy stats.beta
		:param beta2: Distribution for the denominator of the ratio
		:type beta2: frozen SciPy stats.beta
		
		:rtype: Frozen *beta_ratio* distribution
		"""
		
		return self(*beta1.args, *beta2.args)
	
	def _do_vectorized(self, func, x, a1, b1, a2, b2):
		"""Helper function to call the implementation over potentially multiple values"""
		
		x = np.atleast_1d(x)
		result = np.zeros(x.size)
		for i, (x_, a1_, b1_, a2_, b2_) in enumerate(zip(x, np.pad(a1, x.size, 'edge'), np.pad(b1, x.size, 'edge'), np.pad(a2, x.size, 'edge'), np.pad(b2, x.size, 'edge'))):
			result[i] = func(x_, a1_, b1_, a2_, b2_)
		return result
	
	def _pdf_one(self, w, a1, b1, a2, b2):
		"""PDF for the distribution, given by Pham-Gia"""
		
		from mpmath import beta, hyp2f1, power
		
		if w <= 0:
			return 0
		elif w < 1:
			term1 = beta(a1 + a2, b2) / (beta(a1, b1) * beta(a2, b2))
			term2 = power(w, a1 - 1)
			term3 = hyp2f1(a1 + a2, 1 - b1, a1 + a2 + b2, w)
		else:
			term1 = beta(a1 + a2, b1) / (beta(a1, b1) * beta(a2, b2))
			term2 = 1 / power(w, a2 + 1)
			term3 = hyp2f1(a1 + a2, 1 - b2, a1 + a2 + b1, 1/w)
		
		return float(term1 * term2 * term3)
	
	def _pdf(self, w, a1, b1, a2, b2):
		return self._do_vectorized(self._pdf_one, w, a1, b1, a2, b2)
	
	def _cdf_one(self, w, a1, b1, a2, b2):
		"""PDF for the distribution, given by Pham-Gia"""
		
		from mpmath import beta, hyper, power
		
		if w <= 0:
			return 0
		elif w < 1:
			term1 = beta(a1 + a2, b2) / (beta(a1, b1) * beta(a2, b2))
			term2 = power(w, a1) / a1
			term3 = hyper([a1, a1 + a2, 1 - b1], [a1 + 1, a1 + a2 + b2], w)
			return float(term1 * term2 * term3)
		else:
			term1 = beta(a1 + a2, b1) / (beta(a1, b1) * beta(a2, b2))
			term2 = 1 / (a2 * power(w, a2))
			term3 = hyper([a2, a1 + a2, 1 - b2], [a2 + 1, a1 + a2 + b1], 1/w)
			return 1 - float(term1 * term2 * term3)
	
	def _cdf(self, w, a1, b1, a2, b2):
		return self._do_vectorized(self._cdf_one, w, a1, b1, a2, b2)
	
	def _munp_one(self, k, a1, b1, a2, b2):
		"""Moments of the distribution, given by Weekend Editor"""
		
		from mpmath import rf
		
		term1 = rf(a1, k) * rf(a2 + b2 - k, k)
		term2 = rf(a1 + b1, k) * rf(a2 - k, k)
		return float(term1 / term2)
	
	def _munp(self, k, a1, b1, a2, b2):
		return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2)

beta_ratio = betarat_gen(name='beta_ratio', a=0)  # a = lower bound of support

class betaoddsrat_gen(stats.rv_continuous):
	"""
	Ratio of the odds of 2 independent beta-distributed variables
	
	This is a SciPy *stats.rv_continuous* distribution which takes 4 parameters, *a1*, *b1*, *a2* and *b2*,
	and gives the distribution of (*X*/(1-*X*)) / (*Y*/(1-*Y*)), where *X* ~ Beta(*a1*, *b1*), and *Y* ~ Beta(*a2*, *b2*).
	
	**Reference:** Hora SC, Kelley GD. Bayesian inference on the odds and risk ratios. *Communications in Statistics: Theory and Methods*. 1983;12(6):725–38. `doi: 10.1080/03610928308828491 <https://doi.org/10.1080/03610928308828491>`_
	"""
	
	# TODO: _rvs based on stats.beta
	
	def from_scipy(self, beta1, beta2):
		"""
		Construct a new *beta_oddsratio* distribution from two SciPy beta distributions
		
		:param beta1: Distribution for the numerator of the ratio
		:type beta1: frozen SciPy stats.beta
		:param beta2: Distribution for the denominator of the ratio
		:type beta2: frozen SciPy stats.beta
		
		:rtype: Frozen *beta_oddsratio* distribution
		"""
		
		return self(*beta1.args, *beta2.args)
	
	def set_cdf_terms(self, cdf_terms):
		"""
		Set the number of terms to use when calculating the CDF
		
		If *cdf_terms* = *np.inf* (default), the CDF will be computed by numerical integration of the PDF.
		
		Otherwise, the CDF will be computed by truncating the infinite sum given by Hora & Kelley to the specified number of terms.
		"""
		
		BETA_ODDSRATIO_CDF_TERMS[0] = cdf_terms
	
	def _do_vectorized(self, func, x, a1, b1, a2, b2):
		"""Helper function to call the implementation over potentially multiple values"""
		
		x = np.atleast_1d(x)
		result = np.zeros(x.size)
		for i, (x_, a1_, b1_, a2_, b2_) in enumerate(zip(x, np.pad(np.atleast_1d(a1), x.size, 'edge'), np.pad(np.atleast_1d(b1), x.size, 'edge'), np.pad(np.atleast_1d(a2), x.size, 'edge'), np.pad(np.atleast_1d(b2), x.size, 'edge'))):
			result[i] = func(x_, a1_, b1_, a2_, b2_)
		return result
	
	def _pdf_one(self, w, a1, b1, a2, b2):
		"""PDF for the distribution, given by Hora & Kelley"""
		
		from mpmath import beta, hyp2f1, power
		
		if w <= 0:
			return 0
		elif w < 1:
			term1 = beta(a1 + a2, b1 + b2) / (beta(a1, b1) * beta(a2, b2))
			term2 = power(w, a1 - 1)
			term3 = hyp2f1(a1 + b1, a1 + a2, a1 + a2 + b1 + b2, 1 - w)
		else:
			term1 = beta(a1 + a2, b1 + b2) / (beta(a1, b1) * beta(a2, b2))
			term2 = power(w, -a2 - 1)
			term3 = hyp2f1(a2 + b2, a1 + a2, a1 + a2 + b1 + b2, 1 - power(w, -1))
		
		return float(term1 * term2 * term3)
	
	def _pdf(self, w, a1, b1, a2, b2):
		return self._do_vectorized(self._pdf_one, w, a1, b1, a2, b2)
	
	def _cdf_one_infsum(self, w, a1, b1, a2, b2):
		"""CDF for the distribution, by truncating infinite sum given by Hora & Kelly"""
		
		from mpmath import beta, betainc, factorial, power, rf
		
		if w <= 0:
			return 0
		elif w < 1:
			k = beta(a1 + a2, b1 + b2) / (beta(a1, b1) * beta(a2, b2))

			inf_sum = 0
			for j in range(0, BETA_ODDSRATIO_CDF_TERMS[0]):
				kj1 = rf(a1 + b1, j) * rf(a1 + a2, j)
				kj2 = rf(a1 + a2 + b1 + b2, j) * factorial(j)
				inf_sum += kj1/kj2 * betainc(a1, j + 1, 0, w)
			
			return k * inf_sum
		else:
			k = beta(a1 + a2, b1 + b2) / (beta(a1, b1) * beta(a2, b2))
			
			inf_sum = 0
			for j in range(0, BETA_ODDSRATIO_CDF_TERMS[0]):
				kj1 = rf(a2 + b2, j) * rf(a1 + a2, j)
				kj2 = rf(a1 + a2 + b1 + b2, j) * factorial(j)
				inf_sum += kj1/kj2 * betainc(a2, j + 1, 0, power(w, -1))
			
			return 1 - k * inf_sum
	
	def _cdf(self, w, a1, b1, a2, b2):
		"""CDF for the distribution"""
		
		w = np.atleast_1d(w)
		a1 = np.atleast_1d(a1)
		b1 = np.atleast_1d(b1)
		a2 = np.atleast_1d(a2)
		b2 = np.atleast_1d(b2)
		
		if not ((a1 == a1[0]).all() and (b1 == b1[0]).all() and (a2 == a2[0]).all() and (b2 == b2[0]).all()):
			raise ValueError('Cannot compute CDF from different distributions')
		
		if BETA_ODDSRATIO_CDF_TERMS[0] == np.inf:
			# Exact solution requested
			if w.size == 1:
				if w <= 0:
					return 0
				
				# Just compute an integral
				if w < self.mean(a1, b1, a2, b2):
					# Integrate normally
					return integrate.quad(lambda x: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), 0, w)[0]
				else:
					# Integrate on the distribution of 1/w (much faster)
					return 1 - integrate.quad(lambda x: self._pdf_one(x, a2[0], b2[0], a1[0], b1[0]), 0, 1/w)[0]
			else:
				# Multiple points requested: use ODE solver
				solution = integrate.solve_ivp(lambda x, _: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), (0, w.max()), [0], t_eval=w, method='LSODA', rtol=1.5e-8, atol=1.5e-8)
				return solution.y
		else:
			# Truncate infinite sum
			return self._do_vectorized(self._cdf_one_infsum, w, a1, b1, a2, b2)
	
	def _ppf_one(self, p, a1, b1, a2, b2):
		"""PPF for the distribution, using Newton's method"""
		
		# Default SciPy implementation uses Brent's method
		# This one is a bit faster
		
		if p <= 0:
			return 0
		
		# Initial guess based on log-normal approximation
		mean = self.mean(a1, b1, a2, b2)
		se_log = self.std(a1, b1, a2, b2)/mean
		initial_guess = np.exp(np.log(mean) + stats.norm.ppf(p) * se_log)
		
		try:
			return optimize.newton(lambda x: self.cdf(x, a1, b1, a2, b2) - p, x0=initial_guess, fprime=lambda x: self.pdf(x, a1, b1, a2, b2))
		except RuntimeError:
			# Failed to converge with Newton's method :(
			# Use Brent's method instead
			return super()._ppf(p, a1, b1, a2, b2)
	
	def _ppf(self, p, a1, b1, a2, b2):
		return self._do_vectorized(self._ppf_one, p, a1, b1, a2, b2)
	
	def _munp_one(self, k, a1, b1, a2, b2):
		"""Moments of the distribution, given by Hora & Kelley"""
		
		from mpmath import rf
		
		term1 = rf(a1, k) * rf(b2, k)
		term2 = rf(b1 - k, k) * rf(a2 - k, k)
		return float(term1 / term2)
	
	def _munp(self, k, a1, b1, a2, b2):
		return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2)

# See beta_oddsratio.cdf
BETA_ODDSRATIO_CDF_TERMS = [100]

beta_oddsratio = betaoddsrat_gen(name='beta_oddsratio', a=0)  # a = lower bound of support

class transformed_gen(stats.rv_continuous):
	"""
	Represents a transformation, *Y*, of a "base" random variable, *X*
	
	This is a SciPy *stats.rv_continuous* distribution which takes parameters as described below.
	
	The transformation is *f*\ (*Y*) = *X*.
	Hence *Y*.pdf\ (*x*) = *X*.pdf\ (*f*\ (*x*)) \* *f*'\ (*x*).
	
	For example, if *X* is a model parameter, then ``transformed_dist(X, f=np.exp, fprime=np.exp, finv=np.log)`` is the distribution of the log-parameter.
	
	**Parameters:**
	
	* **base** (*frozen SciPy distribution*) – Distribution of the base random variable
	
	* **f** (*callable*) – Function *f* representing the transformation, which takes values of *Y* to values of *X*
	
	* **fprime** (*callable*) – Derivative of the function *f*
	
	* **finv** (*callable*) – Inverse of the function *f*, which takes values of *X* to values of *Y*
	"""
	
	def _pdf(self, x, base, f, fprime, finv):
		return base.pdf(np.vectorize(f)(x)) * np.vectorize(fprime)(x)
	
	def _cdf(self, x, base, f, fprime, finv):
		return base.cdf(np.vectorize(f)(x))
	
	def _ppf(self, p, base, f, fprime, finv):
		return np.vectorize(finv)(base.ppf(p))
	
	# Call implementations directly to bypass SciPy args checking, which chokes on "base" parameter
	
	def pdf(self, x, base, f, fprime, finv):
		if np.isscalar(x):
			return np.atleast_1d(self._pdf(x, base, f, fprime, finv))[0]
		return self._pdf(x, base, f, fprime, finv)
	
	def cdf(self, x, base, f, fprime, finv):
		if np.isscalar(x):
			return np.atleast_1d(self._cdf(x, base, f, fprime, finv))[0]
		return self._cdf(x, base, f, fprime, finv)
	
	def ppf(self, x, base, f, fprime, finv):
		if np.isscalar(x):
			return np.atleast_1d(self._ppf(x, base, f, fprime, finv))[0]
		return self._ppf(x, base, f, fprime, finv)

transformed_dist = transformed_gen(name='transformed')

def hdi(distribution, level=None):
	"""
	Get the highest density interval for the distribution, e.g. for a Bayesian posterior, the highest posterior density interval (HPD/HDI)
	
	:param distribution: Distribution to compute the interval for
	:type distribution: frozen SciPy distribution
	:param level: Coverage/confidence probability, default (*None*) is 1 − *config.alpha*
	:type level: float
	
	:rtype: :class:`yli.utils.ConfidenceInterval`
	"""
	
	if level is None:
		level = 1 - config.alpha
	
	# For a given lower limit, we can compute the corresponding level*100% interval
	def interval_width(lower):
		upper = distribution.ppf(distribution.cdf(lower) + level)
		return upper - lower
	
	# Find such interval which has the smallest width
	# Use equal-tailed interval as initial guess
	initial_guess = distribution.ppf((1-level)/2)
	optimize_result = optimize.minimize(interval_width, initial_guess)  # TODO: Allow customising parameters
	
	lower_limit = optimize_result.x[0]
	width = optimize_result.fun
	upper_limit = lower_limit + width
	
	return ConfidenceInterval(lower_limit, upper_limit)
Publish beta ratio distribution 2022-10-05 20:08:04 +11:00			`# scipy-yli: Helpful SciPy utilities and recipes`
			`# Copyright © 2022 Lee Yingtong Li (RunasSudo)`
			`#`
			`# This program is free software: you can redistribute it and/or modify`
			`# it under the terms of the GNU Affero General Public License as published by`
			`# the Free Software Foundation, either version 3 of the License, or`
			`# (at your option) any later version.`
			`#`
			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU Affero General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU Affero General Public License`
			`# along with this program. If not, see <https://www.gnu.org/licenses/>.`

			`import numpy as np`
Implement beta_oddsratio 2022-10-07 16:52:51 +11:00			`from scipy import integrate, optimize, stats`
Implement hdi 2022-10-06 18:44:22 +11:00
Observe config.alpha for significance tests and regression 2022-10-15 23:24:29 +11:00			`from .config import config`
Move ConfidenceInterval to yli.utils 2022-10-18 17:57:19 +11:00			`from .utils import ConfidenceInterval`
Observe config.alpha for significance tests and regression 2022-10-15 23:24:29 +11:00
Publish beta ratio distribution 2022-10-05 20:08:04 +11:00			`class betarat_gen(stats.rv_continuous):`
			`"""`
			`Ratio of 2 independent beta-distributed variables`

Write Sphinx documentation 2022-10-17 21:41:19 +11:00			`This is a SciPy stats.rv_continuous distribution which takes 4 parameters, a1, b1, a2 and b2,`
			`and gives the distribution of Beta(a1, b1) / Beta(a2, b2).`

			`References:`

			1. Pham-Gia T. Distributions of the ratios of independent beta variables and applications. Communications in Statistics: Theory and Methods. 2000;29(12):2693–715. `doi: 10.1080/03610920008832632 <https://doi.org/10.1080/03610920008832632>`_

			`2. Weekend Editor. On the ratio of Beta-distributed random variables. Some Weekend Reading. 2021 Sep 13. https://www.someweekendreading.blog/beta-ratios/`
Publish beta ratio distribution 2022-10-05 20:08:04 +11:00			`"""`

Implement beta_oddsratio 2022-10-07 16:52:51 +11:00			`# TODO: _rvs based on stats.beta`

Implement beta_ratio.from_scipy 2022-10-05 21:50:16 +11:00			`def from_scipy(self, beta1, beta2):`
Write Sphinx documentation 2022-10-17 21:41:19 +11:00			`"""`
			`Construct a new beta_ratio distribution from two SciPy beta distributions`

			`:param beta1: Distribution for the numerator of the ratio`
			`:type beta1: frozen SciPy stats.beta`
			`:param beta2: Distribution for the denominator of the ratio`
			`:type beta2: frozen SciPy stats.beta`

			`:rtype: Frozen beta_ratio distribution`
			`"""`

Implement beta_ratio.from_scipy 2022-10-05 21:50:16 +11:00			`return self(beta1.args, beta2.args)`

Publish beta ratio distribution 2022-10-05 20:08:04 +11:00			`def _do_vectorized(self, func, x, a1, b1, a2, b2):`
			`"""Helper function to call the implementation over potentially multiple values"""`

			`x = np.atleast_1d(x)`
			`result = np.zeros(x.size)`
			`for i, (x_, a1_, b1_, a2_, b2_) in enumerate(zip(x, np.pad(a1, x.size, 'edge'), np.pad(b1, x.size, 'edge'), np.pad(a2, x.size, 'edge'), np.pad(b2, x.size, 'edge'))):`
			`result[i] = func(x_, a1_, b1_, a2_, b2_)`
			`return result`

			`def _pdf_one(self, w, a1, b1, a2, b2):`
			`"""PDF for the distribution, given by Pham-Gia"""`

Make mpmath optional dependency 2022-10-17 20:35:04 +11:00			`from mpmath import beta, hyp2f1, power`

Publish beta ratio distribution 2022-10-05 20:08:04 +11:00			`if w <= 0:`
			`return 0`
			`elif w < 1:`
Make mpmath optional dependency 2022-10-17 20:35:04 +11:00			`term1 = beta(a1 + a2, b2) / (beta(a1, b1) * beta(a2, b2))`
			`term2 = power(w, a1 - 1)`
			`term3 = hyp2f1(a1 + a2, 1 - b1, a1 + a2 + b2, w)`
Publish beta ratio distribution 2022-10-05 20:08:04 +11:00			`else:`
Make mpmath optional dependency 2022-10-17 20:35:04 +11:00			`term1 = beta(a1 + a2, b1) / (beta(a1, b1) * beta(a2, b2))`
			`term2 = 1 / power(w, a2 + 1)`
			`term3 = hyp2f1(a1 + a2, 1 - b2, a1 + a2 + b1, 1/w)`
Publish beta ratio distribution 2022-10-05 20:08:04 +11:00
			`return float(term1 * term2 * term3)`

			`def _pdf(self, w, a1, b1, a2, b2):`
			`return self._do_vectorized(self._pdf_one, w, a1, b1, a2, b2)`

			`def _cdf_one(self, w, a1, b1, a2, b2):`
			`"""PDF for the distribution, given by Pham-Gia"""`

Make mpmath optional dependency 2022-10-17 20:35:04 +11:00			`from mpmath import beta, hyper, power`

Publish beta ratio distribution 2022-10-05 20:08:04 +11:00			`if w <= 0:`
			`return 0`
			`elif w < 1:`
Make mpmath optional dependency 2022-10-17 20:35:04 +11:00			`term1 = beta(a1 + a2, b2) / (beta(a1, b1) * beta(a2, b2))`
			`term2 = power(w, a1) / a1`
			`term3 = hyper([a1, a1 + a2, 1 - b1], [a1 + 1, a1 + a2 + b2], w)`
Publish beta ratio distribution 2022-10-05 20:08:04 +11:00			`return float(term1 * term2 * term3)`
			`else:`
Make mpmath optional dependency 2022-10-17 20:35:04 +11:00			`term1 = beta(a1 + a2, b1) / (beta(a1, b1) * beta(a2, b2))`
			`term2 = 1 / (a2 * power(w, a2))`
			`term3 = hyper([a2, a1 + a2, 1 - b2], [a2 + 1, a1 + a2 + b1], 1/w)`
Publish beta ratio distribution 2022-10-05 20:08:04 +11:00			`return 1 - float(term1 * term2 * term3)`

			`def _cdf(self, w, a1, b1, a2, b2):`
			`return self._do_vectorized(self._cdf_one, w, a1, b1, a2, b2)`

			`def _munp_one(self, k, a1, b1, a2, b2):`
			`"""Moments of the distribution, given by Weekend Editor"""`

Make mpmath optional dependency 2022-10-17 20:35:04 +11:00			`from mpmath import rf`

			`term1 = rf(a1, k) * rf(a2 + b2 - k, k)`
			`term2 = rf(a1 + b1, k) * rf(a2 - k, k)`
Implement beta_oddsratio 2022-10-07 16:52:51 +11:00			`return float(term1 / term2)`

			`def _munp(self, k, a1, b1, a2, b2):`
			`return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2)`

			`beta_ratio = betarat_gen(name='beta_ratio', a=0) # a = lower bound of support`

			`class betaoddsrat_gen(stats.rv_continuous):`
			`"""`
			`Ratio of the odds of 2 independent beta-distributed variables`

Write Sphinx documentation 2022-10-17 21:41:19 +11:00			`This is a SciPy stats.rv_continuous distribution which takes 4 parameters, a1, b1, a2 and b2,`
			`and gives the distribution of (X/(1-X)) / (Y/(1-Y)), where X ~ Beta(a1, b1), and Y ~ Beta(a2, b2).`

			Reference: Hora SC, Kelley GD. Bayesian inference on the odds and risk ratios. Communications in Statistics: Theory and Methods. 1983;12(6):725–38. `doi: 10.1080/03610928308828491 <https://doi.org/10.1080/03610928308828491>`_
Implement beta_oddsratio 2022-10-07 16:52:51 +11:00			`"""`

			`# TODO: _rvs based on stats.beta`

			`def from_scipy(self, beta1, beta2):`
Write Sphinx documentation 2022-10-17 21:41:19 +11:00			`"""`
			`Construct a new beta_oddsratio distribution from two SciPy beta distributions`

			`:param beta1: Distribution for the numerator of the ratio`
			`:type beta1: frozen SciPy stats.beta`
			`:param beta2: Distribution for the denominator of the ratio`
			`:type beta2: frozen SciPy stats.beta`

			`:rtype: Frozen beta_oddsratio distribution`
			`"""`

Implement beta_oddsratio 2022-10-07 16:52:51 +11:00			`return self(beta1.args, beta2.args)`

Implement truncated sum for calculating beta_oddsratio.cdf 2022-10-07 22:12:55 +11:00			`def set_cdf_terms(self, cdf_terms):`
Write Sphinx documentation 2022-10-17 21:41:19 +11:00			`"""`
			`Set the number of terms to use when calculating the CDF`

			`If cdf_terms = np.inf (default), the CDF will be computed by numerical integration of the PDF.`

			`Otherwise, the CDF will be computed by truncating the infinite sum given by Hora & Kelley to the specified number of terms.`
			`"""`

Implement truncated sum for calculating beta_oddsratio.cdf 2022-10-07 22:12:55 +11:00			`BETA_ODDSRATIO_CDF_TERMS[0] = cdf_terms`

Implement beta_oddsratio 2022-10-07 16:52:51 +11:00			`def _do_vectorized(self, func, x, a1, b1, a2, b2):`
			`"""Helper function to call the implementation over potentially multiple values"""`

			`x = np.atleast_1d(x)`
			`result = np.zeros(x.size)`
			`for i, (x_, a1_, b1_, a2_, b2_) in enumerate(zip(x, np.pad(np.atleast_1d(a1), x.size, 'edge'), np.pad(np.atleast_1d(b1), x.size, 'edge'), np.pad(np.atleast_1d(a2), x.size, 'edge'), np.pad(np.atleast_1d(b2), x.size, 'edge'))):`
			`result[i] = func(x_, a1_, b1_, a2_, b2_)`
			`return result`

			`def _pdf_one(self, w, a1, b1, a2, b2):`
			`"""PDF for the distribution, given by Hora & Kelley"""`

Make mpmath optional dependency 2022-10-17 20:35:04 +11:00			`from mpmath import beta, hyp2f1, power`

Implement beta_oddsratio 2022-10-07 16:52:51 +11:00			`if w <= 0:`
			`return 0`
			`elif w < 1:`
Make mpmath optional dependency 2022-10-17 20:35:04 +11:00			`term1 = beta(a1 + a2, b1 + b2) / (beta(a1, b1) * beta(a2, b2))`
			`term2 = power(w, a1 - 1)`
			`term3 = hyp2f1(a1 + b1, a1 + a2, a1 + a2 + b1 + b2, 1 - w)`
Implement beta_oddsratio 2022-10-07 16:52:51 +11:00			`else:`
Make mpmath optional dependency 2022-10-17 20:35:04 +11:00			`term1 = beta(a1 + a2, b1 + b2) / (beta(a1, b1) * beta(a2, b2))`
			`term2 = power(w, -a2 - 1)`
			`term3 = hyp2f1(a2 + b2, a1 + a2, a1 + a2 + b1 + b2, 1 - power(w, -1))`
Implement beta_oddsratio 2022-10-07 16:52:51 +11:00
			`return float(term1 * term2 * term3)`

			`def _pdf(self, w, a1, b1, a2, b2):`
			`return self._do_vectorized(self._pdf_one, w, a1, b1, a2, b2)`

Implement truncated sum for calculating beta_oddsratio.cdf 2022-10-07 22:12:55 +11:00			`def _cdf_one_infsum(self, w, a1, b1, a2, b2):`
			`"""CDF for the distribution, by truncating infinite sum given by Hora & Kelly"""`

Make mpmath optional dependency 2022-10-17 20:35:04 +11:00			`from mpmath import beta, betainc, factorial, power, rf`

Implement truncated sum for calculating beta_oddsratio.cdf 2022-10-07 22:12:55 +11:00			`if w <= 0:`
			`return 0`
			`elif w < 1:`
Make mpmath optional dependency 2022-10-17 20:35:04 +11:00			`k = beta(a1 + a2, b1 + b2) / (beta(a1, b1) * beta(a2, b2))`
Implement truncated sum for calculating beta_oddsratio.cdf 2022-10-07 22:12:55 +11:00
			`inf_sum = 0`
			`for j in range(0, BETA_ODDSRATIO_CDF_TERMS[0]):`
Make mpmath optional dependency 2022-10-17 20:35:04 +11:00			`kj1 = rf(a1 + b1, j) * rf(a1 + a2, j)`
			`kj2 = rf(a1 + a2 + b1 + b2, j) * factorial(j)`
			`inf_sum += kj1/kj2 * betainc(a1, j + 1, 0, w)`
Implement truncated sum for calculating beta_oddsratio.cdf 2022-10-07 22:12:55 +11:00
			`return k * inf_sum`
			`else:`
Make mpmath optional dependency 2022-10-17 20:35:04 +11:00			`k = beta(a1 + a2, b1 + b2) / (beta(a1, b1) * beta(a2, b2))`
Implement truncated sum for calculating beta_oddsratio.cdf 2022-10-07 22:12:55 +11:00
			`inf_sum = 0`
			`for j in range(0, BETA_ODDSRATIO_CDF_TERMS[0]):`
Make mpmath optional dependency 2022-10-17 20:35:04 +11:00			`kj1 = rf(a2 + b2, j) * rf(a1 + a2, j)`
			`kj2 = rf(a1 + a2 + b1 + b2, j) * factorial(j)`
			`inf_sum += kj1/kj2 * betainc(a2, j + 1, 0, power(w, -1))`
Implement truncated sum for calculating beta_oddsratio.cdf 2022-10-07 22:12:55 +11:00
			`return 1 - k * inf_sum`

Implement beta_oddsratio 2022-10-07 16:52:51 +11:00			`def _cdf(self, w, a1, b1, a2, b2):`
Write Sphinx documentation 2022-10-17 21:41:19 +11:00			`"""CDF for the distribution"""`
Implement beta_oddsratio 2022-10-07 16:52:51 +11:00
			`w = np.atleast_1d(w)`
			`a1 = np.atleast_1d(a1)`
			`b1 = np.atleast_1d(b1)`
			`a2 = np.atleast_1d(a2)`
			`b2 = np.atleast_1d(b2)`

			`if not ((a1 == a1[0]).all() and (b1 == b1[0]).all() and (a2 == a2[0]).all() and (b2 == b2[0]).all()):`
			`raise ValueError('Cannot compute CDF from different distributions')`

Implement truncated sum for calculating beta_oddsratio.cdf 2022-10-07 22:12:55 +11:00			`if BETA_ODDSRATIO_CDF_TERMS[0] == np.inf:`
			`# Exact solution requested`
			`if w.size == 1:`
			`if w <= 0:`
			`return 0`

			`# Just compute an integral`
			`if w < self.mean(a1, b1, a2, b2):`
			`# Integrate normally`
			`return integrate.quad(lambda x: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), 0, w)[0]`
			`else:`
			`# Integrate on the distribution of 1/w (much faster)`
			`return 1 - integrate.quad(lambda x: self._pdf_one(x, a2[0], b2[0], a1[0], b1[0]), 0, 1/w)[0]`
Implement beta_oddsratio 2022-10-07 16:52:51 +11:00			`else:`
Implement truncated sum for calculating beta_oddsratio.cdf 2022-10-07 22:12:55 +11:00			`# Multiple points requested: use ODE solver`
			`solution = integrate.solve_ivp(lambda x, _: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), (0, w.max()), [0], t_eval=w, method='LSODA', rtol=1.5e-8, atol=1.5e-8)`
			`return solution.y`
Implement beta_oddsratio 2022-10-07 16:52:51 +11:00			`else:`
Implement truncated sum for calculating beta_oddsratio.cdf 2022-10-07 22:12:55 +11:00			`# Truncate infinite sum`
			`return self._do_vectorized(self._cdf_one_infsum, w, a1, b1, a2, b2)`
Implement beta_oddsratio 2022-10-07 16:52:51 +11:00
			`def _ppf_one(self, p, a1, b1, a2, b2):`
			`"""PPF for the distribution, using Newton's method"""`

			`# Default SciPy implementation uses Brent's method`
			`# This one is a bit faster`

			`if p <= 0:`
			`return 0`

			`# Initial guess based on log-normal approximation`
			`mean = self.mean(a1, b1, a2, b2)`
			`se_log = self.std(a1, b1, a2, b2)/mean`
			`initial_guess = np.exp(np.log(mean) + stats.norm.ppf(p) * se_log)`

			`try:`
			`return optimize.newton(lambda x: self.cdf(x, a1, b1, a2, b2) - p, x0=initial_guess, fprime=lambda x: self.pdf(x, a1, b1, a2, b2))`
			`except RuntimeError:`
			`# Failed to converge with Newton's method :(`
			`# Use Brent's method instead`
			`return super()._ppf(p, a1, b1, a2, b2)`

			`def _ppf(self, p, a1, b1, a2, b2):`
			`return self._do_vectorized(self._ppf_one, p, a1, b1, a2, b2)`

			`def _munp_one(self, k, a1, b1, a2, b2):`
			`"""Moments of the distribution, given by Hora & Kelley"""`

Make mpmath optional dependency 2022-10-17 20:35:04 +11:00			`from mpmath import rf`

			`term1 = rf(a1, k) * rf(b2, k)`
			`term2 = rf(b1 - k, k) * rf(a2 - k, k)`
Implement beta_oddsratio 2022-10-07 16:52:51 +11:00			`return float(term1 / term2)`
Publish beta ratio distribution 2022-10-05 20:08:04 +11:00
			`def _munp(self, k, a1, b1, a2, b2):`
			`return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2)`

Implement truncated sum for calculating beta_oddsratio.cdf 2022-10-07 22:12:55 +11:00			`# See beta_oddsratio.cdf`
			`BETA_ODDSRATIO_CDF_TERMS = [100]`

Implement transformed_dist 2022-10-09 12:54:23 +11:00			`beta_oddsratio = betaoddsrat_gen(name='beta_oddsratio', a=0) # a = lower bound of support`

			`class transformed_gen(stats.rv_continuous):`
			`"""`
Write Sphinx documentation 2022-10-17 21:41:19 +11:00			`Represents a transformation, Y, of a "base" random variable, X`

			`This is a SciPy stats.rv_continuous distribution which takes parameters as described below.`
Implement transformed_dist 2022-10-09 12:54:23 +11:00
Write Sphinx documentation 2022-10-17 21:41:19 +11:00			`The transformation is f\ (Y) = X.`
			`Hence Y.pdf\ (x) = X.pdf\ (f\ (x)) \* f'\ (x).`

			For example, if X is a model parameter, then ``transformed_dist(X, f=np.exp, fprime=np.exp, finv=np.log)`` is the distribution of the log-parameter.

			`Parameters:`

			`* base (frozen SciPy distribution) – Distribution of the base random variable`

			`* f (callable) – Function f representing the transformation, which takes values of Y to values of X`

			`* fprime (callable) – Derivative of the function f`

			`* finv (callable) – Inverse of the function f, which takes values of X to values of Y`
Implement transformed_dist 2022-10-09 12:54:23 +11:00			`"""`

			`def _pdf(self, x, base, f, fprime, finv):`
			`return base.pdf(np.vectorize(f)(x)) * np.vectorize(fprime)(x)`

			`def _cdf(self, x, base, f, fprime, finv):`
			`return base.cdf(np.vectorize(f)(x))`

			`def _ppf(self, p, base, f, fprime, finv):`
			`return np.vectorize(finv)(base.ppf(p))`

			`# Call implementations directly to bypass SciPy args checking, which chokes on "base" parameter`

			`def pdf(self, x, base, f, fprime, finv):`
			`if np.isscalar(x):`
			`return np.atleast_1d(self._pdf(x, base, f, fprime, finv))[0]`
			`return self._pdf(x, base, f, fprime, finv)`

			`def cdf(self, x, base, f, fprime, finv):`
			`if np.isscalar(x):`
			`return np.atleast_1d(self._cdf(x, base, f, fprime, finv))[0]`
			`return self._cdf(x, base, f, fprime, finv)`

			`def ppf(self, x, base, f, fprime, finv):`
			`if np.isscalar(x):`
			`return np.atleast_1d(self._ppf(x, base, f, fprime, finv))[0]`
			`return self._ppf(x, base, f, fprime, finv)`

			`transformed_dist = transformed_gen(name='transformed')`
Implement hdi 2022-10-06 18:44:22 +11:00
Observe config.alpha for significance tests and regression 2022-10-15 23:24:29 +11:00			`def hdi(distribution, level=None):`
			`"""`
			`Get the highest density interval for the distribution, e.g. for a Bayesian posterior, the highest posterior density interval (HPD/HDI)`

Write Sphinx documentation 2022-10-17 21:41:19 +11:00			`:param distribution: Distribution to compute the interval for`
			`:type distribution: frozen SciPy distribution`
			`:param level: Coverage/confidence probability, default (None) is 1 − config.alpha`
			`:type level: float`

			:rtype: :class:`yli.utils.ConfidenceInterval`
Observe config.alpha for significance tests and regression 2022-10-15 23:24:29 +11:00			`"""`

			`if level is None:`
			`level = 1 - config.alpha`
Implement hdi 2022-10-06 18:44:22 +11:00
Observe config.alpha for significance tests and regression 2022-10-15 23:24:29 +11:00			`# For a given lower limit, we can compute the corresponding level*100% interval`
Implement hdi 2022-10-06 18:44:22 +11:00			`def interval_width(lower):`
			`upper = distribution.ppf(distribution.cdf(lower) + level)`
			`return upper - lower`

			`# Find such interval which has the smallest width`
			`# Use equal-tailed interval as initial guess`
			`initial_guess = distribution.ppf((1-level)/2)`
			`optimize_result = optimize.minimize(interval_width, initial_guess) # TODO: Allow customising parameters`

			`lower_limit = optimize_result.x[0]`
			`width = optimize_result.fun`
			`upper_limit = lower_limit + width`

			`return ConfidenceInterval(lower_limit, upper_limit)`