2022-10-05 20:08:04 +11:00
|
|
|
# scipy-yli: Helpful SciPy utilities and recipes
|
|
|
|
# Copyright © 2022 Lee Yingtong Li (RunasSudo)
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU Affero General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
import numpy as np
|
2022-10-07 16:52:51 +11:00
|
|
|
from scipy import integrate, optimize, stats
|
2022-10-06 18:44:22 +11:00
|
|
|
|
2022-10-15 23:24:29 +11:00
|
|
|
from .config import config
|
2022-10-18 17:57:19 +11:00
|
|
|
from .utils import ConfidenceInterval
|
2022-10-15 23:24:29 +11:00
|
|
|
|
2022-10-05 20:08:04 +11:00
|
|
|
class betarat_gen(stats.rv_continuous):
|
|
|
|
"""
|
|
|
|
Ratio of 2 independent beta-distributed variables
|
|
|
|
|
2022-10-17 21:41:19 +11:00
|
|
|
This is a SciPy *stats.rv_continuous* distribution which takes 4 parameters, *a1*, *b1*, *a2* and *b2*,
|
|
|
|
and gives the distribution of Beta(*a1*, *b1*) / Beta(*a2*, *b2*).
|
|
|
|
|
|
|
|
**References:**
|
|
|
|
|
|
|
|
1. Pham-Gia T. Distributions of the ratios of independent beta variables and applications. *Communications in Statistics: Theory and Methods*. 2000;29(12):2693–715. `doi: 10.1080/03610920008832632 <https://doi.org/10.1080/03610920008832632>`_
|
|
|
|
|
|
|
|
2. Weekend Editor. On the ratio of Beta-distributed random variables. *Some Weekend Reading*. 2021 Sep 13. https://www.someweekendreading.blog/beta-ratios/
|
2022-10-05 20:08:04 +11:00
|
|
|
"""
|
|
|
|
|
2022-10-07 16:52:51 +11:00
|
|
|
# TODO: _rvs based on stats.beta
|
|
|
|
|
2022-10-05 21:50:16 +11:00
|
|
|
def from_scipy(self, beta1, beta2):
|
2022-10-17 21:41:19 +11:00
|
|
|
"""
|
|
|
|
Construct a new *beta_ratio* distribution from two SciPy beta distributions
|
|
|
|
|
|
|
|
:param beta1: Distribution for the numerator of the ratio
|
|
|
|
:type beta1: frozen SciPy stats.beta
|
|
|
|
:param beta2: Distribution for the denominator of the ratio
|
|
|
|
:type beta2: frozen SciPy stats.beta
|
|
|
|
|
|
|
|
:rtype: Frozen *beta_ratio* distribution
|
|
|
|
"""
|
|
|
|
|
2022-10-05 21:50:16 +11:00
|
|
|
return self(*beta1.args, *beta2.args)
|
|
|
|
|
2022-10-05 20:08:04 +11:00
|
|
|
def _do_vectorized(self, func, x, a1, b1, a2, b2):
|
|
|
|
"""Helper function to call the implementation over potentially multiple values"""
|
|
|
|
|
|
|
|
x = np.atleast_1d(x)
|
|
|
|
result = np.zeros(x.size)
|
|
|
|
for i, (x_, a1_, b1_, a2_, b2_) in enumerate(zip(x, np.pad(a1, x.size, 'edge'), np.pad(b1, x.size, 'edge'), np.pad(a2, x.size, 'edge'), np.pad(b2, x.size, 'edge'))):
|
|
|
|
result[i] = func(x_, a1_, b1_, a2_, b2_)
|
|
|
|
return result
|
|
|
|
|
|
|
|
def _pdf_one(self, w, a1, b1, a2, b2):
|
|
|
|
"""PDF for the distribution, given by Pham-Gia"""
|
|
|
|
|
2022-10-17 20:35:04 +11:00
|
|
|
from mpmath import beta, hyp2f1, power
|
|
|
|
|
2022-10-05 20:08:04 +11:00
|
|
|
if w <= 0:
|
|
|
|
return 0
|
|
|
|
elif w < 1:
|
2022-10-17 20:35:04 +11:00
|
|
|
term1 = beta(a1 + a2, b2) / (beta(a1, b1) * beta(a2, b2))
|
|
|
|
term2 = power(w, a1 - 1)
|
|
|
|
term3 = hyp2f1(a1 + a2, 1 - b1, a1 + a2 + b2, w)
|
2022-10-05 20:08:04 +11:00
|
|
|
else:
|
2022-10-17 20:35:04 +11:00
|
|
|
term1 = beta(a1 + a2, b1) / (beta(a1, b1) * beta(a2, b2))
|
|
|
|
term2 = 1 / power(w, a2 + 1)
|
|
|
|
term3 = hyp2f1(a1 + a2, 1 - b2, a1 + a2 + b1, 1/w)
|
2022-10-05 20:08:04 +11:00
|
|
|
|
|
|
|
return float(term1 * term2 * term3)
|
|
|
|
|
|
|
|
def _pdf(self, w, a1, b1, a2, b2):
|
|
|
|
return self._do_vectorized(self._pdf_one, w, a1, b1, a2, b2)
|
|
|
|
|
|
|
|
def _cdf_one(self, w, a1, b1, a2, b2):
|
|
|
|
"""PDF for the distribution, given by Pham-Gia"""
|
|
|
|
|
2022-10-17 20:35:04 +11:00
|
|
|
from mpmath import beta, hyper, power
|
|
|
|
|
2022-10-05 20:08:04 +11:00
|
|
|
if w <= 0:
|
|
|
|
return 0
|
|
|
|
elif w < 1:
|
2022-10-17 20:35:04 +11:00
|
|
|
term1 = beta(a1 + a2, b2) / (beta(a1, b1) * beta(a2, b2))
|
|
|
|
term2 = power(w, a1) / a1
|
|
|
|
term3 = hyper([a1, a1 + a2, 1 - b1], [a1 + 1, a1 + a2 + b2], w)
|
2022-10-05 20:08:04 +11:00
|
|
|
return float(term1 * term2 * term3)
|
|
|
|
else:
|
2022-10-17 20:35:04 +11:00
|
|
|
term1 = beta(a1 + a2, b1) / (beta(a1, b1) * beta(a2, b2))
|
|
|
|
term2 = 1 / (a2 * power(w, a2))
|
|
|
|
term3 = hyper([a2, a1 + a2, 1 - b2], [a2 + 1, a1 + a2 + b1], 1/w)
|
2022-10-05 20:08:04 +11:00
|
|
|
return 1 - float(term1 * term2 * term3)
|
|
|
|
|
|
|
|
def _cdf(self, w, a1, b1, a2, b2):
|
|
|
|
return self._do_vectorized(self._cdf_one, w, a1, b1, a2, b2)
|
|
|
|
|
|
|
|
def _munp_one(self, k, a1, b1, a2, b2):
|
|
|
|
"""Moments of the distribution, given by Weekend Editor"""
|
|
|
|
|
2022-10-17 20:35:04 +11:00
|
|
|
from mpmath import rf
|
|
|
|
|
|
|
|
term1 = rf(a1, k) * rf(a2 + b2 - k, k)
|
|
|
|
term2 = rf(a1 + b1, k) * rf(a2 - k, k)
|
2022-10-07 16:52:51 +11:00
|
|
|
return float(term1 / term2)
|
|
|
|
|
|
|
|
def _munp(self, k, a1, b1, a2, b2):
|
|
|
|
return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2)
|
|
|
|
|
|
|
|
beta_ratio = betarat_gen(name='beta_ratio', a=0) # a = lower bound of support
|
|
|
|
|
|
|
|
class betaoddsrat_gen(stats.rv_continuous):
|
|
|
|
"""
|
|
|
|
Ratio of the odds of 2 independent beta-distributed variables
|
|
|
|
|
2022-10-17 21:41:19 +11:00
|
|
|
This is a SciPy *stats.rv_continuous* distribution which takes 4 parameters, *a1*, *b1*, *a2* and *b2*,
|
|
|
|
and gives the distribution of (*X*/(1-*X*)) / (*Y*/(1-*Y*)), where *X* ~ Beta(*a1*, *b1*), and *Y* ~ Beta(*a2*, *b2*).
|
|
|
|
|
|
|
|
**Reference:** Hora SC, Kelley GD. Bayesian inference on the odds and risk ratios. *Communications in Statistics: Theory and Methods*. 1983;12(6):725–38. `doi: 10.1080/03610928308828491 <https://doi.org/10.1080/03610928308828491>`_
|
2022-10-07 16:52:51 +11:00
|
|
|
"""
|
|
|
|
|
|
|
|
# TODO: _rvs based on stats.beta
|
|
|
|
|
|
|
|
def from_scipy(self, beta1, beta2):
|
2022-10-17 21:41:19 +11:00
|
|
|
"""
|
|
|
|
Construct a new *beta_oddsratio* distribution from two SciPy beta distributions
|
|
|
|
|
|
|
|
:param beta1: Distribution for the numerator of the ratio
|
|
|
|
:type beta1: frozen SciPy stats.beta
|
|
|
|
:param beta2: Distribution for the denominator of the ratio
|
|
|
|
:type beta2: frozen SciPy stats.beta
|
|
|
|
|
|
|
|
:rtype: Frozen *beta_oddsratio* distribution
|
|
|
|
"""
|
|
|
|
|
2022-10-07 16:52:51 +11:00
|
|
|
return self(*beta1.args, *beta2.args)
|
|
|
|
|
2022-10-07 22:12:55 +11:00
|
|
|
def set_cdf_terms(self, cdf_terms):
|
2022-10-17 21:41:19 +11:00
|
|
|
"""
|
|
|
|
Set the number of terms to use when calculating the CDF
|
|
|
|
|
|
|
|
If *cdf_terms* = *np.inf* (default), the CDF will be computed by numerical integration of the PDF.
|
|
|
|
|
|
|
|
Otherwise, the CDF will be computed by truncating the infinite sum given by Hora & Kelley to the specified number of terms.
|
|
|
|
"""
|
|
|
|
|
2022-10-07 22:12:55 +11:00
|
|
|
BETA_ODDSRATIO_CDF_TERMS[0] = cdf_terms
|
|
|
|
|
2022-10-07 16:52:51 +11:00
|
|
|
def _do_vectorized(self, func, x, a1, b1, a2, b2):
|
|
|
|
"""Helper function to call the implementation over potentially multiple values"""
|
|
|
|
|
|
|
|
x = np.atleast_1d(x)
|
|
|
|
result = np.zeros(x.size)
|
|
|
|
for i, (x_, a1_, b1_, a2_, b2_) in enumerate(zip(x, np.pad(np.atleast_1d(a1), x.size, 'edge'), np.pad(np.atleast_1d(b1), x.size, 'edge'), np.pad(np.atleast_1d(a2), x.size, 'edge'), np.pad(np.atleast_1d(b2), x.size, 'edge'))):
|
|
|
|
result[i] = func(x_, a1_, b1_, a2_, b2_)
|
|
|
|
return result
|
|
|
|
|
|
|
|
def _pdf_one(self, w, a1, b1, a2, b2):
|
|
|
|
"""PDF for the distribution, given by Hora & Kelley"""
|
|
|
|
|
2022-10-17 20:35:04 +11:00
|
|
|
from mpmath import beta, hyp2f1, power
|
|
|
|
|
2022-10-07 16:52:51 +11:00
|
|
|
if w <= 0:
|
|
|
|
return 0
|
|
|
|
elif w < 1:
|
2022-10-17 20:35:04 +11:00
|
|
|
term1 = beta(a1 + a2, b1 + b2) / (beta(a1, b1) * beta(a2, b2))
|
|
|
|
term2 = power(w, a1 - 1)
|
|
|
|
term3 = hyp2f1(a1 + b1, a1 + a2, a1 + a2 + b1 + b2, 1 - w)
|
2022-10-07 16:52:51 +11:00
|
|
|
else:
|
2022-10-17 20:35:04 +11:00
|
|
|
term1 = beta(a1 + a2, b1 + b2) / (beta(a1, b1) * beta(a2, b2))
|
|
|
|
term2 = power(w, -a2 - 1)
|
|
|
|
term3 = hyp2f1(a2 + b2, a1 + a2, a1 + a2 + b1 + b2, 1 - power(w, -1))
|
2022-10-07 16:52:51 +11:00
|
|
|
|
|
|
|
return float(term1 * term2 * term3)
|
|
|
|
|
|
|
|
def _pdf(self, w, a1, b1, a2, b2):
|
|
|
|
return self._do_vectorized(self._pdf_one, w, a1, b1, a2, b2)
|
|
|
|
|
2022-10-07 22:12:55 +11:00
|
|
|
def _cdf_one_infsum(self, w, a1, b1, a2, b2):
|
|
|
|
"""CDF for the distribution, by truncating infinite sum given by Hora & Kelly"""
|
|
|
|
|
2022-10-17 20:35:04 +11:00
|
|
|
from mpmath import beta, betainc, factorial, power, rf
|
|
|
|
|
2022-10-07 22:12:55 +11:00
|
|
|
if w <= 0:
|
|
|
|
return 0
|
|
|
|
elif w < 1:
|
2022-10-17 20:35:04 +11:00
|
|
|
k = beta(a1 + a2, b1 + b2) / (beta(a1, b1) * beta(a2, b2))
|
2022-10-07 22:12:55 +11:00
|
|
|
|
|
|
|
inf_sum = 0
|
|
|
|
for j in range(0, BETA_ODDSRATIO_CDF_TERMS[0]):
|
2022-10-17 20:35:04 +11:00
|
|
|
kj1 = rf(a1 + b1, j) * rf(a1 + a2, j)
|
|
|
|
kj2 = rf(a1 + a2 + b1 + b2, j) * factorial(j)
|
|
|
|
inf_sum += kj1/kj2 * betainc(a1, j + 1, 0, w)
|
2022-10-07 22:12:55 +11:00
|
|
|
|
|
|
|
return k * inf_sum
|
|
|
|
else:
|
2022-10-17 20:35:04 +11:00
|
|
|
k = beta(a1 + a2, b1 + b2) / (beta(a1, b1) * beta(a2, b2))
|
2022-10-07 22:12:55 +11:00
|
|
|
|
|
|
|
inf_sum = 0
|
|
|
|
for j in range(0, BETA_ODDSRATIO_CDF_TERMS[0]):
|
2022-10-17 20:35:04 +11:00
|
|
|
kj1 = rf(a2 + b2, j) * rf(a1 + a2, j)
|
|
|
|
kj2 = rf(a1 + a2 + b1 + b2, j) * factorial(j)
|
|
|
|
inf_sum += kj1/kj2 * betainc(a2, j + 1, 0, power(w, -1))
|
2022-10-07 22:12:55 +11:00
|
|
|
|
|
|
|
return 1 - k * inf_sum
|
|
|
|
|
2022-10-07 16:52:51 +11:00
|
|
|
def _cdf(self, w, a1, b1, a2, b2):
|
2022-10-17 21:41:19 +11:00
|
|
|
"""CDF for the distribution"""
|
2022-10-07 16:52:51 +11:00
|
|
|
|
|
|
|
w = np.atleast_1d(w)
|
|
|
|
a1 = np.atleast_1d(a1)
|
|
|
|
b1 = np.atleast_1d(b1)
|
|
|
|
a2 = np.atleast_1d(a2)
|
|
|
|
b2 = np.atleast_1d(b2)
|
|
|
|
|
|
|
|
if not ((a1 == a1[0]).all() and (b1 == b1[0]).all() and (a2 == a2[0]).all() and (b2 == b2[0]).all()):
|
|
|
|
raise ValueError('Cannot compute CDF from different distributions')
|
|
|
|
|
2022-10-07 22:12:55 +11:00
|
|
|
if BETA_ODDSRATIO_CDF_TERMS[0] == np.inf:
|
|
|
|
# Exact solution requested
|
|
|
|
if w.size == 1:
|
|
|
|
if w <= 0:
|
|
|
|
return 0
|
|
|
|
|
|
|
|
# Just compute an integral
|
|
|
|
if w < self.mean(a1, b1, a2, b2):
|
|
|
|
# Integrate normally
|
|
|
|
return integrate.quad(lambda x: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), 0, w)[0]
|
|
|
|
else:
|
|
|
|
# Integrate on the distribution of 1/w (much faster)
|
|
|
|
return 1 - integrate.quad(lambda x: self._pdf_one(x, a2[0], b2[0], a1[0], b1[0]), 0, 1/w)[0]
|
2022-10-07 16:52:51 +11:00
|
|
|
else:
|
2022-10-07 22:12:55 +11:00
|
|
|
# Multiple points requested: use ODE solver
|
|
|
|
solution = integrate.solve_ivp(lambda x, _: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), (0, w.max()), [0], t_eval=w, method='LSODA', rtol=1.5e-8, atol=1.5e-8)
|
|
|
|
return solution.y
|
2022-10-07 16:52:51 +11:00
|
|
|
else:
|
2022-10-07 22:12:55 +11:00
|
|
|
# Truncate infinite sum
|
|
|
|
return self._do_vectorized(self._cdf_one_infsum, w, a1, b1, a2, b2)
|
2022-10-07 16:52:51 +11:00
|
|
|
|
|
|
|
def _ppf_one(self, p, a1, b1, a2, b2):
|
|
|
|
"""PPF for the distribution, using Newton's method"""
|
|
|
|
|
|
|
|
# Default SciPy implementation uses Brent's method
|
|
|
|
# This one is a bit faster
|
|
|
|
|
|
|
|
if p <= 0:
|
|
|
|
return 0
|
|
|
|
|
|
|
|
# Initial guess based on log-normal approximation
|
|
|
|
mean = self.mean(a1, b1, a2, b2)
|
|
|
|
se_log = self.std(a1, b1, a2, b2)/mean
|
|
|
|
initial_guess = np.exp(np.log(mean) + stats.norm.ppf(p) * se_log)
|
|
|
|
|
|
|
|
try:
|
|
|
|
return optimize.newton(lambda x: self.cdf(x, a1, b1, a2, b2) - p, x0=initial_guess, fprime=lambda x: self.pdf(x, a1, b1, a2, b2))
|
|
|
|
except RuntimeError:
|
|
|
|
# Failed to converge with Newton's method :(
|
|
|
|
# Use Brent's method instead
|
|
|
|
return super()._ppf(p, a1, b1, a2, b2)
|
|
|
|
|
|
|
|
def _ppf(self, p, a1, b1, a2, b2):
|
|
|
|
return self._do_vectorized(self._ppf_one, p, a1, b1, a2, b2)
|
|
|
|
|
|
|
|
def _munp_one(self, k, a1, b1, a2, b2):
|
|
|
|
"""Moments of the distribution, given by Hora & Kelley"""
|
|
|
|
|
2022-10-17 20:35:04 +11:00
|
|
|
from mpmath import rf
|
|
|
|
|
|
|
|
term1 = rf(a1, k) * rf(b2, k)
|
|
|
|
term2 = rf(b1 - k, k) * rf(a2 - k, k)
|
2022-10-07 16:52:51 +11:00
|
|
|
return float(term1 / term2)
|
2022-10-05 20:08:04 +11:00
|
|
|
|
|
|
|
def _munp(self, k, a1, b1, a2, b2):
|
|
|
|
return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2)
|
|
|
|
|
2022-10-07 22:12:55 +11:00
|
|
|
# See beta_oddsratio.cdf
|
|
|
|
BETA_ODDSRATIO_CDF_TERMS = [100]
|
|
|
|
|
2022-10-09 12:54:23 +11:00
|
|
|
beta_oddsratio = betaoddsrat_gen(name='beta_oddsratio', a=0) # a = lower bound of support
|
|
|
|
|
|
|
|
class transformed_gen(stats.rv_continuous):
|
|
|
|
"""
|
2022-10-17 21:41:19 +11:00
|
|
|
Represents a transformation, *Y*, of a "base" random variable, *X*
|
|
|
|
|
|
|
|
This is a SciPy *stats.rv_continuous* distribution which takes parameters as described below.
|
2022-10-09 12:54:23 +11:00
|
|
|
|
2022-10-17 21:41:19 +11:00
|
|
|
The transformation is *f*\ (*Y*) = *X*.
|
|
|
|
Hence *Y*.pdf\ (*x*) = *X*.pdf\ (*f*\ (*x*)) \* *f*'\ (*x*).
|
|
|
|
|
|
|
|
For example, if *X* is a model parameter, then ``transformed_dist(X, f=np.exp, fprime=np.exp, finv=np.log)`` is the distribution of the log-parameter.
|
|
|
|
|
|
|
|
**Parameters:**
|
|
|
|
|
|
|
|
* **base** (*frozen SciPy distribution*) – Distribution of the base random variable
|
|
|
|
|
|
|
|
* **f** (*callable*) – Function *f* representing the transformation, which takes values of *Y* to values of *X*
|
|
|
|
|
|
|
|
* **fprime** (*callable*) – Derivative of the function *f*
|
|
|
|
|
|
|
|
* **finv** (*callable*) – Inverse of the function *f*, which takes values of *X* to values of *Y*
|
2022-10-09 12:54:23 +11:00
|
|
|
"""
|
|
|
|
|
|
|
|
def _pdf(self, x, base, f, fprime, finv):
|
|
|
|
return base.pdf(np.vectorize(f)(x)) * np.vectorize(fprime)(x)
|
|
|
|
|
|
|
|
def _cdf(self, x, base, f, fprime, finv):
|
|
|
|
return base.cdf(np.vectorize(f)(x))
|
|
|
|
|
|
|
|
def _ppf(self, p, base, f, fprime, finv):
|
|
|
|
return np.vectorize(finv)(base.ppf(p))
|
|
|
|
|
|
|
|
# Call implementations directly to bypass SciPy args checking, which chokes on "base" parameter
|
|
|
|
|
|
|
|
def pdf(self, x, base, f, fprime, finv):
|
|
|
|
if np.isscalar(x):
|
|
|
|
return np.atleast_1d(self._pdf(x, base, f, fprime, finv))[0]
|
|
|
|
return self._pdf(x, base, f, fprime, finv)
|
|
|
|
|
|
|
|
def cdf(self, x, base, f, fprime, finv):
|
|
|
|
if np.isscalar(x):
|
|
|
|
return np.atleast_1d(self._cdf(x, base, f, fprime, finv))[0]
|
|
|
|
return self._cdf(x, base, f, fprime, finv)
|
|
|
|
|
|
|
|
def ppf(self, x, base, f, fprime, finv):
|
|
|
|
if np.isscalar(x):
|
|
|
|
return np.atleast_1d(self._ppf(x, base, f, fprime, finv))[0]
|
|
|
|
return self._ppf(x, base, f, fprime, finv)
|
|
|
|
|
|
|
|
transformed_dist = transformed_gen(name='transformed')
|
2022-10-06 18:44:22 +11:00
|
|
|
|
2022-10-15 23:24:29 +11:00
|
|
|
def hdi(distribution, level=None):
|
|
|
|
"""
|
|
|
|
Get the highest density interval for the distribution, e.g. for a Bayesian posterior, the highest posterior density interval (HPD/HDI)
|
|
|
|
|
2022-10-17 21:41:19 +11:00
|
|
|
:param distribution: Distribution to compute the interval for
|
|
|
|
:type distribution: frozen SciPy distribution
|
|
|
|
:param level: Coverage/confidence probability, default (*None*) is 1 − *config.alpha*
|
|
|
|
:type level: float
|
|
|
|
|
|
|
|
:rtype: :class:`yli.utils.ConfidenceInterval`
|
2022-10-15 23:24:29 +11:00
|
|
|
"""
|
|
|
|
|
|
|
|
if level is None:
|
|
|
|
level = 1 - config.alpha
|
2022-10-06 18:44:22 +11:00
|
|
|
|
2022-10-15 23:24:29 +11:00
|
|
|
# For a given lower limit, we can compute the corresponding level*100% interval
|
2022-10-06 18:44:22 +11:00
|
|
|
def interval_width(lower):
|
|
|
|
upper = distribution.ppf(distribution.cdf(lower) + level)
|
|
|
|
return upper - lower
|
|
|
|
|
|
|
|
# Find such interval which has the smallest width
|
|
|
|
# Use equal-tailed interval as initial guess
|
|
|
|
initial_guess = distribution.ppf((1-level)/2)
|
|
|
|
optimize_result = optimize.minimize(interval_width, initial_guess) # TODO: Allow customising parameters
|
|
|
|
|
|
|
|
lower_limit = optimize_result.x[0]
|
|
|
|
width = optimize_result.fun
|
|
|
|
upper_limit = lower_limit + width
|
|
|
|
|
|
|
|
return ConfidenceInterval(lower_limit, upper_limit)
|