Implement truncated sum for calculating beta_oddsratio.cdf

This commit is contained in:
RunasSudo 2022-10-07 22:12:55 +11:00
parent cbb79207ae
commit b8250054a2
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
1 changed files with 58 additions and 15 deletions

View File

@ -110,6 +110,10 @@ class betaoddsrat_gen(stats.rv_continuous):
"""Construct a new beta_ratio distribution from two SciPy beta distributions"""
return self(*beta1.args, *beta2.args)
def set_cdf_terms(self, cdf_terms):
"""Set the number of terms to use when calculating CDF (see _cdf)"""
BETA_ODDSRATIO_CDF_TERMS[0] = cdf_terms
def _do_vectorized(self, func, x, a1, b1, a2, b2):
"""Helper function to call the implementation over potentially multiple values"""
@ -138,8 +142,39 @@ class betaoddsrat_gen(stats.rv_continuous):
def _pdf(self, w, a1, b1, a2, b2):
return self._do_vectorized(self._pdf_one, w, a1, b1, a2, b2)
def _cdf_one_infsum(self, w, a1, b1, a2, b2):
"""CDF for the distribution, by truncating infinite sum given by Hora & Kelly"""
if w <= 0:
return 0
elif w < 1:
k = mpmath.beta(a1 + a2, b1 + b2) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2))
inf_sum = 0
for j in range(0, BETA_ODDSRATIO_CDF_TERMS[0]):
kj1 = mpmath.rf(a1 + b1, j) * mpmath.rf(a1 + a2, j)
kj2 = mpmath.rf(a1 + a2 + b1 + b2, j) * mpmath.factorial(j)
inf_sum += kj1/kj2 * mpmath.betainc(a1, j + 1, 0, w)
return k * inf_sum
else:
k = mpmath.beta(a1 + a2, b1 + b2) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2))
inf_sum = 0
for j in range(0, BETA_ODDSRATIO_CDF_TERMS[0]):
kj1 = mpmath.rf(a2 + b2, j) * mpmath.rf(a1 + a2, j)
kj2 = mpmath.rf(a1 + a2 + b1 + b2, j) * mpmath.factorial(j)
inf_sum += kj1/kj2 * mpmath.betainc(a2, j + 1, 0, mpmath.power(w, -1))
return 1 - k * inf_sum
def _cdf(self, w, a1, b1, a2, b2):
"""CDF for the distribution, computed by integrating the PDF"""
"""
CDF for the distribution
If BETA_ODDSRATIO_CDF_TERMS = np.inf, compute the CDF by integrating the PDF
Otherwise, compute the CDF by truncating the infinite sum given by Hora & Kelley
"""
w = np.atleast_1d(w)
a1 = np.atleast_1d(a1)
@ -150,21 +185,26 @@ class betaoddsrat_gen(stats.rv_continuous):
if not ((a1 == a1[0]).all() and (b1 == b1[0]).all() and (a2 == a2[0]).all() and (b2 == b2[0]).all()):
raise ValueError('Cannot compute CDF from different distributions')
if w.size == 1:
if w <= 0:
return 0
# Just compute an integral
if w < self.mean(a1, b1, a2, b2):
# Integrate normally
return integrate.quad(lambda x: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), 0, w)[0]
if BETA_ODDSRATIO_CDF_TERMS[0] == np.inf:
# Exact solution requested
if w.size == 1:
if w <= 0:
return 0
# Just compute an integral
if w < self.mean(a1, b1, a2, b2):
# Integrate normally
return integrate.quad(lambda x: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), 0, w)[0]
else:
# Integrate on the distribution of 1/w (much faster)
return 1 - integrate.quad(lambda x: self._pdf_one(x, a2[0], b2[0], a1[0], b1[0]), 0, 1/w)[0]
else:
# Integrate on the distribution of 1/w (much faster)
return 1 - integrate.quad(lambda x: self._pdf_one(x, a2[0], b2[0], a1[0], b1[0]), 0, 1/w)[0]
# Multiple points requested: use ODE solver
solution = integrate.solve_ivp(lambda x, _: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), (0, w.max()), [0], t_eval=w, method='LSODA', rtol=1.5e-8, atol=1.5e-8)
return solution.y
else:
# Multiple points requested: use ODE solver
solution = integrate.solve_ivp(lambda x, _: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), (0, w.max()), [0], t_eval=w, method='LSODA', rtol=1.5e-8, atol=1.5e-8)
return solution.y
# Truncate infinite sum
return self._do_vectorized(self._cdf_one_infsum, w, a1, b1, a2, b2)
def _ppf_one(self, p, a1, b1, a2, b2):
"""PPF for the distribution, using Newton's method"""
@ -200,7 +240,10 @@ class betaoddsrat_gen(stats.rv_continuous):
def _munp(self, k, a1, b1, a2, b2):
return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2)
beta_oddsratio = betaoddsrat_gen(name='beta_oddsratio', a=0) # a = lower bound of support
# See beta_oddsratio.cdf
BETA_ODDSRATIO_CDF_TERMS = [100]
beta_oddsratio = betaoddsrat_gen(name='beta_oddsratio', a=0, shapes='a1,b1,a2,b2') # a = lower bound of support
ConfidenceInterval = collections.namedtuple('ConfidenceInterval', ['lower', 'upper'])