Implement truncated sum for calculating beta_oddsratio.cdf

This commit is contained in:
RunasSudo 2022-10-07 22:12:55 +11:00
parent cbb79207ae
commit b8250054a2
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A

View File

@ -110,6 +110,10 @@ class betaoddsrat_gen(stats.rv_continuous):
"""Construct a new beta_ratio distribution from two SciPy beta distributions""" """Construct a new beta_ratio distribution from two SciPy beta distributions"""
return self(*beta1.args, *beta2.args) return self(*beta1.args, *beta2.args)
def set_cdf_terms(self, cdf_terms):
"""Set the number of terms to use when calculating CDF (see _cdf)"""
BETA_ODDSRATIO_CDF_TERMS[0] = cdf_terms
def _do_vectorized(self, func, x, a1, b1, a2, b2): def _do_vectorized(self, func, x, a1, b1, a2, b2):
"""Helper function to call the implementation over potentially multiple values""" """Helper function to call the implementation over potentially multiple values"""
@ -138,8 +142,39 @@ class betaoddsrat_gen(stats.rv_continuous):
def _pdf(self, w, a1, b1, a2, b2): def _pdf(self, w, a1, b1, a2, b2):
return self._do_vectorized(self._pdf_one, w, a1, b1, a2, b2) return self._do_vectorized(self._pdf_one, w, a1, b1, a2, b2)
def _cdf_one_infsum(self, w, a1, b1, a2, b2):
"""CDF for the distribution, by truncating infinite sum given by Hora & Kelly"""
if w <= 0:
return 0
elif w < 1:
k = mpmath.beta(a1 + a2, b1 + b2) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2))
inf_sum = 0
for j in range(0, BETA_ODDSRATIO_CDF_TERMS[0]):
kj1 = mpmath.rf(a1 + b1, j) * mpmath.rf(a1 + a2, j)
kj2 = mpmath.rf(a1 + a2 + b1 + b2, j) * mpmath.factorial(j)
inf_sum += kj1/kj2 * mpmath.betainc(a1, j + 1, 0, w)
return k * inf_sum
else:
k = mpmath.beta(a1 + a2, b1 + b2) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2))
inf_sum = 0
for j in range(0, BETA_ODDSRATIO_CDF_TERMS[0]):
kj1 = mpmath.rf(a2 + b2, j) * mpmath.rf(a1 + a2, j)
kj2 = mpmath.rf(a1 + a2 + b1 + b2, j) * mpmath.factorial(j)
inf_sum += kj1/kj2 * mpmath.betainc(a2, j + 1, 0, mpmath.power(w, -1))
return 1 - k * inf_sum
def _cdf(self, w, a1, b1, a2, b2): def _cdf(self, w, a1, b1, a2, b2):
"""CDF for the distribution, computed by integrating the PDF""" """
CDF for the distribution
If BETA_ODDSRATIO_CDF_TERMS = np.inf, compute the CDF by integrating the PDF
Otherwise, compute the CDF by truncating the infinite sum given by Hora & Kelley
"""
w = np.atleast_1d(w) w = np.atleast_1d(w)
a1 = np.atleast_1d(a1) a1 = np.atleast_1d(a1)
@ -150,6 +185,8 @@ class betaoddsrat_gen(stats.rv_continuous):
if not ((a1 == a1[0]).all() and (b1 == b1[0]).all() and (a2 == a2[0]).all() and (b2 == b2[0]).all()): if not ((a1 == a1[0]).all() and (b1 == b1[0]).all() and (a2 == a2[0]).all() and (b2 == b2[0]).all()):
raise ValueError('Cannot compute CDF from different distributions') raise ValueError('Cannot compute CDF from different distributions')
if BETA_ODDSRATIO_CDF_TERMS[0] == np.inf:
# Exact solution requested
if w.size == 1: if w.size == 1:
if w <= 0: if w <= 0:
return 0 return 0
@ -165,6 +202,9 @@ class betaoddsrat_gen(stats.rv_continuous):
# Multiple points requested: use ODE solver # Multiple points requested: use ODE solver
solution = integrate.solve_ivp(lambda x, _: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), (0, w.max()), [0], t_eval=w, method='LSODA', rtol=1.5e-8, atol=1.5e-8) solution = integrate.solve_ivp(lambda x, _: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), (0, w.max()), [0], t_eval=w, method='LSODA', rtol=1.5e-8, atol=1.5e-8)
return solution.y return solution.y
else:
# Truncate infinite sum
return self._do_vectorized(self._cdf_one_infsum, w, a1, b1, a2, b2)
def _ppf_one(self, p, a1, b1, a2, b2): def _ppf_one(self, p, a1, b1, a2, b2):
"""PPF for the distribution, using Newton's method""" """PPF for the distribution, using Newton's method"""
@ -200,7 +240,10 @@ class betaoddsrat_gen(stats.rv_continuous):
def _munp(self, k, a1, b1, a2, b2): def _munp(self, k, a1, b1, a2, b2):
return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2) return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2)
beta_oddsratio = betaoddsrat_gen(name='beta_oddsratio', a=0) # a = lower bound of support # See beta_oddsratio.cdf
BETA_ODDSRATIO_CDF_TERMS = [100]
beta_oddsratio = betaoddsrat_gen(name='beta_oddsratio', a=0, shapes='a1,b1,a2,b2') # a = lower bound of support
ConfidenceInterval = collections.namedtuple('ConfidenceInterval', ['lower', 'upper']) ConfidenceInterval = collections.namedtuple('ConfidenceInterval', ['lower', 'upper'])