From b8250054a2e0f40439a915abb563c1b25d1d3af3 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Fri, 7 Oct 2022 22:12:55 +1100 Subject: [PATCH] Implement truncated sum for calculating beta_oddsratio.cdf --- yli/distributions.py | 73 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 58 insertions(+), 15 deletions(-) diff --git a/yli/distributions.py b/yli/distributions.py index e1451df..5c5e518 100644 --- a/yli/distributions.py +++ b/yli/distributions.py @@ -110,6 +110,10 @@ class betaoddsrat_gen(stats.rv_continuous): """Construct a new beta_ratio distribution from two SciPy beta distributions""" return self(*beta1.args, *beta2.args) + def set_cdf_terms(self, cdf_terms): + """Set the number of terms to use when calculating CDF (see _cdf)""" + BETA_ODDSRATIO_CDF_TERMS[0] = cdf_terms + def _do_vectorized(self, func, x, a1, b1, a2, b2): """Helper function to call the implementation over potentially multiple values""" @@ -138,8 +142,39 @@ class betaoddsrat_gen(stats.rv_continuous): def _pdf(self, w, a1, b1, a2, b2): return self._do_vectorized(self._pdf_one, w, a1, b1, a2, b2) + def _cdf_one_infsum(self, w, a1, b1, a2, b2): + """CDF for the distribution, by truncating infinite sum given by Hora & Kelly""" + + if w <= 0: + return 0 + elif w < 1: + k = mpmath.beta(a1 + a2, b1 + b2) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2)) + + inf_sum = 0 + for j in range(0, BETA_ODDSRATIO_CDF_TERMS[0]): + kj1 = mpmath.rf(a1 + b1, j) * mpmath.rf(a1 + a2, j) + kj2 = mpmath.rf(a1 + a2 + b1 + b2, j) * mpmath.factorial(j) + inf_sum += kj1/kj2 * mpmath.betainc(a1, j + 1, 0, w) + + return k * inf_sum + else: + k = mpmath.beta(a1 + a2, b1 + b2) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2)) + + inf_sum = 0 + for j in range(0, BETA_ODDSRATIO_CDF_TERMS[0]): + kj1 = mpmath.rf(a2 + b2, j) * mpmath.rf(a1 + a2, j) + kj2 = mpmath.rf(a1 + a2 + b1 + b2, j) * mpmath.factorial(j) + inf_sum += kj1/kj2 * mpmath.betainc(a2, j + 1, 0, mpmath.power(w, -1)) + + return 1 - k * inf_sum + def _cdf(self, w, a1, b1, a2, b2): - """CDF for the distribution, computed by integrating the PDF""" + """ + CDF for the distribution + + If BETA_ODDSRATIO_CDF_TERMS = np.inf, compute the CDF by integrating the PDF + Otherwise, compute the CDF by truncating the infinite sum given by Hora & Kelley + """ w = np.atleast_1d(w) a1 = np.atleast_1d(a1) @@ -150,21 +185,26 @@ class betaoddsrat_gen(stats.rv_continuous): if not ((a1 == a1[0]).all() and (b1 == b1[0]).all() and (a2 == a2[0]).all() and (b2 == b2[0]).all()): raise ValueError('Cannot compute CDF from different distributions') - if w.size == 1: - if w <= 0: - return 0 - - # Just compute an integral - if w < self.mean(a1, b1, a2, b2): - # Integrate normally - return integrate.quad(lambda x: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), 0, w)[0] + if BETA_ODDSRATIO_CDF_TERMS[0] == np.inf: + # Exact solution requested + if w.size == 1: + if w <= 0: + return 0 + + # Just compute an integral + if w < self.mean(a1, b1, a2, b2): + # Integrate normally + return integrate.quad(lambda x: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), 0, w)[0] + else: + # Integrate on the distribution of 1/w (much faster) + return 1 - integrate.quad(lambda x: self._pdf_one(x, a2[0], b2[0], a1[0], b1[0]), 0, 1/w)[0] else: - # Integrate on the distribution of 1/w (much faster) - return 1 - integrate.quad(lambda x: self._pdf_one(x, a2[0], b2[0], a1[0], b1[0]), 0, 1/w)[0] + # Multiple points requested: use ODE solver + solution = integrate.solve_ivp(lambda x, _: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), (0, w.max()), [0], t_eval=w, method='LSODA', rtol=1.5e-8, atol=1.5e-8) + return solution.y else: - # Multiple points requested: use ODE solver - solution = integrate.solve_ivp(lambda x, _: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), (0, w.max()), [0], t_eval=w, method='LSODA', rtol=1.5e-8, atol=1.5e-8) - return solution.y + # Truncate infinite sum + return self._do_vectorized(self._cdf_one_infsum, w, a1, b1, a2, b2) def _ppf_one(self, p, a1, b1, a2, b2): """PPF for the distribution, using Newton's method""" @@ -200,7 +240,10 @@ class betaoddsrat_gen(stats.rv_continuous): def _munp(self, k, a1, b1, a2, b2): return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2) -beta_oddsratio = betaoddsrat_gen(name='beta_oddsratio', a=0) # a = lower bound of support +# See beta_oddsratio.cdf +BETA_ODDSRATIO_CDF_TERMS = [100] + +beta_oddsratio = betaoddsrat_gen(name='beta_oddsratio', a=0, shapes='a1,b1,a2,b2') # a = lower bound of support ConfidenceInterval = collections.namedtuple('ConfidenceInterval', ['lower', 'upper'])