From b8250054a2e0f40439a915abb563c1b25d1d3af3 Mon Sep 17 00:00:00 2001
From: RunasSudo <runassudo@yingtongli.me>
Date: Fri, 7 Oct 2022 22:12:55 +1100
Subject: [PATCH] Implement truncated sum for calculating beta_oddsratio.cdf

---
 yli/distributions.py | 73 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 58 insertions(+), 15 deletions(-)

diff --git a/yli/distributions.py b/yli/distributions.py
index e1451df..5c5e518 100644
--- a/yli/distributions.py
+++ b/yli/distributions.py
@@ -110,6 +110,10 @@ class betaoddsrat_gen(stats.rv_continuous):
 		"""Construct a new beta_ratio distribution from two SciPy beta distributions"""
 		return self(*beta1.args, *beta2.args)
 	
+	def set_cdf_terms(self, cdf_terms):
+		"""Set the number of terms to use when calculating CDF (see _cdf)"""
+		BETA_ODDSRATIO_CDF_TERMS[0] = cdf_terms
+	
 	def _do_vectorized(self, func, x, a1, b1, a2, b2):
 		"""Helper function to call the implementation over potentially multiple values"""
 		
@@ -138,8 +142,39 @@ class betaoddsrat_gen(stats.rv_continuous):
 	def _pdf(self, w, a1, b1, a2, b2):
 		return self._do_vectorized(self._pdf_one, w, a1, b1, a2, b2)
 	
+	def _cdf_one_infsum(self, w, a1, b1, a2, b2):
+		"""CDF for the distribution, by truncating infinite sum given by Hora & Kelly"""
+		
+		if w <= 0:
+			return 0
+		elif w < 1:
+			k = mpmath.beta(a1 + a2, b1 + b2) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2))
+
+			inf_sum = 0
+			for j in range(0, BETA_ODDSRATIO_CDF_TERMS[0]):
+				kj1 = mpmath.rf(a1 + b1, j) * mpmath.rf(a1 + a2, j)
+				kj2 = mpmath.rf(a1 + a2 + b1 + b2, j) * mpmath.factorial(j)
+				inf_sum += kj1/kj2 * mpmath.betainc(a1, j + 1, 0, w)
+			
+			return k * inf_sum
+		else:
+			k = mpmath.beta(a1 + a2, b1 + b2) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2))
+			
+			inf_sum = 0
+			for j in range(0, BETA_ODDSRATIO_CDF_TERMS[0]):
+				kj1 = mpmath.rf(a2 + b2, j) * mpmath.rf(a1 + a2, j)
+				kj2 = mpmath.rf(a1 + a2 + b1 + b2, j) * mpmath.factorial(j)
+				inf_sum += kj1/kj2 * mpmath.betainc(a2, j + 1, 0, mpmath.power(w, -1))
+			
+			return 1 - k * inf_sum
+	
 	def _cdf(self, w, a1, b1, a2, b2):
-		"""CDF for the distribution, computed by integrating the PDF"""
+		"""
+		CDF for the distribution
+		
+		If BETA_ODDSRATIO_CDF_TERMS = np.inf, compute the CDF by integrating the PDF
+		Otherwise, compute the CDF by truncating the infinite sum given by Hora & Kelley
+		"""
 		
 		w = np.atleast_1d(w)
 		a1 = np.atleast_1d(a1)
@@ -150,21 +185,26 @@ class betaoddsrat_gen(stats.rv_continuous):
 		if not ((a1 == a1[0]).all() and (b1 == b1[0]).all() and (a2 == a2[0]).all() and (b2 == b2[0]).all()):
 			raise ValueError('Cannot compute CDF from different distributions')
 		
-		if w.size == 1:
-			if w <= 0:
-				return 0
-			
-			# Just compute an integral
-			if w < self.mean(a1, b1, a2, b2):
-				# Integrate normally
-				return integrate.quad(lambda x: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), 0, w)[0]
+		if BETA_ODDSRATIO_CDF_TERMS[0] == np.inf:
+			# Exact solution requested
+			if w.size == 1:
+				if w <= 0:
+					return 0
+				
+				# Just compute an integral
+				if w < self.mean(a1, b1, a2, b2):
+					# Integrate normally
+					return integrate.quad(lambda x: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), 0, w)[0]
+				else:
+					# Integrate on the distribution of 1/w (much faster)
+					return 1 - integrate.quad(lambda x: self._pdf_one(x, a2[0], b2[0], a1[0], b1[0]), 0, 1/w)[0]
 			else:
-				# Integrate on the distribution of 1/w (much faster)
-				return 1 - integrate.quad(lambda x: self._pdf_one(x, a2[0], b2[0], a1[0], b1[0]), 0, 1/w)[0]
+				# Multiple points requested: use ODE solver
+				solution = integrate.solve_ivp(lambda x, _: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), (0, w.max()), [0], t_eval=w, method='LSODA', rtol=1.5e-8, atol=1.5e-8)
+				return solution.y
 		else:
-			# Multiple points requested: use ODE solver
-			solution = integrate.solve_ivp(lambda x, _: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), (0, w.max()), [0], t_eval=w, method='LSODA', rtol=1.5e-8, atol=1.5e-8)
-			return solution.y
+			# Truncate infinite sum
+			return self._do_vectorized(self._cdf_one_infsum, w, a1, b1, a2, b2)
 	
 	def _ppf_one(self, p, a1, b1, a2, b2):
 		"""PPF for the distribution, using Newton's method"""
@@ -200,7 +240,10 @@ class betaoddsrat_gen(stats.rv_continuous):
 	def _munp(self, k, a1, b1, a2, b2):
 		return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2)
 
-beta_oddsratio = betaoddsrat_gen(name='beta_oddsratio', a=0)  # a = lower bound of support
+# See beta_oddsratio.cdf
+BETA_ODDSRATIO_CDF_TERMS = [100]
+
+beta_oddsratio = betaoddsrat_gen(name='beta_oddsratio', a=0, shapes='a1,b1,a2,b2')  # a = lower bound of support
 
 ConfidenceInterval = collections.namedtuple('ConfidenceInterval', ['lower', 'upper'])