Implement beta_oddsratio
This commit is contained in:
parent
d3842e1645
commit
cbb79207ae
94
tests/test_beta_oddsratio.py
Normal file
94
tests/test_beta_oddsratio.py
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
# scipy-yli: Helpful SciPy utilities and recipes
|
||||||
|
# Copyright © 2022 Lee Yingtong Li (RunasSudo)
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy import stats
|
||||||
|
|
||||||
|
import yli
|
||||||
|
|
||||||
|
def test_beta_ratio_cdf_vs_empirical():
|
||||||
|
"""Compare beta_ratio.cdf with empirical distribution"""
|
||||||
|
|
||||||
|
# Define the example beta distribution
|
||||||
|
beta1 = stats.beta(3, 6)
|
||||||
|
beta2 = stats.beta(12, 7)
|
||||||
|
dist = yli.beta_oddsratio.from_scipy(beta1, beta2)
|
||||||
|
|
||||||
|
# Compute empirical distribution
|
||||||
|
samples_p1 = beta1.rvs(10_000, random_state=31415)
|
||||||
|
samples_p2 = beta2.rvs(10_000, random_state=92653)
|
||||||
|
sample = (samples_p1 / (1 - samples_p1)) / (samples_p2 / (1 - samples_p2))
|
||||||
|
|
||||||
|
# Values to check
|
||||||
|
x = np.linspace(0, 2, 10)
|
||||||
|
y1 = dist.cdf(x)
|
||||||
|
y2 = [(sample < xx).sum()/sample.size for xx in x]
|
||||||
|
|
||||||
|
# Allow 0.01 tolerance
|
||||||
|
assert (np.abs(y1 - y2) < 0.01).all()
|
||||||
|
|
||||||
|
def test_beta_ratio_ppf_vs_empirical():
|
||||||
|
"""Compare beta_ratio.ppf with empirical distribution"""
|
||||||
|
|
||||||
|
# Define the example beta distribution
|
||||||
|
beta1 = stats.beta(3, 6)
|
||||||
|
beta2 = stats.beta(12, 7)
|
||||||
|
dist = yli.beta_oddsratio.from_scipy(beta1, beta2)
|
||||||
|
|
||||||
|
# Compute empirical distribution
|
||||||
|
samples_p1 = beta1.rvs(10_000, random_state=31415)
|
||||||
|
samples_p2 = beta2.rvs(10_000, random_state=92653)
|
||||||
|
sample = (samples_p1 / (1 - samples_p1)) / (samples_p2 / (1 - samples_p2))
|
||||||
|
|
||||||
|
# Values to check
|
||||||
|
x = np.linspace(0, 0.9, 10)
|
||||||
|
y1 = dist.ppf(x)
|
||||||
|
y2 = np.quantile(sample, x)
|
||||||
|
|
||||||
|
# Allow 0.01 tolerance
|
||||||
|
assert (np.abs(y1 - y2) < 0.01).all()
|
||||||
|
|
||||||
|
def test_beta_ratio_mean_vs_empirical():
|
||||||
|
"""Compare beta_ratio.mean (vs _munp) with empirical mean"""
|
||||||
|
|
||||||
|
# Define the example beta distribution
|
||||||
|
beta1 = stats.beta(3, 6)
|
||||||
|
beta2 = stats.beta(12, 7)
|
||||||
|
dist = yli.beta_oddsratio.from_scipy(beta1, beta2)
|
||||||
|
|
||||||
|
# Compute empirical mean
|
||||||
|
samples_p1 = beta1.rvs(10_000, random_state=31415)
|
||||||
|
samples_p2 = beta2.rvs(10_000, random_state=92653)
|
||||||
|
sample = (samples_p1 / (1 - samples_p1)) / (samples_p2 / (1 - samples_p2))
|
||||||
|
|
||||||
|
# Allow 0.01 tolerance
|
||||||
|
assert np.abs(dist.mean() - sample.mean()) < 0.01
|
||||||
|
|
||||||
|
def test_beta_ratio_var_vs_empirical():
|
||||||
|
"""Compare beta_ratio.var (vs _munp) with empirical variance"""
|
||||||
|
|
||||||
|
# Define the example beta distribution
|
||||||
|
beta1 = stats.beta(3, 6)
|
||||||
|
beta2 = stats.beta(12, 7)
|
||||||
|
dist = yli.beta_oddsratio.from_scipy(beta1, beta2)
|
||||||
|
|
||||||
|
# Compute empirical mean
|
||||||
|
samples_p1 = beta1.rvs(10_000, random_state=31415)
|
||||||
|
samples_p2 = beta2.rvs(10_000, random_state=92653)
|
||||||
|
sample = (samples_p1 / (1 - samples_p1)) / (samples_p2 / (1 - samples_p2))
|
||||||
|
|
||||||
|
# Allow 0.01 tolerance
|
||||||
|
assert np.abs(dist.var() - sample.var()) < 0.01
|
@ -15,6 +15,7 @@
|
|||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from scipy import stats
|
||||||
|
|
||||||
import yli
|
import yli
|
||||||
|
|
||||||
@ -30,7 +31,7 @@ def test_beta_ratio_vs_jsaffer_pdf():
|
|||||||
y = dist.pdf(x)
|
y = dist.pdf(x)
|
||||||
|
|
||||||
# Compare with expected values from jsaffer implementation
|
# Compare with expected values from jsaffer implementation
|
||||||
expected = np.load('beta_ratio_vs_jsaffer.npy', allow_pickle=False)[0]
|
expected = np.load('tests/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[0]
|
||||||
|
|
||||||
# Allow 1e-10 tolerance
|
# Allow 1e-10 tolerance
|
||||||
diff = np.abs(y - expected)
|
diff = np.abs(y - expected)
|
||||||
@ -48,7 +49,7 @@ def test_beta_ratio_vs_jsaffer_cdf():
|
|||||||
y = dist.cdf(x)
|
y = dist.cdf(x)
|
||||||
|
|
||||||
# Compare with expected values from jsaffer implementation
|
# Compare with expected values from jsaffer implementation
|
||||||
expected = np.load('beta_ratio_vs_jsaffer.npy', allow_pickle=False)[1]
|
expected = np.load('tests/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[1]
|
||||||
|
|
||||||
# Allow 1e-10 tolerance
|
# Allow 1e-10 tolerance
|
||||||
diff = np.abs(y - expected)
|
diff = np.abs(y - expected)
|
||||||
@ -65,16 +66,36 @@ def _gen_beta_ratio_vs_jsaffer():
|
|||||||
y1 = np.vectorize(lambda w: float(beta_quotient_distribution.pdf_bb_ratio(a1, a2, b1, b2, w)))(x)
|
y1 = np.vectorize(lambda w: float(beta_quotient_distribution.pdf_bb_ratio(a1, a2, b1, b2, w)))(x)
|
||||||
y2 = np.vectorize(lambda w: float(beta_quotient_distribution.cdf_bb_ratio(a1, a2, b1, b2, w)))(x)
|
y2 = np.vectorize(lambda w: float(beta_quotient_distribution.cdf_bb_ratio(a1, a2, b1, b2, w)))(x)
|
||||||
|
|
||||||
np.save('beta_ratio_vs_jsaffer.npy', np.array([y1, y2]), allow_pickle=False)
|
np.save('tests/beta_ratio_vs_jsaffer.npy', np.array([y1, y2]), allow_pickle=False)
|
||||||
|
|
||||||
def test_beta_ratio_mean_vs_empirical():
|
def test_beta_ratio_mean_vs_empirical():
|
||||||
"""Compare beta_ratio.mean (via beta_ratio._munp) with empirical mean"""
|
"""Compare beta_ratio.mean (via beta_ratio._munp) with empirical mean"""
|
||||||
|
|
||||||
# Define the example beta distribution
|
# Define the example beta distribution
|
||||||
dist = yli.beta_ratio(3, 6, 12, 7)
|
beta1 = stats.beta(3, 6)
|
||||||
|
beta2 = stats.beta(12, 7)
|
||||||
|
dist = yli.beta_ratio.from_scipy(beta1, beta2)
|
||||||
|
|
||||||
# Compute empirical mean
|
# Compute empirical distribution
|
||||||
sample = dist.rvs(1000, random_state=31415)
|
samples_p1 = beta1.rvs(10_000, random_state=31415)
|
||||||
|
samples_p2 = beta2.rvs(10_000, random_state=92653)
|
||||||
|
sample = samples_p1 / samples_p2
|
||||||
|
|
||||||
# Allow 0.01 tolerance
|
# Allow 0.01 tolerance
|
||||||
assert np.abs(dist.mean() - sample.mean()) < 0.01
|
assert np.abs(dist.mean() - sample.mean()) < 0.01
|
||||||
|
|
||||||
|
def test_beta_ratio_var_vs_empirical():
|
||||||
|
"""Compare beta_ratio.var (via beta_ratio._munp) with empirical variance"""
|
||||||
|
|
||||||
|
# Define the example beta distribution
|
||||||
|
beta1 = stats.beta(3, 6)
|
||||||
|
beta2 = stats.beta(12, 7)
|
||||||
|
dist = yli.beta_ratio.from_scipy(beta1, beta2)
|
||||||
|
|
||||||
|
# Compute empirical distribution
|
||||||
|
samples_p1 = beta1.rvs(10_000, random_state=31415)
|
||||||
|
samples_p2 = beta2.rvs(10_000, random_state=92653)
|
||||||
|
sample = samples_p1 / samples_p2
|
||||||
|
|
||||||
|
# Allow 0.01 tolerance
|
||||||
|
assert np.abs(dist.var() - sample.var()) < 0.01
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
|
|
||||||
import mpmath
|
import mpmath
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy import optimize, stats
|
from scipy import integrate, optimize, stats
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
|
|
||||||
@ -30,6 +30,8 @@ class betarat_gen(stats.rv_continuous):
|
|||||||
2. Weekend Editor. On the ratio of Beta-distributed random variables. Some Weekend Reading. 2021 Sep 13. https://www.someweekendreading.blog/beta-ratios/
|
2. Weekend Editor. On the ratio of Beta-distributed random variables. Some Weekend Reading. 2021 Sep 13. https://www.someweekendreading.blog/beta-ratios/
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# TODO: _rvs based on stats.beta
|
||||||
|
|
||||||
def from_scipy(self, beta1, beta2):
|
def from_scipy(self, beta1, beta2):
|
||||||
"""Construct a new beta_ratio distribution from two SciPy beta distributions"""
|
"""Construct a new beta_ratio distribution from two SciPy beta distributions"""
|
||||||
return self(*beta1.args, *beta2.args)
|
return self(*beta1.args, *beta2.args)
|
||||||
@ -84,14 +86,121 @@ class betarat_gen(stats.rv_continuous):
|
|||||||
def _munp_one(self, k, a1, b1, a2, b2):
|
def _munp_one(self, k, a1, b1, a2, b2):
|
||||||
"""Moments of the distribution, given by Weekend Editor"""
|
"""Moments of the distribution, given by Weekend Editor"""
|
||||||
|
|
||||||
term1 = mpmath.rf(a1, k) / mpmath.rf(a1 + b1, k)
|
term1 = mpmath.rf(a1, k) * mpmath.rf(a2 + b2 - k, k)
|
||||||
term2 = mpmath.rf(a2 + b2 - k, k) / mpmath.rf(a2 - k, k)
|
term2 = mpmath.rf(a1 + b1, k) * mpmath.rf(a2 - k, k)
|
||||||
return float(term1 * term2)
|
return float(term1 / term2)
|
||||||
|
|
||||||
def _munp(self, k, a1, b1, a2, b2):
|
def _munp(self, k, a1, b1, a2, b2):
|
||||||
return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2)
|
return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2)
|
||||||
|
|
||||||
beta_ratio = betarat_gen(name='beta_ratio', a=0)
|
beta_ratio = betarat_gen(name='beta_ratio', a=0) # a = lower bound of support
|
||||||
|
|
||||||
|
class betaoddsrat_gen(stats.rv_continuous):
|
||||||
|
"""
|
||||||
|
Ratio of the odds of 2 independent beta-distributed variables
|
||||||
|
Ratio of (X/(1-X)) / (Y/(1-Y)), where X ~ Beta(a1, b1), Y ~ Beta(a2, b2)
|
||||||
|
|
||||||
|
Reference:
|
||||||
|
Hora SC, Kelley GD. Bayesian inference on the odds and risk ratios. Communications in Statistics: Theory and Methods. 1983;12(6):725–38. doi: 10.1080/03610928308828491
|
||||||
|
"""
|
||||||
|
|
||||||
|
# TODO: _rvs based on stats.beta
|
||||||
|
|
||||||
|
def from_scipy(self, beta1, beta2):
|
||||||
|
"""Construct a new beta_ratio distribution from two SciPy beta distributions"""
|
||||||
|
return self(*beta1.args, *beta2.args)
|
||||||
|
|
||||||
|
def _do_vectorized(self, func, x, a1, b1, a2, b2):
|
||||||
|
"""Helper function to call the implementation over potentially multiple values"""
|
||||||
|
|
||||||
|
x = np.atleast_1d(x)
|
||||||
|
result = np.zeros(x.size)
|
||||||
|
for i, (x_, a1_, b1_, a2_, b2_) in enumerate(zip(x, np.pad(np.atleast_1d(a1), x.size, 'edge'), np.pad(np.atleast_1d(b1), x.size, 'edge'), np.pad(np.atleast_1d(a2), x.size, 'edge'), np.pad(np.atleast_1d(b2), x.size, 'edge'))):
|
||||||
|
result[i] = func(x_, a1_, b1_, a2_, b2_)
|
||||||
|
return result
|
||||||
|
|
||||||
|
def _pdf_one(self, w, a1, b1, a2, b2):
|
||||||
|
"""PDF for the distribution, given by Hora & Kelley"""
|
||||||
|
|
||||||
|
if w <= 0:
|
||||||
|
return 0
|
||||||
|
elif w < 1:
|
||||||
|
term1 = mpmath.beta(a1 + a2, b1 + b2) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2))
|
||||||
|
term2 = mpmath.power(w, a1 - 1)
|
||||||
|
term3 = mpmath.hyp2f1(a1 + b1, a1 + a2, a1 + a2 + b1 + b2, 1 - w)
|
||||||
|
else:
|
||||||
|
term1 = mpmath.beta(a1 + a2, b1 + b2) / (mpmath.beta(a1, b1) * mpmath.beta(a2, b2))
|
||||||
|
term2 = mpmath.power(w, -a2 - 1)
|
||||||
|
term3 = mpmath.hyp2f1(a2 + b2, a1 + a2, a1 + a2 + b1 + b2, 1 - mpmath.power(w, -1))
|
||||||
|
|
||||||
|
return float(term1 * term2 * term3)
|
||||||
|
|
||||||
|
def _pdf(self, w, a1, b1, a2, b2):
|
||||||
|
return self._do_vectorized(self._pdf_one, w, a1, b1, a2, b2)
|
||||||
|
|
||||||
|
def _cdf(self, w, a1, b1, a2, b2):
|
||||||
|
"""CDF for the distribution, computed by integrating the PDF"""
|
||||||
|
|
||||||
|
w = np.atleast_1d(w)
|
||||||
|
a1 = np.atleast_1d(a1)
|
||||||
|
b1 = np.atleast_1d(b1)
|
||||||
|
a2 = np.atleast_1d(a2)
|
||||||
|
b2 = np.atleast_1d(b2)
|
||||||
|
|
||||||
|
if not ((a1 == a1[0]).all() and (b1 == b1[0]).all() and (a2 == a2[0]).all() and (b2 == b2[0]).all()):
|
||||||
|
raise ValueError('Cannot compute CDF from different distributions')
|
||||||
|
|
||||||
|
if w.size == 1:
|
||||||
|
if w <= 0:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Just compute an integral
|
||||||
|
if w < self.mean(a1, b1, a2, b2):
|
||||||
|
# Integrate normally
|
||||||
|
return integrate.quad(lambda x: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), 0, w)[0]
|
||||||
|
else:
|
||||||
|
# Integrate on the distribution of 1/w (much faster)
|
||||||
|
return 1 - integrate.quad(lambda x: self._pdf_one(x, a2[0], b2[0], a1[0], b1[0]), 0, 1/w)[0]
|
||||||
|
else:
|
||||||
|
# Multiple points requested: use ODE solver
|
||||||
|
solution = integrate.solve_ivp(lambda x, _: self._pdf_one(x, a1[0], b1[0], a2[0], b2[0]), (0, w.max()), [0], t_eval=w, method='LSODA', rtol=1.5e-8, atol=1.5e-8)
|
||||||
|
return solution.y
|
||||||
|
|
||||||
|
def _ppf_one(self, p, a1, b1, a2, b2):
|
||||||
|
"""PPF for the distribution, using Newton's method"""
|
||||||
|
|
||||||
|
# Default SciPy implementation uses Brent's method
|
||||||
|
# This one is a bit faster
|
||||||
|
|
||||||
|
if p <= 0:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Initial guess based on log-normal approximation
|
||||||
|
mean = self.mean(a1, b1, a2, b2)
|
||||||
|
se_log = self.std(a1, b1, a2, b2)/mean
|
||||||
|
initial_guess = np.exp(np.log(mean) + stats.norm.ppf(p) * se_log)
|
||||||
|
|
||||||
|
try:
|
||||||
|
return optimize.newton(lambda x: self.cdf(x, a1, b1, a2, b2) - p, x0=initial_guess, fprime=lambda x: self.pdf(x, a1, b1, a2, b2))
|
||||||
|
except RuntimeError:
|
||||||
|
# Failed to converge with Newton's method :(
|
||||||
|
# Use Brent's method instead
|
||||||
|
return super()._ppf(p, a1, b1, a2, b2)
|
||||||
|
|
||||||
|
def _ppf(self, p, a1, b1, a2, b2):
|
||||||
|
return self._do_vectorized(self._ppf_one, p, a1, b1, a2, b2)
|
||||||
|
|
||||||
|
def _munp_one(self, k, a1, b1, a2, b2):
|
||||||
|
"""Moments of the distribution, given by Hora & Kelley"""
|
||||||
|
|
||||||
|
term1 = mpmath.rf(a1, k) * mpmath.rf(b2, k)
|
||||||
|
term2 = mpmath.rf(b1 - k, k) * mpmath.rf(a2 - k, k)
|
||||||
|
return float(term1 / term2)
|
||||||
|
|
||||||
|
def _munp(self, k, a1, b1, a2, b2):
|
||||||
|
return self._do_vectorized(self._munp_one, k, a1, b1, a2, b2)
|
||||||
|
|
||||||
|
beta_oddsratio = betaoddsrat_gen(name='beta_oddsratio', a=0) # a = lower bound of support
|
||||||
|
|
||||||
ConfidenceInterval = collections.namedtuple('ConfidenceInterval', ['lower', 'upper'])
|
ConfidenceInterval = collections.namedtuple('ConfidenceInterval', ['lower', 'upper'])
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user