diff --git a/yli/distributions.py b/yli/distributions.py index 8af37b3..a8daf93 100644 --- a/yli/distributions.py +++ b/yli/distributions.py @@ -20,6 +20,8 @@ from scipy import integrate, optimize, stats import collections +from .config import config + class betarat_gen(stats.rv_continuous): """ Ratio of 2 independent beta-distributed variables @@ -283,10 +285,17 @@ transformed_dist = transformed_gen(name='transformed') ConfidenceInterval = collections.namedtuple('ConfidenceInterval', ['lower', 'upper']) -def hdi(distribution, level=0.95): - """Get the highest density interval for the distribution, e.g. for a Bayesian posterior, the highest posterior density interval (HPD/HDI)""" +def hdi(distribution, level=None): + """ + Get the highest density interval for the distribution, e.g. for a Bayesian posterior, the highest posterior density interval (HPD/HDI) - # For a given lower limit, we can compute the corresponding 95% interval + level: Coverage/confidence probability, default (None) is 1 - config.alpha + """ + + if level is None: + level = 1 - config.alpha + + # For a given lower limit, we can compute the corresponding level*100% interval def interval_width(lower): upper = distribution.ppf(distribution.cdf(lower) + level) return upper - lower diff --git a/yli/regress.py b/yli/regress.py index 6efd25a..aacac25 100644 --- a/yli/regress.py +++ b/yli/regress.py @@ -26,6 +26,7 @@ from datetime import datetime import itertools from .bayes_factors import BayesFactor, bayesfactor_afbf +from .config import config from .sig_tests import FTestResult from .utils import Estimate, check_nan, cols_for_formula, fmt_p, formula_factor_ref_category, parse_patsy_term @@ -220,7 +221,7 @@ class RegressionResult: out += '' # Render coefficients table - out += ''.format('exp(β)' if self.exp else 'β') + out += '
{}(95% CI)p
'.format('exp(β)' if self.exp else 'β', (1-config.alpha)*100) for term_name, term in self.terms.items(): if isinstance(term, SingleTerm): @@ -306,7 +307,7 @@ class RegressionResult: raise Exception('Attempt to render unknown term type') table2 = SimpleTable(data=table_data, headers=['', 'exp(β)' if self.exp else 'β', '', '\ue000', '', ' p']) # U+E000 is in Private Use Area, mark middle of CI column - table2_text = table2.as_text().replace(' \ue000 ', '(95% CI)') # Render heading in the right spot + table2_text = table2.as_text().replace(' \ue000 ', '({:g}% CI)'.format((1-config.alpha)*100)) # Render heading in the right spot table2_lines = table2_text.splitlines(keepends=False) # Render divider line between 2 tables @@ -382,7 +383,7 @@ def regress( # Process terms terms = {} - confint = result.conf_int() + confint = result.conf_int(config.alpha) for raw_name, raw_beta in result.params.items(): beta = Estimate(raw_beta, confint[0][raw_name], confint[1][raw_name]) @@ -466,9 +467,10 @@ class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel): # Transfer other parameters to R lc['formula_'] = self.formula + lc['alpha'] = config.alpha # Fit the model - model = ro.r('logistf(formula_, data=df)') + model = ro.r('logistf(formula_, data=df, alpha=alpha)') # TODO: Handle categorical terms? terms = {t: SingleTerm(t, Estimate(b, ci0, ci1), p) for t, b, ci0, ci1, p in zip(model['terms'], model['coefficients'], model['ci.lower'], model['ci.upper'], model['prob'])} diff --git a/yli/sig_tests.py b/yli/sig_tests.py index ed4992e..0258b71 100644 --- a/yli/sig_tests.py +++ b/yli/sig_tests.py @@ -22,6 +22,7 @@ import statsmodels.api as sm import functools import warnings +from .config import config from .utils import Estimate, as_2groups, check_nan, fmt_p # ---------------- @@ -42,10 +43,10 @@ class TTestResult: self.delta_direction = delta_direction def _repr_html_(self): - return 't({:.0f}) = {:.2f}; p {}
δ (95% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=True), self.delta.summary(), self.delta_direction) + return 't({:.0f}) = {:.2f}; p {}
δ ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=True), (1-config.alpha)*100, self.delta.summary(), self.delta_direction) def summary(self): - return 't({:.0f}) = {:.2f}; p {}\nδ (95% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False), self.delta.summary(), self.delta_direction) + return 't({:.0f}) = {:.2f}; p {}\nδ ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False), (1-config.alpha)*100, self.delta.summary(), self.delta_direction) def ttest_ind(df, dep, ind, *, nan_policy='warn'): """Perform an independent-sample Student's t test""" @@ -65,7 +66,7 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'): statistic, pvalue, dof = cm.ttest_ind() delta = d1.mean - d2.mean - ci0, ci1 = cm.tconfint_diff() + ci0, ci1 = cm.tconfint_diff(config.alpha) # t test is symmetric so take absolute values return TTestResult( @@ -209,16 +210,16 @@ class PearsonChiSquaredResult: def _repr_html_(self): if self.oddsratio is not None: - return '{}
χ2({}) = {:.2f}; p {}
OR (95% CI) = {}
RR (95% CI) = {}'.format( - self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, html=True), self.oddsratio.summary(), self.riskratio.summary()) + return '{0}
χ2({1}) = {2:.2f}; p {3}
OR ({4:g}% CI) = {5}
RR ({4:g}% CI) = {6}'.format( + self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, html=True), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary()) else: return '{}
χ2({}) = {:.2f}; p {}'.format( self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, html=True)) def summary(self): if self.oddsratio is not None: - return '{}\nχ²({}) = {:.2f}; p {}\nOR (95% CI) = {}\nRR (95% CI) = {}'.format( - self.ct, self.dof, self.statistic, fmt_p(self.pvalue, html=False), self.oddsratio.summary(), self.riskratio.summary()) + return '{0}\nχ²({1}) = {2:.2f}; p {3}\nOR ({4:g}% CI) = {5}\nRR ({4:g}% CI) = {6}'.format( + self.ct, self.dof, self.statistic, fmt_p(self.pvalue, html=False), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary()) else: return '{}\nχ²({}) = {:.2f}; p {}'.format( self.ct, self.dof, self.statistic, fmt_p(self.pvalue, html=False)) @@ -247,8 +248,8 @@ def chi2(df, dep, ind, *, nan_policy='warn'): smct = sm.stats.Table2x2(np.flip(ct.to_numpy()), shift_zeros=False) result = smct.test_nominal_association() - ORci = smct.oddsratio_confint() - RRci = smct.riskratio_confint() + ORci = smct.oddsratio_confint(config.alpha) + RRci = smct.riskratio_confint(config.alpha) return PearsonChiSquaredResult( ct=ct, statistic=result.statistic, dof=result.df, pvalue=result.pvalue, @@ -272,10 +273,10 @@ class PearsonRResult: self.pvalue = pvalue def _repr_html_(self): - return 'r (95% CI) = {}; p {}'.format(self.statistic.summary(), fmt_p(self.pvalue, html=True)) + return 'r ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, html=True)) def summary(self): - return 'r (95% CI) = {}; p {}'.format(self.statistic.summary(), fmt_p(self.pvalue, html=False)) + return 'r ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, html=False)) def pearsonr(df, dep, ind, *, nan_policy='warn'): """Compute the Pearson correlation coefficient (Pearson's r)"""
{}({:g}% CI)p