Observe config.alpha for significance tests and regression
This commit is contained in:
parent
bf5cc434b5
commit
a2442214ed
@ -20,6 +20,8 @@ from scipy import integrate, optimize, stats
|
|||||||
|
|
||||||
import collections
|
import collections
|
||||||
|
|
||||||
|
from .config import config
|
||||||
|
|
||||||
class betarat_gen(stats.rv_continuous):
|
class betarat_gen(stats.rv_continuous):
|
||||||
"""
|
"""
|
||||||
Ratio of 2 independent beta-distributed variables
|
Ratio of 2 independent beta-distributed variables
|
||||||
@ -283,10 +285,17 @@ transformed_dist = transformed_gen(name='transformed')
|
|||||||
|
|
||||||
ConfidenceInterval = collections.namedtuple('ConfidenceInterval', ['lower', 'upper'])
|
ConfidenceInterval = collections.namedtuple('ConfidenceInterval', ['lower', 'upper'])
|
||||||
|
|
||||||
def hdi(distribution, level=0.95):
|
def hdi(distribution, level=None):
|
||||||
"""Get the highest density interval for the distribution, e.g. for a Bayesian posterior, the highest posterior density interval (HPD/HDI)"""
|
"""
|
||||||
|
Get the highest density interval for the distribution, e.g. for a Bayesian posterior, the highest posterior density interval (HPD/HDI)
|
||||||
|
|
||||||
# For a given lower limit, we can compute the corresponding 95% interval
|
level: Coverage/confidence probability, default (None) is 1 - config.alpha
|
||||||
|
"""
|
||||||
|
|
||||||
|
if level is None:
|
||||||
|
level = 1 - config.alpha
|
||||||
|
|
||||||
|
# For a given lower limit, we can compute the corresponding level*100% interval
|
||||||
def interval_width(lower):
|
def interval_width(lower):
|
||||||
upper = distribution.ppf(distribution.cdf(lower) + level)
|
upper = distribution.ppf(distribution.cdf(lower) + level)
|
||||||
return upper - lower
|
return upper - lower
|
||||||
|
@ -26,6 +26,7 @@ from datetime import datetime
|
|||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from .bayes_factors import BayesFactor, bayesfactor_afbf
|
from .bayes_factors import BayesFactor, bayesfactor_afbf
|
||||||
|
from .config import config
|
||||||
from .sig_tests import FTestResult
|
from .sig_tests import FTestResult
|
||||||
from .utils import Estimate, check_nan, cols_for_formula, fmt_p, formula_factor_ref_category, parse_patsy_term
|
from .utils import Estimate, check_nan, cols_for_formula, fmt_p, formula_factor_ref_category, parse_patsy_term
|
||||||
|
|
||||||
@ -220,7 +221,7 @@ class RegressionResult:
|
|||||||
out += '</table>'
|
out += '</table>'
|
||||||
|
|
||||||
# Render coefficients table
|
# Render coefficients table
|
||||||
out += '<table><tr><th></th><th style="text-align:center">{}</th><th colspan="3" style="text-align:center">(95% CI)</th><th style="text-align:center"><i>p</i></th></tr>'.format('exp(<i>β</i>)' if self.exp else '<i>β</i>')
|
out += '<table><tr><th></th><th style="text-align:center">{}</th><th colspan="3" style="text-align:center">({:g}% CI)</th><th style="text-align:center"><i>p</i></th></tr>'.format('exp(<i>β</i>)' if self.exp else '<i>β</i>', (1-config.alpha)*100)
|
||||||
|
|
||||||
for term_name, term in self.terms.items():
|
for term_name, term in self.terms.items():
|
||||||
if isinstance(term, SingleTerm):
|
if isinstance(term, SingleTerm):
|
||||||
@ -306,7 +307,7 @@ class RegressionResult:
|
|||||||
raise Exception('Attempt to render unknown term type')
|
raise Exception('Attempt to render unknown term type')
|
||||||
|
|
||||||
table2 = SimpleTable(data=table_data, headers=['', 'exp(β)' if self.exp else 'β', '', '\ue000', '', ' p']) # U+E000 is in Private Use Area, mark middle of CI column
|
table2 = SimpleTable(data=table_data, headers=['', 'exp(β)' if self.exp else 'β', '', '\ue000', '', ' p']) # U+E000 is in Private Use Area, mark middle of CI column
|
||||||
table2_text = table2.as_text().replace(' \ue000 ', '(95% CI)') # Render heading in the right spot
|
table2_text = table2.as_text().replace(' \ue000 ', '({:g}% CI)'.format((1-config.alpha)*100)) # Render heading in the right spot
|
||||||
table2_lines = table2_text.splitlines(keepends=False)
|
table2_lines = table2_text.splitlines(keepends=False)
|
||||||
|
|
||||||
# Render divider line between 2 tables
|
# Render divider line between 2 tables
|
||||||
@ -382,7 +383,7 @@ def regress(
|
|||||||
# Process terms
|
# Process terms
|
||||||
terms = {}
|
terms = {}
|
||||||
|
|
||||||
confint = result.conf_int()
|
confint = result.conf_int(config.alpha)
|
||||||
|
|
||||||
for raw_name, raw_beta in result.params.items():
|
for raw_name, raw_beta in result.params.items():
|
||||||
beta = Estimate(raw_beta, confint[0][raw_name], confint[1][raw_name])
|
beta = Estimate(raw_beta, confint[0][raw_name], confint[1][raw_name])
|
||||||
@ -466,9 +467,10 @@ class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel):
|
|||||||
|
|
||||||
# Transfer other parameters to R
|
# Transfer other parameters to R
|
||||||
lc['formula_'] = self.formula
|
lc['formula_'] = self.formula
|
||||||
|
lc['alpha'] = config.alpha
|
||||||
|
|
||||||
# Fit the model
|
# Fit the model
|
||||||
model = ro.r('logistf(formula_, data=df)')
|
model = ro.r('logistf(formula_, data=df, alpha=alpha)')
|
||||||
|
|
||||||
# TODO: Handle categorical terms?
|
# TODO: Handle categorical terms?
|
||||||
terms = {t: SingleTerm(t, Estimate(b, ci0, ci1), p) for t, b, ci0, ci1, p in zip(model['terms'], model['coefficients'], model['ci.lower'], model['ci.upper'], model['prob'])}
|
terms = {t: SingleTerm(t, Estimate(b, ci0, ci1), p) for t, b, ci0, ci1, p in zip(model['terms'], model['coefficients'], model['ci.lower'], model['ci.upper'], model['prob'])}
|
||||||
|
@ -22,6 +22,7 @@ import statsmodels.api as sm
|
|||||||
import functools
|
import functools
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
|
from .config import config
|
||||||
from .utils import Estimate, as_2groups, check_nan, fmt_p
|
from .utils import Estimate, as_2groups, check_nan, fmt_p
|
||||||
|
|
||||||
# ----------------
|
# ----------------
|
||||||
@ -42,10 +43,10 @@ class TTestResult:
|
|||||||
self.delta_direction = delta_direction
|
self.delta_direction = delta_direction
|
||||||
|
|
||||||
def _repr_html_(self):
|
def _repr_html_(self):
|
||||||
return '<i>t</i>({:.0f}) = {:.2f}; <i>p</i> {}<br><i>δ</i> (95% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=True), self.delta.summary(), self.delta_direction)
|
return '<i>t</i>({:.0f}) = {:.2f}; <i>p</i> {}<br><i>δ</i> ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=True), (1-config.alpha)*100, self.delta.summary(), self.delta_direction)
|
||||||
|
|
||||||
def summary(self):
|
def summary(self):
|
||||||
return 't({:.0f}) = {:.2f}; p {}\nδ (95% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False), self.delta.summary(), self.delta_direction)
|
return 't({:.0f}) = {:.2f}; p {}\nδ ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False), (1-config.alpha)*100, self.delta.summary(), self.delta_direction)
|
||||||
|
|
||||||
def ttest_ind(df, dep, ind, *, nan_policy='warn'):
|
def ttest_ind(df, dep, ind, *, nan_policy='warn'):
|
||||||
"""Perform an independent-sample Student's t test"""
|
"""Perform an independent-sample Student's t test"""
|
||||||
@ -65,7 +66,7 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'):
|
|||||||
statistic, pvalue, dof = cm.ttest_ind()
|
statistic, pvalue, dof = cm.ttest_ind()
|
||||||
|
|
||||||
delta = d1.mean - d2.mean
|
delta = d1.mean - d2.mean
|
||||||
ci0, ci1 = cm.tconfint_diff()
|
ci0, ci1 = cm.tconfint_diff(config.alpha)
|
||||||
|
|
||||||
# t test is symmetric so take absolute values
|
# t test is symmetric so take absolute values
|
||||||
return TTestResult(
|
return TTestResult(
|
||||||
@ -209,16 +210,16 @@ class PearsonChiSquaredResult:
|
|||||||
|
|
||||||
def _repr_html_(self):
|
def _repr_html_(self):
|
||||||
if self.oddsratio is not None:
|
if self.oddsratio is not None:
|
||||||
return '{}<br><i>χ</i><sup>2</sup>({}) = {:.2f}; <i>p</i> {}<br>OR (95% CI) = {}<br>RR (95% CI) = {}'.format(
|
return '{0}<br><i>χ</i><sup>2</sup>({1}) = {2:.2f}; <i>p</i> {3}<br>OR ({4:g}% CI) = {5}<br>RR ({4:g}% CI) = {6}'.format(
|
||||||
self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, html=True), self.oddsratio.summary(), self.riskratio.summary())
|
self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, html=True), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary())
|
||||||
else:
|
else:
|
||||||
return '{}<br><i>χ</i><sup>2</sup>({}) = {:.2f}; <i>p</i> {}'.format(
|
return '{}<br><i>χ</i><sup>2</sup>({}) = {:.2f}; <i>p</i> {}'.format(
|
||||||
self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, html=True))
|
self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, html=True))
|
||||||
|
|
||||||
def summary(self):
|
def summary(self):
|
||||||
if self.oddsratio is not None:
|
if self.oddsratio is not None:
|
||||||
return '{}\nχ²({}) = {:.2f}; p {}\nOR (95% CI) = {}\nRR (95% CI) = {}'.format(
|
return '{0}\nχ²({1}) = {2:.2f}; p {3}\nOR ({4:g}% CI) = {5}\nRR ({4:g}% CI) = {6}'.format(
|
||||||
self.ct, self.dof, self.statistic, fmt_p(self.pvalue, html=False), self.oddsratio.summary(), self.riskratio.summary())
|
self.ct, self.dof, self.statistic, fmt_p(self.pvalue, html=False), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary())
|
||||||
else:
|
else:
|
||||||
return '{}\nχ²({}) = {:.2f}; p {}'.format(
|
return '{}\nχ²({}) = {:.2f}; p {}'.format(
|
||||||
self.ct, self.dof, self.statistic, fmt_p(self.pvalue, html=False))
|
self.ct, self.dof, self.statistic, fmt_p(self.pvalue, html=False))
|
||||||
@ -247,8 +248,8 @@ def chi2(df, dep, ind, *, nan_policy='warn'):
|
|||||||
|
|
||||||
smct = sm.stats.Table2x2(np.flip(ct.to_numpy()), shift_zeros=False)
|
smct = sm.stats.Table2x2(np.flip(ct.to_numpy()), shift_zeros=False)
|
||||||
result = smct.test_nominal_association()
|
result = smct.test_nominal_association()
|
||||||
ORci = smct.oddsratio_confint()
|
ORci = smct.oddsratio_confint(config.alpha)
|
||||||
RRci = smct.riskratio_confint()
|
RRci = smct.riskratio_confint(config.alpha)
|
||||||
|
|
||||||
return PearsonChiSquaredResult(
|
return PearsonChiSquaredResult(
|
||||||
ct=ct, statistic=result.statistic, dof=result.df, pvalue=result.pvalue,
|
ct=ct, statistic=result.statistic, dof=result.df, pvalue=result.pvalue,
|
||||||
@ -272,10 +273,10 @@ class PearsonRResult:
|
|||||||
self.pvalue = pvalue
|
self.pvalue = pvalue
|
||||||
|
|
||||||
def _repr_html_(self):
|
def _repr_html_(self):
|
||||||
return '<i>r</i> (95% CI) = {}; <i>p</i> {}'.format(self.statistic.summary(), fmt_p(self.pvalue, html=True))
|
return '<i>r</i> ({:g}% CI) = {}; <i>p</i> {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, html=True))
|
||||||
|
|
||||||
def summary(self):
|
def summary(self):
|
||||||
return 'r (95% CI) = {}; p {}'.format(self.statistic.summary(), fmt_p(self.pvalue, html=False))
|
return 'r ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, html=False))
|
||||||
|
|
||||||
def pearsonr(df, dep, ind, *, nan_policy='warn'):
|
def pearsonr(df, dep, ind, *, nan_policy='warn'):
|
||||||
"""Compute the Pearson correlation coefficient (Pearson's r)"""
|
"""Compute the Pearson correlation coefficient (Pearson's r)"""
|
||||||
|
Loading…
Reference in New Issue
Block a user