Re-implement RegressionModel.bootstrap
This commit is contained in:
parent
a09a84f9ef
commit
57e472ca09
@ -53,6 +53,7 @@ Optional dependencies are:
|
||||
* [BFpack](https://cran.r-project.org/web/packages/BFpack/index.html), for *bayesfactor_afbf* (*RegressionModel.bayesfactor_beta_zero*)
|
||||
* [logistf](https://cran.r-project.org/web/packages/logistf/index.html), for *PenalisedLogit*
|
||||
* [shap](https://shap.readthedocs.io/en/latest/), for *RegressionModel.shap*
|
||||
* [tqdm](https://tqdm.github.io/), for *RegressionModel.bootstrap*
|
||||
|
||||
## Functions
|
||||
|
||||
|
@ -38,7 +38,6 @@ from .sig_tests import ChiSquaredResult, FTestResult
|
||||
from .utils import Estimate, PValueStyle, as_numeric, check_nan, cols_for_formula, convert_pandas_nullable, fmt_p, formula_factor_ref_category, parse_patsy_term
|
||||
|
||||
# TODO: Documentation
|
||||
# TODO: Bootstrap
|
||||
|
||||
def vif(df, formula=None, *, nan_policy='warn'):
|
||||
"""
|
||||
@ -193,6 +192,7 @@ def regress(
|
||||
result.dep = dep
|
||||
result.formula = formula
|
||||
result.nan_policy = nan_policy
|
||||
result.fit_kwargs = fit_kwargs
|
||||
if exp is not None:
|
||||
result.exp = exp
|
||||
result.fitted_dt = datetime.now()
|
||||
@ -275,6 +275,7 @@ class RegressionModel:
|
||||
self.dep = None
|
||||
self.formula = None
|
||||
self.nan_policy = None
|
||||
self.fit_kwargs = None
|
||||
self.exp = False
|
||||
self.cov_type = None
|
||||
|
||||
@ -585,6 +586,57 @@ class RegressionModel:
|
||||
|
||||
return 1 - (self.ll_model - self.dof_model) / self.ll_null
|
||||
|
||||
def bootstrap(self, samples=1000):
|
||||
"""
|
||||
Use bootstrapping to recompute confidence intervals and *p* values for the terms in the regression model
|
||||
|
||||
Mutates the current RegressionModel instance.
|
||||
|
||||
:param samples: Number of bootstrap samples to draw
|
||||
:type samples: int
|
||||
|
||||
:rtype: :class:`yli.regress.RegressionModel`
|
||||
"""
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
df = self.df()
|
||||
if df is None:
|
||||
raise Exception('Referenced DataFrame has been dropped')
|
||||
|
||||
# Preprocess data, check for NaN and get design matrices
|
||||
df_clean, dmatrices, dep_categories = df_to_dmatrices(df, self.dep, self.formula, self.nan_policy, [])
|
||||
|
||||
# Initialise bootstrap_results
|
||||
bootstrap_results = {} # Dict mapping term raw names to bootstrap betas
|
||||
for term in self.terms_flat():
|
||||
bootstrap_results[term.raw_name] = []
|
||||
|
||||
# Draw bootstrap samples and regress
|
||||
dmatrices = dmatrices[0].join(dmatrices[1])
|
||||
|
||||
for i in tqdm(range(samples)):
|
||||
bootstrap_rows = dmatrices.sample(len(df), replace=True)
|
||||
|
||||
# Fit model
|
||||
result = self.__class__.fit(bootstrap_rows.iloc[:,0], bootstrap_rows.iloc[:,1:], **self.fit_kwargs)
|
||||
|
||||
for term in result.terms_flat():
|
||||
bootstrap_results[term.raw_name].append(term.beta.point)
|
||||
|
||||
# Combine bootstrap results
|
||||
for term in self.terms_flat():
|
||||
bootstrap_betas = bootstrap_results[term.raw_name]
|
||||
bootstrap_pvalue = sum(1 for b in bootstrap_betas if b < 0) / len(bootstrap_betas)
|
||||
bootstrap_pvalue = 2 * min(bootstrap_pvalue, 1 - bootstrap_pvalue)
|
||||
|
||||
term.beta = Estimate(term.beta.point, np.quantile(bootstrap_betas, config.alpha/2), np.quantile(bootstrap_betas, 1-config.alpha/2))
|
||||
term.pvalue = bootstrap_pvalue
|
||||
|
||||
self.cov_type = 'Bootstrap'
|
||||
|
||||
return self
|
||||
|
||||
def shap(self, **kwargs):
|
||||
"""
|
||||
Compute SHAP values for the model
|
||||
|
Loading…
x
Reference in New Issue
Block a user