Re-implement RegressionModel.bootstrap

This commit is contained in:
RunasSudo 2025-01-28 19:31:40 +11:00
parent a09a84f9ef
commit 57e472ca09
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
2 changed files with 54 additions and 1 deletions

View File

@ -53,6 +53,7 @@ Optional dependencies are:
* [BFpack](https://cran.r-project.org/web/packages/BFpack/index.html), for *bayesfactor_afbf* (*RegressionModel.bayesfactor_beta_zero*)
* [logistf](https://cran.r-project.org/web/packages/logistf/index.html), for *PenalisedLogit*
* [shap](https://shap.readthedocs.io/en/latest/), for *RegressionModel.shap*
* [tqdm](https://tqdm.github.io/), for *RegressionModel.bootstrap*
## Functions

View File

@ -38,7 +38,6 @@ from .sig_tests import ChiSquaredResult, FTestResult
from .utils import Estimate, PValueStyle, as_numeric, check_nan, cols_for_formula, convert_pandas_nullable, fmt_p, formula_factor_ref_category, parse_patsy_term
# TODO: Documentation
# TODO: Bootstrap
def vif(df, formula=None, *, nan_policy='warn'):
"""
@ -193,6 +192,7 @@ def regress(
result.dep = dep
result.formula = formula
result.nan_policy = nan_policy
result.fit_kwargs = fit_kwargs
if exp is not None:
result.exp = exp
result.fitted_dt = datetime.now()
@ -275,6 +275,7 @@ class RegressionModel:
self.dep = None
self.formula = None
self.nan_policy = None
self.fit_kwargs = None
self.exp = False
self.cov_type = None
@ -585,6 +586,57 @@ class RegressionModel:
return 1 - (self.ll_model - self.dof_model) / self.ll_null
def bootstrap(self, samples=1000):
    """
    Use bootstrapping to recompute confidence intervals and *p* values for the terms in the regression model

    Rows of the design matrices are resampled with replacement and the model is refitted to each resample.
    The refitted coefficients give percentile confidence intervals (at level ``config.alpha``) and two-sided *p* values.
    The point estimates of the terms are left unchanged.

    Mutates the current RegressionModel instance.

    :param samples: Number of bootstrap samples to draw (must be at least 1)
    :type samples: int

    :raises ValueError: If *samples* is less than 1
    :raises Exception: If the referenced DataFrame has been dropped

    :rtype: :class:`yli.regress.RegressionModel`
    """

    # Fail fast: with no samples, the p value computation below would divide by zero
    if samples < 1:
        raise ValueError('samples must be at least 1')

    from tqdm import tqdm

    df = self.df()
    if df is None:
        raise Exception('Referenced DataFrame has been dropped')

    # Preprocess data, check for NaN and get design matrices
    df_clean, dmatrices, dep_categories = df_to_dmatrices(df, self.dep, self.formula, self.nan_policy, [])

    # Dict mapping term raw names to the list of bootstrap betas
    bootstrap_results = {term.raw_name: [] for term in self.terms_flat()}

    # Combine the dependent variable (first column) and the predictors into one frame so rows are resampled together
    dmatrices = dmatrices[0].join(dmatrices[1])

    # Resample as many rows as were actually used for fitting.
    # The raw DataFrame may be longer than the design matrices if rows were dropped when handling NaN.
    n_rows = len(dmatrices)

    # fit_kwargs is None on instances where it was never populated
    fit_kwargs = self.fit_kwargs or {}

    # Draw bootstrap samples and regress
    for _ in tqdm(range(samples)):
        bootstrap_rows = dmatrices.sample(n_rows, replace=True)

        # Fit model
        result = self.__class__.fit(bootstrap_rows.iloc[:, 0], bootstrap_rows.iloc[:, 1:], **fit_kwargs)

        for term in result.terms_flat():
            bootstrap_results[term.raw_name].append(term.beta.point)

    # Combine bootstrap results
    for term in self.terms_flat():
        bootstrap_betas = bootstrap_results[term.raw_name]

        # Two-sided p value from the proportion of bootstrap betas falling below zero
        proportion_negative = sum(1 for b in bootstrap_betas if b < 0) / len(bootstrap_betas)
        bootstrap_pvalue = 2 * min(proportion_negative, 1 - proportion_negative)

        # Percentile confidence interval; the point estimate is retained
        lower = np.quantile(bootstrap_betas, config.alpha / 2)
        upper = np.quantile(bootstrap_betas, 1 - config.alpha / 2)

        term.beta = Estimate(term.beta.point, lower, upper)
        term.pvalue = bootstrap_pvalue

    self.cov_type = 'Bootstrap'

    return self
def shap(self, **kwargs):
"""
Compute SHAP values for the model