From d17412ca0766681109846696986a25511144d79e Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Sun, 16 Jul 2023 16:22:20 +1000 Subject: [PATCH] Implement yli.GLM --- yli/__init__.py | 2 +- yli/regress.py | 53 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/yli/__init__.py b/yli/__init__.py index 8a69d66..53ecbf9 100644 --- a/yli/__init__.py +++ b/yli/__init__.py @@ -20,7 +20,7 @@ from .descriptives import auto_correlations, auto_descriptives from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist from .graphs import init_fonts, HorizontalEffectPlot from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted -from .regress import IntervalCensoredCox, Logit, OLS, OrdinalLogit, PenalisedLogit, Poisson, regress, vif +from .regress import GLM, IntervalCensoredCox, Logit, OLS, OrdinalLogit, PenalisedLogit, Poisson, regress, vif from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, spearman, ttest_ind, ttest_ind_multiple from .survival import kaplanmeier, logrank, turnbull from .utils import as_ordinal diff --git a/yli/regress.py b/yli/regress.py index c39cf2e..d4036cd 100644 --- a/yli/regress.py +++ b/yli/regress.py @@ -101,7 +101,7 @@ def regress( model_class, df, dep, formula, *, nan_policy='warn', - exposure=None, + exposure=None, family=None, method=None, maxiter=None, start_params=None, tolerance=None, reduced=None, bool_baselevels=False, exp=None @@ -121,6 +121,8 @@ def regress( :type nan_policy: str :param exposure: Column in *df* for the exposure variable (numeric, some models only) :type exposure: str + :param family: See :class:`yli.GLM` + :type family: str :param method: See statsmodels *model.fit* :param maxiter: See statsmodels *model.fit* :param start_params: See statsmodels *model.fit* @@ -146,6 +148,8 @@ def regress( fit_kwargs = {} if exposure is not None: fit_kwargs['exposure'] = exposure + if family is not None: + fit_kwargs['family'] = family if method is not None: fit_kwargs['method'] = method if maxiter is not None: @@ -639,6 +643,53 @@ def raw_terms_from_statsmodels_result(raw_result): # ------------------------ # Concrete implementations +class GLM(RegressionModel): + # TODO: Documentation + + @property + def model_long_name(self): + return 'Generalised Linear Model' + + @property + def model_short_name(self): + return 'GLM' + + @classmethod + def fit(cls, data_dep, data_ind, family, method='irls', maxiter=None, start_params=None): + result = cls() + result.exp = True + result.cov_type = 'nonrobust' + + if family == 'binomial': + sm_family = sm.families.Binomial() + elif family == 'gamma': + sm_family = sm.families.Gamma() + elif family == 'gaussian': + sm_family = sm.families.Gaussian() + elif family == 'inverse_gaussian': + sm_family = sm.families.InverseGaussian() + elif family == 'negative_binomial': + sm_family = sm.families.NegativeBinomial() + elif family == 'poisson': + sm_family = sm.families.Poisson() + elif family == 'tweedie': + sm_family = sm.families.Tweedie() + else: + raise ValueError('Unknown GLM family') + + # Perform regression + raw_result = sm.GLM(endog=data_dep, exog=data_ind, family=sm_family, missing='raise').fit(disp=False, method=method, start_params=start_params) + + result.dof_model = raw_result.df_model + result.dof_resid = raw_result.df_resid + result.ll_model = raw_result.llf + result.ll_null = raw_result.llnull + + result.terms = raw_terms_from_statsmodels_result(raw_result) + result.vcov = raw_result.cov_params() + + return result + class IntervalCensoredCox(RegressionModel): """ Interval-censored Cox regression