From 503519c9c098e401fcd2a4f19a104a038463aa7b Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Fri, 21 Apr 2023 15:27:18 +1000 Subject: [PATCH] Implement Poisson regression --- yli/__init__.py | 2 +- yli/regress.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/yli/__init__.py b/yli/__init__.py index 0dd33f5..55b9574 100644 --- a/yli/__init__.py +++ b/yli/__init__.py @@ -20,7 +20,7 @@ from .descriptives import auto_correlations, auto_descriptives from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist from .graphs import init_fonts from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted -from .regress import IntervalCensoredCox, Logit, OLS, OrdinalLogit, PenalisedLogit, regress, vif +from .regress import IntervalCensoredCox, Logit, OLS, OrdinalLogit, PenalisedLogit, Poisson, regress, vif from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, spearman, ttest_ind, ttest_ind_multiple from .survival import kaplanmeier, logrank, turnbull from .utils import as_ordinal diff --git a/yli/regress.py b/yli/regress.py index 6afd89c..289b810 100644 --- a/yli/regress.py +++ b/yli/regress.py @@ -97,7 +97,13 @@ def vif(df, formula=None, *, nan_policy='warn'): return pd.Series(vifs) -def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels=False, exp=None): +def regress( + model_class, df, dep, formula, + *, + nan_policy='warn', + exposure=None, + bool_baselevels=False, exp=None +): """ Fit a statsmodels regression model @@ -111,6 +117,8 @@ def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels :type formula: str :param nan_policy: How to handle *nan* values (see :ref:`nan-handling`) :type nan_policy: str + :param exposure: Column in *df* for the exposure variable (numeric, some models only) + :type exposure: str :param bool_baselevels: Show reference categories for boolean independent variables even if reference category is *False* :type bool_baselevels: bool :param exp: Report exponentiated parameters rather than raw parameters, default (*None*) is to autodetect based on *model_class* @@ -128,7 +136,12 @@ def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels dmatrices, dep_categories = df_to_dmatrices(df, dep, formula, nan_policy) # Fit model - result = model_class.fit(dmatrices[0], dmatrices[1]) + if exposure is None: + result = model_class.fit(dmatrices[0], dmatrices[1]) + else: + result = model_class.fit(dmatrices[0], dmatrices[1], exposure=exposure) + + # Fill in general information result.df = df_ref result.dep = dep result.formula = formula @@ -204,6 +217,8 @@ def df_to_dmatrices(df, dep, formula, nan_policy): return dmatrices, dep_categories class RegressionModel: + # TODO: Documentation + def __init__(self): # Model configuration (all set in yli.regress) self.df = None @@ -1087,6 +1102,40 @@ class PenalisedLogit(RegressionModel): return result +class Poisson(RegressionModel): + """ + Poisson regression + """ + + # TODO: Document example + + @property + def model_long_name(self): + return 'Poisson Regression' + + @property + def model_short_name(self): + return 'Poisson' + + @classmethod + def fit(cls, data_dep, data_ind, exposure=None): + result = cls() + result.exp = True + result.cov_type = 'nonrobust' + + # Perform regression + raw_result = sm.Poisson(endog=data_dep, exog=data_ind, exposure=exposure, missing='raise').fit(disp=False) + + result.dof_model = raw_result.df_model + result.dof_resid = raw_result.df_resid + result.ll_model = raw_result.llf + result.ll_null = raw_result.llnull + + result.terms = raw_terms_from_statsmodels_result(raw_result) + result.vcov = raw_result.cov_params() + + return result + # ------------------ # Brant test helpers