Implement Poisson regression

This commit is contained in:
RunasSudo 2023-04-21 15:27:18 +10:00
parent 844e6bdec9
commit 503519c9c0
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
2 changed files with 52 additions and 3 deletions

View File

@ -20,7 +20,7 @@ from .descriptives import auto_correlations, auto_descriptives
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
from .graphs import init_fonts from .graphs import init_fonts
from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
from .regress import IntervalCensoredCox, Logit, OLS, OrdinalLogit, PenalisedLogit, regress, vif from .regress import IntervalCensoredCox, Logit, OLS, OrdinalLogit, PenalisedLogit, Poisson, regress, vif
from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, spearman, ttest_ind, ttest_ind_multiple from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, spearman, ttest_ind, ttest_ind_multiple
from .survival import kaplanmeier, logrank, turnbull from .survival import kaplanmeier, logrank, turnbull
from .utils import as_ordinal from .utils import as_ordinal

View File

@ -97,7 +97,13 @@ def vif(df, formula=None, *, nan_policy='warn'):
return pd.Series(vifs) return pd.Series(vifs)
def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels=False, exp=None): def regress(
model_class, df, dep, formula,
*,
nan_policy='warn',
exposure=None,
bool_baselevels=False, exp=None
):
""" """
Fit a statsmodels regression model Fit a statsmodels regression model
@ -111,6 +117,8 @@ def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels
:type formula: str :type formula: str
:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`) :param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
:type nan_policy: str :type nan_policy: str
:param exposure: Column in *df* for the exposure variable (numeric, some models only)
:type exposure: str
:param bool_baselevels: Show reference categories for boolean independent variables even if reference category is *False* :param bool_baselevels: Show reference categories for boolean independent variables even if reference category is *False*
:type bool_baselevels: bool :type bool_baselevels: bool
:param exp: Report exponentiated parameters rather than raw parameters, default (*None*) is to autodetect based on *model_class* :param exp: Report exponentiated parameters rather than raw parameters, default (*None*) is to autodetect based on *model_class*
@ -128,7 +136,12 @@ def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels
dmatrices, dep_categories = df_to_dmatrices(df, dep, formula, nan_policy) dmatrices, dep_categories = df_to_dmatrices(df, dep, formula, nan_policy)
# Fit model # Fit model
if exposure is None:
result = model_class.fit(dmatrices[0], dmatrices[1]) result = model_class.fit(dmatrices[0], dmatrices[1])
else:
result = model_class.fit(dmatrices[0], dmatrices[1], exposure=exposure)
# Fill in general information
result.df = df_ref result.df = df_ref
result.dep = dep result.dep = dep
result.formula = formula result.formula = formula
@ -204,6 +217,8 @@ def df_to_dmatrices(df, dep, formula, nan_policy):
return dmatrices, dep_categories return dmatrices, dep_categories
class RegressionModel: class RegressionModel:
# TODO: Documentation
def __init__(self): def __init__(self):
# Model configuration (all set in yli.regress) # Model configuration (all set in yli.regress)
self.df = None self.df = None
@ -1087,6 +1102,40 @@ class PenalisedLogit(RegressionModel):
return result return result
class Poisson(RegressionModel):
"""
Poisson regression
"""
# TODO: Document example
@property
def model_long_name(self):
return 'Poisson Regression'
@property
def model_short_name(self):
return 'Poisson'
@classmethod
def fit(cls, data_dep, data_ind, exposure=None):
result = cls()
result.exp = True
result.cov_type = 'nonrobust'
# Perform regression
raw_result = sm.Poisson(endog=data_dep, exog=data_ind, exposure=exposure, missing='raise').fit(disp=False)
result.dof_model = raw_result.df_model
result.dof_resid = raw_result.df_resid
result.ll_model = raw_result.llf
result.ll_null = raw_result.llnull
result.terms = raw_terms_from_statsmodels_result(raw_result)
result.vcov = raw_result.cov_params()
return result
# ------------------ # ------------------
# Brant test helpers # Brant test helpers