From a48e96e96ccf82e77ee2c962f79b15f7134d58e1 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Fri, 2 Dec 2022 12:53:40 +1100 Subject: [PATCH] Handle OrderedModel for yli.regress --- yli/regress.py | 45 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/yli/regress.py b/yli/regress.py index 1af1f11..0e41bea 100644 --- a/yli/regress.py +++ b/yli/regress.py @@ -21,6 +21,7 @@ from scipy import stats import statsmodels import statsmodels.api as sm from statsmodels.iolib.table import SimpleTable +from statsmodels.miscmodels.ordinal_model import OrderedModel from statsmodels.stats.outliers_influence import variance_inflation_factor from tqdm import tqdm @@ -139,6 +140,7 @@ class RegressionResult: terms, llf, llnull, dof_resid, rsquared, f_statistic, + comments, exp ): #: Raw result from statsmodels *model.fit* @@ -182,6 +184,9 @@ class RegressionResult: #: *F* statistic (*float*; *None* if N/A) self.f_statistic = f_statistic + #: Comments for the model (*List[str]*) + self.comments = comments or [] + # Config for display style #: See :func:`yli.regress` self.exp = exp @@ -255,7 +260,8 @@ class RegressionResult: left_col.append(('Method:', self.fit_method)) left_col.append(('Date:', self.fitted_dt.strftime('%Y-%m-%d'))) left_col.append(('Time:', self.fitted_dt.strftime('%H:%M:%S'))) - left_col.append(('Std. Errors:', 'Non-Robust' if self.cov_type == 'nonrobust' else self.cov_type.upper() if self.cov_type.startswith('hc') else self.cov_type)) + if self.cov_type: + left_col.append(('Std. Errors:', 'Non-Robust' if self.cov_type == 'nonrobust' else self.cov_type.upper() if self.cov_type.startswith('hc') else self.cov_type)) # Right column right_col = [] @@ -345,6 +351,12 @@ class RegressionResult: # TODO: Have a detailed view which shows SE, t/z, etc. + if self.comments: + out += '
    ' + for comment in self.comments: + out += '
  1. {}
  2. '.format(comment) + out += '
' + return out def summary(self): @@ -412,6 +424,11 @@ class RegressionResult: out += '\n'.join(table2_lines[1:]) + if self.comments: + out += '\n' + for i, comment in enumerate(self.comments): + out += '\n{}. {}'.format(i + 1, comment) + return out class SingleTerm: @@ -526,6 +543,8 @@ def regress( if exp is None: if model_class in (sm.Logit, sm.Poisson, PenalisedLogit): exp = True + elif model_class is OrderedModel and model_kwargs.get('distr', 'probit') == 'logit': + exp = True else: exp = False @@ -546,6 +565,11 @@ def regress( else: dmatrices = _dmatrices + if model_class is OrderedModel: + # Drop explicit intercept term + # FIXME: Check before dropping + dmatrices = (dmatrices[0], dmatrices[1].iloc[:,1:]) + # Fit model model = model_class(endog=dmatrices[0], exog=dmatrices[1], **model_kwargs) model.formula = dep + ' ~ ' + formula @@ -577,6 +601,9 @@ def regress( # Intercept term (single term) term = '(Intercept)' terms[term] = SingleTerm(raw_name, beta, pvalues[raw_name]) + elif model_class is OrderedModel and '/' in raw_name: + # Ignore ordinal regression intercepts + pass else: # Parse if required factor, column, contrast = parse_patsy_term(formula, df, raw_name) @@ -598,6 +625,12 @@ def regress( # Single term terms[column] = SingleTerm(raw_name, beta, pvalues[raw_name]) + # Handle ordinal regression intercepts + #if model_class is OrderedModel: + # intercept_names = [raw_name.split('/')[0] for raw_name in model.exog_names if '/' in raw_name] + # intercepts = model.transform_threshold_params(result._results.params[-len(intercept_names):]) + # print(intercepts) + # Fit null model (for llnull) if hasattr(result, 'llnull'): llnull = result.llnull @@ -628,13 +661,18 @@ def regress( if fit_kwargs.get('cov_type', 'nonrobust') != 'nonrobust': full_name = 'Robust {}'.format(full_name) + comments = [] + if model_class is OrderedModel: + comments.append('Cutpoints are omitted from the table of model parameters.') + return RegressionResult( result, full_name, model_class.__name__, method_name, - dep, result.nobs, result.df_model, datetime.now(), result.cov_type, + dep, result.nobs, result.df_model, datetime.now(), getattr(result, 'cov_type', 'nonrobust'), terms, result.llf, llnull, getattr(result, 'df_resid', None), getattr(result, 'rsquared', None), getattr(result, 'fvalue', None), + comments, exp ) @@ -731,6 +769,7 @@ def regress_bootstrap( terms, full_model.llf, full_model.llnull, full_model.dof_resid, full_model.rsquared, full_model.f_statistic, + full_model.comments, full_model.exp ) @@ -848,5 +887,7 @@ class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel): terms, model['loglik'][0], model['loglik'][1], None, None, None, + [], None # Set exp in regress() ) +