Handle OrderedModel for yli.regress

This commit is contained in:
RunasSudo 2022-12-02 12:53:40 +11:00
parent 5b08784498
commit a48e96e96c
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A

View File

@ -21,6 +21,7 @@ from scipy import stats
import statsmodels import statsmodels
import statsmodels.api as sm import statsmodels.api as sm
from statsmodels.iolib.table import SimpleTable from statsmodels.iolib.table import SimpleTable
from statsmodels.miscmodels.ordinal_model import OrderedModel
from statsmodels.stats.outliers_influence import variance_inflation_factor from statsmodels.stats.outliers_influence import variance_inflation_factor
from tqdm import tqdm from tqdm import tqdm
@ -139,6 +140,7 @@ class RegressionResult:
terms, terms,
llf, llnull, llf, llnull,
dof_resid, rsquared, f_statistic, dof_resid, rsquared, f_statistic,
comments,
exp exp
): ):
#: Raw result from statsmodels *model.fit* #: Raw result from statsmodels *model.fit*
@ -182,6 +184,9 @@ class RegressionResult:
#: *F* statistic (*float*; *None* if N/A) #: *F* statistic (*float*; *None* if N/A)
self.f_statistic = f_statistic self.f_statistic = f_statistic
#: Comments for the model (*List[str]*)
self.comments = comments or []
# Config for display style # Config for display style
#: See :func:`yli.regress` #: See :func:`yli.regress`
self.exp = exp self.exp = exp
@ -255,6 +260,7 @@ class RegressionResult:
left_col.append(('Method:', self.fit_method)) left_col.append(('Method:', self.fit_method))
left_col.append(('Date:', self.fitted_dt.strftime('%Y-%m-%d'))) left_col.append(('Date:', self.fitted_dt.strftime('%Y-%m-%d')))
left_col.append(('Time:', self.fitted_dt.strftime('%H:%M:%S'))) left_col.append(('Time:', self.fitted_dt.strftime('%H:%M:%S')))
if self.cov_type:
left_col.append(('Std. Errors:', 'Non-Robust' if self.cov_type == 'nonrobust' else self.cov_type.upper() if self.cov_type.startswith('hc') else self.cov_type)) left_col.append(('Std. Errors:', 'Non-Robust' if self.cov_type == 'nonrobust' else self.cov_type.upper() if self.cov_type.startswith('hc') else self.cov_type))
# Right column # Right column
@ -345,6 +351,12 @@ class RegressionResult:
# TODO: Have a detailed view which shows SE, t/z, etc. # TODO: Have a detailed view which shows SE, t/z, etc.
if self.comments:
out += '<ol>'
for comment in self.comments:
out += '<li>{}</li>'.format(comment)
out += '</ol>'
return out return out
def summary(self): def summary(self):
@ -412,6 +424,11 @@ class RegressionResult:
out += '\n'.join(table2_lines[1:]) out += '\n'.join(table2_lines[1:])
if self.comments:
out += '\n'
for i, comment in enumerate(self.comments):
out += '\n{}. {}'.format(i + 1, comment)
return out return out
class SingleTerm: class SingleTerm:
@ -526,6 +543,8 @@ def regress(
if exp is None: if exp is None:
if model_class in (sm.Logit, sm.Poisson, PenalisedLogit): if model_class in (sm.Logit, sm.Poisson, PenalisedLogit):
exp = True exp = True
elif model_class is OrderedModel and model_kwargs.get('distr', 'probit') == 'logit':
exp = True
else: else:
exp = False exp = False
@ -546,6 +565,11 @@ def regress(
else: else:
dmatrices = _dmatrices dmatrices = _dmatrices
if model_class is OrderedModel:
# Drop explicit intercept term
# FIXME: Check before dropping
dmatrices = (dmatrices[0], dmatrices[1].iloc[:,1:])
# Fit model # Fit model
model = model_class(endog=dmatrices[0], exog=dmatrices[1], **model_kwargs) model = model_class(endog=dmatrices[0], exog=dmatrices[1], **model_kwargs)
model.formula = dep + ' ~ ' + formula model.formula = dep + ' ~ ' + formula
@ -577,6 +601,9 @@ def regress(
# Intercept term (single term) # Intercept term (single term)
term = '(Intercept)' term = '(Intercept)'
terms[term] = SingleTerm(raw_name, beta, pvalues[raw_name]) terms[term] = SingleTerm(raw_name, beta, pvalues[raw_name])
elif model_class is OrderedModel and '/' in raw_name:
# Ignore ordinal regression intercepts
pass
else: else:
# Parse if required # Parse if required
factor, column, contrast = parse_patsy_term(formula, df, raw_name) factor, column, contrast = parse_patsy_term(formula, df, raw_name)
@ -598,6 +625,12 @@ def regress(
# Single term # Single term
terms[column] = SingleTerm(raw_name, beta, pvalues[raw_name]) terms[column] = SingleTerm(raw_name, beta, pvalues[raw_name])
# Handle ordinal regression intercepts
#if model_class is OrderedModel:
# intercept_names = [raw_name.split('/')[0] for raw_name in model.exog_names if '/' in raw_name]
# intercepts = model.transform_threshold_params(result._results.params[-len(intercept_names):])
# print(intercepts)
# Fit null model (for llnull) # Fit null model (for llnull)
if hasattr(result, 'llnull'): if hasattr(result, 'llnull'):
llnull = result.llnull llnull = result.llnull
@ -628,13 +661,18 @@ def regress(
if fit_kwargs.get('cov_type', 'nonrobust') != 'nonrobust': if fit_kwargs.get('cov_type', 'nonrobust') != 'nonrobust':
full_name = 'Robust {}'.format(full_name) full_name = 'Robust {}'.format(full_name)
comments = []
if model_class is OrderedModel:
comments.append('Cutpoints are omitted from the table of model parameters.')
return RegressionResult( return RegressionResult(
result, result,
full_name, model_class.__name__, method_name, full_name, model_class.__name__, method_name,
dep, result.nobs, result.df_model, datetime.now(), result.cov_type, dep, result.nobs, result.df_model, datetime.now(), getattr(result, 'cov_type', 'nonrobust'),
terms, terms,
result.llf, llnull, result.llf, llnull,
getattr(result, 'df_resid', None), getattr(result, 'rsquared', None), getattr(result, 'fvalue', None), getattr(result, 'df_resid', None), getattr(result, 'rsquared', None), getattr(result, 'fvalue', None),
comments,
exp exp
) )
@ -731,6 +769,7 @@ def regress_bootstrap(
terms, terms,
full_model.llf, full_model.llnull, full_model.llf, full_model.llnull,
full_model.dof_resid, full_model.rsquared, full_model.f_statistic, full_model.dof_resid, full_model.rsquared, full_model.f_statistic,
full_model.comments,
full_model.exp full_model.exp
) )
@ -848,5 +887,7 @@ class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel):
terms, terms,
model['loglik'][0], model['loglik'][1], model['loglik'][0], model['loglik'][1],
None, None, None, None, None, None,
[],
None # Set exp in regress() None # Set exp in regress()
) )