Implement yli.OrdinalLogit as preferred model for ordinal logistic regression

OrdinalLogit uses a parameterisation where the cutoff terms are directly incorporated
2022-12-02 20:19:08 +11:00 · 2022-12-02 20:19:08 +11:00 · 0dab62ad0a
parent f8e56d96b1
commit 0dab62ad0a
2 changed files with 41 additions and 22 deletions
--- a/yli/init.py
+++ b/yli/init.py
@ -19,7 +19,7 @@ from .config import config
 from .descriptives import auto_descriptives
 from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
 from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
-from .regress import PenalisedLogit, logit_then_regress, regress, regress_bootstrap, vif
+from .regress import OrdinalLogit, PenalisedLogit, logit_then_regress, regress, regress_bootstrap, vif
 from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, ttest_ind
 def reload_me():
--- a/yli/regress.py
+++ b/yli/regress.py
@ -18,10 +18,10 @@ import numpy as np
 import pandas as pd
 import patsy
 from scipy import stats
-import statsmodels
+from scipy.special import expit
 import statsmodels, statsmodels.miscmodels.ordinal_model
 import statsmodels.api as sm
 from statsmodels.iolib.table import SimpleTable
 from statsmodels.miscmodels.ordinal_model import OrderedModel
 from statsmodels.stats.outliers_influence import variance_inflation_factor
 from tqdm import tqdm
@ -334,7 +334,8 @@ class RegressionResult:
 				out += '<tr><th>{}</th><td></td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr>'.format(term_name)
 				# Render reference category
-				out += '<tr><td style="text-align:right;font-style:italic">{}</td><td>Ref.</td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr>'.format(term.ref_category)
+				if term.ref_category is not None:
 					out += '<tr><td style="text-align:right;font-style:italic">{}</td><td>Ref.</td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr>'.format(term.ref_category)
 				# Loop over terms
 				for sub_term_name, sub_term in term.categories.items():
@ -401,7 +402,8 @@ class RegressionResult:
 				table_data.append([term_name + '  ', '', '', '', '', ''])
 				# Render reference category
-				table_data.append(['{}  '.format(term.ref_category), 'Ref.', '', '', '', ''])
+				if term.ref_category is not None:
 					table_data.append(['{}  '.format(term.ref_category), 'Ref.', '', '', '', ''])
 				# Loop over terms
 				for sub_term_name, sub_term in term.categories.items():
@ -546,7 +548,7 @@ def regress(
 	if exp is None:
 		if model_class in (sm.Logit, sm.Poisson, PenalisedLogit):
 			exp = True
-		elif model_class is OrderedModel and model_kwargs.get('distr', 'probit') == 'logit':
+		elif model_class is OrdinalLogit:
 			exp = True
 		else:
 			exp = False
@ -568,7 +570,7 @@ def regress(
 	else:
 		dmatrices = _dmatrices
-	if model_class is OrderedModel:
+	if model_class is OrdinalLogit:
 		# Drop explicit intercept term
 		# FIXME: Check before dropping
 		dmatrices = (dmatrices[0], dmatrices[1].iloc[:,1:])
@ -604,9 +606,11 @@ def regress(
 			# Intercept term (single term)
 			term = '(Intercept)'
 			terms[term] = SingleTerm(raw_name, beta, pvalues[raw_name])
-		elif model_class is OrderedModel and '/' in raw_name:
+		elif model_class is OrdinalLogit and '/' in raw_name:
-			# Ignore ordinal regression intercepts
+			# Group ordinal regression cutoffs
-			pass
+			if '(Cutoffs)' not in terms:
 				terms['(Cutoffs)'] = CategoricalTerm({}, None)
 			terms['(Cutoffs)'].categories[raw_name] = SingleTerm(raw_name, beta, pvalues[raw_name])
 		else:
 			# Parse if required
 			factor, column, contrast = parse_patsy_term(formula, df, raw_name)
@ -628,12 +632,6 @@ def regress(
 				# Single term
 				terms[column] = SingleTerm(raw_name, beta, pvalues[raw_name])
 	# Handle ordinal regression intercepts
 	#if model_class is OrderedModel:
 	#	intercept_names = [raw_name.split('/')[0] for raw_name in model.exog_names if '/' in raw_name]
 	#	intercepts = model.transform_threshold_params(result._results.params[-len(intercept_names):])
 	#	print(intercepts)
 	# Fit null model (for llnull)
 	if hasattr(result, 'llnull'):
 		llnull = result.llnull
@ -664,10 +662,6 @@ def regress(
 	if fit_kwargs.get('cov_type', 'nonrobust') != 'nonrobust':
 		full_name = 'Robust {}'.format(full_name)
 	comments = []
 	if model_class is OrderedModel:
 		comments.append('Cutpoints are omitted from the table of model parameters.')
 	return RegressionResult(
 		result,
 		full_name, model_class.__name__, method_name,
@ -675,7 +669,7 @@ def regress(
 		terms,
 		result.llf, llnull,
 		getattr(result, 'df_resid', None), getattr(result, 'rsquared', None), getattr(result, 'fvalue', None),
-		comments,
+		[],
 		exp
 	)
@ -816,7 +810,7 @@ def logit_then_regress(model_class, df, dep, formula, *, nan_policy='warn', **kw
 class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel):
 	"""
-	statsmodel-compatible model for computing Firth penalised logistic regression
+	statsmodels-compatible model for computing Firth penalised logistic regression
 	Uses the R *logistf* library.
@ -894,3 +888,28 @@ class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel):
 					None  # Set exp in regress()
 				)
 # ------------------------------------------------------
 # Ordinal logistic regression (R/Stata parameterisation)
 class OrdinalLogit(statsmodels.miscmodels.ordinal_model.OrderedModel):
 	"""
 	statsmodels-compatible model for computing ordinal logistic (or probit) regression
 	The implementation subclasses statsmodels' native *OrderedModel*, but substitutes an alternative parameterisation used by R and Stata.
 	In the native statsmodels implementation, the first cutoff term is the true cutoff, and further cutoff terms are log differences between consecutive cutoffs.
 	In this parameterisation, cutoff terms are represented directly in the model.
 	"""
 	def __init__(self, endog, exog, **kwargs):
 		# Default distr to 'logit' when the caller has not specified one; an explicit distr is passed through unchanged
 		if 'distr' not in kwargs:
 			kwargs['distr'] = 'logit'
 		super().__init__(endog, exog, **kwargs)
 	def transform_threshold_params(self, params):
 		# The last (k_levels - 1) entries of params are taken as the cutoffs and used as-is (no exp/cumsum step)
 		th_params = params[-(self.k_levels - 1):]
 		# Pad with -inf and +inf so the returned array has an open-ended boundary at each end
 		thresh = np.concatenate(([-np.inf], th_params, [np.inf]))
 		return thresh
 	def transform_reverse_threshold_params(self, params):
 		# Return params without its final element, with no further transformation
 		# NOTE(review): presumably the dropped element is the trailing +inf boundary supplied by the parent class - confirm against OrderedModel
 		return params[:-1]