Implement yli.OrdinalLogit as preferred model for ordinal logistic regression

OrdinalLogit uses a parameterisation where the cutoff terms are directly incorporated into the model parameters
This commit is contained in:
RunasSudo 2022-12-02 20:19:08 +11:00
parent f8e56d96b1
commit 0dab62ad0a
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
2 changed files with 41 additions and 22 deletions

View File

@ -19,7 +19,7 @@ from .config import config
from .descriptives import auto_descriptives
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
from .regress import PenalisedLogit, logit_then_regress, regress, regress_bootstrap, vif
from .regress import OrdinalLogit, PenalisedLogit, logit_then_regress, regress, regress_bootstrap, vif
from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, ttest_ind
def reload_me():

View File

@ -18,10 +18,10 @@ import numpy as np
import pandas as pd
import patsy
from scipy import stats
import statsmodels
from scipy.special import expit
import statsmodels, statsmodels.miscmodels.ordinal_model
import statsmodels.api as sm
from statsmodels.iolib.table import SimpleTable
from statsmodels.miscmodels.ordinal_model import OrderedModel
from statsmodels.stats.outliers_influence import variance_inflation_factor
from tqdm import tqdm
@ -334,7 +334,8 @@ class RegressionResult:
out += '<tr><th>{}</th><td></td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr>'.format(term_name)
# Render reference category
out += '<tr><td style="text-align:right;font-style:italic">{}</td><td>Ref.</td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr>'.format(term.ref_category)
if term.ref_category is not None:
out += '<tr><td style="text-align:right;font-style:italic">{}</td><td>Ref.</td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr>'.format(term.ref_category)
# Loop over terms
for sub_term_name, sub_term in term.categories.items():
@ -401,7 +402,8 @@ class RegressionResult:
table_data.append([term_name + ' ', '', '', '', '', ''])
# Render reference category
table_data.append(['{} '.format(term.ref_category), 'Ref.', '', '', '', ''])
if term.ref_category is not None:
table_data.append(['{} '.format(term.ref_category), 'Ref.', '', '', '', ''])
# Loop over terms
for sub_term_name, sub_term in term.categories.items():
@ -546,7 +548,7 @@ def regress(
if exp is None:
if model_class in (sm.Logit, sm.Poisson, PenalisedLogit):
exp = True
elif model_class is OrderedModel and model_kwargs.get('distr', 'probit') == 'logit':
elif model_class is OrdinalLogit:
exp = True
else:
exp = False
@ -568,7 +570,7 @@ def regress(
else:
dmatrices = _dmatrices
if model_class is OrderedModel:
if model_class is OrdinalLogit:
# Drop explicit intercept term
# FIXME: Check before dropping
dmatrices = (dmatrices[0], dmatrices[1].iloc[:,1:])
@ -604,9 +606,11 @@ def regress(
# Intercept term (single term)
term = '(Intercept)'
terms[term] = SingleTerm(raw_name, beta, pvalues[raw_name])
elif model_class is OrderedModel and '/' in raw_name:
# Ignore ordinal regression intercepts
pass
elif model_class is OrdinalLogit and '/' in raw_name:
# Group ordinal regression cutoffs
if '(Cutoffs)' not in terms:
terms['(Cutoffs)'] = CategoricalTerm({}, None)
terms['(Cutoffs)'].categories[raw_name] = SingleTerm(raw_name, beta, pvalues[raw_name])
else:
# Parse if required
factor, column, contrast = parse_patsy_term(formula, df, raw_name)
@ -628,12 +632,6 @@ def regress(
# Single term
terms[column] = SingleTerm(raw_name, beta, pvalues[raw_name])
# Handle ordinal regression intercepts
#if model_class is OrderedModel:
# intercept_names = [raw_name.split('/')[0] for raw_name in model.exog_names if '/' in raw_name]
# intercepts = model.transform_threshold_params(result._results.params[-len(intercept_names):])
# print(intercepts)
# Fit null model (for llnull)
if hasattr(result, 'llnull'):
llnull = result.llnull
@ -664,10 +662,6 @@ def regress(
if fit_kwargs.get('cov_type', 'nonrobust') != 'nonrobust':
full_name = 'Robust {}'.format(full_name)
comments = []
if model_class is OrderedModel:
comments.append('Cutpoints are omitted from the table of model parameters.')
return RegressionResult(
result,
full_name, model_class.__name__, method_name,
@ -675,7 +669,7 @@ def regress(
terms,
result.llf, llnull,
getattr(result, 'df_resid', None), getattr(result, 'rsquared', None), getattr(result, 'fvalue', None),
comments,
[],
exp
)
@ -816,7 +810,7 @@ def logit_then_regress(model_class, df, dep, formula, *, nan_policy='warn', **kw
class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel):
"""
statsmodel-compatible model for computing Firth penalised logistic regression
statsmodels-compatible model for computing Firth penalised logistic regression
Uses the R *logistf* library.
@ -894,3 +888,28 @@ class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel):
None # Set exp in regress()
)
# ------------------------------------------------------
# Ordinal logistic regression (R/Stata parameterisation)
class OrdinalLogit(statsmodels.miscmodels.ordinal_model.OrderedModel):
	"""
	statsmodels-compatible model for computing ordinal logistic (or probit) regression
	
	The implementation subclasses statsmodels' native *OrderedModel*, but substitutes an alternative parameterisation used by R and Stata.
	In the native statsmodels implementation, the first cutoff term is the true cutoff, and further cutoff terms are log differences between consecutive cutoffs.
	In this parameterisation, cutoff terms are represented directly in the model.
	"""
	
	def __init__(self, endog, exog, **kwargs):
		# Default to the logit link so this matches ordinal logistic regression
		# as fitted by R (MASS::polr) and Stata (ologit), unless the caller
		# explicitly requests another distribution (e.g. 'probit')
		if 'distr' not in kwargs:
			kwargs['distr'] = 'logit'
		
		super().__init__(endog, exog, **kwargs)
	
	def transform_threshold_params(self, params):
		# The cutoffs are the final k_levels - 1 elements of the parameter vector,
		# used directly (no exp/log-difference transform as in the parent class)
		th_params = params[-(self.k_levels - 1):]
		# Bracket with -inf/+inf so cumulative probabilities are defined at the extremes
		thresh = np.concatenate(([-np.inf], th_params, [np.inf]))
		return thresh
	
	def transform_reverse_threshold_params(self, params):
		# Inverse of the above as expected by OrderedModel: drop the trailing
		# +inf sentinel; remaining values are already the direct cutoff parameters
		return params[:-1]