Implement yli.OrdinalLogit as preferred model for ordinal logistic regression

OrdinalLogit uses a parameterisation where the cutoff terms are directly incorporated
This commit is contained in:
RunasSudo 2022-12-02 20:19:08 +11:00
parent f8e56d96b1
commit 0dab62ad0a
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
2 changed files with 41 additions and 22 deletions

View File

@ -19,7 +19,7 @@ from .config import config
from .descriptives import auto_descriptives from .descriptives import auto_descriptives
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
from .regress import PenalisedLogit, logit_then_regress, regress, regress_bootstrap, vif from .regress import OrdinalLogit, PenalisedLogit, logit_then_regress, regress, regress_bootstrap, vif
from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, ttest_ind from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, ttest_ind
def reload_me(): def reload_me():

View File

@ -18,10 +18,10 @@ import numpy as np
import pandas as pd import pandas as pd
import patsy import patsy
from scipy import stats from scipy import stats
import statsmodels from scipy.special import expit
import statsmodels, statsmodels.miscmodels.ordinal_model
import statsmodels.api as sm import statsmodels.api as sm
from statsmodels.iolib.table import SimpleTable from statsmodels.iolib.table import SimpleTable
from statsmodels.miscmodels.ordinal_model import OrderedModel
from statsmodels.stats.outliers_influence import variance_inflation_factor from statsmodels.stats.outliers_influence import variance_inflation_factor
from tqdm import tqdm from tqdm import tqdm
@ -334,7 +334,8 @@ class RegressionResult:
out += '<tr><th>{}</th><td></td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr>'.format(term_name) out += '<tr><th>{}</th><td></td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr>'.format(term_name)
# Render reference category # Render reference category
out += '<tr><td style="text-align:right;font-style:italic">{}</td><td>Ref.</td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr>'.format(term.ref_category) if term.ref_category is not None:
out += '<tr><td style="text-align:right;font-style:italic">{}</td><td>Ref.</td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr>'.format(term.ref_category)
# Loop over terms # Loop over terms
for sub_term_name, sub_term in term.categories.items(): for sub_term_name, sub_term in term.categories.items():
@ -401,7 +402,8 @@ class RegressionResult:
table_data.append([term_name + ' ', '', '', '', '', '']) table_data.append([term_name + ' ', '', '', '', '', ''])
# Render reference category # Render reference category
table_data.append(['{} '.format(term.ref_category), 'Ref.', '', '', '', '']) if term.ref_category is not None:
table_data.append(['{} '.format(term.ref_category), 'Ref.', '', '', '', ''])
# Loop over terms # Loop over terms
for sub_term_name, sub_term in term.categories.items(): for sub_term_name, sub_term in term.categories.items():
@ -546,7 +548,7 @@ def regress(
if exp is None: if exp is None:
if model_class in (sm.Logit, sm.Poisson, PenalisedLogit): if model_class in (sm.Logit, sm.Poisson, PenalisedLogit):
exp = True exp = True
elif model_class is OrderedModel and model_kwargs.get('distr', 'probit') == 'logit': elif model_class is OrdinalLogit:
exp = True exp = True
else: else:
exp = False exp = False
@ -568,7 +570,7 @@ def regress(
else: else:
dmatrices = _dmatrices dmatrices = _dmatrices
if model_class is OrderedModel: if model_class is OrdinalLogit:
# Drop explicit intercept term # Drop explicit intercept term
# FIXME: Check before dropping # FIXME: Check before dropping
dmatrices = (dmatrices[0], dmatrices[1].iloc[:,1:]) dmatrices = (dmatrices[0], dmatrices[1].iloc[:,1:])
@ -604,9 +606,11 @@ def regress(
# Intercept term (single term) # Intercept term (single term)
term = '(Intercept)' term = '(Intercept)'
terms[term] = SingleTerm(raw_name, beta, pvalues[raw_name]) terms[term] = SingleTerm(raw_name, beta, pvalues[raw_name])
elif model_class is OrderedModel and '/' in raw_name: elif model_class is OrdinalLogit and '/' in raw_name:
# Ignore ordinal regression intercepts # Group ordinal regression cutoffs
pass if '(Cutoffs)' not in terms:
terms['(Cutoffs)'] = CategoricalTerm({}, None)
terms['(Cutoffs)'].categories[raw_name] = SingleTerm(raw_name, beta, pvalues[raw_name])
else: else:
# Parse if required # Parse if required
factor, column, contrast = parse_patsy_term(formula, df, raw_name) factor, column, contrast = parse_patsy_term(formula, df, raw_name)
@ -628,12 +632,6 @@ def regress(
# Single term # Single term
terms[column] = SingleTerm(raw_name, beta, pvalues[raw_name]) terms[column] = SingleTerm(raw_name, beta, pvalues[raw_name])
# Handle ordinal regression intercepts
#if model_class is OrderedModel:
# intercept_names = [raw_name.split('/')[0] for raw_name in model.exog_names if '/' in raw_name]
# intercepts = model.transform_threshold_params(result._results.params[-len(intercept_names):])
# print(intercepts)
# Fit null model (for llnull) # Fit null model (for llnull)
if hasattr(result, 'llnull'): if hasattr(result, 'llnull'):
llnull = result.llnull llnull = result.llnull
@ -664,10 +662,6 @@ def regress(
if fit_kwargs.get('cov_type', 'nonrobust') != 'nonrobust': if fit_kwargs.get('cov_type', 'nonrobust') != 'nonrobust':
full_name = 'Robust {}'.format(full_name) full_name = 'Robust {}'.format(full_name)
comments = []
if model_class is OrderedModel:
comments.append('Cutpoints are omitted from the table of model parameters.')
return RegressionResult( return RegressionResult(
result, result,
full_name, model_class.__name__, method_name, full_name, model_class.__name__, method_name,
@ -675,7 +669,7 @@ def regress(
terms, terms,
result.llf, llnull, result.llf, llnull,
getattr(result, 'df_resid', None), getattr(result, 'rsquared', None), getattr(result, 'fvalue', None), getattr(result, 'df_resid', None), getattr(result, 'rsquared', None), getattr(result, 'fvalue', None),
comments, [],
exp exp
) )
@ -816,7 +810,7 @@ def logit_then_regress(model_class, df, dep, formula, *, nan_policy='warn', **kw
class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel): class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel):
""" """
statsmodel-compatible model for computing Firth penalised logistic regression statsmodels-compatible model for computing Firth penalised logistic regression
Uses the R *logistf* library. Uses the R *logistf* library.
@ -894,3 +888,28 @@ class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel):
None # Set exp in regress() None # Set exp in regress()
) )
# ------------------------------------------------------
# Ordinal logistic regression (R/Stata parameterisation)
class OrdinalLogit(statsmodels.miscmodels.ordinal_model.OrderedModel):
    """
    statsmodels-compatible model for computing ordinal logistic (or probit) regression

    This is a subclass of statsmodels' native *OrderedModel* which swaps in the alternative parameterisation used by R and Stata.
    In the native statsmodels implementation, the first cutoff term is the true cutoff, and further cutoff terms are log differences between consecutive cutoffs.
    In this parameterisation, the cutoff terms are represented directly in the model.
    """

    def __init__(self, endog, exog, **kwargs):
        """
        Construct the model, defaulting *distr* to ``'logit'`` when the caller does not supply it

        *endog*, *exog* and all keyword arguments are forwarded to *OrderedModel*.
        """
        # Only fills in the default; an explicit distr=... from the caller is left alone
        kwargs.setdefault('distr', 'logit')
        super().__init__(endog, exog, **kwargs)

    def transform_threshold_params(self, params):
        """
        Return the cutoffs held in *params*, padded with -inf below and +inf above

        The cutoffs are the trailing ``k_levels - 1`` entries of *params* and are used as-is, with no transformation applied.
        """
        n_cutoffs = self.k_levels - 1
        cutoffs = params[-n_cutoffs:]
        return np.concatenate(([-np.inf], cutoffs, [np.inf]))

    def transform_reverse_threshold_params(self, params):
        """
        Return *params* with its final element dropped; the remaining values are passed through unchanged

        NOTE(review): presumably the dropped element is the upper (+inf) bound supplied by OrderedModel when computing starting values - confirm against the parent class.
        """
        return params[:-1]