Implement yli.OrdinalLogit as preferred model for ordinal logistic regression
OrdinalLogit uses a parameterisation where the cutoff terms are directly incorporated
This commit is contained in:
parent
f8e56d96b1
commit
0dab62ad0a
@ -19,7 +19,7 @@ from .config import config
|
|||||||
from .descriptives import auto_descriptives
|
from .descriptives import auto_descriptives
|
||||||
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
|
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
|
||||||
from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
|
from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
|
||||||
from .regress import PenalisedLogit, logit_then_regress, regress, regress_bootstrap, vif
|
from .regress import OrdinalLogit, PenalisedLogit, logit_then_regress, regress, regress_bootstrap, vif
|
||||||
from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, ttest_ind
|
from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, ttest_ind
|
||||||
|
|
||||||
def reload_me():
|
def reload_me():
|
||||||
|
@ -18,10 +18,10 @@ import numpy as np
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import patsy
|
import patsy
|
||||||
from scipy import stats
|
from scipy import stats
|
||||||
import statsmodels
|
from scipy.special import expit
|
||||||
|
import statsmodels, statsmodels.miscmodels.ordinal_model
|
||||||
import statsmodels.api as sm
|
import statsmodels.api as sm
|
||||||
from statsmodels.iolib.table import SimpleTable
|
from statsmodels.iolib.table import SimpleTable
|
||||||
from statsmodels.miscmodels.ordinal_model import OrderedModel
|
|
||||||
from statsmodels.stats.outliers_influence import variance_inflation_factor
|
from statsmodels.stats.outliers_influence import variance_inflation_factor
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
@ -334,7 +334,8 @@ class RegressionResult:
|
|||||||
out += '<tr><th>{}</th><td></td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr>'.format(term_name)
|
out += '<tr><th>{}</th><td></td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr>'.format(term_name)
|
||||||
|
|
||||||
# Render reference category
|
# Render reference category
|
||||||
out += '<tr><td style="text-align:right;font-style:italic">{}</td><td>Ref.</td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr>'.format(term.ref_category)
|
if term.ref_category is not None:
|
||||||
|
out += '<tr><td style="text-align:right;font-style:italic">{}</td><td>Ref.</td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr>'.format(term.ref_category)
|
||||||
|
|
||||||
# Loop over terms
|
# Loop over terms
|
||||||
for sub_term_name, sub_term in term.categories.items():
|
for sub_term_name, sub_term in term.categories.items():
|
||||||
@ -401,7 +402,8 @@ class RegressionResult:
|
|||||||
table_data.append([term_name + ' ', '', '', '', '', ''])
|
table_data.append([term_name + ' ', '', '', '', '', ''])
|
||||||
|
|
||||||
# Render reference category
|
# Render reference category
|
||||||
table_data.append(['{} '.format(term.ref_category), 'Ref.', '', '', '', ''])
|
if term.ref_category is not None:
|
||||||
|
table_data.append(['{} '.format(term.ref_category), 'Ref.', '', '', '', ''])
|
||||||
|
|
||||||
# Loop over terms
|
# Loop over terms
|
||||||
for sub_term_name, sub_term in term.categories.items():
|
for sub_term_name, sub_term in term.categories.items():
|
||||||
@ -546,7 +548,7 @@ def regress(
|
|||||||
if exp is None:
|
if exp is None:
|
||||||
if model_class in (sm.Logit, sm.Poisson, PenalisedLogit):
|
if model_class in (sm.Logit, sm.Poisson, PenalisedLogit):
|
||||||
exp = True
|
exp = True
|
||||||
elif model_class is OrderedModel and model_kwargs.get('distr', 'probit') == 'logit':
|
elif model_class is OrdinalLogit:
|
||||||
exp = True
|
exp = True
|
||||||
else:
|
else:
|
||||||
exp = False
|
exp = False
|
||||||
@ -568,7 +570,7 @@ def regress(
|
|||||||
else:
|
else:
|
||||||
dmatrices = _dmatrices
|
dmatrices = _dmatrices
|
||||||
|
|
||||||
if model_class is OrderedModel:
|
if model_class is OrdinalLogit:
|
||||||
# Drop explicit intercept term
|
# Drop explicit intercept term
|
||||||
# FIXME: Check before dropping
|
# FIXME: Check before dropping
|
||||||
dmatrices = (dmatrices[0], dmatrices[1].iloc[:,1:])
|
dmatrices = (dmatrices[0], dmatrices[1].iloc[:,1:])
|
||||||
@ -604,9 +606,11 @@ def regress(
|
|||||||
# Intercept term (single term)
|
# Intercept term (single term)
|
||||||
term = '(Intercept)'
|
term = '(Intercept)'
|
||||||
terms[term] = SingleTerm(raw_name, beta, pvalues[raw_name])
|
terms[term] = SingleTerm(raw_name, beta, pvalues[raw_name])
|
||||||
elif model_class is OrderedModel and '/' in raw_name:
|
elif model_class is OrdinalLogit and '/' in raw_name:
|
||||||
# Ignore ordinal regression intercepts
|
# Group ordinal regression cutoffs
|
||||||
pass
|
if '(Cutoffs)' not in terms:
|
||||||
|
terms['(Cutoffs)'] = CategoricalTerm({}, None)
|
||||||
|
terms['(Cutoffs)'].categories[raw_name] = SingleTerm(raw_name, beta, pvalues[raw_name])
|
||||||
else:
|
else:
|
||||||
# Parse if required
|
# Parse if required
|
||||||
factor, column, contrast = parse_patsy_term(formula, df, raw_name)
|
factor, column, contrast = parse_patsy_term(formula, df, raw_name)
|
||||||
@ -628,12 +632,6 @@ def regress(
|
|||||||
# Single term
|
# Single term
|
||||||
terms[column] = SingleTerm(raw_name, beta, pvalues[raw_name])
|
terms[column] = SingleTerm(raw_name, beta, pvalues[raw_name])
|
||||||
|
|
||||||
# Handle ordinal regression intercepts
|
|
||||||
#if model_class is OrderedModel:
|
|
||||||
# intercept_names = [raw_name.split('/')[0] for raw_name in model.exog_names if '/' in raw_name]
|
|
||||||
# intercepts = model.transform_threshold_params(result._results.params[-len(intercept_names):])
|
|
||||||
# print(intercepts)
|
|
||||||
|
|
||||||
# Fit null model (for llnull)
|
# Fit null model (for llnull)
|
||||||
if hasattr(result, 'llnull'):
|
if hasattr(result, 'llnull'):
|
||||||
llnull = result.llnull
|
llnull = result.llnull
|
||||||
@ -664,10 +662,6 @@ def regress(
|
|||||||
if fit_kwargs.get('cov_type', 'nonrobust') != 'nonrobust':
|
if fit_kwargs.get('cov_type', 'nonrobust') != 'nonrobust':
|
||||||
full_name = 'Robust {}'.format(full_name)
|
full_name = 'Robust {}'.format(full_name)
|
||||||
|
|
||||||
comments = []
|
|
||||||
if model_class is OrderedModel:
|
|
||||||
comments.append('Cutpoints are omitted from the table of model parameters.')
|
|
||||||
|
|
||||||
return RegressionResult(
|
return RegressionResult(
|
||||||
result,
|
result,
|
||||||
full_name, model_class.__name__, method_name,
|
full_name, model_class.__name__, method_name,
|
||||||
@ -675,7 +669,7 @@ def regress(
|
|||||||
terms,
|
terms,
|
||||||
result.llf, llnull,
|
result.llf, llnull,
|
||||||
getattr(result, 'df_resid', None), getattr(result, 'rsquared', None), getattr(result, 'fvalue', None),
|
getattr(result, 'df_resid', None), getattr(result, 'rsquared', None), getattr(result, 'fvalue', None),
|
||||||
comments,
|
[],
|
||||||
exp
|
exp
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -816,7 +810,7 @@ def logit_then_regress(model_class, df, dep, formula, *, nan_policy='warn', **kw
|
|||||||
|
|
||||||
class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel):
|
class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel):
|
||||||
"""
|
"""
|
||||||
statsmodel-compatible model for computing Firth penalised logistic regression
|
statsmodels-compatible model for computing Firth penalised logistic regression
|
||||||
|
|
||||||
Uses the R *logistf* library.
|
Uses the R *logistf* library.
|
||||||
|
|
||||||
@ -894,3 +888,28 @@ class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel):
|
|||||||
None # Set exp in regress()
|
None # Set exp in regress()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ------------------------------------------------------
|
||||||
|
# Ordinal logistic regression (R/Stata parameterisation)
|
||||||
|
|
||||||
|
class OrdinalLogit(statsmodels.miscmodels.ordinal_model.OrderedModel):
|
||||||
|
"""
|
||||||
|
statsmodels-compatible model for computing ordinal logistic (or probit) regression
|
||||||
|
|
||||||
|
The implementation subclasses statsmodels' native *OrderedModel*, but substitutes an alternative parameterisation used by R and Stata.
|
||||||
|
The the native statsmodels implementation, the first cutoff term is the true cutoff, and further cutoff terms are log differences between consecutive cutoffs.
|
||||||
|
In this parameterisation, cutoff terms are represented directly in the model.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, endog, exog, **kwargs):
|
||||||
|
if 'distr' not in kwargs:
|
||||||
|
kwargs['distr'] = 'logit'
|
||||||
|
|
||||||
|
super().__init__(endog, exog, **kwargs)
|
||||||
|
|
||||||
|
def transform_threshold_params(self, params):
|
||||||
|
th_params = params[-(self.k_levels - 1):]
|
||||||
|
thresh = np.concatenate(([-np.inf], th_params, [np.inf]))
|
||||||
|
return thresh
|
||||||
|
|
||||||
|
def transform_reverse_threshold_params(self, params):
|
||||||
|
return params[:-1]
|
||||||
|
Loading…
Reference in New Issue
Block a user