diff --git a/yli/__init__.py b/yli/__init__.py
index baa73ad..fff4bac 100644
--- a/yli/__init__.py
+++ b/yli/__init__.py
@@ -19,7 +19,7 @@ from .config import config
from .descriptives import auto_descriptives
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
-from .regress import PenalisedLogit, logit_then_regress, regress, regress_bootstrap, vif
+from .regress import OrdinalLogit, PenalisedLogit, logit_then_regress, regress, regress_bootstrap, vif
from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, ttest_ind
def reload_me():
diff --git a/yli/regress.py b/yli/regress.py
index 1769381..7153f61 100644
--- a/yli/regress.py
+++ b/yli/regress.py
@@ -18,10 +18,10 @@ import numpy as np
import pandas as pd
import patsy
from scipy import stats
-import statsmodels
+from scipy.special import expit
+import statsmodels, statsmodels.miscmodels.ordinal_model
import statsmodels.api as sm
from statsmodels.iolib.table import SimpleTable
-from statsmodels.miscmodels.ordinal_model import OrderedModel
from statsmodels.stats.outliers_influence import variance_inflation_factor
from tqdm import tqdm
@@ -334,7 +334,8 @@ class RegressionResult:
out += '
{} | | | | | |
'.format(term_name)
# Render reference category
- out += '{} | Ref. | | | | |
'.format(term.ref_category)
+ if term.ref_category is not None:
+ out += '{} | Ref. | | | | |
'.format(term.ref_category)
# Loop over terms
for sub_term_name, sub_term in term.categories.items():
@@ -401,7 +402,8 @@ class RegressionResult:
table_data.append([term_name + ' ', '', '', '', '', ''])
# Render reference category
- table_data.append(['{} '.format(term.ref_category), 'Ref.', '', '', '', ''])
+ if term.ref_category is not None:
+ table_data.append(['{} '.format(term.ref_category), 'Ref.', '', '', '', ''])
# Loop over terms
for sub_term_name, sub_term in term.categories.items():
@@ -546,7 +548,7 @@ def regress(
if exp is None:
if model_class in (sm.Logit, sm.Poisson, PenalisedLogit):
exp = True
- elif model_class is OrderedModel and model_kwargs.get('distr', 'probit') == 'logit':
+ elif model_class is OrdinalLogit:
exp = True
else:
exp = False
@@ -568,7 +570,7 @@ def regress(
else:
dmatrices = _dmatrices
- if model_class is OrderedModel:
+ if model_class is OrdinalLogit:
# Drop explicit intercept term
# FIXME: Check before dropping
dmatrices = (dmatrices[0], dmatrices[1].iloc[:,1:])
@@ -604,9 +606,11 @@ def regress(
# Intercept term (single term)
term = '(Intercept)'
terms[term] = SingleTerm(raw_name, beta, pvalues[raw_name])
- elif model_class is OrderedModel and '/' in raw_name:
- # Ignore ordinal regression intercepts
- pass
+ elif model_class is OrdinalLogit and '/' in raw_name:
+ # Group ordinal regression cutoffs
+ if '(Cutoffs)' not in terms:
+ terms['(Cutoffs)'] = CategoricalTerm({}, None)
+ terms['(Cutoffs)'].categories[raw_name] = SingleTerm(raw_name, beta, pvalues[raw_name])
else:
# Parse if required
factor, column, contrast = parse_patsy_term(formula, df, raw_name)
@@ -628,12 +632,6 @@ def regress(
# Single term
terms[column] = SingleTerm(raw_name, beta, pvalues[raw_name])
- # Handle ordinal regression intercepts
- #if model_class is OrderedModel:
- # intercept_names = [raw_name.split('/')[0] for raw_name in model.exog_names if '/' in raw_name]
- # intercepts = model.transform_threshold_params(result._results.params[-len(intercept_names):])
- # print(intercepts)
-
# Fit null model (for llnull)
if hasattr(result, 'llnull'):
llnull = result.llnull
@@ -664,10 +662,6 @@ def regress(
if fit_kwargs.get('cov_type', 'nonrobust') != 'nonrobust':
full_name = 'Robust {}'.format(full_name)
- comments = []
- if model_class is OrderedModel:
- comments.append('Cutpoints are omitted from the table of model parameters.')
-
return RegressionResult(
result,
full_name, model_class.__name__, method_name,
@@ -675,7 +669,7 @@ def regress(
terms,
result.llf, llnull,
getattr(result, 'df_resid', None), getattr(result, 'rsquared', None), getattr(result, 'fvalue', None),
- comments,
+ [],
exp
)
@@ -816,7 +810,7 @@ def logit_then_regress(model_class, df, dep, formula, *, nan_policy='warn', **kw
class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel):
"""
- statsmodel-compatible model for computing Firth penalised logistic regression
+ statsmodels-compatible model for computing Firth penalised logistic regression
Uses the R *logistf* library.
@@ -894,3 +888,28 @@ class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel):
None # Set exp in regress()
)
+# ------------------------------------------------------
+# Ordinal logistic regression (R/Stata parameterisation)
+
+class OrdinalLogit(statsmodels.miscmodels.ordinal_model.OrderedModel):
+ """
+ statsmodels-compatible model for computing ordinal logistic (or probit) regression
+
+ The implementation subclasses statsmodels' native *OrderedModel*, but substitutes an alternative parameterisation used by R and Stata.
+ The the native statsmodels implementation, the first cutoff term is the true cutoff, and further cutoff terms are log differences between consecutive cutoffs.
+ In this parameterisation, cutoff terms are represented directly in the model.
+ """
+
+ def __init__(self, endog, exog, **kwargs):
+ if 'distr' not in kwargs:
+ kwargs['distr'] = 'logit'
+
+ super().__init__(endog, exog, **kwargs)
+
+ def transform_threshold_params(self, params):
+ th_params = params[-(self.k_levels - 1):]
+ thresh = np.concatenate(([-np.inf], th_params, [np.inf]))
+ return thresh
+
+ def transform_reverse_threshold_params(self, params):
+ return params[:-1]