Add unit test for OrdinalLogit

This commit is contained in:
RunasSudo 2022-12-02 21:43:05 +11:00
parent 2135796d85
commit ea2d04ada1
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
5 changed files with 102 additions and 3 deletions

BIN
tests/data/ucla_ologit.dta Normal file

Binary file not shown.

View File

@ -33,7 +33,7 @@ def test_beta_ratio_vs_jsaffer_pdf():
y = dist.pdf(x) y = dist.pdf(x)
# Compare with expected values from jsaffer implementation # Compare with expected values from jsaffer implementation
expected = np.load('tests/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[0] expected = np.load('tests/data/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[0]
assert y == approx(expected) assert y == approx(expected)
def test_beta_ratio_vs_jsaffer_cdf(): def test_beta_ratio_vs_jsaffer_cdf():
@ -48,7 +48,7 @@ def test_beta_ratio_vs_jsaffer_cdf():
y = dist.cdf(x) y = dist.cdf(x)
# Compare with expected values from jsaffer implementation # Compare with expected values from jsaffer implementation
expected = np.load('tests/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[1] expected = np.load('tests/data/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[1]
assert y == approx(expected) assert y == approx(expected)
def _gen_beta_ratio_vs_jsaffer(): def _gen_beta_ratio_vs_jsaffer():
@ -62,7 +62,7 @@ def _gen_beta_ratio_vs_jsaffer():
y1 = np.vectorize(lambda w: float(beta_quotient_distribution.pdf_bb_ratio(a1, a2, b1, b2, w)))(x) y1 = np.vectorize(lambda w: float(beta_quotient_distribution.pdf_bb_ratio(a1, a2, b1, b2, w)))(x)
y2 = np.vectorize(lambda w: float(beta_quotient_distribution.cdf_bb_ratio(a1, a2, b1, b2, w)))(x) y2 = np.vectorize(lambda w: float(beta_quotient_distribution.cdf_bb_ratio(a1, a2, b1, b2, w)))(x)
np.save('tests/beta_ratio_vs_jsaffer.npy', np.array([y1, y2]), allow_pickle=False) np.save('tests/data/beta_ratio_vs_jsaffer.npy', np.array([y1, y2]), allow_pickle=False)
def test_beta_ratio_mean_vs_empirical(): def test_beta_ratio_mean_vs_empirical():
"""Compare beta_ratio.mean (via beta_ratio._munp) with empirical mean""" """Compare beta_ratio.mean (via beta_ratio._munp) with empirical mean"""

View File

@ -0,0 +1,64 @@
# scipy-yli: Helpful SciPy utilities and recipes
# Copyright © 2022 Lee Yingtong Li (RunasSudo)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from pytest import approx
import pandas as pd
import yli
def test_ordinallogit_ucla():
	"""Compare yli.regress with yli.OrdinalLogit for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/"""
	
	# Dataset from the UCLA OARC ordinal logistic regression example (Stata format)
	df = pd.read_stata('tests/data/ucla_ologit.dta')
	
	# exp=False keeps the coefficients on the log-odds scale,
	# matching the R/Stata coefficients asserted below
	result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)
	
	# Point estimates compared with the UCLA example results
	assert result.terms['pared'].beta.point == approx(1.04769, abs=0.0001)
	assert result.terms['public'].beta.point == approx(-0.05879, abs=0.001)
	assert result.terms['gpa'].beta.point == approx(0.61594, abs=0.001)
	
	# Cutoff terms are exposed under the synthetic '(Cutoffs)' term,
	# keyed by the 'lower/upper' category pair they separate
	assert result.terms['(Cutoffs)'].categories['unlikely/somewhat likely'].beta.point == approx(2.20391, abs=0.001)
	assert result.terms['(Cutoffs)'].categories['somewhat likely/very likely'].beta.point == approx(4.29936, abs=0.001)
	
	# Confidence intervals compared with Stata 16
	# . ologit apply pared public gpa
	assert result.terms['(Cutoffs)'].categories['unlikely/somewhat likely'].beta.ci_lower == approx(0.6754621, abs=0.001)
	assert result.terms['(Cutoffs)'].categories['unlikely/somewhat likely'].beta.ci_upper == approx(3.731184, abs=0.001)
	assert result.terms['(Cutoffs)'].categories['somewhat likely/very likely'].beta.ci_lower == approx(2.72234, abs=0.001)
	assert result.terms['(Cutoffs)'].categories['somewhat likely/very likely'].beta.ci_upper == approx(5.875195, abs=0.001)
	
	# Fitted date/time are substituted into the template via format()
	# NOTE(review): the column alignment inside this expected string, and possibly
	# some non-ASCII characters (e.g. "Pseudo :" looks truncated), were mangled
	# by text extraction — verify spacing against actual result.summary() output
	expected_summary = ''' Ordinal Logistic Regression Results
===============================================================
Dep. Variable: apply | No. Observations: 400
Model: OrdinalLogit | Df. Model: 5
Method: Maximum Likelihood | Df. Residuals: 395
Date: {0:%Y-%m-%d} | Pseudo : 0.03
Time: {0:%H:%M:%S} | LL-Model: -358.51
Std. Errors: Non-Robust | LL-Null: -370.60
| p (LR): <0.001*
===============================================================
β (95% CI) p
------------------------------------------------------------
pared 1.05 (0.53 - 1.57) <0.001*
public -0.06 (-0.64 - 0.53) 0.84
gpa 0.62 (0.10 - 1.13) 0.02*
(Cutoffs)
unlikely/somewhat likely 2.20 (0.68 - 3.73) 0.005*
somewhat likely/very likely 4.30 (2.72 - 5.88) <0.001*
------------------------------------------------------------'''.format(result.fitted_dt)
	
	assert result.summary() == expected_summary

View File

@ -858,6 +858,8 @@ def regress(
# Get full name to display # Get full name to display
if model_class is sm.Logit: if model_class is sm.Logit:
full_name = 'Logistic Regression' full_name = 'Logistic Regression'
elif model_class is OrdinalLogit:
full_name = 'Ordinal Logistic Regression'
else: else:
full_name = '{} Regression'.format(model_class.__name__) full_name = '{} Regression'.format(model_class.__name__)
if fit_kwargs.get('cov_type', 'nonrobust') != 'nonrobust': if fit_kwargs.get('cov_type', 'nonrobust') != 'nonrobust':
@ -1004,6 +1006,39 @@ class OrdinalLogit(statsmodels.miscmodels.ordinal_model.OrderedModel):
The implementation subclasses statsmodels' native *OrderedModel*, but substitutes an alternative parameterisation used by R and Stata. The implementation subclasses statsmodels' native *OrderedModel*, but substitutes an alternative parameterisation used by R and Stata.
In the native statsmodels implementation, the first cutoff term is the true cutoff, and further cutoff terms are log differences between consecutive cutoffs. In the native statsmodels implementation, the first cutoff term is the true cutoff, and further cutoff terms are log differences between consecutive cutoffs.
In this parameterisation, cutoff terms are represented directly in the model. In this parameterisation, cutoff terms are represented directly in the model.
**Example:**
.. code-block::
df = pd.DataFrame(...)
yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)
.. code-block:: text
Ordinal Logistic Regression Results
===============================================================
Dep. Variable: apply | No. Observations: 400
Model: OrdinalLogit | Df. Model: 5
Method: Maximum Likelihood | Df. Residuals: 395
Date: 2022-12-02 | Pseudo : 0.03
Time: 21:30:38 | LL-Model: -358.51
Std. Errors: Non-Robust | LL-Null: -370.60
| p (LR): <0.001*
===============================================================
β (95% CI) p
------------------------------------------------------------
pared 1.05 (0.53 - 1.57) <0.001*
public -0.06 (-0.64 - 0.53) 0.84
gpa 0.62 (0.10 - 1.13) 0.02*
(Cutoffs)
unlikely/somewhat likely 2.20 (0.68 - 3.73) 0.005*
somewhat likely/very likely 4.30 (2.72 - 5.88) <0.001*
------------------------------------------------------------
The output summarises the result of the regression.
The parameters shown under "(Cutoffs)" are the cutoff values in the latent variable parameterisation of ordinal regression.
Note that because `exp=False` was passed, the parameter estimates are not automatically exponentiated.
""" """
def __init__(self, endog, exog, **kwargs): def __init__(self, endog, exog, **kwargs):