Add unit test for OrdinalLogit
This commit is contained in:
parent
2135796d85
commit
ea2d04ada1
BIN
tests/data/ucla_ologit.dta
Normal file
BIN
tests/data/ucla_ologit.dta
Normal file
Binary file not shown.
@ -33,7 +33,7 @@ def test_beta_ratio_vs_jsaffer_pdf():
|
||||
y = dist.pdf(x)
|
||||
|
||||
# Compare with expected values from jsaffer implementation
|
||||
expected = np.load('tests/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[0]
|
||||
expected = np.load('tests/data/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[0]
|
||||
assert y == approx(expected)
|
||||
|
||||
def test_beta_ratio_vs_jsaffer_cdf():
|
||||
@ -48,7 +48,7 @@ def test_beta_ratio_vs_jsaffer_cdf():
|
||||
y = dist.cdf(x)
|
||||
|
||||
# Compare with expected values from jsaffer implementation
|
||||
expected = np.load('tests/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[1]
|
||||
expected = np.load('tests/data/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[1]
|
||||
assert y == approx(expected)
|
||||
|
||||
def _gen_beta_ratio_vs_jsaffer():
|
||||
@ -62,7 +62,7 @@ def _gen_beta_ratio_vs_jsaffer():
|
||||
y1 = np.vectorize(lambda w: float(beta_quotient_distribution.pdf_bb_ratio(a1, a2, b1, b2, w)))(x)
|
||||
y2 = np.vectorize(lambda w: float(beta_quotient_distribution.cdf_bb_ratio(a1, a2, b1, b2, w)))(x)
|
||||
|
||||
np.save('tests/beta_ratio_vs_jsaffer.npy', np.array([y1, y2]), allow_pickle=False)
|
||||
np.save('tests/data/beta_ratio_vs_jsaffer.npy', np.array([y1, y2]), allow_pickle=False)
|
||||
|
||||
def test_beta_ratio_mean_vs_empirical():
|
||||
"""Compare beta_ratio.mean (via beta_ratio._munp) with empirical mean"""
|
||||
|
64
tests/test_ordinallogit.py
Normal file
64
tests/test_ordinallogit.py
Normal file
@ -0,0 +1,64 @@
|
||||
# scipy-yli: Helpful SciPy utilities and recipes
|
||||
# Copyright © 2022 Lee Yingtong Li (RunasSudo)
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
from pytest import approx
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import yli
|
||||
|
||||
def test_ordinallogit_ucla():
	"""Compare yli.regress with yli.OrdinalLogit for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/"""
	
	data = pd.read_stata('tests/data/ucla_ologit.dta')
	
	result = yli.regress(yli.OrdinalLogit, data, 'apply', 'pared + public + gpa', exp=False)
	
	# Point estimates for the exposure terms
	terms = result.terms
	assert terms['pared'].beta.point == approx(1.04769, abs=0.0001)
	assert terms['public'].beta.point == approx(-0.05879, abs=0.001)
	assert terms['gpa'].beta.point == approx(0.61594, abs=0.001)
	
	# Cutoff terms in the latent-variable parameterisation
	cutoffs = terms['(Cutoffs)'].categories
	cut1 = cutoffs['unlikely/somewhat likely'].beta
	cut2 = cutoffs['somewhat likely/very likely'].beta
	assert cut1.point == approx(2.20391, abs=0.001)
	assert cut2.point == approx(4.29936, abs=0.001)
	
	# Confidence intervals compared with Stata 16
	# . ologit apply pared public gpa
	assert cut1.ci_lower == approx(0.6754621, abs=0.001)
	assert cut1.ci_upper == approx(3.731184, abs=0.001)
	assert cut2.ci_lower == approx(2.72234, abs=0.001)
	assert cut2.ci_upper == approx(5.875195, abs=0.001)
	
	# NOTE(review): column alignment inside this literal may have been lost in
	# transcription of the diff — verify against actual result.summary() output
	expected_summary = ''' Ordinal Logistic Regression Results
===============================================================
Dep. Variable: apply | No. Observations: 400
Model: OrdinalLogit | Df. Model: 5
Method: Maximum Likelihood | Df. Residuals: 395
Date: {0:%Y-%m-%d} | Pseudo R²: 0.03
Time: {0:%H:%M:%S} | LL-Model: -358.51
Std. Errors: Non-Robust | LL-Null: -370.60
| p (LR): <0.001*
===============================================================
β (95% CI) p
------------------------------------------------------------
pared 1.05 (0.53 - 1.57) <0.001*
public -0.06 (-0.64 - 0.53) 0.84
gpa 0.62 (0.10 - 1.13) 0.02*
(Cutoffs)
unlikely/somewhat likely 2.20 (0.68 - 3.73) 0.005*
somewhat likely/very likely 4.30 (2.72 - 5.88) <0.001*
------------------------------------------------------------'''.format(result.fitted_dt)
	
	assert result.summary() == expected_summary
|
@ -858,6 +858,8 @@ def regress(
|
||||
# Get full name to display
|
||||
if model_class is sm.Logit:
|
||||
full_name = 'Logistic Regression'
|
||||
elif model_class is OrdinalLogit:
|
||||
full_name = 'Ordinal Logistic Regression'
|
||||
else:
|
||||
full_name = '{} Regression'.format(model_class.__name__)
|
||||
if fit_kwargs.get('cov_type', 'nonrobust') != 'nonrobust':
|
||||
@ -1004,6 +1006,39 @@ class OrdinalLogit(statsmodels.miscmodels.ordinal_model.OrderedModel):
|
||||
The implementation subclasses statsmodels' native *OrderedModel*, but substitutes an alternative parameterisation used by R and Stata.
|
||||
In the native statsmodels implementation, the first cutoff term is the true cutoff, and further cutoff terms are log differences between consecutive cutoffs.
|
||||
In this parameterisation, cutoff terms are represented directly in the model.
|
||||
|
||||
**Example:**
|
||||
|
||||
.. code-block::
|
||||
|
||||
df = pd.DataFrame(...)
|
||||
yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
Ordinal Logistic Regression Results
|
||||
===============================================================
|
||||
Dep. Variable: apply | No. Observations: 400
|
||||
Model: OrdinalLogit | Df. Model: 5
|
||||
Method: Maximum Likelihood | Df. Residuals: 395
|
||||
Date: 2022-12-02 | Pseudo R²: 0.03
|
||||
Time: 21:30:38 | LL-Model: -358.51
|
||||
Std. Errors: Non-Robust | LL-Null: -370.60
|
||||
| p (LR): <0.001*
|
||||
===============================================================
|
||||
β (95% CI) p
|
||||
------------------------------------------------------------
|
||||
pared 1.05 (0.53 - 1.57) <0.001*
|
||||
public -0.06 (-0.64 - 0.53) 0.84
|
||||
gpa 0.62 (0.10 - 1.13) 0.02*
|
||||
(Cutoffs)
|
||||
unlikely/somewhat likely 2.20 (0.68 - 3.73) 0.005*
|
||||
somewhat likely/very likely 4.30 (2.72 - 5.88) <0.001*
|
||||
------------------------------------------------------------
|
||||
|
||||
The output summarises the result of the regression.
|
||||
The parameters shown under "(Cutoffs)" are the cutoff values in the latent variable parameterisation of ordinal regression.
|
||||
Note that because `exp=False` was passed, the parameter estimates are not automatically exponentiated.
|
||||
"""
|
||||
|
||||
def __init__(self, endog, exog, **kwargs):
|
||||
|
Loading…
Reference in New Issue
Block a user