From ea2d04ada18670aaf0181152aff99ca46747f09e Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Fri, 2 Dec 2022 21:43:05 +1100 Subject: [PATCH] Add unit test for OrdinalLogit --- tests/{ => data}/beta_ratio_vs_jsaffer.npy | Bin tests/data/ucla_ologit.dta | Bin 0 -> 3821 bytes tests/test_beta_ratio.py | 6 +- tests/test_ordinallogit.py | 64 +++++++++++++++++++++ yli/regress.py | 35 +++++++++++ 5 files changed, 102 insertions(+), 3 deletions(-) rename tests/{ => data}/beta_ratio_vs_jsaffer.npy (100%) create mode 100644 tests/data/ucla_ologit.dta create mode 100644 tests/test_ordinallogit.py diff --git a/tests/beta_ratio_vs_jsaffer.npy b/tests/data/beta_ratio_vs_jsaffer.npy similarity index 100% rename from tests/beta_ratio_vs_jsaffer.npy rename to tests/data/beta_ratio_vs_jsaffer.npy diff --git a/tests/data/ucla_ologit.dta b/tests/data/ucla_ologit.dta new file mode 100644 index 0000000000000000000000000000000000000000..14037e25d761f1b1964ee50c2763d3adc2e35596 GIT binary patch literal 3821 zcmeH~OKg-?6vyvZkjN|u>ZpLEt-)&R1E-8tK!*+@%z!UiAc%}$DVD|uQzPm&>KuB?(CTx z;rDYthZ}lE_GNpz{ou{v_C2ZYRC{-7|64ub;lqc24wOoxx1?V8R~ooBbTOoI-RoJD zZeAE284T+i43`E%DwWFhthd6Nyra8kcWOB1JN7jn!+OtuQ>-oXn5RcqdW_kc>R&tj zV@B83{{QK}o`IGS>eI)wQ50rMA7?`pt#sbbYC043G^D)vVm5@RGTRXgcR~7VyCu{w zi=QSBzp2R)bd}kjY)cS#3$CsFnslKtelaV%%IskgiRBPNzLCH(lQ@m#)Z`g)`lVQN zVg4gbRyub=YOOsYGSYq_s^i*Mr)LVHY72CrR!*B>~v0lhhUnjzJnh8+FzCf5sSn6;H(P~-~^%LkgtsLf->AKbK$v1Yu6^k!x zTTN8~6IxM0_#jkfPhrByh%j*?%}(A9M^!vP3#oY&=~zsO^vJBs=@*+E@+lK<%yfDX z&K$^@9h0g^=v-qw2^Ta7>H-R<^3y%qmSzwsR4za=iCkRw zZejj)2!~$Nl^37F-P;uK5SK~3BNBNcXG&KPhF}{p?|L4~>|hnEL)^6DrY4WlFFx-R z$t{?$m~y?;w-=qyJ8&T#=`p_%GMI0CjtNWKhzbYeqlf^2vYcD)M9BP!qoow5wG;=l zGWyxQ*$b6^EN4n@;Fqj&FlDeG0_25Aqh`QD!DYG9IZ66l_Ucz{fYR%wiXz?$yh^WI-`%(BREkcmYdd9$wq1j$nC$LOXJW!@F;k$7&8 z;^!mdCf-ZrS~|_jh>B|U%Qs#{XeUB<6^Dg+!+A&3smbjK+sbDlO;EKV z!Cj3BsCsQ>;^1hg$Wcz9^B_o;?Jx?VQ8bNl_ZVC#7NA63P+nBZ6UYZUNBes<%5 zyMjFSqd1D%klkh))UZ|T7Nv}bu=JzY{TH{Qgj`@EfR##R)4}f6XYtKt1Eg&M4_R_| zhKLtoTF{!nl5)noYvcuHmW{Bs^ z8lwOD0Z22Tp)mLXQRX7MG|$11VIVhWNbu7#b3JIpV8aDe%jUk7!rF*-|&c{BJ%erwZN()mntDQXo@D(nlUwUK+U3?)*1H J*KVyo{{@|1iX{L5 literal 0 HcmV?d00001 diff --git a/tests/test_beta_ratio.py b/tests/test_beta_ratio.py index 8391f11..bf1bbeb 100644 --- a/tests/test_beta_ratio.py +++ b/tests/test_beta_ratio.py @@ -33,7 +33,7 @@ def test_beta_ratio_vs_jsaffer_pdf(): y = dist.pdf(x) # Compare with expected values from jsaffer implementation - expected = np.load('tests/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[0] + expected = np.load('tests/data/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[0] assert y == approx(expected) def test_beta_ratio_vs_jsaffer_cdf(): @@ -48,7 +48,7 @@ def test_beta_ratio_vs_jsaffer_cdf(): y = dist.cdf(x) # Compare with expected values from jsaffer implementation - expected = np.load('tests/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[1] + expected = np.load('tests/data/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[1] assert y == approx(expected) def _gen_beta_ratio_vs_jsaffer(): @@ -62,7 +62,7 @@ def _gen_beta_ratio_vs_jsaffer(): y1 = np.vectorize(lambda w: float(beta_quotient_distribution.pdf_bb_ratio(a1, a2, b1, b2, w)))(x) y2 = np.vectorize(lambda w: float(beta_quotient_distribution.cdf_bb_ratio(a1, a2, b1, b2, w)))(x) - np.save('tests/beta_ratio_vs_jsaffer.npy', np.array([y1, y2]), allow_pickle=False) + np.save('tests/data/beta_ratio_vs_jsaffer.npy', np.array([y1, y2]), allow_pickle=False) def test_beta_ratio_mean_vs_empirical(): """Compare beta_ratio.mean (via beta_ratio._munp) with empirical mean""" diff --git a/tests/test_ordinallogit.py b/tests/test_ordinallogit.py new file mode 100644 index 0000000..fcc6bb3 --- /dev/null +++ b/tests/test_ordinallogit.py @@ -0,0 +1,64 @@ +# scipy-yli: Helpful SciPy utilities and recipes +# Copyright © 2022 Lee Yingtong Li (RunasSudo) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from pytest import approx + +import pandas as pd + +import yli + +def test_ordinallogit_ucla(): + """Compare yli.regress with yli.OrdinalLogit for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/""" + + df = pd.read_stata('tests/data/ucla_ologit.dta') + + result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False) + + assert result.terms['pared'].beta.point == approx(1.04769, abs=0.0001) + assert result.terms['public'].beta.point == approx(-0.05879, abs=0.001) + assert result.terms['gpa'].beta.point == approx(0.61594, abs=0.001) + + assert result.terms['(Cutoffs)'].categories['unlikely/somewhat likely'].beta.point == approx(2.20391, abs=0.001) + assert result.terms['(Cutoffs)'].categories['somewhat likely/very likely'].beta.point == approx(4.29936, abs=0.001) + + # Confidence intervals compared with Stata 16 + # . ologit apply pared public gpa + assert result.terms['(Cutoffs)'].categories['unlikely/somewhat likely'].beta.ci_lower == approx(0.6754621, abs=0.001) + assert result.terms['(Cutoffs)'].categories['unlikely/somewhat likely'].beta.ci_upper == approx(3.731184, abs=0.001) + assert result.terms['(Cutoffs)'].categories['somewhat likely/very likely'].beta.ci_lower == approx(2.72234, abs=0.001) + assert result.terms['(Cutoffs)'].categories['somewhat likely/very likely'].beta.ci_upper == approx(5.875195, abs=0.001) + + expected_summary = ''' Ordinal Logistic Regression Results +=============================================================== +Dep. Variable: apply | No. Observations: 400 + Model: OrdinalLogit | Df. Model: 5 + Method: Maximum Likelihood | Df. Residuals: 395 + Date: {0:%Y-%m-%d} | Pseudo R²: 0.03 + Time: {0:%H:%M:%S} | LL-Model: -358.51 + Std. Errors: Non-Robust | LL-Null: -370.60 + | p (LR): <0.001* +=============================================================== + β (95% CI) p +------------------------------------------------------------ + pared 1.05 (0.53 - 1.57) <0.001* + public -0.06 (-0.64 - 0.53) 0.84 + gpa 0.62 (0.10 - 1.13) 0.02* + (Cutoffs) + unlikely/somewhat likely 2.20 (0.68 - 3.73) 0.005* +somewhat likely/very likely 4.30 (2.72 - 5.88) <0.001* +------------------------------------------------------------'''.format(result.fitted_dt) + + assert result.summary() == expected_summary diff --git a/yli/regress.py b/yli/regress.py index 781f7b3..2ed70fe 100644 --- a/yli/regress.py +++ b/yli/regress.py @@ -858,6 +858,8 @@ def regress( # Get full name to display if model_class is sm.Logit: full_name = 'Logistic Regression' + elif model_class is OrdinalLogit: + full_name = 'Ordinal Logistic Regression' else: full_name = '{} Regression'.format(model_class.__name__) if fit_kwargs.get('cov_type', 'nonrobust') != 'nonrobust': @@ -1004,6 +1006,39 @@ class OrdinalLogit(statsmodels.miscmodels.ordinal_model.OrderedModel): The implementation subclasses statsmodels' native *OrderedModel*, but substitutes an alternative parameterisation used by R and Stata. The the native statsmodels implementation, the first cutoff term is the true cutoff, and further cutoff terms are log differences between consecutive cutoffs. In this parameterisation, cutoff terms are represented directly in the model. + + **Example:** + + .. code-block:: + + df = pd.DataFrame(...) + yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False) + + .. code-block:: text + + Ordinal Logistic Regression Results + =============================================================== + Dep. Variable: apply | No. Observations: 400 + Model: OrdinalLogit | Df. Model: 5 + Method: Maximum Likelihood | Df. Residuals: 395 + Date: 2022-12-02 | Pseudo R²: 0.03 + Time: 21:30:38 | LL-Model: -358.51 + Std. Errors: Non-Robust | LL-Null: -370.60 + | p (LR): <0.001* + =============================================================== + β (95% CI) p + ------------------------------------------------------------ + pared 1.05 (0.53 - 1.57) <0.001* + public -0.06 (-0.64 - 0.53) 0.84 + gpa 0.62 (0.10 - 1.13) 0.02* + (Cutoffs) + unlikely/somewhat likely 2.20 (0.68 - 3.73) 0.005* + somewhat likely/very likely 4.30 (2.72 - 5.88) <0.001* + ------------------------------------------------------------ + + The output summarises the result of the regression. + The parameters shown under "(Cutoffs)" are the cutoff values in the latent variable parameterisation of ordinal regression. + Note that because `exp=False` was passed, the parameter estimates are not automatically exponentiated. """ def __init__(self, endog, exog, **kwargs):