From ea2d04ada18670aaf0181152aff99ca46747f09e Mon Sep 17 00:00:00 2001
From: RunasSudo <runassudo@yingtongli.me>
Date: Fri, 2 Dec 2022 21:43:05 +1100
Subject: [PATCH] Add unit test for OrdinalLogit

---
 tests/{ => data}/beta_ratio_vs_jsaffer.npy | Bin
 tests/data/ucla_ologit.dta                 | Bin 0 -> 3821 bytes
 tests/test_beta_ratio.py                   |   6 +-
 tests/test_ordinallogit.py                 |  64 +++++++++++++++++++++
 yli/regress.py                             |  35 +++++++++++
 5 files changed, 102 insertions(+), 3 deletions(-)
 rename tests/{ => data}/beta_ratio_vs_jsaffer.npy (100%)
 create mode 100644 tests/data/ucla_ologit.dta
 create mode 100644 tests/test_ordinallogit.py
diff --git a/tests/beta_ratio_vs_jsaffer.npy b/tests/data/beta_ratio_vs_jsaffer.npy
similarity index 100%
rename from tests/beta_ratio_vs_jsaffer.npy
rename to tests/data/beta_ratio_vs_jsaffer.npy
diff --git a/tests/data/ucla_ologit.dta b/tests/data/ucla_ologit.dta
new file mode 100644
index 0000000000000000000000000000000000000000..14037e25d761f1b1964ee50c2763d3adc2e35596
GIT binary patch
literal 3821
zcmeH~OKg-?6vyvZkjN|u>ZpLEt-)&R1E-8tK!*+@%z!UiAc%}$DV<U&IOU<CW#NKJ
z+zl?<rR{pcau%-IxNGBbRxW1UZis0kAsYX`^PNs{QH(nmPST$5yzV{sJpT9luC+vA
zW4Idy{dz+v-QTnk8h+h$@4=l7KRvh;MKeF#4%;Lu-QSAU_2>D|uQzPm&>KuB?(CTx
z;rDYthZ}lE_GNpz{ou{v_C2ZYRC{-7|64ub;lqc24wOoxx1?V8R~ooBbTOoI-RoJD
zZeAE284T+i43`E%DwWFhthd6Nyra8kcWOB1JN7jn!+OtuQ>-oXn5RcqdW_kc>R&tj
zV@B83{{QK}o`IGS>eI)wQ50rMA7?`pt#sbbYC043G^D)vVm5@RGTRXgcR~7VyCu{w
zi=QSBzp2R)bd}kjY)cS#3$CsFnslKtelaV%%IskgiRBPNzLCH(lQ@m#)Z`g)`lVQN
zVg4gbRyub=YOOsYGSYq_s^<DInOn}|m#m&<l|p3?mQ$1a5H9t7hQq@AdkFh$2NXn<
zs9%TcD!d6vR$qY>i*Mr)LVHY72<y{ZA?3x0jW7{6k=%{R-11(86UkPDh06PoV(|#1
zG5!*yGW!IUE1kCc!h8W?eL6*)Go=iit-_iIWEqWR#&M|Tx?{_BNLS%g2q}=Ryl6UG
zlifc-M<NBMWLGBn#xV(_{+j8=GW*5JuMc5k{3KkmIt)ptcR|=)?n`}_#3`)@A;n@5
z9jR-HbEeaJ87Gog(RCHtAS5Jc-{8rjQLL9#M$ZK0kT9J-M1aj)f5)~ITxXAu8P%tC
zlxAd!`XRVr1y_K1RvF72V-+$lLIx%DYN8XDZ|p~B35ldT&kl+DE-IF+<{(p(U6|CT
zt>CrR!*B>~v0lhhUnjzJnh8+FzCf5sSn6;H(P~-~^%LkgtsLf->AKbK$v1Yu6^k!x
zTTN8~6IxM0_#jkfPhrByh%j*?%}(A9M^!vP3#oY&=~zsO^vJBs=@*+E@+lK<%yfDX
z&K$^@9h0g^=v-qw2^Ta7>H-R<^3y%qmSzwsR4za=iCkRw<FZ^G(WSm8(b1S9UJeo>
zZejj)2!~$Nl^37F-P;uK5SK~3BNBNcXG&KPhF}{p?|L4~>|hnEL)^6DrY4WlFFx-R
z$t{?$m~y?;w-=qyJ8&T#=`p_%GMI0CjtNWKhzbYeqlf^2vYcD)M9BP!qoow5wG;=l
zGWyxQ*$b6^EN4n@;Fqj&FlDeG0_25Aqh`QD!DYG9IZ6<T`XDCN+yEv3nL;AvI*OxW
z42PBi>6l_Ucz{fYR%wiXz?$yh^WI-`%(BREkcmYdd9$wq1j$nC$LOXJW!@F;k$7&8
z;^!mdCf-ZrS~|<TV$%5PUw>_jh>B|U%Qs#{XeUB<6^Dg+!+A&3smbjK+sbDlO;EKV
z!Cj3BsCsQ>;^1hg$Wcz9^B_o;?Jx?VQ8bNl_ZVC#7NA63P+nBZ<B>6UYZUNBes<%5
zyMjFSqd1D%klkh))UZ|T7Nv}bu=JzY{TH{Qgj`@EfR##R)4}f6XYtKt1Eg&M4_R_|
zh<IzzOA65E)}T3_Vma9L5(&$@iQ2*6F*2S0jTh~>KLtoTF{!nl5)noYvcuHmW{Bs^
z8<RsTJznYZ;z3Nj(tcXs!7rk=@(Y+4PQ<}$BL{Krcj^3K`Nmgpynu*|Wo!)n;<Gr(
z**4<LB-D{n)K>lwOD0Z22Tp)mLXQRX7MG|$11VIVhWNbu7#b3JIpV8aDe%jUk<rrL
zb>7!rF*-|&c{BJ%erwZN()mntDQXo@D<xK?4Ll-c*WRV<_9ZV&cwD#`O%;p2ec~W&
zioNSRgE2S?87wdHiS!1cUXq)jQd#1(TEySTsDtfbNaQDKE5FMQ#o}`iztO!8)!dkl
zqOm{Ei5!dpPH{7(KYFz^?&1u{KsUh?c>(<G)lWJOEmE0%7Ggu$><N2!W0lMJf!8vz
z67&9Yj&OZM{WfF=g~asBfs05cHh8@LoMavlYL$igQHbB+UIz{)2SD|(l#hPEjQ?`?
z_VkbX;`FakzC6FzSLg=Wb?A@ZX8moqC&eJDrNYgtqa&Ax^ksSd>nlUwUK+U3?)*1H
J*KVyo{{@|1iX{L5

literal 0
HcmV?d00001

diff --git a/tests/test_beta_ratio.py b/tests/test_beta_ratio.py
index 8391f11..bf1bbeb 100644
--- a/tests/test_beta_ratio.py
+++ b/tests/test_beta_ratio.py
@@ -33,7 +33,7 @@ def test_beta_ratio_vs_jsaffer_pdf():
 	y = dist.pdf(x)
 	
 	# Compare with expected values from jsaffer implementation
-	expected = np.load('tests/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[0]
+	expected = np.load('tests/data/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[0]
 	assert y == approx(expected)
 
 def test_beta_ratio_vs_jsaffer_cdf():
@@ -48,7 +48,7 @@ def test_beta_ratio_vs_jsaffer_cdf():
 	y = dist.cdf(x)
 	
 	# Compare with expected values from jsaffer implementation
-	expected = np.load('tests/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[1]
+	expected = np.load('tests/data/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[1]
 	assert y == approx(expected)
 
 def _gen_beta_ratio_vs_jsaffer():
@@ -62,7 +62,7 @@ def _gen_beta_ratio_vs_jsaffer():
 	y1 = np.vectorize(lambda w: float(beta_quotient_distribution.pdf_bb_ratio(a1, a2, b1, b2, w)))(x)
 	y2 = np.vectorize(lambda w: float(beta_quotient_distribution.cdf_bb_ratio(a1, a2, b1, b2, w)))(x)
 	
-	np.save('tests/beta_ratio_vs_jsaffer.npy', np.array([y1, y2]), allow_pickle=False)
+	np.save('tests/data/beta_ratio_vs_jsaffer.npy', np.array([y1, y2]), allow_pickle=False)
 
 def test_beta_ratio_mean_vs_empirical():
 	"""Compare beta_ratio.mean (via beta_ratio._munp) with empirical mean"""
diff --git a/tests/test_ordinallogit.py b/tests/test_ordinallogit.py
new file mode 100644
index 0000000..fcc6bb3
--- /dev/null
+++ b/tests/test_ordinallogit.py
@@ -0,0 +1,64 @@
+#   scipy-yli: Helpful SciPy utilities and recipes
+#   Copyright © 2022  Lee Yingtong Li (RunasSudo)
+#
+#   This program is free software: you can redistribute it and/or modify
+#   it under the terms of the GNU Affero General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU Affero General Public License for more details.
+#
+#   You should have received a copy of the GNU Affero General Public License
+#   along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+from pytest import approx
+
+import pandas as pd
+
+import yli
+
+def test_ordinallogit_ucla():
+	"""Compare yli.regress with yli.OrdinalLogit for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/"""
+	
+	df = pd.read_stata('tests/data/ucla_ologit.dta')
+	
+	result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)
+	
+	assert result.terms['pared'].beta.point == approx(1.04769, abs=0.0001)
+	assert result.terms['public'].beta.point == approx(-0.05879, abs=0.001)
+	assert result.terms['gpa'].beta.point == approx(0.61594, abs=0.001)
+	
+	assert result.terms['(Cutoffs)'].categories['unlikely/somewhat likely'].beta.point == approx(2.20391, abs=0.001)
+	assert result.terms['(Cutoffs)'].categories['somewhat likely/very likely'].beta.point == approx(4.29936, abs=0.001)
+	
+	# Confidence intervals compared with Stata 16
+	# . ologit apply pared public gpa
+	assert result.terms['(Cutoffs)'].categories['unlikely/somewhat likely'].beta.ci_lower == approx(0.6754621, abs=0.001)
+	assert result.terms['(Cutoffs)'].categories['unlikely/somewhat likely'].beta.ci_upper == approx(3.731184, abs=0.001)
+	assert result.terms['(Cutoffs)'].categories['somewhat likely/very likely'].beta.ci_lower == approx(2.72234, abs=0.001)
+	assert result.terms['(Cutoffs)'].categories['somewhat likely/very likely'].beta.ci_upper == approx(5.875195, abs=0.001)
+	
+	expected_summary = '''              Ordinal Logistic Regression Results              
+===============================================================
+Dep. Variable:              apply  |  No. Observations:     400
+        Model:       OrdinalLogit  |         Df. Model:       5
+       Method: Maximum Likelihood  |     Df. Residuals:     395
+         Date:         {0:%Y-%m-%d}  |         Pseudo R²:    0.03
+         Time:           {0:%H:%M:%S}  |          LL-Model: -358.51
+  Std. Errors:         Non-Robust  |           LL-Null: -370.60
+                                   |            p (LR): <0.001*
+===============================================================
+                                β      (95% CI)         p   
+------------------------------------------------------------
+                      pared    1.05  (0.53 - 1.57)   <0.001*
+                     public   -0.06 (-0.64 - 0.53)    0.84  
+                        gpa    0.62  (0.10 - 1.13)    0.02* 
+                  (Cutoffs)                                 
+   unlikely/somewhat likely    2.20  (0.68 - 3.73)    0.005*
+somewhat likely/very likely    4.30  (2.72 - 5.88)   <0.001*
+------------------------------------------------------------'''.format(result.fitted_dt)
+	
+	assert result.summary() == expected_summary
diff --git a/yli/regress.py b/yli/regress.py
index 781f7b3..2ed70fe 100644
--- a/yli/regress.py
+++ b/yli/regress.py
@@ -858,6 +858,8 @@ def regress(
 	# Get full name to display
 	if model_class is sm.Logit:
 		full_name = 'Logistic Regression'
+	elif model_class is OrdinalLogit:
+		full_name = 'Ordinal Logistic Regression'
 	else:
 		full_name = '{} Regression'.format(model_class.__name__)
 	if fit_kwargs.get('cov_type', 'nonrobust') != 'nonrobust':
@@ -1004,6 +1006,39 @@ class OrdinalLogit(statsmodels.miscmodels.ordinal_model.OrderedModel):
 	The implementation subclasses statsmodels' native *OrderedModel*, but substitutes an alternative parameterisation used by R and Stata.
 	The the native statsmodels implementation, the first cutoff term is the true cutoff, and further cutoff terms are log differences between consecutive cutoffs.
 	In this parameterisation, cutoff terms are represented directly in the model.
+	
+	**Example:**
+	
+	.. code-block::
+		
+		df = pd.DataFrame(...)
+		yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)
+	
+	.. code-block:: text
+		
+		              Ordinal Logistic Regression Results              
+		===============================================================
+		Dep. Variable:              apply  |  No. Observations:     400
+		        Model:       OrdinalLogit  |         Df. Model:       5
+		       Method: Maximum Likelihood  |     Df. Residuals:     395
+		         Date:         2022-12-02  |         Pseudo R²:    0.03
+		         Time:           21:30:38  |          LL-Model: -358.51
+		  Std. Errors:         Non-Robust  |           LL-Null: -370.60
+		                                   |            p (LR): <0.001*
+		===============================================================
+		                                β      (95% CI)         p   
+		------------------------------------------------------------
+		                      pared    1.05  (0.53 - 1.57)   <0.001*
+		                     public   -0.06 (-0.64 - 0.53)    0.84  
+		                        gpa    0.62  (0.10 - 1.13)    0.02* 
+		                  (Cutoffs)                                 
+		   unlikely/somewhat likely    2.20  (0.68 - 3.73)    0.005*
+		somewhat likely/very likely    4.30  (2.72 - 5.88)   <0.001*
+		------------------------------------------------------------
+	
+	The output summarises the result of the regression.
+	The parameters shown under "(Cutoffs)" are the cutoff values in the latent variable parameterisation of ordinal regression.
+	Note that because `exp=False` was passed, the parameter estimates are not automatically exponentiated.
 	"""
 	
 	def __init__(self, endog, exog, **kwargs):