Add unit test for OrdinalLogit

2022-12-02 21:43:05 +11:00 · 2022-12-02 21:43:05 +11:00 · ea2d04ada1
commit ea2d04ada1
parent 2135796d85
5 changed files with 102 additions and 3 deletions
--- a/tests/data/beta_ratio_vs_jsaffer.npy
+++ b/tests/data/beta_ratio_vs_jsaffer.npy
--- a/tests/data/ucla_ologit.dta
+++ b/tests/data/ucla_ologit.dta
--- a/tests/test_beta_ratio.py
+++ b/tests/test_beta_ratio.py
@ -33,7 +33,7 @@ def test_beta_ratio_vs_jsaffer_pdf():
 	y = dist.pdf(x)
 	
 	# Compare with expected values from jsaffer implementation
-	expected = np.load('tests/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[0]
+	expected = np.load('tests/data/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[0]
 	assert y == approx(expected)

 def test_beta_ratio_vs_jsaffer_cdf():
@ -48,7 +48,7 @@ def test_beta_ratio_vs_jsaffer_cdf():
 	y = dist.cdf(x)
 	
 	# Compare with expected values from jsaffer implementation
-	expected = np.load('tests/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[1]
+	expected = np.load('tests/data/beta_ratio_vs_jsaffer.npy', allow_pickle=False)[1]
 	assert y == approx(expected)

 def _gen_beta_ratio_vs_jsaffer():
@ -62,7 +62,7 @@ def _gen_beta_ratio_vs_jsaffer():
 	y1 = np.vectorize(lambda w: float(beta_quotient_distribution.pdf_bb_ratio(a1, a2, b1, b2, w)))(x)
 	y2 = np.vectorize(lambda w: float(beta_quotient_distribution.cdf_bb_ratio(a1, a2, b1, b2, w)))(x)
 	
-	np.save('tests/beta_ratio_vs_jsaffer.npy', np.array([y1, y2]), allow_pickle=False)
+	np.save('tests/data/beta_ratio_vs_jsaffer.npy', np.array([y1, y2]), allow_pickle=False)

 def test_beta_ratio_mean_vs_empirical():
 	"""Compare beta_ratio.mean (via beta_ratio._munp) with empirical mean"""
--- a/tests/test_ordinallogit.py
+++ b/tests/test_ordinallogit.py
@ -0,0 +1,64 @@
+#   scipy-yli: Helpful SciPy utilities and recipes
+#   Copyright © 2022  Lee Yingtong Li (RunasSudo)
+#
+#   This program is free software: you can redistribute it and/or modify
+#   it under the terms of the GNU Affero General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU Affero General Public License for more details.
+#
+#   You should have received a copy of the GNU Affero General Public License
+#   along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+from pytest import approx
+
+import pandas as pd
+
+import yli
+
+def test_ordinallogit_ucla():
+	"""Compare yli.regress with yli.OrdinalLogit for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/"""
+	
+	df = pd.read_stata('tests/data/ucla_ologit.dta')
+	
+	result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)
+	
+	assert result.terms['pared'].beta.point == approx(1.04769, abs=0.0001)
+	assert result.terms['public'].beta.point == approx(-0.05879, abs=0.001)
+	assert result.terms['gpa'].beta.point == approx(0.61594, abs=0.001)
+	
+	assert result.terms['(Cutoffs)'].categories['unlikely/somewhat likely'].beta.point == approx(2.20391, abs=0.001)
+	assert result.terms['(Cutoffs)'].categories['somewhat likely/very likely'].beta.point == approx(4.29936, abs=0.001)
+	
+	# Confidence intervals compared with Stata 16
+	# . ologit apply pared public gpa
+	assert result.terms['(Cutoffs)'].categories['unlikely/somewhat likely'].beta.ci_lower == approx(0.6754621, abs=0.001)
+	assert result.terms['(Cutoffs)'].categories['unlikely/somewhat likely'].beta.ci_upper == approx(3.731184, abs=0.001)
+	assert result.terms['(Cutoffs)'].categories['somewhat likely/very likely'].beta.ci_lower == approx(2.72234, abs=0.001)
+	assert result.terms['(Cutoffs)'].categories['somewhat likely/very likely'].beta.ci_upper == approx(5.875195, abs=0.001)
+	
+	expected_summary = '''              Ordinal Logistic Regression Results              
+===============================================================
+Dep. Variable:              apply  |  No. Observations:     400
+        Model:       OrdinalLogit  |         Df. Model:       5
+       Method: Maximum Likelihood  |     Df. Residuals:     395
+         Date:         {0:%Y-%m-%d}  |         Pseudo R²:    0.03
+         Time:           {0:%H:%M:%S}  |          LL-Model: -358.51
+  Std. Errors:         Non-Robust  |           LL-Null: -370.60
+                                   |            p (LR): <0.001*
+===============================================================
+                                β      (95% CI)         p   
+------------------------------------------------------------
+                      pared    1.05  (0.53 - 1.57)   <0.001*
+                     public   -0.06 (-0.64 - 0.53)    0.84  
+                        gpa    0.62  (0.10 - 1.13)    0.02* 
+                  (Cutoffs)                                 
+   unlikely/somewhat likely    2.20  (0.68 - 3.73)    0.005*
+somewhat likely/very likely    4.30  (2.72 - 5.88)   <0.001*
+------------------------------------------------------------'''.format(result.fitted_dt)
+	
+	assert result.summary() == expected_summary
--- a/yli/regress.py
+++ b/yli/regress.py
@ -858,6 +858,8 @@ def regress(
 	# Get full name to display
 	if model_class is sm.Logit:
 		full_name = 'Logistic Regression'
+	elif model_class is OrdinalLogit:
+		full_name = 'Ordinal Logistic Regression'
 	else:
 		full_name = '{} Regression'.format(model_class.__name__)
 	if fit_kwargs.get('cov_type', 'nonrobust') != 'nonrobust':
@ -1004,6 +1006,39 @@ class OrdinalLogit(statsmodels.miscmodels.ordinal_model.OrderedModel):
 	The implementation subclasses statsmodels' native *OrderedModel*, but substitutes an alternative parameterisation used by R and Stata.
 	The the native statsmodels implementation, the first cutoff term is the true cutoff, and further cutoff terms are log differences between consecutive cutoffs.
 	In this parameterisation, cutoff terms are represented directly in the model.
+	
+	**Example:**
+	
+	.. code-block::
+		
+		df = pd.DataFrame(...)
+		yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)
+	
+	.. code-block:: text
+		
+		              Ordinal Logistic Regression Results              
+		===============================================================
+		Dep. Variable:              apply  |  No. Observations:     400
+		        Model:       OrdinalLogit  |         Df. Model:       5
+		       Method: Maximum Likelihood  |     Df. Residuals:     395
+		         Date:         2022-12-02  |         Pseudo R²:    0.03
+		         Time:           21:30:38  |          LL-Model: -358.51
+		  Std. Errors:         Non-Robust  |           LL-Null: -370.60
+		                                   |            p (LR): <0.001*
+		===============================================================
+		                                β      (95% CI)         p   
+		------------------------------------------------------------
+		                      pared    1.05  (0.53 - 1.57)   <0.001*
+		                     public   -0.06 (-0.64 - 0.53)    0.84  
+		                        gpa    0.62  (0.10 - 1.13)    0.02* 
+		                  (Cutoffs)                                 
+		   unlikely/somewhat likely    2.20  (0.68 - 3.73)    0.005*
+		somewhat likely/very likely    4.30  (2.72 - 5.88)   <0.001*
+		------------------------------------------------------------
+	
+	The output summarises the result of the regression.
+	The parameters shown under "(Cutoffs)" are the cutoff values in the latent variable parameterisation of ordinal regression.
+	Note that because `exp=False` was passed, the parameter estimates are not automatically exponentiated.
 	"""
 	
 	def __init__(self, endog, exog, **kwargs):