scipy-yli/tests/test_ordinallogit.py

#   scipy-yli: Helpful SciPy utilities and recipes
#   Copyright © 2022  Lee Yingtong Li (RunasSudo)
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU Affero General Public License for more details.
#
#   You should have received a copy of the GNU Affero General Public License
#   along with this program.  If not, see <https://www.gnu.org/licenses/>.

from pytest import approx

import pandas as pd

import yli

def test_ordinallogit_ucla():
	"""Check yli.OrdinalLogit, fitted through yli.regress, against the UCLA worked example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/"""
	
	data = pd.read_stata('tests/data/ucla_ologit.dta')
	fit = yli.regress(yli.OrdinalLogit, data, 'apply', 'pared + public + gpa', exp=False)
	
	# Slope coefficients: (term, expected point estimate, absolute tolerance)
	slope_reference = [
		('pared', 1.04769, 0.0001),
		('public', -0.05879, 0.001),
		('gpa', 0.61594, 0.001),
	]
	for term_name, expected_point, tolerance in slope_reference:
		assert fit.terms[term_name].beta.point == approx(expected_point, abs=tolerance)
	
	# Threshold (cutoff) estimates between adjacent outcome categories
	cutoffs = fit.terms['(Cutoffs)'].categories
	first_cutoff = cutoffs['unlikely/somewhat likely'].beta
	second_cutoff = cutoffs['somewhat likely/very likely'].beta
	
	assert first_cutoff.point == approx(2.20391, abs=0.001)
	assert second_cutoff.point == approx(4.29936, abs=0.001)
	
	# Confidence intervals compared with Stata 16
	# . ologit apply pared public gpa
	assert first_cutoff.ci_lower == approx(0.6754621, abs=0.001)
	assert first_cutoff.ci_upper == approx(3.731184, abs=0.001)
	assert second_cutoff.ci_lower == approx(2.72234, abs=0.001)
	assert second_cutoff.ci_upper == approx(5.875195, abs=0.001)
	
	# The rendered summary embeds the fit timestamp, so it is substituted into the template
	summary_template = '''           Ordinal Logistic Regression Results            
==========================================================
Dep. Variable:         apply  |  No. Observations:     400
        Model: Ordinal Logit  |         Df. Model:       5
         Date:    {0:%Y-%m-%d}  |     Df. Residuals:     395
         Time:      {0:%H:%M:%S}  |         Pseudo R²:    0.03
  Std. Errors:    Non-Robust  |          LL-Model: -358.51
                              |           LL-Null: -370.60
                              |            p (LR): <0.001*
============================================================
                                β      (95% CI)         p   
------------------------------------------------------------
                      pared    1.05  (0.53 - 1.57)   <0.001*
                     public   -0.06 (-0.64 - 0.53)    0.84  
                        gpa    0.62  (0.10 - 1.13)    0.02* 
                  (Cutoffs)                                 
   unlikely/somewhat likely    2.20  (0.68 - 3.73)    0.005*
somewhat likely/very likely    4.30  (2.72 - 5.88)   <0.001*
------------------------------------------------------------'''
	expected_summary = summary_template.format(fit.fitted_dt)
	
	assert fit.summary() == expected_summary

def test_brant_ucla():
	"""Check RegressionResult.brant against the R brant library for the UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/"""
	
	data = pd.read_stata('tests/data/ucla_ologit.dta')
	fit = yli.regress(yli.OrdinalLogit, data, 'apply', 'pared + public + gpa', exp=False)
	brant = fit.brant()
	
	# Reference values obtained in R with:
	# df <- read.dta("https://stats.idre.ucla.edu/stat/data/ologit.dta")
	# m <- polr(apply ~ pared + public + gpa, data=df, Hess=TRUE)
	# brant(m)
	
	# (test name, expected statistic, expected degrees of freedom, expected p value)
	reference = [
		('Omnibus', 4.34, 3, 0.23),
		('pared', 0.13, 1, 0.72),
		('public', 3.44, 1, 0.06),
		('gpa', 0.18, 1, 0.67),
	]
	for test_name, expected_statistic, expected_dof, expected_pvalue in reference:
		assert brant.tests[test_name].statistic == approx(expected_statistic, abs=0.01)
		assert brant.tests[test_name].dof == expected_dof
		assert brant.tests[test_name].pvalue == approx(expected_pvalue, abs=0.01)
	
	expected_table = '''          χ²  df     p   
Omnibus  4.34  3   0.23  
pared    0.13  1   0.72  
public   3.44  1   0.06  
gpa      0.18  1   0.67  '''
	
	assert brant.summary() == expected_table
Add unit test for OrdinalLogit 2022-12-02 21:43:05 +11:00			`# scipy-yli: Helpful SciPy utilities and recipes`
			`# Copyright © 2022 Lee Yingtong Li (RunasSudo)`
			`#`
			`# This program is free software: you can redistribute it and/or modify`
			`# it under the terms of the GNU Affero General Public License as published by`
			`# the Free Software Foundation, either version 3 of the License, or`
			`# (at your option) any later version.`
			`#`
			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU Affero General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU Affero General Public License`
			`# along with this program. If not, see <https://www.gnu.org/licenses/>.`

			`from pytest import approx`

			`import pandas as pd`

			`import yli`

			`def test_ordinallogit_ucla():`
			`"""Compare yli.regress with yli.OrdinalLogit for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/"""`

			`df = pd.read_stata('tests/data/ucla_ologit.dta')`

			`result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)`

			`assert result.terms['pared'].beta.point == approx(1.04769, abs=0.0001)`
			`assert result.terms['public'].beta.point == approx(-0.05879, abs=0.001)`
			`assert result.terms['gpa'].beta.point == approx(0.61594, abs=0.001)`

			`assert result.terms['(Cutoffs)'].categories['unlikely/somewhat likely'].beta.point == approx(2.20391, abs=0.001)`
			`assert result.terms['(Cutoffs)'].categories['somewhat likely/very likely'].beta.point == approx(4.29936, abs=0.001)`

			`# Confidence intervals compared with Stata 16`
			`# . ologit apply pared public gpa`
			`assert result.terms['(Cutoffs)'].categories['unlikely/somewhat likely'].beta.ci_lower == approx(0.6754621, abs=0.001)`
			`assert result.terms['(Cutoffs)'].categories['unlikely/somewhat likely'].beta.ci_upper == approx(3.731184, abs=0.001)`
			`assert result.terms['(Cutoffs)'].categories['somewhat likely/very likely'].beta.ci_lower == approx(2.72234, abs=0.001)`
			`assert result.terms['(Cutoffs)'].categories['somewhat likely/very likely'].beta.ci_upper == approx(5.875195, abs=0.001)`

Large refactor of yli.regress 2023-04-16 21:56:09 +10:00			`expected_summary = ''' Ordinal Logistic Regression Results`
			`==========================================================`
			`Dep. Variable: apply \| No. Observations: 400`
			`Model: Ordinal Logit \| Df. Model: 5`
			`Date: {0:%Y-%m-%d} \| Df. Residuals: 395`
			`Time: {0:%H:%M:%S} \| Pseudo R²: 0.03`
			`Std. Errors: Non-Robust \| LL-Model: -358.51`
			`\| LL-Null: -370.60`
			`\| p (LR): <0.001*`
			`============================================================`
Add unit test for OrdinalLogit 2022-12-02 21:43:05 +11:00			`β (95% CI) p`
			`------------------------------------------------------------`
			`pared 1.05 (0.53 - 1.57) <0.001*`
			`public -0.06 (-0.64 - 0.53) 0.84`
			`gpa 0.62 (0.10 - 1.13) 0.02*`
			`(Cutoffs)`
			`unlikely/somewhat likely 2.20 (0.68 - 3.73) 0.005*`
			`somewhat likely/very likely 4.30 (2.72 - 5.88) <0.001*`
			`------------------------------------------------------------'''.format(result.fitted_dt)`

			`assert result.summary() == expected_summary`
Add unit test for RegressionResult.brant 2022-12-02 21:53:07 +11:00
			`def test_brant_ucla():`
			`"""Compare RegressionResult.brant with R brant library for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/"""`

			`df = pd.read_stata('tests/data/ucla_ologit.dta')`
			`result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)`
			`brant_result = result.brant()`

			`# df <- read.dta("https://stats.idre.ucla.edu/stat/data/ologit.dta")`
			`# m <- polr(apply ~ pared + public + gpa, data=df, Hess=TRUE)`
			`# brant(m)`

			`assert brant_result.tests['Omnibus'].statistic == approx(4.34, abs=0.01)`
			`assert brant_result.tests['Omnibus'].dof == 3`
			`assert brant_result.tests['Omnibus'].pvalue == approx(0.23, abs=0.01)`

			`assert brant_result.tests['pared'].statistic == approx(0.13, abs=0.01)`
			`assert brant_result.tests['pared'].dof == 1`
			`assert brant_result.tests['pared'].pvalue == approx(0.72, abs=0.01)`

			`assert brant_result.tests['public'].statistic == approx(3.44, abs=0.01)`
			`assert brant_result.tests['public'].dof == 1`
			`assert brant_result.tests['public'].pvalue == approx(0.06, abs=0.01)`

			`assert brant_result.tests['gpa'].statistic == approx(0.18, abs=0.01)`
			`assert brant_result.tests['gpa'].dof == 1`
			`assert brant_result.tests['gpa'].pvalue == approx(0.67, abs=0.01)`

			`expected_summary = ''' χ² df p`
			`Omnibus 4.34 3 0.23`
			`pared 0.13 1 0.72`
			`public 3.44 1 0.06`
			`gpa 0.18 1 0.67 '''`

			`assert brant_result.summary() == expected_summary`