# scipy-yli: Helpful SciPy utilities and recipes
# Copyright © 2022–2023 Lee Yingtong Li (RunasSudo)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from pytest import approx

import pandas as pd

import yli


def test_ordinallogit_ucla():
    """Compare yli.regress with yli.OrdinalLogit for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/

    Fits ``apply ~ pared + public + gpa`` on the UCLA ologit dataset and
    checks the point estimates, the cutoff confidence intervals, and the
    plain-text and HTML renderings of the result.
    """
    df = pd.read_stata('tests/data/ucla_ologit.dta')

    # exp=False is passed so the assertions below compare raw β values
    # against the reference figures.
    result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)

    assert result.terms['pared'].beta.point == approx(1.04769, abs=0.0001)
    assert result.terms['public'].beta.point == approx(-0.05879, abs=0.001)
    assert result.terms['gpa'].beta.point == approx(0.61594, abs=0.001)

    cutoffs = result.terms['(Cutoffs)'].categories
    lower_cutoff = cutoffs['unlikely/somewhat likely'].beta
    upper_cutoff = cutoffs['somewhat likely/very likely'].beta

    assert lower_cutoff.point == approx(2.20391, abs=0.001)
    assert upper_cutoff.point == approx(4.29936, abs=0.001)

    # Confidence intervals compared with Stata 16
    # . ologit apply pared public gpa
    assert lower_cutoff.ci_lower == approx(0.6754621, abs=0.001)
    assert lower_cutoff.ci_upper == approx(3.731184, abs=0.001)
    assert upper_cutoff.ci_lower == approx(2.72234, abs=0.001)
    assert upper_cutoff.ci_upper == approx(5.875195, abs=0.001)

    # NOTE(review): the line breaks and column-alignment whitespace of this
    # expected table appear to have been collapsed in this copy of the file.
    # The text is kept exactly as found; confirm it against the real
    # result.summary() output before relying on this assertion.
    expected_summary = (
        ' Ordinal Logistic Regression Results '
        '========================================================== '
        'Dep. Variable: apply | No. Observations: 400 '
        'Model: Ordinal Logit | Df. Model: 3 '
        'Date: {0:%Y-%m-%d} | Df. Residuals: 395 '
        'Time: {0:%H:%M:%S} | Pseudo R²: 0.03 '
        'Std. Errors: Non-Robust | LL-Model: -358.51 '
        '| LL-Null: -370.60 '
        '| p (LR): <0.001* '
        '============================================================ '
        'β (95% CI) p '
        '------------------------------------------------------------ '
        'pared 1.05 (0.53 - 1.57) <0.001* '
        'public -0.06 (-0.64 - 0.53) 0.84 '
        'gpa 0.62 (0.10 - 1.13) 0.02* '
        '(Cutoffs) '
        'unlikely/somewhat likely 2.20 (0.68 - 3.73) 0.005* '
        'somewhat likely/very likely 4.30 (2.72 - 5.88) <0.001* '
        '------------------------------------------------------------'
    ).format(result.fitted_dt)

    assert result.summary() == expected_summary

    # NOTE(review): the HTML markup of this expected value appears to have
    # been stripped in this copy of the file (only the cell text remains).
    # The text is kept exactly as found; restore the tags from the real
    # result._repr_html_() output before relying on this assertion.
    expected_html = (
        '\n'
        'Ordinal Logistic Regression Results\n'
        'Dep. Variable:applyNo. Observations:400\n'
        'Model:Ordinal LogitDf. Model:3\n'
        'Date:{0:%Y-%m-%d}Df. Residuals:395\n'
        'Time:{0:%H:%M:%S}Pseudo R2:0.03\n'
        'Std. Errors:Non-RobustLL-Model:-358.51\n'
        'LL-Null:-370.60\n'
        'p (LR):<0.001*\n'
        'β(95% CI)p\n'
        'pared1.05(0.531.57)<0.001*\n'
        'public-0.06(-0.640.53)=0.84\n'
        'gpa0.62(0.101.13)=0.02*\n'
        '(Cutoffs)\n'
        'unlikely/somewhat likely2.20(0.683.73)=0.005*\n'
        'somewhat likely/very likely4.30(2.725.88)<0.001*\n'
    ).format(result.fitted_dt)

    assert result._repr_html_() == expected_html


def test_brant_ucla():
    """Compare RegressionModel.brant with R brant library for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/

    Fits the same ordinal logit model as test_ordinallogit_ucla, runs the
    Brant test on it, and checks each statistic, its degrees of freedom and
    p value, plus the plain-text and HTML renderings.
    """
    df = pd.read_stata('tests/data/ucla_ologit.dta')

    result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)
    brant_result = result.brant()

    # Reference values were produced in R with:
    # df <- read.dta("https://stats.idre.ucla.edu/stat/data/ologit.dta")
    # m <- polr(apply ~ pared + public + gpa, data=df, Hess=TRUE)
    # brant(m)
    assert brant_result.tests['Omnibus'].statistic == approx(4.34, abs=0.01)
    assert brant_result.tests['Omnibus'].dof == 3
    assert brant_result.tests['Omnibus'].pvalue == approx(0.23, abs=0.01)

    assert brant_result.tests['pared'].statistic == approx(0.13, abs=0.01)
    assert brant_result.tests['pared'].dof == 1
    assert brant_result.tests['pared'].pvalue == approx(0.72, abs=0.01)

    assert brant_result.tests['public'].statistic == approx(3.44, abs=0.01)
    assert brant_result.tests['public'].dof == 1
    assert brant_result.tests['public'].pvalue == approx(0.06, abs=0.01)

    assert brant_result.tests['gpa'].statistic == approx(0.18, abs=0.01)
    assert brant_result.tests['gpa'].dof == 1
    assert brant_result.tests['gpa'].pvalue == approx(0.67, abs=0.01)

    # NOTE(review): the line breaks and column-alignment whitespace of this
    # expected table appear to have been collapsed in this copy of the file.
    # The text is kept exactly as found; confirm it against the real
    # brant_result.summary() output before relying on this assertion.
    expected_summary = (
        ' χ² df p '
        'Omnibus 4.34 3 0.23 '
        'pared 0.13 1 0.72 '
        'public 3.44 1 0.06 '
        'gpa 0.18 1 0.67 '
    )

    assert brant_result.summary() == expected_summary

    # NOTE(review): the HTML markup of this expected value appears to have
    # been stripped in this copy of the file (only the cell text remains).
    # The text is kept exactly as found; restore the tags from the real
    # brant_result._repr_html_() output before relying on this assertion.
    expected_html = (
        '\n'
        'Brant Test Results\n'
        'χ2dfp\n'
        'Omnibus4.343=0.23\n'
        'pared0.131=0.72\n'
        'public3.441=0.06\n'
        'gpa0.181=0.67\n'
    )

    assert brant_result._repr_html_() == expected_html