# scipy-yli: Helpful SciPy utilities and recipes
# Copyright © 2022–2023 Lee Yingtong Li (RunasSudo)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from pytest import approx
import pandas as pd
import yli
def test_ordinallogit_ucla():
    """Compare yli.regress with yli.OrdinalLogit for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/

    Checks the coefficient point estimates, the confidence intervals of the
    cutoff terms, and the plain-text and HTML summaries of the fitted model.
    """

    df = pd.read_stata('tests/data/ucla_ologit.dta')

    result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)

    # Point estimates of the predictor coefficients
    assert result.terms['pared'].beta.point == approx(1.04769, abs=0.0001)
    assert result.terms['public'].beta.point == approx(-0.05879, abs=0.001)
    assert result.terms['gpa'].beta.point == approx(0.61594, abs=0.001)

    # The cutoffs are grouped under one term and keyed by "lower/upper" label
    cutoffs = result.terms['(Cutoffs)'].categories
    assert cutoffs['unlikely/somewhat likely'].beta.point == approx(2.20391, abs=0.001)
    assert cutoffs['somewhat likely/very likely'].beta.point == approx(4.29936, abs=0.001)

    # Confidence intervals compared with Stata 16
    # . ologit apply pared public gpa
    assert cutoffs['unlikely/somewhat likely'].beta.ci_lower == approx(0.6754621, abs=0.001)
    assert cutoffs['unlikely/somewhat likely'].beta.ci_upper == approx(3.731184, abs=0.001)
    assert cutoffs['somewhat likely/very likely'].beta.ci_lower == approx(2.72234, abs=0.001)
    assert cutoffs['somewhat likely/very likely'].beta.ci_upper == approx(5.875195, abs=0.001)

    # The date and time fields are filled in from result.fitted_dt via format().
    # NOTE(review): the expected text below has no column padding, so it has
    # presumably been whitespace-collapsed at some point; regenerate it from the
    # real result.summary() output and confirm before trusting this assertion.
    expected_summary = ''' Ordinal Logistic Regression Results
==========================================================
Dep. Variable: apply | No. Observations: 400
Model: Ordinal Logit | Df. Model: 3
Date: {0:%Y-%m-%d} | Df. Residuals: 395
Time: {0:%H:%M:%S} | Pseudo R²: 0.03
Std. Errors: Non-Robust | LL-Model: -358.51
| LL-Null: -370.60
| p (LR): <0.001*
============================================================
β (95% CI) p
------------------------------------------------------------
pared 1.05 (0.53 - 1.57) <0.001*
public -0.06 (-0.64 - 0.53) 0.84
gpa 0.62 (0.10 - 1.13) 0.02*
(Cutoffs)
unlikely/somewhat likely 2.20 (0.68 - 3.73) 0.005*
somewhat likely/very likely 4.30 (2.72 - 5.88) <0.001*
------------------------------------------------------------'''.format(result.fitted_dt)

    assert result.summary() == expected_summary

    # Triple quotes are required because the expected text spans several lines.
    # NOTE(review): this literal contains no HTML tags, so it has presumably lost
    # its markup; TODO regenerate from the real result._repr_html_() output.
    assert result._repr_html_() == '''
Ordinal Logistic Regression ResultsDep. Variable: | apply | No. Observations: | 400 |
---|
Model: | Ordinal Logit | Df. Model: | 3 |
---|
Date: | {0:%Y-%m-%d} | Df. Residuals: | 395 |
---|
Time: | {0:%H:%M:%S} | Pseudo R2: | 0.03 |
---|
Std. Errors: | Non-Robust | LL-Model: | -358.51 |
---|
| | LL-Null: | -370.60 |
---|
| | p (LR): | <0.001* |
---|
| β | (95% CI) | p |
---|
pared | 1.05 | (0.53 | – | 1.57) | <0.001* |
---|
public | -0.06 | (-0.64 | – | 0.53) | =0.84 |
---|
gpa | 0.62 | (0.10 | – | 1.13) | =0.02* |
---|
(Cutoffs) | | | | | |
---|
unlikely/somewhat likely | 2.20 | (0.68 | – | 3.73) | =0.005* |
somewhat likely/very likely | 4.30 | (2.72 | – | 5.88) | <0.001* |
'''.format(result.fitted_dt)
def test_brant_ucla():
    """Compare RegressionModel.brant with R brant library for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/

    Checks the statistic, degrees of freedom and p value of the omnibus test
    and of each per-predictor test, then the plain-text and HTML summaries.
    """

    df = pd.read_stata('tests/data/ucla_ologit.dta')

    result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)
    brant_result = result.brant()

    # Reference values obtained in R with:
    # df <- read.dta("https://stats.idre.ucla.edu/stat/data/ologit.dta")
    # m <- polr(apply ~ pared + public + gpa, data=df, Hess=TRUE)
    # brant(m)
    tests = brant_result.tests

    assert tests['Omnibus'].statistic == approx(4.34, abs=0.01)
    assert tests['Omnibus'].dof == 3
    assert tests['Omnibus'].pvalue == approx(0.23, abs=0.01)

    assert tests['pared'].statistic == approx(0.13, abs=0.01)
    assert tests['pared'].dof == 1
    assert tests['pared'].pvalue == approx(0.72, abs=0.01)

    assert tests['public'].statistic == approx(3.44, abs=0.01)
    assert tests['public'].dof == 1
    assert tests['public'].pvalue == approx(0.06, abs=0.01)

    assert tests['gpa'].statistic == approx(0.18, abs=0.01)
    assert tests['gpa'].dof == 1
    assert tests['gpa'].pvalue == approx(0.67, abs=0.01)

    # NOTE(review): the expected text below has no column padding, so it has
    # presumably been whitespace-collapsed at some point; regenerate it from the
    # real brant_result.summary() output and confirm before trusting this check.
    expected_summary = ''' χ² df p
Omnibus 4.34 3 0.23
pared 0.13 1 0.72
public 3.44 1 0.06
gpa 0.18 1 0.67 '''

    assert brant_result.summary() == expected_summary

    # Triple quotes are required because the expected text spans several lines.
    # NOTE(review): this literal contains no HTML tags, so it has presumably lost
    # its markup; TODO regenerate from the real brant_result._repr_html_() output.
    assert brant_result._repr_html_() == '''Brant Test Results | χ2 | df | p |
---|
Omnibus | 4.34 | 3 | =0.23 |
---|
pared | 0.13 | 1 | =0.72 |
---|
public | 3.44 | 1 | =0.06 |
---|
gpa | 0.18 | 1 | =0.67 |
---|
'''