102 lines
7.5 KiB
Python
102 lines
7.5 KiB
Python
# scipy-yli: Helpful SciPy utilities and recipes
|
|
# Copyright © 2022–2023 Lee Yingtong Li (RunasSudo)
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
from pytest import approx
|
|
|
|
import pandas as pd
|
|
|
|
import yli
|
|
|
|
def test_ordinallogit_ucla():
    """Check yli.OrdinalLogit (fitted through yli.regress) against the UCLA example.

    Reference walkthrough:
    https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/
    """
    data = pd.read_stata('tests/data/ucla_ologit.dta')
    fit = yli.regress(yli.OrdinalLogit, data, 'apply', 'pared + public + gpa', exp=False)

    # Point estimates of the predictor coefficients.
    # 'pared' is checked with a tighter absolute tolerance than the others.
    assert fit.terms['pared'].beta.point == approx(1.04769, abs=0.0001)
    for term_name, expected_beta in (('public', -0.05879), ('gpa', 0.61594)):
        assert fit.terms[term_name].beta.point == approx(expected_beta, abs=0.001)

    # Cutoff (threshold) estimates between adjacent outcome categories
    cutoffs = fit.terms['(Cutoffs)'].categories
    low_key = 'unlikely/somewhat likely'
    high_key = 'somewhat likely/very likely'

    assert cutoffs[low_key].beta.point == approx(2.20391, abs=0.001)
    assert cutoffs[high_key].beta.point == approx(4.29936, abs=0.001)

    # Confidence intervals compared with Stata 16
    # . ologit apply pared public gpa
    assert cutoffs[low_key].beta.ci_lower == approx(0.6754621, abs=0.001)
    assert cutoffs[low_key].beta.ci_upper == approx(3.731184, abs=0.001)
    assert cutoffs[high_key].beta.ci_lower == approx(2.72234, abs=0.001)
    assert cutoffs[high_key].beta.ci_upper == approx(5.875195, abs=0.001)

    # The date/time fields are filled from the fit's own timestamp so the
    # comparison does not depend on when the test runs.
    fitted_at = fit.fitted_dt

    # NOTE(review): the column spacing of this expected text should be
    # confirmed against the real summary() output before relying on it.
    summary_template = ''' Ordinal Logistic Regression Results
==========================================================
Dep. Variable: apply | No. Observations: 400
Model: Ordinal Logit | Df. Model: 3
Date: {0:%Y-%m-%d} | Df. Residuals: 395
Time: {0:%H:%M:%S} | Pseudo R²: 0.03
Std. Errors: Non-Robust | LL-Model: -358.51
| LL-Null: -370.60
| p (LR): <0.001*
============================================================
β (95% CI) p
------------------------------------------------------------
pared 1.05 (0.53 - 1.57) <0.001*
public -0.06 (-0.64 - 0.53) 0.84
gpa 0.62 (0.10 - 1.13) 0.02*
(Cutoffs)
unlikely/somewhat likely 2.20 (0.68 - 3.73) 0.005*
somewhat likely/very likely 4.30 (2.72 - 5.88) <0.001*
------------------------------------------------------------'''
    expected_summary = summary_template.format(fitted_at)

    assert fit.summary() == expected_summary

    # Adjacent literals are concatenated at compile time into a single
    # template; they are split here (header table, then one coefficient row
    # per literal) purely for readability.
    html_template = (
        '<table><caption>Ordinal Logistic Regression Results</caption><tr><th>Dep. Variable:</th><td>apply</td><th>No. Observations:</th><td>400</td></tr><tr><th>Model:</th><td>Ordinal Logit</td><th>Df. Model:</th><td>3</td></tr><tr><th>Date:</th><td>{0:%Y-%m-%d}</td><th>Df. Residuals:</th><td>395</td></tr><tr><th>Time:</th><td>{0:%H:%M:%S}</td><th>Pseudo <i>R</i><sup>2</sup>:</th><td>0.03</td></tr><tr><th>Std. Errors:</th><td>Non-Robust</td><th>LL-Model:</th><td>-358.51</td></tr><tr><th></th><td></td><th>LL-Null:</th><td>-370.60</td></tr><tr><th></th><td></td><th><i>p</i> (LR):</th><td><0.001*</td></tr></table>'
        '<table><tr><th></th><th style="text-align:center"><i>β</i></th><th colspan="3" style="text-align:center">(95% CI)</th><th style="text-align:center"><i>p</i></th></tr>'
        '<tr><th>pared</th><td>1.05</td><td style="padding-right:0">(0.53</td><td>–</td><td style="padding-left:0">1.57)</td><td style="text-align:left"><0.001*</td></tr>'
        '<tr><th>public</th><td>-0.06</td><td style="padding-right:0">(-0.64</td><td>–</td><td style="padding-left:0">0.53)</td><td style="text-align:left"><span style="visibility:hidden">=</span>0.84</td></tr>'
        '<tr><th>gpa</th><td>0.62</td><td style="padding-right:0">(0.10</td><td>–</td><td style="padding-left:0">1.13)</td><td style="text-align:left"><span style="visibility:hidden">=</span>0.02*</td></tr>'
        '<tr><th>(Cutoffs)</th><td></td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr>'
        '<tr><td style="text-align:right;font-style:italic">unlikely/somewhat likely</td><td>2.20</td><td style="padding-right:0">(0.68</td><td>–</td><td style="padding-left:0">3.73)</td><td style="text-align:left"><span style="visibility:hidden">=</span>0.005*</td></tr>'
        '<tr><td style="text-align:right;font-style:italic">somewhat likely/very likely</td><td>4.30</td><td style="padding-right:0">(2.72</td><td>–</td><td style="padding-left:0">5.88)</td><td style="text-align:left"><0.001*</td></tr></table>'
    )
    assert fit._repr_html_() == html_template.format(fitted_at)
|
|
|
|
def test_brant_ucla():
    """Check RegressionModel.brant against the R brant library on the UCLA example.

    Reference walkthrough:
    https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/
    """
    data = pd.read_stata('tests/data/ucla_ologit.dta')
    fit = yli.regress(yli.OrdinalLogit, data, 'apply', 'pared + public + gpa', exp=False)
    brant_result = fit.brant()

    # Reference values were produced in R with:
    # df <- read.dta("https://stats.idre.ucla.edu/stat/data/ologit.dta")
    # m <- polr(apply ~ pared + public + gpa, data=df, Hess=TRUE)
    # brant(m)

    # (test label, chi-squared statistic, degrees of freedom, p value)
    reference_rows = (
        ('Omnibus', 4.34, 3, 0.23),
        ('pared', 0.13, 1, 0.72),
        ('public', 3.44, 1, 0.06),
        ('gpa', 0.18, 1, 0.67),
    )
    for label, ref_statistic, ref_dof, ref_pvalue in reference_rows:
        assert brant_result.tests[label].statistic == approx(ref_statistic, abs=0.01)
        assert brant_result.tests[label].dof == ref_dof
        assert brant_result.tests[label].pvalue == approx(ref_pvalue, abs=0.01)

    # NOTE(review): the column spacing of this expected text should be
    # confirmed against the real summary() output before relying on it.
    expected_summary = ''' χ² df p
Omnibus 4.34 3 0.23
pared 0.13 1 0.72
public 3.44 1 0.06
gpa 0.18 1 0.67 '''

    assert brant_result.summary() == expected_summary

    # Adjacent literals are concatenated at compile time into one string;
    # they are split here (header, one row per test, footer) for readability.
    expected_html = (
        '<table><caption>Brant Test Results</caption><thead><tr><th></th><th style="text-align:center"><i>χ</i><sup>2</sup></th><th style="text-align:center">df</th><th style="text-align:center"><i>p</i></th></thead><tbody>'
        '<tr><th>Omnibus</th><td>4.34</td><td>3</td><td style="text-align:left"><span style="visibility:hidden">=</span>0.23</td></tr>'
        '<tr><th>pared</th><td>0.13</td><td>1</td><td style="text-align:left"><span style="visibility:hidden">=</span>0.72</td></tr>'
        '<tr><th>public</th><td>3.44</td><td>1</td><td style="text-align:left"><span style="visibility:hidden">=</span>0.06</td></tr>'
        '<tr><th>gpa</th><td>0.18</td><td>1</td><td style="text-align:left"><span style="visibility:hidden">=</span>0.67</td></tr>'
        '</tbody></table>'
    )
    assert brant_result._repr_html_() == expected_html
|