2022-10-13 13:25:24 +11:00
|
|
|
# scipy-yli: Helpful SciPy utilities and recipes
|
2023-04-16 21:56:09 +10:00
|
|
|
# Copyright © 2022–2023 Lee Yingtong Li (RunasSudo)
|
2022-10-13 13:25:24 +11:00
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU Affero General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
from pytest import approx
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
|
|
|
|
|
|
import yli
|
|
|
|
|
|
|
|
def test_chi2_ol10_15():
    """Check yli.chi2 against Ott & Longnecker (2016) example 10.15"""

    # Frequency table: one (age category, severity, count) triple per cell
    cells = [
        (1, 'Moderate', 15),
        (2, 'Moderate', 32),
        (3, 'Moderate', 18),
        (4, 'Moderate', 5),
        (1, 'Mildly Severe', 8),
        (2, 'Mildly Severe', 29),
        (3, 'Mildly Severe', 23),
        (4, 'Mildly Severe', 18),
        (1, 'Severe', 1),
        (2, 'Severe', 20),
        (3, 'Severe', 25),
        (4, 'Severe', 22),
    ]

    # Split the table into columns, then expand each cell into `count`
    # individual observations so the frame holds one row per subject
    age_categories, severities, counts = (list(column) for column in zip(*cells))
    observations = pd.DataFrame({
        'AgeCategory': np.repeat(age_categories, counts),
        'Severity': np.repeat(severities, counts),
    })

    outcome = yli.chi2(observations, 'Severity', 'AgeCategory')

    # Reference values, each compared within the absolute tolerance given
    assert outcome.statistic == approx(27.13, abs=0.01)
    assert outcome.pvalue == approx(0.00014, abs=0.00001)
|
|
|
|
|
|
|
|
def test_chi2_ol10_18():
    """Check yli.chi2 against Ott & Longnecker (2016) example 10.18"""

    # 2x2 frequency table: one (response, stress, count) triple per cell
    cells = [
        (False, False, 250),
        (True, False, 750),
        (False, True, 400),
        (True, True, 1600),
    ]

    # Split the table into columns, then expand each cell into `count`
    # individual observations so the frame holds one row per subject
    responses, stresses, counts = (list(column) for column in zip(*cells))
    observations = pd.DataFrame({
        'Response': np.repeat(responses, counts),
        'Stress': np.repeat(stresses, counts),
    })

    outcome = yli.chi2(observations, 'Stress', 'Response')

    # Odds ratio point estimate and confidence bounds, each within 0.001
    odds_ratio = outcome.oddsratio
    assert odds_ratio.point == approx(1.333, abs=0.001)
    assert odds_ratio.ci_lower == approx(1.113, abs=0.001)
    assert odds_ratio.ci_upper == approx(1.596, abs=0.001)

    # NOTE(review): both expected strings below are compared verbatim with
    # ==, so every space matters. Confirm the column padding against the
    # output of a real run before relying on these literals.
    expected_summary = '\n'.join([
        'Stress False True',
        'Response',
        'False 250 400',
        'True 750 1600',
        '',
        'χ²(1) = 9.82; p = 0.002*',
        'OR (95% CI) = 1.33 (1.11–1.60)',
        'RR (95% CI) = 1.11 (1.03–1.18)',
    ])

    assert outcome.summary() == expected_summary

    # Adjacent literals are concatenated at compile time into one string
    expected_html = (
        '<div>\n'
        '<style scoped>\n'
        ' .dataframe tbody tr th:only-of-type {\n'
        ' vertical-align: middle;\n'
        ' }\n'
        '\n'
        ' .dataframe tbody tr th {\n'
        ' vertical-align: top;\n'
        ' }\n'
        '\n'
        ' .dataframe thead th {\n'
        ' text-align: right;\n'
        ' }\n'
        '</style>\n'
        '<table border="1" class="dataframe">\n'
        ' <thead>\n'
        ' <tr style="text-align: right;">\n'
        ' <th>Stress</th>\n'
        ' <th>False</th>\n'
        ' <th>True</th>\n'
        ' </tr>\n'
        ' <tr>\n'
        ' <th>Response</th>\n'
        ' <th></th>\n'
        ' <th></th>\n'
        ' </tr>\n'
        ' </thead>\n'
        ' <tbody>\n'
        ' <tr>\n'
        ' <th>False</th>\n'
        ' <td>250</td>\n'
        ' <td>400</td>\n'
        ' </tr>\n'
        ' <tr>\n'
        ' <th>True</th>\n'
        ' <td>750</td>\n'
        ' <td>1600</td>\n'
        ' </tr>\n'
        ' </tbody>\n'
        '</table>\n'
        '</div><br><i>χ</i><sup>2</sup>(1) = 9.82; <i>p</i> = 0.002*'
        '<br>OR (95% CI) = 1.33 (1.11–1.60)'
        '<br>RR (95% CI) = 1.11 (1.03–1.18)'
    )

    assert outcome._repr_html_() == expected_html
|