Add test case for Poisson regression
This commit is contained in:
parent
3c22fe4197
commit
138e31751a
201
tests/data/ucla_poisson_sim.csv
Normal file
201
tests/data/ucla_poisson_sim.csv
Normal file
@ -0,0 +1,201 @@
|
|||||||
|
num_awards,prog,math
|
||||||
|
0,3,41
|
||||||
|
0,1,41
|
||||||
|
0,3,44
|
||||||
|
0,3,42
|
||||||
|
0,3,40
|
||||||
|
0,1,42
|
||||||
|
0,3,46
|
||||||
|
0,3,40
|
||||||
|
0,3,33
|
||||||
|
0,3,46
|
||||||
|
0,3,40
|
||||||
|
0,2,38
|
||||||
|
0,3,44
|
||||||
|
0,3,37
|
||||||
|
0,3,40
|
||||||
|
0,1,39
|
||||||
|
0,1,43
|
||||||
|
0,3,38
|
||||||
|
0,2,45
|
||||||
|
0,3,39
|
||||||
|
0,1,42
|
||||||
|
0,3,45
|
||||||
|
0,3,40
|
||||||
|
0,3,40
|
||||||
|
0,2,43
|
||||||
|
0,2,49
|
||||||
|
1,1,44
|
||||||
|
1,1,46
|
||||||
|
0,2,46
|
||||||
|
1,2,41
|
||||||
|
0,1,42
|
||||||
|
0,3,47
|
||||||
|
1,2,43
|
||||||
|
0,2,49
|
||||||
|
1,3,40
|
||||||
|
1,3,39
|
||||||
|
0,3,49
|
||||||
|
0,2,42
|
||||||
|
0,1,43
|
||||||
|
0,2,41
|
||||||
|
0,1,57
|
||||||
|
0,2,50
|
||||||
|
0,2,44
|
||||||
|
0,3,39
|
||||||
|
0,2,52
|
||||||
|
1,3,41
|
||||||
|
0,3,39
|
||||||
|
0,1,39
|
||||||
|
0,3,52
|
||||||
|
1,2,48
|
||||||
|
0,2,45
|
||||||
|
0,3,40
|
||||||
|
0,1,46
|
||||||
|
3,2,50
|
||||||
|
0,2,43
|
||||||
|
0,1,43
|
||||||
|
1,2,54
|
||||||
|
1,1,42
|
||||||
|
0,3,45
|
||||||
|
0,1,54
|
||||||
|
0,3,47
|
||||||
|
1,2,45
|
||||||
|
0,2,43
|
||||||
|
1,3,40
|
||||||
|
0,3,52
|
||||||
|
1,3,52
|
||||||
|
0,1,45
|
||||||
|
0,2,49
|
||||||
|
1,2,53
|
||||||
|
0,1,54
|
||||||
|
2,2,50
|
||||||
|
0,2,48
|
||||||
|
0,2,51
|
||||||
|
0,1,41
|
||||||
|
1,3,51
|
||||||
|
0,1,46
|
||||||
|
0,3,55
|
||||||
|
0,1,46
|
||||||
|
0,1,61
|
||||||
|
0,2,45
|
||||||
|
0,3,54
|
||||||
|
0,2,54
|
||||||
|
1,2,51
|
||||||
|
0,1,49
|
||||||
|
0,3,56
|
||||||
|
0,2,53
|
||||||
|
1,1,49
|
||||||
|
0,1,57
|
||||||
|
0,1,42
|
||||||
|
0,3,46
|
||||||
|
0,3,45
|
||||||
|
0,1,55
|
||||||
|
0,3,47
|
||||||
|
0,2,50
|
||||||
|
0,3,41
|
||||||
|
0,1,52
|
||||||
|
1,2,57
|
||||||
|
0,3,53
|
||||||
|
1,3,51
|
||||||
|
0,1,57
|
||||||
|
0,2,51
|
||||||
|
0,3,40
|
||||||
|
0,3,50
|
||||||
|
0,3,57
|
||||||
|
0,1,35
|
||||||
|
0,2,49
|
||||||
|
0,1,50
|
||||||
|
1,1,55
|
||||||
|
0,2,48
|
||||||
|
0,2,52
|
||||||
|
1,2,53
|
||||||
|
1,2,59
|
||||||
|
2,2,66
|
||||||
|
0,2,57
|
||||||
|
1,2,54
|
||||||
|
0,3,57
|
||||||
|
1,2,54
|
||||||
|
0,2,54
|
||||||
|
0,2,54
|
||||||
|
2,2,49
|
||||||
|
1,3,51
|
||||||
|
3,2,58
|
||||||
|
1,2,60
|
||||||
|
0,2,51
|
||||||
|
0,2,63
|
||||||
|
1,2,50
|
||||||
|
1,2,53
|
||||||
|
0,2,53
|
||||||
|
0,2,55
|
||||||
|
1,2,56
|
||||||
|
0,2,49
|
||||||
|
1,2,61
|
||||||
|
0,1,49
|
||||||
|
0,2,57
|
||||||
|
0,2,66
|
||||||
|
0,1,56
|
||||||
|
3,2,60
|
||||||
|
1,2,57
|
||||||
|
0,2,59
|
||||||
|
0,2,58
|
||||||
|
0,2,57
|
||||||
|
3,2,64
|
||||||
|
0,1,60
|
||||||
|
0,1,54
|
||||||
|
1,2,61
|
||||||
|
1,1,58
|
||||||
|
0,1,56
|
||||||
|
0,1,60
|
||||||
|
0,2,55
|
||||||
|
3,2,57
|
||||||
|
0,2,62
|
||||||
|
0,2,56
|
||||||
|
0,2,51
|
||||||
|
2,3,56
|
||||||
|
0,1,61
|
||||||
|
1,2,63
|
||||||
|
5,2,61
|
||||||
|
0,2,67
|
||||||
|
0,2,48
|
||||||
|
1,2,61
|
||||||
|
0,1,57
|
||||||
|
0,1,48
|
||||||
|
2,2,65
|
||||||
|
4,2,62
|
||||||
|
1,2,61
|
||||||
|
0,3,53
|
||||||
|
1,1,58
|
||||||
|
1,2,64
|
||||||
|
3,2,65
|
||||||
|
1,1,56
|
||||||
|
1,1,63
|
||||||
|
3,2,57
|
||||||
|
2,2,72
|
||||||
|
0,3,66
|
||||||
|
0,2,62
|
||||||
|
1,2,58
|
||||||
|
1,2,63
|
||||||
|
3,2,64
|
||||||
|
2,2,72
|
||||||
|
0,2,64
|
||||||
|
6,2,69
|
||||||
|
4,2,70
|
||||||
|
1,2,66
|
||||||
|
0,1,58
|
||||||
|
2,2,62
|
||||||
|
2,2,67
|
||||||
|
1,2,64
|
||||||
|
2,2,63
|
||||||
|
1,2,68
|
||||||
|
1,2,75
|
||||||
|
0,2,69
|
||||||
|
1,2,65
|
||||||
|
2,2,71
|
||||||
|
5,2,71
|
||||||
|
1,2,60
|
||||||
|
2,2,71
|
||||||
|
2,3,75
|
||||||
|
1,2,71
|
||||||
|
0,2,72
|
||||||
|
3,2,73
|
|
@ -254,3 +254,33 @@ Dep. Variable: Outcome | No. Observations: 240
|
|||||||
|
|
||||||
assert result.summary() == expected_summary
|
assert result.summary() == expected_summary
|
||||||
assert result._repr_html_() == '<table><caption>Penalised Logistic Regression Results</caption><tr><th>Dep. Variable:</th><td>Outcome</td><th>No. Observations:</th><td>240</td></tr><tr><th>Model:</th><td>Penalised Logit</td><th>Df. Model:</th><td>1</td></tr><tr><th>Date:</th><td>{0:%Y-%m-%d}</td><th>Pseudo <i>R</i><sup>2</sup>:</th><td>0.37</td></tr><tr><th>Time:</th><td>{0:%H:%M:%S}</td><th>LL-Model:</th><td>-66.43</td></tr><tr><th>Std. Errors:</th><td>Non-Robust</td><th>LL-Null:</th><td>-105.91</td></tr><tr><th></th><td></td><th><i>p</i> (LR):</th><td><0.001*</td></tr></table><table><tr><th></th><th style="text-align:center"><i>β</i></th><th colspan="3" style="text-align:center">(95% CI)</th><th style="text-align:center"><i>p</i></th></tr><tr><th>(Intercept)</th><td>-2.28</td><td style="padding-right:0">(-2.77</td><td>–</td><td style="padding-left:0">-1.85)</td><td style="text-align:left"><0.001*</td></tr><tr><th>Pred</th><td>5.99</td><td style="padding-right:0">(3.95</td><td>–</td><td style="padding-left:0">10.85)</td><td style="text-align:left"><0.001*</td></tr></table>'.format(result.fitted_dt)
|
assert result._repr_html_() == '<table><caption>Penalised Logistic Regression Results</caption><tr><th>Dep. Variable:</th><td>Outcome</td><th>No. Observations:</th><td>240</td></tr><tr><th>Model:</th><td>Penalised Logit</td><th>Df. Model:</th><td>1</td></tr><tr><th>Date:</th><td>{0:%Y-%m-%d}</td><th>Pseudo <i>R</i><sup>2</sup>:</th><td>0.37</td></tr><tr><th>Time:</th><td>{0:%H:%M:%S}</td><th>LL-Model:</th><td>-66.43</td></tr><tr><th>Std. Errors:</th><td>Non-Robust</td><th>LL-Null:</th><td>-105.91</td></tr><tr><th></th><td></td><th><i>p</i> (LR):</th><td><0.001*</td></tr></table><table><tr><th></th><th style="text-align:center"><i>β</i></th><th colspan="3" style="text-align:center">(95% CI)</th><th style="text-align:center"><i>p</i></th></tr><tr><th>(Intercept)</th><td>-2.28</td><td style="padding-right:0">(-2.77</td><td>–</td><td style="padding-left:0">-1.85)</td><td style="text-align:left"><0.001*</td></tr><tr><th>Pred</th><td>5.99</td><td style="padding-right:0">(3.95</td><td>–</td><td style="padding-left:0">10.85)</td><td style="text-align:left"><0.001*</td></tr></table>'.format(result.fitted_dt)
|
||||||
|
|
||||||
|
def test_regress_poisson_ucla():
|
||||||
|
"""Compare yli.regress with yli.Poisson for https://stats.oarc.ucla.edu/r/dae/poisson-regression/"""
|
||||||
|
|
||||||
|
df = pd.read_csv('tests/data/ucla_poisson_sim.csv')
|
||||||
|
|
||||||
|
result = yli.regress(yli.Poisson, df, 'num_awards', 'C(prog) + math')
|
||||||
|
|
||||||
|
expected_summary = ''' Poisson Regression Results
|
||||||
|
=======================================================
|
||||||
|
Dep. Variable: num_awards | No. Observations: 200
|
||||||
|
Model: Poisson | Df. Model: 3
|
||||||
|
Date: {0:%Y-%m-%d} | Df. Residuals: 196
|
||||||
|
Time: {0:%H:%M:%S} | Pseudo R²: 0.21
|
||||||
|
Std. Errors: Non-Robust | LL-Model: -182.75
|
||||||
|
| LL-Null: -231.86
|
||||||
|
| p (LR): <0.001*
|
||||||
|
=======================================================
|
||||||
|
exp(β) (95% CI) p
|
||||||
|
--------------------------------------------
|
||||||
|
(Intercept) 0.01 (0.00 - 0.02) <0.001*
|
||||||
|
prog
|
||||||
|
1 Ref.
|
||||||
|
2 2.96 (1.46 - 5.97) 0.002*
|
||||||
|
3 1.45 (0.61 - 3.44) 0.40
|
||||||
|
math 1.07 (1.05 - 1.10) <0.001*
|
||||||
|
--------------------------------------------'''.format(result.fitted_dt)
|
||||||
|
|
||||||
|
assert result.summary() == expected_summary
|
||||||
|
assert result._repr_html_() == '<table><caption>Poisson Regression Results</caption><tr><th>Dep. Variable:</th><td>num_awards</td><th>No. Observations:</th><td>200</td></tr><tr><th>Model:</th><td>Poisson</td><th>Df. Model:</th><td>3</td></tr><tr><th>Date:</th><td>{0:%Y-%m-%d}</td><th>Df. Residuals:</th><td>196</td></tr><tr><th>Time:</th><td>{0:%H:%M:%S}</td><th>Pseudo <i>R</i><sup>2</sup>:</th><td>0.21</td></tr><tr><th>Std. Errors:</th><td>Non-Robust</td><th>LL-Model:</th><td>-182.75</td></tr><tr><th></th><td></td><th>LL-Null:</th><td>-231.86</td></tr><tr><th></th><td></td><th><i>p</i> (LR):</th><td><0.001*</td></tr></table><table><tr><th></th><th style="text-align:center">exp(<i>β</i>)</th><th colspan="3" style="text-align:center">(95% CI)</th><th style="text-align:center"><i>p</i></th></tr><tr><th>(Intercept)</th><td>0.01</td><td style="padding-right:0">(0.00</td><td>–</td><td style="padding-left:0">0.02)</td><td style="text-align:left"><0.001*</td></tr><tr><th>prog</th><td></td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr><tr><td style="text-align:right;font-style:italic">1</td><td>Ref.</td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr><tr><td style="text-align:right;font-style:italic">2</td><td>2.96</td><td style="padding-right:0">(1.46</td><td>–</td><td style="padding-left:0">5.97)</td><td style="text-align:left"><span style="visibility:hidden">=</span>0.002*</td></tr><tr><td style="text-align:right;font-style:italic">3</td><td>1.45</td><td style="padding-right:0">(0.61</td><td>–</td><td style="padding-left:0">3.44)</td><td style="text-align:left"><span style="visibility:hidden">=</span>0.40</td></tr><tr><th>math</th><td>1.07</td><td style="padding-right:0">(1.05</td><td>–</td><td style="padding-left:0">1.10)</td><td style="text-align:left"><0.001*</td></tr></table>'.format(result.fitted_dt)
|
||||||
|
@ -820,7 +820,7 @@ class Logit(RegressionModel):
|
|||||||
|
|
||||||
The output summarises the results of the regression.
|
The output summarises the results of the regression.
|
||||||
Note that the parameter estimates are automatically exponentiated.
|
Note that the parameter estimates are automatically exponentiated.
|
||||||
For example, the odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
|
For example, the adjusted odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -888,7 +888,7 @@ class OLS(RegressionModel):
|
|||||||
----------------------------------------------
|
----------------------------------------------
|
||||||
|
|
||||||
The output summarises the results of the regression.
|
The output summarises the results of the regression.
|
||||||
For example, the mean difference in "LNC" per unit increase in "D" is 0.23, with 95% confidence interval 0.05–0.41, and is significant with *p* value 0.02.
|
For example, the adjusted mean difference in "LNC" per unit increase in "D" is 0.23, with 95% confidence interval 0.05–0.41, and is significant with *p* value 0.02.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -1210,9 +1210,39 @@ class PenalisedLogit(RegressionModel):
|
|||||||
class Poisson(RegressionModel):
|
class Poisson(RegressionModel):
|
||||||
"""
|
"""
|
||||||
Poisson regression
|
Poisson regression
|
||||||
"""
|
|
||||||
|
|
||||||
# TODO: Document example
|
**Example:**
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
df = pd.DataFrame(...)
|
||||||
|
yli.regress(yli.Poisson, df, 'num_awards', 'C(prog) + math')
|
||||||
|
|
||||||
|
.. code-block:: text
|
||||||
|
|
||||||
|
Poisson Regression Results
|
||||||
|
=======================================================
|
||||||
|
Dep. Variable: num_awards | No. Observations: 200
|
||||||
|
Model: Poisson | Df. Model: 3
|
||||||
|
Date: 2023-04-22 | Df. Residuals: 196
|
||||||
|
Time: 16:58:21 | Pseudo R²: 0.21
|
||||||
|
Std. Errors: Non-Robust | LL-Model: -182.75
|
||||||
|
| LL-Null: -231.86
|
||||||
|
| p (LR): <0.001*
|
||||||
|
=======================================================
|
||||||
|
exp(β) (95% CI) p
|
||||||
|
--------------------------------------------
|
||||||
|
(Intercept) 0.01 (0.00 - 0.02) <0.001*
|
||||||
|
prog
|
||||||
|
1 Ref.
|
||||||
|
2 2.96 (1.46 - 5.97) 0.002*
|
||||||
|
3 1.45 (0.61 - 3.44) 0.40
|
||||||
|
math 1.07 (1.05 - 1.10) <0.001*
|
||||||
|
--------------------------------------------
|
||||||
|
|
||||||
|
The output summarises the results of the regression.
|
||||||
|
For example, the adjusted incidence rate ratio in "num_awards" per unit increase in "math" is 1.07, with 95% confidence interval 1.05–1.10, and is significant with *p* value < 0.001.
|
||||||
|
"""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def model_long_name(self):
|
def model_long_name(self):
|
||||||
|
Loading…
Reference in New Issue
Block a user