Add test case for Poisson regression
This commit is contained in:
parent
3c22fe4197
commit
138e31751a
201
tests/data/ucla_poisson_sim.csv
Normal file
201
tests/data/ucla_poisson_sim.csv
Normal file
@ -0,0 +1,201 @@
|
||||
num_awards,prog,math
|
||||
0,3,41
|
||||
0,1,41
|
||||
0,3,44
|
||||
0,3,42
|
||||
0,3,40
|
||||
0,1,42
|
||||
0,3,46
|
||||
0,3,40
|
||||
0,3,33
|
||||
0,3,46
|
||||
0,3,40
|
||||
0,2,38
|
||||
0,3,44
|
||||
0,3,37
|
||||
0,3,40
|
||||
0,1,39
|
||||
0,1,43
|
||||
0,3,38
|
||||
0,2,45
|
||||
0,3,39
|
||||
0,1,42
|
||||
0,3,45
|
||||
0,3,40
|
||||
0,3,40
|
||||
0,2,43
|
||||
0,2,49
|
||||
1,1,44
|
||||
1,1,46
|
||||
0,2,46
|
||||
1,2,41
|
||||
0,1,42
|
||||
0,3,47
|
||||
1,2,43
|
||||
0,2,49
|
||||
1,3,40
|
||||
1,3,39
|
||||
0,3,49
|
||||
0,2,42
|
||||
0,1,43
|
||||
0,2,41
|
||||
0,1,57
|
||||
0,2,50
|
||||
0,2,44
|
||||
0,3,39
|
||||
0,2,52
|
||||
1,3,41
|
||||
0,3,39
|
||||
0,1,39
|
||||
0,3,52
|
||||
1,2,48
|
||||
0,2,45
|
||||
0,3,40
|
||||
0,1,46
|
||||
3,2,50
|
||||
0,2,43
|
||||
0,1,43
|
||||
1,2,54
|
||||
1,1,42
|
||||
0,3,45
|
||||
0,1,54
|
||||
0,3,47
|
||||
1,2,45
|
||||
0,2,43
|
||||
1,3,40
|
||||
0,3,52
|
||||
1,3,52
|
||||
0,1,45
|
||||
0,2,49
|
||||
1,2,53
|
||||
0,1,54
|
||||
2,2,50
|
||||
0,2,48
|
||||
0,2,51
|
||||
0,1,41
|
||||
1,3,51
|
||||
0,1,46
|
||||
0,3,55
|
||||
0,1,46
|
||||
0,1,61
|
||||
0,2,45
|
||||
0,3,54
|
||||
0,2,54
|
||||
1,2,51
|
||||
0,1,49
|
||||
0,3,56
|
||||
0,2,53
|
||||
1,1,49
|
||||
0,1,57
|
||||
0,1,42
|
||||
0,3,46
|
||||
0,3,45
|
||||
0,1,55
|
||||
0,3,47
|
||||
0,2,50
|
||||
0,3,41
|
||||
0,1,52
|
||||
1,2,57
|
||||
0,3,53
|
||||
1,3,51
|
||||
0,1,57
|
||||
0,2,51
|
||||
0,3,40
|
||||
0,3,50
|
||||
0,3,57
|
||||
0,1,35
|
||||
0,2,49
|
||||
0,1,50
|
||||
1,1,55
|
||||
0,2,48
|
||||
0,2,52
|
||||
1,2,53
|
||||
1,2,59
|
||||
2,2,66
|
||||
0,2,57
|
||||
1,2,54
|
||||
0,3,57
|
||||
1,2,54
|
||||
0,2,54
|
||||
0,2,54
|
||||
2,2,49
|
||||
1,3,51
|
||||
3,2,58
|
||||
1,2,60
|
||||
0,2,51
|
||||
0,2,63
|
||||
1,2,50
|
||||
1,2,53
|
||||
0,2,53
|
||||
0,2,55
|
||||
1,2,56
|
||||
0,2,49
|
||||
1,2,61
|
||||
0,1,49
|
||||
0,2,57
|
||||
0,2,66
|
||||
0,1,56
|
||||
3,2,60
|
||||
1,2,57
|
||||
0,2,59
|
||||
0,2,58
|
||||
0,2,57
|
||||
3,2,64
|
||||
0,1,60
|
||||
0,1,54
|
||||
1,2,61
|
||||
1,1,58
|
||||
0,1,56
|
||||
0,1,60
|
||||
0,2,55
|
||||
3,2,57
|
||||
0,2,62
|
||||
0,2,56
|
||||
0,2,51
|
||||
2,3,56
|
||||
0,1,61
|
||||
1,2,63
|
||||
5,2,61
|
||||
0,2,67
|
||||
0,2,48
|
||||
1,2,61
|
||||
0,1,57
|
||||
0,1,48
|
||||
2,2,65
|
||||
4,2,62
|
||||
1,2,61
|
||||
0,3,53
|
||||
1,1,58
|
||||
1,2,64
|
||||
3,2,65
|
||||
1,1,56
|
||||
1,1,63
|
||||
3,2,57
|
||||
2,2,72
|
||||
0,3,66
|
||||
0,2,62
|
||||
1,2,58
|
||||
1,2,63
|
||||
3,2,64
|
||||
2,2,72
|
||||
0,2,64
|
||||
6,2,69
|
||||
4,2,70
|
||||
1,2,66
|
||||
0,1,58
|
||||
2,2,62
|
||||
2,2,67
|
||||
1,2,64
|
||||
2,2,63
|
||||
1,2,68
|
||||
1,2,75
|
||||
0,2,69
|
||||
1,2,65
|
||||
2,2,71
|
||||
5,2,71
|
||||
1,2,60
|
||||
2,2,71
|
||||
2,3,75
|
||||
1,2,71
|
||||
0,2,72
|
||||
3,2,73
|
|
@ -254,3 +254,33 @@ Dep. Variable: Outcome | No. Observations: 240
|
||||
|
||||
assert result.summary() == expected_summary
|
||||
assert result._repr_html_() == '<table><caption>Penalised Logistic Regression Results</caption><tr><th>Dep. Variable:</th><td>Outcome</td><th>No. Observations:</th><td>240</td></tr><tr><th>Model:</th><td>Penalised Logit</td><th>Df. Model:</th><td>1</td></tr><tr><th>Date:</th><td>{0:%Y-%m-%d}</td><th>Pseudo <i>R</i><sup>2</sup>:</th><td>0.37</td></tr><tr><th>Time:</th><td>{0:%H:%M:%S}</td><th>LL-Model:</th><td>-66.43</td></tr><tr><th>Std. Errors:</th><td>Non-Robust</td><th>LL-Null:</th><td>-105.91</td></tr><tr><th></th><td></td><th><i>p</i> (LR):</th><td><0.001*</td></tr></table><table><tr><th></th><th style="text-align:center"><i>β</i></th><th colspan="3" style="text-align:center">(95% CI)</th><th style="text-align:center"><i>p</i></th></tr><tr><th>(Intercept)</th><td>-2.28</td><td style="padding-right:0">(-2.77</td><td>–</td><td style="padding-left:0">-1.85)</td><td style="text-align:left"><0.001*</td></tr><tr><th>Pred</th><td>5.99</td><td style="padding-right:0">(3.95</td><td>–</td><td style="padding-left:0">10.85)</td><td style="text-align:left"><0.001*</td></tr></table>'.format(result.fitted_dt)
|
||||
|
||||
def test_regress_poisson_ucla():
|
||||
"""Compare yli.regress with yli.Poisson for https://stats.oarc.ucla.edu/r/dae/poisson-regression/"""
|
||||
|
||||
# Load the CSV fixture for this test; its header row is num_awards,prog,math
df = pd.read_csv('tests/data/ucla_poisson_sim.csv')
|
||||
|
||||
# Fit a Poisson model of num_awards on prog and math; prog is wrapped in C(), and the expected output below reports it per level (1 = Ref., 2, 3)
result = yli.regress(yli.Poisson, df, 'num_awards', 'C(prog) + math')
|
||||
|
||||
# Expected plain-text summary; the Date/Time placeholders are filled from result.fitted_dt so the comparison does not depend on when the test runs
expected_summary = ''' Poisson Regression Results
|
||||
=======================================================
|
||||
Dep. Variable: num_awards | No. Observations: 200
|
||||
Model: Poisson | Df. Model: 3
|
||||
Date: {0:%Y-%m-%d} | Df. Residuals: 196
|
||||
Time: {0:%H:%M:%S} | Pseudo R²: 0.21
|
||||
Std. Errors: Non-Robust | LL-Model: -182.75
|
||||
| LL-Null: -231.86
|
||||
| p (LR): <0.001*
|
||||
=======================================================
|
||||
exp(β) (95% CI) p
|
||||
--------------------------------------------
|
||||
(Intercept) 0.01 (0.00 - 0.02) <0.001*
|
||||
prog
|
||||
1 Ref.
|
||||
2 2.96 (1.46 - 5.97) 0.002*
|
||||
3 1.45 (0.61 - 3.44) 0.40
|
||||
math 1.07 (1.05 - 1.10) <0.001*
|
||||
--------------------------------------------'''.format(result.fitted_dt)
|
||||
|
||||
# The text summary must match the expected output exactly
assert result.summary() == expected_summary
|
||||
# The HTML representation is checked the same way, again with the date/time taken from result.fitted_dt
assert result._repr_html_() == '<table><caption>Poisson Regression Results</caption><tr><th>Dep. Variable:</th><td>num_awards</td><th>No. Observations:</th><td>200</td></tr><tr><th>Model:</th><td>Poisson</td><th>Df. Model:</th><td>3</td></tr><tr><th>Date:</th><td>{0:%Y-%m-%d}</td><th>Df. Residuals:</th><td>196</td></tr><tr><th>Time:</th><td>{0:%H:%M:%S}</td><th>Pseudo <i>R</i><sup>2</sup>:</th><td>0.21</td></tr><tr><th>Std. Errors:</th><td>Non-Robust</td><th>LL-Model:</th><td>-182.75</td></tr><tr><th></th><td></td><th>LL-Null:</th><td>-231.86</td></tr><tr><th></th><td></td><th><i>p</i> (LR):</th><td><0.001*</td></tr></table><table><tr><th></th><th style="text-align:center">exp(<i>β</i>)</th><th colspan="3" style="text-align:center">(95% CI)</th><th style="text-align:center"><i>p</i></th></tr><tr><th>(Intercept)</th><td>0.01</td><td style="padding-right:0">(0.00</td><td>–</td><td style="padding-left:0">0.02)</td><td style="text-align:left"><0.001*</td></tr><tr><th>prog</th><td></td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr><tr><td style="text-align:right;font-style:italic">1</td><td>Ref.</td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr><tr><td style="text-align:right;font-style:italic">2</td><td>2.96</td><td style="padding-right:0">(1.46</td><td>–</td><td style="padding-left:0">5.97)</td><td style="text-align:left"><span style="visibility:hidden">=</span>0.002*</td></tr><tr><td style="text-align:right;font-style:italic">3</td><td>1.45</td><td style="padding-right:0">(0.61</td><td>–</td><td style="padding-left:0">3.44)</td><td style="text-align:left"><span style="visibility:hidden">=</span>0.40</td></tr><tr><th>math</th><td>1.07</td><td style="padding-right:0">(1.05</td><td>–</td><td style="padding-left:0">1.10)</td><td style="text-align:left"><0.001*</td></tr></table>'.format(result.fitted_dt)
|
||||
|
@ -820,7 +820,7 @@ class Logit(RegressionModel):
|
||||
|
||||
The output summarises the results of the regression.
|
||||
Note that the parameter estimates are automatically exponentiated.
|
||||
For example, the odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
|
||||
For example, the adjusted odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
|
||||
"""
|
||||
|
||||
@property
|
||||
@ -888,7 +888,7 @@ class OLS(RegressionModel):
|
||||
----------------------------------------------
|
||||
|
||||
The output summarises the results of the regression.
|
||||
For example, the mean difference in "LNC" per unit increase in "D" is 0.23, with 95% confidence interval 0.05–0.41, and is significant with *p* value 0.02.
|
||||
For example, the adjusted mean difference in "LNC" per unit increase in "D" is 0.23, with 95% confidence interval 0.05–0.41, and is significant with *p* value 0.02.
|
||||
"""
|
||||
|
||||
@property
|
||||
@ -1210,9 +1210,39 @@ class PenalisedLogit(RegressionModel):
|
||||
class Poisson(RegressionModel):
|
||||
"""
|
||||
Poisson regression
|
||||
"""
|
||||
|
||||
# TODO: Document example
|
||||
**Example:**
|
||||
|
||||
.. code-block::
|
||||
|
||||
df = pd.DataFrame(...)
|
||||
yli.regress(yli.Poisson, df, 'num_awards', 'C(prog) + math')
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
Poisson Regression Results
|
||||
=======================================================
|
||||
Dep. Variable: num_awards | No. Observations: 200
|
||||
Model: Poisson | Df. Model: 3
|
||||
Date: 2023-04-22 | Df. Residuals: 196
|
||||
Time: 16:58:21 | Pseudo R²: 0.21
|
||||
Std. Errors: Non-Robust | LL-Model: -182.75
|
||||
| LL-Null: -231.86
|
||||
| p (LR): <0.001*
|
||||
=======================================================
|
||||
exp(β) (95% CI) p
|
||||
--------------------------------------------
|
||||
(Intercept) 0.01 (0.00 - 0.02) <0.001*
|
||||
prog
|
||||
1 Ref.
|
||||
2 2.96 (1.46 - 5.97) 0.002*
|
||||
3 1.45 (0.61 - 3.44) 0.40
|
||||
math 1.07 (1.05 - 1.10) <0.001*
|
||||
--------------------------------------------
|
||||
|
||||
The output summarises the results of the regression.
|
||||
For example, the adjusted incidence rate ratio in "num_awards" per unit increase in "math" is 1.07, with 95% confidence interval 1.05–1.10, and is significant with *p* value < 0.001.
|
||||
"""
|
||||
|
||||
@property
|
||||
def model_long_name(self):
|
||||
|
Loading…
Reference in New Issue
Block a user