Add test case for Poisson regression

This commit is contained in:
RunasSudo 2023-04-22 17:08:54 +10:00
parent 3c22fe4197
commit 138e31751a
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
3 changed files with 265 additions and 4 deletions

View File

@ -0,0 +1,201 @@
num_awards,prog,math
0,3,41
0,1,41
0,3,44
0,3,42
0,3,40
0,1,42
0,3,46
0,3,40
0,3,33
0,3,46
0,3,40
0,2,38
0,3,44
0,3,37
0,3,40
0,1,39
0,1,43
0,3,38
0,2,45
0,3,39
0,1,42
0,3,45
0,3,40
0,3,40
0,2,43
0,2,49
1,1,44
1,1,46
0,2,46
1,2,41
0,1,42
0,3,47
1,2,43
0,2,49
1,3,40
1,3,39
0,3,49
0,2,42
0,1,43
0,2,41
0,1,57
0,2,50
0,2,44
0,3,39
0,2,52
1,3,41
0,3,39
0,1,39
0,3,52
1,2,48
0,2,45
0,3,40
0,1,46
3,2,50
0,2,43
0,1,43
1,2,54
1,1,42
0,3,45
0,1,54
0,3,47
1,2,45
0,2,43
1,3,40
0,3,52
1,3,52
0,1,45
0,2,49
1,2,53
0,1,54
2,2,50
0,2,48
0,2,51
0,1,41
1,3,51
0,1,46
0,3,55
0,1,46
0,1,61
0,2,45
0,3,54
0,2,54
1,2,51
0,1,49
0,3,56
0,2,53
1,1,49
0,1,57
0,1,42
0,3,46
0,3,45
0,1,55
0,3,47
0,2,50
0,3,41
0,1,52
1,2,57
0,3,53
1,3,51
0,1,57
0,2,51
0,3,40
0,3,50
0,3,57
0,1,35
0,2,49
0,1,50
1,1,55
0,2,48
0,2,52
1,2,53
1,2,59
2,2,66
0,2,57
1,2,54
0,3,57
1,2,54
0,2,54
0,2,54
2,2,49
1,3,51
3,2,58
1,2,60
0,2,51
0,2,63
1,2,50
1,2,53
0,2,53
0,2,55
1,2,56
0,2,49
1,2,61
0,1,49
0,2,57
0,2,66
0,1,56
3,2,60
1,2,57
0,2,59
0,2,58
0,2,57
3,2,64
0,1,60
0,1,54
1,2,61
1,1,58
0,1,56
0,1,60
0,2,55
3,2,57
0,2,62
0,2,56
0,2,51
2,3,56
0,1,61
1,2,63
5,2,61
0,2,67
0,2,48
1,2,61
0,1,57
0,1,48
2,2,65
4,2,62
1,2,61
0,3,53
1,1,58
1,2,64
3,2,65
1,1,56
1,1,63
3,2,57
2,2,72
0,3,66
0,2,62
1,2,58
1,2,63
3,2,64
2,2,72
0,2,64
6,2,69
4,2,70
1,2,66
0,1,58
2,2,62
2,2,67
1,2,64
2,2,63
1,2,68
1,2,75
0,2,69
1,2,65
2,2,71
5,2,71
1,2,60
2,2,71
2,3,75
1,2,71
0,2,72
3,2,73
1 num_awards prog math
2 0 3 41
3 0 1 41
4 0 3 44
5 0 3 42
6 0 3 40
7 0 1 42
8 0 3 46
9 0 3 40
10 0 3 33
11 0 3 46
12 0 3 40
13 0 2 38
14 0 3 44
15 0 3 37
16 0 3 40
17 0 1 39
18 0 1 43
19 0 3 38
20 0 2 45
21 0 3 39
22 0 1 42
23 0 3 45
24 0 3 40
25 0 3 40
26 0 2 43
27 0 2 49
28 1 1 44
29 1 1 46
30 0 2 46
31 1 2 41
32 0 1 42
33 0 3 47
34 1 2 43
35 0 2 49
36 1 3 40
37 1 3 39
38 0 3 49
39 0 2 42
40 0 1 43
41 0 2 41
42 0 1 57
43 0 2 50
44 0 2 44
45 0 3 39
46 0 2 52
47 1 3 41
48 0 3 39
49 0 1 39
50 0 3 52
51 1 2 48
52 0 2 45
53 0 3 40
54 0 1 46
55 3 2 50
56 0 2 43
57 0 1 43
58 1 2 54
59 1 1 42
60 0 3 45
61 0 1 54
62 0 3 47
63 1 2 45
64 0 2 43
65 1 3 40
66 0 3 52
67 1 3 52
68 0 1 45
69 0 2 49
70 1 2 53
71 0 1 54
72 2 2 50
73 0 2 48
74 0 2 51
75 0 1 41
76 1 3 51
77 0 1 46
78 0 3 55
79 0 1 46
80 0 1 61
81 0 2 45
82 0 3 54
83 0 2 54
84 1 2 51
85 0 1 49
86 0 3 56
87 0 2 53
88 1 1 49
89 0 1 57
90 0 1 42
91 0 3 46
92 0 3 45
93 0 1 55
94 0 3 47
95 0 2 50
96 0 3 41
97 0 1 52
98 1 2 57
99 0 3 53
100 1 3 51
101 0 1 57
102 0 2 51
103 0 3 40
104 0 3 50
105 0 3 57
106 0 1 35
107 0 2 49
108 0 1 50
109 1 1 55
110 0 2 48
111 0 2 52
112 1 2 53
113 1 2 59
114 2 2 66
115 0 2 57
116 1 2 54
117 0 3 57
118 1 2 54
119 0 2 54
120 0 2 54
121 2 2 49
122 1 3 51
123 3 2 58
124 1 2 60
125 0 2 51
126 0 2 63
127 1 2 50
128 1 2 53
129 0 2 53
130 0 2 55
131 1 2 56
132 0 2 49
133 1 2 61
134 0 1 49
135 0 2 57
136 0 2 66
137 0 1 56
138 3 2 60
139 1 2 57
140 0 2 59
141 0 2 58
142 0 2 57
143 3 2 64
144 0 1 60
145 0 1 54
146 1 2 61
147 1 1 58
148 0 1 56
149 0 1 60
150 0 2 55
151 3 2 57
152 0 2 62
153 0 2 56
154 0 2 51
155 2 3 56
156 0 1 61
157 1 2 63
158 5 2 61
159 0 2 67
160 0 2 48
161 1 2 61
162 0 1 57
163 0 1 48
164 2 2 65
165 4 2 62
166 1 2 61
167 0 3 53
168 1 1 58
169 1 2 64
170 3 2 65
171 1 1 56
172 1 1 63
173 3 2 57
174 2 2 72
175 0 3 66
176 0 2 62
177 1 2 58
178 1 2 63
179 3 2 64
180 2 2 72
181 0 2 64
182 6 2 69
183 4 2 70
184 1 2 66
185 0 1 58
186 2 2 62
187 2 2 67
188 1 2 64
189 2 2 63
190 1 2 68
191 1 2 75
192 0 2 69
193 1 2 65
194 2 2 71
195 5 2 71
196 1 2 60
197 2 2 71
198 2 3 75
199 1 2 71
200 0 2 72
201 3 2 73

View File

@ -254,3 +254,33 @@ Dep. Variable: Outcome | No. Observations: 240
assert result.summary() == expected_summary
assert result._repr_html_() == '<table><caption>Penalised Logistic Regression Results</caption><tr><th>Dep. Variable:</th><td>Outcome</td><th>No. Observations:</th><td>240</td></tr><tr><th>Model:</th><td>Penalised Logit</td><th>Df. Model:</th><td>1</td></tr><tr><th>Date:</th><td>{0:%Y-%m-%d}</td><th>Pseudo <i>R</i><sup>2</sup>:</th><td>0.37</td></tr><tr><th>Time:</th><td>{0:%H:%M:%S}</td><th>LL-Model:</th><td>-66.43</td></tr><tr><th>Std. Errors:</th><td>Non-Robust</td><th>LL-Null:</th><td>-105.91</td></tr><tr><th></th><td></td><th><i>p</i> (LR):</th><td>&lt;0.001*</td></tr></table><table><tr><th></th><th style="text-align:center"><i>β</i></th><th colspan="3" style="text-align:center">(95% CI)</th><th style="text-align:center"><i>p</i></th></tr><tr><th>(Intercept)</th><td>-2.28</td><td style="padding-right:0">(-2.77</td><td>–</td><td style="padding-left:0">-1.85)</td><td style="text-align:left">&lt;0.001*</td></tr><tr><th>Pred</th><td>5.99</td><td style="padding-right:0">(3.95</td><td>–</td><td style="padding-left:0">10.85)</td><td style="text-align:left">&lt;0.001*</td></tr></table>'.format(result.fitted_dt)
def test_regress_poisson_ucla():
"""
Compare yli.regress with yli.Poisson for https://stats.oarc.ucla.edu/r/dae/poisson-regression/

Fits num_awards on C(prog) + math using the data in tests/data/ucla_poisson_sim.csv,
then checks both the plain-text summary and the HTML representation of the result
against the expected output character for character.
"""
# Load the example dataset; the formula below uses its num_awards, prog and math columns
df = pd.read_csv('tests/data/ucla_poisson_sim.csv')
# prog is wrapped in C(...) so it is reported per level (1 = Ref., 2, 3) in the expected output
result = yli.regress(yli.Poisson, df, 'num_awards', 'C(prog) + math')
# The Date and Time fields are filled in from result.fitted_dt, so the comparison
# does not depend on when the test is run
expected_summary = ''' Poisson Regression Results
=======================================================
Dep. Variable: num_awards | No. Observations: 200
Model: Poisson | Df. Model: 3
Date: {0:%Y-%m-%d} | Df. Residuals: 196
Time: {0:%H:%M:%S} | Pseudo : 0.21
Std. Errors: Non-Robust | LL-Model: -182.75
| LL-Null: -231.86
| p (LR): <0.001*
=======================================================
exp(β) (95% CI) p
--------------------------------------------
(Intercept) 0.01 (0.00 - 0.02) <0.001*
prog
1 Ref.
2 2.96 (1.46 - 5.97) 0.002*
3 1.45 (0.61 - 3.44) 0.40
math 1.07 (1.05 - 1.10) <0.001*
--------------------------------------------'''.format(result.fitted_dt)
# The plain-text summary must match exactly
assert result.summary() == expected_summary
# The HTML representation must match exactly as well (same figures, date/time again taken from result.fitted_dt)
assert result._repr_html_() == '<table><caption>Poisson Regression Results</caption><tr><th>Dep. Variable:</th><td>num_awards</td><th>No. Observations:</th><td>200</td></tr><tr><th>Model:</th><td>Poisson</td><th>Df. Model:</th><td>3</td></tr><tr><th>Date:</th><td>{0:%Y-%m-%d}</td><th>Df. Residuals:</th><td>196</td></tr><tr><th>Time:</th><td>{0:%H:%M:%S}</td><th>Pseudo <i>R</i><sup>2</sup>:</th><td>0.21</td></tr><tr><th>Std. Errors:</th><td>Non-Robust</td><th>LL-Model:</th><td>-182.75</td></tr><tr><th></th><td></td><th>LL-Null:</th><td>-231.86</td></tr><tr><th></th><td></td><th><i>p</i> (LR):</th><td>&lt;0.001*</td></tr></table><table><tr><th></th><th style="text-align:center">exp(<i>β</i>)</th><th colspan="3" style="text-align:center">(95% CI)</th><th style="text-align:center"><i>p</i></th></tr><tr><th>(Intercept)</th><td>0.01</td><td style="padding-right:0">(0.00</td><td>–</td><td style="padding-left:0">0.02)</td><td style="text-align:left">&lt;0.001*</td></tr><tr><th>prog</th><td></td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr><tr><td style="text-align:right;font-style:italic">1</td><td>Ref.</td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr><tr><td style="text-align:right;font-style:italic">2</td><td>2.96</td><td style="padding-right:0">(1.46</td><td>–</td><td style="padding-left:0">5.97)</td><td style="text-align:left"><span style="visibility:hidden">=</span>0.002*</td></tr><tr><td style="text-align:right;font-style:italic">3</td><td>1.45</td><td style="padding-right:0">(0.61</td><td>–</td><td style="padding-left:0">3.44)</td><td style="text-align:left"><span style="visibility:hidden">=</span>0.40</td></tr><tr><th>math</th><td>1.07</td><td style="padding-right:0">(1.05</td><td>–</td><td style="padding-left:0">1.10)</td><td style="text-align:left">&lt;0.001*</td></tr></table>'.format(result.fitted_dt)

View File

@ -820,7 +820,7 @@ class Logit(RegressionModel):
The output summarises the results of the regression.
Note that the parameter estimates are automatically exponentiated.
For example, the odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
For example, the adjusted odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
"""
@property
@ -888,7 +888,7 @@ class OLS(RegressionModel):
----------------------------------------------
The output summarises the results of the regression.
For example, the mean difference in "LNC" per unit increase in "D" is 0.23, with 95% confidence interval 0.05–0.41, and is significant with *p* value 0.02.
For example, the adjusted mean difference in "LNC" per unit increase in "D" is 0.23, with 95% confidence interval 0.05–0.41, and is significant with *p* value 0.02.
"""
@property
@ -1210,9 +1210,39 @@ class PenalisedLogit(RegressionModel):
class Poisson(RegressionModel):
"""
Poisson regression
"""
# TODO: Document example
**Example:**
.. code-block::
df = pd.DataFrame(...)
yli.regress(yli.Poisson, df, 'num_awards', 'C(prog) + math')
.. code-block:: text
Poisson Regression Results
=======================================================
Dep. Variable: num_awards | No. Observations: 200
Model: Poisson | Df. Model: 3
Date: 2023-04-22 | Df. Residuals: 196
Time: 16:58:21 | Pseudo R²: 0.21
Std. Errors: Non-Robust | LL-Model: -182.75
| LL-Null: -231.86
| p (LR): <0.001*
=======================================================
exp(β) (95% CI) p
--------------------------------------------
(Intercept) 0.01 (0.00 - 0.02) <0.001*
prog
1 Ref.
2 2.96 (1.46 - 5.97) 0.002*
3 1.45 (0.61 - 3.44) 0.40
math 1.07 (1.05 - 1.10) <0.001*
--------------------------------------------
The output summarises the results of the regression.
For example, the adjusted incidence rate ratio in "num_awards" per unit increase in "math" is 1.07, with 95% confidence interval 1.05–1.10, and is significant with *p* value < 0.001.
"""
@property
def model_long_name(self):