Add test case for Poisson regression

2023-04-22 17:08:54 +10:00 · 2023-04-22 17:08:54 +10:00 · 138e31751a
commit 138e31751a
parent 3c22fe4197
3 changed files with 265 additions and 4 deletions
--- a/tests/data/ucla_poisson_sim.csv
+++ b/tests/data/ucla_poisson_sim.csv
@@ -0,0 +1,201 @@
+num_awards,prog,math
+0,3,41
+0,1,41
+0,3,44
+0,3,42
+0,3,40
+0,1,42
+0,3,46
+0,3,40
+0,3,33
+0,3,46
+0,3,40
+0,2,38
+0,3,44
+0,3,37
+0,3,40
+0,1,39
+0,1,43
+0,3,38
+0,2,45
+0,3,39
+0,1,42
+0,3,45
+0,3,40
+0,3,40
+0,2,43
+0,2,49
+1,1,44
+1,1,46
+0,2,46
+1,2,41
+0,1,42
+0,3,47
+1,2,43
+0,2,49
+1,3,40
+1,3,39
+0,3,49
+0,2,42
+0,1,43
+0,2,41
+0,1,57
+0,2,50
+0,2,44
+0,3,39
+0,2,52
+1,3,41
+0,3,39
+0,1,39
+0,3,52
+1,2,48
+0,2,45
+0,3,40
+0,1,46
+3,2,50
+0,2,43
+0,1,43
+1,2,54
+1,1,42
+0,3,45
+0,1,54
+0,3,47
+1,2,45
+0,2,43
+1,3,40
+0,3,52
+1,3,52
+0,1,45
+0,2,49
+1,2,53
+0,1,54
+2,2,50
+0,2,48
+0,2,51
+0,1,41
+1,3,51
+0,1,46
+0,3,55
+0,1,46
+0,1,61
+0,2,45
+0,3,54
+0,2,54
+1,2,51
+0,1,49
+0,3,56
+0,2,53
+1,1,49
+0,1,57
+0,1,42
+0,3,46
+0,3,45
+0,1,55
+0,3,47
+0,2,50
+0,3,41
+0,1,52
+1,2,57
+0,3,53
+1,3,51
+0,1,57
+0,2,51
+0,3,40
+0,3,50
+0,3,57
+0,1,35
+0,2,49
+0,1,50
+1,1,55
+0,2,48
+0,2,52
+1,2,53
+1,2,59
+2,2,66
+0,2,57
+1,2,54
+0,3,57
+1,2,54
+0,2,54
+0,2,54
+2,2,49
+1,3,51
+3,2,58
+1,2,60
+0,2,51
+0,2,63
+1,2,50
+1,2,53
+0,2,53
+0,2,55
+1,2,56
+0,2,49
+1,2,61
+0,1,49
+0,2,57
+0,2,66
+0,1,56
+3,2,60
+1,2,57
+0,2,59
+0,2,58
+0,2,57
+3,2,64
+0,1,60
+0,1,54
+1,2,61
+1,1,58
+0,1,56
+0,1,60
+0,2,55
+3,2,57
+0,2,62
+0,2,56
+0,2,51
+2,3,56
+0,1,61
+1,2,63
+5,2,61
+0,2,67
+0,2,48
+1,2,61
+0,1,57
+0,1,48
+2,2,65
+4,2,62
+1,2,61
+0,3,53
+1,1,58
+1,2,64
+3,2,65
+1,1,56
+1,1,63
+3,2,57
+2,2,72
+0,3,66
+0,2,62
+1,2,58
+1,2,63
+3,2,64
+2,2,72
+0,2,64
+6,2,69
+4,2,70
+1,2,66
+0,1,58
+2,2,62
+2,2,67
+1,2,64
+2,2,63
+1,2,68
+1,2,75
+0,2,69
+1,2,65
+2,2,71
+5,2,71
+1,2,60
+2,2,71
+2,3,75
+1,2,71
+0,2,72
+3,2,73
--- a/tests/test_regress.py
+++ b/tests/test_regress.py
@@ -254,3 +254,33 @@ Dep. Variable:         Outcome  |  No. Observations:     240
 	
 	assert result.summary() == expected_summary
 	assert result._repr_html_() == '<table><caption>Penalised Logistic Regression Results</caption><tr><th>Dep. Variable:</th><td>Outcome</td><th>No. Observations:</th><td>240</td></tr><tr><th>Model:</th><td>Penalised Logit</td><th>Df. Model:</th><td>1</td></tr><tr><th>Date:</th><td>{0:%Y-%m-%d}</td><th>Pseudo <i>R</i><sup>2</sup>:</th><td>0.37</td></tr><tr><th>Time:</th><td>{0:%H:%M:%S}</td><th>LL-Model:</th><td>-66.43</td></tr><tr><th>Std. Errors:</th><td>Non-Robust</td><th>LL-Null:</th><td>-105.91</td></tr><tr><th></th><td></td><th><i>p</i> (LR):</th><td>&lt;0.001*</td></tr></table><table><tr><th></th><th style="text-align:center"><i>β</i></th><th colspan="3" style="text-align:center">(95% CI)</th><th style="text-align:center"><i>p</i></th></tr><tr><th>(Intercept)</th><td>-2.28</td><td style="padding-right:0">(-2.77</td><td>–</td><td style="padding-left:0">-1.85)</td><td style="text-align:left">&lt;0.001*</td></tr><tr><th>Pred</th><td>5.99</td><td style="padding-right:0">(3.95</td><td>–</td><td style="padding-left:0">10.85)</td><td style="text-align:left">&lt;0.001*</td></tr></table>'.format(result.fitted_dt)
+
+def test_regress_poisson_ucla():
+	"""Compare the text summary and HTML output of yli.regress with yli.Poisson for the example at https://stats.oarc.ucla.edu/r/dae/poisson-regression/"""
+	
+	df = pd.read_csv('tests/data/ucla_poisson_sim.csv')
+	
+	result = yli.regress(yli.Poisson, df, 'num_awards', 'C(prog) + math')
+	
+	expected_summary = '''               Poisson Regression Results              
+=======================================================
+Dep. Variable: num_awards  |  No. Observations:     200
+        Model:    Poisson  |         Df. Model:       3
+         Date: {0:%Y-%m-%d}  |     Df. Residuals:     196
+         Time:   {0:%H:%M:%S}  |         Pseudo R²:    0.21
+  Std. Errors: Non-Robust  |          LL-Model: -182.75
+                           |           LL-Null: -231.86
+                           |            p (LR): <0.001*
+=======================================================
+              exp(β)   (95% CI)         p   
+--------------------------------------------
+(Intercept)     0.01 (0.00 - 0.02)   <0.001*
+       prog                                 
+          1     Ref.                        
+          2     2.96 (1.46 - 5.97)    0.002*
+          3     1.45 (0.61 - 3.44)    0.40  
+       math     1.07 (1.05 - 1.10)   <0.001*
+--------------------------------------------'''.format(result.fitted_dt)
+	
+	assert result.summary() == expected_summary
+	assert result._repr_html_() == '<table><caption>Poisson Regression Results</caption><tr><th>Dep. Variable:</th><td>num_awards</td><th>No. Observations:</th><td>200</td></tr><tr><th>Model:</th><td>Poisson</td><th>Df. Model:</th><td>3</td></tr><tr><th>Date:</th><td>{0:%Y-%m-%d}</td><th>Df. Residuals:</th><td>196</td></tr><tr><th>Time:</th><td>{0:%H:%M:%S}</td><th>Pseudo <i>R</i><sup>2</sup>:</th><td>0.21</td></tr><tr><th>Std. Errors:</th><td>Non-Robust</td><th>LL-Model:</th><td>-182.75</td></tr><tr><th></th><td></td><th>LL-Null:</th><td>-231.86</td></tr><tr><th></th><td></td><th><i>p</i> (LR):</th><td>&lt;0.001*</td></tr></table><table><tr><th></th><th style="text-align:center">exp(<i>β</i>)</th><th colspan="3" style="text-align:center">(95% CI)</th><th style="text-align:center"><i>p</i></th></tr><tr><th>(Intercept)</th><td>0.01</td><td style="padding-right:0">(0.00</td><td>–</td><td style="padding-left:0">0.02)</td><td style="text-align:left">&lt;0.001*</td></tr><tr><th>prog</th><td></td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr><tr><td style="text-align:right;font-style:italic">1</td><td>Ref.</td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr><tr><td style="text-align:right;font-style:italic">2</td><td>2.96</td><td style="padding-right:0">(1.46</td><td>–</td><td style="padding-left:0">5.97)</td><td style="text-align:left"><span style="visibility:hidden">=</span>0.002*</td></tr><tr><td style="text-align:right;font-style:italic">3</td><td>1.45</td><td style="padding-right:0">(0.61</td><td>–</td><td style="padding-left:0">3.44)</td><td style="text-align:left"><span style="visibility:hidden">=</span>0.40</td></tr><tr><th>math</th><td>1.07</td><td style="padding-right:0">(1.05</td><td>–</td><td style="padding-left:0">1.10)</td><td style="text-align:left">&lt;0.001*</td></tr></table>'.format(result.fitted_dt)
--- a/yli/regress.py
+++ b/yli/regress.py
@@ -820,7 +820,7 @@ class Logit(RegressionModel):
 	
 	The output summarises the results of the regression.
 	Note that the parameter estimates are automatically exponentiated.
-	For example, the odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
+	For example, the adjusted odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
 	"""
 	
 	@property
@@ -888,7 +888,7 @@ class OLS(RegressionModel):
 		----------------------------------------------
 	
 	The output summarises the results of the regression.
-	For example, the mean difference in "LNC" per unit increase in "D" is 0.23, with 95% confidence interval 0.05–0.41, and is significant with *p* value 0.02.
+	For example, the adjusted mean difference in "LNC" per unit increase in "D" is 0.23, with 95% confidence interval 0.05–0.41, and is significant with *p* value 0.02.
 	"""
 	
 	@property
@@ -1210,9 +1210,39 @@ class PenalisedLogit(RegressionModel):
 class Poisson(RegressionModel):
 	"""
 	Poisson regression
-	"""
 	
-	# TODO: Document example
+	**Example:**
+	
+	.. code-block::
+		
+		df = pd.DataFrame(...)
+		yli.regress(yli.Poisson, df, 'num_awards', 'C(prog) + math')
+	
+	.. code-block:: text
+		
+		               Poisson Regression Results              
+		=======================================================
+		Dep. Variable: num_awards  |  No. Observations:     200
+		        Model:    Poisson  |         Df. Model:       3
+		         Date: 2023-04-22  |     Df. Residuals:     196
+		         Time:   16:58:21  |         Pseudo R²:    0.21
+		  Std. Errors: Non-Robust  |          LL-Model: -182.75
+		                           |           LL-Null: -231.86
+		                           |            p (LR): <0.001*
+		=======================================================
+		              exp(β)   (95% CI)         p   
+		--------------------------------------------
+		(Intercept)     0.01 (0.00 - 0.02)   <0.001*
+		       prog                                 
+		          1     Ref.                        
+		          2     2.96 (1.46 - 5.97)    0.002*
+		          3     1.45 (0.61 - 3.44)    0.40  
+		       math     1.07 (1.05 - 1.10)   <0.001*
+		--------------------------------------------
+	
+	The output summarises the results of the regression.
+	For example, the adjusted incidence rate ratio for "num_awards" per unit increase in "math" is 1.07, with 95% confidence interval 1.05–1.10, and is significant with *p* value < 0.001.
+	"""
 	
 	@property
 	def model_long_name(self):