Add test case for Poisson regression

2023-04-22 17:08:54 +10:00 · 2023-04-22 17:08:54 +10:00 · 138e31751a
commit 138e31751a
parent 3c22fe4197
3 changed files with 265 additions and 4 deletions
--- a/tests/data/ucla_poisson_sim.csv
+++ b/tests/data/ucla_poisson_sim.csv
@ -0,0 +1,201 @@
 num_awards,prog,math
 0,3,41
 0,1,41
 0,3,44
 0,3,42
 0,3,40
 0,1,42
 0,3,46
 0,3,40
 0,3,33
 0,3,46
 0,3,40
 0,2,38
 0,3,44
 0,3,37
 0,3,40
 0,1,39
 0,1,43
 0,3,38
 0,2,45
 0,3,39
 0,1,42
 0,3,45
 0,3,40
 0,3,40
 0,2,43
 0,2,49
 1,1,44
 1,1,46
 0,2,46
 1,2,41
 0,1,42
 0,3,47
 1,2,43
 0,2,49
 1,3,40
 1,3,39
 0,3,49
 0,2,42
 0,1,43
 0,2,41
 0,1,57
 0,2,50
 0,2,44
 0,3,39
 0,2,52
 1,3,41
 0,3,39
 0,1,39
 0,3,52
 1,2,48
 0,2,45
 0,3,40
 0,1,46
 3,2,50
 0,2,43
 0,1,43
 1,2,54
 1,1,42
 0,3,45
 0,1,54
 0,3,47
 1,2,45
 0,2,43
 1,3,40
 0,3,52
 1,3,52
 0,1,45
 0,2,49
 1,2,53
 0,1,54
 2,2,50
 0,2,48
 0,2,51
 0,1,41
 1,3,51
 0,1,46
 0,3,55
 0,1,46
 0,1,61
 0,2,45
 0,3,54
 0,2,54
 1,2,51
 0,1,49
 0,3,56
 0,2,53
 1,1,49
 0,1,57
 0,1,42
 0,3,46
 0,3,45
 0,1,55
 0,3,47
 0,2,50
 0,3,41
 0,1,52
 1,2,57
 0,3,53
 1,3,51
 0,1,57
 0,2,51
 0,3,40
 0,3,50
 0,3,57
 0,1,35
 0,2,49
 0,1,50
 1,1,55
 0,2,48
 0,2,52
 1,2,53
 1,2,59
 2,2,66
 0,2,57
 1,2,54
 0,3,57
 1,2,54
 0,2,54
 0,2,54
 2,2,49
 1,3,51
 3,2,58
 1,2,60
 0,2,51
 0,2,63
 1,2,50
 1,2,53
 0,2,53
 0,2,55
 1,2,56
 0,2,49
 1,2,61
 0,1,49
 0,2,57
 0,2,66
 0,1,56
 3,2,60
 1,2,57
 0,2,59
 0,2,58
 0,2,57
 3,2,64
 0,1,60
 0,1,54
 1,2,61
 1,1,58
 0,1,56
 0,1,60
 0,2,55
 3,2,57
 0,2,62
 0,2,56
 0,2,51
 2,3,56
 0,1,61
 1,2,63
 5,2,61
 0,2,67
 0,2,48
 1,2,61
 0,1,57
 0,1,48
 2,2,65
 4,2,62
 1,2,61
 0,3,53
 1,1,58
 1,2,64
 3,2,65
 1,1,56
 1,1,63
 3,2,57
 2,2,72
 0,3,66
 0,2,62
 1,2,58
 1,2,63
 3,2,64
 2,2,72
 0,2,64
 6,2,69
 4,2,70
 1,2,66
 0,1,58
 2,2,62
 2,2,67
 1,2,64
 2,2,63
 1,2,68
 1,2,75
 0,2,69
 1,2,65
 2,2,71
 5,2,71
 1,2,60
 2,2,71
 2,3,75
 1,2,71
 0,2,72
 3,2,73
--- a/tests/test_regress.py
+++ b/tests/test_regress.py
@ -254,3 +254,33 @@ Dep. Variable:         Outcome  |  No. Observations:     240
 	assert result.summary() == expected_summary
 	assert result._repr_html_() == '<table><caption>Penalised Logistic Regression Results</caption><tr><th>Dep. Variable:</th><td>Outcome</td><th>No. Observations:</th><td>240</td></tr><tr><th>Model:</th><td>Penalised Logit</td><th>Df. Model:</th><td>1</td></tr><tr><th>Date:</th><td>{0:%Y-%m-%d}</td><th>Pseudo <i>R</i><sup>2</sup>:</th><td>0.37</td></tr><tr><th>Time:</th><td>{0:%H:%M:%S}</td><th>LL-Model:</th><td>-66.43</td></tr><tr><th>Std. Errors:</th><td>Non-Robust</td><th>LL-Null:</th><td>-105.91</td></tr><tr><th></th><td></td><th><i>p</i> (LR):</th><td>&lt;0.001*</td></tr></table><table><tr><th></th><th style="text-align:center"><i>β</i></th><th colspan="3" style="text-align:center">(95% CI)</th><th style="text-align:center"><i>p</i></th></tr><tr><th>(Intercept)</th><td>-2.28</td><td style="padding-right:0">(-2.77</td><td>–</td><td style="padding-left:0">-1.85)</td><td style="text-align:left">&lt;0.001*</td></tr><tr><th>Pred</th><td>5.99</td><td style="padding-right:0">(3.95</td><td>–</td><td style="padding-left:0">10.85)</td><td style="text-align:left">&lt;0.001*</td></tr></table>'.format(result.fitted_dt)
 def test_regress_poisson_ucla():
 	"""Compare yli.regress with yli.Poisson for https://stats.oarc.ucla.edu/r/dae/poisson-regression/"""
 	# Load the CSV fixture for this test (columns: num_awards, prog, math)
 	df = pd.read_csv('tests/data/ucla_poisson_sim.csv')
 	# Fit a Poisson model of num_awards, with prog wrapped in C(...) and math as predictors
 	result = yli.regress(yli.Poisson, df, 'num_awards', 'C(prog) + math')
 	# The Date and Time cells are format placeholders filled from result.fitted_dt,
 	# so the expected text carries the timestamp recorded on the result itself.
 	# Whitespace inside the literal (including trailing spaces) is part of the expected output.
 	expected_summary = '''               Poisson Regression Results              
 =======================================================
 Dep. Variable: num_awards  |  No. Observations:     200
         Model:    Poisson  |         Df. Model:       3
          Date: {0:%Y-%m-%d}  |     Df. Residuals:     196
          Time:   {0:%H:%M:%S}  |         Pseudo R²:    0.21
   Std. Errors: Non-Robust  |          LL-Model: -182.75
                            |           LL-Null: -231.86
                            |            p (LR): <0.001*
 =======================================================
               exp(β)   (95% CI)         p   
 --------------------------------------------
 (Intercept)     0.01 (0.00 - 0.02)   <0.001*
        prog                                 
           1     Ref.                        
           2     2.96 (1.46 - 5.97)    0.002*
           3     1.45 (0.61 - 3.44)    0.40  
        math     1.07 (1.05 - 1.10)   <0.001*
 --------------------------------------------'''.format(result.fitted_dt)
 	# The plain-text summary must match the expected table exactly
 	assert result.summary() == expected_summary
 	# The HTML representation must match exactly as well; its Date/Time cells are likewise filled from result.fitted_dt
 	assert result._repr_html_() == '<table><caption>Poisson Regression Results</caption><tr><th>Dep. Variable:</th><td>num_awards</td><th>No. Observations:</th><td>200</td></tr><tr><th>Model:</th><td>Poisson</td><th>Df. Model:</th><td>3</td></tr><tr><th>Date:</th><td>{0:%Y-%m-%d}</td><th>Df. Residuals:</th><td>196</td></tr><tr><th>Time:</th><td>{0:%H:%M:%S}</td><th>Pseudo <i>R</i><sup>2</sup>:</th><td>0.21</td></tr><tr><th>Std. Errors:</th><td>Non-Robust</td><th>LL-Model:</th><td>-182.75</td></tr><tr><th></th><td></td><th>LL-Null:</th><td>-231.86</td></tr><tr><th></th><td></td><th><i>p</i> (LR):</th><td>&lt;0.001*</td></tr></table><table><tr><th></th><th style="text-align:center">exp(<i>β</i>)</th><th colspan="3" style="text-align:center">(95% CI)</th><th style="text-align:center"><i>p</i></th></tr><tr><th>(Intercept)</th><td>0.01</td><td style="padding-right:0">(0.00</td><td>–</td><td style="padding-left:0">0.02)</td><td style="text-align:left">&lt;0.001*</td></tr><tr><th>prog</th><td></td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr><tr><td style="text-align:right;font-style:italic">1</td><td>Ref.</td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr><tr><td style="text-align:right;font-style:italic">2</td><td>2.96</td><td style="padding-right:0">(1.46</td><td>–</td><td style="padding-left:0">5.97)</td><td style="text-align:left"><span style="visibility:hidden">=</span>0.002*</td></tr><tr><td style="text-align:right;font-style:italic">3</td><td>1.45</td><td style="padding-right:0">(0.61</td><td>–</td><td style="padding-left:0">3.44)</td><td style="text-align:left"><span style="visibility:hidden">=</span>0.40</td></tr><tr><th>math</th><td>1.07</td><td style="padding-right:0">(1.05</td><td>–</td><td style="padding-left:0">1.10)</td><td style="text-align:left">&lt;0.001*</td></tr></table>'.format(result.fitted_dt)
--- a/yli/regress.py
+++ b/yli/regress.py
@ -820,7 +820,7 @@ class Logit(RegressionModel):
 	The output summarises the results of the regression.
 	Note that the parameter estimates are automatically exponentiated.
-	For example, the odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
+	For example, the adjusted odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
 	"""
 	@property
@ -888,7 +888,7 @@ class OLS(RegressionModel):
 		----------------------------------------------
 	The output summarises the results of the regression.
-	For example, the mean difference in "LNC" per unit increase in "D" is 0.23, with 95% confidence interval 0.05–0.41, and is significant with *p* value 0.02.
+	For example, the adjusted mean difference in "LNC" per unit increase in "D" is 0.23, with 95% confidence interval 0.05–0.41, and is significant with *p* value 0.02.
 	"""
 	@property
@ -1210,9 +1210,39 @@ class PenalisedLogit(RegressionModel):
 class Poisson(RegressionModel):
 	"""
 	Poisson regression
 	"""
-	# TODO: Document example
+	**Example:**
 	.. code-block::
 		df = pd.DataFrame(...)
 		yli.regress(yli.Poisson, df, 'num_awards', 'C(prog) + math')
 	.. code-block:: text
 		               Poisson Regression Results              
 		=======================================================
 		Dep. Variable: num_awards  |  No. Observations:     200
 		        Model:    Poisson  |         Df. Model:       3
 		         Date: 2023-04-22  |     Df. Residuals:     196
 		         Time:   16:58:21  |         Pseudo R²:    0.21
 		  Std. Errors: Non-Robust  |          LL-Model: -182.75
 		                           |           LL-Null: -231.86
 		                           |            p (LR): <0.001*
 		=======================================================
 		              exp(β)   (95% CI)         p   
 		--------------------------------------------
 		(Intercept)     0.01 (0.00 - 0.02)   <0.001*
 		       prog                                 
 		          1     Ref.                        
 		          2     2.96 (1.46 - 5.97)    0.002*
 		          3     1.45 (0.61 - 3.44)    0.40  
 		       math     1.07 (1.05 - 1.10)   <0.001*
 		--------------------------------------------
 	The output summarises the results of the regression.
 	For example, the adjusted incidence rate ratio in "num_awards" per unit increase in "math" is 1.07, with 95% confidence interval 1.05–1.10, and is significant with *p* value < 0.001.
 	"""
 	@property
 	def model_long_name(self):