diff --git a/tests/data/ucla_poisson_sim.csv b/tests/data/ucla_poisson_sim.csv new file mode 100644 index 0000000..e275be1 --- /dev/null +++ b/tests/data/ucla_poisson_sim.csv @@ -0,0 +1,201 @@ +num_awards,prog,math +0,3,41 +0,1,41 +0,3,44 +0,3,42 +0,3,40 +0,1,42 +0,3,46 +0,3,40 +0,3,33 +0,3,46 +0,3,40 +0,2,38 +0,3,44 +0,3,37 +0,3,40 +0,1,39 +0,1,43 +0,3,38 +0,2,45 +0,3,39 +0,1,42 +0,3,45 +0,3,40 +0,3,40 +0,2,43 +0,2,49 +1,1,44 +1,1,46 +0,2,46 +1,2,41 +0,1,42 +0,3,47 +1,2,43 +0,2,49 +1,3,40 +1,3,39 +0,3,49 +0,2,42 +0,1,43 +0,2,41 +0,1,57 +0,2,50 +0,2,44 +0,3,39 +0,2,52 +1,3,41 +0,3,39 +0,1,39 +0,3,52 +1,2,48 +0,2,45 +0,3,40 +0,1,46 +3,2,50 +0,2,43 +0,1,43 +1,2,54 +1,1,42 +0,3,45 +0,1,54 +0,3,47 +1,2,45 +0,2,43 +1,3,40 +0,3,52 +1,3,52 +0,1,45 +0,2,49 +1,2,53 +0,1,54 +2,2,50 +0,2,48 +0,2,51 +0,1,41 +1,3,51 +0,1,46 +0,3,55 +0,1,46 +0,1,61 +0,2,45 +0,3,54 +0,2,54 +1,2,51 +0,1,49 +0,3,56 +0,2,53 +1,1,49 +0,1,57 +0,1,42 +0,3,46 +0,3,45 +0,1,55 +0,3,47 +0,2,50 +0,3,41 +0,1,52 +1,2,57 +0,3,53 +1,3,51 +0,1,57 +0,2,51 +0,3,40 +0,3,50 +0,3,57 +0,1,35 +0,2,49 +0,1,50 +1,1,55 +0,2,48 +0,2,52 +1,2,53 +1,2,59 +2,2,66 +0,2,57 +1,2,54 +0,3,57 +1,2,54 +0,2,54 +0,2,54 +2,2,49 +1,3,51 +3,2,58 +1,2,60 +0,2,51 +0,2,63 +1,2,50 +1,2,53 +0,2,53 +0,2,55 +1,2,56 +0,2,49 +1,2,61 +0,1,49 +0,2,57 +0,2,66 +0,1,56 +3,2,60 +1,2,57 +0,2,59 +0,2,58 +0,2,57 +3,2,64 +0,1,60 +0,1,54 +1,2,61 +1,1,58 +0,1,56 +0,1,60 +0,2,55 +3,2,57 +0,2,62 +0,2,56 +0,2,51 +2,3,56 +0,1,61 +1,2,63 +5,2,61 +0,2,67 +0,2,48 +1,2,61 +0,1,57 +0,1,48 +2,2,65 +4,2,62 +1,2,61 +0,3,53 +1,1,58 +1,2,64 +3,2,65 +1,1,56 +1,1,63 +3,2,57 +2,2,72 +0,3,66 +0,2,62 +1,2,58 +1,2,63 +3,2,64 +2,2,72 +0,2,64 +6,2,69 +4,2,70 +1,2,66 +0,1,58 +2,2,62 +2,2,67 +1,2,64 +2,2,63 +1,2,68 +1,2,75 +0,2,69 +1,2,65 +2,2,71 +5,2,71 +1,2,60 +2,2,71 +2,3,75 +1,2,71 +0,2,72 +3,2,73 diff --git a/tests/test_regress.py b/tests/test_regress.py index 8f5200c..69ee28e 100644 --- a/tests/test_regress.py +++ b/tests/test_regress.py @@ -254,3 +254,33 @@ Dep. 
Variable: Outcome | No. Observations: 240 assert result.summary() == expected_summary assert result._repr_html_() == '
Penalised Logistic Regression Results
Dep. Variable:OutcomeNo. Observations:240
Model:Penalised LogitDf. Model:1
Date:{0:%Y-%m-%d}Pseudo R2:0.37
Time:{0:%H:%M:%S}LL-Model:-66.43
Std. Errors:Non-RobustLL-Null:-105.91
p (LR):<0.001*
β(95% CI)p
(Intercept)-2.28(-2.77-1.85)<0.001*
Pred5.99(3.9510.85)<0.001*
'.format(result.fitted_dt) + +def test_regress_poisson_ucla(): + """Compare yli.regress with yli.Poisson for https://stats.oarc.ucla.edu/r/dae/poisson-regression/""" + + df = pd.read_csv('tests/data/ucla_poisson_sim.csv') + + result = yli.regress(yli.Poisson, df, 'num_awards', 'C(prog) + math') + + expected_summary = ''' Poisson Regression Results +======================================================= +Dep. Variable: num_awards | No. Observations: 200 + Model: Poisson | Df. Model: 3 + Date: {0:%Y-%m-%d} | Df. Residuals: 196 + Time: {0:%H:%M:%S} | Pseudo R²: 0.21 + Std. Errors: Non-Robust | LL-Model: -182.75 + | LL-Null: -231.86 + | p (LR): <0.001* +======================================================= + exp(β) (95% CI) p +-------------------------------------------- +(Intercept) 0.01 (0.00 - 0.02) <0.001* + prog + 1 Ref. + 2 2.96 (1.46 - 5.97) 0.002* + 3 1.45 (0.61 - 3.44) 0.40 + math 1.07 (1.05 - 1.10) <0.001* +--------------------------------------------'''.format(result.fitted_dt) + + assert result.summary() == expected_summary + assert result._repr_html_() == '
Poisson Regression Results
Dep. Variable:num_awardsNo. Observations:200
Model:PoissonDf. Model:3
Date:{0:%Y-%m-%d}Df. Residuals:196
Time:{0:%H:%M:%S}Pseudo R2:0.21
Std. Errors:Non-RobustLL-Model:-182.75
LL-Null:-231.86
p (LR):<0.001*
exp(β)(95% CI)p
(Intercept)0.01(0.000.02)<0.001*
prog
1Ref.
22.96(1.465.97)=0.002*
31.45(0.613.44)=0.40
math1.07(1.051.10)<0.001*
'.format(result.fitted_dt) diff --git a/yli/regress.py b/yli/regress.py index d00436d..6d8c725 100644 --- a/yli/regress.py +++ b/yli/regress.py @@ -820,7 +820,7 @@ class Logit(RegressionModel): The output summarises the results of the regression. Note that the parameter estimates are automatically exponentiated. - For example, the odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049. + For example, the adjusted odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049. """ @property @@ -888,7 +888,7 @@ class OLS(RegressionModel): ---------------------------------------------- The output summarises the results of the regression. - For example, the mean difference in "LNC" per unit increase in "D" is 0.23, with 95% confidence interval 0.05–0.41, and is significant with *p* value 0.02. + For example, the adjusted mean difference in "LNC" per unit increase in "D" is 0.23, with 95% confidence interval 0.05–0.41, and is significant with *p* value 0.02. """ @property @@ -1210,9 +1210,39 @@ class PenalisedLogit(RegressionModel): class Poisson(RegressionModel): """ Poisson regression - """ - # TODO: Document example + **Example:** + + .. code-block:: + + df = pd.DataFrame(...) + yli.regress(yli.Poisson, df, 'num_awards', 'C(prog) + math') + + .. code-block:: text + + Poisson Regression Results + ======================================================= + Dep. Variable: num_awards | No. Observations: 200 + Model: Poisson | Df. Model: 3 + Date: 2023-04-22 | Df. Residuals: 196 + Time: 16:58:21 | Pseudo R²: 0.21 + Std. Errors: Non-Robust | LL-Model: -182.75 + | LL-Null: -231.86 + | p (LR): <0.001* + ======================================================= + exp(β) (95% CI) p + -------------------------------------------- + (Intercept) 0.01 (0.00 - 0.02) <0.001* + prog + 1 Ref. 
+ 2 2.96 (1.46 - 5.97) 0.002* + 3 1.45 (0.61 - 3.44) 0.40 + math 1.07 (1.05 - 1.10) <0.001* + -------------------------------------------- + + The output summarises the results of the regression. + For example, the adjusted incidence rate ratio for "num_awards" per unit increase in "math" is 1.07, with 95% confidence interval 1.05–1.10, and is significant with *p* value < 0.001. + """ @property def model_long_name(self):