From f1e943ca89e39dc80e3a9e22e008ddb28def3010 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Sun, 16 Apr 2023 23:52:12 +1000 Subject: [PATCH] Update documentation --- README.md | 5 +- docs/regress.rst | 11 ++- tests/test_bayes_factors.py | 2 +- tests/test_ordinallogit.py | 2 +- yli/regress.py | 160 +++++++++++++++++++++++------------- yli/shap.py | 4 +- yli/sig_tests.py | 2 +- 7 files changed, 118 insertions(+), 68 deletions(-) diff --git a/README.md b/README.md index c4036f3..ca8a5e9 100644 --- a/README.md +++ b/README.md @@ -49,9 +49,9 @@ Optional dependencies are: * [mpmath](https://mpmath.org/), for *beta_ratio* and *beta_oddsratio* * [PyCryptodome](https://www.pycryptodome.org/), for *pickle_write_encrypted* and *pickle_read_encrypted* * [rpy2](https://rpy2.github.io/), with R packages: - * [BFpack](https://cran.r-project.org/web/packages/BFpack/index.html), for *bayesfactor_afbf* (*RegressionResult.bayesfactor_beta_zero*) + * [BFpack](https://cran.r-project.org/web/packages/BFpack/index.html), for *bayesfactor_afbf* (*RegressionModel.bayesfactor_beta_zero*) * [logistf](https://cran.r-project.org/web/packages/logistf/index.html), for *PenalisedLogit* -* [shap](https://shap.readthedocs.io/en/latest/), for *RegressionResult.shap* +* [shap](https://shap.readthedocs.io/en/latest/), for *RegressionModel.shap* ## Functions @@ -64,7 +64,6 @@ Relevant statistical functions are all directly available from the top-level *yl * *pearsonr*: Pearson correlation coefficient *r* * *ttest_ind*: Independent 2-sample *t* test * Regression: - * *logit_then_regress*: Perform logistic regression and use the estimates as the starting values for an arbitrary regression * *PenalisedLogit*: Model for Firth penalised logistic regression * *regress*: Fit arbitrary regression models * *vif*: Compute the variance inflation factor for independent variables in regression diff --git a/docs/regress.rst b/docs/regress.rst index 3f70a78..b252f17 100644 --- a/docs/regress.rst +++ b/docs/regress.rst @@ -4,17 +4,22 @@ Regression Functions --------- -.. autofunction:: yli.logit_then_regress +.. comment + .. autofunction:: yli.logit_then_regress .. autofunction:: yli.regress .. autofunction:: yli.vif -Additional regression models +Regression models ---------------------------- +.. autoclass:: yli.Logit + .. autoclass:: yli.OrdinalLogit +.. autoclass:: yli.OLS + .. autoclass:: yli.PenalisedLogit Result classes @@ -27,7 +32,7 @@ Result classes :members: :inherited-members: -.. autoclass:: yli.regress.RegressionResult +.. autoclass:: yli.regress.RegressionModel :members: .. autoclass:: yli.shap.ShapResult diff --git a/tests/test_bayes_factors.py b/tests/test_bayes_factors.py index f7f5758..f099af1 100644 --- a/tests/test_bayes_factors.py +++ b/tests/test_bayes_factors.py @@ -21,7 +21,7 @@ import pandas as pd import yli def test_afbf_logit_beta_zero(): - """Compare RegressionResult.bayesfactor_beta_zero for Ott & Longnecker (2016) chapter 12.23 with R BFpack""" + """Compare RegressionModel.bayesfactor_beta_zero for Ott & Longnecker (2016) chapter 12.23 with R BFpack""" df = pd.DataFrame({ 'Unhealthy': [False, False, False, False, False, False, False, True, False, False, False, True, False, False, False, False, False, False, True, False, True, False, False, False, False, False, True, False, False, True, False, False], diff --git a/tests/test_ordinallogit.py b/tests/test_ordinallogit.py index b2ab173..355cac0 100644 --- a/tests/test_ordinallogit.py +++ b/tests/test_ordinallogit.py @@ -64,7 +64,7 @@ somewhat likely/very likely 4.30 (2.72 - 5.88) <0.001* assert result.summary() == expected_summary def test_brant_ucla(): - """Compare RegressionResult.brant with R brant library for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/""" + """Compare RegressionModel.brant with R brant library for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/""" df = pd.read_stata('tests/data/ucla_ologit.dta') result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False) diff --git a/yli/regress.py b/yli/regress.py index 303babd..4d334ed 100644 --- a/yli/regress.py +++ b/yli/regress.py @@ -102,7 +102,7 @@ def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels Fit a statsmodels regression model :param model_class: Type of regression model to fit - :type model_class: :class:`RegressionModel` subclass + :type model_class: :class:`yli.regress.RegressionModel` subclass :param df: Data to perform regression on :type df: DataFrame :param dep: Column in *df* for the dependent variable (numeric) @@ -116,41 +116,9 @@ def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels :param exp: Report exponentiated parameters rather than raw parameters, default (*None*) is to autodetect based on *model_class* :type exp: bool - :rtype: :class:`RegressionModel` + :rtype: :class:`yli.regress.RegressionModel` - **Example:** - - .. code-block:: - - df = pd.DataFrame({ - 'Unhealthy': [False, False, False, ...], - 'Fibrinogen': [2.52, 2.46, 2.29, ...], - 'GammaGlobulin': [38, 36, 36, ...] - }) - yli.regress(sm.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin') - - .. code-block:: text - - Logistic Regression Results - ====================================================== - Dep. Variable: Unhealthy | No. Observations: 32 - Model: Logit | Df. Model: 2 - Date: 2022-10-18 | Df. Residuals: 29 - Time: 19:00:34 | Pseudo R²: 0.26 - Std. Errors: Non-Robust | LL-Model: -11.47 - | LL-Null: -15.44 - | p (LR): 0.02* - ====================================================== - exp(β) (95% CI) p - ----------------------------------------------- - (Intercept) 0.00 (0.00 - 0.24) 0.03* - Fibrinogen 6.80 (1.01 - 45.79) 0.049* - GammaGlobulin 1.17 (0.92 - 1.48) 0.19 - ----------------------------------------------- - - The output summarises the results of the regression. - Note that the parameter estimates are automatically exponentiated. - For example, the odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049. + **Example:** See :class:`yli.OLS`, :class:`yli.Logit`, etc. """ if not any(x.__name__ == 'RegressionModel' for x in model_class.__bases__): @@ -466,7 +434,7 @@ class RegressionModel: Uses the R *BFpack* library. Requires the regression to be from statsmodels. - The term must be specified as the *raw name* from the statsmodels regression, available via :attr:`RegressionResult.raw_result`. + The term must be specified as the *raw name* from the statsmodels regression, available via :attr:`SingleTerm.raw_name`. :param term: Raw name of the term to be tested :type term: str @@ -556,7 +524,7 @@ class LikelihoodRatioTestResult(ChiSquaredResult): """ Result of a likelihood ratio test for regression - See :meth:`RegressionResult.lrtest_null`. + See :meth:`RegressionModel.lrtest_null`. """ def __init__(self, statistic, dof, pvalue): @@ -578,7 +546,7 @@ class SingleTerm: """A term in a :class:`RegressionModel` which is a single term""" def __init__(self, raw_name, beta, pvalue): - #: Raw name of the term (*str*; e.g. in :attr:`RegressionModel.raw_result`) + #: Raw name of the term (*str*) self.raw_name = raw_name #: :class:`yli.utils.Estimate` of the coefficient self.beta = beta @@ -608,6 +576,44 @@ def raw_terms_from_statsmodels_result(raw_result): # Concrete implementations class Logit(RegressionModel): + """ + Logistic regression + + **Example:** + + .. code-block:: + + df = pd.DataFrame({ + 'Unhealthy': [False, False, False, ...], + 'Fibrinogen': [2.52, 2.46, 2.29, ...], + 'GammaGlobulin': [38, 36, 36, ...] + }) + yli.regress(yli.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin') + + .. code-block:: text + + Logistic Regression Results + ====================================================== + Dep. Variable: Unhealthy | No. Observations: 32 + Model: Logit | Df. Model: 2 + Date: 2022-10-18 | Df. Residuals: 29 + Time: 19:00:34 | Pseudo R²: 0.26 + Std. Errors: Non-Robust | LL-Model: -11.47 + | LL-Null: -15.44 + | p (LR): 0.02* + ====================================================== + exp(β) (95% CI) p + ----------------------------------------------- + (Intercept) 0.00 (0.00 - 0.24) 0.03* + Fibrinogen 6.80 (1.01 - 45.79) 0.049* + GammaGlobulin 1.17 (0.92 - 1.48) 0.19 + ----------------------------------------------- + + The output summarises the results of the regression. + Note that the parameter estimates are automatically exponentiated. + For example, the odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049. + """ + @property def model_long_name(self): return 'Logistic Regression' @@ -636,6 +642,46 @@ class Logit(RegressionModel): return result class OLS(RegressionModel): + """ + Ordinary least squares linear regression + + **Example:** + + .. code-block:: + + df = pd.DataFrame(...) + yli.regress(yli.OLS, df, 'LNC', 'D + T1 + T2 + S + PR + NE + CT + BW + N + PT') + + .. code-block:: text + + Ordinary Least Squares Regression Results + ======================================================= + Dep. Variable: LNC | No. Observations: 32 + Model: OLS | Df. Model: 10 + Date: 2023-04-16 | Df. Residuals: 21 + Time: 23:34:01 | R²: 0.86 + Std. Errors: Non-Robust | F: 13.28 + | p (F): <0.001* + ======================================================= + β (95% CI) p + ---------------------------------------------- + (Intercept) -10.63 (-22.51 - 1.24) 0.08 + D 0.23 (0.05 - 0.41) 0.02* + T1 0.01 (-0.04 - 0.05) 0.82 + T2 0.01 (-0.00 - 0.02) 0.24 + S 0.00 (0.00 - 0.00) <0.001* + PR -0.11 (-0.28 - 0.07) 0.21 + NE 0.26 (0.09 - 0.42) 0.004* + CT 0.12 (-0.03 - 0.26) 0.12 + BW 0.04 (-0.18 - 0.26) 0.73 + N -0.01 (-0.03 - 0.00) 0.14 + PT -0.22 (-0.49 - 0.05) 0.10 + ---------------------------------------------- + + The output summarises the results of the regression. + For example, the mean difference in "LNC" per unit increase in "D" is 0.23, with 95% confidence interval 0.05–0.41, and is significant with *p* value 0.02. + """ + @property def model_long_name(self): return 'Ordinary Least Squares Regression' @@ -680,16 +726,16 @@ class OrdinalLogit(RegressionModel): .. code-block:: text - Ordinal Logistic Regression Results - =============================================================== - Dep. Variable: apply | No. Observations: 400 - Model: OrdinalLogit | Df. Model: 5 - Method: Maximum Likelihood | Df. Residuals: 395 - Date: 2022-12-02 | Pseudo R²: 0.03 - Time: 21:30:38 | LL-Model: -358.51 - Std. Errors: Non-Robust | LL-Null: -370.60 - | p (LR): <0.001* - =============================================================== + Ordinal Logistic Regression Results + ========================================================== + Dep. Variable: apply | No. Observations: 400 + Model: Ordinal Logit | Df. Model: 5 + Date: 2022-12-02 | Df. Residuals: 395 + Time: 21:30:38 | Pseudo R²: 0.03 + Std. Errors: Non-Robust | LL-Model: -358.51 + | LL-Null: -370.60 + | p (LR): <0.001* + ============================================================ β (95% CI) p ------------------------------------------------------------ pared 1.05 (0.53 - 1.57) <0.001* @@ -886,15 +932,15 @@ class PenalisedLogit(RegressionModel): .. code-block:: text - Penalised Logistic Regression Results - ========================================================= - Dep. Variable: Outcome | No. Observations: 240 - Model: Logit | Df. Model: 1 - Method: Penalised ML | Pseudo R²: 0.37 - Date: 2022-10-19 | LL-Model: -66.43 - Time: 07:50:40 | LL-Null: -105.91 - Std. Errors: Non-Robust | p (LR): <0.001* - ========================================================= + Penalised Logistic Regression Results + ============================================================ + Dep. Variable: Outcome | No. Observations: 240 + Model: Penalised Logit | Df. Model: 1 + Date: 2022-10-19 | Pseudo R²: 0.37 + Time: 07:50:40 | LL-Model: -66.43 + Std. Errors: Non-Robust | LL-Null: -105.91 + | p (LR): <0.001* + ============================================================ β (95% CI) p --------------------------------------------- (Intercept) -2.28 (-2.77 - -1.85) <0.001* diff --git a/yli/shap.py b/yli/shap.py index c59ba89..9e1c4b8 100644 --- a/yli/shap.py +++ b/yli/shap.py @@ -7,7 +7,7 @@ class ShapResult: """ SHAP values for a regression model - See :meth:`yli.regress.RegressionResult.shap`. + See :meth:`yli.regress.RegressionModel.shap`. """ def __init__(self, model, shap_values, features): @@ -63,7 +63,7 @@ class ShapResult: model = self.model() if model is None: - raise Exception('Referenced RegressionResult has been dropped') + raise Exception('Referenced RegressionModel has been dropped') xdata = self._get_xdata(model) diff --git a/yli/sig_tests.py b/yli/sig_tests.py index 76950fd..359c016 100644 --- a/yli/sig_tests.py +++ b/yli/sig_tests.py @@ -187,7 +187,7 @@ class FTestResult: """ Result of an *F* test for ANOVA/regression - See :func:`yli.anova_oneway` and :meth:`yli.regress.RegressionResult.ftest`. + See :func:`yli.anova_oneway` and :meth:`yli.regress.RegressionModel.ftest`. """ def __init__(self, statistic, dof1, dof2, pvalue):