Update documentation

This commit is contained in:
RunasSudo 2023-04-16 23:52:12 +10:00
parent ac2aca7b8f
commit f1e943ca89
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
7 changed files with 118 additions and 68 deletions

View File

@ -49,9 +49,9 @@ Optional dependencies are:
* [mpmath](https://mpmath.org/), for *beta_ratio* and *beta_oddsratio* * [mpmath](https://mpmath.org/), for *beta_ratio* and *beta_oddsratio*
* [PyCryptodome](https://www.pycryptodome.org/), for *pickle_write_encrypted* and *pickle_read_encrypted* * [PyCryptodome](https://www.pycryptodome.org/), for *pickle_write_encrypted* and *pickle_read_encrypted*
* [rpy2](https://rpy2.github.io/), with R packages: * [rpy2](https://rpy2.github.io/), with R packages:
* [BFpack](https://cran.r-project.org/web/packages/BFpack/index.html), for *bayesfactor_afbf* (*RegressionResult.bayesfactor_beta_zero*) * [BFpack](https://cran.r-project.org/web/packages/BFpack/index.html), for *bayesfactor_afbf* (*RegressionModel.bayesfactor_beta_zero*)
* [logistf](https://cran.r-project.org/web/packages/logistf/index.html), for *PenalisedLogit* * [logistf](https://cran.r-project.org/web/packages/logistf/index.html), for *PenalisedLogit*
* [shap](https://shap.readthedocs.io/en/latest/), for *RegressionResult.shap* * [shap](https://shap.readthedocs.io/en/latest/), for *RegressionModel.shap*
## Functions ## Functions
@ -64,7 +64,6 @@ Relevant statistical functions are all directly available from the top-level *yl
* *pearsonr*: Pearson correlation coefficient *r* * *pearsonr*: Pearson correlation coefficient *r*
* *ttest_ind*: Independent 2-sample *t* test * *ttest_ind*: Independent 2-sample *t* test
* Regression: * Regression:
* *logit_then_regress*: Perform logistic regression and use the estimates as the starting values for an arbitrary regression
* *PenalisedLogit*: Model for Firth penalised logistic regression * *PenalisedLogit*: Model for Firth penalised logistic regression
* *regress*: Fit arbitrary regression models * *regress*: Fit arbitrary regression models
* *vif*: Compute the variance inflation factor for independent variables in regression * *vif*: Compute the variance inflation factor for independent variables in regression

View File

@ -4,17 +4,22 @@ Regression
Functions Functions
--------- ---------
.. autofunction:: yli.logit_then_regress .. comment
.. autofunction:: yli.logit_then_regress
.. autofunction:: yli.regress .. autofunction:: yli.regress
.. autofunction:: yli.vif .. autofunction:: yli.vif
Additional regression models Regression models
---------------------------- ----------------------------
.. autoclass:: yli.Logit
.. autoclass:: yli.OrdinalLogit .. autoclass:: yli.OrdinalLogit
.. autoclass:: yli.OLS
.. autoclass:: yli.PenalisedLogit .. autoclass:: yli.PenalisedLogit
Result classes Result classes
@ -27,7 +32,7 @@ Result classes
:members: :members:
:inherited-members: :inherited-members:
.. autoclass:: yli.regress.RegressionResult .. autoclass:: yli.regress.RegressionModel
:members: :members:
.. autoclass:: yli.shap.ShapResult .. autoclass:: yli.shap.ShapResult

View File

@ -21,7 +21,7 @@ import pandas as pd
import yli import yli
def test_afbf_logit_beta_zero(): def test_afbf_logit_beta_zero():
"""Compare RegressionResult.bayesfactor_beta_zero for Ott & Longnecker (2016) chapter 12.23 with R BFpack""" """Compare RegressionModel.bayesfactor_beta_zero for Ott & Longnecker (2016) chapter 12.23 with R BFpack"""
df = pd.DataFrame({ df = pd.DataFrame({
'Unhealthy': [False, False, False, False, False, False, False, True, False, False, False, True, False, False, False, False, False, False, True, False, True, False, False, False, False, False, True, False, False, True, False, False], 'Unhealthy': [False, False, False, False, False, False, False, True, False, False, False, True, False, False, False, False, False, False, True, False, True, False, False, False, False, False, True, False, False, True, False, False],

View File

@ -64,7 +64,7 @@ somewhat likely/very likely 4.30 (2.72 - 5.88) <0.001*
assert result.summary() == expected_summary assert result.summary() == expected_summary
def test_brant_ucla(): def test_brant_ucla():
"""Compare RegressionResult.brant with R brant library for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/""" """Compare RegressionModel.brant with R brant library for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/"""
df = pd.read_stata('tests/data/ucla_ologit.dta') df = pd.read_stata('tests/data/ucla_ologit.dta')
result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False) result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)

View File

@ -102,7 +102,7 @@ def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels
Fit a statsmodels regression model Fit a statsmodels regression model
:param model_class: Type of regression model to fit :param model_class: Type of regression model to fit
:type model_class: :class:`RegressionModel` subclass :type model_class: :class:`yli.regress.RegressionModel` subclass
:param df: Data to perform regression on :param df: Data to perform regression on
:type df: DataFrame :type df: DataFrame
:param dep: Column in *df* for the dependent variable (numeric) :param dep: Column in *df* for the dependent variable (numeric)
@ -116,41 +116,9 @@ def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels
:param exp: Report exponentiated parameters rather than raw parameters, default (*None*) is to autodetect based on *model_class* :param exp: Report exponentiated parameters rather than raw parameters, default (*None*) is to autodetect based on *model_class*
:type exp: bool :type exp: bool
:rtype: :class:`RegressionModel` :rtype: :class:`yli.regress.RegressionModel`
**Example:** **Example:** See :class:`yli.OLS`, :class:`yli.Logit`, etc.
.. code-block::
df = pd.DataFrame({
'Unhealthy': [False, False, False, ...],
'Fibrinogen': [2.52, 2.46, 2.29, ...],
'GammaGlobulin': [38, 36, 36, ...]
})
yli.regress(sm.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin')
.. code-block:: text
Logistic Regression Results
======================================================
Dep. Variable: Unhealthy | No. Observations: 32
Model: Logit | Df. Model: 2
Date: 2022-10-18 | Df. Residuals: 29
Time: 19:00:34 | Pseudo R²: 0.26
Std. Errors: Non-Robust | LL-Model: -11.47
| LL-Null: -15.44
| p (LR): 0.02*
======================================================
exp(β) (95% CI) p
-----------------------------------------------
(Intercept) 0.00 (0.00 - 0.24) 0.03*
Fibrinogen 6.80 (1.01 - 45.79) 0.049*
GammaGlobulin 1.17 (0.92 - 1.48) 0.19
-----------------------------------------------
The output summarises the results of the regression.
Note that the parameter estimates are automatically exponentiated.
For example, the odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
""" """
if not any(x.__name__ == 'RegressionModel' for x in model_class.__bases__): if not any(x.__name__ == 'RegressionModel' for x in model_class.__bases__):
@ -466,7 +434,7 @@ class RegressionModel:
Uses the R *BFpack* library. Uses the R *BFpack* library.
Requires the regression to be from statsmodels. Requires the regression to be from statsmodels.
The term must be specified as the *raw name* from the statsmodels regression, available via :attr:`RegressionResult.raw_result`. The term must be specified as the *raw name* from the statsmodels regression, available via :attr:`SingleTerm.raw_name`.
:param term: Raw name of the term to be tested :param term: Raw name of the term to be tested
:type term: str :type term: str
@ -556,7 +524,7 @@ class LikelihoodRatioTestResult(ChiSquaredResult):
""" """
Result of a likelihood ratio test for regression Result of a likelihood ratio test for regression
See :meth:`RegressionResult.lrtest_null`. See :meth:`RegressionModel.lrtest_null`.
""" """
def __init__(self, statistic, dof, pvalue): def __init__(self, statistic, dof, pvalue):
@ -578,7 +546,7 @@ class SingleTerm:
"""A term in a :class:`RegressionModel` which is a single term""" """A term in a :class:`RegressionModel` which is a single term"""
def __init__(self, raw_name, beta, pvalue): def __init__(self, raw_name, beta, pvalue):
#: Raw name of the term (*str*; e.g. in :attr:`RegressionModel.raw_result`) #: Raw name of the term (*str*)
self.raw_name = raw_name self.raw_name = raw_name
#: :class:`yli.utils.Estimate` of the coefficient #: :class:`yli.utils.Estimate` of the coefficient
self.beta = beta self.beta = beta
@ -608,6 +576,44 @@ def raw_terms_from_statsmodels_result(raw_result):
# Concrete implementations # Concrete implementations
class Logit(RegressionModel): class Logit(RegressionModel):
"""
Logistic regression
**Example:**
.. code-block::
df = pd.DataFrame({
'Unhealthy': [False, False, False, ...],
'Fibrinogen': [2.52, 2.46, 2.29, ...],
'GammaGlobulin': [38, 36, 36, ...]
})
yli.regress(yli.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin')
.. code-block:: text
Logistic Regression Results
======================================================
Dep. Variable: Unhealthy | No. Observations: 32
Model: Logit | Df. Model: 2
Date: 2022-10-18 | Df. Residuals: 29
Time: 19:00:34 | Pseudo R²: 0.26
Std. Errors: Non-Robust | LL-Model: -11.47
| LL-Null: -15.44
| p (LR): 0.02*
======================================================
exp(β) (95% CI) p
-----------------------------------------------
(Intercept) 0.00 (0.00 - 0.24) 0.03*
Fibrinogen 6.80 (1.01 - 45.79) 0.049*
GammaGlobulin 1.17 (0.92 - 1.48) 0.19
-----------------------------------------------
The output summarises the results of the regression.
Note that the parameter estimates are automatically exponentiated.
For example, the odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
"""
@property @property
def model_long_name(self): def model_long_name(self):
return 'Logistic Regression' return 'Logistic Regression'
@ -636,6 +642,46 @@ class Logit(RegressionModel):
return result return result
class OLS(RegressionModel): class OLS(RegressionModel):
"""
Ordinary least squares linear regression
**Example:**
.. code-block::
df = pd.DataFrame(...)
yli.regress(yli.OLS, df, 'LNC', 'D + T1 + T2 + S + PR + NE + CT + BW + N + PT')
.. code-block:: text
Ordinary Least Squares Regression Results
=======================================================
Dep. Variable: LNC | No. Observations: 32
Model: OLS | Df. Model: 10
Date: 2023-04-16 | Df. Residuals: 21
Time: 23:34:01 | R²: 0.86
Std. Errors: Non-Robust | F: 13.28
| p (F): <0.001*
=======================================================
β (95% CI) p
----------------------------------------------
(Intercept) -10.63 (-22.51 - 1.24) 0.08
D 0.23 (0.05 - 0.41) 0.02*
T1 0.01 (-0.04 - 0.05) 0.82
T2 0.01 (-0.00 - 0.02) 0.24
S 0.00 (0.00 - 0.00) <0.001*
PR -0.11 (-0.28 - 0.07) 0.21
NE 0.26 (0.09 - 0.42) 0.004*
CT 0.12 (-0.03 - 0.26) 0.12
BW 0.04 (-0.18 - 0.26) 0.73
N -0.01 (-0.03 - 0.00) 0.14
PT -0.22 (-0.49 - 0.05) 0.10
----------------------------------------------
The output summarises the results of the regression.
For example, the mean difference in "LNC" per unit increase in "D" is 0.23, with 95% confidence interval 0.05–0.41, and is significant with *p* value 0.02.
"""
@property @property
def model_long_name(self): def model_long_name(self):
return 'Ordinary Least Squares Regression' return 'Ordinary Least Squares Regression'
@ -680,16 +726,16 @@ class OrdinalLogit(RegressionModel):
.. code-block:: text .. code-block:: text
Ordinal Logistic Regression Results Ordinal Logistic Regression Results
=============================================================== ==========================================================
Dep. Variable: apply | No. Observations: 400 Dep. Variable: apply | No. Observations: 400
Model: OrdinalLogit | Df. Model: 5 Model: Ordinal Logit | Df. Model: 5
Method: Maximum Likelihood | Df. Residuals: 395 Date: 2022-12-02 | Df. Residuals: 395
Date: 2022-12-02 | Pseudo R²: 0.03 Time: 21:30:38 | Pseudo R²: 0.03
Time: 21:30:38 | LL-Model: -358.51 Std. Errors: Non-Robust | LL-Model: -358.51
Std. Errors: Non-Robust | LL-Null: -370.60 | LL-Null: -370.60
| p (LR): <0.001* | p (LR): <0.001*
=============================================================== ============================================================
β (95% CI) p β (95% CI) p
------------------------------------------------------------ ------------------------------------------------------------
pared 1.05 (0.53 - 1.57) <0.001* pared 1.05 (0.53 - 1.57) <0.001*
@ -886,15 +932,15 @@ class PenalisedLogit(RegressionModel):
.. code-block:: text .. code-block:: text
Penalised Logistic Regression Results Penalised Logistic Regression Results
========================================================= ============================================================
Dep. Variable: Outcome | No. Observations: 240 Dep. Variable: Outcome | No. Observations: 240
Model: Logit | Df. Model: 1 Model: Penalised Logit | Df. Model: 1
Method: Penalised ML | Pseudo R²: 0.37 Date: 2022-10-19 | Pseudo R²: 0.37
Date: 2022-10-19 | LL-Model: -66.43 Time: 07:50:40 | LL-Model: -66.43
Time: 07:50:40 | LL-Null: -105.91 Std. Errors: Non-Robust | LL-Null: -105.91
Std. Errors: Non-Robust | p (LR): <0.001* | p (LR): <0.001*
========================================================= ============================================================
β (95% CI) p β (95% CI) p
--------------------------------------------- ---------------------------------------------
(Intercept) -2.28 (-2.77 - -1.85) <0.001* (Intercept) -2.28 (-2.77 - -1.85) <0.001*

View File

@ -7,7 +7,7 @@ class ShapResult:
""" """
SHAP values for a regression model SHAP values for a regression model
See :meth:`yli.regress.RegressionResult.shap`. See :meth:`yli.regress.RegressionModel.shap`.
""" """
def __init__(self, model, shap_values, features): def __init__(self, model, shap_values, features):
@ -63,7 +63,7 @@ class ShapResult:
model = self.model() model = self.model()
if model is None: if model is None:
raise Exception('Referenced RegressionResult has been dropped') raise Exception('Referenced RegressionModel has been dropped')
xdata = self._get_xdata(model) xdata = self._get_xdata(model)

View File

@ -187,7 +187,7 @@ class FTestResult:
""" """
Result of an *F* test for ANOVA/regression Result of an *F* test for ANOVA/regression
See :func:`yli.anova_oneway` and :meth:`yli.regress.RegressionResult.ftest`. See :func:`yli.anova_oneway` and :meth:`yli.regress.RegressionModel.ftest`.
""" """
def __init__(self, statistic, dof1, dof2, pvalue): def __init__(self, statistic, dof1, dof2, pvalue):