Update documentation
parent ac2aca7b8f
commit f1e943ca89
@@ -49,9 +49,9 @@ Optional dependencies are:
 * [mpmath](https://mpmath.org/), for *beta_ratio* and *beta_oddsratio*
 * [PyCryptodome](https://www.pycryptodome.org/), for *pickle_write_encrypted* and *pickle_read_encrypted*
 * [rpy2](https://rpy2.github.io/), with R packages:
-  * [BFpack](https://cran.r-project.org/web/packages/BFpack/index.html), for *bayesfactor_afbf* (*RegressionResult.bayesfactor_beta_zero*)
+  * [BFpack](https://cran.r-project.org/web/packages/BFpack/index.html), for *bayesfactor_afbf* (*RegressionModel.bayesfactor_beta_zero*)
   * [logistf](https://cran.r-project.org/web/packages/logistf/index.html), for *PenalisedLogit*
-* [shap](https://shap.readthedocs.io/en/latest/), for *RegressionResult.shap*
+* [shap](https://shap.readthedocs.io/en/latest/), for *RegressionModel.shap*
 
 ## Functions
 
@@ -64,7 +64,6 @@ Relevant statistical functions are all directly available from the top-level *yli* module.
 * *pearsonr*: Pearson correlation coefficient *r*
 * *ttest_ind*: Independent 2-sample *t* test
 * Regression:
-  * *logit_then_regress*: Perform logistic regression and use the estimates as the starting values for an arbitrary regression
   * *PenalisedLogit*: Model for Firth penalised logistic regression
   * *regress*: Fit arbitrary regression models
   * *vif*: Compute the variance inflation factor for independent variables in regression
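For orientation, the model classes and the *regress* entry point above are used together; the call shape below mirrors the docstring examples further down in this diff. A minimal sketch with made-up data (the column names are placeholders, not from the yli documentation):

```python
import pandas as pd
import yli

# Small illustrative dataset (placeholder values)
df = pd.DataFrame({
    'y':  [2.1, 3.4, 4.0, 5.2, 6.1, 7.3],
    'x1': [1, 2, 3, 4, 5, 6],
    'x2': [0, 1, 0, 1, 0, 1],
})

# Fit an ordinary least squares regression via the generic regress() entry point;
# the formula uses the same syntax as statsmodels/patsy
result = yli.regress(yli.OLS, df, 'y', 'x1 + x2')
print(result.summary())
```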
@@ -4,17 +4,22 @@ Regression
 Functions
 ---------
 
-.. autofunction:: yli.logit_then_regress
+.. comment
+   .. autofunction:: yli.logit_then_regress
 
 .. autofunction:: yli.regress
 
 .. autofunction:: yli.vif
 
-Additional regression models
+Regression models
 ----------------------------
 
+.. autoclass:: yli.Logit
+
 .. autoclass:: yli.OrdinalLogit
 
+.. autoclass:: yli.OLS
+
 .. autoclass:: yli.PenalisedLogit
 
 Result classes
@@ -27,7 +32,7 @@ Result classes
    :members:
    :inherited-members:
 
-.. autoclass:: yli.regress.RegressionResult
+.. autoclass:: yli.regress.RegressionModel
    :members:
 
 .. autoclass:: yli.shap.ShapResult
@@ -21,7 +21,7 @@ import pandas as pd
 import yli
 
 def test_afbf_logit_beta_zero():
-	"""Compare RegressionResult.bayesfactor_beta_zero for Ott & Longnecker (2016) chapter 12.23 with R BFpack"""
+	"""Compare RegressionModel.bayesfactor_beta_zero for Ott & Longnecker (2016) chapter 12.23 with R BFpack"""
 	
 	df = pd.DataFrame({
 		'Unhealthy': [False, False, False, False, False, False, False, True, False, False, False, True, False, False, False, False, False, False, True, False, True, False, False, False, False, False, True, False, False, True, False, False],
@@ -64,7 +64,7 @@ somewhat likely/very likely 4.30 (2.72 - 5.88) <0.001*
 	assert result.summary() == expected_summary
 
 def test_brant_ucla():
-	"""Compare RegressionResult.brant with R brant library for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/"""
+	"""Compare RegressionModel.brant with R brant library for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/"""
 	
 	df = pd.read_stata('tests/data/ucla_ologit.dta')
 	result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)
yli/regress.py (150 changed lines)
@@ -102,7 +102,7 @@ def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels
 	Fit a statsmodels regression model
 	
 	:param model_class: Type of regression model to fit
-	:type model_class: :class:`RegressionModel` subclass
+	:type model_class: :class:`yli.regress.RegressionModel` subclass
 	:param df: Data to perform regression on
 	:type df: DataFrame
 	:param dep: Column in *df* for the dependent variable (numeric)
@@ -116,41 +116,9 @@ def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels
 	:param exp: Report exponentiated parameters rather than raw parameters, default (*None*) is to autodetect based on *model_class*
 	:type exp: bool
 	
-	:rtype: :class:`RegressionModel`
+	:rtype: :class:`yli.regress.RegressionModel`
 	
-	**Example:**
-	
-	.. code-block::
-	
-		df = pd.DataFrame({
-			'Unhealthy': [False, False, False, ...],
-			'Fibrinogen': [2.52, 2.46, 2.29, ...],
-			'GammaGlobulin': [38, 36, 36, ...]
-		})
-		yli.regress(sm.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin')
-	
-	.. code-block:: text
-	
-		Logistic Regression Results
-		======================================================
-		Dep. Variable: Unhealthy | No. Observations: 32
-		Model: Logit | Df. Model: 2
-		Date: 2022-10-18 | Df. Residuals: 29
-		Time: 19:00:34 | Pseudo R²: 0.26
-		Std. Errors: Non-Robust | LL-Model: -11.47
-		 | LL-Null: -15.44
-		 | p (LR): 0.02*
-		======================================================
-		exp(β) (95% CI) p
-		-----------------------------------------------
-		(Intercept) 0.00 (0.00 - 0.24) 0.03*
-		Fibrinogen 6.80 (1.01 - 45.79) 0.049*
-		GammaGlobulin 1.17 (0.92 - 1.48) 0.19
-		-----------------------------------------------
-	
-	The output summarises the results of the regression.
-	Note that the parameter estimates are automatically exponentiated.
-	For example, the odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
+	**Example:** See :class:`yli.OLS`, :class:`yli.Logit`, etc.
 	"""
 	
 	if not any(x.__name__ == 'RegressionModel' for x in model_class.__bases__):
@@ -466,7 +434,7 @@ class RegressionModel:
 		Uses the R *BFpack* library.
 		
 		Requires the regression to be from statsmodels.
-		The term must be specified as the *raw name* from the statsmodels regression, available via :attr:`RegressionResult.raw_result`.
+		The term must be specified as the *raw name* from the statsmodels regression, available via :attr:`SingleTerm.raw_name`.
 		
 		:param term: Raw name of the term to be tested
 		:type term: str
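To make the raw-name requirement concrete: a brief sketch, assuming a model fitted as in the Unhealthy/Fibrinogen docstring example elsewhere in this commit, and rpy2 plus the R *BFpack* package installed. For a plain numeric predictor the raw statsmodels name is simply the column name; the exact printed form of the returned Bayes factor is not shown in this diff.

```python
# df: data frame with Unhealthy, Fibrinogen and GammaGlobulin columns (see the Logit example)
result = yli.regress(yli.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin')

# Term identified by its raw statsmodels name (cf. SingleTerm.raw_name)
bf = result.bayesfactor_beta_zero('Fibrinogen')
print(bf)
```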
@@ -556,7 +524,7 @@ class LikelihoodRatioTestResult(ChiSquaredResult):
 	"""
 	Result of a likelihood ratio test for regression
 	
-	See :meth:`RegressionResult.lrtest_null`.
+	See :meth:`RegressionModel.lrtest_null`.
 	"""
 	
 	def __init__(self, statistic, dof, pvalue):
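The *p (LR)* row in the summary tables elsewhere in this diff corresponds to this test. Calling it directly might look like the sketch below; the no-argument call shape is an assumption based on the method name, not something shown in this hunk.

```python
# Likelihood ratio test of the fitted model against the null (intercept-only) model
lr_result = result.lrtest_null()
print(lr_result)
```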
@@ -578,7 +546,7 @@ class SingleTerm:
 	"""A term in a :class:`RegressionModel` which is a single term"""
 	
 	def __init__(self, raw_name, beta, pvalue):
-		#: Raw name of the term (*str*; e.g. in :attr:`RegressionModel.raw_result`)
+		#: Raw name of the term (*str*)
 		self.raw_name = raw_name
 		#: :class:`yli.utils.Estimate` of the coefficient
 		self.beta = beta
@@ -608,6 +576,44 @@ def raw_terms_from_statsmodels_result(raw_result):
 # Concrete implementations
 
 class Logit(RegressionModel):
 	"""
 	Logistic regression
+	
+	**Example:**
+	
+	.. code-block::
+	
+		df = pd.DataFrame({
+			'Unhealthy': [False, False, False, ...],
+			'Fibrinogen': [2.52, 2.46, 2.29, ...],
+			'GammaGlobulin': [38, 36, 36, ...]
+		})
+		yli.regress(yli.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin')
+	
+	.. code-block:: text
+	
+		Logistic Regression Results
+		======================================================
+		Dep. Variable: Unhealthy | No. Observations: 32
+		Model: Logit | Df. Model: 2
+		Date: 2022-10-18 | Df. Residuals: 29
+		Time: 19:00:34 | Pseudo R²: 0.26
+		Std. Errors: Non-Robust | LL-Model: -11.47
+		 | LL-Null: -15.44
+		 | p (LR): 0.02*
+		======================================================
+		exp(β) (95% CI) p
+		-----------------------------------------------
+		(Intercept) 0.00 (0.00 - 0.24) 0.03*
+		Fibrinogen 6.80 (1.01 - 45.79) 0.049*
+		GammaGlobulin 1.17 (0.92 - 1.48) 0.19
+		-----------------------------------------------
+	
+	The output summarises the results of the regression.
+	Note that the parameter estimates are automatically exponentiated.
+	For example, the odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
 	"""
 	
 	@property
 	def model_long_name(self):
 		return 'Logistic Regression'
@@ -636,6 +642,46 @@ class Logit(RegressionModel):
 		return result
 
 class OLS(RegressionModel):
 	"""
 	Ordinary least squares linear regression
+	
+	**Example:**
+	
+	.. code-block::
+	
+		df = pd.DataFrame(...)
+		yli.regress(yli.OLS, df, 'LNC', 'D + T1 + T2 + S + PR + NE + CT + BW + N + PT')
+	
+	.. code-block:: text
+	
+		Ordinary Least Squares Regression Results
+		=======================================================
+		Dep. Variable: LNC | No. Observations: 32
+		Model: OLS | Df. Model: 10
+		Date: 2023-04-16 | Df. Residuals: 21
+		Time: 23:34:01 | R²: 0.86
+		Std. Errors: Non-Robust | F: 13.28
+		 | p (F): <0.001*
+		=======================================================
+		β (95% CI) p
+		----------------------------------------------
+		(Intercept) -10.63 (-22.51 - 1.24) 0.08
+		D 0.23 (0.05 - 0.41) 0.02*
+		T1 0.01 (-0.04 - 0.05) 0.82
+		T2 0.01 (-0.00 - 0.02) 0.24
+		S 0.00 (0.00 - 0.00) <0.001*
+		PR -0.11 (-0.28 - 0.07) 0.21
+		NE 0.26 (0.09 - 0.42) 0.004*
+		CT 0.12 (-0.03 - 0.26) 0.12
+		BW 0.04 (-0.18 - 0.26) 0.73
+		N -0.01 (-0.03 - 0.00) 0.14
+		PT -0.22 (-0.49 - 0.05) 0.10
+		----------------------------------------------
+	
+	The output summarises the results of the regression.
+	For example, the mean difference in "LNC" per unit increase in "D" is 0.23, with 95% confidence interval 0.05–0.41, and is significant with *p* value 0.02.
 	"""
 	
 	@property
 	def model_long_name(self):
 		return 'Ordinary Least Squares Regression'
@@ -681,15 +727,15 @@ class OrdinalLogit(RegressionModel):
 	.. code-block:: text
 	
 		Ordinal Logistic Regression Results
-		===============================================================
+		==========================================================
 		Dep. Variable: apply | No. Observations: 400
-		Model: OrdinalLogit | Df. Model: 5
-		Method: Maximum Likelihood | Df. Residuals: 395
-		Date: 2022-12-02 | Pseudo R²: 0.03
-		Time: 21:30:38 | LL-Model: -358.51
-		Std. Errors: Non-Robust | LL-Null: -370.60
+		Model: Ordinal Logit | Df. Model: 5
+		Date: 2022-12-02 | Df. Residuals: 395
+		Time: 21:30:38 | Pseudo R²: 0.03
+		Std. Errors: Non-Robust | LL-Model: -358.51
+		 | LL-Null: -370.60
+		 | p (LR): <0.001*
-		===============================================================
+		============================================================
 		β (95% CI) p
 		------------------------------------------------------------
 		pared 1.05 (0.53 - 1.57) <0.001*
@@ -887,14 +933,14 @@ class PenalisedLogit(RegressionModel):
 	.. code-block:: text
 	
 		Penalised Logistic Regression Results
-		=========================================================
+		============================================================
 		Dep. Variable: Outcome | No. Observations: 240
-		Model: Logit | Df. Model: 1
-		Method: Penalised ML | Pseudo R²: 0.37
-		Date: 2022-10-19 | LL-Model: -66.43
-		Time: 07:50:40 | LL-Null: -105.91
-		Std. Errors: Non-Robust | p (LR): <0.001*
-		=========================================================
+		Model: Penalised Logit | Df. Model: 1
+		Date: 2022-10-19 | Pseudo R²: 0.37
+		Time: 07:50:40 | LL-Model: -66.43
+		Std. Errors: Non-Robust | LL-Null: -105.91
+		 | p (LR): <0.001*
+		============================================================
 		β (95% CI) p
 		---------------------------------------------
 		(Intercept) -2.28 (-2.77 - -1.85) <0.001*
@@ -7,7 +7,7 @@ class ShapResult:
 	"""
 	SHAP values for a regression model
 	
-	See :meth:`yli.regress.RegressionResult.shap`.
+	See :meth:`yli.regress.RegressionModel.shap`.
 	"""
 	
 	def __init__(self, model, shap_values, features):
@@ -63,7 +63,7 @@ class ShapResult:
 		
 		model = self.model()
 		if model is None:
-			raise Exception('Referenced RegressionResult has been dropped')
+			raise Exception('Referenced RegressionModel has been dropped')
 		
 		xdata = self._get_xdata(model)
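For context, a ShapResult is obtained from a fitted model. A rough sketch, with two assumptions not confirmed by this diff: that shap is called as a no-argument method (as the :meth: reference above suggests) and that the *shap* Python package from the README dependency list is installed.

```python
# Fit a linear regression, then compute SHAP values for its predictors
result = yli.regress(yli.OLS, df, 'y', 'x1 + x2')
shap_result = result.shap()  # a yli.shap.ShapResult
```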
@@ -187,7 +187,7 @@ class FTestResult:
 	"""
 	Result of an *F* test for ANOVA/regression
 	
-	See :func:`yli.anova_oneway` and :meth:`yli.regress.RegressionResult.ftest`.
+	See :func:`yli.anova_oneway` and :meth:`yli.regress.RegressionModel.ftest`.
 	"""
 	
 	def __init__(self, statistic, dof1, dof2, pvalue):
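An FTestResult can come from either entry point named above. For the regression case, a sketch (the no-argument ftest() call is an assumption; the signature of yli.anova_oneway is not shown in this diff, so it is omitted):

```python
# Overall F test for a fitted OLS regression
result = yli.regress(yli.OLS, df, 'y', 'x1 + x2')
f_result = result.ftest()
print(f_result)
```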