Update documentation

RunasSudo 2023-04-16 23:52:12 +10:00
parent ac2aca7b8f
commit f1e943ca89
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
7 changed files with 118 additions and 68 deletions

View File

@ -49,9 +49,9 @@ Optional dependencies are:
* [mpmath](https://mpmath.org/), for *beta_ratio* and *beta_oddsratio*
* [PyCryptodome](https://www.pycryptodome.org/), for *pickle_write_encrypted* and *pickle_read_encrypted*
* [rpy2](https://rpy2.github.io/), with R packages:
* [BFpack](https://cran.r-project.org/web/packages/BFpack/index.html), for *bayesfactor_afbf* (*RegressionResult.bayesfactor_beta_zero*)
* [BFpack](https://cran.r-project.org/web/packages/BFpack/index.html), for *bayesfactor_afbf* (*RegressionModel.bayesfactor_beta_zero*)
* [logistf](https://cran.r-project.org/web/packages/logistf/index.html), for *PenalisedLogit*
* [shap](https://shap.readthedocs.io/en/latest/), for *RegressionResult.shap*
* [shap](https://shap.readthedocs.io/en/latest/), for *RegressionModel.shap*
## Functions
@ -64,7 +64,6 @@ Relevant statistical functions are all directly available from the top-level *yl
* *pearsonr*: Pearson correlation coefficient *r*
* *ttest_ind*: Independent 2-sample *t* test
* Regression:
* *logit_then_regress*: Perform logistic regression and use the estimates as the starting values for an arbitrary regression
* *PenalisedLogit*: Model for Firth penalised logistic regression
* *regress*: Fit arbitrary regression models
* *vif*: Compute the variance inflation factor for independent variables in regression
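All of these are called directly from the top-level *yli* namespace. A minimal sketch of a regression call, reusing the ordinal logistic example from this repository's test suite (see the tests changed in this commit):

```python
import pandas as pd
import yli

# UCLA ordinal logistic regression example, as used in the test suite
df = pd.read_stata('tests/data/ucla_ologit.dta')
result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)
print(result.summary())
```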

View File

@ -4,17 +4,22 @@ Regression
Functions
---------
.. autofunction:: yli.logit_then_regress
.. comment
.. autofunction:: yli.logit_then_regress
.. autofunction:: yli.regress
.. autofunction:: yli.vif
Additional regression models
Regression models
----------------------------
.. autoclass:: yli.Logit
.. autoclass:: yli.OrdinalLogit
.. autoclass:: yli.OLS
.. autoclass:: yli.PenalisedLogit
Result classes
@ -27,7 +32,7 @@ Result classes
:members:
:inherited-members:
.. autoclass:: yli.regress.RegressionResult
.. autoclass:: yli.regress.RegressionModel
:members:
.. autoclass:: yli.shap.ShapResult

View File

@ -21,7 +21,7 @@ import pandas as pd
import yli
def test_afbf_logit_beta_zero():
"""Compare RegressionResult.bayesfactor_beta_zero for Ott & Longnecker (2016) chapter 12.23 with R BFpack"""
"""Compare RegressionModel.bayesfactor_beta_zero for Ott & Longnecker (2016) chapter 12.23 with R BFpack"""
df = pd.DataFrame({
'Unhealthy': [False, False, False, False, False, False, False, True, False, False, False, True, False, False, False, False, False, False, True, False, True, False, False, False, False, False, True, False, False, True, False, False],

View File

@ -64,7 +64,7 @@ somewhat likely/very likely 4.30 (2.72 - 5.88) <0.001*
assert result.summary() == expected_summary
def test_brant_ucla():
"""Compare RegressionResult.brant with R brant library for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/"""
"""Compare RegressionModel.brant with R brant library for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/"""
df = pd.read_stata('tests/data/ucla_ologit.dta')
result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)

View File

@ -102,7 +102,7 @@ def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels
Fit a statsmodels regression model
:param model_class: Type of regression model to fit
:type model_class: :class:`RegressionModel` subclass
:type model_class: :class:`yli.regress.RegressionModel` subclass
:param df: Data to perform regression on
:type df: DataFrame
:param dep: Column in *df* for the dependent variable (numeric)
@ -116,41 +116,9 @@ def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels
:param exp: Report exponentiated parameters rather than raw parameters, default (*None*) is to autodetect based on *model_class*
:type exp: bool
:rtype: :class:`RegressionModel`
:rtype: :class:`yli.regress.RegressionModel`
**Example:**
.. code-block::
df = pd.DataFrame({
'Unhealthy': [False, False, False, ...],
'Fibrinogen': [2.52, 2.46, 2.29, ...],
'GammaGlobulin': [38, 36, 36, ...]
})
yli.regress(sm.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin')
.. code-block:: text
Logistic Regression Results
======================================================
Dep. Variable: Unhealthy | No. Observations: 32
Model: Logit | Df. Model: 2
Date: 2022-10-18 | Df. Residuals: 29
Time: 19:00:34 | Pseudo R²: 0.26
Std. Errors: Non-Robust | LL-Model: -11.47
| LL-Null: -15.44
| p (LR): 0.02*
======================================================
exp(β) (95% CI) p
-----------------------------------------------
(Intercept) 0.00 (0.00 - 0.24) 0.03*
Fibrinogen 6.80 (1.01 - 45.79) 0.049*
GammaGlobulin 1.17 (0.92 - 1.48) 0.19
-----------------------------------------------
The output summarises the results of the regression.
Note that the parameter estimates are automatically exponentiated.
For example, the odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
**Example:** See :class:`yli.OLS`, :class:`yli.Logit`, etc.
"""
if not any(x.__name__ == 'RegressionModel' for x in model_class.__bases__):
@ -466,7 +434,7 @@ class RegressionModel:
Uses the R *BFpack* library.
Requires the regression to be from statsmodels.
The term must be specified as the *raw name* from the statsmodels regression, available via :attr:`RegressionResult.raw_result`.
The term must be specified as the *raw name* from the statsmodels regression, available via :attr:`SingleTerm.raw_name`.
:param term: Raw name of the term to be tested
:type term: str
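For illustration, a sketch of passing the raw term name, based on the logistic regression example used by the tests in this commit; it assumes the raw name of a simple numeric term matches its column name, and requires rpy2 with the R *BFpack* package:
.. code-block::
# df as in the Logit example below (Unhealthy, Fibrinogen, GammaGlobulin columns)
result = yli.regress(yli.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin')
bf = result.bayesfactor_beta_zero('Fibrinogen')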
@ -556,7 +524,7 @@ class LikelihoodRatioTestResult(ChiSquaredResult):
"""
Result of a likelihood ratio test for regression
See :meth:`RegressionResult.lrtest_null`.
See :meth:`RegressionModel.lrtest_null`.
"""
def __init__(self, statistic, dof, pvalue):
@ -578,7 +546,7 @@ class SingleTerm:
"""A term in a :class:`RegressionModel` which is a single term"""
def __init__(self, raw_name, beta, pvalue):
#: Raw name of the term (*str*; e.g. in :attr:`RegressionModel.raw_result`)
#: Raw name of the term (*str*)
self.raw_name = raw_name
#: :class:`yli.utils.Estimate` of the coefficient
self.beta = beta
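For orientation, a brief sketch of reading these attributes from a fitted model; the *terms* mapping used here is an assumption, as only the *SingleTerm* attributes themselves appear in this change:
.. code-block::
term = result.terms['Fibrinogen']  # 'terms' collection name is assumed
term.raw_name  # raw statsmodels name of the term
term.beta      # yli.utils.Estimate of the coefficient
term.pvalue    # p value for the term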
@ -608,6 +576,44 @@ def raw_terms_from_statsmodels_result(raw_result):
# Concrete implementations
class Logit(RegressionModel):
"""
Logistic regression
**Example:**
.. code-block::
df = pd.DataFrame({
'Unhealthy': [False, False, False, ...],
'Fibrinogen': [2.52, 2.46, 2.29, ...],
'GammaGlobulin': [38, 36, 36, ...]
})
yli.regress(yli.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin')
.. code-block:: text
Logistic Regression Results
======================================================
Dep. Variable: Unhealthy | No. Observations: 32
Model: Logit | Df. Model: 2
Date: 2022-10-18 | Df. Residuals: 29
Time: 19:00:34 | Pseudo R²: 0.26
Std. Errors: Non-Robust | LL-Model: -11.47
| LL-Null: -15.44
| p (LR): 0.02*
======================================================
exp(β) (95% CI) p
-----------------------------------------------
(Intercept) 0.00 (0.00 - 0.24) 0.03*
Fibrinogen 6.80 (1.01 - 45.79) 0.049*
GammaGlobulin 1.17 (0.92 - 1.48) 0.19
-----------------------------------------------
The output summarises the results of the regression.
Note that the parameter estimates are automatically exponentiated.
For example, the odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
"""
@property
def model_long_name(self):
return 'Logistic Regression'
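If raw coefficients (log odds) are preferred over the automatically exponentiated estimates described above, *exp=False* can be passed to :func:`yli.regress`, as the test suite in this commit does for the ordinal model:
.. code-block::
# report β (log odds) rather than exp(β) (odds ratios)
yli.regress(yli.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin', exp=False)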
@ -636,6 +642,46 @@ class Logit(RegressionModel):
return result
class OLS(RegressionModel):
"""
Ordinary least squares linear regression
**Example:**
.. code-block::
df = pd.DataFrame(...)
yli.regress(yli.OLS, df, 'LNC', 'D + T1 + T2 + S + PR + NE + CT + BW + N + PT')
.. code-block:: text
Ordinary Least Squares Regression Results
=======================================================
Dep. Variable: LNC | No. Observations: 32
Model: OLS | Df. Model: 10
Date: 2023-04-16 | Df. Residuals: 21
Time: 23:34:01 | R²: 0.86
Std. Errors: Non-Robust | F: 13.28
| p (F): <0.001*
=======================================================
β (95% CI) p
----------------------------------------------
(Intercept) -10.63 (-22.51 - 1.24) 0.08
D 0.23 (0.05 - 0.41) 0.02*
T1 0.01 (-0.04 - 0.05) 0.82
T2 0.01 (-0.00 - 0.02) 0.24
S 0.00 (0.00 - 0.00) <0.001*
PR -0.11 (-0.28 - 0.07) 0.21
NE 0.26 (0.09 - 0.42) 0.004*
CT 0.12 (-0.03 - 0.26) 0.12
BW 0.04 (-0.18 - 0.26) 0.73
N -0.01 (-0.03 - 0.00) 0.14
PT -0.22 (-0.49 - 0.05) 0.10
----------------------------------------------
The output summarises the results of the regression.
For example, the mean difference in "LNC" per unit increase in "D" is 0.23, with 95% confidence interval 0.05–0.41, and is significant with *p* value 0.02.
"""
@property
def model_long_name(self):
return 'Ordinary Least Squares Regression'
@ -680,16 +726,16 @@ class OrdinalLogit(RegressionModel):
.. code-block:: text
Ordinal Logistic Regression Results
===============================================================
Dep. Variable: apply | No. Observations: 400
Model: OrdinalLogit | Df. Model: 5
Method: Maximum Likelihood | Df. Residuals: 395
Date: 2022-12-02 | Pseudo R²: 0.03
Time: 21:30:38 | LL-Model: -358.51
Std. Errors: Non-Robust | LL-Null: -370.60
| p (LR): <0.001*
===============================================================
Ordinal Logistic Regression Results
==========================================================
Dep. Variable: apply | No. Observations: 400
Model: Ordinal Logit | Df. Model: 5
Date: 2022-12-02 | Df. Residuals: 395
Time: 21:30:38 | Pseudo R²: 0.03
Std. Errors: Non-Robust | LL-Model: -358.51
| LL-Null: -370.60
| p (LR): <0.001*
============================================================
β (95% CI) p
------------------------------------------------------------
pared 1.05 (0.53 - 1.57) <0.001*
@ -886,15 +932,15 @@ class PenalisedLogit(RegressionModel):
.. code-block:: text
Penalised Logistic Regression Results
=========================================================
Dep. Variable: Outcome | No. Observations: 240
Model: Logit | Df. Model: 1
Method: Penalised ML | Pseudo R²: 0.37
Date: 2022-10-19 | LL-Model: -66.43
Time: 07:50:40 | LL-Null: -105.91
Std. Errors: Non-Robust | p (LR): <0.001*
=========================================================
Penalised Logistic Regression Results
============================================================
Dep. Variable: Outcome | No. Observations: 240
Model: Penalised Logit | Df. Model: 1
Date: 2022-10-19 | Pseudo R²: 0.37
Time: 07:50:40 | LL-Model: -66.43
Std. Errors: Non-Robust | LL-Null: -105.91
| p (LR): <0.001*
============================================================
β (95% CI) p
---------------------------------------------
(Intercept) -2.28 (-2.77 - -1.85) <0.001*

View File

@ -7,7 +7,7 @@ class ShapResult:
"""
SHAP values for a regression model
See :meth:`yli.regress.RegressionResult.shap`.
See :meth:`yli.regress.RegressionModel.shap`.
"""
def __init__(self, model, shap_values, features):
@ -63,7 +63,7 @@ class ShapResult:
model = self.model()
if model is None:
raise Exception('Referenced RegressionResult has been dropped')
raise Exception('Referenced RegressionModel has been dropped')
xdata = self._get_xdata(model)
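Because this exception fires once the referenced model has been garbage-collected, the fitted model must be kept in scope while its SHAP values are used. A sketch, assuming :meth:`RegressionModel.shap` is callable with no arguments and the optional *shap* dependency is installed:
.. code-block::
# keep 'result' alive: ShapResult appears to hold only a weak reference to the model
result = yli.regress(yli.OLS, df, 'LNC', 'D + T1 + T2 + S + PR + NE + CT + BW + N + PT')
shap_result = result.shap()  # call with no arguments is an assumption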

View File

@ -187,7 +187,7 @@ class FTestResult:
"""
Result of an *F* test for ANOVA/regression
See :func:`yli.anova_oneway` and :meth:`yli.regress.RegressionResult.ftest`.
See :func:`yli.anova_oneway` and :meth:`yli.regress.RegressionModel.ftest`.
"""
def __init__(self, statistic, dof1, dof2, pvalue):