Update documentation

RunasSudo 2023-04-16 23:52:12 +10:00
parent ac2aca7b8f
commit f1e943ca89
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
7 changed files with 118 additions and 68 deletions

View File

@ -49,9 +49,9 @@ Optional dependencies are:
* [mpmath](https://mpmath.org/), for *beta_ratio* and *beta_oddsratio*
* [PyCryptodome](https://www.pycryptodome.org/), for *pickle_write_encrypted* and *pickle_read_encrypted*
* [rpy2](https://rpy2.github.io/), with R packages:
* [BFpack](https://cran.r-project.org/web/packages/BFpack/index.html), for *bayesfactor_afbf* (*RegressionResult.bayesfactor_beta_zero*)
* [BFpack](https://cran.r-project.org/web/packages/BFpack/index.html), for *bayesfactor_afbf* (*RegressionModel.bayesfactor_beta_zero*)
* [logistf](https://cran.r-project.org/web/packages/logistf/index.html), for *PenalisedLogit*
* [shap](https://shap.readthedocs.io/en/latest/), for *RegressionResult.shap*
* [shap](https://shap.readthedocs.io/en/latest/), for *RegressionModel.shap*
## Functions
@ -64,7 +64,6 @@ Relevant statistical functions are all directly available from the top-level *yl
* *pearsonr*: Pearson correlation coefficient *r*
* *ttest_ind*: Independent 2-sample *t* test
* Regression:
* *logit_then_regress*: Perform logistic regression and use the estimates as the starting values for an arbitrary regression
* *PenalisedLogit*: Model for Firth penalised logistic regression
* *regress*: Fit arbitrary regression models
* *vif*: Compute the variance inflation factor for independent variables in regression
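All of these are called directly from the top-level *yli* namespace. A minimal sketch of a regression call, reusing the ordinal logistic example from this repository's test suite (see the tests changed in this commit):

```python
import pandas as pd
import yli

# UCLA ordinal logistic regression example, as used in the test suite
df = pd.read_stata('tests/data/ucla_ologit.dta')
result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)
print(result.summary())
```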

View File

@ -4,17 +4,22 @@ Regression
Functions
---------
.. autofunction:: yli.logit_then_regress
.. comment
.. autofunction:: yli.logit_then_regress
.. autofunction:: yli.regress
.. autofunction:: yli.vif
Additional regression models
Regression models
----------------------------
.. autoclass:: yli.Logit
.. autoclass:: yli.OrdinalLogit
.. autoclass:: yli.OLS
.. autoclass:: yli.PenalisedLogit
Result classes
@ -27,7 +32,7 @@ Result classes
:members:
:inherited-members:
.. autoclass:: yli.regress.RegressionResult
.. autoclass:: yli.regress.RegressionModel
:members:
.. autoclass:: yli.shap.ShapResult

View File

@ -21,7 +21,7 @@ import pandas as pd
import yli
def test_afbf_logit_beta_zero():
"""Compare RegressionResult.bayesfactor_beta_zero for Ott & Longnecker (2016) chapter 12.23 with R BFpack"""
"""Compare RegressionModel.bayesfactor_beta_zero for Ott & Longnecker (2016) chapter 12.23 with R BFpack"""
df = pd.DataFrame({
'Unhealthy': [False, False, False, False, False, False, False, True, False, False, False, True, False, False, False, False, False, False, True, False, True, False, False, False, False, False, True, False, False, True, False, False],

View File

@ -64,7 +64,7 @@ somewhat likely/very likely 4.30 (2.72 - 5.88) <0.001*
assert result.summary() == expected_summary
def test_brant_ucla():
"""Compare RegressionResult.brant with R brant library for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/"""
"""Compare RegressionModel.brant with R brant library for UCLA example at https://stats.oarc.ucla.edu/r/dae/ordinal-logistic-regression/"""
df = pd.read_stata('tests/data/ucla_ologit.dta')
result = yli.regress(yli.OrdinalLogit, df, 'apply', 'pared + public + gpa', exp=False)

View File

@ -102,7 +102,7 @@ def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels
Fit a statsmodels regression model
:param model_class: Type of regression model to fit
:type model_class: :class:`RegressionModel` subclass
:type model_class: :class:`yli.regress.RegressionModel` subclass
:param df: Data to perform regression on
:type df: DataFrame
:param dep: Column in *df* for the dependent variable (numeric)
@ -116,41 +116,9 @@ def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels
:param exp: Report exponentiated parameters rather than raw parameters, default (*None*) is to autodetect based on *model_class*
:type exp: bool
:rtype: :class:`RegressionModel`
:rtype: :class:`yli.regress.RegressionModel`
**Example:**
.. code-block::
df = pd.DataFrame({
'Unhealthy': [False, False, False, ...],
'Fibrinogen': [2.52, 2.46, 2.29, ...],
'GammaGlobulin': [38, 36, 36, ...]
})
yli.regress(sm.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin')
.. code-block:: text
Logistic Regression Results
======================================================
Dep. Variable: Unhealthy | No. Observations: 32
Model: Logit | Df. Model: 2
Date: 2022-10-18 | Df. Residuals: 29
Time: 19:00:34 | Pseudo R²: 0.26
Std. Errors: Non-Robust | LL-Model: -11.47
| LL-Null: -15.44
| p (LR): 0.02*
======================================================
exp(β) (95% CI) p
-----------------------------------------------
(Intercept) 0.00 (0.00 - 0.24) 0.03*
Fibrinogen 6.80 (1.01 - 45.79) 0.049*
GammaGlobulin 1.17 (0.92 - 1.48) 0.19
-----------------------------------------------
The output summarises the results of the regression.
Note that the parameter estimates are automatically exponentiated.
For example, the odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
**Example:** See :class:`yli.OLS`, :class:`yli.Logit`, etc.
"""
if not any(x.__name__ == 'RegressionModel' for x in model_class.__bases__):
@ -466,7 +434,7 @@ class RegressionModel:
Uses the R *BFpack* library.
Requires the regression to be from statsmodels.
The term must be specified as the *raw name* from the statsmodels regression, available via :attr:`RegressionResult.raw_result`.
The term must be specified as the *raw name* from the statsmodels regression, available via :attr:`SingleTerm.raw_name`.
:param term: Raw name of the term to be tested
:type term: str
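For illustration, a sketch of passing the raw term name, based on the logistic regression example used by the tests in this commit; it assumes the raw name of a simple numeric term matches its column name, and requires rpy2 with the R *BFpack* package:
.. code-block::
# df as in the Logit example below (Unhealthy, Fibrinogen, GammaGlobulin columns)
result = yli.regress(yli.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin')
bf = result.bayesfactor_beta_zero('Fibrinogen')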
@ -556,7 +524,7 @@ class LikelihoodRatioTestResult(ChiSquaredResult):
"""
Result of a likelihood ratio test for regression
See :meth:`RegressionResult.lrtest_null`.
See :meth:`RegressionModel.lrtest_null`.
"""
def __init__(self, statistic, dof, pvalue):
@ -578,7 +546,7 @@ class SingleTerm:
"""A term in a :class:`RegressionModel` which is a single term"""
def __init__(self, raw_name, beta, pvalue):
#: Raw name of the term (*str*; e.g. in :attr:`RegressionModel.raw_result`)
#: Raw name of the term (*str*)
self.raw_name = raw_name
#: :class:`yli.utils.Estimate` of the coefficient
self.beta = beta
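For orientation, a brief sketch of reading these attributes from a fitted model; the *terms* mapping used here is an assumption, as only the *SingleTerm* attributes themselves appear in this change:
.. code-block::
term = result.terms['Fibrinogen']  # 'terms' collection name is assumed
term.raw_name  # raw statsmodels name of the term
term.beta      # yli.utils.Estimate of the coefficient
term.pvalue    # p value for the term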
@ -608,6 +576,44 @@ def raw_terms_from_statsmodels_result(raw_result):
# Concrete implementations
class Logit(RegressionModel):
"""
Logistic regression
**Example:**
.. code-block::
df = pd.DataFrame({
'Unhealthy': [False, False, False, ...],
'Fibrinogen': [2.52, 2.46, 2.29, ...],
'GammaGlobulin': [38, 36, 36, ...]
})
yli.regress(yli.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin')
.. code-block:: text
Logistic Regression Results
======================================================
Dep. Variable: Unhealthy | No. Observations: 32
Model: Logit | Df. Model: 2
Date: 2022-10-18 | Df. Residuals: 29
Time: 19:00:34 | Pseudo R²: 0.26
Std. Errors: Non-Robust | LL-Model: -11.47
| LL-Null: -15.44
| p (LR): 0.02*
======================================================
exp(β) (95% CI) p
-----------------------------------------------
(Intercept) 0.00 (0.00 - 0.24) 0.03*
Fibrinogen 6.80 (1.01 - 45.79) 0.049*
GammaGlobulin 1.17 (0.92 - 1.48) 0.19
-----------------------------------------------
The output summarises the results of the regression.
Note that the parameter estimates are automatically exponentiated.
For example, the odds ratio for unhealthiness per unit increase in fibrinogen is 6.80, with 95% confidence interval 1.01–45.79, and is significant with *p* value 0.049.
"""
@property
def model_long_name(self):
return 'Logistic Regression'
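If raw coefficients (log odds) are preferred over the automatically exponentiated estimates described above, *exp=False* can be passed to :func:`yli.regress`, as the test suite in this commit does for the ordinal model:
.. code-block::
# report β (log odds) rather than exp(β) (odds ratios)
yli.regress(yli.Logit, df, 'Unhealthy', 'Fibrinogen + GammaGlobulin', exp=False)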
@ -636,6 +642,46 @@ class Logit(RegressionModel):
return result
class OLS(RegressionModel):
"""
Ordinary least squares linear regression
**Example:**
.. code-block::
df = pd.DataFrame(...)
yli.regress(yli.OLS, df, 'LNC', 'D + T1 + T2 + S + PR + NE + CT + BW + N + PT')
.. code-block:: text
Ordinary Least Squares Regression Results
=======================================================
Dep. Variable: LNC | No. Observations: 32
Model: OLS | Df. Model: 10
Date: 2023-04-16 | Df. Residuals: 21
Time: 23:34:01 | R²: 0.86
Std. Errors: Non-Robust | F: 13.28
| p (F): <0.001*
=======================================================
β (95% CI) p
----------------------------------------------
(Intercept) -10.63 (-22.51 - 1.24) 0.08
D 0.23 (0.05 - 0.41) 0.02*
T1 0.01 (-0.04 - 0.05) 0.82
T2 0.01 (-0.00 - 0.02) 0.24
S 0.00 (0.00 - 0.00) <0.001*
PR -0.11 (-0.28 - 0.07) 0.21
NE 0.26 (0.09 - 0.42) 0.004*
CT 0.12 (-0.03 - 0.26) 0.12
BW 0.04 (-0.18 - 0.26) 0.73
N -0.01 (-0.03 - 0.00) 0.14
PT -0.22 (-0.49 - 0.05) 0.10
----------------------------------------------
The output summarises the results of the regression.
For example, the mean difference in "LNC" per unit increase in "D" is 0.23, with 95% confidence interval 0.05–0.41, and is significant with *p* value 0.02.
"""
@property
def model_long_name(self):
return 'Ordinary Least Squares Regression'
@ -680,16 +726,16 @@ class OrdinalLogit(RegressionModel):
.. code-block:: text
Ordinal Logistic Regression Results
===============================================================
Dep. Variable: apply | No. Observations: 400
Model: OrdinalLogit | Df. Model: 5
Method: Maximum Likelihood | Df. Residuals: 395
Date: 2022-12-02 | Pseudo R²: 0.03
Time: 21:30:38 | LL-Model: -358.51
Std. Errors: Non-Robust | LL-Null: -370.60
| p (LR): <0.001*
===============================================================
Ordinal Logistic Regression Results
==========================================================
Dep. Variable: apply | No. Observations: 400
Model: Ordinal Logit | Df. Model: 5
Date: 2022-12-02 | Df. Residuals: 395
Time: 21:30:38 | Pseudo R²: 0.03
Std. Errors: Non-Robust | LL-Model: -358.51
| LL-Null: -370.60
| p (LR): <0.001*
============================================================
β (95% CI) p
------------------------------------------------------------
pared 1.05 (0.53 - 1.57) <0.001*
@ -886,15 +932,15 @@ class PenalisedLogit(RegressionModel):
.. code-block:: text
Penalised Logistic Regression Results
=========================================================
Dep. Variable: Outcome | No. Observations: 240
Model: Logit | Df. Model: 1
Method: Penalised ML | Pseudo R²: 0.37
Date: 2022-10-19 | LL-Model: -66.43
Time: 07:50:40 | LL-Null: -105.91
Std. Errors: Non-Robust | p (LR): <0.001*
=========================================================
Penalised Logistic Regression Results
============================================================
Dep. Variable: Outcome | No. Observations: 240
Model: Penalised Logit | Df. Model: 1
Date: 2022-10-19 | Pseudo R²: 0.37
Time: 07:50:40 | LL-Model: -66.43
Std. Errors: Non-Robust | LL-Null: -105.91
| p (LR): <0.001*
============================================================
β (95% CI) p
---------------------------------------------
(Intercept) -2.28 (-2.77 - -1.85) <0.001*

View File

@ -7,7 +7,7 @@ class ShapResult:
"""
SHAP values for a regression model
See :meth:`yli.regress.RegressionResult.shap`.
See :meth:`yli.regress.RegressionModel.shap`.
"""
def __init__(self, model, shap_values, features):
@ -63,7 +63,7 @@ class ShapResult:
model = self.model()
if model is None:
raise Exception('Referenced RegressionResult has been dropped')
raise Exception('Referenced RegressionModel has been dropped')
xdata = self._get_xdata(model)
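Because this exception fires once the referenced model has been garbage-collected, the fitted model must be kept in scope while its SHAP values are used. A sketch, assuming :meth:`RegressionModel.shap` is callable with no arguments and the optional *shap* dependency is installed:
.. code-block::
# keep 'result' alive: ShapResult appears to hold only a weak reference to the model
result = yli.regress(yli.OLS, df, 'LNC', 'D + T1 + T2 + S + PR + NE + CT + BW + N + PT')
shap_result = result.shap()  # call with no arguments is an assumption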

View File

@ -187,7 +187,7 @@ class FTestResult:
"""
Result of an *F* test for ANOVA/regression
See :func:`yli.anova_oneway` and :meth:`yli.regress.RegressionResult.ftest`.
See :func:`yli.anova_oneway` and :meth:`yli.regress.RegressionModel.ftest`.
"""
def __init__(self, statistic, dof1, dof2, pvalue):