Write Sphinx documentation
This commit is contained in:
parent
7730ce476a
commit
57189990d6
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,2 +1,4 @@
|
|||||||
__pycache__
|
__pycache__
|
||||||
*.pyc
|
*.pyc
|
||||||
|
|
||||||
|
/docs/_build
|
||||||
|
@ -77,7 +77,7 @@ Relevant statistical functions are all directly available from the top-level *yl
|
|||||||
* Bayesian inference:
|
* Bayesian inference:
|
||||||
* *bayesfactor_afbf*: Adjusted fractional Bayes factor for a hypothesis on parameters from regression
|
* *bayesfactor_afbf*: Adjusted fractional Bayes factor for a hypothesis on parameters from regression
|
||||||
|
|
||||||
Each function is documented in the respective docstring within the source code. Examples can be found in the unit tests in the *tests* directory.
|
Each function is documented in the respective docstring within the source code, and Sphinx documentation is buildable from the *docs* directory. Examples can be found in the unit tests in the *tests* directory.
|
||||||
|
|
||||||
## Warning
|
## Warning
|
||||||
|
|
||||||
|
20
docs/Makefile
Normal file
20
docs/Makefile
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# Minimal makefile for Sphinx documentation
|
||||||
|
#
|
||||||
|
|
||||||
|
# You can set these variables from the command line, and also
|
||||||
|
# from the environment for the first two.
|
||||||
|
SPHINXOPTS ?=
|
||||||
|
SPHINXBUILD ?= sphinx-build
|
||||||
|
SOURCEDIR = .
|
||||||
|
BUILDDIR = _build
|
||||||
|
|
||||||
|
# Put it first so that "make" without argument is like "make help".
|
||||||
|
help:
|
||||||
|
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||||
|
|
||||||
|
.PHONY: help Makefile
|
||||||
|
|
||||||
|
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||||
|
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||||
|
%: Makefile
|
||||||
|
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
13
docs/bayes_factors.rst
Normal file
13
docs/bayes_factors.rst
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
Bayesian inference
|
||||||
|
==================
|
||||||
|
|
||||||
|
Functions
|
||||||
|
---------
|
||||||
|
|
||||||
|
.. autofunction:: yli.bayesfactor_afbf
|
||||||
|
|
||||||
|
Result classes
|
||||||
|
--------------
|
||||||
|
|
||||||
|
.. autoclass:: yli.bayes_factors.BayesFactor
|
||||||
|
:members:
|
30
docs/conf.py
Normal file
30
docs/conf.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
# Configuration file for the Sphinx documentation builder.
|
||||||
|
#
|
||||||
|
# For the full list of built-in configuration values, see the documentation:
|
||||||
|
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
||||||
|
|
||||||
|
import sys, os
|
||||||
|
sys.path.append(os.path.abspath('..'))
|
||||||
|
|
||||||
|
# -- Project information -----------------------------------------------------
|
||||||
|
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
|
||||||
|
|
||||||
|
project = 'scipy-yli'
|
||||||
|
copyright = '2022, RunasSudo'
|
||||||
|
author = 'RunasSudo'
|
||||||
|
|
||||||
|
# -- General configuration ---------------------------------------------------
|
||||||
|
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
||||||
|
|
||||||
|
extensions = ['sphinx.ext.autodoc']
|
||||||
|
|
||||||
|
templates_path = ['_templates']
|
||||||
|
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# -- Options for HTML output -------------------------------------------------
|
||||||
|
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
|
||||||
|
|
||||||
|
html_theme = 'sphinx_rtd_theme'
|
||||||
|
html_static_path = ['_static']
|
16
docs/distributions.rst
Normal file
16
docs/distributions.rst
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
Distributions
|
||||||
|
=============
|
||||||
|
|
||||||
|
.. autodata:: yli.beta_oddsratio
|
||||||
|
|
||||||
|
.. automethod:: yli.distributions.betaoddsrat_gen.from_scipy
|
||||||
|
|
||||||
|
.. automethod:: yli.distributions.betaoddsrat_gen.set_cdf_terms
|
||||||
|
|
||||||
|
.. autodata:: yli.beta_ratio
|
||||||
|
|
||||||
|
.. automethod:: yli.distributions.betarat_gen.from_scipy
|
||||||
|
|
||||||
|
.. autodata:: yli.transformed_dist
|
||||||
|
|
||||||
|
.. autofunction:: yli.hdi
|
24
docs/general.rst
Normal file
24
docs/general.rst
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
General specifications
|
||||||
|
======================
|
||||||
|
|
||||||
|
.. _nan-handling:
|
||||||
|
|
||||||
|
NaN handling
|
||||||
|
------------
|
||||||
|
|
||||||
|
Most functions take a parameter **nan_policy** to specify how to handle *nan* values in the data. The options are:
|
||||||
|
|
||||||
|
* **warn** (default) – Warn on *nan* values
|
||||||
|
* **raise** – Raise an error on *nan* values
|
||||||
|
* **omit** – Silently drop rows with *nan* values
|
||||||
|
|
||||||
|
In determining whether there is *nan* in the data, only the columns specified in the function (if applicable) are considered.
|
||||||
|
|
||||||
|
General result classes
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
.. autoclass:: yli.utils.ConfidenceInterval
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. autoclass:: yli.utils.Estimate
|
||||||
|
:members:
|
12
docs/global.rst
Normal file
12
docs/global.rst
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
Global options
|
||||||
|
==============
|
||||||
|
|
||||||
|
.. autodata:: yli.config
|
||||||
|
|
||||||
|
.. autoattribute:: yli.config.Config.alpha
|
||||||
|
|
||||||
|
.. autoattribute:: yli.config.Config.pvalue_leading_zero
|
||||||
|
|
||||||
|
.. autoattribute:: yli.config.Config.pvalue_max_dps
|
||||||
|
|
||||||
|
.. autoattribute:: yli.config.Config.pvalue_min_dps
|
14
docs/index.rst
Normal file
14
docs/index.rst
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
scipy-yli API reference
|
||||||
|
=======================
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 1
|
||||||
|
:caption: Contents:
|
||||||
|
|
||||||
|
general.rst
|
||||||
|
sig_tests.rst
|
||||||
|
regress.rst
|
||||||
|
io.rst
|
||||||
|
distributions.rst
|
||||||
|
bayes_factors.rst
|
||||||
|
global.rst
|
10
docs/io.rst
Normal file
10
docs/io.rst
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
Input/output
|
||||||
|
============
|
||||||
|
|
||||||
|
.. autofunction:: yli.pickle_read_compressed
|
||||||
|
|
||||||
|
.. autofunction:: yli.pickle_read_encrypted
|
||||||
|
|
||||||
|
.. autofunction:: yli.pickle_write_compressed
|
||||||
|
|
||||||
|
.. autofunction:: yli.pickle_write_encrypted
|
35
docs/make.bat
Normal file
35
docs/make.bat
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
@ECHO OFF
|
||||||
|
|
||||||
|
pushd %~dp0
|
||||||
|
|
||||||
|
REM Command file for Sphinx documentation
|
||||||
|
|
||||||
|
if "%SPHINXBUILD%" == "" (
|
||||||
|
set SPHINXBUILD=sphinx-build
|
||||||
|
)
|
||||||
|
set SOURCEDIR=.
|
||||||
|
set BUILDDIR=_build
|
||||||
|
|
||||||
|
%SPHINXBUILD% >NUL 2>NUL
|
||||||
|
if errorlevel 9009 (
|
||||||
|
echo.
|
||||||
|
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||||
|
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||||
|
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||||
|
echo.may add the Sphinx directory to PATH.
|
||||||
|
echo.
|
||||||
|
echo.If you don't have Sphinx installed, grab it from
|
||||||
|
echo.https://www.sphinx-doc.org/
|
||||||
|
exit /b 1
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "" goto help
|
||||||
|
|
||||||
|
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||||
|
goto end
|
||||||
|
|
||||||
|
:help
|
||||||
|
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||||
|
|
||||||
|
:end
|
||||||
|
popd
|
28
docs/regress.rst
Normal file
28
docs/regress.rst
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
Regression
|
||||||
|
==========
|
||||||
|
|
||||||
|
Functions
|
||||||
|
---------
|
||||||
|
|
||||||
|
.. autofunction:: yli.logit_then_regress
|
||||||
|
|
||||||
|
.. autoclass:: yli.PenalisedLogit
|
||||||
|
|
||||||
|
.. autofunction:: yli.regress
|
||||||
|
|
||||||
|
.. autofunction:: yli.vif
|
||||||
|
|
||||||
|
Result classes
|
||||||
|
--------------
|
||||||
|
|
||||||
|
.. autoclass:: yli.regress.CategoricalTerm
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. autoclass:: yli.regress.LikelihoodRatioTestResult
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. autoclass:: yli.regress.RegressionResult
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. autoclass:: yli.regress.SingleTerm
|
||||||
|
:members:
|
36
docs/sig_tests.rst
Normal file
36
docs/sig_tests.rst
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
Significance tests
|
||||||
|
==================
|
||||||
|
|
||||||
|
Functions
|
||||||
|
---------
|
||||||
|
|
||||||
|
.. autofunction:: yli.anova_oneway
|
||||||
|
|
||||||
|
.. autofunction:: yli.chi2
|
||||||
|
|
||||||
|
.. autofunction:: yli.mannwhitney
|
||||||
|
|
||||||
|
.. autofunction:: yli.pearsonr
|
||||||
|
|
||||||
|
.. autofunction:: yli.ttest_ind
|
||||||
|
|
||||||
|
Result classes
|
||||||
|
--------------
|
||||||
|
|
||||||
|
.. autoclass:: yli.sig_tests.BrunnerMunzelResult
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. autoclass:: yli.sig_tests.FTestResult
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. autoclass:: yli.sig_tests.MannWhitneyResult
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. autoclass:: yli.sig_tests.PearsonChiSquaredResult
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. autoclass:: yli.sig_tests.PearsonRResult
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. autoclass:: yli.sig_tests.TTestResult
|
||||||
|
:members:
|
@ -18,26 +18,45 @@ class BayesFactor:
|
|||||||
"""A Bayes factor"""
|
"""A Bayes factor"""
|
||||||
|
|
||||||
def __init__(self, factor, num_symbol, num_desc, denom_symbol, denom_desc):
|
def __init__(self, factor, num_symbol, num_desc, denom_symbol, denom_desc):
|
||||||
|
#: Value of the Bayes factor (*float*)
|
||||||
self.factor = factor
|
self.factor = factor
|
||||||
|
|
||||||
|
#: Symbol representing the hypothesis in the numerator (*str*)
|
||||||
self.num_symbol = num_symbol
|
self.num_symbol = num_symbol
|
||||||
|
#: Description of the hypothesis in the numerator (*str*)
|
||||||
self.num_desc = num_desc
|
self.num_desc = num_desc
|
||||||
|
#: Symbol representing the hypothesis in the denominator (*str*)
|
||||||
self.denom_symbol = denom_symbol
|
self.denom_symbol = denom_symbol
|
||||||
|
#: Description of the hypothesis in the denominator (*str*)
|
||||||
self.denom_desc = denom_desc
|
self.denom_desc = denom_desc
|
||||||
|
|
||||||
def _repr_html_(self):
|
def _repr_html_(self):
|
||||||
return 'BF<sub>{0}{1}</sub> = {2:.2f}, {5}<br>H<sub>{0}</sub>: {3}<br>H<sub>{1}</sub>: {4}'.format(self.num_symbol, self.denom_symbol, self.factor, self.num_desc, self.denom_desc, self.interpret_lw(html=True))
|
return 'BF<sub>{0}{1}</sub> = {2:.2f}, {5}<br>H<sub>{0}</sub>: {3}<br>H<sub>{1}</sub>: {4}'.format(self.num_symbol, self.denom_symbol, self.factor, self.num_desc, self.denom_desc, self.interpret_lw(html=True))
|
||||||
|
|
||||||
def summary(self):
|
def summary(self):
|
||||||
|
"""
|
||||||
|
Return a stringified summary of the Bayes factor
|
||||||
|
|
||||||
|
:rtype: str
|
||||||
|
"""
|
||||||
|
|
||||||
return 'BF{0}{1} = {2:.2f}, {5}\nH{0}: {3}\nH{1}: {4}'.format(self.num_symbol, self.denom_symbol, self.factor, self.num_desc, self.denom_desc, self.interpret_lw(html=False))
|
return 'BF{0}{1} = {2:.2f}, {5}\nH{0}: {3}\nH{1}: {4}'.format(self.num_symbol, self.denom_symbol, self.factor, self.num_desc, self.denom_desc, self.interpret_lw(html=False))
|
||||||
|
|
||||||
def invert(self):
|
def invert(self):
|
||||||
"""Invert the Bayes factor"""
|
"""
|
||||||
|
Invert the Bayes factor
|
||||||
|
|
||||||
|
:rtype: :class:`BayesFactor`
|
||||||
|
"""
|
||||||
|
|
||||||
return BayesFactor(1 / self.factor, self.denom_symbol, self.denom_desc, self.num_symbol, self.num_desc)
|
return BayesFactor(1 / self.factor, self.denom_symbol, self.denom_desc, self.num_symbol, self.num_desc)
|
||||||
|
|
||||||
def interpret_lw(self, html):
|
def interpret_lw(self, html):
|
||||||
"""Interpret the Bayes factor according to the Lee & Wagenmakers classification scheme"""
|
"""
|
||||||
|
Interpret the Bayes factor according to the Lee & Wagenmakers classification scheme
|
||||||
|
|
||||||
|
:meta private:
|
||||||
|
"""
|
||||||
|
|
||||||
if self.factor == 1:
|
if self.factor == 1:
|
||||||
return 'Evidence favours neither hypothesis to the other'
|
return 'Evidence favours neither hypothesis to the other'
|
||||||
@ -64,11 +83,11 @@ class BayesFactor:
|
|||||||
def bayesfactor_afbf(params, cov, n, hypothesis):
|
def bayesfactor_afbf(params, cov, n, hypothesis):
|
||||||
"""
|
"""
|
||||||
Compute an adjusted fractional Bayes factor for the hypothesis
|
Compute an adjusted fractional Bayes factor for the hypothesis
|
||||||
Using R "BFpack" library
|
|
||||||
|
|
||||||
See R documentation for BFpack.BF
|
Uses R *BFpack* library. See R documentation for *BFpack.BF*.
|
||||||
|
|
||||||
Returns Bayes factor for hypothesis vs its complement
|
:return: Bayes factor for the hypothesis versus its complement
|
||||||
|
:rtype: :class:`yli.bayes_factors.BayesFactor`
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import rpy2.robjects as ro
|
import rpy2.robjects as ro
|
||||||
|
@ -15,18 +15,18 @@
|
|||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
"""Stores global configuration for the library"""
|
"""Global configuration for the library"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
# Display at least this many decimal places for p values
|
#: Display at least this many decimal places for *p* values (*int*)
|
||||||
self.pvalue_min_dps = 2
|
self.pvalue_min_dps = 2
|
||||||
# Display at most this many decimal places for p values
|
#: Display at most this many decimal places for *p* values (*int*)
|
||||||
self.pvalue_max_dps = 3
|
self.pvalue_max_dps = 3
|
||||||
# Display a leading zero for p values
|
#: Display a leading zero for *p* values (*bool*)
|
||||||
self.pvalue_leading_zero = True
|
self.pvalue_leading_zero = True
|
||||||
|
|
||||||
# Alpha level for significance tests, confidence intervals
|
#: Alpha level for significance tests, confidence intervals (*float*)
|
||||||
self.alpha = 0.05
|
self.alpha = 0.05
|
||||||
|
|
||||||
"""Global config singleton"""
|
"""Global configuration singleton"""
|
||||||
config = Config()
|
config = Config()
|
||||||
|
@ -23,17 +23,31 @@ from .utils import ConfidenceInterval
|
|||||||
class betarat_gen(stats.rv_continuous):
|
class betarat_gen(stats.rv_continuous):
|
||||||
"""
|
"""
|
||||||
Ratio of 2 independent beta-distributed variables
|
Ratio of 2 independent beta-distributed variables
|
||||||
Ratio of Beta(a1, b1) / Beta(a2, b2)
|
|
||||||
|
|
||||||
References:
|
This is a SciPy *stats.rv_continuous* distribution which takes 4 parameters, *a1*, *b1*, *a2* and *b2*,
|
||||||
1. Pham-Gia T. Distributions of the ratios of independent beta variables and applications. Communications in Statistics: Theory and Methods. 2000;29(12):2693–715. doi: 10.1080/03610920008832632
|
and gives the distribution of Beta(*a1*, *b1*) / Beta(*a2*, *b2*).
|
||||||
2. Weekend Editor. On the ratio of Beta-distributed random variables. Some Weekend Reading. 2021 Sep 13. https://www.someweekendreading.blog/beta-ratios/
|
|
||||||
|
**References:**
|
||||||
|
|
||||||
|
1. Pham-Gia T. Distributions of the ratios of independent beta variables and applications. *Communications in Statistics: Theory and Methods*. 2000;29(12):2693–715. `doi: 10.1080/03610920008832632 <https://doi.org/10.1080/03610920008832632>`_
|
||||||
|
|
||||||
|
2. Weekend Editor. On the ratio of Beta-distributed random variables. *Some Weekend Reading*. 2021 Sep 13. https://www.someweekendreading.blog/beta-ratios/
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# TODO: _rvs based on stats.beta
|
# TODO: _rvs based on stats.beta
|
||||||
|
|
||||||
def from_scipy(self, beta1, beta2):
|
def from_scipy(self, beta1, beta2):
|
||||||
"""Construct a new beta_ratio distribution from two SciPy beta distributions"""
|
"""
|
||||||
|
Construct a new *beta_ratio* distribution from two SciPy beta distributions
|
||||||
|
|
||||||
|
:param beta1: Distribution for the numerator of the ratio
|
||||||
|
:type beta1: frozen SciPy stats.beta
|
||||||
|
:param beta2: Distribution for the denominator of the ratio
|
||||||
|
:type beta2: frozen SciPy stats.beta
|
||||||
|
|
||||||
|
:rtype: Frozen *beta_ratio* distribution
|
||||||
|
"""
|
||||||
|
|
||||||
return self(*beta1.args, *beta2.args)
|
return self(*beta1.args, *beta2.args)
|
||||||
|
|
||||||
def _do_vectorized(self, func, x, a1, b1, a2, b2):
|
def _do_vectorized(self, func, x, a1, b1, a2, b2):
|
||||||
@ -104,20 +118,38 @@ beta_ratio = betarat_gen(name='beta_ratio', a=0) # a = lower bound of support
|
|||||||
class betaoddsrat_gen(stats.rv_continuous):
|
class betaoddsrat_gen(stats.rv_continuous):
|
||||||
"""
|
"""
|
||||||
Ratio of the odds of 2 independent beta-distributed variables
|
Ratio of the odds of 2 independent beta-distributed variables
|
||||||
Ratio of (X/(1-X)) / (Y/(1-Y)), where X ~ Beta(a1, b1), Y ~ Beta(a2, b2)
|
|
||||||
|
|
||||||
Reference:
|
This is a SciPy *stats.rv_continuous* distribution which takes 4 parameters, *a1*, *b1*, *a2* and *b2*,
|
||||||
Hora SC, Kelley GD. Bayesian inference on the odds and risk ratios. Communications in Statistics: Theory and Methods. 1983;12(6):725–38. doi: 10.1080/03610928308828491
|
and gives the distribution of (*X*/(1-*X*)) / (*Y*/(1-*Y*)), where *X* ~ Beta(*a1*, *b1*), and *Y* ~ Beta(*a2*, *b2*).
|
||||||
|
|
||||||
|
**Reference:** Hora SC, Kelley GD. Bayesian inference on the odds and risk ratios. *Communications in Statistics: Theory and Methods*. 1983;12(6):725–38. `doi: 10.1080/03610928308828491 <https://doi.org/10.1080/03610928308828491>`_
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# TODO: _rvs based on stats.beta
|
# TODO: _rvs based on stats.beta
|
||||||
|
|
||||||
def from_scipy(self, beta1, beta2):
|
def from_scipy(self, beta1, beta2):
|
||||||
"""Construct a new beta_ratio distribution from two SciPy beta distributions"""
|
"""
|
||||||
|
Construct a new *beta_oddsratio* distribution from two SciPy beta distributions
|
||||||
|
|
||||||
|
:param beta1: Distribution for the numerator of the ratio
|
||||||
|
:type beta1: frozen SciPy stats.beta
|
||||||
|
:param beta2: Distribution for the denominator of the ratio
|
||||||
|
:type beta2: frozen SciPy stats.beta
|
||||||
|
|
||||||
|
:rtype: Frozen *beta_oddsratio* distribution
|
||||||
|
"""
|
||||||
|
|
||||||
return self(*beta1.args, *beta2.args)
|
return self(*beta1.args, *beta2.args)
|
||||||
|
|
||||||
def set_cdf_terms(self, cdf_terms):
|
def set_cdf_terms(self, cdf_terms):
|
||||||
"""Set the number of terms to use when calculating CDF (see _cdf)"""
|
"""
|
||||||
|
Set the number of terms to use when calculating the CDF
|
||||||
|
|
||||||
|
If *cdf_terms* = *np.inf* (default), the CDF will be computed by numerical integration of the PDF.
|
||||||
|
|
||||||
|
Otherwise, the CDF will be computed by truncating the infinite sum given by Hora & Kelley to the specified number of terms.
|
||||||
|
"""
|
||||||
|
|
||||||
BETA_ODDSRATIO_CDF_TERMS[0] = cdf_terms
|
BETA_ODDSRATIO_CDF_TERMS[0] = cdf_terms
|
||||||
|
|
||||||
def _do_vectorized(self, func, x, a1, b1, a2, b2):
|
def _do_vectorized(self, func, x, a1, b1, a2, b2):
|
||||||
@ -179,12 +211,7 @@ class betaoddsrat_gen(stats.rv_continuous):
|
|||||||
return 1 - k * inf_sum
|
return 1 - k * inf_sum
|
||||||
|
|
||||||
def _cdf(self, w, a1, b1, a2, b2):
|
def _cdf(self, w, a1, b1, a2, b2):
|
||||||
"""
|
"""CDF for the distribution"""
|
||||||
CDF for the distribution
|
|
||||||
|
|
||||||
If BETA_ODDSRATIO_CDF_TERMS = np.inf, compute the CDF by integrating the PDF
|
|
||||||
Otherwise, compute the CDF by truncating the infinite sum given by Hora & Kelley
|
|
||||||
"""
|
|
||||||
|
|
||||||
w = np.atleast_1d(w)
|
w = np.atleast_1d(w)
|
||||||
a1 = np.atleast_1d(a1)
|
a1 = np.atleast_1d(a1)
|
||||||
@ -259,10 +286,24 @@ beta_oddsratio = betaoddsrat_gen(name='beta_oddsratio', a=0) # a = lower bound
|
|||||||
|
|
||||||
class transformed_gen(stats.rv_continuous):
|
class transformed_gen(stats.rv_continuous):
|
||||||
"""
|
"""
|
||||||
Represents a transformation, Y, of a "base" random variable, X, where f(Y) = X
|
Represents a transformation, *Y*, of a "base" random variable, *X*
|
||||||
Hence Y.pdf(x) = X.pdf(f(x)) * f'(x)
|
|
||||||
|
|
||||||
e.g. if X is a model parameter, then transformed_dist(X, f=np.exp, fprime=np.exp, finv=np.log) is distribution of the log-parameter
|
This is a SciPy *stats.rv_continuous* distribution which takes parameters as described below.
|
||||||
|
|
||||||
|
The transformation is *f*\ (*Y*) = *X*.
|
||||||
|
Hence *Y*.pdf\ (*x*) = *X*.pdf\ (*f*\ (*x*)) \* *f*'\ (*x*).
|
||||||
|
|
||||||
|
For example, if *X* is a model parameter, then ``transformed_dist(X, f=np.exp, fprime=np.exp, finv=np.log)`` is the distribution of the log-parameter.
|
||||||
|
|
||||||
|
**Parameters:**
|
||||||
|
|
||||||
|
* **base** (*frozen SciPy distribution*) – Distribution of the base random variable
|
||||||
|
|
||||||
|
* **f** (*callable*) – Function *f* representing the transformation, which takes values of *Y* to values of *X*
|
||||||
|
|
||||||
|
* **fprime** (*callable*) – Derivative of the function *f*
|
||||||
|
|
||||||
|
* **finv** (*callable*) – Inverse of the function *f*, which takes values of *X* to values of *Y*
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _pdf(self, x, base, f, fprime, finv):
|
def _pdf(self, x, base, f, fprime, finv):
|
||||||
@ -297,7 +338,12 @@ def hdi(distribution, level=None):
|
|||||||
"""
|
"""
|
||||||
Get the highest density interval for the distribution, e.g. for a Bayesian posterior, the highest posterior density interval (HPD/HDI)
|
Get the highest density interval for the distribution, e.g. for a Bayesian posterior, the highest posterior density interval (HPD/HDI)
|
||||||
|
|
||||||
level: Coverage/confidence probability, default (None) is 1 - config.alpha
|
:param distribution: Distribution to compute the interval for
|
||||||
|
:type distribution: frozen SciPy distribution
|
||||||
|
:param level: Coverage/confidence probability, default (*None*) is 1 − *config.alpha*
|
||||||
|
:type level: float
|
||||||
|
|
||||||
|
:rtype: :class:`yli.utils.ConfidenceInterval`
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if level is None:
|
if level is None:
|
||||||
|
36
yli/io.py
36
yli/io.py
@ -23,9 +23,16 @@ def pickle_write_encrypted(df, fname):
|
|||||||
"""
|
"""
|
||||||
Write the DataFrame to an encrypted file
|
Write the DataFrame to an encrypted file
|
||||||
|
|
||||||
DataFrame is serialised with Pickle, compressed with LZMA, then encrypted
|
Prompts the user for a password.
|
||||||
Encryption is AES-256-CTR
|
|
||||||
Password is derived using scrypt KDF
|
The DataFrame is serialised with *pickle*, compressed with LZMA, then encrypted.
|
||||||
|
Encryption uses AES-256-CTR.
|
||||||
|
Encryption key is derived from the password using the scrypt KDF.
|
||||||
|
|
||||||
|
:param df: Data to serialise
|
||||||
|
:type df: DataFrame
|
||||||
|
:param fname: Filename to write data to
|
||||||
|
:type fname: str
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from Crypto.Cipher import AES
|
from Crypto.Cipher import AES
|
||||||
@ -56,7 +63,12 @@ def pickle_write_compressed(df, fname):
|
|||||||
"""
|
"""
|
||||||
Write the DataFrame to a compressed file
|
Write the DataFrame to a compressed file
|
||||||
|
|
||||||
DataFrame is serialised with Pickle and compressed with LZMA
|
The DataFrame is serialised with *pickle*, then compressed with LZMA.
|
||||||
|
|
||||||
|
:param df: Data to serialise
|
||||||
|
:type df: DataFrame
|
||||||
|
:param fname: Filename to write data to
|
||||||
|
:type fname: str
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Serialise Pandas and compress
|
# Serialise Pandas and compress
|
||||||
@ -67,6 +79,13 @@ def pickle_write_compressed(df, fname):
|
|||||||
def pickle_read_encrypted(fname):
|
def pickle_read_encrypted(fname):
|
||||||
"""
|
"""
|
||||||
Read a DataFrame from an encrypted file
|
Read a DataFrame from an encrypted file
|
||||||
|
|
||||||
|
See :meth:`yli.pickle_write_encrypted`.
|
||||||
|
|
||||||
|
:param fname: Filename to read data from
|
||||||
|
:type fname: str
|
||||||
|
|
||||||
|
:rtype: DataFrame
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from Crypto.Cipher import AES
|
from Crypto.Cipher import AES
|
||||||
@ -91,7 +110,14 @@ def pickle_read_encrypted(fname):
|
|||||||
|
|
||||||
def pickle_read_compressed(fname):
|
def pickle_read_compressed(fname):
|
||||||
"""
|
"""
|
||||||
Read a DataFrame from an compressed (but not encrypted) file
|
Read a DataFrame from a compressed file
|
||||||
|
|
||||||
|
See :meth:`yli.pickle_write_compressed`.
|
||||||
|
|
||||||
|
:param fname: Filename to read data from
|
||||||
|
:type fname: str
|
||||||
|
|
||||||
|
:rtype: DataFrame
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Uncompress and deserialise to Pandas
|
# Uncompress and deserialise to Pandas
|
||||||
|
145
yli/regress.py
145
yli/regress.py
@ -30,11 +30,17 @@ from .config import config
|
|||||||
from .sig_tests import FTestResult
|
from .sig_tests import FTestResult
|
||||||
from .utils import Estimate, check_nan, cols_for_formula, fmt_p, formula_factor_ref_category, parse_patsy_term
|
from .utils import Estimate, check_nan, cols_for_formula, fmt_p, formula_factor_ref_category, parse_patsy_term
|
||||||
|
|
||||||
def vif(df, formula=None, nan_policy='warn'):
|
def vif(df, formula=None, *, nan_policy='warn'):
|
||||||
"""
|
"""
|
||||||
Calculate the variance inflation factor for each variable in df
|
Calculate the variance inflation factor (VIF) for each variable in *df*
|
||||||
|
|
||||||
formula: If specified, calculate the VIF only for the variables in the formula
|
:param df: Data to calculate the VIF for
|
||||||
|
:type df: DataFrame
|
||||||
|
:param formula: If specified, calculate the VIF only for the variables in the formula
|
||||||
|
:type formula: str
|
||||||
|
|
||||||
|
:return: The variance inflation factor
|
||||||
|
:rtype: float
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if formula:
|
if formula:
|
||||||
@ -63,24 +69,37 @@ def vif(df, formula=None, nan_policy='warn'):
|
|||||||
# Regression
|
# Regression
|
||||||
|
|
||||||
class LikelihoodRatioTestResult:
|
class LikelihoodRatioTestResult:
|
||||||
"""Result of a likelihood ratio test for regression"""
|
"""
|
||||||
|
Result of a likelihood ratio test for regression
|
||||||
|
|
||||||
|
See :meth:`RegressionResult.lrtest_null`.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, statistic, dof, pvalue):
|
def __init__(self, statistic, dof, pvalue):
|
||||||
|
#: Likelihood ratio test statistic (*float*)
|
||||||
self.statistic = statistic
|
self.statistic = statistic
|
||||||
|
#: Degrees of freedom for the likelihood ratio test statistic (*int*)
|
||||||
self.dof = dof
|
self.dof = dof
|
||||||
|
#: *p* value for the likelihood ratio test (*float*)
|
||||||
self.pvalue = pvalue
|
self.pvalue = pvalue
|
||||||
|
|
||||||
def _repr_html_(self):
|
def _repr_html_(self):
|
||||||
return 'LR({}) = {:.2f}; <i>p</i> {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=True))
|
return 'LR({}) = {:.2f}; <i>p</i> {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=True))
|
||||||
|
|
||||||
def summary(self):
|
def summary(self):
|
||||||
|
"""
|
||||||
|
Return a stringified summary of the likelihood ratio test
|
||||||
|
|
||||||
|
:rtype: str
|
||||||
|
"""
|
||||||
|
|
||||||
return 'LR({}) = {:.2f}; p {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False))
|
return 'LR({}) = {:.2f}; p {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False))
|
||||||
|
|
||||||
class RegressionResult:
|
class RegressionResult:
|
||||||
"""
|
"""
|
||||||
Result of a regression
|
Result of a regression
|
||||||
|
|
||||||
llf/llnull: Log-likelihood of model/null model
|
See :func:`yli.regress`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@ -92,44 +111,63 @@ class RegressionResult:
|
|||||||
dof_resid, rsquared, f_statistic,
|
dof_resid, rsquared, f_statistic,
|
||||||
exp
|
exp
|
||||||
):
|
):
|
||||||
# A copy of the raw result so we can access it
|
#: Raw result from statsmodels *model.fit*
|
||||||
self.raw_result = raw_result
|
self.raw_result = raw_result
|
||||||
|
|
||||||
# Information for display
|
# Information for display
|
||||||
|
#: Full name of the regression model type (*str*)
|
||||||
self.full_name = full_name
|
self.full_name = full_name
|
||||||
|
#: Short name of the regression model type (*str*)
|
||||||
self.model_name = model_name
|
self.model_name = model_name
|
||||||
|
#: Method for fitting the regression model (*str*)
|
||||||
self.fit_method = fit_method
|
self.fit_method = fit_method
|
||||||
|
|
||||||
# Basic fitted model information
|
# Basic fitted model information
|
||||||
|
#: Name of the dependent variable (*str*)
|
||||||
self.dep = dep
|
self.dep = dep
|
||||||
|
#: Number of observations (*int*)
|
||||||
self.nobs = nobs
|
self.nobs = nobs
|
||||||
|
#: Degrees of freedom for the model (*int*)
|
||||||
self.dof_model = dof_model
|
self.dof_model = dof_model
|
||||||
|
#: Date and time of fitting the model (Python *datetime*)
|
||||||
self.fitted_dt = fitted_dt
|
self.fitted_dt = fitted_dt
|
||||||
|
#: Method for computing the covariance matrix (*str*)
|
||||||
self.cov_type = cov_type
|
self.cov_type = cov_type
|
||||||
|
|
||||||
# Regression coefficients/p values
|
# Regression coefficients/p values
|
||||||
|
#: Coefficients and *p* values for each term in the model (*dict* of :class:`SingleTerm` or :class:`CategoricalTerm`)
|
||||||
self.terms = terms
|
self.terms = terms
|
||||||
|
|
||||||
# Model log-likelihood
|
# Model log-likelihood
|
||||||
|
#: Log-likelihood of fitted model (*float*)
|
||||||
self.llf = llf
|
self.llf = llf
|
||||||
|
#: Log-likelihood of null model (*float*)
|
||||||
self.llnull = llnull
|
self.llnull = llnull
|
||||||
|
|
||||||
# Extra statistics (not all regression models have these)
|
# Extra statistics (not all regression models have these)
|
||||||
|
#: Degrees of freedom for the residuals (*int*; *None* if N/A)
|
||||||
self.dof_resid = dof_resid
|
self.dof_resid = dof_resid
|
||||||
|
#: *R*:sup:`2` statistic (*float*; *None* if N/A)
|
||||||
self.rsquared = rsquared
|
self.rsquared = rsquared
|
||||||
|
#: *F* statistic (*float*; *None* if N/A)
|
||||||
self.f_statistic = f_statistic
|
self.f_statistic = f_statistic
|
||||||
|
|
||||||
# Config for display style
|
# Config for display style
|
||||||
|
#: See :func:`yli.regress`
|
||||||
self.exp = exp
|
self.exp = exp
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def pseudo_rsquared(self):
|
def pseudo_rsquared(self):
|
||||||
"""McFadden's pseudo R-squared"""
|
"""McFadden's pseudo *R*:sup:`2` statistic"""
|
||||||
|
|
||||||
return 1 - self.llf/self.llnull
|
return 1 - self.llf/self.llnull
|
||||||
|
|
||||||
def lrtest_null(self):
|
def lrtest_null(self):
|
||||||
"""Compute the likelihood ratio test comparing the model to the null model"""
|
"""
|
||||||
|
Compute the likelihood ratio test comparing the model to the null model
|
||||||
|
|
||||||
|
:rtype: :class:`LikelihoodRatioTestResult`
|
||||||
|
"""
|
||||||
|
|
||||||
statistic = -2 * (self.llnull - self.llf)
|
statistic = -2 * (self.llnull - self.llf)
|
||||||
pvalue = 1 - stats.chi2.cdf(statistic, self.dof_model)
|
pvalue = 1 - stats.chi2.cdf(statistic, self.dof_model)
|
||||||
@ -137,17 +175,28 @@ class RegressionResult:
|
|||||||
return LikelihoodRatioTestResult(statistic, self.dof_model, pvalue)
|
return LikelihoodRatioTestResult(statistic, self.dof_model, pvalue)
|
||||||
|
|
||||||
def ftest(self):
|
def ftest(self):
|
||||||
"""Perform the F test that all slopes are 0"""
|
"""
|
||||||
|
Perform the *F* test that all slopes are 0
|
||||||
|
|
||||||
|
:rtype: :class:`yli.sig_tests.FTestResult`
|
||||||
|
"""
|
||||||
|
|
||||||
pvalue = 1 - stats.f(self.dof_model, self.dof_resid).cdf(self.f_statistic)
|
pvalue = 1 - stats.f(self.dof_model, self.dof_resid).cdf(self.f_statistic)
|
||||||
return FTestResult(self.f_statistic, self.dof_model, self.dof_resid, pvalue)
|
return FTestResult(self.f_statistic, self.dof_model, self.dof_resid, pvalue)
|
||||||
|
|
||||||
def bayesfactor_beta_zero(self, term):
|
def bayesfactor_beta_zero(self, term):
|
||||||
"""
|
"""
|
||||||
Compute Bayes factor testing the hypothesis that the given beta is 0
|
Compute a Bayes factor testing the hypothesis that the given beta is 0
|
||||||
Requires statsmodels regression
|
|
||||||
|
|
||||||
term: Raw name (from statsmodels, available via RegressionResult.raw_result) of term to be tested
|
Uses the R *BFpack* library.
|
||||||
|
|
||||||
|
Requires the regression to be from statsmodels.
|
||||||
|
The term must be specified as the *raw name* from the statsmodels regression, available via :attr:`RegressionResult.raw_result`.
|
||||||
|
|
||||||
|
:param term: Raw name of the term to be tested
|
||||||
|
:type term: str
|
||||||
|
|
||||||
|
:rtype: :class:`yli.bayes_factors.BayesFactor`
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# FIXME: Allow specifying our renamed terms
|
# FIXME: Allow specifying our renamed terms
|
||||||
@ -264,6 +313,12 @@ class RegressionResult:
|
|||||||
return out
|
return out
|
||||||
|
|
||||||
def summary(self):
|
def summary(self):
|
||||||
|
"""
|
||||||
|
Return a stringified summary of the regression model
|
||||||
|
|
||||||
|
:rtype: str
|
||||||
|
"""
|
||||||
|
|
||||||
# Render header table
|
# Render header table
|
||||||
left_col, right_col = self._header_table(html=False)
|
left_col, right_col = self._header_table(html=False)
|
||||||
|
|
||||||
@ -325,24 +380,23 @@ class RegressionResult:
|
|||||||
return out
|
return out
|
||||||
|
|
||||||
class SingleTerm:
|
class SingleTerm:
|
||||||
"""
|
"""A term in a :class:`RegressionResult` which is a single term"""
|
||||||
A term in a RegressionResult which is a single term
|
|
||||||
|
|
||||||
raw_name: The raw name of the term (e.g. in RegressionResult.raw_result data)
|
|
||||||
beta: An Estimate of the coefficient
|
|
||||||
pvalue: The p value
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, raw_name, beta, pvalue):
|
def __init__(self, raw_name, beta, pvalue):
|
||||||
|
#: Raw name of the term (*str*; e.g. in :attr:`RegressionResult.raw_result`)
|
||||||
self.raw_name = raw_name
|
self.raw_name = raw_name
|
||||||
|
#: :class:`yli.utils.Estimate` of the coefficient
|
||||||
self.beta = beta
|
self.beta = beta
|
||||||
|
#: *p* value for the coefficient (*float*)
|
||||||
self.pvalue = pvalue
|
self.pvalue = pvalue
|
||||||
|
|
||||||
class CategoricalTerm:
|
class CategoricalTerm:
|
||||||
"""A group of terms in a RegressionResult corresponding to a categorical variable"""
|
"""A group of terms in a :class:`RegressionResult` corresponding to a categorical variable"""
|
||||||
|
|
||||||
def __init__(self, categories, ref_category):
|
def __init__(self, categories, ref_category):
|
||||||
|
#: Terms for each of the categories, excluding the reference category (*dict* of :class:`SingleTerm`)
|
||||||
self.categories = categories
|
self.categories = categories
|
||||||
|
#: Name of the reference category (*str*)
|
||||||
self.ref_category = ref_category
|
self.ref_category = ref_category
|
||||||
|
|
||||||
def regress(
|
def regress(
|
||||||
@ -356,8 +410,30 @@ def regress(
|
|||||||
"""
|
"""
|
||||||
Fit a statsmodels regression model
|
Fit a statsmodels regression model
|
||||||
|
|
||||||
bool_baselevels: Show reference categories for boolean independent variables even if reference category is False
|
:param model_class: Type of regression model to fit
|
||||||
exp: Report exponentiated parameters rather than raw parameters
|
:type model_class: statsmodels model class
|
||||||
|
:param df: Data to perform regression on
|
||||||
|
:type df: DataFrame
|
||||||
|
:param dep: Column in *df* for the dependent variable (numeric)
|
||||||
|
:type dep: str
|
||||||
|
:param formula: Patsy formula for the regression model
|
||||||
|
:type formula: str
|
||||||
|
:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
|
||||||
|
:type nan_policy: str
|
||||||
|
:param model_kwargs: Keyword arguments to pass to *model_class* constructor
|
||||||
|
:type model_kwargs: dict
|
||||||
|
:param fit_kwargs: Keyword arguments to pass to *model.fit*
|
||||||
|
:type fit_kwargs: dict
|
||||||
|
:param family: See statsmodels *GLM* constructor
|
||||||
|
:param cov_type: See statsmodels *model.fit*
|
||||||
|
:param maxiter: See statsmodels *model.fit*
|
||||||
|
:param start_params: See statsmodels *model.fit*
|
||||||
|
:param bool_baselevels: Show reference categories for boolean independent variables even if reference category is *False*
|
||||||
|
:type bool_baselevels: bool
|
||||||
|
:param exp: Report exponentiated parameters rather than raw parameters, default (*None*) is to autodetect based on *model_class*
|
||||||
|
:type exp: bool
|
||||||
|
|
||||||
|
:rtype: :class:`yli.regress.RegressionResult`
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Populate model_kwargs
|
# Populate model_kwargs
|
||||||
@ -473,7 +549,23 @@ def regress(
|
|||||||
)
|
)
|
||||||
|
|
||||||
def logit_then_regress(model_class, df, dep, formula, *, nan_policy='warn', **kwargs):
|
def logit_then_regress(model_class, df, dep, formula, *, nan_policy='warn', **kwargs):
|
||||||
"""Perform logistic regression, then use parameters as start parameters for desired regression"""
|
"""
|
||||||
|
Perform logistic regression, then use parameters as start parameters for desired regression
|
||||||
|
|
||||||
|
:param model_class: Type of regression model to fit
|
||||||
|
:type model_class: statsmodels model class
|
||||||
|
:param df: Data to perform regression on
|
||||||
|
:type df: DataFrame
|
||||||
|
:param dep: Column in *df* for the dependent variable (numeric)
|
||||||
|
:type dep: str
|
||||||
|
:param formula: Patsy formula for the regression model
|
||||||
|
:type formula: str
|
||||||
|
:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
|
||||||
|
:type nan_policy: str
|
||||||
|
:param kwargs: Passed through to :func:`yli.regress`
|
||||||
|
|
||||||
|
:rtype: :class:`yli.regress.RegressionResult`
|
||||||
|
"""
|
||||||
|
|
||||||
# Check for/clean NaNs
|
# Check for/clean NaNs
|
||||||
# Do this once here so we only get 1 warning
|
# Do this once here so we only get 1 warning
|
||||||
@ -496,10 +588,11 @@ def logit_then_regress(model_class, df, dep, formula, *, nan_policy='warn', **kw
|
|||||||
|
|
||||||
class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel):
|
class PenalisedLogit(statsmodels.discrete.discrete_model.BinaryModel):
|
||||||
"""
|
"""
|
||||||
Statsmodel-compatible model for computing Firth penalised logistic regression
|
statsmodel-compatible model for computing Firth penalised logistic regression
|
||||||
Uses R "logistf" library
|
|
||||||
|
|
||||||
NB: This class expects to be used in the context of yli.regress()
|
Uses the R *logistf* library.
|
||||||
|
|
||||||
|
This class should be used in conjunction with :func:`yli.regress`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def fit(self, disp=False):
|
def fit(self, disp=False):
|
||||||
|
179
yli/sig_tests.py
179
yli/sig_tests.py
@ -30,26 +30,50 @@ from .utils import Estimate, as_2groups, check_nan, fmt_p
|
|||||||
|
|
||||||
class TTestResult:
|
class TTestResult:
|
||||||
"""
|
"""
|
||||||
Result of a Student's t test
|
Result of a Student's *t* test
|
||||||
|
|
||||||
delta: Mean difference
|
See :func:`yli.ttest_ind`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, statistic, dof, pvalue, delta, delta_direction):
|
def __init__(self, statistic, dof, pvalue, delta, delta_direction):
|
||||||
|
#: *t* statistic (*float*)
|
||||||
self.statistic = statistic
|
self.statistic = statistic
|
||||||
|
#: Degrees of freedom of the *t* distribution (*int*)
|
||||||
self.dof = dof
|
self.dof = dof
|
||||||
|
#: *p* value for the *t* statistic (*float*)
|
||||||
self.pvalue = pvalue
|
self.pvalue = pvalue
|
||||||
|
#: Absolute value of the mean difference (:class:`yli.utils.Estimate`)
|
||||||
self.delta = delta
|
self.delta = delta
|
||||||
|
#: Description of the direction of the effect (*str*)
|
||||||
self.delta_direction = delta_direction
|
self.delta_direction = delta_direction
|
||||||
|
|
||||||
def _repr_html_(self):
|
def _repr_html_(self):
|
||||||
return '<i>t</i>({:.0f}) = {:.2f}; <i>p</i> {}<br>Δ<i>μ</i> ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=True), (1-config.alpha)*100, self.delta.summary(), self.delta_direction)
|
return '<i>t</i>({:.0f}) = {:.2f}; <i>p</i> {}<br>Δ<i>μ</i> ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=True), (1-config.alpha)*100, self.delta.summary(), self.delta_direction)
|
||||||
|
|
||||||
def summary(self):
|
def summary(self):
|
||||||
|
"""
|
||||||
|
Return a stringified summary of the *t* test
|
||||||
|
|
||||||
|
:rtype: str
|
||||||
|
"""
|
||||||
|
|
||||||
return 't({:.0f}) = {:.2f}; p {}\nΔμ ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False), (1-config.alpha)*100, self.delta.summary(), self.delta_direction)
|
return 't({:.0f}) = {:.2f}; p {}\nΔμ ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False), (1-config.alpha)*100, self.delta.summary(), self.delta_direction)
|
||||||
|
|
||||||
def ttest_ind(df, dep, ind, *, nan_policy='warn'):
|
def ttest_ind(df, dep, ind, *, nan_policy='warn'):
|
||||||
"""Perform an independent-sample Student's t test"""
|
"""
|
||||||
|
Perform an independent 2-sample Student's *t* test
|
||||||
|
|
||||||
|
:param df: Data to perform the test on
|
||||||
|
:type df: DataFrame
|
||||||
|
:param dep: Column in *df* for the dependent variable (numeric)
|
||||||
|
:type dep: str
|
||||||
|
:param ind: Column in *df* for the independent variable (dichotomous)
|
||||||
|
:type ind: str
|
||||||
|
:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
|
||||||
|
:type nan_policy: str
|
||||||
|
|
||||||
|
:rtype: :class:`yli.sig_tests.TTestResult`
|
||||||
|
"""
|
||||||
|
|
||||||
# Check for/clean NaNs
|
# Check for/clean NaNs
|
||||||
df = check_nan(df[[ind, dep]], nan_policy)
|
df = check_nan(df[[ind, dep]], nan_policy)
|
||||||
@ -78,22 +102,49 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'):
|
|||||||
# One-way ANOVA
|
# One-way ANOVA
|
||||||
|
|
||||||
class FTestResult:
|
class FTestResult:
|
||||||
"""Result of an F test for ANOVA/regression"""
|
"""
|
||||||
|
Result of an *F* test for ANOVA/regression
|
||||||
|
|
||||||
|
See :func:`yli.anova_oneway` and :meth:`yli.regress.RegressionResult.ftest`.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, statistic, dof1, dof2, pvalue):
|
def __init__(self, statistic, dof1, dof2, pvalue):
|
||||||
|
#: *F* statistic (*float*)
|
||||||
self.statistic = statistic
|
self.statistic = statistic
|
||||||
|
#: Degrees of freedom in the *F* distribution numerator (*int*)
|
||||||
self.dof1 = dof1
|
self.dof1 = dof1
|
||||||
|
#: Degrees of freedom in the *F* distribution denominator (*int*)
|
||||||
self.dof2 = dof2
|
self.dof2 = dof2
|
||||||
|
#: *p* value for the *F* statistic (*float*)
|
||||||
self.pvalue = pvalue
|
self.pvalue = pvalue
|
||||||
|
|
||||||
def _repr_html_(self):
|
def _repr_html_(self):
|
||||||
return '<i>F</i>({}, {}) = {:.2f}; <i>p</i> {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=True))
|
return '<i>F</i>({}, {}) = {:.2f}; <i>p</i> {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=True))
|
||||||
|
|
||||||
def summary(self):
|
def summary(self):
|
||||||
|
"""
|
||||||
|
Return a stringified summary of the *F* test
|
||||||
|
|
||||||
|
:rtype: str
|
||||||
|
"""
|
||||||
|
|
||||||
return 'F({}, {}) = {:.2f}; p {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=False))
|
return 'F({}, {}) = {:.2f}; p {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=False))
|
||||||
|
|
||||||
def anova_oneway(df, dep, ind, *, nan_policy='omit'):
|
def anova_oneway(df, dep, ind, *, nan_policy='omit'):
|
||||||
"""Perform one-way ANOVA"""
|
"""
|
||||||
|
Perform one-way ANOVA
|
||||||
|
|
||||||
|
:param df: Data to perform the test on
|
||||||
|
:type df: DataFrame
|
||||||
|
:param dep: Column in *df* for the dependent variable (numeric)
|
||||||
|
:type dep: str
|
||||||
|
:param ind: Column in *df* for the independent variable (categorical)
|
||||||
|
:type ind: str
|
||||||
|
:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
|
||||||
|
:type nan_policy: str
|
||||||
|
|
||||||
|
:rtype: :class:`yli.sig_tests.FTestResult`
|
||||||
|
"""
|
||||||
|
|
||||||
# Check for/clean NaNs
|
# Check for/clean NaNs
|
||||||
df = check_nan(df[[ind, dep]], nan_policy)
|
df = check_nan(df[[ind, dep]], nan_policy)
|
||||||
@ -115,16 +166,21 @@ def anova_oneway(df, dep, ind, *, nan_policy='omit'):
|
|||||||
|
|
||||||
class MannWhitneyResult:
|
class MannWhitneyResult:
|
||||||
"""
|
"""
|
||||||
Result of a Mann-Whitney test
|
Result of a Mann-Whitney *U* test
|
||||||
|
|
||||||
brunnermunzel: BrunnerMunzelResult on same data
|
See :func:`yli.mannwhitney`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, statistic, pvalue, rank_biserial, direction, brunnermunzel=None):
|
def __init__(self, statistic, pvalue, rank_biserial, direction, brunnermunzel=None):
|
||||||
|
#: Mann–Whitney *U* statistic (*float*)
|
||||||
self.statistic = statistic
|
self.statistic = statistic
|
||||||
|
#: *p* value for the *U* statistic (*float*)
|
||||||
self.pvalue = pvalue
|
self.pvalue = pvalue
|
||||||
|
#: Absolute value of the rank-biserial correlation (*float*)
|
||||||
self.rank_biserial = rank_biserial
|
self.rank_biserial = rank_biserial
|
||||||
|
#: Description of the direction of the effect (*str*)
|
||||||
self.direction = direction
|
self.direction = direction
|
||||||
|
#: :class:`BrunnerMunzelResult` on the same data, or *None* if N/A
|
||||||
self.brunnermunzel = brunnermunzel
|
self.brunnermunzel = brunnermunzel
|
||||||
|
|
||||||
def _repr_html_(self):
|
def _repr_html_(self):
|
||||||
@ -135,6 +191,12 @@ class MannWhitneyResult:
|
|||||||
return line1
|
return line1
|
||||||
|
|
||||||
def summary(self):
|
def summary(self):
|
||||||
|
"""
|
||||||
|
Return a stringified summary of the Mann–Whitney test
|
||||||
|
|
||||||
|
:rtype: str
|
||||||
|
"""
|
||||||
|
|
||||||
line1 = 'U = {:.1f}; p {}\nr = {}, {}'.format(self.statistic, fmt_p(self.pvalue, html=False), self.rank_biserial, self.direction)
|
line1 = 'U = {:.1f}; p {}\nr = {}, {}'.format(self.statistic, fmt_p(self.pvalue, html=False), self.rank_biserial, self.direction)
|
||||||
if self.brunnermunzel:
|
if self.brunnermunzel:
|
||||||
return line1 + '\n' + self.brunnermunzel.summary()
|
return line1 + '\n' + self.brunnermunzel.summary()
|
||||||
@ -142,24 +204,55 @@ class MannWhitneyResult:
|
|||||||
return line1
|
return line1
|
||||||
|
|
||||||
class BrunnerMunzelResult:
|
class BrunnerMunzelResult:
|
||||||
|
"""
|
||||||
|
Result of a Brunner–Munzel test
|
||||||
|
|
||||||
|
See :func:`yli.mannwhitney`. This library calls the Brunner–Munzel test statistic *W*.
|
||||||
|
"""
|
||||||
|
|
||||||
"""Result of a Brunner-Munzel test"""
|
"""Result of a Brunner-Munzel test"""
|
||||||
|
|
||||||
def __init__(self, statistic, pvalue):
|
def __init__(self, statistic, pvalue):
|
||||||
|
#: *W* statistic (*float*)
|
||||||
self.statistic = statistic
|
self.statistic = statistic
|
||||||
|
#: *p* value for the *W* statistic (*float*)
|
||||||
self.pvalue = pvalue
|
self.pvalue = pvalue
|
||||||
|
|
||||||
def _repr_html_(self):
|
def _repr_html_(self):
|
||||||
return '<i>W</i> = {:.1f}; <i>p</i> {}'.format(self.statistic, fmt_p(self.pvalue, html=True))
|
return '<i>W</i> = {:.1f}; <i>p</i> {}'.format(self.statistic, fmt_p(self.pvalue, html=True))
|
||||||
|
|
||||||
def summary(self):
|
def summary(self):
|
||||||
|
"""
|
||||||
|
Return a stringified summary of the Brunner–Munzel test
|
||||||
|
|
||||||
|
:rtype: str
|
||||||
|
"""
|
||||||
|
|
||||||
return 'W = {:.1f}; p {}'.format(self.statistic, fmt_p(self.pvalue, html=False))
|
return 'W = {:.1f}; p {}'.format(self.statistic, fmt_p(self.pvalue, html=False))
|
||||||
|
|
||||||
def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_continuity=False, alternative='two-sided', method='auto'):
|
def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_continuity=False, alternative='two-sided', method='auto'):
|
||||||
"""
|
"""
|
||||||
Perform a Mann-Whitney test
|
Perform a Mann-Whitney *U* test
|
||||||
|
|
||||||
brunnermunzel: Set to False to skip the Brunner-Munzel test
|
By default, this function performs a Brunner–Munzel test if the Mann–Whitney test is significant.
|
||||||
use_continuity, alternative, method: See scipy.stats.mannwhitneyu
|
If the Mann–Whitney test is significant but the Brunner–Munzel test is not, a warning is raised.
|
||||||
|
The Brunner–Munzel test is returned only if non-significant.
|
||||||
|
|
||||||
|
:param df: Data to perform the test on
|
||||||
|
:type df: DataFrame
|
||||||
|
:param dep: Column in *df* for the dependent variable (numeric)
|
||||||
|
:type dep: str
|
||||||
|
:param ind: Column in *df* for the independent variable (dichotomous)
|
||||||
|
:type ind: str
|
||||||
|
:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
|
||||||
|
:type nan_policy: str
|
||||||
|
:param brunnermunzel: Whether to compute the Brunner–Munzel test if the Mann–Whitney test is significant
|
||||||
|
:type brunnermunzel: bool
|
||||||
|
:param use_continuity: See *scipy.stats.mannwhitneyu*
|
||||||
|
:param alternative: See *scipy.stats.mannwhitneyu*
|
||||||
|
:param method: See *scipy.stats.mannwhitneyu*
|
||||||
|
|
||||||
|
:rtype: :class:`yli.sig_tests.MannWhitneyResult`
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Check for/clean NaNs
|
# Check for/clean NaNs
|
||||||
@ -198,14 +291,24 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont
|
|||||||
# Pearson chi-squared test
|
# Pearson chi-squared test
|
||||||
|
|
||||||
class PearsonChiSquaredResult:
|
class PearsonChiSquaredResult:
|
||||||
"""Result of a Pearson chi-squared test"""
|
"""
|
||||||
|
Result of a Pearson *χ*:sup:`2` test
|
||||||
|
|
||||||
|
See :func:`yli.chi2`.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, ct, statistic, dof, pvalue, oddsratio=None, riskratio=None):
|
def __init__(self, ct, statistic, dof, pvalue, oddsratio=None, riskratio=None):
|
||||||
|
#: Contingency table for the observations (*DataFrame*)
|
||||||
self.ct = ct
|
self.ct = ct
|
||||||
|
#: *χ*:sup:`2` statistic (*float*)
|
||||||
self.statistic = statistic
|
self.statistic = statistic
|
||||||
|
#: Degrees of freedom for the *χ*:sup:`2` distribution (*int*)
|
||||||
self.dof = dof
|
self.dof = dof
|
||||||
|
#: *p* value for the *χ*:sup:`2` test (*float*)
|
||||||
self.pvalue = pvalue
|
self.pvalue = pvalue
|
||||||
|
#: Odds ratio (*float*; *None* if not a 2×2 table)
|
||||||
self.oddsratio = oddsratio
|
self.oddsratio = oddsratio
|
||||||
|
#: Risk ratio (*float*; *None* if not a 2×2 table)
|
||||||
self.riskratio = riskratio
|
self.riskratio = riskratio
|
||||||
|
|
||||||
def _repr_html_(self):
|
def _repr_html_(self):
|
||||||
@ -217,6 +320,12 @@ class PearsonChiSquaredResult:
|
|||||||
self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, html=True))
|
self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, html=True))
|
||||||
|
|
||||||
def summary(self):
|
def summary(self):
|
||||||
|
"""
|
||||||
|
Return a stringified summary of the *χ*:sup:`2` test
|
||||||
|
|
||||||
|
:rtype: str
|
||||||
|
"""
|
||||||
|
|
||||||
if self.oddsratio is not None:
|
if self.oddsratio is not None:
|
||||||
return '{0}\nχ²({1}) = {2:.2f}; p {3}\nOR ({4:g}% CI) = {5}\nRR ({4:g}% CI) = {6}'.format(
|
return '{0}\nχ²({1}) = {2:.2f}; p {3}\nOR ({4:g}% CI) = {5}\nRR ({4:g}% CI) = {6}'.format(
|
||||||
self.ct, self.dof, self.statistic, fmt_p(self.pvalue, html=False), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary())
|
self.ct, self.dof, self.statistic, fmt_p(self.pvalue, html=False), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary())
|
||||||
@ -225,7 +334,24 @@ class PearsonChiSquaredResult:
|
|||||||
self.ct, self.dof, self.statistic, fmt_p(self.pvalue, html=False))
|
self.ct, self.dof, self.statistic, fmt_p(self.pvalue, html=False))
|
||||||
|
|
||||||
def chi2(df, dep, ind, *, nan_policy='warn'):
|
def chi2(df, dep, ind, *, nan_policy='warn'):
|
||||||
"""Perform a Pearson chi-squared test"""
|
"""
|
||||||
|
Perform a Pearson *χ*:sup:`2` test
|
||||||
|
|
||||||
|
If a 2×2 contingency table is obtained (i.e. if both variables are dichotomous), an odds ratio and risk ratio are calculated.
|
||||||
|
The ratios are calculated for the higher-valued value in each variable (i.e. ``True`` compared with ``False`` for a boolean).
|
||||||
|
The risk ratio is calculated relative to the independent variable.
|
||||||
|
|
||||||
|
:param df: Data to perform the test on
|
||||||
|
:type df: DataFrame
|
||||||
|
:param dep: Column in *df* for the dependent variable (categorical)
|
||||||
|
:type dep: str
|
||||||
|
:param ind: Column in *df* for the independent variable (categorical)
|
||||||
|
:type ind: str
|
||||||
|
:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
|
||||||
|
:type nan_policy: str
|
||||||
|
|
||||||
|
:rtype: :class:`yli.sig_tests.PearsonChiSquaredResult`
|
||||||
|
"""
|
||||||
|
|
||||||
# Check for/clean NaNs
|
# Check for/clean NaNs
|
||||||
df = check_nan(df[[ind, dep]], nan_policy)
|
df = check_nan(df[[ind, dep]], nan_policy)
|
||||||
@ -266,20 +392,45 @@ def chi2(df, dep, ind, *, nan_policy='warn'):
|
|||||||
# Pearson correlation
|
# Pearson correlation
|
||||||
|
|
||||||
class PearsonRResult:
|
class PearsonRResult:
|
||||||
"""Result of Pearson correlation"""
|
"""
|
||||||
|
Result of Pearson correlation
|
||||||
|
|
||||||
|
See :func:`yli.pearsonr`.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, statistic, pvalue):
|
def __init__(self, statistic, pvalue):
|
||||||
|
#: Pearson *r* correlation statistic (*float*)
|
||||||
self.statistic = statistic
|
self.statistic = statistic
|
||||||
|
#: *p* value for the *r* statistic (*float*)
|
||||||
self.pvalue = pvalue
|
self.pvalue = pvalue
|
||||||
|
|
||||||
def _repr_html_(self):
|
def _repr_html_(self):
|
||||||
return '<i>r</i> ({:g}% CI) = {}; <i>p</i> {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, html=True))
|
return '<i>r</i> ({:g}% CI) = {}; <i>p</i> {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, html=True))
|
||||||
|
|
||||||
def summary(self):
|
def summary(self):
|
||||||
|
"""
|
||||||
|
Return a stringified summary of the Pearson correlation
|
||||||
|
|
||||||
|
:rtype: str
|
||||||
|
"""
|
||||||
|
|
||||||
return 'r ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, html=False))
|
return 'r ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, html=False))
|
||||||
|
|
||||||
def pearsonr(df, dep, ind, *, nan_policy='warn'):
|
def pearsonr(df, dep, ind, *, nan_policy='warn'):
|
||||||
"""Compute the Pearson correlation coefficient (Pearson's r)"""
|
"""
|
||||||
|
Compute the Pearson correlation coefficient (Pearson's r)
|
||||||
|
|
||||||
|
:param df: Data to perform the test on
|
||||||
|
:type df: DataFrame
|
||||||
|
:param dep: Column in *df* for the dependent variable (numerical)
|
||||||
|
:type dep: str
|
||||||
|
:param ind: Column in *df* for the independent variable (numerical)
|
||||||
|
:type ind: str
|
||||||
|
:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
|
||||||
|
:type nan_policy: str
|
||||||
|
|
||||||
|
:rtype: :class:`yli.sig_tests.PearsonRResult`
|
||||||
|
"""
|
||||||
|
|
||||||
# Check for/clean NaNs
|
# Check for/clean NaNs
|
||||||
df = check_nan(df[[ind, dep]], nan_policy)
|
df = check_nan(df[[ind, dep]], nan_policy)
|
||||||
|
18
yli/utils.py
18
yli/utils.py
@ -143,26 +143,44 @@ class ConfidenceInterval:
|
|||||||
"""A confidence interval"""
|
"""A confidence interval"""
|
||||||
|
|
||||||
def __init__(self, lower, upper):
|
def __init__(self, lower, upper):
|
||||||
|
#: Lower confidence limit (*float*)
|
||||||
self.lower = lower
|
self.lower = lower
|
||||||
|
#: Upper confidence limit (*float*)
|
||||||
self.upper = upper
|
self.upper = upper
|
||||||
|
|
||||||
def _repr_html_(self):
|
def _repr_html_(self):
|
||||||
return self.summary()
|
return self.summary()
|
||||||
|
|
||||||
def summary(self):
|
def summary(self):
|
||||||
|
"""
|
||||||
|
Return a stringified summary of the confidence interval
|
||||||
|
|
||||||
|
:rtype: str
|
||||||
|
"""
|
||||||
|
|
||||||
return '{:.2f}–{:.2f}'.format(self.lower, self.upper)
|
return '{:.2f}–{:.2f}'.format(self.lower, self.upper)
|
||||||
|
|
||||||
class Estimate:
|
class Estimate:
|
||||||
"""A point estimate and surrounding confidence interval"""
|
"""A point estimate and surrounding confidence interval"""
|
||||||
|
|
||||||
def __init__(self, point, ci_lower, ci_upper):
|
def __init__(self, point, ci_lower, ci_upper):
|
||||||
|
#: Point estimate (*float*)
|
||||||
self.point = point
|
self.point = point
|
||||||
|
#: Lower confidence limit (*float*)
|
||||||
self.ci_lower = ci_lower
|
self.ci_lower = ci_lower
|
||||||
|
#: Upper confidence limit (*float*)
|
||||||
self.ci_upper = ci_upper
|
self.ci_upper = ci_upper
|
||||||
|
|
||||||
def _repr_html_(self):
|
def _repr_html_(self):
|
||||||
return self.summary()
|
return self.summary()
|
||||||
|
|
||||||
def summary(self):
|
def summary(self):
|
||||||
|
"""
|
||||||
|
Return a stringified summary of the estimate and confidence interval
|
||||||
|
|
||||||
|
:rtype: str
|
||||||
|
"""
|
||||||
|
|
||||||
return '{:.2f} ({:.2f}–{:.2f})'.format(self.point, self.ci_lower, self.ci_upper)
|
return '{:.2f} ({:.2f}–{:.2f})'.format(self.point, self.ci_lower, self.ci_upper)
|
||||||
|
|
||||||
def __neg__(self):
|
def __neg__(self):
|
||||||
|
Loading…
Reference in New Issue
Block a user