Update documentation
This commit is contained in:
parent
50597ddc74
commit
c6cef4aee7
@ -14,6 +14,8 @@ Most functions take a parameter **nan_policy** to specify how to handle *nan* va
|
||||
|
||||
When determining whether the data contain *nan* values, only the columns specified in the function call (if applicable) are considered.
|
||||
|
||||
.. autofunction:: yli.utils.check_nan
|
||||
|
||||
General result classes
|
||||
----------------------
|
||||
|
||||
|
@ -12,3 +12,4 @@ scipy-yli API reference
|
||||
distributions.rst
|
||||
bayes_factors.rst
|
||||
global.rst
|
||||
internal.rst
|
||||
|
23
docs/internal.rst
Normal file
23
docs/internal.rst
Normal file
@ -0,0 +1,23 @@
|
||||
Internal utilities
|
||||
==================
|
||||
|
||||
Data wrangling
|
||||
--------------
|
||||
|
||||
.. autofunction:: yli.utils.as_2groups
|
||||
|
||||
.. autofunction:: yli.utils.convert_pandas_nullable
|
||||
|
||||
*p* values
|
||||
----------
|
||||
|
||||
.. autofunction:: yli.utils.fmt_p
|
||||
|
||||
Formula manipulation
|
||||
--------------------
|
||||
|
||||
.. autofunction:: yli.utils.cols_for_formula
|
||||
|
||||
.. autofunction:: yli.utils.formula_factor_ref_category
|
||||
|
||||
.. autofunction:: yli.utils.parse_patsy_term
|
@ -325,9 +325,7 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont
|
||||
:param alternative: See *scipy.stats.mannwhitneyu*
|
||||
:param method: See *scipy.stats.mannwhitneyu*
|
||||
|
||||
:return: The result of the Mann–Whitney test.
|
||||
The result of a Brunner–Munzel test is included in the result object if and only if *brunnermunzel* is *True*,
|
||||
*and* the Mann–Whitney test is significant, *and* the Brunner–Munzel test is non-significant.
|
||||
:return: The result of the Mann–Whitney test. The result of a Brunner–Munzel test is included in the result object if and only if *brunnermunzel* is *True*, *and* the Mann–Whitney test is significant, *and* the Brunner–Munzel test is non-significant.
|
||||
|
||||
:rtype: :class:`yli.sig_tests.MannWhitneyResult`
|
||||
|
||||
|
85
yli/utils.py
85
yli/utils.py
@ -26,7 +26,17 @@ from .config import config
|
||||
# Data cleaning and validation
|
||||
|
||||
def check_nan(df, nan_policy):
|
||||
"""Check df against nan_policy and return cleaned input"""
|
||||
"""
|
||||
Check *df* against *nan_policy* and return the cleaned input
|
||||
|
||||
:param df: Data to check for NaNs
|
||||
:type df: DataFrame
|
||||
:param nan_policy: Policy to apply when encountering NaN values (*warn*, *raise*, *omit*)
|
||||
:type nan_policy: str
|
||||
|
||||
:return: Data with NaNs removed, which may or may not be copied
|
||||
:rtype: DataFrame
|
||||
"""
|
||||
|
||||
if nan_policy == 'raise':
|
||||
if pd.isna(df).any(axis=None):
|
||||
@ -43,7 +53,17 @@ def check_nan(df, nan_policy):
|
||||
raise Exception('Invalid nan_policy, expected "raise", "warn" or "omit"')
|
||||
|
||||
def convert_pandas_nullable(df):
|
||||
"""Convert pandas nullable dtypes (e.g. Int64) to non-nullable numpy dtypes"""
|
||||
"""
|
||||
Convert pandas nullable dtypes (e.g. *Int64*) to non-nullable numpy dtypes
|
||||
|
||||
Behaviour on encountering *NA* values is undefined, so the data should be passed through :func:`check_nan` first.
|
||||
|
||||
:param df: Data to check for pandas nullable dtypes
|
||||
:type df: DataFrame
|
||||
|
||||
:return: Data with pandas nullable dtypes converted, which may or may not be copied
|
||||
:rtype: DataFrame
|
||||
"""
|
||||
|
||||
# Avoid copy if possible
|
||||
df_cleaned = None
|
||||
@ -59,7 +79,19 @@ def convert_pandas_nullable(df):
|
||||
return df_cleaned
|
||||
|
||||
def as_2groups(df, data, group):
|
||||
"""Group the data by the given variable, ensuring only 2 groups"""
|
||||
"""
|
||||
Group the data by the given variable, asserting only 2 groups
|
||||
|
||||
:param df: Data to group
|
||||
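:type df: DataFrame
:param data: Column(s) of *df* whose values are returned for each group (inferred from the signature and return value – confirm against the implementation)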
|
||||
:param group: Column to group by
|
||||
:type group: str
|
||||
|
||||
:return: (*group1*, *data1*, *group2*, *data2*)
|
||||
|
||||
* **group1**, **group2** (*str*) – The 2 values of the grouping variable
|
||||
* **data1**, **data2** (*DataFrame*) – The 2 corresponding subsets of *df*
|
||||
"""
|
||||
|
||||
# Get groupings
|
||||
groups = list(df.groupby(group).groups.items())
|
||||
@ -115,9 +147,19 @@ def do_fmt_p(p):
|
||||
|
||||
def fmt_p(p, *, html, only_value=False, tabular=False):
|
||||
"""
|
||||
Format p value
|
||||
Format *p* value for display
|
||||
|
||||
tabular: If true, output in ‘tabular’ format of p values where decimal points align
|
||||
:param p: *p* value to display
|
||||
:type p: float
|
||||
:param html: Whether to output as HTML (*True*) or plaintext (*False*)
|
||||
:type html: bool
|
||||
:param only_value: Whether to display only the value (*True*, e.g. ``0.04``, ``<0.001``) or equality symbol and value (*False*, e.g. ``= 0.04``, ``< 0.001``)
|
||||
:type only_value: bool
|
||||
:param tabular: Whether to pad with spaces so that decimal points align
|
||||
:type tabular: bool
|
||||
|
||||
:return: Formatted *p* value
|
||||
:rtype: str
|
||||
"""
|
||||
|
||||
# FIXME: Make only_value and tabular enums
|
||||
@ -250,7 +292,17 @@ class Estimate:
|
||||
# Patsy formula manipulation
|
||||
|
||||
def cols_for_formula(formula, df):
|
||||
"""Return the columns corresponding to the Patsy formula"""
|
||||
"""
|
||||
Return the columns corresponding to the Patsy formula
|
||||
|
||||
:param formula: Patsy formula to parse
|
||||
:type formula: str
|
||||
:param df: Data to apply the formula on
|
||||
:type df: DataFrame
|
||||
|
||||
:return: Columns in (the right-hand side of) the formula
|
||||
:rtype: List[str]
|
||||
"""
|
||||
|
||||
# Parse the formula
|
||||
model_desc = patsy.ModelDesc.from_formula(formula)
|
||||
@ -286,7 +338,17 @@ def formula_get_factor_info(formula, df, factor):
|
||||
return factor_info
|
||||
|
||||
def formula_factor_ref_category(formula, df, factor):
|
||||
"""Get the reference category for a term in a Patsy formula referring to a categorical factor"""
|
||||
"""
|
||||
Get the reference category for a term in a Patsy formula referring to a categorical factor
|
||||
|
||||
:param formula: Patsy formula to parse
|
||||
:type formula: str
|
||||
:param df: Data to apply the formula on
|
||||
:type df: DataFrame
|
||||
:param factor: Factor to determine reference category for (e.g. ``Country``, ``C(Country)``, ``C(Country, Treatment)``, ``C(Country, Treatment("Australia"))``)
:type factor: str
|
||||
|
||||
:return: Reference category for the specified factor
|
||||
"""
|
||||
|
||||
if '(' in factor and not factor.startswith('C('):
|
||||
raise Exception('Attempted to get reference category for unknown expression type "{}"'.format(factor))
|
||||
@ -319,8 +381,13 @@ def parse_patsy_term(formula, df, term):
|
||||
"""
|
||||
Parse a Patsy term into its component parts
|
||||
|
||||
Returns: factor, column, contrast
|
||||
e.g. "C(x, Treatment(y))[T.z]" -> "C(x, Treatment(y))", "x", "z"
|
||||
**Example:** The term ``"C(x, Treatment(y))[T.z]"`` parses to ``("C(x, Treatment(y))", "x", "z")``.
|
||||
|
||||
:return: (*factor*, *column*, *contrast*)
|
||||
|
||||
* **factor** (*str*) – Name of the factor, as specified in the Patsy formula
|
||||
* **column** (*str*) – Name of the DataFrame column corresponding to the factor
|
||||
* **contrast** (*str*) – Name of the contrast for the factor, or *None* if not applicable
|
||||
"""
|
||||
|
||||
if '(' not in term:
|
||||
|
Loading…
Reference in New Issue
Block a user