Update documentation
This commit is contained in:
parent
50597ddc74
commit
c6cef4aee7
@ -14,6 +14,8 @@ Most functions take a parameter **nan_policy** to specify how to handle *nan* va
|
|||||||
|
|
||||||
In determining whether there is *nan* in the data, only the columns specified in the function (if applicable) are considered.
|
In determining whether there is *nan* in the data, only the columns specified in the function (if applicable) are considered.
|
||||||
|
|
||||||
|
.. autofunction:: yli.utils.check_nan
|
||||||
|
|
||||||
General result classes
|
General result classes
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
@ -12,3 +12,4 @@ scipy-yli API reference
|
|||||||
distributions.rst
|
distributions.rst
|
||||||
bayes_factors.rst
|
bayes_factors.rst
|
||||||
global.rst
|
global.rst
|
||||||
|
internal.rst
|
||||||
|
23
docs/internal.rst
Normal file
23
docs/internal.rst
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
Internal utilities
|
||||||
|
==================
|
||||||
|
|
||||||
|
Data wrangling
|
||||||
|
--------------
|
||||||
|
|
||||||
|
.. autofunction:: yli.utils.as_2groups
|
||||||
|
|
||||||
|
.. autofunction:: yli.utils.convert_pandas_nullable
|
||||||
|
|
||||||
|
*p* values
|
||||||
|
----------
|
||||||
|
|
||||||
|
.. autofunction:: yli.utils.fmt_p
|
||||||
|
|
||||||
|
Formula manipulation
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
.. autofunction:: yli.utils.cols_for_formula
|
||||||
|
|
||||||
|
.. autofunction:: yli.utils.formula_factor_ref_category
|
||||||
|
|
||||||
|
.. autofunction:: yli.utils.parse_patsy_term
|
@ -325,9 +325,7 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont
|
|||||||
:param alternative: See *scipy.stats.mannwhitneyu*
|
:param alternative: See *scipy.stats.mannwhitneyu*
|
||||||
:param method: See *scipy.stats.mannwhitneyu*
|
:param method: See *scipy.stats.mannwhitneyu*
|
||||||
|
|
||||||
:return: The result of the Mann–Whitney test.
|
:return: The result of the Mann–Whitney test. The result of a Brunner–Munzel test is included in the result object if and only if *brunnermunzel* is *True*, *and* the Mann–Whitney test is significant, *and* the Brunner–Munzel test is non-significant.
|
||||||
The result of a Brunner–Munzel test is included in the result object if and only if *brunnermunzel* is *True*,
|
|
||||||
*and* the Mann–Whitney test is significant, *and* the Brunner–Munzel test is non-significant.
|
|
||||||
|
|
||||||
:rtype: :class:`yli.sig_tests.MannWhitneyResult`
|
:rtype: :class:`yli.sig_tests.MannWhitneyResult`
|
||||||
|
|
||||||
|
85
yli/utils.py
85
yli/utils.py
@ -26,7 +26,17 @@ from .config import config
|
|||||||
# Data cleaning and validation
|
# Data cleaning and validation
|
||||||
|
|
||||||
def check_nan(df, nan_policy):
|
def check_nan(df, nan_policy):
|
||||||
"""Check df against nan_policy and return cleaned input"""
|
"""
|
||||||
|
Check *df* against *nan_policy* and return the cleaned input
|
||||||
|
|
||||||
|
:param df: Data to check for NaNs
|
||||||
|
:type df: DataFrame
|
||||||
|
:param nan_policy: Policy to apply when encountering NaN values (*warn*, *raise*, *omit*)
|
||||||
|
:type nan_policy: str
|
||||||
|
|
||||||
|
:return: Data with NaNs removed, which may or may not be a copy of *df*
|
||||||
|
:rtype: DataFrame
|
||||||
|
"""
|
||||||
|
|
||||||
if nan_policy == 'raise':
|
if nan_policy == 'raise':
|
||||||
if pd.isna(df).any(axis=None):
|
if pd.isna(df).any(axis=None):
|
||||||
@ -43,7 +53,17 @@ def check_nan(df, nan_policy):
|
|||||||
raise Exception('Invalid nan_policy, expected "raise", "warn" or "omit"')
|
raise Exception('Invalid nan_policy, expected "raise", "warn" or "omit"')
|
||||||
|
|
||||||
def convert_pandas_nullable(df):
|
def convert_pandas_nullable(df):
|
||||||
"""Convert pandas nullable dtypes (e.g. Int64) to non-nullable numpy dtypes"""
|
"""
|
||||||
|
Convert pandas nullable dtypes (e.g. *Int64*) to non-nullable numpy dtypes
|
||||||
|
|
||||||
|
Behaviour on encountering *NA* values is undefined, so the data should be passed through :func:`check_nan` first.
|
||||||
|
|
||||||
|
:param df: Data to check for pandas nullable dtypes
|
||||||
|
:type df: DataFrame
|
||||||
|
|
||||||
|
:return: Data with pandas nullable dtypes converted, which may or may not be a copy of *df*
|
||||||
|
:rtype: DataFrame
|
||||||
|
"""
|
||||||
|
|
||||||
# Avoid copy if possible
|
# Avoid copy if possible
|
||||||
df_cleaned = None
|
df_cleaned = None
|
||||||
@ -59,7 +79,19 @@ def convert_pandas_nullable(df):
|
|||||||
return df_cleaned
|
return df_cleaned
|
||||||
|
|
||||||
def as_2groups(df, data, group):
|
def as_2groups(df, data, group):
|
||||||
"""Group the data by the given variable, ensuring only 2 groups"""
|
"""
|
||||||
|
Group the data by the given variable, asserting only 2 groups
|
||||||
|
|
||||||
|
:param df: Data to group
|
||||||
|
:type df: DataFrame
|
||||||
|
:param group: Column to group by
|
||||||
|
:type group: str
|
||||||
|
|
||||||
|
:return: (*group1*, *data1*, *group2*, *data2*)
|
||||||
|
|
||||||
|
* **group1**, **group2** (*str*) – The 2 values of the grouping variable
|
||||||
|
* **data1**, **data2** (*DataFrame*) – The 2 corresponding subsets of *df*
|
||||||
|
"""
|
||||||
|
|
||||||
# Get groupings
|
# Get groupings
|
||||||
groups = list(df.groupby(group).groups.items())
|
groups = list(df.groupby(group).groups.items())
|
||||||
@ -115,9 +147,19 @@ def do_fmt_p(p):
|
|||||||
|
|
||||||
def fmt_p(p, *, html, only_value=False, tabular=False):
|
def fmt_p(p, *, html, only_value=False, tabular=False):
|
||||||
"""
|
"""
|
||||||
Format p value
|
Format *p* value for display
|
||||||
|
|
||||||
tabular: If true, output in ‘tabular’ format of p values where decimal points align
|
:param p: *p* value to display
|
||||||
|
:type p: float
|
||||||
|
:param html: Whether to output as HTML (*True*) or plaintext (*False*)
|
||||||
|
:type html: bool
|
||||||
|
:param only_value: Whether to display only the value (*True*, e.g. ``0.04``, ``<0.001``) or equality symbol and value (*False*, e.g. ``= 0.04``, ``< 0.001``)
|
||||||
|
:type only_value: bool
|
||||||
|
:param tabular: Whether to pad with spaces so that decimal points align
|
||||||
|
:type tabular: bool
|
||||||
|
|
||||||
|
:return: Formatted *p* value
|
||||||
|
:rtype: str
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# FIXME: Make only_value and tabular enums
|
# FIXME: Make only_value and tabular enums
|
||||||
@ -250,7 +292,17 @@ class Estimate:
|
|||||||
# Patsy formula manipulation
|
# Patsy formula manipulation
|
||||||
|
|
||||||
def cols_for_formula(formula, df):
|
def cols_for_formula(formula, df):
|
||||||
"""Return the columns corresponding to the Patsy formula"""
|
"""
|
||||||
|
Return the columns corresponding to the Patsy formula
|
||||||
|
|
||||||
|
:param formula: Patsy formula to parse
|
||||||
|
:type formula: str
|
||||||
|
:param df: Data to apply the formula on
|
||||||
|
:type df: DataFrame
|
||||||
|
|
||||||
|
:return: Columns in (the right-hand side of) the formula
|
||||||
|
:rtype: List[str]
|
||||||
|
"""
|
||||||
|
|
||||||
# Parse the formula
|
# Parse the formula
|
||||||
model_desc = patsy.ModelDesc.from_formula(formula)
|
model_desc = patsy.ModelDesc.from_formula(formula)
|
||||||
@ -286,7 +338,17 @@ def formula_get_factor_info(formula, df, factor):
|
|||||||
return factor_info
|
return factor_info
|
||||||
|
|
||||||
def formula_factor_ref_category(formula, df, factor):
|
def formula_factor_ref_category(formula, df, factor):
|
||||||
"""Get the reference category for a term in a Patsy formula referring to a categorical factor"""
|
"""
|
||||||
|
Get the reference category for a term in a Patsy formula referring to a categorical factor
|
||||||
|
|
||||||
|
:param formula: Patsy formula to parse
|
||||||
|
:type formula: str
|
||||||
|
:param df: Data to apply the formula on
|
||||||
|
:type df: DataFrame
|
||||||
|
:param factor: Factor to determine reference category for (e.g. ``Country``, ``C(Country)``, ``C(Country, Treatment)``, ``C(Country, Treatment("Australia"))``)
:type factor: str
|
||||||
|
|
||||||
|
:return: Reference category for the specified factor
|
||||||
|
"""
|
||||||
|
|
||||||
if '(' in factor and not factor.startswith('C('):
|
if '(' in factor and not factor.startswith('C('):
|
||||||
raise Exception('Attempted to get reference category for unknown expression type "{}"'.format(factor))
|
raise Exception('Attempted to get reference category for unknown expression type "{}"'.format(factor))
|
||||||
@ -319,8 +381,13 @@ def parse_patsy_term(formula, df, term):
|
|||||||
"""
|
"""
|
||||||
Parse a Patsy term into its component parts
|
Parse a Patsy term into its component parts
|
||||||
|
|
||||||
Returns: factor, column, contrast
|
**Example:** The term ``"C(x, Treatment(y))[T.z]"`` parses to ``("C(x, Treatment(y))", "x", "z")``.
|
||||||
e.g. "C(x, Treatment(y))[T.z]" -> "C(x, Treatment(y))", "x", "z"
|
|
||||||
|
:return: (*factor*, *column*, *contrast*)
|
||||||
|
|
||||||
|
* **factor** (*str*) – Name of the factor, as specified in the Patsy formula
|
||||||
|
* **column** (*str*) – Name of the DataFrame column corresponding to the factor
|
||||||
|
* **contrast** (*str*) – Name of the contrast for the factor, or *None* if not applicable
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if '(' not in term:
|
if '(' not in term:
|
||||||
|
Loading…
Reference in New Issue
Block a user