From c6cef4aee71cfee396bd7bbabef2e534690ef458 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Wed, 9 Nov 2022 17:05:04 +1100 Subject: [PATCH] Update documentation --- docs/general.rst | 2 ++ docs/index.rst | 1 + docs/internal.rst | 23 +++++++++++++ yli/sig_tests.py | 4 +-- yli/utils.py | 85 ++++++++++++++++++++++++++++++++++++++++++----- 5 files changed, 103 insertions(+), 12 deletions(-) create mode 100644 docs/internal.rst diff --git a/docs/general.rst b/docs/general.rst index a40a103..0cdfc1a 100644 --- a/docs/general.rst +++ b/docs/general.rst @@ -14,6 +14,8 @@ Most functions take a parameter **nan_policy** to specify how to handle *nan* va In determining whether there is *nan* in the data, only the columns specified in the function (if applicable) are considered. +.. autofunction:: yli.utils.check_nan + General result classes ---------------------- diff --git a/docs/index.rst b/docs/index.rst index 11d7987..0ae28de 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -12,3 +12,4 @@ scipy-yli API reference distributions.rst bayes_factors.rst global.rst + internal.rst diff --git a/docs/internal.rst b/docs/internal.rst new file mode 100644 index 0000000..56fc5ff --- /dev/null +++ b/docs/internal.rst @@ -0,0 +1,23 @@ +Internal utilities +================== + +Data wrangling +-------------- + +.. autofunction:: yli.utils.as_2groups + +.. autofunction:: yli.utils.convert_pandas_nullable + +*p* values +---------- + +.. autofunction:: yli.utils.fmt_p + +Formula manipulation +-------------------- + +.. autofunction:: yli.utils.cols_for_formula + +.. autofunction:: yli.utils.formula_factor_ref_category + +.. autofunction:: yli.utils.parse_patsy_term diff --git a/yli/sig_tests.py b/yli/sig_tests.py index 92bc5bc..f01cd83 100644 --- a/yli/sig_tests.py +++ b/yli/sig_tests.py @@ -325,9 +325,7 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont :param alternative: See *scipy.stats.mannwhitneyu* :param method: See *scipy.stats.mannwhitneyu* - :return: The result of the Mann–Whitney test. - The result of a Brunner–Munzel test is included in the result object if and only if *brunnermunzel* is *True*, - *and* the Mann–Whitney test is significant, *and* the Brunner–Munzel test is non-significant. + :return: The result of the Mann–Whitney test. The result of a Brunner–Munzel test is included in the result object if and only if *brunnermunzel* is *True*, *and* the Mann–Whitney test is significant, *and* the Brunner–Munzel test is non-significant. :rtype: :class:`yli.sig_tests.MannWhitneyResult` diff --git a/yli/utils.py b/yli/utils.py index 9bb0134..94514a2 100644 --- a/yli/utils.py +++ b/yli/utils.py @@ -26,7 +26,17 @@ from .config import config # Data cleaning and validation def check_nan(df, nan_policy): - """Check df against nan_policy and return cleaned input""" + """ + Check df against *nan_policy* and return cleaned input + + :param df: Data to check for NaNs + :type df: DataFrame + :param nan_policy: Policy to apply when encountering NaN values (*warn*, *raise*, *omit*) + :type nan_policy: str + + :return: Data with NaNs removed, which may or may not be copied + :rtype: DataFrame + """ if nan_policy == 'raise': if pd.isna(df).any(axis=None): @@ -43,7 +53,17 @@ def check_nan(df, nan_policy): raise Exception('Invalid nan_policy, expected "raise", "warn" or "omit"') def convert_pandas_nullable(df): - """Convert pandas nullable dtypes (e.g. Int64) to non-nullable numpy dtypes""" + """ + Convert pandas nullable dtypes (e.g. *Int64*) to non-nullable numpy dtypes + + Behaviour on encountering *NA* values is undefined, so the data should be passed through :func:`check_nan` first. + + :param df: Data to check for pandas nullable dtypes + :type df: DataFrame + + :return: Data with pandas nullable dtypes converted, which may or may not be copied + :rtype: DataFrame + """ # Avoid copy if possible df_cleaned = None @@ -59,7 +79,19 @@ def convert_pandas_nullable(df): return df_cleaned def as_2groups(df, data, group): - """Group the data by the given variable, ensuring only 2 groups""" + """ + Group the data by the given variable, asserting only 2 groups + + :param df: Data to group + :type df: DataFrame + :param group: Column to group by + :type group: str + + :return: (*group1*, *data1*, *group2*, *data2*) + + * **group1**, **group2** (*str*) – The 2 values of the grouping variable + * **data1**, **data2** (*DataFrame*) – The 2 corresponding subsets of *df* + """ # Get groupings groups = list(df.groupby(group).groups.items()) @@ -115,9 +147,19 @@ def do_fmt_p(p): def fmt_p(p, *, html, only_value=False, tabular=False): """ - Format p value + Format *p* value for display - tabular: If true, output in ‘tabular’ format of p values where decimal points align + :param p: *p* value to display + :type p: float + :param html: Whether to output as HTML (*True*) or plaintext (*False*) + :type html: bool + :param only_value: Whether to display only the value (*True*, e.g. ``0.04``, ``<0.001``) or equality symbol and value (*False*, e.g. ``= 0.04``, ``< 0.001``) + :type only_value: bool + :param tabular: Whether to pad with spaces so that decimal points align + :type tabular: bool + + :return: Formatted *p* value + :rtype: str """ # FIXME: Make only_value and tabular enums @@ -250,7 +292,17 @@ class Estimate: # Patsy formula manipulation def cols_for_formula(formula, df): - """Return the columns corresponding to the Patsy formula""" + """ + Return the columns corresponding to the Patsy formula + + :param formula: Patsy formula to parse + :type formula: str + :param df: Data to apply the formula on + :type df: DataFrame + + :return: Columns in (the right-hand side of) the formula + :rtype: List[str] + """ # Parse the formula model_desc = patsy.ModelDesc.from_formula(formula) @@ -286,7 +338,17 @@ def formula_get_factor_info(formula, df, factor): return factor_info def formula_factor_ref_category(formula, df, factor): - """Get the reference category for a term in a Patsy formula referring to a categorical factor""" + """ + Get the reference category for a term in a Patsy formula referring to a categorical factor + + :param formula: Patsy formula to parse + :type formula: str + :param df: Data to apply the formula on + :type df: DataFrame + :param factor: Factor to determine reference category for (e.g. ``Country``, ``C(Country)``, ``C(Country, Treatment)``, ``C(Country, Treatment("Australia"))``) + + :return: Reference category for the specified factor + """ if '(' in factor and not factor.startswith('C('): raise Exception('Attempted to get reference category for unknown expression type "{}"'.format(factor)) @@ -319,8 +381,13 @@ def parse_patsy_term(formula, df, term): """ Parse a Patsy term into its component parts - Returns: factor, column, contrast - e.g. "C(x, Treatment(y))[T.z]" -> "C(x, Treatment(y))", "x", "z" + **Example:** The term ``"C(x, Treatment(y))[T.z]"`` parses to ``("C(x, Treatment(y))", "x", "z")``. + + :return: (*factor*, *column*, *contrast*) + + * **factor** (*str*) – Name of the factor, as specified in the Patsy formula + * **column** (*str*) – Name of the DataFrame column corresponding to the factor + * **contrast** (*str*) – Name of the contrast for the factor, or *None* if not applicable """ if '(' not in term: