Update documentation

2022-11-09 17:05:04 +11:00 · 2022-11-09 17:05:04 +11:00 · c6cef4aee7
commit c6cef4aee7
parent 50597ddc74
5 changed files with 103 additions and 12 deletions
--- a/docs/general.rst
+++ b/docs/general.rst
@ -14,6 +14,8 @@ Most functions take a parameter **nan_policy** to specify how to handle *nan* va

 In determining whether there is *nan* in the data, only the columns specified in the function (if applicable) are considered.

+.. autofunction:: yli.utils.check_nan
+
 General result classes
 ----------------------

--- a/docs/index.rst
+++ b/docs/index.rst
@ -12,3 +12,4 @@ scipy-yli API reference
 	distributions.rst
 	bayes_factors.rst
 	global.rst
+	internal.rst
--- a/docs/internal.rst
+++ b/docs/internal.rst
@ -0,0 +1,23 @@
+Internal utilities
+==================
+
+Data wrangling
+--------------
+
+.. autofunction:: yli.utils.as_2groups
+
+.. autofunction:: yli.utils.convert_pandas_nullable
+
+*p* values
+----------
+
+.. autofunction:: yli.utils.fmt_p
+
+Formula manipulation
+--------------------
+
+.. autofunction:: yli.utils.cols_for_formula
+
+.. autofunction:: yli.utils.formula_factor_ref_category
+
+.. autofunction:: yli.utils.parse_patsy_term
--- a/yli/sig_tests.py
+++ b/yli/sig_tests.py
@ -325,9 +325,7 @@ def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_cont
 	:param alternative: See *scipy.stats.mannwhitneyu*
 	:param method: See *scipy.stats.mannwhitneyu*
 	
-	:return: The result of the Mann–Whitney test.
-	The result of a Brunner–Munzel test is included in the result object if and only if *brunnermunzel* is *True*,
-	*and* the Mann–Whitney test is significant, *and* the Brunner–Munzel test is non-significant.
+	:return: The result of the Mann–Whitney test. The result of a Brunner–Munzel test is included in the result object if and only if *brunnermunzel* is *True*, *and* the Mann–Whitney test is significant, *and* the Brunner–Munzel test is non-significant.
 	
 	:rtype: :class:`yli.sig_tests.MannWhitneyResult`
 	
--- a/yli/utils.py
+++ b/yli/utils.py
@ -26,7 +26,17 @@ from .config import config
 # Data cleaning and validation

 def check_nan(df, nan_policy):
-	"""Check df against nan_policy and return cleaned input"""
+	"""
+	Check *df* against *nan_policy* and return cleaned input
+	
+	:param df: Data to check for NaNs
+	:type df: DataFrame
+	:param nan_policy: Policy to apply when encountering NaN values (*warn*, *raise*, *omit*)
+	:type nan_policy: str
+	
+	:return: Data with NaNs removed, which may or may not be a copy of *df*
+	:rtype: DataFrame
+	"""
 	
 	if nan_policy == 'raise':
 		if pd.isna(df).any(axis=None):
@ -43,7 +53,17 @@ def check_nan(df, nan_policy):
 		raise Exception('Invalid nan_policy, expected "raise", "warn" or "omit"')

 def convert_pandas_nullable(df):
-	"""Convert pandas nullable dtypes (e.g. Int64) to non-nullable numpy dtypes"""
+	"""
+	Convert pandas nullable dtypes (e.g. *Int64*) to non-nullable numpy dtypes
+	
+	Behaviour on encountering *NA* values is undefined, so the data should be passed through :func:`check_nan` first.
+	
+	:param df: Data to check for pandas nullable dtypes
+	:type df: DataFrame
+	
+	:return: Data with pandas nullable dtypes converted, which may or may not be a copy of *df*
+	:rtype: DataFrame
+	"""
 	
 	# Avoid copy if possible
 	df_cleaned = None
@ -59,7 +79,19 @@ def convert_pandas_nullable(df):
 	return df_cleaned

 def as_2groups(df, data, group):
-	"""Group the data by the given variable, ensuring only 2 groups"""
+	"""
+	Group the data by the given variable, asserting that there are exactly 2 groups
+	
+	:param df: Data to group
+	:type df: DataFrame
+	:param group: Column to group by
+	:type group: str
+	
+	:return: (*group1*, *data1*, *group2*, *data2*)
+		
+		* **group1**, **group2** (*str*) – The 2 values of the grouping variable
+		* **data1**, **data2** (*DataFrame*) – The 2 corresponding subsets of *df*
+	"""
 	
 	# Get groupings
 	groups = list(df.groupby(group).groups.items())
@ -115,9 +147,19 @@ def do_fmt_p(p):

 def fmt_p(p, *, html, only_value=False, tabular=False):
 	"""
-	Format p value
+	Format *p* value for display
 	
-	tabular: If true, output in ‘tabular’ format of p values where decimal points align
+	:param p: *p* value to display
+	:type p: float
+	:param html: Whether to output as HTML (*True*) or plaintext (*False*)
+	:type html: bool
+	:param only_value: Whether to display only the value (*True*, e.g. ``0.04``, ``<0.001``) or the comparison symbol and value (*False*, e.g. ``= 0.04``, ``< 0.001``)
+	:type only_value: bool
+	:param tabular: Whether to pad with spaces so that decimal points align
+	:type tabular: bool
+	
+	:return: Formatted *p* value
+	:rtype: str
 	"""
 	
 	# FIXME: Make only_value and tabular enums
@ -250,7 +292,17 @@ class Estimate:
 # Patsy formula manipulation

 def cols_for_formula(formula, df):
-	"""Return the columns corresponding to the Patsy formula"""
+	"""
+	Return the columns corresponding to the Patsy formula
+	
+	:param formula: Patsy formula to parse
+	:type formula: str
+	:param df: Data to apply the formula on
+	:type df: DataFrame
+	
+	:return: Columns in (the right-hand side of) the formula
+	:rtype: List[str]
+	"""
 	
 	# Parse the formula
 	model_desc = patsy.ModelDesc.from_formula(formula)
@ -286,7 +338,17 @@ def formula_get_factor_info(formula, df, factor):
 	return factor_info

 def formula_factor_ref_category(formula, df, factor):
-	"""Get the reference category for a term in a Patsy formula referring to a categorical factor"""
+	"""
+	Get the reference category for a term in a Patsy formula referring to a categorical factor
+	
+	:param formula: Patsy formula to parse
+	:type formula: str
+	:param df: Data to apply the formula on
+	:type df: DataFrame
+	:param factor: Factor to determine reference category for (e.g. ``Country``, ``C(Country)``, ``C(Country, Treatment)``, ``C(Country, Treatment("Australia"))``)
+	
+	:return: Reference category for the specified factor
+	"""
 	
 	if '(' in factor and not factor.startswith('C('):
 		raise Exception('Attempted to get reference category for unknown expression type "{}"'.format(factor))
@ -319,8 +381,13 @@ def parse_patsy_term(formula, df, term):
 	"""
 	Parse a Patsy term into its component parts
 	
-	Returns: factor, column, contrast
-	e.g. "C(x, Treatment(y))[T.z]" -> "C(x, Treatment(y))", "x", "z"
+	**Example:** The term ``"C(x, Treatment(y))[T.z]"`` parses to ``("C(x, Treatment(y))", "x", "z")``.
+	
+	:return: (*factor*, *column*, *contrast*)
+		
+		* **factor** (*str*) – Name of the factor, as specified in the Patsy formula
+		* **column** (*str*) – Name of the DataFrame column corresponding to the factor
+		* **contrast** (*str*) – Name of the contrast for the factor, or *None* if not applicable
 	"""
 	
 	if '(' not in term: