scipy-yli/yli/sig_tests.py

#   scipy-yli: Helpful SciPy utilities and recipes
#   Copyright © 2022–2023  Lee Yingtong Li (RunasSudo)
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU Affero General Public License for more details.
#
#   You should have received a copy of the GNU Affero General Public License
#   along with this program.  If not, see <https://www.gnu.org/licenses/>.

import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.api as sm

import functools
import warnings

from .config import config
from .utils import Estimate, Interval, PValueStyle, as_2groups, as_numeric, check_nan, convert_pandas_nullable, fmt_p

# ----------------
# Student's t test

class TTestResult:
	"""
	Result of a Student's *t* test
	
	See :func:`yli.ttest_ind`.
	"""
	
	def __init__(self, *, statistic, dof, pvalue, dep, ind, group1, group2, mu1, mu2, sd1, sd2, delta, delta_direction):
		#: *t* statistic (*float*)
		self.statistic = statistic
		#: Degrees of freedom of the *t* distribution (*int*)
		self.dof = dof
		#: *p* value for the *t* statistic (*float*)
		self.pvalue = pvalue
		
		#: Name of the dependent variable (*str*)
		self.dep = dep
		#: Name of the independent variable (*str*)
		self.ind = ind
		
		#: Name of the first group (*str*)
		self.group1 = group1
		#: Name of the second group (*str*)
		self.group2 = group2
		#: Mean of the first group (*float*)
		self.mu1 = mu1
		#: Mean of the second group (*float*)
		self.mu2 = mu2
		#: Standard deviation of the first group (*float*)
		self.sd1 = sd1
		#: Standard deviation of the second group (*float*)
		self.sd2 = sd2
		
		#: Absolute value of the mean difference (:class:`yli.utils.Estimate`)
		self.delta = delta
		#: Description of the direction of the effect (*str*)
		self.delta_direction = delta_direction
	
	def _comparison_table(self, html):
		"""Return a table showing the means/SDs for each group"""
		
		table_data = [[
			'{:.2f} ({:.2f})'.format(self.mu1, self.sd1),
			'{:.2f} ({:.2f})'.format(self.mu2, self.sd2),
		]]
		
		if html:
			table = pd.DataFrame(table_data, index=pd.Index(['\ue000 (SD)'], name=self.dep), columns=pd.Index([self.group1, self.group2], name=self.ind)) # U+E000 is in Private Use Area, mark μ symbol
			table_str = table._repr_html_()
			return table_str.replace('\ue000', '<i>μ</i>')
		else:
			table = pd.DataFrame(table_data, index=pd.Index(['μ (SD)'], name=self.dep), columns=pd.Index([self.group1, self.group2], name=self.ind))
			return str(table)
	
	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()
	
	def _repr_html_(self):
		return '{}<br><i>t</i>({:.0f}) = {:.2f}; <i>p</i> {}<br>Δ<i>μ</i> ({:g}% CI) = {}, {}'.format(self._comparison_table(True), self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML), (1-config.alpha)*100, self.delta.summary(), self.delta_direction)
	
	def summary(self):
		"""
		Return a stringified summary of the *t* test
		
		:rtype: str
		"""
		
		return '{}\n\nt({:.0f}) = {:.2f}; p {}\nΔμ ({:g}% CI) = {}, {}'.format(self._comparison_table(False), self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION), (1-config.alpha)*100, self.delta.summary(), self.delta_direction)
	
	def summary_short(self, html):
		"""
		Return a stringified summary of the *t* test (*t* statistic only)
		
		:rtype: str
		"""
		
		if html:
			return '<i>t</i>({:.0f}) = {:.2f}'.format(self.dof, self.statistic)
		else:
			return 't({:.0f}) = {:.2f}'.format(self.dof, self.statistic)

def ttest_ind(df, dep, ind, *, nan_policy='warn'):
	"""
	Perform an independent 2-sample Student's *t* test
	
	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (numeric)
	:type dep: str
	:param ind: Column in *df* for the independent variable (dichotomous)
	:type ind: str
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	
	:rtype: :class:`yli.sig_tests.TTestResult`
	
	**Example:**
	
	.. code-block::
		
		df = pd.DataFrame({
			'Type': ['Fresh'] * 10 + ['Stored'] * 10,
			'Potency': [10.2, 10.5, 10.3, ...]
		})
		yli.ttest_ind(df, 'Potency', 'Type')
	
	.. code-block:: text
		
		Type            Fresh       Stored
		Potency                           
		μ (SD)   10.37 (0.32)  9.83 (0.24)
		
		t(18) = 4.24; p < 0.001*
		Δμ (95% CI) = 0.54 (0.27–0.81), Fresh > Stored
	
	The output states that the value of the *t* statistic is 4.24, the *t* distribution has 18 degrees of freedom, and the test is significant with *p* value < 0.001.
	The mean difference is 0.54 in favour of the *Fresh* group, with 95% confidence interval 0.27–0.81.
	"""
	
	# Check for/clean NaNs
	df = check_nan(df[[ind, dep]], nan_policy)
	
	# Ensure 2 groups for ind
	group1, data1, group2, data2 = as_2groups(df, dep, ind)
	
	# Do t test
	# Use statsmodels rather than SciPy because this provides the mean difference automatically
	d1 = sm.stats.DescrStatsW(data1, ddof=1)
	d2 = sm.stats.DescrStatsW(data2, ddof=1)
	
	cm = sm.stats.CompareMeans(d1, d2)
	statistic, pvalue, dof = cm.ttest_ind()
	
	delta = d1.mean - d2.mean
	ci0, ci1 = cm.tconfint_diff(config.alpha)
	
	# t test is symmetric so take absolute value
	if d2.mean > d1.mean:
		delta, ci0, ci1 = -delta, -ci1, -ci0
		d1, d2 = d2, d1
		group1, group2 = group2, group1
		# Now group1 > group2
	
	return TTestResult(
		statistic=abs(statistic), dof=dof, pvalue=pvalue,
		dep=dep, ind=ind,
		group1=group1, group2=group2, mu1=d1.mean, mu2=d2.mean, sd1=d1.std, sd2=d2.std,
		delta=Estimate(delta, ci0, ci1),
		delta_direction='{} > {}'.format(group1, group2))

class MultipleTTestResult:
	"""
	Result of multiple Student's *t* tests, adjusted for multiplicity
	
	See :func:`yli.ttest_ind_multiple`.
	"""
	
	def __init__(self, *, dep, results):
		#: Name of the dependent variable (*str*)
		self.dep = dep
		#: Results of the *t* tests (*List[*\ :class:`TTestResult`\ *]*)
		self.results = results
	
	def _comparison_table(self, html):
		"""Return a table showing the means/SDs for each group"""
		
		group1 = self.results[0].group1
		group2 = self.results[0].group2
		
		# TODO: Render HTML directly so can have proper HTML p values
		table_data = []
		for row in self.results:
			cell1 = '{:.2f} ({:.2f})'.format(row.mu1, row.sd1)
			cell2 = '{:.2f} ({:.2f})'.format(row.mu2, row.sd2)
			cell_pvalue = fmt_p(row.pvalue, PValueStyle.TABULAR)
			
			# Display the cells the right way around
			if row.group1 == group1 and row.group2 == group2:
				table_data.append([cell1, cell2, cell_pvalue])
			elif row.group1 == group2 and row.group2 == group1:
				table_data.append([cell2, cell1, cell_pvalue])
			else:
				raise Exception('t tests have different groups')
		
		if html:
			table = pd.DataFrame(table_data, index=pd.Index([row.ind for row in self.results], name='\ue000 (SD)'), columns=pd.Index([self.results[0].group1, self.results[0].group2, '\ue001'], name=self.dep)) # U+E000 is in Private Use Area, mark μ symbol
			table_str = table._repr_html_()
			return table_str.replace('\ue000', '<i>μ</i>').replace('\ue001', '<i>p</i>')
		else:
			table = pd.DataFrame(table_data, index=pd.Index([row.ind for row in self.results], name='μ (SD)'), columns=pd.Index([self.results[0].group1, self.results[0].group2, 'p'], name=self.dep))
			return str(table)
	
	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()
	
	def _repr_html_(self):
		return self._comparison_table(True)
	
	def summary(self):
		"""
		Return a stringified summary of the *t* tests
		
		:rtype: str
		"""
		return str(self._comparison_table(False))

def ttest_ind_multiple(df, dep, inds, *, nan_policy='warn', method='hs'):
	"""
	Perform independent 2-sample Student's *t* tests with multiple independent variables, adjusting for multiplicity
	
	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (numeric)
	:type dep: str
	:param ind: Columns in *df* for the independent variables (dichotomous)
	:type ind: List[str]
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	:param method: Method to apply for multiplicity adjustment (see `statsmodels multipletests <https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html>`_)
	:type method: str
	
	:rtype: :class:`yli.sig_tests.MultipleTTestResult`
	"""
	
	# TODO: Unit testing
	# FIXME: Assert groups of independent variables have same levels
	
	# Perform t tests
	results = []
	for ind in inds:
		results.append(ttest_ind(df, dep, ind, nan_policy=nan_policy))
	
	# Adjust for multiplicity
	_, pvalues_corrected, _, _ = sm.stats.multipletests([result.pvalue for result in results], alpha=config.alpha, method=method)
	
	for result, pvalue_corrected in zip(results, pvalues_corrected):
		result.pvalue = pvalue_corrected
	
	return MultipleTTestResult(dep=dep, results=results)

# -------------
# One-way ANOVA

class FTestResult:
	"""
	Result of an *F* test for ANOVA/regression
	
	See :func:`yli.anova_oneway` and :meth:`yli.regress.RegressionModel.ftest`.
	"""
	
	def __init__(self, statistic, dof1, dof2, pvalue):
		#: *F* statistic (*float*)
		self.statistic = statistic
		#: Degrees of freedom in the *F* distribution numerator (*int*)
		self.dof1 = dof1
		#: Degrees of freedom in the *F* distribution denominator (*int*)
		self.dof2 = dof2
		#: *p* value for the *F* statistic (*float*)
		self.pvalue = pvalue
	
	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()
	
	def _repr_html_(self):
		return '<i>F</i>({:.0f}, {:.0f}) = {:.2f}; <i>p</i> {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML))
	
	def summary(self):
		"""
		Return a stringified summary of the *F* test
		
		:rtype: str
		"""
		
		return 'F({:.0f}, {:.0f}) = {:.2f}; p {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION))

def anova_oneway(df, dep, ind, *, nan_policy='warn'):
	"""
	Perform one-way ANOVA
	
	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (numeric)
	:type dep: str
	:param ind: Column in *df* for the independent variable (categorical)
	:type ind: str
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	
	:rtype: :class:`yli.sig_tests.FTestResult`
	
	**Example:**
	
	.. code-block::
		
		df = pd.DataFrame({
			'Method': [1]*8 + [2]*7 + [3]*9,
			'Score': [96, 79, 91, ...]
		})
		yli.anova_oneway(df, 'Score', 'Method')
	
	.. code-block:: text
		
		F(2, 21) = 29.57; p < 0.001*
	
	The output states that the value of the *F* statistic is 29.57, the *F* distribution has 2 degrees of freedom in the numerator and 21 in the denominator, and the test is significant with *p* value < 0.001.
	"""
	
	# Check for/clean NaNs
	df = check_nan(df[[ind, dep]], nan_policy)
	
	# Group by independent variable
	groups = df.groupby(ind)[dep]
	
	# Perform one-way ANOVA
	result = stats.f_oneway(*[groups.get_group(k) for k in groups.groups])
	
	# See stats.f_oneway implementation
	dfbn = len(groups.groups) - 1
	dfwn = len(df) - len(groups.groups)
	
	return FTestResult(result.statistic, dfbn, dfwn, result.pvalue)

# -----------------
# Mann-Whitney test

class MannWhitneyResult:
	"""
	Result of a Mann–Whitney *U* test
	
	See :func:`yli.mannwhitney`.
	"""
	
	def __init__(self, *, statistic, pvalue, dep, ind, group1, group2, med1, med2, iqr1, iqr2, range1, range2, rank_biserial, direction, brunnermunzel=None):
		#: Lesser of the two Mann–Whitney *U* statistics (*float*)
		self.statistic = statistic
		#: *p* value for the *U* statistic (*float*)
		self.pvalue = pvalue
		
		#: Name of the dependent variable (*str*)
		self.dep = dep
		#: Name of the independent variable (*str*)
		self.ind = ind
		
		#: Name of the first group (*str*)
		self.group1 = group1
		#: Name of the second group (*str*)
		self.group2 = group2
		#: Median of the first group (*float*)
		self.med1 = med1
		#: Median of the second group (*float*)
		self.med2 = med2
		#: Interquartile range of the first group (:class:`yli.utils.Interval`)
		self.iqr1 = iqr1
		#: Interquartile range of the second group (:class:`yli.utils.Interval`)
		self.iqr2 = iqr2
		#: Range of the first group (:class:`yli.utils.Interval`)
		self.range1 = range2
		#: Range of the second group (:class:`yli.utils.Interval`)
		self.range2 = range2
		
		#: Absolute value of the rank-biserial correlation (*float*)
		self.rank_biserial = rank_biserial
		#: Description of the direction of the effect (*str*)
		self.direction = direction
		
		#: :class:`BrunnerMunzelResult` on the same data, or *None* if N/A
		self.brunnermunzel = brunnermunzel
	
	def _comparison_table(self, html):
		"""Return a table showing the medians/IQRs/ranges for each group"""
		
		table_data = [
			['{:.2f} ({})'.format(self.med1, self.iqr1.summary()), '{:.2f} ({})'.format(self.med2, self.iqr2.summary())],
			['{:.2f} ({})'.format(self.med1, self.range1.summary()), '{:.2f} ({})'.format(self.med2, self.range2.summary())],
		]
		table = pd.DataFrame(table_data, index=pd.Index(['Median (IQR)', 'Median (range)'], name=self.dep), columns=pd.Index([self.group1, self.group2], name=self.ind))
		
		if html:
			return table._repr_html_()
		else:
			return str(table)
	
	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()
	
	def _repr_html_(self):
		line1 = '{}<br><i>U</i> = {:.1f}; <i>p</i> {}<br><i>r</i> = {:.2f}, {}'.format(self._comparison_table(True), self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML), self.rank_biserial, self.direction)
		if self.brunnermunzel:
			return line1 + '<br>' + self.brunnermunzel._repr_html_()
		else:
			return line1
	
	def summary(self):
		"""
		Return a stringified summary of the Mann–Whitney test
		
		:rtype: str
		"""
		
		line1 = '{}\n\nU = {:.1f}; p {}\nr = {:.2f}, {}'.format(self._comparison_table(False), self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION), self.rank_biserial, self.direction)
		if self.brunnermunzel:
			return line1 + '\n' + self.brunnermunzel.summary()
		else:
			return line1
	
	def summary_short(self, html):
		"""
		Return a stringified summary of the Mann–Whitney test (*U* statistic only)
		
		:rtype: str
		"""
		
		if html:
			return '<i>U</i> = {:.1f}'.format(self.statistic)
		else:
			return 'U = {:.1f}'.format(self.statistic)

class BrunnerMunzelResult:
	"""
	Result of a Brunner–Munzel test
	
	See :func:`yli.mannwhitney`. This library calls the Brunner–Munzel test statistic *W*.
	"""
	
	"""Result of a Brunner-Munzel test"""
	
	def __init__(self, statistic, pvalue):
		#: *W* statistic (*float*)
		self.statistic = statistic
		#: *p* value for the *W* statistic (*float*)
		self.pvalue = pvalue
	
	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()
	
	def _repr_html_(self):
		return '<i>W</i> = {:.1f}; <i>p</i> {}'.format(self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML))
	
	def summary(self):
		"""
		Return a stringified summary of the Brunner–Munzel test
		
		:rtype: str
		"""
		
		return 'W = {:.1f}; p {}'.format(self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION))

def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_continuity=False, alternative='two-sided', method='auto'):
	"""
	Perform a Mann–Whitney *U* test
	
	By default, this function performs a Brunner–Munzel test if the Mann–Whitney test is significant.
	If the Mann–Whitney test is significant but the Brunner–Munzel test is not, a warning is raised.
	The Brunner–Munzel test is returned only if non-significant.
	
	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (numeric)
	:type dep: str
	:param ind: Column in *df* for the independent variable (dichotomous)
	:type ind: str
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	:param brunnermunzel: Whether to compute the Brunner–Munzel test if the Mann–Whitney test is significant
	:type brunnermunzel: bool
	:param use_continuity: See *scipy.stats.mannwhitneyu*
	:param alternative: See *scipy.stats.mannwhitneyu*
	:param method: See *scipy.stats.mannwhitneyu*
	
	:return: The result of the Mann–Whitney test. The result of a Brunner–Munzel test is included in the result object if and only if *brunnermunzel* is *True*, *and* the Mann–Whitney test is significant, *and* the Brunner–Munzel test is non-significant.
	
	:rtype: :class:`yli.sig_tests.MannWhitneyResult`
	
	**Example:**
	
	.. code-block::
		
		df = pd.DataFrame({
			'Sample': ['Before'] * 12 + ['After'] * 12,
			'Oxygen': [11.0, 11.2, 11.2, ...]
		})
		yli.mannwhitney(df, 'Oxygen', 'Sample', method='asymptotic', alternative='less')
	
	.. code-block:: text
		
		Sample                        After               Before
		Oxygen                                                  
		Median (IQR)    10.75 (10.55–10.95)  11.55 (11.20–11.83)
		Median (range)  10.75 (11.00–12.10)  11.55 (11.00–12.10)
		
		U = 6.0; p < 0.001*
		r = 0.92, Before > After
	
	The output states that the value of the Mann–Whitney *U* statistic is 6.0, and the one-sided test is significant with asymptotic *p* value < 0.001.
	The rank-biserial correlation is 0.92 in favour of the *Before* group.
	"""
	
	# Check for/clean NaNs
	df = check_nan(df[[ind, dep]], nan_policy)
	
	# Convert pandas nullable types for independent variables as this breaks mannwhitneyu
	df = convert_pandas_nullable(df)
	
	# Ensure 2 groups for ind
	group1, data1, group2, data2 = as_2groups(df, dep, ind)
	
	# Ensure numeric, factorising if required
	data1, _ = as_numeric(data1)
	data2, _ = as_numeric(data2)
	
	# Do Mann-Whitney test
	# Stata does not perform continuity correction
	result = stats.mannwhitneyu(data1, data2, use_continuity=use_continuity, alternative=alternative, method=method)
	
	u1 = result.statistic
	u2 = len(data1) * len(data2) - u1
	r = abs(2*u1 / (len(data1) * len(data2)) - 1)  # rank-biserial
	
	# If significant, perform a Brunner-Munzel test for our interest
	if result.pvalue < 0.05 and brunnermunzel:
		result_bm = stats.brunnermunzel(data1, data2)
		
		if result_bm.pvalue >= 0.05:
			warnings.warn('Mann-Whitney test is significant but Brunner-Munzel test is not. This could be due to a difference in shape, rather than location.')
			
			return MannWhitneyResult(
				statistic=min(u1, u2), pvalue=result.pvalue,
				#med1=data1.median(), med2=data2.median(),
				rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2),
				brunnermunzel=BrunnerMunzelResult(statistic=result_bm.statistic, pvalue=result_bm.pvalue))
	
	return MannWhitneyResult(
		statistic=min(u1, u2), pvalue=result.pvalue,
		dep=dep, ind=ind,
		group1=group1, group2=group2,
		med1=data1.median(), med2=data2.median(), iqr1=Interval(data1.quantile(0.25), data1.quantile(0.75)), iqr2=Interval(data2.quantile(0.25), data2.quantile(0.75)), range1=Interval(data1.min(), data1.max()), range2=Interval(data2.min(), data2.max()),
		rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2))

# ------------------------
# Pearson chi-squared test

class ChiSquaredResult:
	"""
	Result of a generic test with *χ*:sup:`2`-distributed test statistic
	
	See :meth:`yli.logrank`.
	
	See also :class:`yli.regress.BrantResult`, :class:`yli.regress.LikelihoodRatioTestResult`, :class:`PearsonChiSquaredResult`.
	"""
	
	def __init__(self, statistic, dof, pvalue):
		#: *χ*:sup:`2` statistic (*float*)
		self.statistic = statistic
		#: Degrees of freedom for the *χ*:sup:`2` distribution (*int*)
		self.dof = dof
		#: *p* value for the *χ*:sup:`2` test (*float*)
		self.pvalue = pvalue
	
	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()
	
	def _repr_html_(self):
		return '<i>χ</i><sup>2</sup>({}) = {:.2f}; <i>p</i> {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML))
	
	def summary(self):
		"""
		Return a stringified summary of the *χ*:sup:`2` test
		
		:rtype: str
		"""
		
		return 'χ²({}) = {:.2f}; p {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION))

class PearsonChiSquaredResult(ChiSquaredResult):
	"""
	Result of a Pearson *χ*:sup:`2` test
	
	See :func:`yli.chi2`.
	"""
	
	def __init__(self, ct, statistic, dof, pvalue, oddsratio=None, riskratio=None):
		super().__init__(statistic, dof, pvalue)
		
		#: Contingency table for the observations (*DataFrame*)
		self.ct = ct
		#: Odds ratio (*float*; *None* if not a 2×2 table)
		self.oddsratio = oddsratio
		#: Risk ratio (*float*; *None* if not a 2×2 table)
		self.riskratio = riskratio
	
	def _repr_html_(self):
		if self.oddsratio is not None:
			return '{0}<br><i>χ</i><sup>2</sup>({1}) = {2:.2f}; <i>p</i> {3}<br>OR ({4:g}% CI) = {5}<br>RR ({4:g}% CI) = {6}'.format(
				self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary())
		else:
			return '{}<br><i>χ</i><sup>2</sup>({}) = {:.2f}; <i>p</i> {}'.format(
				self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML))
	
	def summary(self):
		"""
		Return a stringified summary of the *χ*:sup:`2` test
		
		:rtype: str
		"""
		
		if self.oddsratio is not None:
			return '{0}\n\nχ²({1}) = {2:.2f}; p {3}\nOR ({4:g}% CI) = {5}\nRR ({4:g}% CI) = {6}'.format(
				self.ct, self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary())
		else:
			return '{}\n\nχ²({}) = {:.2f}; p {}'.format(
				self.ct, self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION))
	
	def summary_short(self, html):
		"""
		Return a stringified summary of the *χ*:sup:`2` test (*χ*:sup:`2` statistic only)
		
		:rtype: str
		"""
		
		if html:
			return '<i>χ</i><sup>2</sup>({}) = {:.2f}'.format(self.dof, self.statistic)
		else:
			return 'χ²({}) = {:.2f}'.format(self.dof, self.statistic)

def chi2(df, dep, ind, *, nan_policy='warn'):
	"""
	Perform a Pearson *χ*:sup:`2` test
	
	If a 2×2 contingency table is obtained (i.e. if both variables are dichotomous), an odds ratio and risk ratio are calculated.
	The ratios are calculated for the higher-valued value in each variable (i.e. *True* compared with *False* for a boolean).
	The risk ratio is calculated relative to the independent variable (rows of the contingency table).
	
	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (categorical)
	:type dep: str
	:param ind: Column in *df* for the independent variable (categorical)
	:type ind: str
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	
	:rtype: :class:`yli.sig_tests.PearsonChiSquaredResult`
	
	**Example:**
	
	.. code-block::
		
		df = pd.DataFrame({
			'Response': np.repeat([False, True, False, True], [250, 750, 400, 1600]),
			'Stress': np.repeat([False, False, True, True], [250, 750, 400, 1600])
		})
		yli.chi2(df, 'Stress', 'Response')
	
	.. code-block:: text
		
		Stress    False  True
		Response             
		False       250   400
		True        750  1600
		
		χ²(1) = 9.82; p = 0.002*
		OR (95% CI) = 1.33 (1.11–1.60)
		RR (95% CI) = 1.11 (1.03–1.18)
	
	The output shows the contingency table, and states that the value of the Pearson *χ*:sup:`2` statistic is 9.82, the *χ*:sup:`2` distribution has 1 degree of freedom, and the test is significant with *p* value 0.002.
	
	The odds of *Stress* in the *Response* = *True* group are 1.33 times that in the *Response* = *False* group, with 95% confidence interval 1.11–1.60.
	
	The risk of *Stress* in the *Response* = *True* group is 1.11 times that in the *Response* = *False* group, with 95% confidence interval 1.03–1.18.
	"""
	
	# Check for/clean NaNs
	df = check_nan(df[[ind, dep]], nan_policy)
	
	# Compute contingency table
	ct = pd.crosstab(df[ind], df[dep])
	
	# Get expected counts
	expected = stats.contingency.expected_freq(ct)
	
	# Warn on low expected counts
	if (expected < 5).sum() / expected.size > 0.2:
		warnings.warn('{} of {} cells ({:.0f}%) have expected count < 5'.format((expected < 5).sum(), expected.size, (expected < 5).sum() / expected.size * 100))
	if (expected < 1).any():
		warnings.warn('{} cells have expected count < 1'.format((expected < 1).sum()))
	
	if ct.shape == (2,2):
		# 2x2 table
		# Use statsmodels to get OR and RR
		
		smct = sm.stats.Table2x2(np.flip(ct.to_numpy()), shift_zeros=False)
		result = smct.test_nominal_association()
		ORci = smct.oddsratio_confint(config.alpha)
		RRci = smct.riskratio_confint(config.alpha)
		
		return PearsonChiSquaredResult(
			ct=ct, statistic=result.statistic, dof=result.df, pvalue=result.pvalue,
			oddsratio=Estimate(smct.oddsratio, ORci[0], ORci[1]), riskratio=Estimate(smct.riskratio, RRci[0], RRci[1]))
	else:
		# rxc table
		# Just use SciPy
		
		result = stats.chi2_contingency(ct, correction=False)
		
		return PearsonChiSquaredResult(ct=ct, statistic=result[0], dof=result[2], pvalue=result[1])

# -------------------
# Pearson correlation

class PearsonRResult:
	"""
	Result of Pearson product-moment correlation
	
	See :func:`yli.pearsonr`.
	"""
	
	def __init__(self, statistic, pvalue):
		#: Pearson *r* correlation statistic (:class:`yli.utils.Estimate`)
		self.statistic = statistic
		#: *p* value for the *r* statistic (*float*)
		self.pvalue = pvalue
	
	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()
	
	def _repr_html_(self):
		return '<i>r</i> ({:g}% CI) = {}; <i>p</i> {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML))
	
	def summary(self):
		"""
		Return a stringified summary of the Pearson correlation
		
		:rtype: str
		"""
		
		return 'r ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, PValueStyle.RELATION))

def pearsonr(df, dep, ind, *, nan_policy='warn'):
	"""
	Compute the Pearson product-moment correlation coefficient (Pearson's *r*)
	
	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (numerical)
	:type dep: str
	:param ind: Column in *df* for the independent variable (numerical)
	:type ind: str
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	
	:rtype: :class:`yli.sig_tests.PearsonRResult`
	
	**Example:**
	
	.. code-block::
		
		df = pd.DataFrame({
			'y': [41, 39, 47, 51, 43, 40, 57, 46, 50, 59, 61, 52],
			'x': [24, 30, 33, 35, 36, 36, 37, 37, 38, 40, 43, 49]
		})
		yli.pearsonr(df, 'y', 'x')
	
	.. code-block:: text
		
		r (95% CI) = 0.65 (0.11–0.89); p = 0.02*
	
	The output states that the value of the Pearson correlation coefficient is 0.65, with 95% confidence interval 0.11–0.89, and the test is significant with *p* value 0.02.
	"""
	
	# Check for/clean NaNs
	df = check_nan(df[[ind, dep]], nan_policy)
	
	# Compute Pearson's r
	result = stats.pearsonr(df[ind], df[dep])
	ci = result.confidence_interval()
	
	return PearsonRResult(statistic=Estimate(result.statistic, ci.low, ci.high), pvalue=result.pvalue)

# --------------------
# Spearman correlation

class SpearmanResult:
	"""
	Result of Spearman rank correlation
	
	See :func:`yli.spearman`.
	"""
	
	def __init__(self, statistic, pvalue):
		#: Spearman *ρ* correlation statistic (:class:`yli.utils.Estimate`)
		self.statistic = statistic
		#: *p* value for the *ρ* statistic (*float*)
		self.pvalue = pvalue
	
	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()
	
	def _repr_html_(self):
		return '<i>ρ</i> ({:g}% CI) = {}; <i>p</i> {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML))
	
	def summary(self):
		"""
		Return a stringified summary of the Spearman correlation
		
		:rtype: str
		"""
		
		return 'ρ ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, PValueStyle.RELATION))

def spearman(df, dep, ind, *, nan_policy='warn'):
	"""
	Compute the Spearman rank correlation coefficient (Spearman's *ρ*)
	
	The confidence interval for *ρ* is computed analogously to SciPy's *pearsonr*, using the Fisher transformation and normal approximation, without adjustment to variance.
	
	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (numerical)
	:type dep: str
	:param ind: Column in *df* for the independent variable (numerical)
	:type ind: str
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	
	:rtype: :class:`yli.sig_tests.SpearmanResult`
	
	**Example:**
	
	.. code-block::
		
		df = pd.DataFrame({
			'Profit': [2.5, 6.2, 3.1, ...],
			'Quality': [50, 57, 61, ...]
		})
		yli.spearman(df, 'Profit', 'Quality')
	
	.. code-block:: text
		
		ρ (95% CI) = 0.87 (0.60–0.96); p < 0.001*
	
	The output states that the value of the Spearman correlation coefficient is 0.87, with 95% confidence interval 0.60–0.96, and the test is significant with *p* value < 0.001.
	"""
	
	# Check for/clean NaNs
	df = check_nan(df[[ind, dep]], nan_policy)
	
	# Ensure numeric, factorising categorical variables as required
	ind, _ = as_numeric(df[ind])
	dep, _ = as_numeric(df[dep])
	
	# Compute Spearman's rho
	result = stats.spearmanr(ind, dep)
	
	# Compute confidence interval
	ci = stats._stats_py._pearsonr_fisher_ci(result.correlation, len(dep), 1 - config.alpha, 'two-sided')
	return SpearmanResult(statistic=Estimate(result.correlation, ci.low, ci.high), pvalue=result.pvalue)

# ----------------------------
# Automatic selection of tests

class AutoBinaryResult:
	"""
	Result of automatically computed univariable tests of association for a dichotomous dependent variable
	
	See :func:`yli.auto_univariable`.
	
	Results data stored within instances of this class is not intended to be directly accessed.
	"""
	
	def __init__(self, *, dep, group1, group2, result_data, result_labels):
		#: Name of the dependent variable (*str*)
		self.dep = dep
		
		#: Name of the first group (*str*)
		self.group1 = group1
		#: Name of the second group (*str*)
		self.group2 = group2
		
		# List of tuples (first group summary, second group summary, test result)
		self._result_data = result_data
		# List of tuples (plaintext label, HTML label)
		self._result_labels = result_labels
	
	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()
	
	def _repr_html_(self):
		result = '<table><thead><tr><th>{}</th><th>{}</th><th>{}</th><th></th><th style="text-align:center"><i>p</i></th></tr></thead><tbody>'.format(self.dep, self.group1, self.group2)
		
		for data, label in zip(self._result_data, self._result_labels):
			result += '<tr><th>{}</th><td>{}</td><td>{}</td><td style="text-align:left">{}</td><td style="text-align:left">{}</td></tr>'.format(label[1], data[0], data[1], data[2].summary_short(True), fmt_p(data[2].pvalue, PValueStyle.TABULAR | PValueStyle.HTML))
		
		result += '</tbody></table>'
		return result
	
	def summary(self):
		"""
		Return a stringified summary of the tests of association
		
		:rtype: str
		"""
		
		# Format data for output
		result_data_fmt = [
			[r[0], r[1], r[2].summary_short(False), fmt_p(r[2].pvalue, PValueStyle.TABULAR)]
			for r in self._result_data
		]
		result_labels_fmt = [r[0] for r in self._result_labels]
		
		table = pd.DataFrame(result_data_fmt, index=result_labels_fmt, columns=pd.Index([self.group1, self.group2, '', 'p'], name=self.dep))
		return str(table)

def auto_univariable(df, dep, inds, *, nan_policy='warn'):
	"""
	Automatically compute univariable tests of association for a dichotomous dependent variable
	
	The tests performed are:
	
	* For a dichotomous independent variable – :func:`yli.chi2`
	* For a continuous independent variable – :func:`yli.ttest_ind`
	* For an ordinal independent variable – :func:`yli.mannwhitney`
	
	If *nan_policy* is *warn* or *omit*, rows with *nan* values are omitted only from the individual tests of association for the missing variables.
	
	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (dichotomous)
	:type dep: str
	:param inds: Columns in *df* for the independent variables
	:type inds: List[str]
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	
	:rtype: :class:`yli.sig_tests.AutoBinaryResult`
	"""
	
	# Check for/clean NaNs in dependent variable
	df = check_nan(df[inds + [dep]], nan_policy, cols=[dep])
	
	# Ensure 2 groups for dep
	# TODO: Work for non-binary dependent variables?
	group1, data1, group2, data2 = as_2groups(df, inds, dep)
	
	result_data = []
	result_labels = []
	
	for ind in inds:
		# Check for/clean NaNs in independent variable
		# Following this, we pass nan_policy='raise' to assert no NaNs remaining
		df_cleaned = check_nan(df, nan_policy, cols=[ind])
		
		if df_cleaned[ind].dtype == 'category' and df_cleaned[ind].cat.ordered and df_cleaned[ind].cat.categories.dtype in ('float64', 'int64', 'Float64', 'Int64'):
			# Ordinal numeric data
			# Mann-Whitney test
			result = mannwhitney(df_cleaned, ind, dep, nan_policy='raise')
			
			result_labels.append((
				'{}, median (IQR)'.format(ind),
				'{}, median (IQR)'.format(ind),
			))
			result_data.append((
				'{:.2f} ({})'.format(result.med1, result.iqr1.summary()),
				'{:.2f} ({})'.format(result.med2, result.iqr2.summary()),
				result
			))
		elif df_cleaned[ind].dtype in ('bool', 'boolean', 'category', 'object'):
			# Categorical data
			# Pearson chi-squared test
			result = chi2(df_cleaned, dep, ind, nan_policy='raise')
			
			values = sorted(df_cleaned[ind].unique())
			
			# Value counts
			result_labels.append((
				'{}, {}'.format(ind, ':'.join(str(v) for v in values)),
				'{}, {}'.format(ind, ':'.join(str(v) for v in values)),
			))
			result_data.append((
				':'.join(str((data1[ind] == v).sum()) for v in values),
				':'.join(str((data2[ind] == v).sum()) for v in values),
				result
			))
		elif df_cleaned[ind].dtype in ('float64', 'int64', 'Float64', 'Int64'):
			# Continuous data
			# t test
			result = ttest_ind(df_cleaned, ind, dep, nan_policy='raise')
			
			result_labels.append((
				'{}, μ (SD)'.format(ind),
				'{}, <i>μ</i> (SD)'.format(ind),
			))
			result_data.append((
				'{:.2f} ({:.2f})'.format(result.mu1, result.sd1),
				'{:.2f} ({:.2f})'.format(result.mu2, result.sd2),
				result
			))
		else:
			raise Exception('Unsupported independent dtype for auto_univariable, {}'.format(df[ind].dtype))
	
	return AutoBinaryResult(dep=dep, group1=group1, group2=group2, result_data=result_data, result_labels=result_labels)