scipy-yli/yli/sig_tests.py

#   scipy-yli: Helpful SciPy utilities and recipes
#   Copyright © 2022  Lee Yingtong Li (RunasSudo)
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU Affero General Public License for more details.
#
#   You should have received a copy of the GNU Affero General Public License
#   along with this program.  If not, see <https://www.gnu.org/licenses/>.

import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.api as sm

import functools
import warnings

from .config import config
from .utils import Estimate, as_2groups, check_nan, convert_pandas_nullable, fmt_p

# ----------------
# Student's t test

class TTestResult:
	"""
	Result of a Student's *t* test

	See :func:`yli.ttest_ind`.
	"""

	def __init__(self, statistic, dof, pvalue, delta, delta_direction):
		#: *t* statistic (*float*)
		self.statistic = statistic
		#: Degrees of freedom of the *t* distribution (*int*)
		self.dof = dof
		#: *p* value for the *t* statistic (*float*)
		self.pvalue = pvalue
		#: Absolute value of the mean difference (:class:`yli.utils.Estimate`)
		self.delta = delta
		#: Description of the direction of the effect (*str*)
		self.delta_direction = delta_direction

	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()

	def _repr_html_(self):
		return '<i>t</i>({:.0f}) = {:.2f}; <i>p</i> {}<br>Δ<i>μ</i> ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=True), (1-config.alpha)*100, self.delta.summary(), self.delta_direction)

	def summary(self):
		"""
		Return a stringified summary of the *t* test

		:rtype: str
		"""

		return 't({:.0f}) = {:.2f}; p {}\nΔμ ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False), (1-config.alpha)*100, self.delta.summary(), self.delta_direction)

def ttest_ind(df, dep, ind, *, nan_policy='warn'):
	"""
	Perform an independent 2-sample Student's *t* test

	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (numeric)
	:type dep: str
	:param ind: Column in *df* for the independent variable (dichotomous)
	:type ind: str
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str

	:rtype: :class:`yli.sig_tests.TTestResult`

	**Example:**

	.. code-block::

		df = pd.DataFrame({
			'Type': ['Fresh'] * 10 + ['Stored'] * 10,
			'Potency': [10.2, 10.5, 10.3, ...]
		})
		yli.ttest_ind(df, 'Potency', 'Type')

	.. code-block:: text

		t(18) = 4.24; p < 0.001*
		Δμ (95% CI) = 0.54 (0.27–0.81), Fresh > Stored

	The output states that the value of the *t* statistic is 4.24, the *t* distribution has 18 degrees of freedom, and the test is significant with *p* value < 0.001.
	The mean difference is 0.54 in favour of the *Fresh* group, with 95% confidence interval 0.27–0.81.
	"""

	# Check for/clean NaNs
	df = check_nan(df[[ind, dep]], nan_policy)

	# Ensure 2 groups for ind
	group1, data1, group2, data2 = as_2groups(df, dep, ind)

	# Do t test
	# Use statsmodels rather than SciPy because this provides the mean difference automatically
	d1 = sm.stats.DescrStatsW(data1)
	d2 = sm.stats.DescrStatsW(data2)

	cm = sm.stats.CompareMeans(d1, d2)
	statistic, pvalue, dof = cm.ttest_ind()

	delta = d1.mean - d2.mean
	ci0, ci1 = cm.tconfint_diff(config.alpha)

	# t test is symmetric so take absolute values
	return TTestResult(
		statistic=abs(statistic), dof=dof, pvalue=pvalue,
		delta=abs(Estimate(delta, ci0, ci1)),
		delta_direction=('{0} > {1}' if d1.mean > d2.mean else '{1} > {0}').format(group1, group2))

# -------------
# One-way ANOVA

class FTestResult:
	"""
	Result of an *F* test for ANOVA/regression

	See :func:`yli.anova_oneway` and :meth:`yli.regress.RegressionResult.ftest`.
	"""

	def __init__(self, statistic, dof1, dof2, pvalue):
		#: *F* statistic (*float*)
		self.statistic = statistic
		#: Degrees of freedom in the *F* distribution numerator (*int*)
		self.dof1 = dof1
		#: Degrees of freedom in the *F* distribution denominator (*int*)
		self.dof2 = dof2
		#: *p* value for the *F* statistic (*float*)
		self.pvalue = pvalue

	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()

	def _repr_html_(self):
		return '<i>F</i>({:.0f}, {:.0f}) = {:.2f}; <i>p</i> {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=True))

	def summary(self):
		"""
		Return a stringified summary of the *F* test

		:rtype: str
		"""

		return 'F({:.0f}, {:.0f}) = {:.2f}; p {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=False))

def anova_oneway(df, dep, ind, *, nan_policy='omit'):
	"""
	Perform one-way ANOVA

	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (numeric)
	:type dep: str
	:param ind: Column in *df* for the independent variable (categorical)
	:type ind: str
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str

	:rtype: :class:`yli.sig_tests.FTestResult`

	**Example:**

	.. code-block::

		df = pd.DataFrame({
			'Method': [1]*8 + [2]*7 + [3]*9,
			'Score': [96, 79, 91, ...]
		})
		yli.anova_oneway(df, 'Score', 'Method')

	.. code-block:: text

		F(2, 21) = 29.57; p < 0.001*

	The output states that the value of the *F* statistic is 29.57, the *F* distribution has 2 degrees of freedom in the numerator and 21 in the denominator, and the test is significant with *p* value < 0.001.
	"""

	# Check for/clean NaNs
	df = check_nan(df[[ind, dep]], nan_policy)

	# Group by independent variable
	groups = df.groupby(ind)[dep]

	# Perform one-way ANOVA
	result = stats.f_oneway(*[groups.get_group(k) for k in groups.groups])

	# See stats.f_oneway implementation
	dfbn = len(groups.groups) - 1
	dfwn = len(df) - len(groups.groups)

	return FTestResult(result.statistic, dfbn, dfwn, result.pvalue)

# -----------------
# Mann-Whitney test

class MannWhitneyResult:
	"""
	Result of a Mann-Whitney *U* test

	See :func:`yli.mannwhitney`.
	"""

	def __init__(self, statistic, pvalue, rank_biserial, direction, brunnermunzel=None):
		#: Lesser of the two Mann–Whitney *U* statistics (*float*)
		self.statistic = statistic
		#: *p* value for the *U* statistic (*float*)
		self.pvalue = pvalue
		#: Absolute value of the rank-biserial correlation (*float*)
		self.rank_biserial = rank_biserial
		#: Description of the direction of the effect (*str*)
		self.direction = direction
		#: :class:`BrunnerMunzelResult` on the same data, or *None* if N/A
		self.brunnermunzel = brunnermunzel

	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()

	def _repr_html_(self):
		line1 = '<i>U</i> = {:.1f}; <i>p</i> {}<br><i>r</i> = {:.2f}, {}'.format(self.statistic, fmt_p(self.pvalue, html=True), self.rank_biserial, self.direction)
		if self.brunnermunzel:
			return line1 + '<br>' + self.brunnermunzel._repr_html_()
		else:
			return line1

	def summary(self):
		"""
		Return a stringified summary of the Mann–Whitney test

		:rtype: str
		"""

		line1 = 'U = {:.1f}; p {}\nr = {:.2f}, {}'.format(self.statistic, fmt_p(self.pvalue, html=False), self.rank_biserial, self.direction)
		if self.brunnermunzel:
			return line1 + '\n' + self.brunnermunzel.summary()
		else:
			return line1

class BrunnerMunzelResult:
	"""
	Result of a Brunner–Munzel test

	See :func:`yli.mannwhitney`. This library calls the Brunner–Munzel test statistic *W*.
	"""

	"""Result of a Brunner-Munzel test"""

	def __init__(self, statistic, pvalue):
		#: *W* statistic (*float*)
		self.statistic = statistic
		#: *p* value for the *W* statistic (*float*)
		self.pvalue = pvalue

	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()

	def _repr_html_(self):
		return '<i>W</i> = {:.1f}; <i>p</i> {}'.format(self.statistic, fmt_p(self.pvalue, html=True))

	def summary(self):
		"""
		Return a stringified summary of the Brunner–Munzel test

		:rtype: str
		"""

		return 'W = {:.1f}; p {}'.format(self.statistic, fmt_p(self.pvalue, html=False))

def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_continuity=False, alternative='two-sided', method='auto'):
	"""
	Perform a Mann-Whitney *U* test

	By default, this function performs a Brunner–Munzel test if the Mann–Whitney test is significant.
	If the Mann–Whitney test is significant but the Brunner–Munzel test is not, a warning is raised.
	The Brunner–Munzel test is returned only if non-significant.

	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (numeric)
	:type dep: str
	:param ind: Column in *df* for the independent variable (dichotomous)
	:type ind: str
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	:param brunnermunzel: Whether to compute the Brunner–Munzel test if the Mann–Whitney test is significant
	:type brunnermunzel: bool
	:param use_continuity: See *scipy.stats.mannwhitneyu*
	:param alternative: See *scipy.stats.mannwhitneyu*
	:param method: See *scipy.stats.mannwhitneyu*

	:return: The result of the Mann–Whitney test.
	The result of a Brunner–Munzel test is included in the result object if and only if *brunnermunzel* is *True*,
	*and* the Mann–Whitney test is significant, *and* the Brunner–Munzel test is non-significant.

	:rtype: :class:`yli.sig_tests.MannWhitneyResult`

	**Example:**

	.. code-block::

		df = pd.DataFrame({
			'Sample': ['Before'] * 12 + ['After'] * 12,
			'Oxygen': [11.0, 11.2, 11.2, ...]
		})
		yli.mannwhitney(df, 'Oxygen', 'Sample', method='asymptotic', alternative='less')

	.. code-block:: text

		U = 6.0; p < 0.001*
		r = 0.92, Before > After

	The output states that the value of the Mann–Whitney *U* statistic is 6.0, and the one-sided test is significant with asymptotic *p* value < 0.001.
	The rank-biserial correlation is 0.92 in favour of the *Before* group.
	"""

	# Check for/clean NaNs
	df = check_nan(df[[ind, dep]], nan_policy)

	# Convert pandas nullable types for independent variables as this breaks statsmodels
	df = convert_pandas_nullable(df)

	# Ensure 2 groups for ind
	group1, data1, group2, data2 = as_2groups(df, dep, ind)

	# Do Mann-Whitney test
	# Stata does not perform continuity correction
	result = stats.mannwhitneyu(data1, data2, use_continuity=use_continuity, alternative=alternative, method=method)

	u1 = result.statistic
	u2 = len(data1) * len(data2) - u1
	r = abs(2*u1 / (len(data1) * len(data2)) - 1)  # rank-biserial

	# If significant, perform a Brunner-Munzel test for our interest
	if result.pvalue < 0.05 and brunnermunzel:
		result_bm = stats.brunnermunzel(data1, data2)

		if result_bm.pvalue >= 0.05:
			warnings.warn('Mann-Whitney test is significant but Brunner-Munzel test is not. This could be due to a difference in shape, rather than location.')

			return MannWhitneyResult(
				statistic=min(u1, u2), pvalue=result.pvalue,
				#med1=data1.median(), med2=data2.median(),
				rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2),
				brunnermunzel=BrunnerMunzelResult(statistic=result_bm.statistic, pvalue=result_bm.pvalue))

	return MannWhitneyResult(
		statistic=min(u1, u2), pvalue=result.pvalue,
		#med1=data1.median(), med2=data2.median(),
		rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2))

# ------------------------
# Pearson chi-squared test

class PearsonChiSquaredResult:
	"""
	Result of a Pearson *χ*:sup:`2` test

	See :func:`yli.chi2`.
	"""

	def __init__(self, ct, statistic, dof, pvalue, oddsratio=None, riskratio=None):
		#: Contingency table for the observations (*DataFrame*)
		self.ct = ct
		#: *χ*:sup:`2` statistic (*float*)
		self.statistic = statistic
		#: Degrees of freedom for the *χ*:sup:`2` distribution (*int*)
		self.dof = dof
		#: *p* value for the *χ*:sup:`2` test (*float*)
		self.pvalue = pvalue
		#: Odds ratio (*float*; *None* if not a 2×2 table)
		self.oddsratio = oddsratio
		#: Risk ratio (*float*; *None* if not a 2×2 table)
		self.riskratio = riskratio

	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()

	def _repr_html_(self):
		if self.oddsratio is not None:
			return '{0}<br><i>χ</i><sup>2</sup>({1}) = {2:.2f}; <i>p</i> {3}<br>OR ({4:g}% CI) = {5}<br>RR ({4:g}% CI) = {6}'.format(
				self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, html=True), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary())
		else:
			return '{}<br><i>χ</i><sup>2</sup>({}) = {:.2f}; <i>p</i> {}'.format(
				self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, html=True))

	def summary(self):
		"""
		Return a stringified summary of the *χ*:sup:`2` test

		:rtype: str
		"""

		if self.oddsratio is not None:
			return '{0}\n\nχ²({1}) = {2:.2f}; p {3}\nOR ({4:g}% CI) = {5}\nRR ({4:g}% CI) = {6}'.format(
				self.ct, self.dof, self.statistic, fmt_p(self.pvalue, html=False), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary())
		else:
			return '{}\n\nχ²({}) = {:.2f}; p {}'.format(
				self.ct, self.dof, self.statistic, fmt_p(self.pvalue, html=False))

def chi2(df, dep, ind, *, nan_policy='warn'):
	"""
	Perform a Pearson *χ*:sup:`2` test

	If a 2×2 contingency table is obtained (i.e. if both variables are dichotomous), an odds ratio and risk ratio are calculated.
	The ratios are calculated for the higher-valued value in each variable (i.e. *True* compared with *False* for a boolean).
	The risk ratio is calculated relative to the independent variable (rows of the contingency table).

	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (categorical)
	:type dep: str
	:param ind: Column in *df* for the independent variable (categorical)
	:type ind: str
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str

	:rtype: :class:`yli.sig_tests.PearsonChiSquaredResult`

	**Example:**

	.. code-block::

		df = pd.DataFrame({
			'Response': np.repeat([False, True, False, True], [250, 750, 400, 1600]),
			'Stress': np.repeat([False, False, True, True], [250, 750, 400, 1600])
		})
		yli.chi2(df, 'Stress', 'Response')

	.. code-block:: text

		Stress    False  True
		Response
		False       250   400
		True        750  1600

		χ²(1) = 9.82; p = 0.002*
		OR (95% CI) = 1.33 (1.11–1.60)
		RR (95% CI) = 1.11 (1.03–1.18)

	The output shows the contingency table, and states that the value of the Pearson *χ*:sup:`2` statistic is 9.82, the *χ*:sup:`2` distribution has 1 degree of freedom, and the test is significant with *p* value 0.002.

	The odds of *Stress* in the *Response* = *True* group are 1.33 times that in the *Response* = *False* group, with 95% confidence interval 1.11–1.60.

	The risk of *Stress* in the *Response* = *True* group is 1.11 that in the *Response* = *False* group, with 95% confidence interval 1.03–1.18.
	"""

	# Check for/clean NaNs
	df = check_nan(df[[ind, dep]], nan_policy)

	# Compute contingency table
	ct = pd.crosstab(df[ind], df[dep])

	# Get expected counts
	expected = stats.contingency.expected_freq(ct)

	# Warn on low expected counts
	if (expected < 5).sum() / expected.size > 0.2:
		warnings.warn('{} of {} cells ({:.0f}%) have expected count < 5'.format((expected < 5).sum(), expected.size, (expected < 5).sum() / expected.size * 100))
	if (expected < 1).any():
		warnings.warn('{} cells have expected count < 1'.format((expected < 1).sum()))

	if ct.shape == (2,2):
		# 2x2 table
		# Use statsmodels to get OR and RR

		smct = sm.stats.Table2x2(np.flip(ct.to_numpy()), shift_zeros=False)
		result = smct.test_nominal_association()
		ORci = smct.oddsratio_confint(config.alpha)
		RRci = smct.riskratio_confint(config.alpha)

		return PearsonChiSquaredResult(
			ct=ct, statistic=result.statistic, dof=result.df, pvalue=result.pvalue,
			oddsratio=Estimate(smct.oddsratio, ORci[0], ORci[1]), riskratio=Estimate(smct.riskratio, RRci[0], RRci[1]))
	else:
		# rxc table
		# Just use SciPy

		result = stats.chi2_contingency(ct, correction=False)

		return PearsonChiSquaredResult(ct=ct, statistic=result[0], dof=result[2], pvalue=result[1])

# -------------------
# Pearson correlation

class PearsonRResult:
	"""
	Result of Pearson correlation

	See :func:`yli.pearsonr`.
	"""

	def __init__(self, statistic, pvalue):
		#: Pearson *r* correlation statistic (*float*)
		self.statistic = statistic
		#: *p* value for the *r* statistic (*float*)
		self.pvalue = pvalue

	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()

	def _repr_html_(self):
		return '<i>r</i> ({:g}% CI) = {}; <i>p</i> {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, html=True))

	def summary(self):
		"""
		Return a stringified summary of the Pearson correlation

		:rtype: str
		"""

		return 'r ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, html=False))

def pearsonr(df, dep, ind, *, nan_policy='warn'):
	"""
	Compute the Pearson correlation coefficient (Pearson's *r*)

	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (numerical)
	:type dep: str
	:param ind: Column in *df* for the independent variable (numerical)
	:type ind: str
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str

	:rtype: :class:`yli.sig_tests.PearsonRResult`

	**Example:**

	.. code-block::

		df = pd.DataFrame({
			'y': [41, 39, 47, 51, 43, 40, 57, 46, 50, 59, 61, 52],
			'x': [24, 30, 33, 35, 36, 36, 37, 37, 38, 40, 43, 49]
		})
		yli.pearsonr(df, 'y', 'x')

	.. code-block:: text

		r (95% CI) = 0.65 (0.11–0.89); p = 0.02*

	The output states that the value of the Pearson correlation coefficient is 0.65, with 95% confidence interval 0.11–0.89, and the test is significant with *p* value 0.02.
	"""

	# Check for/clean NaNs
	df = check_nan(df[[ind, dep]], nan_policy)

	# Compute Pearson's r
	result = stats.pearsonr(df[ind], df[dep])
	ci = result.confidence_interval()

	return PearsonRResult(statistic=Estimate(result.statistic, ci.low, ci.high), pvalue=result.pvalue)