scipy-yli/yli/sig_tests.py

#   scipy-yli: Helpful SciPy utilities and recipes
#   Copyright © 2022  Lee Yingtong Li (RunasSudo)
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU Affero General Public License for more details.
#
#   You should have received a copy of the GNU Affero General Public License
#   along with this program.  If not, see <https://www.gnu.org/licenses/>.

import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.api as sm

import functools
import warnings

from .config import config
from .utils import ConfidenceInterval, Estimate, PValueStyle, as_2groups, check_nan, convert_pandas_nullable, fmt_p

# ----------------
# Student's t test

class TTestResult:
	"""
	Result of a Student's *t* test
	
	See :func:`yli.ttest_ind`.
	"""
	
	def __init__(self, statistic, dof, pvalue, group1, group2, mu1, mu2, sd1, sd2, delta, delta_direction):
		#: *t* statistic (*float*)
		self.statistic = statistic
		#: Degrees of freedom of the *t* distribution (*int*)
		self.dof = dof
		#: *p* value for the *t* statistic (*float*)
		self.pvalue = pvalue
		
		#: Name of the first group (*str*)
		self.group1 = group1
		#: Name of the second group (*str*)
		self.group2 = group2
		#: Mean of the first group (*float*)
		self.mu1 = mu1
		#: Mean of the second group (*float*)
		self.mu2 = mu2
		#: Standard deviation of the first group (*float*)
		self.sd1 = sd1
		#: Standard deviation of the second group (*float*)
		self.sd2 = sd2
		
		#: Absolute value of the mean difference (:class:`yli.utils.Estimate`)
		self.delta = delta
		#: Description of the direction of the effect (*str*)
		self.delta_direction = delta_direction
	
	def _comparison_table(self, html):
		"""Return a table showing the means/SDs for each group"""
		
		table_data = {
			self.group1: '{:.2f} ({:.2f})'.format(self.mu1, self.sd1),
			self.group2: '{:.2f} ({:.2f})'.format(self.mu2, self.sd2),
		}
		
		if html:
			table = pd.DataFrame(table_data, index=['\ue000 (SD)']) # U+E000 is in Private Use Area, mark μ symbol
			table_str = table._repr_html_()
			return table_str.replace('\ue000', '<i>μ</i>')
		else:
			table = pd.DataFrame(table_data, index=['μ (SD)'])
			return str(table)
	
	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()
	
	def _repr_html_(self):
		return '{}<br><i>t</i>({:.0f}) = {:.2f}; <i>p</i> {}<br>Δ<i>μ</i> ({:g}% CI) = {}, {}'.format(self._comparison_table(True), self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML), (1-config.alpha)*100, self.delta.summary(), self.delta_direction)
	
	def summary(self):
		"""
		Return a stringified summary of the *t* test
		
		:rtype: str
		"""
		
		return '{}\n\nt({:.0f}) = {:.2f}; p {}\nΔμ ({:g}% CI) = {}, {}'.format(self._comparison_table(False), self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION), (1-config.alpha)*100, self.delta.summary(), self.delta_direction)

def ttest_ind(df, dep, ind, *, nan_policy='warn'):
	"""
	Perform an independent 2-sample Student's *t* test
	
	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (numeric)
	:type dep: str
	:param ind: Column in *df* for the independent variable (dichotomous)
	:type ind: str
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	
	:rtype: :class:`yli.sig_tests.TTestResult`
	
	**Example:**
	
	.. code-block::
		
		df = pd.DataFrame({
			'Type': ['Fresh'] * 10 + ['Stored'] * 10,
			'Potency': [10.2, 10.5, 10.3, ...]
		})
		yli.ttest_ind(df, 'Potency', 'Type')
	
	.. code-block:: text
		               Fresh       Stored
		μ (SD)  10.37 (0.32)  9.83 (0.24)
		
		t(18) = 4.24; p < 0.001*
		Δμ (95% CI) = 0.54 (0.27–0.81), Fresh > Stored
	
	The output states that the value of the *t* statistic is 4.24, the *t* distribution has 18 degrees of freedom, and the test is significant with *p* value < 0.001.
	The mean difference is 0.54 in favour of the *Fresh* group, with 95% confidence interval 0.27–0.81.
	"""
	
	# Check for/clean NaNs
	df = check_nan(df[[ind, dep]], nan_policy)
	
	# Ensure 2 groups for ind
	group1, data1, group2, data2 = as_2groups(df, dep, ind)
	
	# Do t test
	# Use statsmodels rather than SciPy because this provides the mean difference automatically
	d1 = sm.stats.DescrStatsW(data1, ddof=1)
	d2 = sm.stats.DescrStatsW(data2, ddof=1)
	
	cm = sm.stats.CompareMeans(d1, d2)
	statistic, pvalue, dof = cm.ttest_ind()
	
	delta = d1.mean - d2.mean
	ci0, ci1 = cm.tconfint_diff(config.alpha)
	
	# t test is symmetric so take absolute value
	if d2.mean > d1.mean:
		delta, ci0, ci1 = -delta, -ci1, -ci0
		d1, d2 = d2, d1
		group1, group2 = group2, group1
		# Now group1 > group2
	
	return TTestResult(
		statistic=abs(statistic), dof=dof, pvalue=pvalue,
		group1=group1, group2=group2, mu1=d1.mean, mu2=d2.mean, sd1=d1.std, sd2=d2.std,
		delta=Estimate(delta, ci0, ci1),
		delta_direction='{} > {}'.format(group1, group2))

# -------------
# One-way ANOVA

class FTestResult:
	"""
	Result of an *F* test for ANOVA/regression
	
	See :func:`yli.anova_oneway` and :meth:`yli.regress.RegressionResult.ftest`.
	"""
	
	def __init__(self, statistic, dof1, dof2, pvalue):
		#: *F* statistic (*float*)
		self.statistic = statistic
		#: Degrees of freedom in the *F* distribution numerator (*int*)
		self.dof1 = dof1
		#: Degrees of freedom in the *F* distribution denominator (*int*)
		self.dof2 = dof2
		#: *p* value for the *F* statistic (*float*)
		self.pvalue = pvalue
	
	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()
	
	def _repr_html_(self):
		return '<i>F</i>({:.0f}, {:.0f}) = {:.2f}; <i>p</i> {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML))
	
	def summary(self):
		"""
		Return a stringified summary of the *F* test
		
		:rtype: str
		"""
		
		return 'F({:.0f}, {:.0f}) = {:.2f}; p {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION))

def anova_oneway(df, dep, ind, *, nan_policy='omit'):
	"""
	Perform one-way ANOVA
	
	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (numeric)
	:type dep: str
	:param ind: Column in *df* for the independent variable (categorical)
	:type ind: str
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	
	:rtype: :class:`yli.sig_tests.FTestResult`
	
	**Example:**
	
	.. code-block::
		
		df = pd.DataFrame({
			'Method': [1]*8 + [2]*7 + [3]*9,
			'Score': [96, 79, 91, ...]
		})
		yli.anova_oneway(df, 'Score', 'Method')
	
	.. code-block:: text
		
		F(2, 21) = 29.57; p < 0.001*
	
	The output states that the value of the *F* statistic is 29.57, the *F* distribution has 2 degrees of freedom in the numerator and 21 in the denominator, and the test is significant with *p* value < 0.001.
	"""
	
	# Check for/clean NaNs
	df = check_nan(df[[ind, dep]], nan_policy)
	
	# Group by independent variable
	groups = df.groupby(ind)[dep]
	
	# Perform one-way ANOVA
	result = stats.f_oneway(*[groups.get_group(k) for k in groups.groups])
	
	# See stats.f_oneway implementation
	dfbn = len(groups.groups) - 1
	dfwn = len(df) - len(groups.groups)
	
	return FTestResult(result.statistic, dfbn, dfwn, result.pvalue)

# -----------------
# Mann-Whitney test

class MannWhitneyResult:
	"""
	Result of a Mann-Whitney *U* test
	
	See :func:`yli.mannwhitney`.
	"""
	
	def __init__(self, statistic, pvalue, group1, group2, med1, med2, iqr1, iqr2, range1, range2, rank_biserial, direction, brunnermunzel=None):
		#: Lesser of the two Mann–Whitney *U* statistics (*float*)
		self.statistic = statistic
		#: *p* value for the *U* statistic (*float*)
		self.pvalue = pvalue
		
		#: Name of the first group (*str*)
		self.group1 = group1
		#: Name of the second group (*str*)
		self.group2 = group2
		#: Median of the first group (*float*)
		self.med1 = med1
		#: Median of the second group (*float*)
		self.med2 = med2
		#: Interquartile range of the first group (:class:`yli.utils.ConfidenceInterval`)
		self.iqr1 = iqr1
		#: Interquartile range of the second group (:class:`yli.utils.ConfidenceInterval`)
		self.iqr2 = iqr2
		#: Range of the first group (:class:`yli.utils.ConfidenceInterval`)
		self.range1 = range2
		#: Range of the second group (:class:`yli.utils.ConfidenceInterval`)
		self.range2 = range2
		
		#: Absolute value of the rank-biserial correlation (*float*)
		self.rank_biserial = rank_biserial
		#: Description of the direction of the effect (*str*)
		self.direction = direction
		
		#: :class:`BrunnerMunzelResult` on the same data, or *None* if N/A
		self.brunnermunzel = brunnermunzel
	
	def _comparison_table(self, html):
		"""Return a table showing the medians/IQRs/ranges for each group"""
		
		table_data = {
			self.group1: ['{:.2f} ({})'.format(self.med1, self.iqr1.summary()), '{:.2f} ({})'.format(self.med1, self.range1.summary())],
			self.group2: ['{:.2f} ({})'.format(self.med2, self.iqr2.summary()), '{:.2f} ({})'.format(self.med2, self.range2.summary())],
		}
		table = pd.DataFrame(table_data, index=['Median (IQR)', 'Median (range)'])
		
		if html:
			return table._repr_html_()
		else:
			return str(table)
	
	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()
	
	def _repr_html_(self):
		line1 = '{}<br><i>U</i> = {:.1f}; <i>p</i> {}<br><i>r</i> = {:.2f}, {}'.format(self._comparison_table(True), self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML), self.rank_biserial, self.direction)
		if self.brunnermunzel:
			return line1 + '<br>' + self.brunnermunzel._repr_html_()
		else:
			return line1
	
	def summary(self):
		"""
		Return a stringified summary of the Mann–Whitney test
		
		:rtype: str
		"""
		
		line1 = '{}\n\nU = {:.1f}; p {}\nr = {:.2f}, {}'.format(self._comparison_table(False), self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION), self.rank_biserial, self.direction)
		if self.brunnermunzel:
			return line1 + '\n' + self.brunnermunzel.summary()
		else:
			return line1

class BrunnerMunzelResult:
	"""
	Result of a Brunner–Munzel test
	
	See :func:`yli.mannwhitney`. This library calls the Brunner–Munzel test statistic *W*.
	"""
	
	"""Result of a Brunner-Munzel test"""
	
	def __init__(self, statistic, pvalue):
		#: *W* statistic (*float*)
		self.statistic = statistic
		#: *p* value for the *W* statistic (*float*)
		self.pvalue = pvalue
	
	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()
	
	def _repr_html_(self):
		return '<i>W</i> = {:.1f}; <i>p</i> {}'.format(self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML))
	
	def summary(self):
		"""
		Return a stringified summary of the Brunner–Munzel test
		
		:rtype: str
		"""
		
		return 'W = {:.1f}; p {}'.format(self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION))

def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_continuity=False, alternative='two-sided', method='auto'):
	"""
	Perform a Mann-Whitney *U* test
	
	By default, this function performs a Brunner–Munzel test if the Mann–Whitney test is significant.
	If the Mann–Whitney test is significant but the Brunner–Munzel test is not, a warning is raised.
	The Brunner–Munzel test is returned only if non-significant.
	
	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (numeric)
	:type dep: str
	:param ind: Column in *df* for the independent variable (dichotomous)
	:type ind: str
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	:param brunnermunzel: Whether to compute the Brunner–Munzel test if the Mann–Whitney test is significant
	:type brunnermunzel: bool
	:param use_continuity: See *scipy.stats.mannwhitneyu*
	:param alternative: See *scipy.stats.mannwhitneyu*
	:param method: See *scipy.stats.mannwhitneyu*
	
	:return: The result of the Mann–Whitney test. The result of a Brunner–Munzel test is included in the result object if and only if *brunnermunzel* is *True*, *and* the Mann–Whitney test is significant, *and* the Brunner–Munzel test is non-significant.
	
	:rtype: :class:`yli.sig_tests.MannWhitneyResult`
	
	**Example:**
	
	.. code-block::
		
		df = pd.DataFrame({
			'Sample': ['Before'] * 12 + ['After'] * 12,
			'Oxygen': [11.0, 11.2, 11.2, ...]
		})
		yli.mannwhitney(df, 'Oxygen', 'Sample', method='asymptotic', alternative='less')
	
	.. code-block:: text
		
		                              After               Before
		Median (IQR)    10.75 (10.55–10.95)  11.55 (11.20–11.83)
		Median (range)  10.75 (11.00–12.10)  11.55 (11.00–12.10)
		
		U = 6.0; p < 0.001*
		r = 0.92, Before > After
	
	The output states that the value of the Mann–Whitney *U* statistic is 6.0, and the one-sided test is significant with asymptotic *p* value < 0.001.
	The rank-biserial correlation is 0.92 in favour of the *Before* group.
	"""
	
	# Check for/clean NaNs
	df = check_nan(df[[ind, dep]], nan_policy)
	
	# Convert pandas nullable types for independent variables as this breaks statsmodels
	df = convert_pandas_nullable(df)
	
	# Ensure 2 groups for ind
	group1, data1, group2, data2 = as_2groups(df, dep, ind)
	
	# Do Mann-Whitney test
	# Stata does not perform continuity correction
	result = stats.mannwhitneyu(data1, data2, use_continuity=use_continuity, alternative=alternative, method=method)
	
	u1 = result.statistic
	u2 = len(data1) * len(data2) - u1
	r = abs(2*u1 / (len(data1) * len(data2)) - 1)  # rank-biserial
	
	# If significant, perform a Brunner-Munzel test for our interest
	if result.pvalue < 0.05 and brunnermunzel:
		result_bm = stats.brunnermunzel(data1, data2)
		
		if result_bm.pvalue >= 0.05:
			warnings.warn('Mann-Whitney test is significant but Brunner-Munzel test is not. This could be due to a difference in shape, rather than location.')
			
			return MannWhitneyResult(
				statistic=min(u1, u2), pvalue=result.pvalue,
				#med1=data1.median(), med2=data2.median(),
				rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2),
				brunnermunzel=BrunnerMunzelResult(statistic=result_bm.statistic, pvalue=result_bm.pvalue))
	
	return MannWhitneyResult(
		statistic=min(u1, u2), pvalue=result.pvalue,
		group1=group1, group2=group2,
		med1=data1.median(), med2=data2.median(), iqr1=ConfidenceInterval(data1.quantile(0.25), data1.quantile(0.75)), iqr2=ConfidenceInterval(data2.quantile(0.25), data2.quantile(0.75)), range1=ConfidenceInterval(data1.min(), data1.max()), range2=ConfidenceInterval(data2.min(), data2.max()),
		rank_biserial=r, direction=('{1} > {0}' if u1 < u2 else '{0} > {1}').format(group1, group2))

# ------------------------
# Pearson chi-squared test

class PearsonChiSquaredResult:
	"""
	Result of a Pearson *χ*:sup:`2` test
	
	See :func:`yli.chi2`.
	"""
	
	def __init__(self, ct, statistic, dof, pvalue, oddsratio=None, riskratio=None):
		#: Contingency table for the observations (*DataFrame*)
		self.ct = ct
		#: *χ*:sup:`2` statistic (*float*)
		self.statistic = statistic
		#: Degrees of freedom for the *χ*:sup:`2` distribution (*int*)
		self.dof = dof
		#: *p* value for the *χ*:sup:`2` test (*float*)
		self.pvalue = pvalue
		#: Odds ratio (*float*; *None* if not a 2×2 table)
		self.oddsratio = oddsratio
		#: Risk ratio (*float*; *None* if not a 2×2 table)
		self.riskratio = riskratio
	
	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()
	
	def _repr_html_(self):
		if self.oddsratio is not None:
			return '{0}<br><i>χ</i><sup>2</sup>({1}) = {2:.2f}; <i>p</i> {3}<br>OR ({4:g}% CI) = {5}<br>RR ({4:g}% CI) = {6}'.format(
				self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary())
		else:
			return '{}<br><i>χ</i><sup>2</sup>({}) = {:.2f}; <i>p</i> {}'.format(
				self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML))
	
	def summary(self):
		"""
		Return a stringified summary of the *χ*:sup:`2` test
		
		:rtype: str
		"""
		
		if self.oddsratio is not None:
			return '{0}\n\nχ²({1}) = {2:.2f}; p {3}\nOR ({4:g}% CI) = {5}\nRR ({4:g}% CI) = {6}'.format(
				self.ct, self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary())
		else:
			return '{}\n\nχ²({}) = {:.2f}; p {}'.format(
				self.ct, self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION))

def chi2(df, dep, ind, *, nan_policy='warn'):
	"""
	Perform a Pearson *χ*:sup:`2` test
	
	If a 2×2 contingency table is obtained (i.e. if both variables are dichotomous), an odds ratio and risk ratio are calculated.
	The ratios are calculated for the higher-valued value in each variable (i.e. *True* compared with *False* for a boolean).
	The risk ratio is calculated relative to the independent variable (rows of the contingency table).
	
	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (categorical)
	:type dep: str
	:param ind: Column in *df* for the independent variable (categorical)
	:type ind: str
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	
	:rtype: :class:`yli.sig_tests.PearsonChiSquaredResult`
	
	**Example:**
	
	.. code-block::
		
		df = pd.DataFrame({
			'Response': np.repeat([False, True, False, True], [250, 750, 400, 1600]),
			'Stress': np.repeat([False, False, True, True], [250, 750, 400, 1600])
		})
		yli.chi2(df, 'Stress', 'Response')
	
	.. code-block:: text
		
		Stress    False  True
		Response             
		False       250   400
		True        750  1600
		
		χ²(1) = 9.82; p = 0.002*
		OR (95% CI) = 1.33 (1.11–1.60)
		RR (95% CI) = 1.11 (1.03–1.18)
	
	The output shows the contingency table, and states that the value of the Pearson *χ*:sup:`2` statistic is 9.82, the *χ*:sup:`2` distribution has 1 degree of freedom, and the test is significant with *p* value 0.002.
	
	The odds of *Stress* in the *Response* = *True* group are 1.33 times that in the *Response* = *False* group, with 95% confidence interval 1.11–1.60.
	
	The risk of *Stress* in the *Response* = *True* group is 1.11 that in the *Response* = *False* group, with 95% confidence interval 1.03–1.18.
	"""
	
	# Check for/clean NaNs
	df = check_nan(df[[ind, dep]], nan_policy)
	
	# Compute contingency table
	ct = pd.crosstab(df[ind], df[dep])
	
	# Get expected counts
	expected = stats.contingency.expected_freq(ct)
	
	# Warn on low expected counts
	if (expected < 5).sum() / expected.size > 0.2:
		warnings.warn('{} of {} cells ({:.0f}%) have expected count < 5'.format((expected < 5).sum(), expected.size, (expected < 5).sum() / expected.size * 100))
	if (expected < 1).any():
		warnings.warn('{} cells have expected count < 1'.format((expected < 1).sum()))
	
	if ct.shape == (2,2):
		# 2x2 table
		# Use statsmodels to get OR and RR
		
		smct = sm.stats.Table2x2(np.flip(ct.to_numpy()), shift_zeros=False)
		result = smct.test_nominal_association()
		ORci = smct.oddsratio_confint(config.alpha)
		RRci = smct.riskratio_confint(config.alpha)
		
		return PearsonChiSquaredResult(
			ct=ct, statistic=result.statistic, dof=result.df, pvalue=result.pvalue,
			oddsratio=Estimate(smct.oddsratio, ORci[0], ORci[1]), riskratio=Estimate(smct.riskratio, RRci[0], RRci[1]))
	else:
		# rxc table
		# Just use SciPy
		
		result = stats.chi2_contingency(ct, correction=False)
		
		return PearsonChiSquaredResult(ct=ct, statistic=result[0], dof=result[2], pvalue=result[1])

# -------------------
# Pearson correlation

class PearsonRResult:
	"""
	Result of Pearson correlation
	
	See :func:`yli.pearsonr`.
	"""
	
	def __init__(self, statistic, pvalue):
		#: Pearson *r* correlation statistic (*float*)
		self.statistic = statistic
		#: *p* value for the *r* statistic (*float*)
		self.pvalue = pvalue
	
	def __repr__(self):
		if config.repr_is_summary:
			return self.summary()
		return super().__repr__()
	
	def _repr_html_(self):
		return '<i>r</i> ({:g}% CI) = {}; <i>p</i> {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML))
	
	def summary(self):
		"""
		Return a stringified summary of the Pearson correlation
		
		:rtype: str
		"""
		
		return 'r ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, PValueStyle.RELATION))

def pearsonr(df, dep, ind, *, nan_policy='warn'):
	"""
	Compute the Pearson correlation coefficient (Pearson's *r*)
	
	:param df: Data to perform the test on
	:type df: DataFrame
	:param dep: Column in *df* for the dependent variable (numerical)
	:type dep: str
	:param ind: Column in *df* for the independent variable (numerical)
	:type ind: str
	:param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
	:type nan_policy: str
	
	:rtype: :class:`yli.sig_tests.PearsonRResult`
	
	**Example:**
	
	.. code-block::
		
		df = pd.DataFrame({
			'y': [41, 39, 47, 51, 43, 40, 57, 46, 50, 59, 61, 52],
			'x': [24, 30, 33, 35, 36, 36, 37, 37, 38, 40, 43, 49]
		})
		yli.pearsonr(df, 'y', 'x')
	
	.. code-block:: text
		
		r (95% CI) = 0.65 (0.11–0.89); p = 0.02*
	
	The output states that the value of the Pearson correlation coefficient is 0.65, with 95% confidence interval 0.11–0.89, and the test is significant with *p* value 0.02.
	"""
	
	# Check for/clean NaNs
	df = check_nan(df[[ind, dep]], nan_policy)
	
	# Compute Pearson's r
	result = stats.pearsonr(df[ind], df[dep])
	ci = result.confidence_interval()
	
	return PearsonRResult(statistic=Estimate(result.statistic, ci.low, ci.high), pvalue=result.pvalue)