2022-10-11 22:52:42 +11:00
|
|
|
# scipy-yli: Helpful SciPy utilities and recipes
|
|
|
|
# Copyright © 2022 Lee Yingtong Li (RunasSudo)
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU Affero General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
from scipy import stats
|
|
|
|
import statsmodels.api as sm
|
|
|
|
|
|
|
|
import functools
|
|
|
|
import warnings
|
|
|
|
|
2022-10-13 12:53:18 +11:00
|
|
|
from .utils import Estimate, as_2groups, check_nan, fmt_p_html, fmt_p_text
|
2022-10-11 22:52:42 +11:00
|
|
|
|
2022-10-13 12:53:18 +11:00
|
|
|
# ----------------
|
|
|
|
# Student's t test
|
2022-10-11 22:52:42 +11:00
|
|
|
|
|
|
|
class TTestResult:
    """
    Result of a Student's t test

    statistic: t statistic
    dof: Degrees of freedom, displayed in brackets after "t"
    pvalue: p value, rendered via fmt_p_html/fmt_p_text
    delta: Mean difference; must provide a summary() method for display
    delta_direction: Text appended after the mean difference to describe its direction
    """

    def __init__(self, statistic, dof, pvalue, delta, delta_direction):
        # Test statistic, degrees of freedom and p value
        self.statistic, self.dof, self.pvalue = statistic, dof, pvalue
        # Mean difference and the description of its direction
        self.delta, self.delta_direction = delta, delta_direction

    def _repr_html_(self):
        """Return the result as an HTML fragment (the hook used for rich notebook display)"""
        template = '<i>t</i>({:.0f}) = {:.2f}; <i>p</i> {}<br><i>δ</i> (95% CI) = {}, {}'
        return template.format(
            self.dof,
            self.statistic,
            fmt_p_html(self.pvalue),
            self.delta.summary(),
            self.delta_direction,
        )

    def summary(self):
        """Return the result as a 2-line plain text string"""
        template = 't({:.0f}) = {:.2f}; p {}\nδ (95% CI) = {}, {}'
        return template.format(
            self.dof,
            self.statistic,
            fmt_p_text(self.pvalue),
            self.delta.summary(),
            self.delta_direction,
        )
|
2022-10-11 22:52:42 +11:00
|
|
|
|
|
|
|
def ttest_ind(df, dep, ind, *, nan_policy='warn'):
    """
    Perform an independent-sample Student's t test

    df: Data to perform the test on; only the *ind* and *dep* columns are used
    dep: Column in *df* for the dependent variable
    ind: Column in *df* for the independent (grouping) variable, which must yield 2 groups
    nan_policy: Passed through to check_nan to control how NaNs are handled

    Returns a TTestResult holding the absolute t statistic, the degrees of freedom,
    the p value, the absolute mean difference with its confidence interval,
    and text stating which group has the larger mean.
    """

    # Check for/clean NaNs, looking only at the 2 columns of interest
    cleaned = check_nan(df[[ind, dep]], nan_policy)

    # Ensure 2 groups for ind
    group1, data1, group2, data2 = as_2groups(cleaned, dep, ind)

    # Do t test
    # Use statsmodels rather than SciPy because this provides the mean difference automatically
    stats1 = sm.stats.DescrStatsW(data1)
    stats2 = sm.stats.DescrStatsW(data2)
    comparison = sm.stats.CompareMeans(stats1, stats2)

    statistic, pvalue, dof = comparison.ttest_ind()
    ci_lower, ci_upper = comparison.tconfint_diff()
    mean_diff = stats1.mean - stats2.mean

    # Name the group with the larger mean first
    if stats1.mean > stats2.mean:
        direction_template = '{0} > {1}'
    else:
        direction_template = '{1} > {0}'

    # t test is symmetric so take absolute values
    return TTestResult(
        statistic=abs(statistic),
        dof=dof,
        pvalue=pvalue,
        delta=abs(Estimate(mean_diff, ci_lower, ci_upper)),
        delta_direction=direction_template.format(group1, group2),
    )
|