# scipy-yli: Helpful SciPy utilities and recipes
# Copyright © 2022 Lee Yingtong Li (RunasSudo)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import pandas as pd
from scipy import stats
import statsmodels.api as sm

import functools
import warnings

from .utils import Estimate, as_2groups, check_nan, fmt_p_html, fmt_p_text

# ----------------
# Student's t test


class TTestResult:
    """
    Result of a Student's t test

    Attributes:
        statistic: t statistic
        dof: Degrees of freedom
        pvalue: p value of the test
        delta: Mean difference; must provide a ``summary()`` method, which is
            used when rendering the result
        delta_direction: Text describing which group has the larger mean
    """

    def __init__(self, statistic, dof, pvalue, delta, delta_direction):
        self.statistic = statistic
        self.dof = dof
        self.pvalue = pvalue
        self.delta = delta
        self.delta_direction = delta_direction

    def _repr_html_(self):
        """Return an HTML rendering of the result (IPython/Jupyter rich-display hook)"""
        # HTML ignores raw newlines, so the two-line layout needs an explicit <br>
        # (the plain-text equivalent in summary() uses \n at the same position)
        return 't({:.0f}) = {:.2f}; p {}<br>δ (95% CI) = {}, {}'.format(
            self.dof,
            self.statistic,
            fmt_p_html(self.pvalue),
            self.delta.summary(),
            self.delta_direction,
        )

    def summary(self):
        """Return a two-line plain-text rendering of the result"""
        return 't({:.0f}) = {:.2f}; p {}\nδ (95% CI) = {}, {}'.format(
            self.dof,
            self.statistic,
            fmt_p_text(self.pvalue),
            self.delta.summary(),
            self.delta_direction,
        )


def ttest_ind(df, dep, ind, *, nan_policy='warn'):
    """
    Perform an independent-sample Student's t test

    Parameters:
        df: Data to analyse; indexed as ``df[[ind, dep]]``, so presumably a
            pandas DataFrame (confirm against callers)
        dep: Column name of the dependent variable
        ind: Column name of the grouping variable, which must identify 2 groups
        nan_policy: Passed through to ``check_nan`` to control NaN handling

    Returns:
        A TTestResult holding the absolute t statistic, the degrees of freedom,
        the p value, the absolute mean difference with its confidence interval,
        and a description of which group has the larger mean.
    """

    # Check for/clean NaNs
    df = check_nan(df[[ind, dep]], nan_policy)

    # Ensure 2 groups for ind
    group1, data1, group2, data2 = as_2groups(df, dep, ind)

    # Do t test
    # Use statsmodels rather than SciPy because this provides the mean difference automatically
    d1 = sm.stats.DescrStatsW(data1)
    d2 = sm.stats.DescrStatsW(data2)

    cm = sm.stats.CompareMeans(d1, d2)
    statistic, pvalue, dof = cm.ttest_ind()

    delta = d1.mean - d2.mean
    ci0, ci1 = cm.tconfint_diff()

    # Direction is reported separately because the magnitudes below are made absolute
    # NOTE: if the two means are exactly equal, the second form ("group2 > group1") is used
    direction_template = '{0} > {1}' if d1.mean > d2.mean else '{1} > {0}'

    # t test is symmetric so take absolute values
    # (abs() on an Estimate relies on Estimate implementing __abs__ - defined in .utils)
    return TTestResult(
        statistic=abs(statistic),
        dof=dof,
        pvalue=pvalue,
        delta=abs(Estimate(delta, ci0, ci1)),
        delta_direction=direction_template.format(group1, group2),
    )