From 844e6bdec9e60d9aae58bec84f64179d16842e95 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Thu, 20 Apr 2023 15:41:03 +1000 Subject: [PATCH] Implement ttest_ind_multiple --- docs/sig_tests.rst | 5 +++ yli/__init__.py | 2 +- yli/sig_tests.py | 92 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 1 deletion(-) diff --git a/docs/sig_tests.rst b/docs/sig_tests.rst index 86b8865..8359a30 100644 --- a/docs/sig_tests.rst +++ b/docs/sig_tests.rst @@ -18,6 +18,8 @@ Functions .. autofunction:: yli.ttest_ind +.. autofunction:: yli.ttest_ind_multiple + Result classes -------------- @@ -36,6 +38,9 @@ Result classes .. autoclass:: yli.sig_tests.MannWhitneyResult :members: +.. autoclass:: yli.sig_tests.MultipleTTestResult + :members: + .. autoclass:: yli.sig_tests.PearsonChiSquaredResult :members: :inherited-members: diff --git a/yli/__init__.py b/yli/__init__.py index 9c5f614..0dd33f5 100644 --- a/yli/__init__.py +++ b/yli/__init__.py @@ -21,7 +21,7 @@ from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist from .graphs import init_fonts from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted from .regress import IntervalCensoredCox, Logit, OLS, OrdinalLogit, PenalisedLogit, regress, vif -from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, spearman, ttest_ind +from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, spearman, ttest_ind, ttest_ind_multiple from .survival import kaplanmeier, logrank, turnbull from .utils import as_ordinal diff --git a/yli/sig_tests.py b/yli/sig_tests.py index 359c016..24b6d98 100644 --- a/yli/sig_tests.py +++ b/yli/sig_tests.py @@ -180,6 +180,98 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'): delta=Estimate(delta, ci0, ci1), delta_direction='{} > {}'.format(group1, group2)) +class MultipleTTestResult: + """ + Result of multiple Student's *t* tests, adjusted for multiplicity + 
+ See :func:`yli.ttest_ind_multiple`. + """ + + def __init__(self, *, dep, results): + #: Name of the dependent variable (*str*) + self.dep = dep + #: Results of the *t* tests (*List[*\ :class:`TTestResult`\ *]*) + self.results = results + + def _comparison_table(self, html): + """Return a table showing the means/SDs for each group""" + + group1 = self.results[0].group1 + group2 = self.results[0].group2 + + # TODO: Render HTML directly so can have proper HTML p values + table_data = [] + for row in self.results: + cell1 = '{:.2f} ({:.2f})'.format(row.mu1, row.sd1) + cell2 = '{:.2f} ({:.2f})'.format(row.mu2, row.sd2) + cell_pvalue = fmt_p(row.pvalue, PValueStyle.TABULAR) + + # Display the cells the right way around + if row.group1 == group1 and row.group2 == group2: + table_data.append([cell1, cell2, cell_pvalue]) + elif row.group1 == group2 and row.group2 == group1: + table_data.append([cell2, cell1, cell_pvalue]) + else: + raise Exception('t tests have different groups') + + if html: + table = pd.DataFrame(table_data, index=pd.Index([row.ind for row in self.results], name='\ue000 (SD)'), columns=pd.Index([self.results[0].group1, self.results[0].group2, '\ue001'], name=self.dep)) # U+E000 is in Private Use Area, mark μ symbol + table_str = table._repr_html_() + return table_str.replace('\ue000', 'μ').replace('\ue001', 'p') + else: + table = pd.DataFrame(table_data, index=pd.Index([row.ind for row in self.results], name='μ (SD)'), columns=pd.Index([self.results[0].group1, self.results[0].group2, 'p'], name=self.dep)) + return str(table) + + def __repr__(self): + if config.repr_is_summary: + return self.summary() + return super().__repr__() + + def _repr_html_(self): + return self._comparison_table(True) + + def summary(self): + """ + Return a stringified summary of the *t* tests + + :rtype: str + """ + return str(self._comparison_table(False)) + +def ttest_ind_multiple(df, dep, inds, *, nan_policy='warn', method='hs'): + """ + Perform independent 2-sample Student's 
*t* tests with multiple independent variables, adjusting for multiplicity + + :param df: Data to perform the test on + :type df: DataFrame + :param dep: Column in *df* for the dependent variable (numeric) + :type dep: str + :param inds: Columns in *df* for the independent variables (dichotomous) + :type inds: List[str] + :param nan_policy: How to handle *nan* values (see :ref:`nan-handling`) + :type nan_policy: str + :param method: Method to apply for multiplicity adjustment (see `statsmodels multipletests <https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html>`_) + :type method: str + + :rtype: :class:`yli.sig_tests.MultipleTTestResult` + """ + + # TODO: Unit testing + # FIXME: Assert groups of independent variables have same levels + + # Perform t tests + results = [] + for ind in inds: + results.append(ttest_ind(df, dep, ind, nan_policy=nan_policy)) + + # Adjust for multiplicity + _, pvalues_corrected, _, _ = sm.stats.multipletests([result.pvalue for result in results], alpha=config.alpha, method=method) + + for result, pvalue_corrected in zip(results, pvalues_corrected): + result.pvalue = pvalue_corrected + + return MultipleTTestResult(dep=dep, results=results) + # ------------- # One-way ANOVA