Implement ttest_ind_multiple
This commit is contained in:
parent
2852d3dd19
commit
844e6bdec9
@ -18,6 +18,8 @@ Functions
|
|||||||
|
|
||||||
.. autofunction:: yli.ttest_ind
|
.. autofunction:: yli.ttest_ind
|
||||||
|
|
||||||
|
.. autofunction:: yli.ttest_ind_multiple
|
||||||
|
|
||||||
Result classes
|
Result classes
|
||||||
--------------
|
--------------
|
||||||
|
|
||||||
@ -36,6 +38,9 @@ Result classes
|
|||||||
.. autoclass:: yli.sig_tests.MannWhitneyResult
|
.. autoclass:: yli.sig_tests.MannWhitneyResult
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
.. autoclass:: yli.sig_tests.MultipleTTestResult
|
||||||
|
:members:
|
||||||
|
|
||||||
.. autoclass:: yli.sig_tests.PearsonChiSquaredResult
|
.. autoclass:: yli.sig_tests.PearsonChiSquaredResult
|
||||||
:members:
|
:members:
|
||||||
:inherited-members:
|
:inherited-members:
|
||||||
|
@ -21,7 +21,7 @@ from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
|
|||||||
from .graphs import init_fonts
|
from .graphs import init_fonts
|
||||||
from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
|
from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
|
||||||
from .regress import IntervalCensoredCox, Logit, OLS, OrdinalLogit, PenalisedLogit, regress, vif
|
from .regress import IntervalCensoredCox, Logit, OLS, OrdinalLogit, PenalisedLogit, regress, vif
|
||||||
from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, spearman, ttest_ind
|
from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, spearman, ttest_ind, ttest_ind_multiple
|
||||||
from .survival import kaplanmeier, logrank, turnbull
|
from .survival import kaplanmeier, logrank, turnbull
|
||||||
from .utils import as_ordinal
|
from .utils import as_ordinal
|
||||||
|
|
||||||
|
@ -180,6 +180,98 @@ def ttest_ind(df, dep, ind, *, nan_policy='warn'):
|
|||||||
delta=Estimate(delta, ci0, ci1),
|
delta=Estimate(delta, ci0, ci1),
|
||||||
delta_direction='{} > {}'.format(group1, group2))
|
delta_direction='{} > {}'.format(group1, group2))
|
||||||
|
|
||||||
|
class MultipleTTestResult:
    """
    Result of multiple Student's *t* tests, adjusted for multiplicity

    See :func:`yli.ttest_ind_multiple`.
    """

    def __init__(self, *, dep, results):
        #: Name of the dependent variable (*str*)
        self.dep = dep
        #: Results of the *t* tests (*List[*\ :class:`TTestResult`\ *]*)
        self.results = results

    def _comparison_table(self, html):
        """
        Return a table showing the means/SDs for each group

        The table has one row per *t* test (labelled by its independent variable), one column per group, and a final column for the *p* value.

        :param html: Whether to return HTML markup (*True*) or plain text (*False*)
        :type html: bool

        :raises ValueError: If the *t* tests do not all compare the same pair of groups

        :rtype: str
        """

        # The first test fixes the column order; every other test is aligned to it
        group1 = self.results[0].group1
        group2 = self.results[0].group2

        # Validate every test before formatting anything, so a mismatch fails fast
        swapped_flags = []
        for row in self.results:
            if row.group1 == group1 and row.group2 == group2:
                swapped_flags.append(False)
            elif row.group1 == group2 and row.group2 == group1:
                swapped_flags.append(True)
            else:
                raise ValueError('t tests have different groups')

        # TODO: Render HTML directly so can have proper HTML p values
        table_data = []
        for row, swapped in zip(self.results, swapped_flags):
            cells = [
                '{:.2f} ({:.2f})'.format(row.mu1, row.sd1),
                '{:.2f} ({:.2f})'.format(row.mu2, row.sd2),
            ]

            # Display the cells the right way around
            if swapped:
                cells.reverse()

            table_data.append(cells + [fmt_p(row.pvalue, PValueStyle.TABULAR)])

        # U+E000 and U+E001 are in the Private Use Area: they are placeholders for
        # the μ and p symbols, replaced with italic markup once the HTML is rendered
        if html:
            index_name, pvalue_name = '\ue000 (SD)', '\ue001'
        else:
            index_name, pvalue_name = 'μ (SD)', 'p'

        table = pd.DataFrame(
            table_data,
            index=pd.Index([row.ind for row in self.results], name=index_name),
            columns=pd.Index([group1, group2, pvalue_name], name=self.dep))

        if not html:
            return str(table)

        return table._repr_html_().replace('\ue000', '<i>μ</i>').replace('\ue001', '<i>p</i>')

    def __repr__(self):
        # When configured, the plain repr doubles as the human-readable summary
        if config.repr_is_summary:
            return self.summary()
        return super().__repr__()

    def _repr_html_(self):
        return self._comparison_table(True)

    def summary(self):
        """
        Return a stringified summary of the *t* tests

        :rtype: str
        """

        return self._comparison_table(False)
|
||||||
|
|
||||||
|
def ttest_ind_multiple(df, dep, inds, *, nan_policy='warn', method='hs'):
    """
    Perform independent 2-sample Student's *t* tests with multiple independent variables, adjusting for multiplicity

    One *t* test is performed per independent variable via :func:`yli.ttest_ind`. The *p* value of each resulting test is then replaced by its multiplicity-adjusted value; no other field of the individual results is altered by this function.

    :param df: Data to perform the test on
    :type df: DataFrame
    :param dep: Column in *df* for the dependent variable (numeric)
    :type dep: str
    :param inds: Columns in *df* for the independent variables (dichotomous)
    :type inds: List[str]
    :param nan_policy: How to handle *nan* values (see :ref:`nan-handling`)
    :type nan_policy: str
    :param method: Method to apply for multiplicity adjustment, by default ``'hs'`` (see `statsmodels multipletests <https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html>`_)
    :type method: str

    :rtype: :class:`yli.sig_tests.MultipleTTestResult`
    """

    # TODO: Unit testing
    # FIXME: Assert groups of independent variables have same levels

    # Perform t tests
    results = [ttest_ind(df, dep, ind, nan_policy=nan_policy) for ind in inds]

    # Adjust for multiplicity
    _, pvalues_corrected, _, _ = sm.stats.multipletests([result.pvalue for result in results], alpha=config.alpha, method=method)

    # Overwrite each raw p value with its adjusted counterpart
    for result, pvalue_corrected in zip(results, pvalues_corrected):
        result.pvalue = pvalue_corrected

    return MultipleTTestResult(dep=dep, results=results)
|
||||||
|
|
||||||
# -------------
|
# -------------
|
||||||
# One-way ANOVA
|
# One-way ANOVA
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user