From d8dcece09dbe661742d89061cb4aeb528e14f9c3 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Wed, 9 Nov 2022 17:39:33 +1100 Subject: [PATCH] Refactor fmt_p Combine html, only_value, tabular flags into a single Flag --- docs/internal.rst | 20 ++++++++++++++++ tests/test_fmt_pvalues.py | 36 ++++++++++++++-------------- yli/regress.py | 22 ++++++++--------- yli/sig_tests.py | 30 +++++++++++------------ yli/utils.py | 50 ++++++++++++++++++++++----------------- 5 files changed, 92 insertions(+), 66 deletions(-) diff --git a/docs/internal.rst b/docs/internal.rst index 56fc5ff..0774bac 100644 --- a/docs/internal.rst +++ b/docs/internal.rst @@ -13,6 +13,26 @@ Data wrangling .. autofunction:: yli.utils.fmt_p +.. autoclass:: yli.utils.PValueStyle + + .. attribute:: VALUE_ONLY + + Display only the *p* value (e.g. ``0.08``, ``<0.001*``) + + This is an alias for specifying no flags. + + .. attribute:: RELATION + + Force displaying a relational operator before the *p* value (e.g. ``= 0.08``, ``< 0.001*``) + + .. attribute:: TABULAR + + Pad with spaces to ensure decimal points align (incompatible with :attr:`RELATION`) + + .. attribute:: HTML + + Format as HTML (i.e. escape ``<``) + Formula manipulation -------------------- diff --git a/tests/test_fmt_pvalues.py b/tests/test_fmt_pvalues.py index 52988f4..19fdc3a 100644 --- a/tests/test_fmt_pvalues.py +++ b/tests/test_fmt_pvalues.py @@ -16,7 +16,7 @@ import yli from yli.config import config -from yli.utils import fmt_p +from yli.utils import PValueStyle, fmt_p def test_fmt_pvalues_ord(): """Test formatting of p values requiring no special handling""" @@ -27,11 +27,11 @@ def test_fmt_pvalues_ord(): config.pvalue_leading_zero = True config.alpha = 0.05 - assert fmt_p(0.0096, html=False) == '= 0.01*' - assert fmt_p(0.01, html=False) == '= 0.01*' - assert fmt_p(0.04, html=False) == '= 0.04*' - assert fmt_p(0.11, html=False) == '= 0.11' - assert fmt_p(0.55, html=False) == '= 0.55' + assert fmt_p(0.0096, PValueStyle.RELATION) == '= 0.01*' + assert fmt_p(0.01, PValueStyle.RELATION) == '= 0.01*' + assert fmt_p(0.04, PValueStyle.RELATION) == '= 0.04*' + assert fmt_p(0.11, PValueStyle.RELATION) == '= 0.11' + assert fmt_p(0.55, PValueStyle.RELATION) == '= 0.55' def test_fmt_pvalues_ord_noleadingzero(): """Test formatting of p values requiring no special handling, no leading zero""" @@ -42,11 +42,11 @@ def test_fmt_pvalues_ord_noleadingzero(): config.pvalue_leading_zero = False config.alpha = 0.05 - assert fmt_p(0.0096, html=False) == '= .01*' - assert fmt_p(0.01, html=False) == '= .01*' - assert fmt_p(0.04, html=False) == '= .04*' - assert fmt_p(0.11, html=False) == '= .11' - assert fmt_p(0.55, html=False) == '= .55' + assert fmt_p(0.0096, PValueStyle.RELATION) == '= .01*' + assert fmt_p(0.01, PValueStyle.RELATION) == '= .01*' + assert fmt_p(0.04, PValueStyle.RELATION) == '= .04*' + assert fmt_p(0.11, PValueStyle.RELATION) == '= .11' + assert fmt_p(0.55, PValueStyle.RELATION) == '= .55' def test_fmt_pvalues_small(): """Test formatting of small p values requiring extra decimal points to represent""" @@ -57,7 +57,7 @@ def test_fmt_pvalues_small(): config.pvalue_leading_zero = True config.alpha = 0.05 - assert fmt_p(0.009, html=False) == '= 0.009*' + assert fmt_p(0.009, PValueStyle.RELATION) == '= 0.009*' def test_fmt_pvalues_ambiguous(): """Test formatting of p values requiring extra decimal points to avoid ambiguity""" @@ -68,12 +68,12 @@ def test_fmt_pvalues_ambiguous(): config.pvalue_leading_zero = True config.alpha = 0.05 - assert fmt_p(0.048, html=False) == '= 0.048*' - assert fmt_p(0.052, html=False) == '= 0.052' + assert fmt_p(0.048, PValueStyle.RELATION) == '= 0.048*' + assert fmt_p(0.052, PValueStyle.RELATION) == '= 0.052' # Special rounding rules - assert fmt_p(0.04999, html=False) == '= 0.049*' - assert fmt_p(0.05001, html=False) == '= 0.051' + assert fmt_p(0.04999, PValueStyle.RELATION) == '= 0.049*' + assert fmt_p(0.05001, PValueStyle.RELATION) == '= 0.051' def test_fmt_pvalues_extreme(): """Test formatting of p values too small or large to be represented""" @@ -84,5 +84,5 @@ def test_fmt_pvalues_extreme(): config.pvalue_leading_zero = True config.alpha = 0.05 - assert fmt_p(0.0009, html=False) == '< 0.001*' - assert fmt_p(0.999, html=False) == '> 0.99' + assert fmt_p(0.0009, PValueStyle.RELATION) == '< 0.001*' + assert fmt_p(0.999, PValueStyle.RELATION) == '> 0.99' diff --git a/yli/regress.py b/yli/regress.py index b386302..ad4ac83 100644 --- a/yli/regress.py +++ b/yli/regress.py @@ -29,7 +29,7 @@ import warnings from .bayes_factors import BayesFactor, bayesfactor_afbf from .config import config from .sig_tests import FTestResult -from .utils import Estimate, check_nan, cols_for_formula, convert_pandas_nullable, fmt_p, formula_factor_ref_category, parse_patsy_term +from .utils import Estimate, PValueStyle, check_nan, cols_for_formula, convert_pandas_nullable, fmt_p, formula_factor_ref_category, parse_patsy_term def vif(df, formula=None, *, nan_policy='warn'): """ @@ -112,7 +112,7 @@ class LikelihoodRatioTestResult: return super().__repr__() def _repr_html_(self): - return 'LR({}) = {:.2f}; p {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=True)) + return 'LR({}) = {:.2f}; p {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML)) def summary(self): """ @@ -121,7 +121,7 @@ class LikelihoodRatioTestResult: :rtype: str """ - return 'LR({}) = {:.2f}; p {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False)) + return 'LR({}) = {:.2f}; p {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION)) class RegressionResult: """ @@ -272,10 +272,10 @@ class RegressionResult: if html: right_col.append(('F:', format(f_result.statistic, '.2f'))) - right_col.append(('p (F):', fmt_p(f_result.pvalue, html=True, only_value=True))) + right_col.append(('p (F):', fmt_p(f_result.pvalue, PValueStyle.VALUE_ONLY | PValueStyle.HTML))) else: right_col.append(('F:', format(f_result.statistic, '.2f'))) - right_col.append(('p (F):', fmt_p(f_result.pvalue, html=False, only_value=True))) + right_col.append(('p (F):', fmt_p(f_result.pvalue, PValueStyle.VALUE_ONLY))) else: # Otherwise report likelihood ratio test as overall test lrtest_result = self.lrtest_null() @@ -283,9 +283,9 @@ class RegressionResult: right_col.append(('LL-Model:', format(self.llf, '.2f'))) right_col.append(('LL-Null:', format(self.llnull, '.2f'))) if html: - right_col.append(('p (LR):', fmt_p(lrtest_result.pvalue, html=True, only_value=True))) + right_col.append(('p (LR):', fmt_p(lrtest_result.pvalue, PValueStyle.VALUE_ONLY | PValueStyle.HTML))) else: - right_col.append(('p (LR):', fmt_p(lrtest_result.pvalue, html=False, only_value=True))) + right_col.append(('p (LR):', fmt_p(lrtest_result.pvalue, PValueStyle.VALUE_ONLY))) return left_col, right_col @@ -320,7 +320,7 @@ class RegressionResult: if self.exp: beta = np.exp(beta) - out += '{}{:.2f}({:.2f}–{:.2f}){}'.format(term_name, beta.point, beta.ci_lower, beta.ci_upper, fmt_p(term.pvalue, html=True, tabular=True)) + out += '{}{:.2f}({:.2f}–{:.2f}){}'.format(term_name, beta.point, beta.ci_lower, beta.ci_upper, fmt_p(term.pvalue, PValueStyle.TABULAR | PValueStyle.HTML)) elif isinstance(term, CategoricalTerm): # Categorical term out += '{}'.format(term_name) @@ -335,7 +335,7 @@ class RegressionResult: if self.exp: beta = np.exp(beta) - out += '{}{:.2f}({:.2f}–{:.2f}){}'.format(sub_term_name, beta.point, beta.ci_lower, beta.ci_upper, fmt_p(sub_term.pvalue, html=True, tabular=True)) + out += '{}{:.2f}({:.2f}–{:.2f}){}'.format(sub_term_name, beta.point, beta.ci_lower, beta.ci_upper, fmt_p(sub_term.pvalue, PValueStyle.TABULAR | PValueStyle.HTML)) else: raise Exception('Attempt to render unknown term type') @@ -381,7 +381,7 @@ class RegressionResult: beta = np.exp(beta) # Add some extra padding - table_data.append([term_name + ' ', format(beta.point, '.2f'), '({:.2f}'.format(beta.ci_lower), '-', '{:.2f})'.format(beta.ci_upper), ' ' + fmt_p(term.pvalue, html=False, tabular=True)]) + table_data.append([term_name + ' ', format(beta.point, '.2f'), '({:.2f}'.format(beta.ci_lower), '-', '{:.2f})'.format(beta.ci_upper), ' ' + fmt_p(term.pvalue, PValueStyle.TABULAR)]) elif isinstance(term, CategoricalTerm): # Categorical term table_data.append([term_name + ' ', '', '', '', '', '']) @@ -396,7 +396,7 @@ class RegressionResult: if self.exp: beta = np.exp(beta) - table_data.append([sub_term_name + ' ', format(beta.point, '.2f'), '({:.2f}'.format(beta.ci_lower), '-', '{:.2f})'.format(beta.ci_upper), ' ' + fmt_p(sub_term.pvalue, html=False, tabular=True)]) + table_data.append([sub_term_name + ' ', format(beta.point, '.2f'), '({:.2f}'.format(beta.ci_lower), '-', '{:.2f})'.format(beta.ci_upper), ' ' + fmt_p(sub_term.pvalue, PValueStyle.TABULAR)]) else: raise Exception('Attempt to render unknown term type') diff --git a/yli/sig_tests.py b/yli/sig_tests.py index f01cd83..d2a7f03 100644 --- a/yli/sig_tests.py +++ b/yli/sig_tests.py @@ -23,7 +23,7 @@ import functools import warnings from .config import config -from .utils import Estimate, as_2groups, check_nan, convert_pandas_nullable, fmt_p +from .utils import Estimate, PValueStyle, as_2groups, check_nan, convert_pandas_nullable, fmt_p # ---------------- # Student's t test @@ -65,7 +65,7 @@ class TTestResult: return super().__repr__() def _repr_html_(self): - return 't({:.0f}) = {:.2f}; p {}
μ{} (SD) = {:.2f} ({:.2f}), μ{} (SD) = {:.2f} ({:.2f})
Δμ ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=True), self.group1, self.mu1, self.sd1, self.group2, self.mu2, self.sd2, (1-config.alpha)*100, self.delta.summary(), self.delta_direction) + return 't({:.0f}) = {:.2f}; p {}
μ{} (SD) = {:.2f} ({:.2f}), μ{} (SD) = {:.2f} ({:.2f})
Δμ ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML), self.group1, self.mu1, self.sd1, self.group2, self.mu2, self.sd2, (1-config.alpha)*100, self.delta.summary(), self.delta_direction) def summary(self): """ @@ -74,7 +74,7 @@ class TTestResult: :rtype: str """ - return 't({:.0f}) = {:.2f}; p {}\nμ({}) (SD) = {:.2f} ({:.2f}), μ({}) (SD) = {:.2f} ({:.2f})\nΔμ ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False), self.group1, self.mu1, self.sd1, self.group2, self.mu2, self.sd2, (1-config.alpha)*100, self.delta.summary(), self.delta_direction) + return 't({:.0f}) = {:.2f}; p {}\nμ({}) (SD) = {:.2f} ({:.2f}), μ({}) (SD) = {:.2f} ({:.2f})\nΔμ ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION), self.group1, self.mu1, self.sd1, self.group2, self.mu2, self.sd2, (1-config.alpha)*100, self.delta.summary(), self.delta_direction) def ttest_ind(df, dep, ind, *, nan_policy='warn'): """ @@ -166,7 +166,7 @@ class FTestResult: return super().__repr__() def _repr_html_(self): - return 'F({:.0f}, {:.0f}) = {:.2f}; p {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=True)) + return 'F({:.0f}, {:.0f}) = {:.2f}; p {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML)) def summary(self): """ @@ -175,7 +175,7 @@ class FTestResult: :rtype: str """ - return 'F({:.0f}, {:.0f}) = {:.2f}; p {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=False)) + return 'F({:.0f}, {:.0f}) = {:.2f}; p {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION)) def anova_oneway(df, dep, ind, *, nan_policy='omit'): """ @@ -252,7 +252,7 @@ class MannWhitneyResult: return super().__repr__() def _repr_html_(self): - line1 = 'U = {:.1f}; p {}
r = {:.2f}, {}'.format(self.statistic, fmt_p(self.pvalue, html=True), self.rank_biserial, self.direction) + line1 = 'U = {:.1f}; p {}
r = {:.2f}, {}'.format(self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML), self.rank_biserial, self.direction) if self.brunnermunzel: return line1 + '
' + self.brunnermunzel._repr_html_() else: @@ -265,7 +265,7 @@ class MannWhitneyResult: :rtype: str """ - line1 = 'U = {:.1f}; p {}\nr = {:.2f}, {}'.format(self.statistic, fmt_p(self.pvalue, html=False), self.rank_biserial, self.direction) + line1 = 'U = {:.1f}; p {}\nr = {:.2f}, {}'.format(self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION), self.rank_biserial, self.direction) if self.brunnermunzel: return line1 + '\n' + self.brunnermunzel.summary() else: @@ -292,7 +292,7 @@ class BrunnerMunzelResult: return super().__repr__() def _repr_html_(self): - return 'W = {:.1f}; p {}'.format(self.statistic, fmt_p(self.pvalue, html=True)) + return 'W = {:.1f}; p {}'.format(self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML)) def summary(self): """ @@ -301,7 +301,7 @@ class BrunnerMunzelResult: :rtype: str """ - return 'W = {:.1f}; p {}'.format(self.statistic, fmt_p(self.pvalue, html=False)) + return 'W = {:.1f}; p {}'.format(self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION)) def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_continuity=False, alternative='two-sided', method='auto'): """ @@ -415,10 +415,10 @@ class PearsonChiSquaredResult: def _repr_html_(self): if self.oddsratio is not None: return '{0}
χ2({1}) = {2:.2f}; p {3}
OR ({4:g}% CI) = {5}
RR ({4:g}% CI) = {6}'.format( - self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, html=True), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary()) + self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary()) else: return '{}
χ2({}) = {:.2f}; p {}'.format( - self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, html=True)) + self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML)) def summary(self): """ @@ -429,10 +429,10 @@ class PearsonChiSquaredResult: if self.oddsratio is not None: return '{0}\n\nχ²({1}) = {2:.2f}; p {3}\nOR ({4:g}% CI) = {5}\nRR ({4:g}% CI) = {6}'.format( - self.ct, self.dof, self.statistic, fmt_p(self.pvalue, html=False), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary()) + self.ct, self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary()) else: return '{}\n\nχ²({}) = {:.2f}; p {}'.format( - self.ct, self.dof, self.statistic, fmt_p(self.pvalue, html=False)) + self.ct, self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION)) def chi2(df, dep, ind, *, nan_policy='warn'): """ @@ -538,7 +538,7 @@ class PearsonRResult: return super().__repr__() def _repr_html_(self): - return 'r ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, html=True)) + return 'r ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML)) def summary(self): """ @@ -547,7 +547,7 @@ class PearsonRResult: :rtype: str """ - return 'r ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, html=False)) + return 'r ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, PValueStyle.RELATION)) def pearsonr(df, dep, ind, *, nan_policy='warn'): """ diff --git a/yli/utils.py b/yli/utils.py index 94514a2..6813ca8 100644 --- a/yli/utils.py +++ b/yli/utils.py @@ -18,6 +18,7 @@ import numpy as np import pandas as pd import patsy +import enum import warnings from .config import config @@ -145,25 +146,28 @@ def do_fmt_p(p): # OK to round to pvalue_min_dps return '', '{0:.{dps}f}'.format(p, dps=config.pvalue_min_dps) -def fmt_p(p, *, html, only_value=False, tabular=False): +class PValueStyle(enum.Flag): + """An *enum.Flag* representing how to render a *p* value""" + + VALUE_ONLY = 0 + + RELATION = enum.auto() + TABULAR = enum.auto() + HTML = enum.auto() + +def fmt_p(p, style): """ Format *p* value for display :param p: *p* value to display :type p: float - :param html: Whether to output as HTML (*True*) or plaintext (*False*) - :type html: bool - :param only_value: Whether to display only the value (*True*, e.g. ``0.04``, ``<0.001``) or equality symbol and value (*False*, e.g. ``= 0.04``, ``< 0.001``) - :type only_value: bool - :param tabular: Whether to pad with spaces so that decimal points align - :type tabular: bool + :param style: Style to format the *p* value + :type style: :class:`PValueStyle` :return: Formatted *p* value :rtype: str """ - # FIXME: Make only_value and tabular enums - sign, fmt = do_fmt_p(p) # Strip leading zero if required @@ -176,28 +180,32 @@ def fmt_p(p, *, html, only_value=False, tabular=False): else: asterisk = '' - if html: + if PValueStyle.HTML in style: # Escape angle quotes sign = sign.replace('<', '<') sign = sign.replace('>', '>') - if only_value: - return '{}{}{}'.format(sign, fmt, asterisk) - elif tabular: + if PValueStyle.RELATION in style: + # Add relational operator + if not sign: + sign = '=' + return '{} {}{}'.format(sign, fmt, asterisk) + elif PValueStyle.TABULAR in style: # Always left-aligned, so reserve space for sign if required to align decimal points if not sign: sign = '=' return '{}{}{}'.format(sign, fmt, asterisk) else: - # Non-tabular so force a sign + # Only value + return '{}{}{}'.format(sign, fmt, asterisk) + else: + if PValueStyle.RELATION in style: + # Add relational operator if not sign: sign = '=' return '{} {}{}'.format(sign, fmt, asterisk) - else: - if only_value: - return '{}{}{}'.format(sign, fmt, asterisk) - elif tabular: + elif PValueStyle.TABULAR in style: # Right-aligned, so add spaces to simulate left alignment if not sign: sign = ' ' @@ -214,10 +222,8 @@ def fmt_p(p, *, html, only_value=False, tabular=False): return '{}{}{}{}'.format(sign, fmt, asterisk, rpadding) else: - # Non-tabular so force a sign - if not sign: - sign = '=' - return '{} {}{}'.format(sign, fmt, asterisk) + # Only value + return '{}{}{}'.format(sign, fmt, asterisk) # ------------------------------ # General result-related classes