Refactor fmt_p

Combine html, only_value, tabular flags into a single Flag
This commit is contained in:
RunasSudo 2022-11-09 17:39:33 +11:00
parent c6cef4aee7
commit d8dcece09d
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
5 changed files with 92 additions and 66 deletions

View File

@ -13,6 +13,26 @@ Data wrangling
.. autofunction:: yli.utils.fmt_p
.. autoclass:: yli.utils.PValueStyle
.. attribute:: VALUE_ONLY
Display only the *p* value (e.g. ``0.08``, ``<0.001*``)
This is an alias for specifying no flags.
.. attribute:: RELATION
Force displaying a relational operator before the *p* value (e.g. ``= 0.08``, ``< 0.001*``)
.. attribute:: TABULAR
Pad to ensure decimal points align, using spaces in plaintext or a hidden ``=`` in HTML (incompatible with :attr:`RELATION`; if both are given, :attr:`RELATION` takes precedence)
.. attribute:: HTML
Format as HTML (i.e. escape ``<`` and ``>``)
Formula manipulation
--------------------

View File

@ -16,7 +16,7 @@
import yli
from yli.config import config
from yli.utils import fmt_p
from yli.utils import PValueStyle, fmt_p
def test_fmt_pvalues_ord():
"""Test formatting of p values requiring no special handling"""
@ -27,11 +27,11 @@ def test_fmt_pvalues_ord():
config.pvalue_leading_zero = True
config.alpha = 0.05
assert fmt_p(0.0096, html=False) == '= 0.01*'
assert fmt_p(0.01, html=False) == '= 0.01*'
assert fmt_p(0.04, html=False) == '= 0.04*'
assert fmt_p(0.11, html=False) == '= 0.11'
assert fmt_p(0.55, html=False) == '= 0.55'
assert fmt_p(0.0096, PValueStyle.RELATION) == '= 0.01*'
assert fmt_p(0.01, PValueStyle.RELATION) == '= 0.01*'
assert fmt_p(0.04, PValueStyle.RELATION) == '= 0.04*'
assert fmt_p(0.11, PValueStyle.RELATION) == '= 0.11'
assert fmt_p(0.55, PValueStyle.RELATION) == '= 0.55'
def test_fmt_pvalues_ord_noleadingzero():
"""Test formatting of p values requiring no special handling, no leading zero"""
@ -42,11 +42,11 @@ def test_fmt_pvalues_ord_noleadingzero():
config.pvalue_leading_zero = False
config.alpha = 0.05
assert fmt_p(0.0096, html=False) == '= .01*'
assert fmt_p(0.01, html=False) == '= .01*'
assert fmt_p(0.04, html=False) == '= .04*'
assert fmt_p(0.11, html=False) == '= .11'
assert fmt_p(0.55, html=False) == '= .55'
assert fmt_p(0.0096, PValueStyle.RELATION) == '= .01*'
assert fmt_p(0.01, PValueStyle.RELATION) == '= .01*'
assert fmt_p(0.04, PValueStyle.RELATION) == '= .04*'
assert fmt_p(0.11, PValueStyle.RELATION) == '= .11'
assert fmt_p(0.55, PValueStyle.RELATION) == '= .55'
def test_fmt_pvalues_small():
"""Test formatting of small p values requiring extra decimal points to represent"""
@ -57,7 +57,7 @@ def test_fmt_pvalues_small():
config.pvalue_leading_zero = True
config.alpha = 0.05
assert fmt_p(0.009, html=False) == '= 0.009*'
assert fmt_p(0.009, PValueStyle.RELATION) == '= 0.009*'
def test_fmt_pvalues_ambiguous():
"""Test formatting of p values requiring extra decimal points to avoid ambiguity"""
@ -68,12 +68,12 @@ def test_fmt_pvalues_ambiguous():
config.pvalue_leading_zero = True
config.alpha = 0.05
assert fmt_p(0.048, html=False) == '= 0.048*'
assert fmt_p(0.052, html=False) == '= 0.052'
assert fmt_p(0.048, PValueStyle.RELATION) == '= 0.048*'
assert fmt_p(0.052, PValueStyle.RELATION) == '= 0.052'
# Special rounding rules
assert fmt_p(0.04999, html=False) == '= 0.049*'
assert fmt_p(0.05001, html=False) == '= 0.051'
assert fmt_p(0.04999, PValueStyle.RELATION) == '= 0.049*'
assert fmt_p(0.05001, PValueStyle.RELATION) == '= 0.051'
def test_fmt_pvalues_extreme():
"""Test formatting of p values too small or large to be represented"""
@ -84,5 +84,5 @@ def test_fmt_pvalues_extreme():
config.pvalue_leading_zero = True
config.alpha = 0.05
assert fmt_p(0.0009, html=False) == '< 0.001*'
assert fmt_p(0.999, html=False) == '> 0.99'
assert fmt_p(0.0009, PValueStyle.RELATION) == '< 0.001*'
assert fmt_p(0.999, PValueStyle.RELATION) == '> 0.99'

View File

@ -29,7 +29,7 @@ import warnings
from .bayes_factors import BayesFactor, bayesfactor_afbf
from .config import config
from .sig_tests import FTestResult
from .utils import Estimate, check_nan, cols_for_formula, convert_pandas_nullable, fmt_p, formula_factor_ref_category, parse_patsy_term
from .utils import Estimate, PValueStyle, check_nan, cols_for_formula, convert_pandas_nullable, fmt_p, formula_factor_ref_category, parse_patsy_term
def vif(df, formula=None, *, nan_policy='warn'):
"""
@ -112,7 +112,7 @@ class LikelihoodRatioTestResult:
return super().__repr__()
def _repr_html_(self):
return 'LR({}) = {:.2f}; <i>p</i> {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=True))
return 'LR({}) = {:.2f}; <i>p</i> {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML))
def summary(self):
"""
@ -121,7 +121,7 @@ class LikelihoodRatioTestResult:
:rtype: str
"""
return 'LR({}) = {:.2f}; p {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False))
return 'LR({}) = {:.2f}; p {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION))
class RegressionResult:
"""
@ -272,10 +272,10 @@ class RegressionResult:
if html:
right_col.append(('<i>F</i>:', format(f_result.statistic, '.2f')))
right_col.append(('<i>p</i> (<i>F</i>):', fmt_p(f_result.pvalue, html=True, only_value=True)))
right_col.append(('<i>p</i> (<i>F</i>):', fmt_p(f_result.pvalue, PValueStyle.VALUE_ONLY | PValueStyle.HTML)))
else:
right_col.append(('F:', format(f_result.statistic, '.2f')))
right_col.append(('p (F):', fmt_p(f_result.pvalue, html=False, only_value=True)))
right_col.append(('p (F):', fmt_p(f_result.pvalue, PValueStyle.VALUE_ONLY)))
else:
# Otherwise report likelihood ratio test as overall test
lrtest_result = self.lrtest_null()
@ -283,9 +283,9 @@ class RegressionResult:
right_col.append(('LL-Model:', format(self.llf, '.2f')))
right_col.append(('LL-Null:', format(self.llnull, '.2f')))
if html:
right_col.append(('<i>p</i> (LR):', fmt_p(lrtest_result.pvalue, html=True, only_value=True)))
right_col.append(('<i>p</i> (LR):', fmt_p(lrtest_result.pvalue, PValueStyle.VALUE_ONLY | PValueStyle.HTML)))
else:
right_col.append(('p (LR):', fmt_p(lrtest_result.pvalue, html=False, only_value=True)))
right_col.append(('p (LR):', fmt_p(lrtest_result.pvalue, PValueStyle.VALUE_ONLY)))
return left_col, right_col
@ -320,7 +320,7 @@ class RegressionResult:
if self.exp:
beta = np.exp(beta)
out += '<tr><th>{}</th><td>{:.2f}</td><td style="padding-right:0">({:.2f}</td><td>–</td><td style="padding-left:0">{:.2f})</td><td style="text-align:left">{}</td></tr>'.format(term_name, beta.point, beta.ci_lower, beta.ci_upper, fmt_p(term.pvalue, html=True, tabular=True))
out += '<tr><th>{}</th><td>{:.2f}</td><td style="padding-right:0">({:.2f}</td><td>–</td><td style="padding-left:0">{:.2f})</td><td style="text-align:left">{}</td></tr>'.format(term_name, beta.point, beta.ci_lower, beta.ci_upper, fmt_p(term.pvalue, PValueStyle.TABULAR | PValueStyle.HTML))
elif isinstance(term, CategoricalTerm):
# Categorical term
out += '<tr><th>{}</th><td></td><td style="padding-right:0"></td><td></td><td style="padding-left:0"></td><td></td></tr>'.format(term_name)
@ -335,7 +335,7 @@ class RegressionResult:
if self.exp:
beta = np.exp(beta)
out += '<tr><td style="text-align:right;font-style:italic">{}</td><td>{:.2f}</td><td style="padding-right:0">({:.2f}</td><td>–</td><td style="padding-left:0">{:.2f})</td><td style="text-align:left">{}</td></tr>'.format(sub_term_name, beta.point, beta.ci_lower, beta.ci_upper, fmt_p(sub_term.pvalue, html=True, tabular=True))
out += '<tr><td style="text-align:right;font-style:italic">{}</td><td>{:.2f}</td><td style="padding-right:0">({:.2f}</td><td>–</td><td style="padding-left:0">{:.2f})</td><td style="text-align:left">{}</td></tr>'.format(sub_term_name, beta.point, beta.ci_lower, beta.ci_upper, fmt_p(sub_term.pvalue, PValueStyle.TABULAR | PValueStyle.HTML))
else:
raise Exception('Attempt to render unknown term type')
@ -381,7 +381,7 @@ class RegressionResult:
beta = np.exp(beta)
# Add some extra padding
table_data.append([term_name + ' ', format(beta.point, '.2f'), '({:.2f}'.format(beta.ci_lower), '-', '{:.2f})'.format(beta.ci_upper), ' ' + fmt_p(term.pvalue, html=False, tabular=True)])
table_data.append([term_name + ' ', format(beta.point, '.2f'), '({:.2f}'.format(beta.ci_lower), '-', '{:.2f})'.format(beta.ci_upper), ' ' + fmt_p(term.pvalue, PValueStyle.TABULAR)])
elif isinstance(term, CategoricalTerm):
# Categorical term
table_data.append([term_name + ' ', '', '', '', '', ''])
@ -396,7 +396,7 @@ class RegressionResult:
if self.exp:
beta = np.exp(beta)
table_data.append([sub_term_name + ' ', format(beta.point, '.2f'), '({:.2f}'.format(beta.ci_lower), '-', '{:.2f})'.format(beta.ci_upper), ' ' + fmt_p(sub_term.pvalue, html=False, tabular=True)])
table_data.append([sub_term_name + ' ', format(beta.point, '.2f'), '({:.2f}'.format(beta.ci_lower), '-', '{:.2f})'.format(beta.ci_upper), ' ' + fmt_p(sub_term.pvalue, PValueStyle.TABULAR)])
else:
raise Exception('Attempt to render unknown term type')

View File

@ -23,7 +23,7 @@ import functools
import warnings
from .config import config
from .utils import Estimate, as_2groups, check_nan, convert_pandas_nullable, fmt_p
from .utils import Estimate, PValueStyle, as_2groups, check_nan, convert_pandas_nullable, fmt_p
# ----------------
# Student's t test
@ -65,7 +65,7 @@ class TTestResult:
return super().__repr__()
def _repr_html_(self):
return '<i>t</i>({:.0f}) = {:.2f}; <i>p</i> {}<br><i>μ</i><sub>{}</sub> (SD) = {:.2f} ({:.2f}), <i>μ</i><sub>{}</sub> (SD) = {:.2f} ({:.2f})<br>Δ<i>μ</i> ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=True), self.group1, self.mu1, self.sd1, self.group2, self.mu2, self.sd2, (1-config.alpha)*100, self.delta.summary(), self.delta_direction)
return '<i>t</i>({:.0f}) = {:.2f}; <i>p</i> {}<br><i>μ</i><sub>{}</sub> (SD) = {:.2f} ({:.2f}), <i>μ</i><sub>{}</sub> (SD) = {:.2f} ({:.2f})<br>Δ<i>μ</i> ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML), self.group1, self.mu1, self.sd1, self.group2, self.mu2, self.sd2, (1-config.alpha)*100, self.delta.summary(), self.delta_direction)
def summary(self):
"""
@ -74,7 +74,7 @@ class TTestResult:
:rtype: str
"""
return 't({:.0f}) = {:.2f}; p {}\nμ({}) (SD) = {:.2f} ({:.2f}), μ({}) (SD) = {:.2f} ({:.2f})\nΔμ ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, html=False), self.group1, self.mu1, self.sd1, self.group2, self.mu2, self.sd2, (1-config.alpha)*100, self.delta.summary(), self.delta_direction)
return 't({:.0f}) = {:.2f}; p {}\nμ({}) (SD) = {:.2f} ({:.2f}), μ({}) (SD) = {:.2f} ({:.2f})\nΔμ ({:g}% CI) = {}, {}'.format(self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION), self.group1, self.mu1, self.sd1, self.group2, self.mu2, self.sd2, (1-config.alpha)*100, self.delta.summary(), self.delta_direction)
def ttest_ind(df, dep, ind, *, nan_policy='warn'):
"""
@ -166,7 +166,7 @@ class FTestResult:
return super().__repr__()
def _repr_html_(self):
return '<i>F</i>({:.0f}, {:.0f}) = {:.2f}; <i>p</i> {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=True))
return '<i>F</i>({:.0f}, {:.0f}) = {:.2f}; <i>p</i> {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML))
def summary(self):
"""
@ -175,7 +175,7 @@ class FTestResult:
:rtype: str
"""
return 'F({:.0f}, {:.0f}) = {:.2f}; p {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, html=False))
return 'F({:.0f}, {:.0f}) = {:.2f}; p {}'.format(self.dof1, self.dof2, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION))
def anova_oneway(df, dep, ind, *, nan_policy='omit'):
"""
@ -252,7 +252,7 @@ class MannWhitneyResult:
return super().__repr__()
def _repr_html_(self):
line1 = '<i>U</i> = {:.1f}; <i>p</i> {}<br><i>r</i> = {:.2f}, {}'.format(self.statistic, fmt_p(self.pvalue, html=True), self.rank_biserial, self.direction)
line1 = '<i>U</i> = {:.1f}; <i>p</i> {}<br><i>r</i> = {:.2f}, {}'.format(self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML), self.rank_biserial, self.direction)
if self.brunnermunzel:
return line1 + '<br>' + self.brunnermunzel._repr_html_()
else:
@ -265,7 +265,7 @@ class MannWhitneyResult:
:rtype: str
"""
line1 = 'U = {:.1f}; p {}\nr = {:.2f}, {}'.format(self.statistic, fmt_p(self.pvalue, html=False), self.rank_biserial, self.direction)
line1 = 'U = {:.1f}; p {}\nr = {:.2f}, {}'.format(self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION), self.rank_biserial, self.direction)
if self.brunnermunzel:
return line1 + '\n' + self.brunnermunzel.summary()
else:
@ -292,7 +292,7 @@ class BrunnerMunzelResult:
return super().__repr__()
def _repr_html_(self):
return '<i>W</i> = {:.1f}; <i>p</i> {}'.format(self.statistic, fmt_p(self.pvalue, html=True))
return '<i>W</i> = {:.1f}; <i>p</i> {}'.format(self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML))
def summary(self):
"""
@ -301,7 +301,7 @@ class BrunnerMunzelResult:
:rtype: str
"""
return 'W = {:.1f}; p {}'.format(self.statistic, fmt_p(self.pvalue, html=False))
return 'W = {:.1f}; p {}'.format(self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION))
def mannwhitney(df, dep, ind, *, nan_policy='warn', brunnermunzel=True, use_continuity=False, alternative='two-sided', method='auto'):
"""
@ -415,10 +415,10 @@ class PearsonChiSquaredResult:
def _repr_html_(self):
if self.oddsratio is not None:
return '{0}<br><i>χ</i><sup>2</sup>({1}) = {2:.2f}; <i>p</i> {3}<br>OR ({4:g}% CI) = {5}<br>RR ({4:g}% CI) = {6}'.format(
self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, html=True), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary())
self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary())
else:
return '{}<br><i>χ</i><sup>2</sup>({}) = {:.2f}; <i>p</i> {}'.format(
self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, html=True))
self.ct._repr_html_(), self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML))
def summary(self):
"""
@ -429,10 +429,10 @@ class PearsonChiSquaredResult:
if self.oddsratio is not None:
return '{0}\n\nχ²({1}) = {2:.2f}; p {3}\nOR ({4:g}% CI) = {5}\nRR ({4:g}% CI) = {6}'.format(
self.ct, self.dof, self.statistic, fmt_p(self.pvalue, html=False), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary())
self.ct, self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION), (1-config.alpha)*100, self.oddsratio.summary(), self.riskratio.summary())
else:
return '{}\n\nχ²({}) = {:.2f}; p {}'.format(
self.ct, self.dof, self.statistic, fmt_p(self.pvalue, html=False))
self.ct, self.dof, self.statistic, fmt_p(self.pvalue, PValueStyle.RELATION))
def chi2(df, dep, ind, *, nan_policy='warn'):
"""
@ -538,7 +538,7 @@ class PearsonRResult:
return super().__repr__()
def _repr_html_(self):
return '<i>r</i> ({:g}% CI) = {}; <i>p</i> {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, html=True))
return '<i>r</i> ({:g}% CI) = {}; <i>p</i> {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML))
def summary(self):
"""
@ -547,7 +547,7 @@ class PearsonRResult:
:rtype: str
"""
return 'r ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, html=False))
return 'r ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, PValueStyle.RELATION))
def pearsonr(df, dep, ind, *, nan_policy='warn'):
"""

View File

@ -18,6 +18,7 @@ import numpy as np
import pandas as pd
import patsy
import enum
import warnings
from .config import config
@ -145,25 +146,28 @@ def do_fmt_p(p):
# OK to round to pvalue_min_dps
return '', '{0:.{dps}f}'.format(p, dps=config.pvalue_min_dps)
def fmt_p(p, *, html, only_value=False, tabular=False):
class PValueStyle(enum.Flag):
    """
    An *enum.Flag* representing how to render a *p* value

    Members are bit flags and may be combined with ``|``,
    e.g. ``PValueStyle.RELATION | PValueStyle.HTML``.
    """

    # No flags set: display only the value (e.g. ``0.08``, ``<0.001*``)
    VALUE_ONLY = 0
    # Force a relational operator before the value (e.g. ``= 0.08``, ``< 0.001*``)
    RELATION = enum.auto()
    # Pad so that decimal points align; fmt_p checks RELATION first, so TABULAR has no effect when both are set
    TABULAR = enum.auto()
    # Format as HTML, i.e. escape ``<`` and ``>`` in the relational operator
    HTML = enum.auto()
def fmt_p(p, style):
"""
Format *p* value for display
:param p: *p* value to display
:type p: float
:param html: Whether to output as HTML (*True*) or plaintext (*False*)
:type html: bool
:param only_value: Whether to display only the value (*True*, e.g. ``0.04``, ``<0.001``) or equality symbol and value (*False*, e.g. ``= 0.04``, ``< 0.001``)
:type only_value: bool
:param tabular: Whether to pad with spaces so that decimal points align
:type tabular: bool
:param style: Style to format the *p* value
:type style: :class:`PValueStyle`
:return: Formatted *p* value
:rtype: str
"""
# FIXME: Make only_value and tabular enums
sign, fmt = do_fmt_p(p)
# Strip leading zero if required
@ -176,28 +180,32 @@ def fmt_p(p, *, html, only_value=False, tabular=False):
else:
asterisk = ''
if html:
if PValueStyle.HTML in style:
# Escape angle quotes
sign = sign.replace('<', '&lt;')
sign = sign.replace('>', '&gt;')
if only_value:
return '{}{}{}'.format(sign, fmt, asterisk)
elif tabular:
if PValueStyle.RELATION in style:
# Add relational operator
if not sign:
sign = '='
return '{} {}{}'.format(sign, fmt, asterisk)
elif PValueStyle.TABULAR in style:
# Always left-aligned, so reserve space for sign if required to align decimal points
if not sign:
sign = '<span style="visibility:hidden">=</span>'
return '{}{}{}'.format(sign, fmt, asterisk)
else:
# Non-tabular so force a sign
# Only value
return '{}{}{}'.format(sign, fmt, asterisk)
else:
if PValueStyle.RELATION in style:
# Add relational operator
if not sign:
sign = '='
return '{} {}{}'.format(sign, fmt, asterisk)
else:
if only_value:
return '{}{}{}'.format(sign, fmt, asterisk)
elif tabular:
elif PValueStyle.TABULAR in style:
# Right-aligned, so add spaces to simulate left alignment
if not sign:
sign = ' '
@ -214,10 +222,8 @@ def fmt_p(p, *, html, only_value=False, tabular=False):
return '{}{}{}{}'.format(sign, fmt, asterisk, rpadding)
else:
# Non-tabular so force a sign
if not sign:
sign = '='
return '{} {}{}'.format(sign, fmt, asterisk)
# Only value
return '{}{}{}'.format(sign, fmt, asterisk)
# ------------------------------
# General result-related classes