From 70fba04b2f2b6a8c0c895e84ea74c89e235b0bc5 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Sat, 15 Oct 2022 01:42:06 +1100 Subject: [PATCH] Regression output: Hide boolean reference categories by default if reference category is False --- tests/test_regress.py | 2 +- yli/regress.py | 28 +++++++++++++++++++--------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/tests/test_regress.py b/tests/test_regress.py index 7bb6de6..721861d 100644 --- a/tests/test_regress.py +++ b/tests/test_regress.py @@ -138,7 +138,7 @@ def test_regress_logit_ol10_18(): 'Stress': np.repeat([d[1] for d in data], [d[2] for d in data]) }) - result = yli.regress(sm.Logit, df, 'Stress', 'Response') + result = yli.regress(sm.Logit, df, 'Stress', 'Response', bool_baselevels=True) assert isinstance(result.terms['Response'], CategoricalTerm) assert result.terms['Response'].ref_category == False diff --git a/yli/regress.py b/yli/regress.py index 2bb07f5..6efd25a 100644 --- a/yli/regress.py +++ b/yli/regress.py @@ -340,9 +340,15 @@ class CategoricalTerm: def regress( model_class, df, dep, formula, *, - nan_policy='warn', exp=None + nan_policy='warn', + bool_baselevels=False, exp=None ): - """Fit a statsmodels regression model""" + """ + Fit a statsmodels regression model + + bool_baselevels: Show reference categories for boolean independent variables even if reference category is False + exp: Report exponentiated parameters rather than raw parameters + """ # Autodetect whether to exponentiate if exp is None: @@ -392,16 +398,20 @@ def regress( if contrast is not None: # Categorical term - # Add a new categorical term if not exists - if column not in terms: - ref_category = formula_factor_ref_category(formula, df, factor) - terms[column] = CategoricalTerm({}, ref_category) - terms[column].categories[contrast] = SingleTerm(raw_name, beta, result.pvalues[raw_name]) + if bool_baselevels is False and contrast == 'True' and set(df[column].unique()) == set([True, False]): + # Treat as single term + terms[column] = SingleTerm(raw_name, beta, result.pvalues[raw_name]) + else: + # Add a new categorical term if not exists + if column not in terms: + ref_category = formula_factor_ref_category(formula, df, factor) + terms[column] = CategoricalTerm({}, ref_category) + + terms[column].categories[contrast] = SingleTerm(raw_name, beta, result.pvalues[raw_name]) else: # Single term - term = raw_name - terms[term] = SingleTerm(raw_name, beta, result.pvalues[raw_name]) + terms[column] = SingleTerm(raw_name, beta, result.pvalues[raw_name]) # Fit null model (for llnull) if hasattr(result, 'llnull'):