Regression output: Hide boolean reference categories by default if reference category is False

This commit is contained in:
RunasSudo 2022-10-15 01:42:06 +11:00
parent d1249914ff
commit 70fba04b2f
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
2 changed files with 20 additions and 10 deletions

View File

@ -138,7 +138,7 @@ def test_regress_logit_ol10_18():
'Stress': np.repeat([d[1] for d in data], [d[2] for d in data]) 'Stress': np.repeat([d[1] for d in data], [d[2] for d in data])
}) })
result = yli.regress(sm.Logit, df, 'Stress', 'Response') result = yli.regress(sm.Logit, df, 'Stress', 'Response', bool_baselevels=True)
assert isinstance(result.terms['Response'], CategoricalTerm) assert isinstance(result.terms['Response'], CategoricalTerm)
assert result.terms['Response'].ref_category == False assert result.terms['Response'].ref_category == False

View File

@ -340,9 +340,15 @@ class CategoricalTerm:
def regress( def regress(
model_class, df, dep, formula, *, model_class, df, dep, formula, *,
nan_policy='warn', exp=None nan_policy='warn',
bool_baselevels=False, exp=None
): ):
"""Fit a statsmodels regression model""" """
Fit a statsmodels regression model
bool_baselevels: Show reference categories for boolean independent variables even if reference category is False
exp: Report exponentiated parameters rather than raw parameters
"""
# Autodetect whether to exponentiate # Autodetect whether to exponentiate
if exp is None: if exp is None:
@ -392,16 +398,20 @@ def regress(
if contrast is not None: if contrast is not None:
# Categorical term # Categorical term
# Add a new categorical term if not exists
if column not in terms:
ref_category = formula_factor_ref_category(formula, df, factor)
terms[column] = CategoricalTerm({}, ref_category)
terms[column].categories[contrast] = SingleTerm(raw_name, beta, result.pvalues[raw_name]) if bool_baselevels is False and contrast == 'True' and set(df[column].unique()) == set([True, False]):
# Treat as single term
terms[column] = SingleTerm(raw_name, beta, result.pvalues[raw_name])
else:
# Add a new categorical term if not exists
if column not in terms:
ref_category = formula_factor_ref_category(formula, df, factor)
terms[column] = CategoricalTerm({}, ref_category)
terms[column].categories[contrast] = SingleTerm(raw_name, beta, result.pvalues[raw_name])
else: else:
# Single term # Single term
term = raw_name terms[column] = SingleTerm(raw_name, beta, result.pvalues[raw_name])
terms[term] = SingleTerm(raw_name, beta, result.pvalues[raw_name])
# Fit null model (for llnull) # Fit null model (for llnull)
if hasattr(result, 'llnull'): if hasattr(result, 'llnull'):