Regression output: Hide boolean reference categories by default if reference category is False

This commit is contained in:
RunasSudo 2022-10-15 01:42:06 +11:00
parent d1249914ff
commit 70fba04b2f
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
2 changed files with 20 additions and 10 deletions

View File

@ -138,7 +138,7 @@ def test_regress_logit_ol10_18():
'Stress': np.repeat([d[1] for d in data], [d[2] for d in data])
})
result = yli.regress(sm.Logit, df, 'Stress', 'Response')
result = yli.regress(sm.Logit, df, 'Stress', 'Response', bool_baselevels=True)
assert isinstance(result.terms['Response'], CategoricalTerm)
assert result.terms['Response'].ref_category == False

View File

@ -340,9 +340,15 @@ class CategoricalTerm:
def regress(
model_class, df, dep, formula, *,
nan_policy='warn', exp=None
nan_policy='warn',
bool_baselevels=False, exp=None
):
"""Fit a statsmodels regression model"""
"""
Fit a statsmodels regression model
bool_baselevels: Show reference categories for boolean independent variables even if reference category is False
exp: Report exponentiated parameters rather than raw parameters
"""
# Autodetect whether to exponentiate
if exp is None:
@ -392,16 +398,20 @@ def regress(
if contrast is not None:
# Categorical term
# Add a new categorical term if not exists
if column not in terms:
ref_category = formula_factor_ref_category(formula, df, factor)
terms[column] = CategoricalTerm({}, ref_category)
terms[column].categories[contrast] = SingleTerm(raw_name, beta, result.pvalues[raw_name])
if bool_baselevels is False and contrast == 'True' and set(df[column].unique()) == set([True, False]):
# Treat as single term
terms[column] = SingleTerm(raw_name, beta, result.pvalues[raw_name])
else:
# Add a new categorical term if not exists
if column not in terms:
ref_category = formula_factor_ref_category(formula, df, factor)
terms[column] = CategoricalTerm({}, ref_category)
terms[column].categories[contrast] = SingleTerm(raw_name, beta, result.pvalues[raw_name])
else:
# Single term
term = raw_name
terms[term] = SingleTerm(raw_name, beta, result.pvalues[raw_name])
terms[column] = SingleTerm(raw_name, beta, result.pvalues[raw_name])
# Fit null model (for llnull)
if hasattr(result, 'llnull'):