diff --git a/yli/regress.py b/yli/regress.py index 3965d20..98a3459 100644 --- a/yli/regress.py +++ b/yli/regress.py @@ -129,7 +129,7 @@ class RegressionResult: model_class, df, dep, formula, nan_policy, model_kwargs, fit_kwargs, raw_result, full_name, model_name, fit_method, - nobs, dof_model, fitted_dt, cov_type, + nobs, nevents, dof_model, fitted_dt, cov_type, terms, ll_model, ll_null, dof_resid, rsquared, f_statistic, @@ -166,6 +166,8 @@ class RegressionResult: # Basic fitted model information #: Number of observations (*int*) self.nobs = nobs + #: Number of events (*int*, time-to-event models only) + self.nevents = nevents #: Degrees of freedom for the model (*int*) self.dof_model = dof_model #: Date and time of fitting the model (Python *datetime*) @@ -453,7 +455,7 @@ class RegressionResult: self.model_class, self.df, dep, self.formula, self.nan_policy, self.model_kwargs, self.fit_kwargs, None, self.full_name, self.model_name, self.fit_method, - self.nobs, self.dof_model, datetime.now(), 'Bootstrap', + self.nobs, None, self.dof_model, datetime.now(), 'Bootstrap', terms, self.ll_model, self.ll_null, self.dof_resid, self.rsquared, self.f_statistic, @@ -499,12 +501,14 @@ class RegressionResult: right_col = [] right_col.append(('No. Observations:', format(self.nobs, '.0f'))) + if self.nevents: + right_col.append(('No. Events:', format(self.nevents, '.0f'))) right_col.append(('Df. Model:', format(self.dof_model, '.0f'))) if self.dof_resid: right_col.append(('Df. Residuals:', format(self.dof_resid, '.0f'))) if self.rsquared: right_col.append(('R2:' if html else 'R²:', format(self.rsquared, '.2f'))) - else: + elif self.ll_null: right_col.append(('Pseudo R2:' if html else 'Pseudo R²:', format(self.pseudo_rsquared, '.2f'))) if self.f_statistic: # Report the F test if available @@ -516,7 +520,7 @@ class RegressionResult: else: right_col.append(('F:', format(f_result.statistic, '.2f'))) right_col.append(('p (F):', fmt_p(f_result.pvalue, PValueStyle.VALUE_ONLY))) - else: + elif self.ll_null: # Otherwise report likelihood ratio test as overall test lrtest_result = self.lrtest_null() @@ -689,7 +693,7 @@ def regress( model_class, df, dep, formula, *, nan_policy='warn', model_kwargs=None, fit_kwargs=None, - family=None, exposure=None, # common model_kwargs + family=None, exposure=None, status=None, # common model_kwargs cov_type=None, method=None, maxiter=None, start_params=None, # common fit_kwargs bool_baselevels=False, exp=None, _dmatrices=None, @@ -707,6 +711,8 @@ def regress( :type formula: str :param exposure: Column in *df* for the exposure variable (numeric, some models only) :type exposure: str + :param status: Column in *df* for the status variable (time-to-event models only) + :type status: str :param nan_policy: How to handle *nan* values (see :ref:`nan-handling`) :type nan_policy: str :param model_kwargs: Keyword arguments to pass to *model_class* constructor @@ -780,9 +786,7 @@ def regress( # Autodetect whether to exponentiate if exp is None: - if model_class in (sm.Logit, sm.Poisson, PenalisedLogit): - exp = True - elif model_class is OrdinalLogit: + if model_class in (sm.Logit, sm.PHReg, sm.Poisson, OrdinalLogit, PenalisedLogit): exp = True else: exp = False @@ -790,11 +794,14 @@ def regress( df_ref = weakref.ref(df) if _dmatrices is None: - # Check for/clean NaNs - if exposure is None: - df = df[[dep] + cols_for_formula(formula, df)] - else: - df = df[[dep, exposure] + cols_for_formula(formula, df)] + # Check for/clean NaNs in input columns + columns = [dep] + cols_for_formula(formula, df) + if exposure is not None: + columns.append(exposure) + if status is not None: + columns.append(status) + + df = df[columns] df = check_nan(df, nan_policy) # Ensure numeric type for dependent variable @@ -808,17 +815,22 @@ def regress( else: dmatrices = _dmatrices - if model_class is OrdinalLogit: + if model_class in (sm.PHReg, OrdinalLogit): # Drop explicit intercept term # FIXME: Check before dropping dmatrices = (dmatrices[0], dmatrices[1].iloc[:,1:]) + # Add exposure to model if exposure is not None: if df[exposure].dtype == '