Fixups and update documentation
This commit is contained in:
parent
bbb998003e
commit
56e16bc71d
@ -5,6 +5,8 @@ Data wrangling
|
|||||||
--------------
|
--------------
|
||||||
|
|
||||||
.. autofunction:: yli.utils.as_2groups
|
.. autofunction:: yli.utils.as_2groups
|
||||||
|
|
||||||
|
.. autofunction:: yli.utils.as_numeric
|
||||||
|
|
||||||
.. autofunction:: yli.utils.convert_pandas_nullable
|
.. autofunction:: yli.utils.convert_pandas_nullable
|
||||||
|
|
||||||
@ -31,7 +33,7 @@ Data wrangling
|
|||||||
|
|
||||||
.. attribute:: HTML
|
.. attribute:: HTML
|
||||||
|
|
||||||
Format as HTML (i.e. escape ``<``)
|
Format as HTML (e.g. escape ``<``)
|
||||||
|
|
||||||
Formula manipulation
|
Formula manipulation
|
||||||
--------------------
|
--------------------
|
||||||
@ -41,3 +43,26 @@ Formula manipulation
|
|||||||
.. autofunction:: yli.utils.formula_factor_ref_category
|
.. autofunction:: yli.utils.formula_factor_ref_category
|
||||||
|
|
||||||
.. autofunction:: yli.utils.parse_patsy_term
|
.. autofunction:: yli.utils.parse_patsy_term
|
||||||
|
|
||||||
|
Library style
|
||||||
|
-------------
|
||||||
|
|
||||||
|
For API nomenclature, the following guidelines are used:
|
||||||
|
|
||||||
|
* Prefer to call a test by its specific name (e.g. *anova* rather than *ftest* where applicable), unless most commonly known only by the distribution of the test statistic (e.g. *chi2*, *ttest*).
|
||||||
|
|
||||||
|
..
|
||||||
|
|
||||||
|
* A test/statistic is not referred to by both a distribution and a specific name (e.g. *mannwhitney* rather than *mannwhitneyu*), unless required for disambiguation (e.g. *pearsonr* to distinguish from the Pearson *χ*:sup:`2` test).
|
||||||
|
|
||||||
|
..
|
||||||
|
|
||||||
|
* The word "test" is omitted (e.g. *chi2* rather than *chi2test*), unless the name would otherwise be a single letter (e.g. *ttest*, *ftest*), or unless required for disambiguation (e.g. *LikelihoodRatioTestResult* to distinguish from the unrelated meaning of "likelihood ratio" in epidemiology).
|
||||||
|
|
||||||
|
..
|
||||||
|
|
||||||
|
* Underscores are usually omitted from the names of specific tests, test families and statistics (e.g. *ttest*, *oddsratio*, *pearsonr*, *pvalue*), but are used to separate these from other components (e.g. *ttest_ind*, *anova_oneway*, *lrtest_null*). There are a few exceptions (e.g. *rank_biserial*, *pseudo_rsquared*, *f_statistic*).
|
||||||
|
|
||||||
|
..
|
||||||
|
|
||||||
|
* The result class for a test has the same naming convention as the test function (e.g. *TTestResult* for *ttest_ind*), with abbreviations spelled out (e.g. *PearsonChiSquaredResult*, *LikelihoodRatioTestResult*); unless the result class is generic among several tests (e.g. *FTestResult* for *anova_oneway* and *RegressionResult.ftest*), or unless required for disambiguation (e.g. *PearsonChiSquaredResult* for *chi2*, as there are other *χ*:sup:`2` tests).
|
||||||
|
@ -6,23 +6,23 @@ Functions
|
|||||||
|
|
||||||
.. autofunction:: yli.logit_then_regress
|
.. autofunction:: yli.logit_then_regress
|
||||||
|
|
||||||
.. autoclass:: yli.OrdinalLogit
|
|
||||||
|
|
||||||
.. autoclass:: yli.PenalisedLogit
|
|
||||||
|
|
||||||
.. autofunction:: yli.regress
|
.. autofunction:: yli.regress
|
||||||
|
|
||||||
.. autofunction:: yli.vif
|
.. autofunction:: yli.vif
|
||||||
|
|
||||||
|
Additional regression models
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
.. autoclass:: yli.OrdinalLogit
|
||||||
|
|
||||||
|
.. autoclass:: yli.PenalisedLogit
|
||||||
|
|
||||||
Result classes
|
Result classes
|
||||||
--------------
|
--------------
|
||||||
|
|
||||||
.. autoclass:: yli.regress.BrantResult
|
.. autoclass:: yli.regress.BrantResult
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
.. autoclass:: yli.regress.CategoricalTerm
|
|
||||||
:members:
|
|
||||||
|
|
||||||
.. autoclass:: yli.regress.LikelihoodRatioTestResult
|
.. autoclass:: yli.regress.LikelihoodRatioTestResult
|
||||||
:members:
|
:members:
|
||||||
:inherited-members:
|
:inherited-members:
|
||||||
@ -30,5 +30,11 @@ Result classes
|
|||||||
.. autoclass:: yli.regress.RegressionResult
|
.. autoclass:: yli.regress.RegressionResult
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
Model terms
|
||||||
|
-----------
|
||||||
|
|
||||||
|
.. autoclass:: yli.regress.CategoricalTerm
|
||||||
|
:members:
|
||||||
|
|
||||||
.. autoclass:: yli.regress.SingleTerm
|
.. autoclass:: yli.regress.SingleTerm
|
||||||
:members:
|
:members:
|
||||||
|
@ -26,9 +26,14 @@ def reload_me():
|
|||||||
import importlib
|
import importlib
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
for k, v in list(sys.modules.items()):
|
for _ in range(2): # Do it twice to make sure imports are also reloaded fully
|
||||||
if k == 'yli' or k.startswith('yli.'):
|
for k, v in list(sys.modules.items()):
|
||||||
try:
|
if k == 'yli' or k.startswith('yli.'):
|
||||||
importlib.reload(v)
|
try:
|
||||||
except ModuleNotFoundError:
|
importlib.reload(v)
|
||||||
pass
|
except ModuleNotFoundError as ex:
|
||||||
|
if ex.name.startswith('yli.'):
|
||||||
|
# Must be due to a module which we deleted - can safely ignore
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise ex
|
||||||
|
@ -51,6 +51,7 @@ def auto_descriptives(df, cols, *, ordinal_range=[], ordinal_iqr=[]):
|
|||||||
|
|
||||||
if data_cleaned.dtype in ('bool', 'boolean', 'category', 'object'):
|
if data_cleaned.dtype in ('bool', 'boolean', 'category', 'object'):
|
||||||
# Categorical data
|
# Categorical data
|
||||||
|
# FIXME: Sort order
|
||||||
values = sorted(data_cleaned.unique())
|
values = sorted(data_cleaned.unique())
|
||||||
|
|
||||||
# Value counts
|
# Value counts
|
||||||
|
@ -130,7 +130,7 @@ class RegressionResult:
|
|||||||
full_name, model_name, fit_method,
|
full_name, model_name, fit_method,
|
||||||
nobs, dof_model, fitted_dt, cov_type,
|
nobs, dof_model, fitted_dt, cov_type,
|
||||||
terms,
|
terms,
|
||||||
llf, llnull,
|
ll_model, ll_null,
|
||||||
dof_resid, rsquared, f_statistic,
|
dof_resid, rsquared, f_statistic,
|
||||||
comments,
|
comments,
|
||||||
exp
|
exp
|
||||||
@ -178,9 +178,9 @@ class RegressionResult:
|
|||||||
|
|
||||||
# Model log-likelihood
|
# Model log-likelihood
|
||||||
#: Log-likelihood of fitted model (*float*)
|
#: Log-likelihood of fitted model (*float*)
|
||||||
self.llf = llf
|
self.ll_model = ll_model
|
||||||
#: Log-likelihood of null model (*float*)
|
#: Log-likelihood of null model (*float*)
|
||||||
self.llnull = llnull
|
self.ll_null = ll_null
|
||||||
|
|
||||||
# Extra statistics (not all regression models have these)
|
# Extra statistics (not all regression models have these)
|
||||||
#: Degrees of freedom for the residuals (*int*; *None* if N/A)
|
#: Degrees of freedom for the residuals (*int*; *None* if N/A)
|
||||||
@ -201,7 +201,7 @@ class RegressionResult:
|
|||||||
def pseudo_rsquared(self):
|
def pseudo_rsquared(self):
|
||||||
"""McFadden's pseudo *R*:sup:`2` statistic"""
|
"""McFadden's pseudo *R*:sup:`2` statistic"""
|
||||||
|
|
||||||
return 1 - self.llf/self.llnull
|
return 1 - self.ll_model/self.ll_null
|
||||||
|
|
||||||
def lrtest_null(self):
|
def lrtest_null(self):
|
||||||
"""
|
"""
|
||||||
@ -210,7 +210,7 @@ class RegressionResult:
|
|||||||
:rtype: :class:`LikelihoodRatioTestResult`
|
:rtype: :class:`LikelihoodRatioTestResult`
|
||||||
"""
|
"""
|
||||||
|
|
||||||
statistic = -2 * (self.llnull - self.llf)
|
statistic = -2 * (self.ll_null - self.ll_model)
|
||||||
pvalue = 1 - stats.chi2.cdf(statistic, self.dof_model)
|
pvalue = 1 - stats.chi2.cdf(statistic, self.dof_model)
|
||||||
|
|
||||||
return LikelihoodRatioTestResult(statistic, self.dof_model, pvalue)
|
return LikelihoodRatioTestResult(statistic, self.dof_model, pvalue)
|
||||||
@ -308,7 +308,7 @@ class RegressionResult:
|
|||||||
|
|
||||||
# Fit individual logistic regressions
|
# Fit individual logistic regressions
|
||||||
logit_models = []
|
logit_models = []
|
||||||
for upper_limit in sorted(df[dep].unique())[:-1]: # FIXME: Sort order
|
for upper_limit in sorted(df[dep].unique())[:-1]:
|
||||||
dep_dichotomous = (df[dep] <= upper_limit).astype(int).reset_index(drop=True)
|
dep_dichotomous = (df[dep] <= upper_limit).astype(int).reset_index(drop=True)
|
||||||
logit_result = sm.Logit(dep_dichotomous, dmatrix_right).fit(disp=False, **self.fit_kwargs)
|
logit_result = sm.Logit(dep_dichotomous, dmatrix_right).fit(disp=False, **self.fit_kwargs)
|
||||||
|
|
||||||
@ -454,7 +454,7 @@ class RegressionResult:
|
|||||||
self.full_name, self.model_name, self.fit_method,
|
self.full_name, self.model_name, self.fit_method,
|
||||||
self.nobs, self.dof_model, datetime.now(), 'Bootstrap',
|
self.nobs, self.dof_model, datetime.now(), 'Bootstrap',
|
||||||
terms,
|
terms,
|
||||||
self.llf, self.llnull,
|
self.ll_model, self.ll_null,
|
||||||
self.dof_resid, self.rsquared, self.f_statistic,
|
self.dof_resid, self.rsquared, self.f_statistic,
|
||||||
self.comments,
|
self.comments,
|
||||||
self.exp
|
self.exp
|
||||||
@ -499,8 +499,8 @@ class RegressionResult:
|
|||||||
# Otherwise report likelihood ratio test as overall test
|
# Otherwise report likelihood ratio test as overall test
|
||||||
lrtest_result = self.lrtest_null()
|
lrtest_result = self.lrtest_null()
|
||||||
|
|
||||||
right_col.append(('LL-Model:', format(self.llf, '.2f')))
|
right_col.append(('LL-Model:', format(self.ll_model, '.2f')))
|
||||||
right_col.append(('LL-Null:', format(self.llnull, '.2f')))
|
right_col.append(('LL-Null:', format(self.ll_null, '.2f')))
|
||||||
if html:
|
if html:
|
||||||
right_col.append(('<i>p</i> (LR):', fmt_p(lrtest_result.pvalue, PValueStyle.VALUE_ONLY | PValueStyle.HTML)))
|
right_col.append(('<i>p</i> (LR):', fmt_p(lrtest_result.pvalue, PValueStyle.VALUE_ONLY | PValueStyle.HTML)))
|
||||||
else:
|
else:
|
||||||
@ -859,9 +859,11 @@ def regress(
|
|||||||
# Single term
|
# Single term
|
||||||
terms[column] = SingleTerm(raw_name, beta, pvalues[raw_name])
|
terms[column] = SingleTerm(raw_name, beta, pvalues[raw_name])
|
||||||
|
|
||||||
# Fit null model (for llnull)
|
# Fit null model (for ll_null)
|
||||||
if hasattr(result, 'llnull'):
|
if hasattr(result, 'll_null'):
|
||||||
llnull = result.llnull
|
ll_null = result.ll_null
|
||||||
|
elif hasattr(result, 'llnull'):
|
||||||
|
ll_null = result.llnull
|
||||||
else:
|
else:
|
||||||
# Construct null (intercept-only) model
|
# Construct null (intercept-only) model
|
||||||
#result_null = model_class.from_formula(formula=dep + ' ~ 1', data=df).fit()
|
#result_null = model_class.from_formula(formula=dep + ' ~ 1', data=df).fit()
|
||||||
@ -870,7 +872,7 @@ def regress(
|
|||||||
dm_exog['Intercept'].fillna(1, inplace=True)
|
dm_exog['Intercept'].fillna(1, inplace=True)
|
||||||
|
|
||||||
result_null = model_class(endog=dmatrices[0], exog=dm_exog).fit()
|
result_null = model_class(endog=dmatrices[0], exog=dm_exog).fit()
|
||||||
llnull = result_null.llf
|
ll_null = result_null.llf
|
||||||
|
|
||||||
if model_class is sm.OLS:
|
if model_class is sm.OLS:
|
||||||
method_name = 'Least Squares'
|
method_name = 'Least Squares'
|
||||||
@ -897,7 +899,7 @@ def regress(
|
|||||||
full_name, model_class.__name__, method_name,
|
full_name, model_class.__name__, method_name,
|
||||||
result.nobs, result.df_model, datetime.now(), getattr(result, 'cov_type', 'nonrobust'),
|
result.nobs, result.df_model, datetime.now(), getattr(result, 'cov_type', 'nonrobust'),
|
||||||
terms,
|
terms,
|
||||||
result.llf, llnull,
|
result.llf, ll_null,
|
||||||
getattr(result, 'df_resid', None), getattr(result, 'rsquared', None), getattr(result, 'fvalue', None),
|
getattr(result, 'df_resid', None), getattr(result, 'rsquared', None), getattr(result, 'fvalue', None),
|
||||||
[],
|
[],
|
||||||
exp
|
exp
|
||||||
@ -1030,7 +1032,7 @@ class OrdinalLogit(statsmodels.miscmodels.ordinal_model.OrderedModel):
|
|||||||
statsmodels-compatible model for computing ordinal logistic (or probit) regression
|
statsmodels-compatible model for computing ordinal logistic (or probit) regression
|
||||||
|
|
||||||
The implementation subclasses statsmodels' native *OrderedModel*, but substitutes an alternative parameterisation used by R and Stata.
|
The implementation subclasses statsmodels' native *OrderedModel*, but substitutes an alternative parameterisation used by R and Stata.
|
||||||
The the native statsmodels implementation, the first cutoff term is the true cutoff, and further cutoff terms are log differences between consecutive cutoffs.
|
In the native statsmodels implementation, the first cutoff parameter is the true cutoff, but further cutoff parameters are log differences between consecutive cutoffs.
|
||||||
In this parameterisation, cutoff terms are represented directly in the model.
|
In this parameterisation, cutoff terms are represented directly in the model.
|
||||||
|
|
||||||
**Example:**
|
**Example:**
|
||||||
|
@ -109,6 +109,7 @@ def as_2groups(df, data, group):
|
|||||||
raise Exception('Got {} values for {}, expected 2'.format(len(groups), group))
|
raise Exception('Got {} values for {}, expected 2'.format(len(groups), group))
|
||||||
|
|
||||||
# Get 2 groups
|
# Get 2 groups
|
||||||
|
# FIXME: Sort order
|
||||||
group1 = groups[0][0]
|
group1 = groups[0][0]
|
||||||
data1 = df.loc[groups[0][1], data]
|
data1 = df.loc[groups[0][1], data]
|
||||||
group2 = groups[1][0]
|
group2 = groups[1][0]
|
||||||
@ -118,7 +119,7 @@ def as_2groups(df, data, group):
|
|||||||
|
|
||||||
def as_numeric(data):
|
def as_numeric(data):
|
||||||
"""
|
"""
|
||||||
Convert the given data to a numeric type, factorising if required
|
Convert the data to a numeric type, factorising if required
|
||||||
|
|
||||||
:param data: Data to convert
|
:param data: Data to convert
|
||||||
:type data: Series
|
:type data: Series
|
||||||
|
Loading…
Reference in New Issue
Block a user