Implement yli.IntervalCensoredCox
This commit is contained in:
parent
65f3a2601d
commit
2852d3dd19
@ -45,7 +45,8 @@ The mandatory dependencies of this library are:
|
|||||||
|
|
||||||
Optional dependencies are:
|
Optional dependencies are:
|
||||||
|
|
||||||
* matplotlib and [seaborn](https://seaborn.pydata.org/), for plotting functions
|
* [hpstat](https://yingtongli.me/git/hpstat), for *IntervalCensoredCox*
|
||||||
|
* [matplotlib](https://matplotlib.org/) and [seaborn](https://seaborn.pydata.org/), for plotting functions
|
||||||
* [mpmath](https://mpmath.org/), for *beta_ratio* and *beta_oddsratio*
|
* [mpmath](https://mpmath.org/), for *beta_ratio* and *beta_oddsratio*
|
||||||
* [PyCryptodome](https://www.pycryptodome.org/), for *pickle_write_encrypted* and *pickle_read_encrypted*
|
* [PyCryptodome](https://www.pycryptodome.org/), for *pickle_write_encrypted* and *pickle_read_encrypted*
|
||||||
* [rpy2](https://rpy2.github.io/), with R packages:
|
* [rpy2](https://rpy2.github.io/), with R packages:
|
||||||
@ -64,6 +65,7 @@ Relevant statistical functions are all directly available from the top-level *yl
|
|||||||
* *pearsonr*: Pearson correlation coefficient *r*
|
* *pearsonr*: Pearson correlation coefficient *r*
|
||||||
* *ttest_ind*: Independent 2-sample *t* test
|
* *ttest_ind*: Independent 2-sample *t* test
|
||||||
* Regression:
|
* Regression:
|
||||||
|
* *IntervalCensoredCox*: Model for interval-censored Cox regression
|
||||||
* *PenalisedLogit*: Model for Firth penalised logistic regression
|
* *PenalisedLogit*: Model for Firth penalised logistic regression
|
||||||
* *regress*: Fit arbitrary regression models
|
* *regress*: Fit arbitrary regression models
|
||||||
* *vif*: Compute the variance inflation factor for independent variables in regression
|
* *vif*: Compute the variance inflation factor for independent variables in regression
|
||||||
|
@ -17,3 +17,6 @@ Global options
|
|||||||
|
|
||||||
.. autoattribute:: yli.config.Config.repr_is_summary
|
.. autoattribute:: yli.config.Config.repr_is_summary
|
||||||
:annotation: = True
|
:annotation: = True
|
||||||
|
|
||||||
|
.. autoattribute:: yli.config.Config.hpstat_path
|
||||||
|
:annotation: = './hpstat'
|
||||||
|
@ -20,7 +20,7 @@ from .descriptives import auto_correlations, auto_descriptives
|
|||||||
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
|
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
|
||||||
from .graphs import init_fonts
|
from .graphs import init_fonts
|
||||||
from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
|
from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
|
||||||
from .regress import Logit, OLS, OrdinalLogit, PenalisedLogit, regress, vif
|
from .regress import IntervalCensoredCox, Logit, OLS, OrdinalLogit, PenalisedLogit, regress, vif
|
||||||
from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, spearman, ttest_ind
|
from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, spearman, ttest_ind
|
||||||
from .survival import kaplanmeier, logrank, turnbull
|
from .survival import kaplanmeier, logrank, turnbull
|
||||||
from .utils import as_ordinal
|
from .utils import as_ordinal
|
||||||
|
@ -33,5 +33,8 @@ class Config:
|
|||||||
#: If enabled, `__repr__` on test results, etc. directly calls the ``summary`` function (*bool*)
|
#: If enabled, `__repr__` on test results, etc. directly calls the ``summary`` function (*bool*)
|
||||||
self.repr_is_summary = True
|
self.repr_is_summary = True
|
||||||
|
|
||||||
|
#: Path to the hpstat binary, used by *IntervalCensoredCox* (*str*)
|
||||||
|
self.hpstat_path = './hpstat'
|
||||||
|
|
||||||
"""Global configuration singleton"""
|
"""Global configuration singleton"""
|
||||||
config = Config()
|
config = Config()
|
||||||
|
@ -105,7 +105,7 @@ def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels
|
|||||||
:type model_class: :class:`yli.regress.RegressionModel` subclass
|
:type model_class: :class:`yli.regress.RegressionModel` subclass
|
||||||
:param df: Data to perform regression on
|
:param df: Data to perform regression on
|
||||||
:type df: DataFrame
|
:type df: DataFrame
|
||||||
:param dep: Column in *df* for the dependent variable (numeric)
|
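:param dep: Column in *df* for the dependent variable (numeric), or multiple columns joined by ``+`` (e.g. ``'Left_Time + Right_Time'``) for models that take more than one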
|
||||||
:type dep: str
|
:type dep: str
|
||||||
:param formula: Patsy formula for the regression model
|
:param formula: Patsy formula for the regression model
|
||||||
:type formula: str
|
:type formula: str
|
||||||
@ -184,12 +184,15 @@ def regress(model_class, df, dep, formula, *, nan_policy='warn', bool_baselevels
|
|||||||
|
|
||||||
def df_to_dmatrices(df, dep, formula, nan_policy):
|
def df_to_dmatrices(df, dep, formula, nan_policy):
|
||||||
# Check for/clean NaNs in input columns
|
# Check for/clean NaNs in input columns
|
||||||
columns = [dep] + cols_for_formula(formula, df)
|
columns = cols_for_formula(dep, df) + cols_for_formula(formula, df)
|
||||||
|
|
||||||
df = df[columns]
|
df = df[columns]
|
||||||
df = check_nan(df, nan_policy)
|
df = check_nan(df, nan_policy)
|
||||||
|
|
||||||
# Ensure numeric type for dependent variable
|
# Ensure numeric type for dependent variable
|
||||||
|
if '+' in dep:
|
||||||
|
dep_categories = None
|
||||||
|
else:
|
||||||
df[dep], dep_categories = as_numeric(df[dep])
|
df[dep], dep_categories = as_numeric(df[dep])
|
||||||
|
|
||||||
# Convert pandas nullable types for independent variables as this breaks statsmodels
|
# Convert pandas nullable types for independent variables as this breaks statsmodels
|
||||||
@ -575,6 +578,92 @@ def raw_terms_from_statsmodels_result(raw_result):
|
|||||||
# ------------------------
|
# ------------------------
|
||||||
# Concrete implementations
|
# Concrete implementations
|
||||||
|
|
||||||
|
class IntervalCensoredCox(RegressionModel):
    """
    Interval-censored Cox regression

    Uses hpstat *intcox* command.

    **Example:**

    .. code-block::

        df = pd.DataFrame(...)
        yli.regress(yli.IntervalCensoredCox, df, 'Left_Time + Right_Time', 'A + B + C + D + E + F + G + H')

    .. code-block:: text

                     Interval-Censored Cox Regression Results
        ===================================================================
        Dep. Variable:  Left_Time + Right_Time | No. Observations:     1124
        Model:           Interval-Censored Cox | No. Events:            133
        Date:                       2023-04-17 | Df. Model:               8
        Time:                         22:30:40 | Pseudo R²:            0.00
        Std. Errors:                       OPG | LL-Model:          -613.21
                                               | LL-Null:           -615.81
                                               | p (LR):               0.82
        ===================================================================
           exp(β)     (95% CI)        p
        ----------------------------------
        A    0.83 (0.37 - 1.88)    0.66
        B    1.08 (0.81 - 1.46)    0.60
        C    0.49 (0.24 - 1.00)    0.052
        D    0.79 (0.42 - 1.50)    0.48
        E    0.87 (0.40 - 1.85)    0.71
        F    0.64 (0.28 - 1.45)    0.29
        G    1.07 (0.44 - 2.62)    0.88
        H    1.23 (0.48 - 3.20)    0.67
        ----------------------------------

    The output summarises the result of the regression.

    **Reference:** Zeng D, Mao L, Lin DY. Maximum likelihood estimation for semiparametric transformation models with interval-censored data. *Biometrika*. 2016;103(2):253–71. `doi:10.1093/biomet/asw013 <https://doi.org/10.1093/biomet/asw013>`_
    """

    @property
    def model_long_name(self):
        # Full model name, as used in the example summary title above
        return 'Interval-Censored Cox Regression'

    @property
    def model_short_name(self):
        # Abbreviated model name, as shown in the "Model:" field of the example summary
        return 'Interval-Censored Cox'

    @classmethod
    def fit(cls, data_dep, data_ind):
        """
        Fit the model by delegating to the external hpstat *intcox* command

        :param data_dep: Dependent variables; must have exactly 2 columns (left and right times)
        :param data_ind: Independent variables; any ``Intercept`` column is removed in place
        :return: New instance populated with terms, log-likelihoods and summary attributes
        :raises ValueError: If *data_dep* does not have exactly 2 columns
        """

        n_dep_columns = len(data_dep.columns)
        if n_dep_columns != 2:
            raise ValueError('IntervalCensoredCox requires left and right times')

        # No explicit intercept term is passed to hpstat
        if 'Intercept' in data_ind:
            del data_ind['Intercept']

        model = cls()
        model.exp = True
        model.cov_type = 'OPG'
        # Observations whose second dependent column is finite are counted as events
        model.nevents = np.isfinite(data_dep.iloc[:, 1]).sum()
        model.dof_model = len(data_ind.columns)

        # Serialise dependent then independent columns as CSV text
        buffer = io.StringIO()
        data_dep.join(data_ind).to_csv(buffer, index=False)

        # Run hpstat intcox, supplying the CSV as the process input and reading JSON from its stdout
        # NOTE(review): the '-' argument presumably tells hpstat to read from stdin - confirm against hpstat docs
        command = [config.hpstat_path, 'intcox', '-', '--output', 'json']
        completed = subprocess.run(
            command,
            input=buffer.getvalue(),
            capture_output=True,
            encoding='utf-8',
            check=True,
        )
        output = json.loads(completed.stdout)

        # Normal-approximation confidence limits and two-sided p values from each estimate and its standard error
        z_critical = -stats.norm.ppf(config.alpha / 2)
        terms = {}
        for name, coef, se in zip(data_ind.columns, output['params'], output['params_se']):
            half_width = z_critical * se
            terms[name] = SingleTerm(
                raw_name=name,
                beta=Estimate(coef, coef - half_width, coef + half_width),
                pvalue=stats.norm.cdf(-np.abs(coef) / se) * 2,
            )
        model.terms = terms

        model.ll_model = output['ll_model']
        model.ll_null = output['ll_null']

        return model
|
||||||
|
|
||||||
class Logit(RegressionModel):
|
class Logit(RegressionModel):
|
||||||
"""
|
"""
|
||||||
Logistic regression
|
Logistic regression
|
||||||
|
Loading…
Reference in New Issue
Block a user