Implement yli.spearman
This commit is contained in:
parent
56e16bc71d
commit
fa89f2f156
@ -49,3 +49,18 @@ def test_pearsonr_ol11_16():
|
|||||||
assert result.statistic.point == approx(0.606, abs=0.001)
|
assert result.statistic.point == approx(0.606, abs=0.001)
|
||||||
assert result.statistic.ci_lower == approx(0.314, abs=0.001)
|
assert result.statistic.ci_lower == approx(0.314, abs=0.001)
|
||||||
assert result.statistic.ci_upper == approx(0.793, abs=0.001)
|
assert result.statistic.ci_upper == approx(0.793, abs=0.001)
|
||||||
|
|
||||||
|
def test_spearman_ol11_17():
    """Check yli.spearman against Ott & Longnecker (2016) example 11.17"""

    # Paired observations from the worked example
    profit = [2.5, 6.2, 3.1, 4.6, 7.3, 4.5, 6.1, 11.6, 10.0, 14.2, 16.1, 19.5]
    quality = [50, 57, 61, 68, 77, 80, 82, 85, 89, 91, 95, 99]
    data = pd.DataFrame({'Profit': profit, 'Quality': quality})

    outcome = yli.spearman(data, 'Profit', 'Quality')

    # Point estimate of rho
    assert outcome.statistic.point == approx(0.874, abs=0.001)

    # NB: The confidence intervals are unvalidated
    assert outcome.summary() == 'ρ (95% CI) = 0.87 (0.60–0.96); p < 0.001*'
|
||||||
|
@ -20,7 +20,7 @@ from .descriptives import auto_descriptives
|
|||||||
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
|
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
|
||||||
from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
|
from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
|
||||||
from .regress import OrdinalLogit, PenalisedLogit, logit_then_regress, regress, vif
|
from .regress import OrdinalLogit, PenalisedLogit, logit_then_regress, regress, vif
|
||||||
from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, ttest_ind
|
from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, spearman, ttest_ind
|
||||||
|
|
||||||
def reload_me():
|
def reload_me():
|
||||||
import importlib
|
import importlib
|
||||||
|
@ -23,7 +23,7 @@ import functools
|
|||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from .config import config
|
from .config import config
|
||||||
from .utils import Estimate, Interval, PValueStyle, as_2groups, check_nan, convert_pandas_nullable, fmt_p
|
from .utils import Estimate, Interval, PValueStyle, as_2groups, as_numeric, check_nan, convert_pandas_nullable, fmt_p
|
||||||
|
|
||||||
# ----------------
|
# ----------------
|
||||||
# Student's t test
|
# Student's t test
|
||||||
@ -718,6 +718,46 @@ def pearsonr(df, dep, ind, *, nan_policy='warn'):
|
|||||||
|
|
||||||
return PearsonRResult(statistic=Estimate(result.statistic, ci.low, ci.high), pvalue=result.pvalue)
|
return PearsonRResult(statistic=Estimate(result.statistic, ci.low, ci.high), pvalue=result.pvalue)
|
||||||
|
|
||||||
|
# --------------------
|
||||||
|
# Spearman correlation
|
||||||
|
|
||||||
|
class SpearmanResult:
    """
    Result of a Spearman rank correlation

    Holds the correlation estimate (*statistic*) and the associated p value (*pvalue*).
    """

    def __init__(self, statistic, pvalue):
        # Correlation estimate; its summary() supplies the estimate text in the summaries below
        self.statistic = statistic
        # p value, rendered through fmt_p in the summaries below
        self.pvalue = pvalue

    def __repr__(self):
        # Fall back to the default object repr unless summaries are requested via config
        if not config.repr_is_summary:
            return super().__repr__()
        return self.summary()

    def _repr_html_(self):
        """
        Return an HTML summary of the Spearman correlation

        :rtype: str
        """

        confidence_pct = (1-config.alpha)*100
        estimate_text = self.statistic.summary()
        p_text = fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML)
        return '<i>ρ</i> ({:g}% CI) = {}; <i>p</i> {}'.format(confidence_pct, estimate_text, p_text)

    def summary(self):
        """
        Return a plain-text summary of the Spearman correlation

        :rtype: str
        """

        confidence_pct = (1-config.alpha)*100
        estimate_text = self.statistic.summary()
        p_text = fmt_p(self.pvalue, PValueStyle.RELATION)
        return 'ρ ({:g}% CI) = {}; p {}'.format(confidence_pct, estimate_text, p_text)
|
||||||
|
|
||||||
|
def spearman(df, dep, ind, *, nan_policy='warn'):
    """
    Compute Spearman's rank correlation coefficient (ρ) between two columns of *df*

    The confidence interval is obtained by applying the Fisher transformation to ρ,
    i.e. the same construction SciPy uses for Pearson's r.
    NB: the confidence interval for ρ is unvalidated.

    :param df: Data to analyse; indexed by the column names *dep* and *ind*
    :param dep: Name of the column holding the dependent variable
    :param ind: Name of the column holding the independent variable
    :param nan_policy: Passed through to ``check_nan`` to control NaN handling
    :return: ρ with a confidence interval at the ``1 - config.alpha`` level, and the p value from ``scipy.stats.spearmanr``
    :rtype: SpearmanResult
    """

    # Local import: only needed for the Fisher transformation below
    import math

    # Check for/clean NaNs
    df = check_nan(df[[ind, dep]], nan_policy)

    # Ensure numeric, factorising categorical variables as required
    # (bound to new names so the column-name parameters are not shadowed)
    ind_values, _ = as_numeric(df[ind])
    dep_values, _ = as_numeric(df[dep])

    # Compute Spearman's rho
    result = stats.spearmanr(ind_values, dep_values)
    rho = result.correlation

    # Compute confidence interval via the Fisher transformation, using only public APIs
    # rather than the private scipy helper stats._stats_py._pearsonr_fisher_ci
    n = len(dep_values)
    if n > 3:
        if abs(rho) >= 1:
            # atanh diverges at +/-1; the interval collapses onto the bound
            z = math.copysign(math.inf, rho)
        else:
            z = math.atanh(rho)  # NaN propagates unchanged

        # Standard error of z is 1/sqrt(n - 3); two-sided normal quantile for the requested level
        confidence_level = 1 - config.alpha
        half_width = stats.norm.ppf(0.5 + confidence_level / 2) / math.sqrt(n - 3)

        ci_lower = math.tanh(z - half_width)
        ci_upper = math.tanh(z + half_width)
    else:
        # Standard error is undefined for n <= 3: report the widest possible interval
        ci_lower, ci_upper = -1.0, 1.0

    return SpearmanResult(statistic=Estimate(rho, ci_lower, ci_upper), pvalue=result.pvalue)
|
||||||
|
|
||||||
# ----------------------------
|
# ----------------------------
|
||||||
# Automatic selection of tests
|
# Automatic selection of tests
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user