From fa89f2f15658f204bfa3482ede43f4bcab6372b4 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Sat, 3 Dec 2022 20:00:29 +1100 Subject: [PATCH] Implement yli.spearman --- tests/test_correlation.py | 15 ++++++++++++++ yli/__init__.py | 2 +- yli/sig_tests.py | 42 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/tests/test_correlation.py b/tests/test_correlation.py index 34bfc3a..d3a65c0 100644 --- a/tests/test_correlation.py +++ b/tests/test_correlation.py @@ -49,3 +49,18 @@ def test_pearsonr_ol11_16(): assert result.statistic.point == approx(0.606, abs=0.001) assert result.statistic.ci_lower == approx(0.314, abs=0.001) assert result.statistic.ci_upper == approx(0.793, abs=0.001) + +def test_spearman_ol11_17(): + """Compare yli.spearman for Ott & Longnecker (2016) example 11.17""" + + df = pd.DataFrame({ + 'Profit': [2.5, 6.2, 3.1, 4.6, 7.3, 4.5, 6.1, 11.6, 10.0, 14.2, 16.1, 19.5], + 'Quality': [50, 57, 61, 68, 77, 80, 82, 85, 89, 91, 95, 99] + }) + + result = yli.spearman(df, 'Profit', 'Quality') + + assert result.statistic.point == approx(0.874, abs=0.001) + + expected_summary = 'ρ (95% CI) = 0.87 (0.60–0.96); p < 0.001*' # NB: The confidence intervals are unvalidated + assert result.summary() == expected_summary diff --git a/yli/__init__.py b/yli/__init__.py index 35e1f9d..0609483 100644 --- a/yli/__init__.py +++ b/yli/__init__.py @@ -20,7 +20,7 @@ from .descriptives import auto_descriptives from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist from .io import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted from .regress import OrdinalLogit, PenalisedLogit, logit_then_regress, regress, vif -from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, ttest_ind +from .sig_tests import anova_oneway, auto_univariable, chi2, mannwhitney, pearsonr, spearman, ttest_ind def reload_me(): import importlib diff --git a/yli/sig_tests.py b/yli/sig_tests.py index 14ba7a8..1cafc77 100644 --- a/yli/sig_tests.py +++ b/yli/sig_tests.py @@ -23,7 +23,7 @@ import functools import warnings from .config import config -from .utils import Estimate, Interval, PValueStyle, as_2groups, check_nan, convert_pandas_nullable, fmt_p +from .utils import Estimate, Interval, PValueStyle, as_2groups, as_numeric, check_nan, convert_pandas_nullable, fmt_p # ---------------- # Student's t test @@ -718,6 +718,46 @@ def pearsonr(df, dep, ind, *, nan_policy='warn'): return PearsonRResult(statistic=Estimate(result.statistic, ci.low, ci.high), pvalue=result.pvalue) +# -------------------- +# Spearman correlation + +class SpearmanResult: + def __init__(self, statistic, pvalue): + self.statistic = statistic + self.pvalue = pvalue + + def __repr__(self): + if config.repr_is_summary: + return self.summary() + return super().__repr__() + + def _repr_html_(self): + return 'ρ ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, PValueStyle.RELATION | PValueStyle.HTML)) + + def summary(self): + """ + Return a stringified summary of the Spearman correlation + + :rtype: str + """ + + return 'ρ ({:g}% CI) = {}; p {}'.format((1-config.alpha)*100, self.statistic.summary(), fmt_p(self.pvalue, PValueStyle.RELATION)) + +def spearman(df, dep, ind, *, nan_policy='warn'): + # Check for/clean NaNs + df = check_nan(df[[ind, dep]], nan_policy) + + # Ensure numeric, factorising categorical variables as required + ind, _ = as_numeric(df[ind]) + dep, _ = as_numeric(df[dep]) + + # Compute Spearman's rho + result = stats.spearmanr(ind, dep) + + # Compute confidence interval + ci = stats._stats_py._pearsonr_fisher_ci(result.correlation, len(dep), 1 - config.alpha, 'two-sided') + return SpearmanResult(statistic=Estimate(result.correlation, ci.low, ci.high), pvalue=result.pvalue) + # ---------------------------- # Automatic selection of tests