diff --git a/tests/test_correlation.py b/tests/test_correlation.py
new file mode 100644
index 0000000..34bfc3a
--- /dev/null
+++ b/tests/test_correlation.py
@@ -0,0 +1,51 @@
+# scipy-yli: Helpful SciPy utilities and recipes
+# Copyright © 2022 Lee Yingtong Li (RunasSudo)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+from pytest import approx
+
+import pandas as pd
+
+import yli
+
+def test_pearsonr_ol11_15():
+    """Compare yli.pearsonr for Ott & Longnecker (2016) example 11.15"""
+
+    df = pd.DataFrame({
+        'y': [41, 39, 47, 51, 43, 40, 57, 46, 50, 59, 61, 52],
+        'x': [24, 30, 33, 35, 36, 36, 37, 37, 38, 40, 43, 49]
+    })
+
+    result = yli.pearsonr(df, 'y', 'x')
+
+    assert result.statistic.point == approx(0.646, abs=0.001)
+    assert result.pvalue == approx(0.0234, abs=0.0001)
+
+    expected_summary = 'r (95% CI) = 0.65 (0.11–0.89); p = 0.02*'
+    assert result.summary() == expected_summary
+
+def test_pearsonr_ol11_16():
+    """Compare yli.pearsonr for Ott & Longnecker (2016) example 11.16"""
+
+    df = pd.DataFrame({
+        'Eggs': [27, 32, 39, 48, 59, 67, 71, 65, 73, 67, 78, 72, 81, 74, 83, 75, 84, 77, 83, 76, 82, 75, 78, 77, 75, 73, 71, 70, 68, 65],
+        'Weight': [2.1, 2.3, 2.4, 2.5, 2.9, 3.1, 3.2, 3.3, 3.4, 3.4, 3.5, 3.5, 3.5, 3.6, 3.6, 3.6, 3.6, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0, 4.3, 4.4, 4.7, 4.8, 4.9, 5.0, 5.1]
+    })
+
+    result = yli.pearsonr(df, 'Eggs', 'Weight')
+
+    assert result.statistic.point == approx(0.606, abs=0.001)
+    assert result.statistic.ci_lower == approx(0.314, abs=0.001)
+    assert result.statistic.ci_upper == approx(0.793, abs=0.001)
diff --git a/yli/sig_tests.py b/yli/sig_tests.py
index 1535b36..5d63321 100644
--- a/yli/sig_tests.py
+++ b/yli/sig_tests.py
@@ -543,6 +543,22 @@ def pearsonr(df, dep, ind, *, nan_policy='warn'):
     :type nan_policy: str
 
     :rtype: :class:`yli.sig_tests.PearsonRResult`
+
+    **Example:**
+
+    .. code-block::
+
+        df = pd.DataFrame({
+            'y': [41, 39, 47, 51, 43, 40, 57, 46, 50, 59, 61, 52],
+            'x': [24, 30, 33, 35, 36, 36, 37, 37, 38, 40, 43, 49]
+        })
+        yli.pearsonr(df, 'y', 'x')
+
+    .. code-block:: text
+
+        r (95% CI) = 0.65 (0.11–0.89); p = 0.02*
+
+    The output states that the value of the Pearson correlation coefficient is 0.65, with 95% confidence interval 0.11–0.89, and the test is significant with *p* value 0.02.
     """
 
     # Check for/clean NaNs