#   scipy-yli: Helpful SciPy utilities and recipes
#   Copyright © 2022–2024  Lee Yingtong Li (RunasSudo)
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU Affero General Public License for more details.
#
#   You should have received a copy of the GNU Affero General Public License
#   along with this program.  If not, see <https://www.gnu.org/licenses/>.

from pytest import approx

import numpy as np
import pandas as pd

import yli

def test_pearsonr_ol11_15():
	"""Check yli.pearsonr against Ott & Longnecker (2016) example 11.15
	
	Verifies the point estimate of r, the p value, and the plain-text and HTML renderings of the result.
	"""
	
	y_values = [41, 39, 47, 51, 43, 40, 57, 46, 50, 59, 61, 52]
	x_values = [24, 30, 33, 35, 36, 36, 37, 37, 38, 40, 43, 49]
	data = pd.DataFrame({'y': y_values, 'x': x_values})
	
	correlation = yli.pearsonr(data, 'y', 'x')
	
	# Numeric results, within the stated absolute tolerances
	assert correlation.statistic.point == approx(0.646, abs=0.001)
	assert correlation.pvalue == approx(0.0234, abs=0.0001)
	
	# Formatted output must match exactly
	assert correlation.summary() == 'r (95% CI) = 0.65 (0.11–0.89); p = 0.02*'
	assert correlation._repr_html_() == '<i>r</i> (95% CI) = 0.65 (0.11–0.89); <i>p</i> = 0.02*'

def test_pearsonr_ol11_16():
	"""Check yli.pearsonr against Ott & Longnecker (2016) example 11.16
	
	Verifies the point estimate of r and the lower and upper confidence limits.
	"""
	
	eggs = [27, 32, 39, 48, 59, 67, 71, 65, 73, 67, 78, 72, 81, 74, 83, 75, 84, 77, 83, 76, 82, 75, 78, 77, 75, 73, 71, 70, 68, 65]
	weight = [2.1, 2.3, 2.4, 2.5, 2.9, 3.1, 3.2, 3.3, 3.4, 3.4, 3.5, 3.5, 3.5, 3.6, 3.6, 3.6, 3.6, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0, 4.3, 4.4, 4.7, 4.8, 4.9, 5.0, 5.1]
	data = pd.DataFrame({'Eggs': eggs, 'Weight': weight})
	
	correlation = yli.pearsonr(data, 'Eggs', 'Weight')
	
	# Point estimate, then the two confidence limits, each to 3 decimal places
	assert correlation.statistic.point == approx(0.606, abs=0.001)
	assert correlation.statistic.ci_lower == approx(0.314, abs=0.001)
	assert correlation.statistic.ci_upper == approx(0.793, abs=0.001)

def test_spearman_ol11_17():
	"""Check yli.spearman against Ott & Longnecker (2016) example 11.17
	
	Verifies the point estimate of ρ and the plain-text and HTML renderings of the result.
	"""
	
	profit = [2.5, 6.2, 3.1, 4.6, 7.3, 4.5, 6.1, 11.6, 10.0, 14.2, 16.1, 19.5]
	quality = [50, 57, 61, 68, 77, 80, 82, 85, 89, 91, 95, 99]
	data = pd.DataFrame({'Profit': profit, 'Quality': quality})
	
	correlation = yli.spearman(data, 'Profit', 'Quality')
	
	assert correlation.statistic.point == approx(0.874, abs=0.001)
	
	# NB: The confidence intervals in the expected strings below are unvalidated
	assert correlation.summary() == 'ρ (95% CI) = 0.87 (0.60–0.96); p < 0.001*'
	assert correlation._repr_html_() == '<i>ρ</i> (95% CI) = 0.87 (0.60–0.96); <i>p</i> &lt; 0.001*'

def test_eta_wikipedia():
	"""Compare _compute_eta, used in yli.auto_correlations, for https://en.wikipedia.org/w/index.php?title=Correlation_ratio&oldid=1203268770#Example
	
	The expected value is sqrt(6780/9640), compared within pytest.approx's default tolerance.
	"""
	
	df = pd.DataFrame({
		'Subject': ['Algebra'] * 5 + ['Geometry'] * 4 + ['Statistics'] * 6,
		'Score': [45, 70, 29, 15, 21, 40, 20, 30, 42, 65, 95, 80, 70, 85, 73]
	})
	
	result = yli.descriptives._compute_eta(df, 'Subject', 'Score')
	
	# Compare with a tolerance rather than exact equality: the result is a
	# floating-point computation, so bit-for-bit agreement with the reference
	# expression is not guaranteed
	assert result == approx(np.sqrt(6780/9640))