Add documentation for auto_correlations
This commit is contained in:
parent
c2d4aaf8be
commit
68d7a31b53
@ -50,6 +50,7 @@ Optional dependencies are:
|
||||
* [rpy2](https://rpy2.github.io/), with R packages:
|
||||
* [BFpack](https://cran.r-project.org/web/packages/BFpack/index.html), for *bayesfactor_afbf* (*RegressionResult.bayesfactor_beta_zero*)
|
||||
* [logistf](https://cran.r-project.org/web/packages/logistf/index.html), for *PenalisedLogit*
|
||||
* matplotlib and [seaborn](https://seaborn.pydata.org/), for plotting functions
|
||||
|
||||
## Functions
|
||||
|
||||
|
@ -4,10 +4,15 @@ Descriptive statistics
|
||||
Functions
|
||||
---------
|
||||
|
||||
.. autofunction:: yli.auto_correlations
|
||||
|
||||
.. autofunction:: yli.auto_descriptives
|
||||
|
||||
Result classes
|
||||
--------------
|
||||
|
||||
.. autoclass:: yli.descriptives.AutoCorrelationsResult
|
||||
:members:
|
||||
|
||||
.. autoclass:: yli.descriptives.AutoDescriptivesResult
|
||||
:members:
|
||||
|
@ -16,7 +16,6 @@
|
||||
|
||||
import pandas as pd
|
||||
from scipy import stats
|
||||
import seaborn as sns
|
||||
|
||||
from .config import config
|
||||
from .utils import as_numeric, check_nan
|
||||
@ -144,7 +143,31 @@ class AutoDescriptivesResult:
|
||||
return str(table)
|
||||
|
||||
def auto_correlations(df, cols):
|
||||
# TODO: Documentation
|
||||
"""
|
||||
Automatically compute pairwise correlation coefficients
|
||||
|
||||
Dichotomous variables are coded as 0/1, according to which value is lower or higher in the natural sort order.
|
||||
Categorical variables with more than 2 categories are coded with one-hot dummy variables for all categories.
|
||||
Ordinal variables are factorised and coded as ranks.
|
||||
Pairwise Pearson correlation coefficients are then calculated on the coded data.
|
||||
|
||||
The effect of the coding is that, for example:
|
||||
|
||||
* 2 continuous variables are compared using Pearson's *r*
|
||||
* 2 ordinal variables are compared using Spearman's *ρ*
|
||||
* 2 dichotomous variables are compared using Yule's *φ*
|
||||
* A continuous variable and a dichotomous variable are compared using point-biserial correlation
|
||||
* An ordinal variable and a dichotomous variable are compared using rank-biserial correlation
|
||||
|
||||
There is no *nan_policy* argument. For each pair of variables, observations where either variable is *nan* are omitted before the correlation coefficient is computed.
|
||||
|
||||
:param df: Data to compute correlations for
|
||||
:type df: DataFrame
|
||||
:param cols: Columns in *df* for the variables to compute correlations for
|
||||
:type cols: List[str]
|
||||
|
||||
:rtype: :class:`yli.descriptives.AutoCorrelationsResult`
|
||||
"""
|
||||
|
||||
def _col_to_numeric(col):
|
||||
if col.dtype == 'category' and col.cat.ordered:
|
||||
@ -206,7 +229,8 @@ def auto_correlations(df, cols):
|
||||
|
||||
for i, col1 in enumerate(df_coded.columns):
|
||||
for col2 in df_coded.columns[:i]:
|
||||
statistic = stats.pearsonr(df_coded[col1], df_coded[col2]).statistic
|
||||
df_2cols = df_coded[[col1, col2]].dropna()
|
||||
statistic = stats.pearsonr(df_2cols[col1], df_2cols[col2]).statistic
|
||||
df_corr.loc[col1, col2] = statistic
|
||||
df_corr.loc[col2, col1] = statistic
|
||||
|
||||
@ -216,9 +240,14 @@ def auto_correlations(df, cols):
|
||||
return AutoCorrelationsResult(df_corr)
|
||||
|
||||
class AutoCorrelationsResult:
|
||||
# TODO: Documentation
|
||||
"""
|
||||
Result of automatically computed pairwise correlation coefficients
|
||||
|
||||
See :func:`yli.auto_correlations`.
|
||||
"""
|
||||
|
||||
def __init__(self, correlations):
|
||||
#: Pairwise correlation coefficients (*DataFrame*)
|
||||
self.correlations = correlations
|
||||
|
||||
def __repr__(self):
|
||||
@ -245,4 +274,9 @@ class AutoCorrelationsResult:
|
||||
return 'Correlation Matrix\n\n' + str(self.correlations)
|
||||
|
||||
def plot(self):
|
||||
"""
|
||||
Plot a heatmap of the pairwise correlation coefficients
|
||||
"""
|
||||
|
||||
import seaborn as sns
|
||||
sns.heatmap(self.correlations, vmin=-1, vmax=1, cmap='RdBu')
|
||||
|
Loading…
Reference in New Issue
Block a user