Spaces:
Sleeping
Sleeping
"""Correlation helpers for RQ1 and RQ2 analyses. | |

Functions here wrap `scipy.stats` to compute non-parametric correlations
(Spearman ρ, Kendall τ) with optional bootstrap confidence intervals so
results can be reported with uncertainty estimates.

Typical usage
-------------
    from evaluation.stats.correlation import corr_ci

    rho, (lo, hi), p = corr_ci(x, y, method="spearman", n_boot=1000)
""" | |
from __future__ import annotations | |
from typing import Tuple, Sequence, Literal | |
import numpy as np | |
from scipy import stats | |
# Names of the rank-correlation methods accepted by the helpers below.
Method = Literal[
    "spearman",
    "kendall",
]
def _correlate(x: Sequence[float], y: Sequence[float], method: Method): | |
if method == "spearman": | |
return stats.spearmanr(x, y, nan_policy="omit") | |
if method == "kendall": | |
return stats.kendalltau(x, y, nan_policy="omit") | |
raise ValueError(method) | |
def corr_ci(
    x: Sequence[float],
    y: Sequence[float],
    *,
    method: Method = "spearman",
    n_boot: int = 1000,
    ci: float = 0.95,
    random_state: int | None = None,
) -> Tuple[float, Tuple[float, float], float]:
    """Correlation coefficient, percentile-bootstrap CI, and p-value.

    The coefficient and p-value come from a single call to ``_correlate``
    on the full data. The confidence interval is obtained by resampling
    (x, y) pairs with replacement *n_boot* times, recomputing the
    coefficient on each resample, and taking the matching percentiles of
    those replicates.

    Parameters
    ----------
    x, y
        Numeric sequences of equal length (converted to float arrays).
    method
        'spearman' or 'kendall'.
    n_boot
        Number of bootstrap resamples for the CI. Values <= 0 -> no CI.
    ci
        Confidence level in [0, 1] (e.g. 0.95 for 95 %). Only checked
        when a CI is actually computed.
    random_state
        Seed passed to ``np.random.default_rng`` for reproducibility.

    Returns
    -------
    r : float
        Correlation coefficient.
    (lo, hi) : Tuple[float, float]
        Lower/upper CI bounds. ``(nan, nan)`` if *n_boot* <= 0 or if no
        bootstrap replicate produced a finite coefficient.
    p : float
        Two-sided p-value from the correlation test.

    Raises
    ------
    ValueError
        If *x* and *y* differ in shape, if *method* is unknown, or if
        *ci* lies outside [0, 1] while a CI is requested.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.shape != y.shape:
        raise ValueError("x and y must have the same length")

    r, p = _correlate(x, y, method)
    no_ci = (float("nan"), float("nan"))
    if n_boot <= 0:
        return float(r), no_ci, float(p)
    # Written as a negated chained comparison so a NaN level is rejected too.
    if not 0.0 <= ci <= 1.0:
        raise ValueError(f"ci must be between 0 and 1, got {ci!r}")

    rng = np.random.default_rng(random_state)
    n_obs = len(x)
    replicates = np.empty(n_boot, dtype=float)
    for i in range(n_boot):
        # One draw of n_obs indices per replicate keeps x/y pairs together
        # and keeps the random stream identical for a given seed.
        idx = rng.integers(0, n_obs, n_obs)
        coef, _pval = _correlate(x[idx], y[idx], method)
        replicates[i] = coef

    # A resample can be degenerate (scipy reports NaN when, for instance,
    # one variable is constant in the resample). Drop those replicates so
    # a single undefined coefficient does not turn the whole CI into NaN.
    replicates = replicates[np.isfinite(replicates)]
    if replicates.size == 0:
        return float(r), no_ci, float(p)

    lo, hi = np.percentile(
        replicates, [(1 - ci) / 2 * 100, (1 + ci) / 2 * 100]
    )
    return float(r), (float(lo), float(hi)), float(p)