Source code for polyzymd.analyses.shared.inferential_statistics

"""Inferential statistical tests shared across analysis comparisons.

This module provides statistical functions for comparing analysis results
across multiple conditions, including t-tests, ANOVA, and effect sizes.

It is the canonical home for inferential statistics used by analysis plugins
and comparison utilities.

All functions use SciPy for statistical calculations.
"""

from __future__ import annotations

import logging
import math
from dataclasses import dataclass
from typing import Sequence

import numpy as np
from numpy.typing import ArrayLike

logger = logging.getLogger("polyzymd.analyses")


[docs] @dataclass class TTestResult: """Result of a two-sample t-test. Attributes ---------- t_statistic : float The t-statistic p_value : float Two-tailed p-value """ t_statistic: float p_value: float @property def significant(self) -> bool: """Whether the result is significant at p < 0.05. .. note:: This uses a hardcoded alpha=0.05 threshold. The comparison pipeline overrides significance with configurable thresholds (BH-adjusted or Tukey). Use this property only as a convenience default. """ return self.p_value < 0.05
[docs] def to_dict(self) -> dict: """Convert to dictionary.""" return { "t_statistic": self.t_statistic, "p_value": self.p_value, "significant": self.significant, }
[docs] @dataclass class EffectSize: """Cohen's d effect size with interpretation. Attributes ---------- cohens_d : float The effect size (positive = group1 > group2) interpretation : str Categorical interpretation: "negligible", "small", "medium", "large" direction : str "higher" (d > 0), "lower" (d < 0), or "unchanged" (d == 0). """ cohens_d: float interpretation: str direction: str
[docs] def to_dict(self) -> dict: """Convert to dictionary.""" return { "cohens_d": self.cohens_d, "interpretation": self.interpretation, "direction": self.direction, }
[docs] @dataclass class ANOVAResult: """Result of one-way ANOVA. Attributes ---------- f_statistic : float The F-statistic p_value : float P-value for the test """ f_statistic: float p_value: float @property def significant(self) -> bool: """Whether the result is significant at p < 0.05. .. note:: This uses a hardcoded alpha=0.05 threshold. The comparison pipeline overrides significance with configurable thresholds. Use this property only as a convenience default. """ return self.p_value < 0.05
[docs] def to_dict(self) -> dict: """Convert to dictionary.""" return { "f_statistic": self.f_statistic, "p_value": self.p_value, "significant": self.significant, }
[docs] @dataclass class BHResult: """Result of Benjamini-Hochberg correction for one hypothesis. Attributes ---------- raw_p_value : float | None Original uncorrected p-value. adjusted_p_value : float | None BH-adjusted p-value (q-value). None if raw was None. significant : bool Whether adjusted_p_value <= alpha. rank : int | None 1-based rank among non-None p-values (smallest=1). None if raw was None. """ raw_p_value: float | None adjusted_p_value: float | None significant: bool rank: int | None
[docs] def benjamini_hochberg( p_values: Sequence[float | None], alpha: float = 0.05, ) -> list[BHResult]: """Apply Benjamini-Hochberg FDR correction to a family of p-values. Implements the Benjamini-Hochberg (1995) step-up procedure to control the false discovery rate. The correction adjusts p-values such that declaring significance at ``adjusted_p <= alpha`` controls the expected proportion of false discoveries at level *alpha*. ``None`` and ``NaN`` entries in *p_values* (e.g. cross-temperature pairs where statistics are suppressed, or degenerate tests with undefined p-values) are passed through — the corresponding ``BHResult`` has ``adjusted_p_value=None`` and ``significant=False``. Parameters ---------- p_values : Sequence[float | None] Raw two-tailed p-values. ``None`` entries are preserved. alpha : float, optional FDR significance threshold, by default 0.05. Returns ------- list[BHResult] One entry per input p-value, in the same order. References ---------- Benjamini, Y. & Hochberg, Y. (1995). Controlling the false discovery rate: a practical and powerful approach to multiple testing. *JRSS B*, 57(1), 289-300. """ if not 0.0 < alpha <= 1.0: raise ValueError(f"alpha must satisfy 0 < alpha <= 1, got {alpha}") if not p_values: return [] results: list[BHResult] = [ BHResult(raw_p_value=None, adjusted_p_value=None, significant=False, rank=None) for _ in p_values ] indexed_non_null: list[tuple[int, float]] = [] for idx, p in enumerate(p_values): if p is None: continue raw_p = float(p) if math.isnan(raw_p): continue indexed_non_null.append((idx, raw_p)) if not indexed_non_null: return results indexed_non_null.sort(key=lambda item: item[1]) m = len(indexed_non_null) sorted_p = np.asarray([item[1] for item in indexed_non_null], dtype=np.float64) ranks = np.arange(1, m + 1, dtype=np.float64) adjusted_sorted = sorted_p * m / ranks adjusted_sorted = np.minimum.accumulate(adjusted_sorted[::-1])[::-1] adjusted_sorted = np.clip(adjusted_sorted, 0.0, 1.0) for rank_idx, ((original_idx, raw_p), adjusted_p) in enumerate( zip(indexed_non_null, adjusted_sorted, strict=False), start=1, ): adjusted = float(adjusted_p) results[original_idx] = BHResult( raw_p_value=raw_p, adjusted_p_value=adjusted, significant=adjusted <= alpha, rank=rank_idx, ) return results
[docs] def independent_ttest( group1: ArrayLike, group2: ArrayLike, method: str = "student", ) -> TTestResult: """Perform a two-sample independent t-test. Tests the null hypothesis that two independent samples have identical expected values. The ``method`` parameter controls the variance assumption: - ``"student"`` uses Student's t-test (``equal_var=True``), which assumes equal population variances - ``"welch"`` uses Welch's t-test (``equal_var=False``), which does not assume equal variances Use ``"student"`` when homoscedasticity is a reasonable assumption. Use ``"welch"`` when variances may differ across conditions. Parameters ---------- group1 : array_like First group of values (e.g., control replicate means) group2 : array_like Second group of values (e.g., treatment replicate means) method : str, optional T-test method to use: ``"student"`` or ``"welch"``, by default ``"student"``. Returns ------- TTestResult Result containing t-statistic and p-value Examples -------- >>> control = [0.715, 0.693, 0.696] # No polymer RMSF >>> treatment = [0.517, 0.586] # 100% SBMA RMSF >>> result = independent_ttest(control, treatment) >>> print(f"t = {result.t_statistic:.3f}, p = {result.p_value:.4f}") Raises ------ ValueError If *method* is not ``"student"`` or ``"welch"`` """ from scipy import stats g1 = np.asarray(group1, dtype=np.float64) g2 = np.asarray(group2, dtype=np.float64) # Guard: need at least 2 observations per group for a t-test if len(g1) < 2 or len(g2) < 2: return TTestResult(t_statistic=float("nan"), p_value=float("nan")) if method == "student": equal_var = True elif method == "welch": equal_var = False else: raise ValueError(f"Unknown t-test method {method!r}; expected 'student' or 'welch'") t, p = stats.ttest_ind(g1, g2, equal_var=equal_var) return TTestResult( t_statistic=float(t), p_value=float(p), )
def cohens_d( group1: ArrayLike, group2: ArrayLike, ) -> EffectSize: """Compute Cohen's d effect size. Cohen's d is the difference between means divided by the pooled standard deviation. A positive d means group1 has higher values. Parameters ---------- group1 : array_like First group (typically control) group2 : array_like Second group (typically treatment) Returns ------- EffectSize Effect size with interpretation Notes ----- Effect size interpretation (Cohen, 1988): - |d| < 0.2: negligible - 0.2 <= |d| < 0.5: small - 0.5 <= |d| < 0.8: medium - |d| >= 0.8: large """ g1 = np.asarray(group1, dtype=np.float64) g2 = np.asarray(group2, dtype=np.float64) n1, n2 = len(g1), len(g2) if n1 < 2 or n2 < 2: # Undefined: can't compute pooled std with < 2 samples return EffectSize( cohens_d=float("nan"), interpretation="undefined", direction="undetermined", ) else: var1 = np.var(g1, ddof=1) var2 = np.var(g2, ddof=1) # Pooled standard deviation pooled_std = np.sqrt(((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2)) if pooled_std > 0: d = float((np.mean(g1) - np.mean(g2)) / pooled_std) else: # Zero pooled SD with different means is undefined if np.mean(g1) != np.mean(g2): return EffectSize( cohens_d=float("nan"), interpretation="undefined", direction="undetermined", ) d = 0.0 # Interpret magnitude d_abs = abs(d) if d_abs < 0.2: interpretation = "negligible" elif d_abs < 0.5: interpretation = "small" elif d_abs < 0.8: interpretation = "medium" else: interpretation = "large" # Interpret direction if d > 0: direction = "higher" elif d < 0: direction = "lower" else: direction = "unchanged" return EffectSize( cohens_d=d, interpretation=interpretation, direction=direction, )
[docs] def one_way_anova(*groups: ArrayLike) -> ANOVAResult: """Perform classical one-way ANOVA across multiple groups. Tests the null hypothesis that all groups have the same mean using ``scipy.stats.f_oneway`` (equal variance assumption). Parameters ---------- *groups : array_like Variable number of groups to compare. Each group must have at least 2 observations; groups with fewer observations cause the function to return NaN statistics. Returns ------- ANOVAResult Result containing F-statistic and p-value. Both are NaN if any group has fewer than 2 observations. Examples -------- >>> no_poly = [0.715, 0.693, 0.696] >>> sbma = [0.517, 0.586] >>> egma = [0.558, 0.738, 0.496] >>> result = one_way_anova(no_poly, sbma, egma) >>> print(f"F = {result.f_statistic:.3f}, p = {result.p_value:.4f}") """ from scipy import stats arrays = [np.asarray(g, dtype=np.float64) for g in groups] # Guard: need at least 2 groups for ANOVA if len(arrays) < 2: return ANOVAResult(f_statistic=float("nan"), p_value=float("nan")) # Guard: need at least 2 observations per group for ANOVA if any(len(a) < 2 for a in arrays): return ANOVAResult(f_statistic=float("nan"), p_value=float("nan")) stat, p = stats.f_oneway(*arrays) return ANOVAResult(f_statistic=float(stat), p_value=float(p))
[docs] @dataclass class TukeyHSDResult: """Result of Tukey's HSD test for one pair of groups. Attributes ---------- group_i : int Index of the first group. group_j : int Index of the second group. statistic : float Mean difference (group_j - group_i). p_value : float Tukey-adjusted p-value for this pair. """ group_i: int group_j: int statistic: float p_value: float
[docs] def tukey_hsd(*groups: ArrayLike) -> list[TukeyHSDResult]: """Run Tukey's Honestly Significant Difference test. Computes family-wise-adjusted p-values for all pairwise group comparisons using ``scipy.stats.tukey_hsd``. Parameters ---------- *groups : array_like Variable number of groups to compare. Each group must have at least 2 observations. Returns ------- list[TukeyHSDResult] One result per unique pair (i < j), ordered by (i, j). Returns an empty list if fewer than 2 groups are provided or any group has fewer than 2 observations. Examples -------- >>> results = tukey_hsd([1, 2, 3], [4, 5, 6], [7, 8, 9]) >>> for r in results: ... print(f"({r.group_i}, {r.group_j}): p={r.p_value:.4f}") """ from scipy import stats arrays = [np.asarray(g, dtype=np.float64) for g in groups] if len(arrays) < 2: return [] if any(len(a) < 2 for a in arrays): logger.warning( "Tukey HSD requires at least 2 observations per group; " "got sizes %s — returning empty results", [len(a) for a in arrays], ) return [] result = stats.tukey_hsd(*arrays) pairs: list[TukeyHSDResult] = [] n = len(arrays) for i in range(n): for j in range(i + 1, n): pairs.append( TukeyHSDResult( group_i=i, group_j=j, statistic=float(result.statistic[j, i]), p_value=float(result.pvalue[i, j]), ) ) return pairs
[docs] def percent_change(control_mean: float, treatment_mean: float) -> float: """Calculate percent change from control. Parameters ---------- control_mean : float Mean value of control condition treatment_mean : float Mean value of treatment condition Returns ------- float Percent change: (treatment - control) / control * 100 Negative = reduction, Positive = increase. Special handling for zero control values: - 0 -> 0 returns ``0.0`` - 0 -> positive returns ``math.inf`` - 0 -> negative returns ``-math.inf`` If either input is non-finite (NaN or +/-inf), returns ``math.nan``. """ if not (math.isfinite(control_mean) and math.isfinite(treatment_mean)): return math.nan if control_mean == 0: if treatment_mean == 0: return 0.0 return math.inf if treatment_mean > 0 else -math.inf return (treatment_mean - control_mean) / control_mean * 100