# Source code for polyzymd.analyses.shared.multi_run_comparison

"""Shared helpers for multi-run comparison orchestration.

These helpers keep run-wise comparison logic concise across plugins that
compare multiple named runs (RMSD, Rg, SASA).
"""

from __future__ import annotations

import logging
from collections.abc import Callable, Mapping, Sequence
from typing import Any



[docs]
def filter_summaries_with_run(
    summaries: dict[str, Any],
    run_label: str,
    get_run_fn: Callable[[Any, str], Any],
    logger: logging.Logger | None = None,
) -> dict[str, Any]:
    """Filter condition summaries to those containing a specific run.

    Parameters
    ----------
    summaries : dict[str, Any]
        Mapping from condition label to condition summary.
    run_label : str
        Run label to keep.
    get_run_fn : Callable[[Any, str], Any]
        Callback that returns run summary for ``(summary, run_label)`` and
        raises ``KeyError`` when the run is missing.
    logger : logging.Logger | None, optional
        Optional logger for missing-run warnings.

    Returns
    -------
    dict[str, Any]
        Subset of ``summaries`` with run data available.
    """
    filtered: dict[str, Any] = {}
    for label, summary in summaries.items():
        try:
            get_run_fn(summary, run_label)
        except KeyError:
            if logger is not None:
                logger.warning(
                    "Run '%s' missing for condition '%s'; excluding from run-level comparison",
                    run_label,
                    label,
                )
            continue
        filtered[label] = summary
    return filtered




[docs]
def build_condition_pairs(
    condition_labels: list[str],
    control_label: str | None,
    on_control_missing: str = "all_pairs",
    logger: logging.Logger | None = None,
) -> list[tuple[str, str]]:
    """Build pairwise condition pairs for comparison.

    Parameters
    ----------
    condition_labels : list[str]
        Ordered condition labels to compare.
    control_label : str | None
        Preferred control label for control-vs-treatment comparisons.
    on_control_missing : str, optional
        Behavior when ``control_label`` is requested but unavailable.

        Supported values:

        - ``"all_pairs"``: fall back to all-vs-all
        - ``"skip"``: return no pairs
    logger : logging.Logger | None, optional
        Optional logger for fallback/skip messages.

    Returns
    -------
    list[tuple[str, str]]
        Pair list as ``(condition_a, condition_b)`` tuples.

    Raises
    ------
    ValueError
        Raised when ``on_control_missing`` is not ``"all_pairs"`` or ``"skip"``.
    """
    if on_control_missing not in ("all_pairs", "skip"):
        raise ValueError(
            f"on_control_missing must be 'all_pairs' or 'skip', got {on_control_missing!r}"
        )

    if len(condition_labels) < 2:
        return []

    if control_label is not None:
        if control_label in condition_labels:
            return [(control_label, label) for label in condition_labels if label != control_label]

        if on_control_missing == "skip":
            if logger is not None:
                logger.warning(
                    "Control condition '%s' unavailable; skipping pairwise comparisons",
                    control_label,
                )
            return []

        if logger is not None:
            logger.warning(
                "Control condition '%s' unavailable; falling back to all-vs-all pairwise comparisons",
                control_label,
            )

    return [
        (condition_labels[i], condition_labels[j])
        for i in range(len(condition_labels))
        for j in range(i + 1, len(condition_labels))
    ]




[docs]
def apply_fdr_correction(
    pairwise_results: list[Any],
    anova_by_run: dict[Any, Any] | list[Any] | None = None,
    fdr_alpha: float = 0.05,
    get_p_value: Callable[[Any], float | None] | None = None,
    set_corrected: Callable[[Any, Any], None] | None = None,
) -> None:
    """Run Benjamini-Hochberg FDR correction on pairwise and ANOVA results.

    The two families are corrected independently: one ``benjamini_hochberg``
    call for ``pairwise_results`` and a separate one for ``anova_by_run``.
    Result objects are updated in place; nothing is returned.

    Parameters
    ----------
    pairwise_results : list[Any]
        Pairwise comparison result objects. Skipped when empty.
    anova_by_run : dict[Any, Any] | list[Any] | None, optional
        ANOVA result objects. A mapping contributes its values; any other
        container is treated as a sequence. Skipped when ``None`` or empty.
    fdr_alpha : float, optional
        FDR threshold forwarded to ``benjamini_hochberg`` as ``alpha``.
    get_p_value : Callable[[Any], float | None] | None, optional
        Extracts the raw p-value from a result object. The default returns
        ``None`` for results whose ``testable`` attribute is present and
        falsy, otherwise ``.p_value`` (or ``None`` when absent).
    set_corrected : Callable[[Any, Any], None] | None, optional
        Called as ``set_corrected(result, bh_result)`` for each result. The
        default writes ``bh_result.adjusted_p_value`` to ``.p_value_adjusted``
        (only when the result already has that attribute) and
        ``bh_result.significant`` to ``.significant``.

    Raises
    ------
    TypeError
        Only when ``set_corrected`` is ``None``: raised if any result in a
        family lacks a ``significant`` attribute, or if ``significant`` /
        ``p_value_adjusted`` cannot be assigned. Each family is validated
        immediately before it is corrected, so the pairwise family may
        already have been updated when the ANOVA family fails validation.
    """
    from polyzymd.analyses.shared.inferential_statistics import benjamini_hochberg

    def _read_p_value(item: Any) -> float | None:
        # Results explicitly flagged as untestable contribute no p-value.
        untestable = hasattr(item, "testable") and not item.testable
        return None if untestable else getattr(item, "p_value", None)

    def _write_correction(item: Any, correction: Any) -> None:
        if hasattr(item, "p_value_adjusted"):
            item.p_value_adjusted = correction.adjusted_p_value
        item.significant = correction.significant

    def _check_default_targets(items: Sequence[Any], family: str) -> None:
        # Fail up front, before any correction is written to this family, if
        # the default setter could not store its outputs on some result.
        hint = "Provide set_corrected=... for custom result objects."
        for position, item in enumerate(items):
            where = f"{family}[{position}] has type {type(item).__name__}. "

            if not hasattr(item, "significant"):
                raise TypeError(
                    "apply_fdr_correction() default setter requires results with a "
                    "'significant' attribute. " + where + hint
                )

            # Writability probe: assign the attribute its own current value.
            current_flag = item.significant
            try:
                item.significant = current_flag
            except (AttributeError, TypeError) as exc:
                raise TypeError(
                    "apply_fdr_correction() default setter requires a mutable "
                    "'significant' attribute. " + where + hint
                ) from exc

            if not hasattr(item, "p_value_adjusted"):
                continue
            current_adjusted = item.p_value_adjusted
            try:
                item.p_value_adjusted = current_adjusted
            except (AttributeError, TypeError) as exc:
                raise TypeError(
                    "apply_fdr_correction() default setter requires a mutable "
                    "'p_value_adjusted' attribute when present. " + where + hint
                ) from exc

    extract_p = get_p_value or _read_p_value
    store_correction = set_corrected or _write_correction
    using_default_setter = set_corrected is None

    def _correct_family(items: Any, family: str) -> None:
        # Validate (default setter only), correct, and write back one family.
        if not items:
            return
        if using_default_setter:
            _check_default_targets(items, family)
        raw_p_values = [extract_p(item) for item in items]
        corrections = benjamini_hochberg(raw_p_values, alpha=fdr_alpha)
        for item, correction in zip(items, corrections, strict=False):
            store_correction(item, correction)

    _correct_family(pairwise_results, "pairwise_results")
    # The ANOVA container is normalized only after the pairwise family is done.
    _correct_family(_coerce_result_sequence(anova_by_run), "anova_by_run")



def _coerce_result_sequence(results: Mapping[Any, Any] | Sequence[Any] | None) -> list[Any]:
    """Normalize mapping or sequence result containers to a list."""
    if results is None:
        return []
    if isinstance(results, Mapping):
        return list(results.values())
    return list(results)