# Source code for polyzymd.analyses._framework.comparison_models

"""Comparison result models for analysis plugins."""

from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import ClassVar, Generic, Self, TypeVar

from pydantic import BaseModel, ConfigDict, Field


class BasePlotSettings(BaseModel):
    """Base class for per-analysis plot settings.

    Declares no fields of its own; it only provides a common pydantic base
    type. Presumably each analysis subclasses it to add its own plotting
    options (no subclasses are visible in this module).
    """


class SlurmResourceHint(BaseModel):
    """Per-plugin SLURM resource hints for HPC submission.

    Every field is optional and defaults to ``None``. The string fields are
    not validated or parsed here.
    """

    # Memory hint as a free-form string (presumably SLURM-style, e.g. "4G" --
    # confirm against the submission code).
    mem: str | None = None
    # Wall-time hint as a free-form string (format not checked here).
    time: str | None = None
    # CPUs-per-task hint.
    cpus_per_task: int | None = None


@dataclass(frozen=True)
class MetricValue:
    """A scalar metric extracted from one aggregated condition result.

    Instances are frozen, so attributes cannot be reassigned after
    construction. ``replicate_values`` is still a plain ``list``: its contents
    can be mutated in place, and hashing an instance raises ``TypeError``
    because the generated ``__hash__`` includes that list.
    """

    # Identifier of the metric.
    name: str
    # Mean value of the metric (presumably across replicates -- TODO confirm).
    mean: float
    # SEM accompanying ``mean``.
    sem: float
    # Individual values that accompany the mean (presumably one per replicate).
    replicate_values: list[float]
    # Defaults to True; None is also accepted (presumably "no preferred
    # direction" -- confirm with the consumers of this flag).
    higher_is_better: bool | None = True
    # Three labels, defaulting to decreased / unchanged / increased.
    direction_labels: tuple[str, str, str] = ("decreased", "unchanged", "increased")


class ConditionSummary(BaseModel):
    """Per-condition summary row used by a scalar comparison.

    Only ``label`` and ``n_replicates`` are declared. Extra fields are
    allowed, so any additional keyword supplied at construction is kept on
    the instance instead of being rejected.
    """

    model_config = ConfigDict(extra="allow")

    label: str
    n_replicates: int = 0


class PairwiseResult(BaseModel):
    """Statistical comparison between two conditions for one metric.

    Non-finite floats (NaN, +/-inf) are written to JSON as strings rather
    than ``null`` (see ``model_config``).
    """

    model_config = ConfigDict(ser_json_inf_nan="strings")

    # Labels of the two conditions being compared.
    condition_a: str
    condition_b: str
    # Name of the metric this comparison refers to.
    metric: str = "default"
    t_statistic: float
    p_value: float
    # Required even though it may be None: there is no default value.
    p_value_adjusted: float | None
    # Identifier of the post-hoc procedure; defaults to "ttest_bh".
    posthoc_method: str = "ttest_bh"
    cohens_d: float
    # Free-form text describing the effect size.
    effect_size_interpretation: str
    # Free-form text describing the direction of the change.
    direction: str
    significant: bool
    percent_change: float
    # Defaults to True; ``note`` is an optional free-text annotation
    # (presumably explaining why a pair was not testable -- TODO confirm).
    testable: bool = True
    note: str | None = None


class ANOVAResult(BaseModel):
    """One-way ANOVA result for one metric.

    Non-finite floats (NaN, +/-inf) are written to JSON as strings, matching
    the other result models in this module.
    """

    # Without this, pydantic's default writes NaN/inf as JSON ``null``, which
    # the required ``float`` fields below cannot load back.
    model_config = ConfigDict(ser_json_inf_nan="strings")

    # Name of the metric this test refers to.
    metric: str = "default"
    f_statistic: float
    p_value: float
    significant: bool
    # Defaults to True; ``note`` is an optional free-text annotation.
    testable: bool = True
    note: str | None = None


class ComparisonResult(BaseModel):
    """Serializable result of a default scalar cross-condition comparison.

    Non-finite floats (NaN, +/-inf) are written to JSON as strings rather
    than ``null`` (see ``model_config``). Only ``analysis_type`` and ``name``
    are required; every other field has a default.
    """

    model_config = ConfigDict(ser_json_inf_nan="strings")

    analysis_type: str
    name: str
    control_label: str | None = None
    fdr_alpha: float | None = None
    ttest_method: str = "student"
    posthoc_method: str = "ttest_bh"
    conditions: list[ConditionSummary] = Field(default_factory=list)
    pairwise_comparisons: list[PairwiseResult] = Field(default_factory=list)
    anova: list[ANOVAResult] | None = None
    ranking: list[str] = Field(default_factory=list)
    rankings_by_metric: dict[str, list[str]] | None = None
    equilibration_time: str = "0ns"
    created_at: str = ""
    polyzymd_version: str = ""

    def save(self, path: Path | str) -> Path:
        """Save the result to a JSON file.

        Missing parent directories are created. The file is always written
        as UTF-8.

        Parameters
        ----------
        path : Path or str
            Output path.

        Returns
        -------
        Path
            Path to the saved file.
        """
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        # The JSON may contain non-ASCII text (e.g. labels); write UTF-8
        # explicitly instead of relying on the platform locale encoding.
        path.write_text(self.model_dump_json(indent=2), encoding="utf-8")
        return path

    @classmethod
    def load(cls, path: Path | str) -> Self:
        """Load the result from a JSON file.

        The file is read as UTF-8, mirroring :meth:`save`.

        Parameters
        ----------
        path : Path or str
            Path to a JSON file.

        Returns
        -------
        Self
            Loaded result.
        """
        path = Path(path)
        return cls.model_validate_json(path.read_text(encoding="utf-8"))


class BaseConditionSummary(BaseModel, ABC):
    """Abstract base class for condition-level custom comparison summaries.

    Subclasses must implement ``primary_metric_value`` and
    ``primary_metric_sem``.
    """

    label: str
    config_path: str
    n_replicates: int
    replicate_values: list[float]

    @property
    @abstractmethod
    def primary_metric_value(self) -> float:
        """Return the primary metric value for ranking and comparison."""

    @property
    @abstractmethod
    def primary_metric_sem(self) -> float:
        """Return the SEM of the primary metric."""


TConditionSummary = TypeVar("TConditionSummary", bound=BaseConditionSummary)
TPairwiseResult = TypeVar("TPairwiseResult", bound=PairwiseResult)


class BaseComparisonResult(BaseModel, ABC, Generic[TConditionSummary, TPairwiseResult]):
    """Abstract base class for custom plugin comparison results.

    Generic over the condition-summary and pairwise-result types it holds.
    Non-finite floats (NaN, +/-inf) are written to JSON as strings rather
    than ``null`` (see ``model_config``).
    """

    model_config = ConfigDict(ser_json_inf_nan="strings")

    comparison_type: ClassVar[str] = "base"

    metric: str
    name: str
    control_label: str | None = None
    conditions: list[TConditionSummary]
    pairwise_comparisons: list[TPairwiseResult]
    anova: ANOVAResult | list[ANOVAResult] | None = None
    ranking: list[str]
    equilibration_time: str
    created_at: datetime
    polyzymd_version: str

    def save(self, path: Path | str) -> Path:
        """Save the result to a JSON file.

        Missing parent directories are created. The file is always written
        as UTF-8.

        Parameters
        ----------
        path : Path or str
            Output path.

        Returns
        -------
        Path
            Path to the saved file.
        """
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        # The JSON may contain non-ASCII text (e.g. labels); write UTF-8
        # explicitly instead of relying on the platform locale encoding.
        path.write_text(self.model_dump_json(indent=2), encoding="utf-8")
        return path

    @classmethod
    def load(cls, path: Path | str) -> Self:
        """Load the result from a JSON file.

        The file is read as UTF-8, mirroring :meth:`save`.

        Parameters
        ----------
        path : Path or str
            Path to a JSON file.

        Returns
        -------
        Self
            Loaded result.
        """
        path = Path(path)
        return cls.model_validate_json(path.read_text(encoding="utf-8"))

    def get_condition(self, label: str) -> TConditionSummary:
        """Get a condition by label.

        Conditions are scanned in order and the first match is returned.

        Parameters
        ----------
        label : str
            Condition label.

        Returns
        -------
        TConditionSummary
            The matching condition summary.

        Raises
        ------
        KeyError
            If the condition is not found.
        """
        for condition in self.conditions:
            if condition.label == label:
                return condition
        raise KeyError(f"Condition '{label}' not found")

    def get_comparison(self, label: tuple[str, str]) -> TPairwiseResult | None:
        """Get a pairwise comparison by condition pair.

        Matching is order-sensitive: ``(a, b)`` does not match a comparison
        stored as ``(b, a)``. The first match is returned.

        Parameters
        ----------
        label : tuple[str, str]
            Explicit ``(condition_a, condition_b)`` pair.

        Returns
        -------
        TPairwiseResult or None
            The matching comparison, or ``None`` if not found.

        Raises
        ------
        TypeError
            If ``label`` is not a tuple.
        ValueError
            If ``label`` does not contain exactly two elements.
        """
        if not isinstance(label, tuple):
            raise TypeError("Comparison lookup requires a (condition_a, condition_b) tuple")
        condition_a, condition_b = label
        for comparison in self.pairwise_comparisons:
            if comparison.condition_a == condition_a and comparison.condition_b == condition_b:
                return comparison
        return None