Source code for polyzymd.analyses.shared.binding_preference._models

"""Binding preference result models and data containers."""

from __future__ import annotations

import json
import logging
from pathlib import Path
from typing import Any, Literal

from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)


class PolymerComposition(BaseModel):
    """Per-type polymer composition extracted from a trajectory.

    Holds how many residues and how many heavy (non-hydrogen) atoms each
    polymer type contributes, so enrichment ratios can be normalized by
    polymer availability in two ways:

    - **Residue-based**: normalize by the number of polymer residues per
      type. Matches the experimental viewpoint where concentrations are
      specified in terms of monomer units.
    - **Atom-based**: normalize by the number of heavy atoms per type.
      Accounts for differences in monomer size, since larger monomers have
      more surface area and thus more opportunity for contacts.

    Attributes
    ----------
    residue_counts : dict[str, int]
        Number of residues per polymer type (e.g., {"SBM": 50, "EGM": 50}).
    heavy_atom_counts : dict[str, int]
        Number of heavy atoms per polymer type. Heavy atoms are defined as
        all atoms with element != 'H'.

    Examples
    --------
    >>> composition = PolymerComposition(
    ...     residue_counts={"SBM": 50, "EGM": 50},
    ...     heavy_atom_counts={"SBM": 750, "EGM": 400},
    ... )
    >>> composition.total_residues
    100
    >>> composition.residue_fraction("SBM")
    0.5
    >>> round(composition.heavy_atom_fraction("SBM"), 3)  # SBM has larger monomers
    0.652
    """

    residue_counts: dict[str, int] = Field(
        default_factory=dict,
        description="Number of residues per polymer type",
    )
    heavy_atom_counts: dict[str, int] = Field(
        default_factory=dict,
        description="Number of heavy atoms (non-H) per polymer type",
    )

    @property
    def total_residues(self) -> int:
        """Sum of the residue counts over every polymer type."""
        counts = self.residue_counts.values()
        return sum(counts)

    @property
    def total_heavy_atoms(self) -> int:
        """Sum of the heavy-atom counts over every polymer type."""
        counts = self.heavy_atom_counts.values()
        return sum(counts)

    def residue_fraction(self, polymer_type: str) -> float:
        """Return the share of all residues that belong to ``polymer_type``.

        Parameters
        ----------
        polymer_type : str
            Polymer type name (e.g., "SBM")

        Returns
        -------
        float
            Fraction in range [0, 1]. 0.0 when the type is unknown or when
            the composition holds no residues at all.
        """
        denominator = self.total_residues
        # Guard the empty composition so we never divide by zero.
        if not denominator:
            return 0.0
        numerator = self.residue_counts.get(polymer_type, 0)
        return numerator / denominator

    def heavy_atom_fraction(self, polymer_type: str) -> float:
        """Return the share of all heavy atoms that belong to ``polymer_type``.

        Parameters
        ----------
        polymer_type : str
            Polymer type name (e.g., "SBM")

        Returns
        -------
        float
            Fraction in range [0, 1]. 0.0 when the type is unknown or when
            the composition holds no heavy atoms at all.
        """
        denominator = self.total_heavy_atoms
        # Guard the empty composition so we never divide by zero.
        if not denominator:
            return 0.0
        numerator = self.heavy_atom_counts.get(polymer_type, 0)
        return numerator / denominator

    def polymer_types(self) -> list[str]:
        """Return the sorted union of type names seen in either count mapping."""
        names = self.residue_counts.keys() | self.heavy_atom_counts.keys()
        return sorted(names)
class BindingPreferenceEntry(BaseModel):
    """Single entry in the binding preference matrix.

    Pure data container holding the binding preference metrics for one
    (polymer_type, protein_group) combination. Nothing is computed in this
    class; every value is supplied by the code that builds the entry.

    Field declaration order is significant: it determines the key order of
    ``model_dump()`` and therefore the column order of any DataFrame built
    from these entries.

    Enrichment Interpretation (centered at zero)
    --------------------------------------------
    The enrichment ratio measures whether a polymer type contacts a protein
    group more or less than expected based on **surface availability**.

    - enrichment > 0: Preferential binding (more contacts than expected)
        - +0.5 means "50% more contacts than expected"
        - +1.0 means "2× as many contacts as expected"
    - enrichment = 0: Neutral (contact frequency matches surface availability)
    - enrichment < 0: Avoidance (fewer contacts than expected)
        - -0.3 means "30% fewer contacts than expected"
    - enrichment = -1: Complete avoidance (no contacts at all)

    The expected share is based on protein surface availability:

        expected_share = n_exposed_in_group / total_exposed_residues

    This normalization answers: "Given how much of the protein surface is
    aromatic/charged/etc., does this polymer type contact that surface
    proportionally, more than proportionally, or less?"

    Attributes
    ----------
    polymer_type : str
        Polymer residue type (e.g., "SBM", "EGM")
    protein_group : str
        Protein group label (e.g., "aromatic", "charged_positive")
    total_contact_frames : int
        Sum of contact frames across all exposed residues in this group.
        Required (no default).
    mean_contact_fraction : float
        Average per-residue contact fraction within this group.
        Required (no default).
    n_residues_in_group : int
        Total residues in this protein group (exposed + buried).
        Required (no default).
    n_exposed_in_group : int
        Surface-exposed residues in this group (used for enrichment).
        Required (no default).
    n_residues_contacted : int
        Number of exposed residues that had at least one contact.
        Defaults to 0.
    contact_share : float
        Fraction of this polymer's total contacts that went to this group.
        Defaults to 0.0.
    expected_share : float
        Expected contact share based on surface availability
        (n_exposed_in_group / total_exposed_residues). Defaults to 0.0.
    enrichment : float | None
        Zero-centered enrichment: (contact_share / expected_share) - 1.
        Defaults to None.
    polymer_residue_count : int
        Number of residues of this polymer type (metadata). Defaults to 0.
    total_polymer_residues : int
        Total polymer residues across all types (metadata). Defaults to 0.
    polymer_heavy_atom_count : int
        Number of heavy atoms for this polymer type (metadata). Defaults to 0.
    total_polymer_heavy_atoms : int
        Total polymer heavy atoms across all types (metadata). Defaults to 0.
    """

    # Identity of the matrix cell.
    polymer_type: str
    protein_group: str
    # Raw contact statistics; the first four carry no default and must be supplied.
    total_contact_frames: int = Field(
        description="Sum of contact frames for all exposed residues in group"
    )
    mean_contact_fraction: float = Field(
        description="Average per-residue contact fraction in group"
    )
    n_residues_in_group: int = Field(description="Total residues in this protein group")
    n_exposed_in_group: int = Field(description="Surface-exposed residues in group")
    n_residues_contacted: int = Field(
        default=0, description="Exposed residues with at least one contact"
    )
    # Normalized metrics; enrichment stays None when no value was supplied.
    contact_share: float = Field(
        default=0.0, description="Fraction of polymer's contacts to this group"
    )
    expected_share: float = Field(
        default=0.0,
        description="Expected share based on protein surface availability",
    )
    enrichment: float | None = Field(
        default=None,
        description="Zero-centered enrichment: (contact_share / expected_share) - 1",
    )
    # Polymer composition metadata (for secondary analysis)
    polymer_residue_count: int = Field(
        default=0,
        description="Number of residues of this polymer type (metadata)",
    )
    total_polymer_residues: int = Field(
        default=0,
        description="Total polymer residues across all types (metadata)",
    )
    polymer_heavy_atom_count: int = Field(
        default=0,
        description="Number of heavy atoms (non-H) for this polymer type (metadata)",
    )
    total_polymer_heavy_atoms: int = Field(
        default=0,
        description="Total heavy atoms across all polymer types (metadata)",
    )
class BindingPreferenceResult(BaseModel):
    """Complete binding preference analysis result.

    Provides enrichment-normalized metrics for polymer-protein binding
    preferences, answering questions like:

    - "Does SBMA preferentially bind aromatic residues?"
    - "How does EGMA's preference for charged residues compare to SBMA?"
    - "Which amino acid class does this polymer type prefer?"

    Enrichment values are centered at zero:

    - enrichment > 0: Preferential binding
    - enrichment = 0: Neutral (random chance)
    - enrichment < 0: Avoidance

    Every accessor method on this class (``to_dataframe``, the ``*_matrix``
    methods, ``get_enrichment``, ``get_entry``, ``polymer_types``,
    ``protein_groups``) reads from ``entries``. The ``entries`` field itself
    is flagged as deprecated in its field description in favor of
    ``binding_preference``.

    Attributes
    ----------
    entries : list[BindingPreferenceEntry]
        All (polymer_type × protein_group) combinations. Marked deprecated
        (overlapping-groups entries); use ``binding_preference`` instead.
    n_frames : int
        Total frames analyzed
    total_exposed_residues : int
        Number of surface-exposed residues considered
    surface_exposure_threshold : float | None
        SASA threshold used for surface filtering
    protein_groups_used : dict[str, str]
        Mapping of group name to MDAnalysis selection string
    polymer_types_used : dict[str, str]
        Mapping of polymer type name to MDAnalysis selection string
    polymer_composition : PolymerComposition | None
        Polymer composition data (residue/atom counts per type, metadata only)
    system_coverage : SystemCoverageResult | None
        System-level coverage metrics collapsed across polymer types.
        Answers: "What does this polymer mixture collectively cover?"
    binding_preference : PolymerBindingPreferenceResult | None
        Partition-based per-polymer binding preference; described as the
        primary binding preference output from schema version 5 onward.
    metadata : dict[str, Any]
        Free-form extra information. Values must be JSON-serializable for
        ``save`` to succeed.
    schema_version : int
        Version for forward compatibility. Defaults to 5, which adds the
        partition-based ``binding_preference`` field.
    """

    entries: list[BindingPreferenceEntry] = Field(
        default_factory=list,
        description="DEPRECATED: Overlapping-groups entries. Use binding_preference instead.",
    )
    n_frames: int = 0
    total_exposed_residues: int = 0
    surface_exposure_threshold: float | None = None
    protein_groups_used: dict[str, str] = Field(default_factory=dict)
    polymer_types_used: dict[str, str] = Field(default_factory=dict)
    polymer_composition: PolymerComposition | None = Field(
        default=None,
        description="Polymer composition data (residue/atom counts per type, metadata only)",
    )
    system_coverage: "SystemCoverageResult | None" = Field(
        default=None,
        description="System-level coverage metrics collapsed across polymer types",
    )
    binding_preference: "PolymerBindingPreferenceResult | None" = Field(
        default=None,
        description=(
            "Partition-based per-polymer binding preference. "
            "contact_share sums to 1.0 within each partition for each polymer. "
            "This is the primary binding preference output (v5+)."
        ),
    )
    metadata: dict[str, Any] = Field(default_factory=dict)
    schema_version: int = 5  # Version 5: adds partition-based binding_preference

    def to_dataframe(self) -> "pd.DataFrame":
        """Convert to pandas DataFrame for analysis/plotting.

        Returns
        -------
        pd.DataFrame
            One row per entry and one column per ``BindingPreferenceEntry``
            field: polymer_type, protein_group, total_contact_frames,
            mean_contact_fraction, n_residues_in_group, n_exposed_in_group,
            n_residues_contacted, contact_share, expected_share, enrichment,
            polymer_residue_count, total_polymer_residues,
            polymer_heavy_atom_count, total_polymer_heavy_atoms.
            With no entries the frame is empty and has no columns.
        """
        # Imported lazily so the module does not require pandas at import time.
        import pandas as pd

        return pd.DataFrame([e.model_dump() for e in self.entries])

    def enrichment_matrix(self) -> dict[str, dict[str, float]]:
        """Get enrichment as nested dict: {polymer_type: {protein_group: value}}.

        Enrichment values are centered at zero and normalized by protein
        surface availability:

        - > 0: Preferential binding (more contacts than expected)
        - = 0: Neutral (matches surface availability)
        - < 0: Avoidance (fewer contacts than expected)

        Returns
        -------
        dict[str, dict[str, float]]
            Nested mapping of enrichment values. Entries whose enrichment is
            None are reported as 0.0, so a 0.0 here cannot be distinguished
            from a genuinely neutral value; use ``get_enrichment`` when that
            distinction matters.

        Examples
        --------
        >>> matrix = result.enrichment_matrix()
        >>> print(matrix["SBM"]["aromatic"])
        0.45  # 45% more contacts than expected based on surface availability
        """
        result: dict[str, dict[str, float]] = {}
        for entry in self.entries:
            value = entry.enrichment
            result.setdefault(entry.polymer_type, {})[entry.protein_group] = (
                value if value is not None else 0.0
            )
        return result

    def contact_fraction_matrix(self) -> dict[str, dict[str, float]]:
        """Get mean contact fractions as nested dict.

        Returns
        -------
        dict[str, dict[str, float]]
            Nested mapping: {polymer_type: {protein_group: mean_frac}}
        """
        result: dict[str, dict[str, float]] = {}
        for entry in self.entries:
            result.setdefault(entry.polymer_type, {})[entry.protein_group] = (
                entry.mean_contact_fraction
            )
        return result

    def contact_share_matrix(self) -> dict[str, dict[str, float]]:
        """Get contact shares as nested dict.

        Returns
        -------
        dict[str, dict[str, float]]
            Nested mapping: {polymer_type: {protein_group: contact_share}}
        """
        result: dict[str, dict[str, float]] = {}
        for entry in self.entries:
            result.setdefault(entry.polymer_type, {})[entry.protein_group] = (
                entry.contact_share
            )
        return result

    def get_enrichment(self, polymer_type: str, protein_group: str) -> float | None:
        """Get enrichment for a specific (polymer_type, protein_group) pair.

        Parameters
        ----------
        polymer_type : str
            Polymer type name
        protein_group : str
            Protein group name

        Returns
        -------
        float or None
            Enrichment value (centered at zero) of the first matching entry.
            None if the pair is not found, or if the matching entry carries
            no enrichment value.
        """
        for entry in self.entries:
            if entry.polymer_type == polymer_type and entry.protein_group == protein_group:
                return entry.enrichment
        return None

    def get_entry(self, polymer_type: str, protein_group: str) -> BindingPreferenceEntry | None:
        """Get the full entry for a (polymer_type, protein_group) pair.

        Parameters
        ----------
        polymer_type : str
            Polymer type name
        protein_group : str
            Protein group name

        Returns
        -------
        BindingPreferenceEntry or None
            First matching entry, or None if not found
        """
        for entry in self.entries:
            if entry.polymer_type == polymer_type and entry.protein_group == protein_group:
                return entry
        return None

    def polymer_types(self) -> list[str]:
        """Get sorted list of distinct polymer types present in ``entries``."""
        return sorted({e.polymer_type for e in self.entries})

    def protein_groups(self) -> list[str]:
        """Get sorted list of distinct protein groups present in ``entries``."""
        return sorted({e.protein_group for e in self.entries})

    def save(self, path: str | Path) -> None:
        """Save to JSON file.

        Parameters
        ----------
        path : str or Path
            Output path for the JSON file. An existing file is overwritten.

        Raises
        ------
        TypeError
            If the model holds a value ``json.dumps`` cannot serialize
            (possible through the free-form ``metadata`` field).
        """
        # Pin UTF-8 so the file does not depend on the platform's locale encoding.
        Path(path).write_text(json.dumps(self.model_dump(), indent=2), encoding="utf-8")
        logger.info(f"Saved binding preference result to {path}")

    @classmethod
    def load(cls, path: str | Path) -> "BindingPreferenceResult":
        """Load from JSON file.

        Parameters
        ----------
        path : str or Path
            Path to JSON file

        Returns
        -------
        BindingPreferenceResult
            Loaded and validated result
        """
        # JSON is UTF-8 by specification; do not rely on the locale default.
        data = json.loads(Path(path).read_text(encoding="utf-8"))
        return cls.model_validate(data)
class AggregatedBindingPreferenceEntry(BaseModel):
    """Aggregated binding preference for one (polymer_type, protein_group) pair.

    Pure data container for mean ± SEM across replicates of the enrichment
    based on protein surface availability. Nothing is computed in this class;
    every value is supplied by the code that builds the entry.

    Enrichment values are centered at zero:

    - > 0: Preferential binding (more contacts than expected by surface area)
    - = 0: Neutral (contact frequency matches surface availability)
    - < 0: Avoidance (fewer contacts than expected by surface area)

    The expected share is based on protein surface availability:

        expected_share = n_exposed_in_group / total_exposed_residues

    This normalization answers: "Given how much of the protein surface is
    aromatic/charged/etc., does this polymer type contact that surface
    proportionally, more than proportionally, or less?"

    Attributes
    ----------
    polymer_type : str
        Polymer type name.
    protein_group : str
        Protein group label.
    mean_enrichment : float | None
        Mean enrichment across replicates (surface-normalized). Defaults to None.
    sem_enrichment : float | None
        Standard error of enrichment. Defaults to None.
    per_replicate_enrichments : list[float]
        Enrichment values from each replicate. Defaults to an empty list.
    mean_contact_fraction : float
        Mean per-residue contact fraction. Defaults to 0.0.
    sem_contact_fraction : float
        Standard error of contact fraction. Defaults to 0.0.
    mean_contact_share : float
        Mean contact share. Defaults to 0.0.
    expected_share : float
        Expected contact share based on protein surface availability.
        Defaults to 0.0.
    n_exposed_in_group : int
        Surface-exposed residues in group. Defaults to 0.
    n_residues_in_group : int
        Total residues in group. Defaults to 0.
    n_replicates : int
        Number of replicates with valid data. Defaults to 0.
    """

    # Identity of the matrix cell.
    polymer_type: str
    protein_group: str
    # Enrichment (surface-availability normalized)
    mean_enrichment: float | None = Field(
        default=None,
        description="Mean enrichment across replicates (surface-normalized)",
    )
    sem_enrichment: float | None = Field(
        default=None,
        description="Standard error of enrichment",
    )
    per_replicate_enrichments: list[float] = Field(
        default_factory=list,
        description="Enrichment values from each replicate",
    )
    # Contact metrics
    mean_contact_fraction: float = Field(
        default=0.0,
        description="Mean per-residue contact fraction",
    )
    sem_contact_fraction: float = Field(
        default=0.0,
        description="Standard error of contact fraction",
    )
    mean_contact_share: float = Field(
        default=0.0,
        description="Mean contact share",
    )
    # Expected share (from protein surface availability)
    expected_share: float = Field(
        default=0.0,
        description="Expected contact share based on protein surface availability",
    )
    # Group metadata
    n_exposed_in_group: int = Field(
        default=0,
        description="Surface-exposed residues in group",
    )
    n_residues_in_group: int = Field(
        default=0,
        description="Total residues in group",
    )
    n_replicates: int = Field(
        default=0,
        description="Number of replicates with valid data",
    )
class AggregatedBindingPreferenceResult(BaseModel):
    """Binding preference aggregated across replicates.

    Contains mean ± SEM for all metrics across multiple replicates.
    Enrichment is normalized by protein surface availability.

    Enrichment values are centered at zero:

    - > 0: Preferential binding (more contacts than expected by surface area)
    - = 0: Neutral (contact frequency matches surface availability)
    - < 0: Avoidance (fewer contacts than expected by surface area)

    All accessor methods on this class read from ``entries``.

    Attributes
    ----------
    entries : list[AggregatedBindingPreferenceEntry]
        One aggregated entry per (polymer_type, protein_group) pair.
    n_replicates : int
        Number of replicates. Defaults to 0.
    total_exposed_residues : int
        Number of surface-exposed residues considered. Defaults to 0.
    surface_exposure_threshold : float | None
        Threshold used for surface filtering, if any.
    protein_groups_used : dict[str, str]
        Mapping of protein group name to selection string.
    polymer_types_used : dict[str, str]
        Mapping of polymer type name to selection string.
    polymer_composition : PolymerComposition | None
        Polymer composition data (residue/atom counts per type, metadata only)
    system_coverage : AggregatedSystemCoverageResult | None
        Aggregated system-level coverage metrics
    binding_preference : AggregatedPolymerBindingPreferenceResult | None
        Aggregated partition-based per-polymer binding preference; described
        as the primary binding preference output from schema version 5 onward.
    schema_version : int
        Defaults to 5, which adds the partition-based ``binding_preference``.
    """

    entries: list[AggregatedBindingPreferenceEntry] = Field(default_factory=list)
    n_replicates: int = 0
    total_exposed_residues: int = 0
    surface_exposure_threshold: float | None = None
    protein_groups_used: dict[str, str] = Field(default_factory=dict)
    polymer_types_used: dict[str, str] = Field(default_factory=dict)
    polymer_composition: PolymerComposition | None = Field(
        default=None,
        description="Polymer composition data (residue/atom counts per type, metadata only)",
    )
    system_coverage: "AggregatedSystemCoverageResult | None" = Field(
        default=None,
        description="Aggregated system-level coverage metrics",
    )
    binding_preference: "AggregatedPolymerBindingPreferenceResult | None" = Field(
        default=None,
        description=(
            "Aggregated partition-based per-polymer binding preference. "
            "contact_share sums to 1.0 within each partition for each polymer. "
            "This is the primary binding preference output (v5+)."
        ),
    )
    schema_version: int = 5  # Version 5: adds partition-based binding_preference

    def to_dataframe(self) -> "pd.DataFrame":
        """Convert to pandas DataFrame.

        Returns
        -------
        pd.DataFrame
            One row per entry, one column per
            ``AggregatedBindingPreferenceEntry`` field. With no entries the
            frame is empty and has no columns.
        """
        # Imported lazily so the module does not require pandas at import time.
        import pandas as pd

        return pd.DataFrame([e.model_dump() for e in self.entries])

    def enrichment_matrix(self) -> dict[str, dict[str, float]]:
        """Get mean enrichment as nested dict.

        Enrichment values are centered at zero and normalized by protein
        surface availability:

        - > 0: Preferential binding (more contacts than expected)
        - = 0: Neutral (matches surface availability)
        - < 0: Avoidance (fewer contacts than expected)

        Returns
        -------
        dict[str, dict[str, float]]
            Nested mapping: {polymer_type: {protein_group: mean_enrichment}}.
            Entries whose mean enrichment is None are reported as 0.0.
        """
        result: dict[str, dict[str, float]] = {}
        for entry in self.entries:
            value = entry.mean_enrichment
            result.setdefault(entry.polymer_type, {})[entry.protein_group] = (
                value if value is not None else 0.0
            )
        return result

    def get_entry(
        self, polymer_type: str, protein_group: str
    ) -> AggregatedBindingPreferenceEntry | None:
        """Get entry for a (polymer_type, protein_group) pair.

        Returns the first matching entry, or None if the pair is not present.
        """
        for entry in self.entries:
            if entry.polymer_type == polymer_type and entry.protein_group == protein_group:
                return entry
        return None

    def polymer_types(self) -> list[str]:
        """Get sorted list of distinct polymer types present in ``entries``."""
        return sorted({e.polymer_type for e in self.entries})

    def protein_groups(self) -> list[str]:
        """Get sorted list of distinct protein groups present in ``entries``."""
        return sorted({e.protein_group for e in self.entries})

    def save(self, path: str | Path) -> None:
        """Save to JSON file.

        Parameters
        ----------
        path : str or Path
            Output path for the JSON file. An existing file is overwritten.
        """
        # Pin UTF-8 so the file does not depend on the platform's locale encoding.
        Path(path).write_text(json.dumps(self.model_dump(), indent=2), encoding="utf-8")
        logger.info(f"Saved aggregated binding preference to {path}")

    @classmethod
    def load(cls, path: str | Path) -> "AggregatedBindingPreferenceResult":
        """Load from JSON file.

        Parameters
        ----------
        path : str or Path
            Path to JSON file

        Returns
        -------
        AggregatedBindingPreferenceResult
            Loaded and validated result
        """
        # JSON is UTF-8 by specification; do not rely on the locale default.
        data = json.loads(Path(path).read_text(encoding="utf-8"))
        return cls.model_validate(data)
class SystemCoverageEntry(BaseModel):
    """System-level coverage entry for one protein group.

    While BindingPreferenceEntry answers "What does SBMA prefer?", this entry
    answers "What fraction of ALL polymer contacts in this system go to this
    protein group?" — collapsing across polymer types for condition-level
    analysis.

    This class is a pure data container; the values described below are
    supplied by the code that builds the entry, not computed here.

    Use Case
    --------
    Compare copolymer compositions (conditions) with each other. For example:
    "Does a 70:30 SBMA:EGMA mixture cover aromatic residues differently than
    a 30:70 mixture?"

    Coverage Enrichment Calculation
    -------------------------------
    For each protein group:

        coverage_share = Σ(all polymer contacts to group) / Σ(all polymer contacts)
        expected_share = n_exposed_in_group / total_exposed_residues
        coverage_enrichment = (coverage_share / expected_share) - 1

    Interpretation (centered at zero):

    - coverage_enrichment > 0: Preferential coverage (more than surface predicts)
        - +0.5 means "50% more coverage than expected"
    - coverage_enrichment = 0: Neutral (coverage matches surface availability)
    - coverage_enrichment < 0: Under-coverage (less than surface predicts)
        - -0.3 means "30% less coverage than expected"
    - coverage_enrichment = -1: Complete avoidance (no coverage at all)

    Attributes
    ----------
    protein_group : str
        Protein group label (e.g., "aromatic", "charged_positive")
    total_contact_frames : int
        Sum of contact frames from ALL polymer types to this group.
        Defaults to 0.
    coverage_share : float
        Fraction of all polymer contacts that went to this group.
        Defaults to 0.0.
    expected_share : float
        Expected coverage based on protein surface availability
        (n_exposed_in_group / total_exposed_residues). Defaults to 0.0.
    coverage_enrichment : float | None
        Zero-centered enrichment: (coverage_share / expected_share) - 1.
        Defaults to None.
    n_exposed_in_group : int
        Surface-exposed residues in this group. Defaults to 0.
    n_residues_in_group : int
        Total residues in this group (exposed + buried). Defaults to 0.
    polymer_contributions : dict[str, float]
        Breakdown of coverage by polymer type: {"SBMA": 0.35, "EGMA": 0.65}
        Values sum to 1.0 (fraction of contacts to this group from each polymer)
    """

    protein_group: str
    # Raw and normalized coverage metrics for this group.
    total_contact_frames: int = Field(
        default=0,
        description="Sum of contact frames from ALL polymer types to this group",
    )
    coverage_share: float = Field(
        default=0.0,
        description="Fraction of all polymer contacts that went to this group",
    )
    expected_share: float = Field(
        default=0.0,
        description="Expected coverage based on protein surface availability",
    )
    coverage_enrichment: float | None = Field(
        default=None,
        description="Zero-centered enrichment: (coverage_share / expected_share) - 1",
    )
    # Group size metadata.
    n_exposed_in_group: int = Field(
        default=0,
        description="Surface-exposed residues in this group",
    )
    n_residues_in_group: int = Field(
        default=0,
        description="Total residues in this group",
    )
    # Per-polymer breakdown of the contacts to this group.
    polymer_contributions: dict[str, float] = Field(
        default_factory=dict,
        description="Fraction of contacts to this group from each polymer type (sums to 1.0)",
    )
class PartitionCoverageEntry(BaseModel):
    """Coverage metrics for one element in a partition.

    A partition element is a mutually exclusive subset of protein residues.
    Within a partition, all elements together cover the entire protein
    surface exactly once (no residue is counted in multiple elements).

    This ensures that:

    - coverage_share sums to 1.0 across all elements in the partition
    - expected_share sums to 1.0 across all elements in the partition
    - enrichment is mathematically valid (no inflated denominators)

    This class is a pure data container; it does not itself enforce the
    invariants above — they are the responsibility of the code that builds
    the partition.

    Attributes
    ----------
    partition_element : str
        Name of this partition element (e.g., "aromatic", "lid_helix_5",
        "rest_of_protein")
    total_contact_frames : int
        Sum of contact frames from ALL polymer types to residues in this
        element. Defaults to 0.
    coverage_share : float
        Fraction of all polymer contacts that went to this element.
        Sums to 1.0 across all elements in the partition. Defaults to 0.0.
    expected_share : float
        Expected coverage based on surface availability
        (n_exposed / total_exposed). Sums to 1.0 across all elements in the
        partition. Defaults to 0.0.
    coverage_enrichment : float | None
        Zero-centered enrichment: (coverage_share / expected_share) - 1.
        Defaults to None.
    n_exposed_in_element : int
        Number of surface-exposed residues in this element. Defaults to 0.
    n_residues_in_element : int
        Total residues in this element (exposed + buried). Defaults to 0.
    polymer_contributions : dict[str, float]
        Breakdown of coverage by polymer type (sums to 1.0 for this element)
    """

    partition_element: str
    # Raw and normalized coverage metrics for this element.
    total_contact_frames: int = Field(
        default=0,
        description="Sum of contact frames from ALL polymer types to this element",
    )
    coverage_share: float = Field(
        default=0.0,
        description="Fraction of all polymer contacts that went to this element",
    )
    expected_share: float = Field(
        default=0.0,
        description="Expected coverage based on protein surface availability",
    )
    coverage_enrichment: float | None = Field(
        default=None,
        description="Zero-centered enrichment: (coverage_share / expected_share) - 1",
    )
    # Element size metadata.
    n_exposed_in_element: int = Field(
        default=0,
        description="Surface-exposed residues in this element",
    )
    n_residues_in_element: int = Field(
        default=0,
        description="Total residues in this element",
    )
    # Per-polymer breakdown of the contacts to this element.
    polymer_contributions: dict[str, float] = Field(
        default_factory=dict,
        description="Fraction of contacts to this element from each polymer type",
    )
class PartitionCoverageResult(BaseModel):
    """Coverage analysis for one complete partition of the protein surface.

    A partition splits the protein surface into mutually exclusive regions
    that jointly cover the whole surface, so that both coverage_share and
    expected_share sum to 1.0 and enrichment values are valid.

    Partition Types
    ---------------
    - **aa_class**: 5-way partition by amino acid class
      (aromatic, polar, nonpolar, charged_positive, charged_negative)
    - **binary_custom**: 2-way partition for a custom group vs rest_of_protein
      (e.g., "lid_helix_5" vs "rest_of_protein")
    - **combined_custom**: N+1 way partition with all non-overlapping custom
      groups plus "rest_of_protein"
    - **user_defined**: also accepted by the ``partition_type`` field

    Attributes
    ----------
    partition_name : str
        Descriptive name (e.g., "aa_class", "lid_helix_5_vs_rest")
    partition_type : str
        One of: "aa_class", "binary_custom", "combined_custom", "user_defined"
    entries : list[PartitionCoverageEntry]
        Coverage metrics for each element in the partition
    total_coverage_share : float
        Validation check: should be ~1.0. Stored, not recomputed here;
        defaults to 1.0.
    total_expected_share : float
        Validation check: should be ~1.0. Stored, not recomputed here;
        defaults to 1.0.
    """

    partition_name: str
    partition_type: Literal["aa_class", "binary_custom", "combined_custom", "user_defined"]
    entries: list[PartitionCoverageEntry] = Field(default_factory=list)
    total_coverage_share: float = Field(
        default=1.0,
        description="Sum of coverage_share across elements (validation: should be ~1.0)",
    )
    total_expected_share: float = Field(
        default=1.0,
        description="Sum of expected_share across elements (validation: should be ~1.0)",
    )

    def to_dataframe(self) -> "pd.DataFrame":
        """Build a pandas DataFrame with one row per partition element."""
        # Imported lazily so the module does not require pandas at import time.
        import pandas as pd

        rows = [element.model_dump() for element in self.entries]
        return pd.DataFrame(rows)

    def coverage_enrichment_dict(self) -> dict[str, float]:
        """Map element name to coverage enrichment; None is reported as 0.0."""
        enrichments: dict[str, float] = {}
        for element in self.entries:
            value = element.coverage_enrichment
            enrichments[element.partition_element] = 0.0 if value is None else value
        return enrichments

    def coverage_share_dict(self) -> dict[str, float]:
        """Map element name to its coverage share."""
        shares: dict[str, float] = {}
        for element in self.entries:
            shares[element.partition_element] = element.coverage_share
        return shares

    def expected_share_dict(self) -> dict[str, float]:
        """Map element name to its expected share."""
        shares: dict[str, float] = {}
        for element in self.entries:
            shares[element.partition_element] = element.expected_share
        return shares

    def get_entry(self, element_name: str) -> PartitionCoverageEntry | None:
        """Return the first entry named ``element_name``, or None if absent."""
        matches = (
            element for element in self.entries if element.partition_element == element_name
        )
        return next(matches, None)

    def element_names(self) -> list[str]:
        """Return the element names in the order the entries are stored."""
        names: list[str] = []
        for element in self.entries:
            names.append(element.partition_element)
        return names
[docs] class SystemCoverageResult(BaseModel): """System-level coverage analysis with proper partition structure. This result uses partitions to ensure mathematically valid enrichment calculations. A partition divides the protein surface into mutually exclusive regions, avoiding the overlap bug where custom groups and AA class groups can inflate the expected_share denominator. Partition Strategy ------------------ 1. **AA Class Partition** (always computed): 5-way partition by amino acid class. Every surface residue belongs to exactly one class. 2. **Binary Custom Partitions** (per custom group): Each custom group is compared to "rest_of_protein". This answers: "Does my lid_helix_5 have enriched polymer contacts vs non-lid regions?" 3. **Combined Custom Partition** (optional): If custom groups don't overlap, all custom groups + rest_of_protein form a single partition. If groups overlap, this is not computed and an error is raised if explicitly requested. 4. **User-Defined Partitions** (from protein_partitions config): Custom partitions specified by the user in the YAML config. Each partition references groups from protein_groups and must be mutually exclusive. 'rest_of_protein' is auto-added if the groups don't cover all exposed protein residues. One plot per partition is generated. Attributes ---------- aa_class_coverage : PartitionCoverageResult 5-way partition by amino acid class. Always computed. custom_group_coverages : dict[str, PartitionCoverageResult] Binary partitions for each custom group vs rest_of_protein. Keys are custom group names. combined_custom_coverage : PartitionCoverageResult | None All custom groups + rest_of_protein as a single partition. Only computed if custom groups don't overlap. user_defined_partitions : dict[str, PartitionCoverageResult] User-defined partitions from protein_partitions config. Keys are partition names, values are the computed coverage partitions. 'rest_of_protein' is auto-added if groups don't fully cover the protein. 
n_frames : int Total frames analyzed total_contact_frames : int Sum of all polymer contacts across all groups total_exposed_residues : int Number of surface-exposed protein residues surface_exposure_threshold : float | None SASA threshold used for surface filtering custom_group_selections : dict[str, str] Custom group name to MDAnalysis selection (for metadata) polymer_types_included : list[str] Polymer types that contributed to coverage has_overlapping_custom_groups : bool True if custom groups share residues (combined partition not computed) overlapping_group_pairs : list[tuple[str, str]] Pairs of custom groups that overlap (for diagnostics) schema_version : int Schema version (2 = partition-based) """ aa_class_coverage: PartitionCoverageResult custom_group_coverages: dict[str, PartitionCoverageResult] = Field(default_factory=dict) combined_custom_coverage: PartitionCoverageResult | None = None user_defined_partitions: dict[str, PartitionCoverageResult] = Field( default_factory=dict, description=( "User-defined partitions from protein_partitions config. " "Each partition contains mutually exclusive groups defined by the user, " "with 'rest_of_protein' auto-added if groups don't cover all protein residues." ), ) # Metadata n_frames: int = 0 total_contact_frames: int = Field( default=0, description="Sum of all polymer contacts across all groups", ) total_exposed_residues: int = 0 surface_exposure_threshold: float | None = None custom_group_selections: dict[str, str] = Field(default_factory=dict) polymer_types_included: list[str] = Field(default_factory=list) has_overlapping_custom_groups: bool = False overlapping_group_pairs: list[tuple[str, str]] = Field(default_factory=list) schema_version: int = 2
[docs] def get_aa_class_enrichment(self, aa_class: str) -> float | None: """Get coverage enrichment for an AA class. Parameters ---------- aa_class : str One of: aromatic, polar, nonpolar, charged_positive, charged_negative Returns ------- float | None Coverage enrichment, or None if not found """ entry = self.aa_class_coverage.get_entry(aa_class) return entry.coverage_enrichment if entry else None
[docs] def get_custom_group_enrichment(self, group_name: str) -> float | None: """Get coverage enrichment for a custom group (vs rest_of_protein). Parameters ---------- group_name : str Custom group name (e.g., "lid_helix_5") Returns ------- float | None Coverage enrichment for the custom group, or None if not found """ if group_name not in self.custom_group_coverages: return None partition = self.custom_group_coverages[group_name] entry = partition.get_entry(group_name) return entry.coverage_enrichment if entry else None
[docs] def aa_class_enrichment_dict(self) -> dict[str, float]: """Get AA class enrichments as dict: {aa_class: enrichment}.""" return self.aa_class_coverage.coverage_enrichment_dict()
[docs] def custom_group_enrichment_dict(self) -> dict[str, float]: """Get custom group enrichments as dict: {group_name: enrichment}. Each custom group's enrichment is relative to rest_of_protein. """ result = {} for group_name, partition in self.custom_group_coverages.items(): entry = partition.get_entry(group_name) if entry and entry.coverage_enrichment is not None: result[group_name] = entry.coverage_enrichment else: result[group_name] = 0.0 return result
[docs] def aa_class_names(self) -> list[str]: """Get list of AA class names in canonical order.""" canonical_order = ["aromatic", "polar", "nonpolar", "charged_positive", "charged_negative"] names = self.aa_class_coverage.element_names() return [n for n in canonical_order if n in names]
def custom_group_names(self) -> list[str]:
    """Return the custom group names, sorted."""
    groups = self.custom_group_coverages
    # Iterating a dict yields its keys.
    return sorted(groups)
def user_partition_names(self) -> list[str]:
    """Return the names of the user-defined partitions, sorted."""
    partitions = self.user_defined_partitions
    # Iterating a dict yields its keys.
    return sorted(partitions)
def get_user_partition(self, partition_name: str) -> PartitionCoverageResult | None:
    """Fetch one user-defined partition by its name.

    Parameters
    ----------
    partition_name : str
        Name of the partition (e.g., "lid_helices")

    Returns
    -------
    PartitionCoverageResult | None
        The stored partition result, or None when no partition has that name
    """
    partitions = self.user_defined_partitions
    if partition_name in partitions:
        return partitions[partition_name]
    return None
def save(self, path: str | Path) -> None:
    """Write this result to a JSON file.

    The model is dumped with ``model_dump()`` and serialized with a
    two-space indent.

    Parameters
    ----------
    path : str or Path
        Output path for JSON file
    """
    target = Path(path)
    payload = json.dumps(self.model_dump(), indent=2)
    target.write_text(payload)
    logger.info(f"Saved system coverage result to {path}")
@classmethod
def load(cls, path: str | Path) -> "SystemCoverageResult":
    """Read a result back from a JSON file.

    Parameters
    ----------
    path : str or Path
        Path to JSON file

    Returns
    -------
    SystemCoverageResult
        Instance built by validating the parsed JSON with ``model_validate``
    """
    raw_text = Path(path).read_text()
    payload = json.loads(raw_text)
    return cls.model_validate(payload)
class PartitionBindingEntry(BaseModel):
    """Per-element binding metrics of a single polymer type.

    One entry describes one element of a partition, i.e. one of the mutually
    exclusive subsets of protein residues that together cover the entire
    protein surface exactly once (no residue is counted in multiple elements).

    Each entry belongs to a SINGLE polymer type and answers questions such as:
    "What fraction of SBMA's contacts go to aromatic residues?"

    As a consequence:

    - contact_share sums to 1.0 across all elements in the partition
      (for this polymer)
    - expected_share sums to 1.0 across all elements in the partition
    - enrichment is mathematically valid (no inflated denominators)

    Attributes
    ----------
    partition_element : str
        Name of this partition element
        (e.g., "aromatic", "lid_helix_5", "rest_of_protein")
    polymer_type : str
        Polymer type this entry is for (e.g., "SBM", "EGM")
    total_contact_frames : int
        Sum of contact frames from THIS polymer type to residues in this element
    contact_share : float
        Fraction of this polymer's contacts that went to this element.
    expected_share : float
        Expected share based on surface availability
        (n_exposed / total_exposed).
    enrichment : float | None
        Zero-centered enrichment: (contact_share / expected_share) - 1
    n_exposed_in_element : int
        Number of surface-exposed residues in this element
    n_residues_in_element : int
        Total residues in this element (exposed + buried)
    n_residues_contacted : int
        Number of exposed residues that had at least one contact from this
        polymer
    """

    partition_element: str
    polymer_type: str
    total_contact_frames: int = Field(
        0,
        description="Sum of contact frames from THIS polymer type to this element",
    )
    contact_share: float = Field(
        0.0,
        description="Fraction of this polymer's contacts that went to this element",
    )
    expected_share: float = Field(
        0.0,
        description="Expected share based on protein surface availability",
    )
    enrichment: float | None = Field(
        None,
        description="Zero-centered enrichment: (contact_share / expected_share) - 1",
    )
    n_exposed_in_element: int = Field(
        0,
        description="Surface-exposed residues in this element",
    )
    n_residues_in_element: int = Field(
        0,
        description="Total residues in this element",
    )
    n_residues_contacted: int = Field(
        0,
        description="Exposed residues contacted by this polymer type",
    )
class PartitionBindingResult(BaseModel):
    """Binding preference of ONE polymer type over a complete partition.

    A partition splits the protein surface into mutually exclusive regions
    that together cover the entire surface. This model holds the binding
    metrics of a single polymer type for every element of such a partition,
    so that both contact_share and expected_share sum to 1.0 and the
    enrichment calculation is valid.

    Partition Types
    ---------------
    - **aa_class**: 5-way partition by amino acid class
      (aromatic, polar, nonpolar, charged_positive, charged_negative)
    - **user_defined**: N+1 way partition with user-specified groups plus
      "rest_of_protein" (auto-added if groups don't cover all residues)

    Attributes
    ----------
    partition_name : str
        Descriptive name (e.g., "aa_class", "lid_helices")
    partition_type : str
        One of: "aa_class", "user_defined"
    polymer_type : str
        Polymer type this result is for (e.g., "SBM", "EGM")
    entries : list[PartitionBindingEntry]
        Binding metrics for each element in the partition
    total_contact_share : float
        Validation check: should be ~1.0
    total_expected_share : float
        Validation check: should be ~1.0
    total_contact_frames : int
        Total contact frames from this polymer type (across all elements)
    """

    partition_name: str
    partition_type: Literal["aa_class", "user_defined"]
    polymer_type: str
    entries: list[PartitionBindingEntry] = Field(default_factory=list)
    total_contact_share: float = Field(
        1.0,
        description="Sum of contact_share across elements (validation: should be ~1.0)",
    )
    total_expected_share: float = Field(
        1.0,
        description="Sum of expected_share across elements (validation: should be ~1.0)",
    )
    total_contact_frames: int = Field(
        0,
        description="Total contact frames from this polymer type",
    )

    def to_dataframe(self) -> "pd.DataFrame":
        """Build a pandas DataFrame with one row per entry, for analysis/plotting."""
        # pandas is imported lazily, only when a DataFrame is requested.
        import pandas as pd

        rows = [item.model_dump() for item in self.entries]
        return pd.DataFrame(rows)

    def enrichment_dict(self) -> dict[str, float]:
        """Return ``{element: enrichment}``; a None enrichment is reported as 0.0."""
        mapping: dict[str, float] = {}
        for item in self.entries:
            value = item.enrichment
            mapping[item.partition_element] = 0.0 if value is None else value
        return mapping

    def contact_share_dict(self) -> dict[str, float]:
        """Return the contact shares as ``{element: share}``."""
        mapping: dict[str, float] = {}
        for item in self.entries:
            mapping[item.partition_element] = item.contact_share
        return mapping

    def expected_share_dict(self) -> dict[str, float]:
        """Return the expected shares as ``{element: share}``."""
        mapping: dict[str, float] = {}
        for item in self.entries:
            mapping[item.partition_element] = item.expected_share
        return mapping

    def get_entry(self, element_name: str) -> PartitionBindingEntry | None:
        """Return the first entry named ``element_name``, or None if there is none."""
        return next(
            (item for item in self.entries if item.partition_element == element_name),
            None,
        )

    def element_names(self) -> list[str]:
        """Return the partition element names, in entry order."""
        names: list[str] = []
        for item in self.entries:
            names.append(item.partition_element)
        return names
class PolymerBindingPreferenceResult(BaseModel):
    """Partition-based binding preference, kept separately for every polymer type.

    Every polymer type carries its own partition-based enrichment values.
    Unlike SystemCoverageResult (which collapses all polymer contacts), the
    per-polymer data is retained, so questions such as "Does SBMA prefer
    aromatic residues more than EGMA does?" can be answered.

    Partition Strategy (per polymer type)
    -------------------------------------
    1. **AA Class Partition** (always computed):
       5-way partition by amino acid class. Every surface residue belongs to
       exactly one class. Each polymer type gets its own enrichment values.

    2. **User-Defined Partitions** (from protein_partitions config):
       Custom partitions specified by the user. Each partition references
       groups from protein_groups. 'rest_of_protein' is auto-added if groups
       don't cover all exposed protein residues. Each polymer type gets its
       own enrichment values per partition.

    Attributes
    ----------
    aa_class_binding : dict[str, PartitionBindingResult]
        AA class partition binding for each polymer type.
        Keys are polymer type names (e.g., "SBM", "EGM").
    user_defined_partitions : dict[str, dict[str, PartitionBindingResult]]
        User-defined partitions for each polymer type.
        Outer keys are partition names, inner keys are polymer types.
        Example: {"lid_helices": {"SBM": ..., "EGM": ...}}
    n_frames : int
        Total frames analyzed
    total_exposed_residues : int
        Number of surface-exposed protein residues
    surface_exposure_threshold : float | None
        SASA threshold used for surface filtering
    polymer_types : list[str]
        Polymer types included in this result
    polymer_composition : PolymerComposition | None
        Polymer composition metadata
    protein_groups_used : dict[str, str]
        String-to-string mapping stored with the result; presumably protein
        group name to selection string (not established by this class).
    schema_version : int
        Schema version (5 = partition-based binding preference)
    """

    aa_class_binding: dict[str, PartitionBindingResult] = Field(
        default_factory=dict,
        description="AA class partition binding for each polymer type",
    )
    user_defined_partitions: dict[str, dict[str, PartitionBindingResult]] = Field(
        default_factory=dict,
        description=(
            "User-defined partitions for each polymer type. "
            "Outer key: partition name, inner key: polymer type."
        ),
    )

    # Metadata
    n_frames: int = 0
    total_exposed_residues: int = 0
    surface_exposure_threshold: float | None = None
    polymer_types: list[str] = Field(default_factory=list)
    polymer_composition: PolymerComposition | None = None
    protein_groups_used: dict[str, str] = Field(default_factory=dict)
    schema_version: int = 5  # Version 5: partition-based per-polymer binding

    def get_aa_class_enrichment(self, polymer_type: str, aa_class: str) -> float | None:
        """Look up the binding enrichment of one AA class for one polymer type.

        Parameters
        ----------
        polymer_type : str
            Polymer type (e.g., "SBM")
        aa_class : str
            One of: aromatic, polar, nonpolar, charged_positive, charged_negative

        Returns
        -------
        float | None
            Binding enrichment, or None when the polymer type or the AA class
            entry is not found
        """
        try:
            polymer_partition = self.aa_class_binding[polymer_type]
        except KeyError:
            return None
        match = polymer_partition.get_entry(aa_class)
        if not match:
            return None
        return match.enrichment

    def get_user_partition_enrichment(
        self, partition_name: str, polymer_type: str, element_name: str
    ) -> float | None:
        """Look up the binding enrichment of one user-partition element for one polymer.

        Parameters
        ----------
        partition_name : str
            Name of the user-defined partition (e.g., "lid_helices")
        polymer_type : str
            Polymer type (e.g., "SBM")
        element_name : str
            Element within the partition (e.g., "lid_helix_5")

        Returns
        -------
        float | None
            Binding enrichment, or None when the partition, the polymer type
            or the element is not found
        """
        try:
            polymer_partition = self.user_defined_partitions[partition_name][polymer_type]
        except KeyError:
            # Either the partition name or the polymer type is unknown.
            return None
        match = polymer_partition.get_entry(element_name)
        if not match:
            return None
        return match.enrichment

    def aa_class_enrichment_matrix(self) -> dict[str, dict[str, float]]:
        """Return the AA class enrichments as ``{polymer_type: {aa_class: enrichment}}``.

        Returns
        -------
        dict[str, dict[str, float]]
            Nested mapping of enrichment values.
        """
        return {
            polymer: binding.enrichment_dict()
            for polymer, binding in self.aa_class_binding.items()
        }

    def user_partition_enrichment_matrix(self, partition_name: str) -> dict[str, dict[str, float]]:
        """Return the enrichments of one user partition as a nested dict.

        Parameters
        ----------
        partition_name : str
            Name of the user-defined partition

        Returns
        -------
        dict[str, dict[str, float]]
            {polymer_type: {element_name: enrichment}}; empty when the
            partition name is unknown
        """
        try:
            per_polymer = self.user_defined_partitions[partition_name]
        except KeyError:
            return {}
        return {polymer: binding.enrichment_dict() for polymer, binding in per_polymer.items()}

    def aa_class_names(self) -> list[str]:
        """Return the AA class names in canonical order.

        The names are taken from the first polymer type's partition only; an
        empty list is returned when there is no AA class binding at all.
        """
        canonical = ("aromatic", "polar", "nonpolar", "charged_positive", "charged_negative")
        bindings = list(self.aa_class_binding.values())
        if not bindings:
            return []
        present = bindings[0].element_names()
        return [class_name for class_name in canonical if class_name in present]

    def user_partition_names(self) -> list[str]:
        """Return the names of the user-defined partitions, sorted."""
        partitions = self.user_defined_partitions
        return sorted(partitions)

    def save(self, path: str | Path) -> None:
        """Write this result to a JSON file at ``path``."""
        target = Path(path)
        payload = json.dumps(self.model_dump(), indent=2)
        target.write_text(payload)
        logger.info(f"Saved polymer binding preference result to {path}")

    @classmethod
    def load(cls, path: str | Path) -> "PolymerBindingPreferenceResult":
        """Read a result back from the JSON file at ``path``."""
        raw_text = Path(path).read_text()
        payload = json.loads(raw_text)
        return cls.model_validate(payload)
class AggregatedPartitionBindingEntry(BaseModel):
    """Replicate-aggregated binding metrics of one partition element for one polymer type.

    Holds mean ± SEM across replicates for binding preference, enabling
    statistical comparison of binding enrichment across conditions.

    Attributes
    ----------
    partition_element : str
        Element name (e.g., "aromatic", "lid_helix_5", "rest_of_protein")
    polymer_type : str
        Polymer type this entry is for (e.g., "SBM", "EGM")
    mean_contact_share : float
        Mean contact share across replicates
    sem_contact_share : float
        Standard error of contact share
    mean_enrichment : float | None
        Mean enrichment across replicates
    sem_enrichment : float | None
        Standard error of enrichment
    per_replicate_enrichments : list[float]
        Enrichment values from each replicate
    expected_share : float
        Expected share based on surface availability
    n_exposed_in_element : int
        Surface-exposed residues in this element
    n_residues_in_element : int
        Total residues in this element
    n_replicates : int
        Number of replicates with valid data
    """

    partition_element: str
    polymer_type: str
    mean_contact_share: float = Field(
        0.0,
        description="Mean contact share across replicates",
    )
    sem_contact_share: float = Field(
        0.0,
        description="Standard error of contact share",
    )
    mean_enrichment: float | None = Field(
        None,
        description="Mean enrichment across replicates",
    )
    sem_enrichment: float | None = Field(
        None,
        description="Standard error of enrichment",
    )
    per_replicate_enrichments: list[float] = Field(
        default_factory=list,
        description="Enrichment values from each replicate",
    )
    expected_share: float = Field(
        0.0,
        description="Expected share based on surface availability",
    )
    n_exposed_in_element: int = Field(
        0,
        description="Surface-exposed residues in this element",
    )
    n_residues_in_element: int = Field(
        0,
        description="Total residues in this element",
    )
    n_replicates: int = Field(
        0,
        description="Number of replicates with valid data",
    )
class AggregatedPartitionBindingResult(BaseModel):
    """Replicate-aggregated binding preference of ONE polymer type over a partition.

    Holds the aggregated statistics across replicates for all partition
    elements.

    Attributes
    ----------
    partition_name : str
        Descriptive name (e.g., "aa_class", "lid_helices")
    partition_type : str
        One of: "aa_class", "user_defined"
    polymer_type : str
        Polymer type this result is for
    entries : list[AggregatedPartitionBindingEntry]
        Aggregated binding metrics for each element
    mean_total_contact_share : float
        Mean of total_contact_share across replicates
        (validation: should be ~1.0)
    n_replicates : int
        Number of replicates
    """

    partition_name: str
    partition_type: Literal["aa_class", "user_defined"]
    polymer_type: str
    entries: list[AggregatedPartitionBindingEntry] = Field(default_factory=list)
    mean_total_contact_share: float = Field(
        1.0,
        description="Mean sum of contact_share across elements (should be ~1.0)",
    )
    n_replicates: int = 0

    def enrichment_dict(self) -> dict[str, float]:
        """Return ``{element: mean_enrichment}``; a None mean is reported as 0.0."""
        mapping: dict[str, float] = {}
        for item in self.entries:
            value = item.mean_enrichment
            mapping[item.partition_element] = 0.0 if value is None else value
        return mapping

    def element_names(self) -> list[str]:
        """Return the partition element names, in entry order."""
        names: list[str] = []
        for item in self.entries:
            names.append(item.partition_element)
        return names
class AggregatedPolymerBindingPreferenceResult(BaseModel):
    """Per-polymer binding preference aggregated across replicates.

    Holds mean ± SEM for all partition-based binding metrics.

    Attributes
    ----------
    aa_class_binding : dict[str, AggregatedPartitionBindingResult]
        Aggregated AA class partition binding for each polymer type.
    user_defined_partitions : dict[str, dict[str, AggregatedPartitionBindingResult]]
        Aggregated user-defined partitions for each polymer type.
    n_replicates : int
        Number of replicates
    total_exposed_residues : int
        Number of surface-exposed protein residues
    surface_exposure_threshold : float | None
        SASA threshold used
    polymer_types : list[str]
        Polymer types included
    schema_version : int
        Schema version
    """

    aa_class_binding: dict[str, AggregatedPartitionBindingResult] = Field(
        default_factory=dict,
        description="Aggregated AA class partition binding for each polymer type",
    )
    user_defined_partitions: dict[str, dict[str, AggregatedPartitionBindingResult]] = Field(
        default_factory=dict,
        description="Aggregated user-defined partitions for each polymer type",
    )

    n_replicates: int = 0
    total_exposed_residues: int = 0
    surface_exposure_threshold: float | None = None
    polymer_types: list[str] = Field(default_factory=list)
    schema_version: int = 5

    def aa_class_enrichment_matrix(self) -> dict[str, dict[str, float]]:
        """Return AA class enrichments as ``{polymer_type: {aa_class: mean_enrichment}}``."""
        return {
            polymer: binding.enrichment_dict()
            for polymer, binding in self.aa_class_binding.items()
        }

    def aa_class_names(self) -> list[str]:
        """Return the AA class names in canonical order.

        The names are taken from the first polymer type's partition only; an
        empty list is returned when there is no AA class binding at all.
        """
        canonical = ("aromatic", "polar", "nonpolar", "charged_positive", "charged_negative")
        bindings = list(self.aa_class_binding.values())
        if not bindings:
            return []
        present = bindings[0].element_names()
        return [class_name for class_name in canonical if class_name in present]

    def user_partition_names(self) -> list[str]:
        """Return the names of the user-defined partitions, sorted."""
        partitions = self.user_defined_partitions
        return sorted(partitions)
class AggregatedPartitionCoverageEntry(BaseModel):
    """Replicate-aggregated coverage of one partition element.

    Holds mean ± SEM for coverage metrics, enabling statistical comparison
    of coverage enrichment across conditions.

    Attributes
    ----------
    partition_element : str
        Element name (e.g., "aromatic", "lid_helix_5", "rest_of_protein")
    mean_coverage_share : float
        Mean coverage share across replicates
    sem_coverage_share : float
        Standard error of coverage share
    mean_coverage_enrichment : float | None
        Mean coverage enrichment across replicates
    sem_coverage_enrichment : float | None
        Standard error of coverage enrichment
    per_replicate_enrichments : list[float]
        Coverage enrichment values from each replicate
    expected_share : float
        Expected coverage based on surface availability
    n_exposed_in_element : int
        Surface-exposed residues in this element
    n_residues_in_element : int
        Total residues in this element
    n_replicates : int
        Number of replicates with valid data
    mean_polymer_contributions : dict[str, float]
        Mean polymer contributions across replicates
    """

    partition_element: str
    mean_coverage_share: float = Field(
        0.0,
        description="Mean coverage share across replicates",
    )
    sem_coverage_share: float = Field(
        0.0,
        description="Standard error of coverage share",
    )
    mean_coverage_enrichment: float | None = Field(
        None,
        description="Mean coverage enrichment across replicates",
    )
    sem_coverage_enrichment: float | None = Field(
        None,
        description="Standard error of coverage enrichment",
    )
    per_replicate_enrichments: list[float] = Field(
        default_factory=list,
        description="Coverage enrichment values from each replicate",
    )
    expected_share: float = Field(
        0.0,
        description="Expected coverage based on surface availability",
    )
    n_exposed_in_element: int = Field(
        0,
        description="Surface-exposed residues in this element",
    )
    n_residues_in_element: int = Field(
        0,
        description="Total residues in this element",
    )
    n_replicates: int = Field(
        0,
        description="Number of replicates with valid data",
    )
    mean_polymer_contributions: dict[str, float] = Field(
        default_factory=dict,
        description="Mean polymer contributions across replicates",
    )
class AggregatedPartitionCoverageResult(BaseModel):
    """Replicate-aggregated coverage of a whole partition.

    Holds mean ± SEM for all elements in the partition.

    Attributes
    ----------
    partition_name : str
        Name of the partition
    partition_type : str
        One of: "aa_class", "binary_custom", "combined_custom", "user_defined"
    entries : list[AggregatedPartitionCoverageEntry]
        Aggregated coverage for each element
    n_replicates : int
        Number of replicates aggregated
    """

    partition_name: str
    partition_type: Literal["aa_class", "binary_custom", "combined_custom", "user_defined"]
    entries: list[AggregatedPartitionCoverageEntry] = Field(default_factory=list)
    n_replicates: int = 0

    def to_dataframe(self) -> "pd.DataFrame":
        """Build a pandas DataFrame with one row per entry."""
        # pandas is imported lazily, only when a DataFrame is requested.
        import pandas as pd

        rows = [item.model_dump() for item in self.entries]
        return pd.DataFrame(rows)

    def coverage_enrichment_dict(self) -> dict[str, float]:
        """Return ``{element: mean enrichment}``; a None mean is reported as 0.0."""
        mapping: dict[str, float] = {}
        for item in self.entries:
            value = item.mean_coverage_enrichment
            mapping[item.partition_element] = 0.0 if value is None else value
        return mapping

    def get_entry(self, element_name: str) -> AggregatedPartitionCoverageEntry | None:
        """Return the first entry named ``element_name``, or None if there is none."""
        return next(
            (item for item in self.entries if item.partition_element == element_name),
            None,
        )

    def element_names(self) -> list[str]:
        """Return the partition element names, in entry order."""
        names: list[str] = []
        for item in self.entries:
            names.append(item.partition_element)
        return names
class AggregatedSystemCoverageResult(BaseModel):
    """System coverage aggregated across replicates (schema v2).

    Holds the aggregated partition coverages with mean ± SEM statistics for
    statistical comparison between conditions.

    Attributes
    ----------
    aa_class_coverage : AggregatedPartitionCoverageResult
        Aggregated 5-way AA class partition
    custom_group_coverages : dict[str, AggregatedPartitionCoverageResult]
        Aggregated binary partitions for each custom group
    combined_custom_coverage : AggregatedPartitionCoverageResult | None
        Aggregated combined custom partition (if applicable)
    user_defined_partitions : dict[str, AggregatedPartitionCoverageResult]
        Aggregated user-defined partitions from protein_partitions config.
        Keys are partition names, values are aggregated coverage results.
    n_replicates : int
        Number of replicates aggregated
    total_exposed_residues : int
        Number of surface-exposed protein residues
    surface_exposure_threshold : float | None
        SASA threshold used for surface filtering
    custom_group_selections : dict[str, str]
        Custom group name to MDAnalysis selection
    polymer_types_included : list[str]
        Polymer types that contributed to coverage
    has_overlapping_custom_groups : bool
        True if custom groups share residues
    schema_version : int
        Schema version (2 = partition-based)
    """

    aa_class_coverage: AggregatedPartitionCoverageResult
    custom_group_coverages: dict[str, AggregatedPartitionCoverageResult] = Field(
        default_factory=dict
    )
    combined_custom_coverage: AggregatedPartitionCoverageResult | None = None
    user_defined_partitions: dict[str, AggregatedPartitionCoverageResult] = Field(
        default_factory=dict,
        description=(
            "Aggregated user-defined partitions from protein_partitions config. "
            "Keys are partition names, values are aggregated coverage results."
        ),
    )

    # Metadata
    n_replicates: int = 0
    total_exposed_residues: int = 0
    surface_exposure_threshold: float | None = None
    custom_group_selections: dict[str, str] = Field(default_factory=dict)
    polymer_types_included: list[str] = Field(default_factory=list)
    has_overlapping_custom_groups: bool = False
    schema_version: int = 2

    def get_aa_class_enrichment(self, aa_class: str) -> float | None:
        """Return the mean coverage enrichment of an AA class, or None if it has no entry."""
        match = self.aa_class_coverage.get_entry(aa_class)
        if not match:
            return None
        return match.mean_coverage_enrichment

    def get_custom_group_enrichment(self, group_name: str) -> float | None:
        """Return the mean coverage enrichment of a custom group (vs rest_of_protein).

        None is returned when the group is unknown or its partition has no
        entry named after the group.
        """
        try:
            group_partition = self.custom_group_coverages[group_name]
        except KeyError:
            return None
        match = group_partition.get_entry(group_name)
        if not match:
            return None
        return match.mean_coverage_enrichment

    def aa_class_enrichment_dict(self) -> dict[str, float]:
        """Return the AA class mean enrichments as ``{aa_class: enrichment}``."""
        aa_partition = self.aa_class_coverage
        return aa_partition.coverage_enrichment_dict()

    def custom_group_enrichment_dict(self) -> dict[str, float]:
        """Return the custom group mean enrichments as ``{group_name: enrichment}``.

        A group whose own entry is missing, or whose mean enrichment is None,
        is reported as 0.0.
        """
        enrichments = {}
        for name, group_partition in self.custom_group_coverages.items():
            own_entry = group_partition.get_entry(name)
            value = own_entry.mean_coverage_enrichment if own_entry else None
            enrichments[name] = 0.0 if value is None else value
        return enrichments

    def aa_class_names(self) -> list[str]:
        """Return the AA class names present in the partition, in canonical order."""
        canonical = ("aromatic", "polar", "nonpolar", "charged_positive", "charged_negative")
        present = self.aa_class_coverage.element_names()
        return [class_name for class_name in canonical if class_name in present]

    def custom_group_names(self) -> list[str]:
        """Return the custom group names, sorted."""
        groups = self.custom_group_coverages
        return sorted(groups)

    def save(self, path: str | Path) -> None:
        """Write this result to a JSON file at ``path``."""
        target = Path(path)
        payload = json.dumps(self.model_dump(), indent=2)
        target.write_text(payload)
        logger.info(f"Saved aggregated system coverage to {path}")

    @classmethod
    def load(cls, path: str | Path) -> "AggregatedSystemCoverageResult":
        """Read a result back from the JSON file at ``path``."""
        raw_text = Path(path).read_text()
        payload = json.loads(raw_text)
        return cls.model_validate(payload)