Source code for polyzymd.compare.settings

"""Analysis and comparison settings for the comparison workflow.

This module defines the concrete settings classes for each analysis type,
registered via the AnalysisSettingsRegistry and ComparisonSettingsRegistry.

Analysis Settings (WHAT to analyze):
- RMSFAnalysisSettings: RMSF calculation parameters
- DistancesAnalysisSettings: Distance pair monitoring parameters
- CatalyticTriadAnalysisSettings: Active site distance analysis
- ContactsAnalysisSettings: Polymer-protein contact parameters

Comparison Settings (HOW to compare):
- RMSFComparisonSettings: (no comparison-specific params)
- DistancesComparisonSettings: (no comparison-specific params)
- CatalyticTriadComparisonSettings: (no comparison-specific params)
- ContactsComparisonSettings: FDR, effect size, top residues

All settings classes are auto-registered on module import.
"""

from __future__ import annotations

from typing import TYPE_CHECKING, Any, Optional

from pydantic import Field, field_validator, model_validator

from polyzymd.analysis.core.constants import (
    DEFAULT_CONTACT_CUTOFF,
    DEFAULT_DISTANCE_THRESHOLD,
    DEFAULT_SURFACE_EXPOSURE_THRESHOLD,
)
from polyzymd.analysis.core.registry import (
    AnalysisSettingsRegistry,
    BaseAnalysisSettings,
    BaseComparisonSettings,
    ComparisonSettingsRegistry,
)

if TYPE_CHECKING:
    from polyzymd.analysis.core.alignment import AlignmentConfig

# ============================================================================
# RMSF Settings
# ============================================================================


@AnalysisSettingsRegistry.register("rmsf")
class RMSFAnalysisSettings(BaseAnalysisSettings):
    """Parameters controlling the RMSF calculation.

    Attributes
    ----------
    selection : str
        MDAnalysis selection string for the atoms entering the RMSF.
    reference_mode : str
        Which reference structure to use: centroid, average, frame, or
        external.
    reference_frame : int, optional
        1-indexed frame number; required when ``reference_mode`` is 'frame'.
    reference_file : str, optional
        Path to an external PDB; required when ``reference_mode`` is
        'external'.
    """

    selection: str = Field(
        default="protein and name CA",
        description="MDAnalysis selection string for RMSF calculation",
    )
    reference_mode: str = Field(
        default="centroid",
        description="Reference structure mode: centroid, average, frame, or external",
    )
    reference_frame: Optional[int] = Field(
        default=None,
        description="Frame number if reference_mode is 'frame' (1-indexed)",
    )
    reference_file: Optional[str] = Field(
        default=None,
        description=(
            "Path to external PDB file if reference_mode is 'external'. "
            "The PDB must contain protein atoms matching the simulation topology."
        ),
    )

    @classmethod
    def analysis_type(cls) -> str:
        """Identifier under which this settings class is registered."""
        return "rmsf"

    @field_validator("reference_mode", mode="after")
    @classmethod
    def validate_reference_mode(cls, v: str) -> str:
        """Reject any reference mode outside the supported set.

        Raises
        ------
        ValueError
            If ``v`` is not centroid, average, frame, or external.
        """
        allowed_modes = {"centroid", "average", "frame", "external"}
        if v in allowed_modes:
            return v
        raise ValueError(f"reference_mode must be one of {allowed_modes}, got '{v}'")

    @model_validator(mode="after")
    def validate_reference_params(self) -> "RMSFAnalysisSettings":
        """Check that the chosen mode comes with its companion parameter.

        Raises
        ------
        ValueError
            If mode 'frame' lacks ``reference_frame`` or mode 'external'
            lacks ``reference_file``.
        """
        mode = self.reference_mode
        if mode == "frame" and self.reference_frame is None:
            raise ValueError("reference_frame is required when reference_mode is 'frame'")
        if mode == "external" and self.reference_file is None:
            raise ValueError(
                "reference_file is required when reference_mode is 'external'. "
                "Provide a path to the external PDB reference structure."
            )
        return self

    def to_analysis_yaml_dict(self) -> dict[str, Any]:
        """Build the analysis.yaml-compatible mapping for this section.

        The optional reference parameters are emitted only when set.
        """
        payload: dict[str, Any] = {
            "enabled": True,
            "selection": self.selection,
            "reference_mode": self.reference_mode,
        }
        for key in ("reference_frame", "reference_file"):
            value = getattr(self, key)
            if value is not None:
                payload[key] = value
        return payload
@ComparisonSettingsRegistry.register("rmsf")
class RMSFComparisonSettings(BaseComparisonSettings):
    """RMSF comparison settings (no RMSF-specific fields yet).

    Everything is inherited from ``BaseComparisonSettings``. The class is
    kept as an extension point so that RMSF-specific comparison parameters
    (e.g., a per-residue significance threshold) can be added later without
    touching the orchestrator or the other comparison types.
    """

    @classmethod
    def analysis_type(cls) -> str:
        """Identifier under which this settings class is registered."""
        return "rmsf"
# ============================================================================ # Distance Analysis Settings # ============================================================================
class DistancePairSettings(BaseAnalysisSettings):
    """One monitored distance pair.

    Attributes
    ----------
    label : str
        Human-readable label for the pair.
    selection_a : str
        First atom/point selection.
    selection_b : str
        Second atom/point selection.
    threshold : float, optional
        Per-pair distance threshold (Angstroms). ``None`` means the global
        threshold of ``DistancesAnalysisSettings`` applies.
    below_label : str, optional
        Display label for the "below threshold" state (e.g. ``"Bound"``,
        ``"Closed"``). ``None`` falls back to ``"Below {threshold}Å"``.
    above_label : str, optional
        Display label for the "above threshold" state (e.g. ``"Unbound"``,
        ``"Open"``). ``None`` falls back to ``"Above {threshold}Å"``.
    """

    label: str = Field(..., description="Human-readable label for this pair")
    selection_a: str = Field(..., description="First atom/point selection")
    selection_b: str = Field(..., description="Second atom/point selection")
    threshold: Optional[float] = Field(
        default=None,
        description="Per-pair distance threshold (Angstroms). If None, uses global threshold.",
    )
    below_label: Optional[str] = Field(
        default=None,
        description=(
            'Display label for "below threshold" state (e.g. "Bound", "Closed"). '
            'Defaults to "Below {threshold}Å".'
        ),
    )
    above_label: Optional[str] = Field(
        default=None,
        description=(
            'Display label for "above threshold" state (e.g. "Unbound", "Open"). '
            'Defaults to "Above {threshold}Å".'
        ),
    )

    @classmethod
    def analysis_type(cls) -> str:
        """Identifier for this settings type."""
        return "distance_pair"

    def to_analysis_yaml_dict(self) -> dict[str, Any]:
        """Build the analysis.yaml-compatible mapping for this pair.

        The three required fields are always present; threshold and the two
        display labels are added only when they are not ``None``.
        """
        payload: dict[str, Any] = {
            "label": self.label,
            "selection_a": self.selection_a,
            "selection_b": self.selection_b,
        }
        for key in ("threshold", "below_label", "above_label"):
            value = getattr(self, key)
            if value is not None:
                payload[key] = value
        return payload
@AnalysisSettingsRegistry.register("distances")
class DistancesAnalysisSettings(BaseAnalysisSettings):
    """Settings for the distance analysis.

    Attributes
    ----------
    threshold : float, optional
        Global distance threshold for contact analysis (Angstroms); used for
        every pair that does not define its own threshold.
    pairs : list[DistancePairSettings]
        Atom pairs whose distance is measured.
    use_pbc : bool
        Use PBC-aware minimum image distances. Default True.
    align_trajectory : bool
        Align the trajectory before measuring distances. Default True.
        Alignment removes rotational drift and COM motion that would
        otherwise add noise to inter-domain distances.
    alignment_selection : str
        MDAnalysis selection used for alignment.
        Default: "protein and name CA".
    alignment_mode : str
        Alignment reference mode: "centroid", "average", or "frame".
        Default: "centroid".
    alignment_frame : int, optional
        Reference frame (1-indexed) when alignment_mode="frame".
    """

    threshold: Optional[float] = Field(
        default=DEFAULT_DISTANCE_THRESHOLD,
        description="Distance threshold for contact analysis (Angstroms)",
    )
    pairs: list[DistancePairSettings] = Field(
        default_factory=list, description="Distance pairs to monitor"
    )

    # PBC and alignment settings (new)
    use_pbc: bool = Field(
        default=True,
        description="Use PBC-aware minimum image distances",
    )
    align_trajectory: bool = Field(
        default=True,
        description="Align trajectory before distance calculation (removes drift)",
    )
    alignment_selection: str = Field(
        default="protein and name CA",
        description="MDAnalysis selection for trajectory alignment",
    )
    alignment_mode: str = Field(
        default="centroid",
        description="Reference mode: centroid, average, or frame",
    )
    alignment_frame: Optional[int] = Field(
        default=None,
        description="Reference frame (1-indexed) when alignment_mode='frame'",
    )

    @classmethod
    def analysis_type(cls) -> str:
        """Identifier under which this settings class is registered."""
        return "distances"

    @field_validator("pairs", mode="after")
    @classmethod
    def validate_pairs(cls, v: list[DistancePairSettings]) -> list[DistancePairSettings]:
        """Require a non-empty list of pairs.

        Raises
        ------
        ValueError
            If ``v`` contains no pairs.
        """
        # NOTE(review): pydantic normally skips field validators for default
        # values, so omitting ``pairs`` entirely is presumably not rejected
        # here - confirm against the base model configuration.
        if not v:
            raise ValueError("At least one distance pair must be defined")
        return v

    @field_validator("alignment_mode", mode="after")
    @classmethod
    def validate_alignment_mode(cls, v: str) -> str:
        """Reject any alignment mode outside the supported set.

        Raises
        ------
        ValueError
            If ``v`` is not centroid, average, or frame.
        """
        allowed_modes = {"centroid", "average", "frame"}
        if v in allowed_modes:
            return v
        raise ValueError(f"alignment_mode must be one of {allowed_modes}, got '{v}'")

    @model_validator(mode="after")
    def validate_alignment_frame_required(self) -> "DistancesAnalysisSettings":
        """Require ``alignment_frame`` for frame-mode alignment.

        The check is skipped when alignment is disabled.

        Raises
        ------
        ValueError
            If alignment is enabled in 'frame' mode without a frame number.
        """
        frame_mode_active = self.align_trajectory and self.alignment_mode == "frame"
        if frame_mode_active and self.alignment_frame is None:
            raise ValueError("alignment_frame is required when alignment_mode is 'frame'")
        return self

    def to_analysis_yaml_dict(self) -> dict[str, Any]:
        """Build the analysis.yaml-compatible mapping for this section.

        Alignment details are written only when alignment is enabled.
        """
        # NOTE(review): the global ``threshold`` is not written here; only
        # per-pair thresholds are serialized - confirm this is intended.
        payload: dict[str, Any] = {
            "enabled": True,
            "pairs": [pair.to_analysis_yaml_dict() for pair in self.pairs],
            "use_pbc": self.use_pbc,
            "align_trajectory": self.align_trajectory,
        }
        if not self.align_trajectory:
            return payload

        payload["alignment_selection"] = self.alignment_selection
        payload["alignment_mode"] = self.alignment_mode
        if self.alignment_frame is not None:
            payload["alignment_frame"] = self.alignment_frame
        return payload

    def get_pair_selections(self) -> list[tuple[str, str]]:
        """Return one ``(selection_a, selection_b)`` tuple per pair."""
        return [(pair.selection_a, pair.selection_b) for pair in self.pairs]

    def get_pair_labels(self) -> list[str]:
        """Return the label of every pair, in order."""
        return [pair.label for pair in self.pairs]

    def get_pair_thresholds(self) -> list[float | None]:
        """Return the effective threshold of every pair.

        Returns
        -------
        list[float | None]
            One entry per pair: the pair's own threshold when set, otherwise
            the global threshold (which may itself be None).
        """
        fallback = self.threshold
        return [fallback if pair.threshold is None else pair.threshold for pair in self.pairs]

    def get_alignment_config(self) -> "AlignmentConfig":
        """Translate the alignment fields into an ``AlignmentConfig``.

        Returns
        -------
        AlignmentConfig
            Built from ``align_trajectory``, ``alignment_mode``,
            ``alignment_frame`` and ``alignment_selection``.

        Notes
        -----
        The import lives inside the method to avoid circular imports.
        """
        from polyzymd.analysis.core.alignment import AlignmentConfig

        return AlignmentConfig(
            enabled=self.align_trajectory,
            reference_mode=self.alignment_mode,  # type: ignore[arg-type]
            reference_frame=self.alignment_frame,
            selection=self.alignment_selection,
        )
@ComparisonSettingsRegistry.register("distances")
class DistancesComparisonSettings(BaseComparisonSettings):
    """Distance comparison settings (no distance-specific fields yet).

    Everything is inherited from ``BaseComparisonSettings``. The class is
    kept as an extension point so that distance-specific comparison
    parameters (e.g., per-pair significance thresholds) can be added later
    without touching the orchestrator or the other comparison types.
    """

    @classmethod
    def analysis_type(cls) -> str:
        """Identifier under which this settings class is registered."""
        return "distances"
# ============================================================================ # Catalytic Triad Settings # ============================================================================
class TriadPairSettings(BaseAnalysisSettings):
    """One distance pair belonging to a catalytic triad/active site.

    Attributes
    ----------
    label : str
        Human-readable label for the pair (e.g., "Asp133-His156").
    selection_a : str
        First atom/point selection.
    selection_b : str
        Second atom/point selection.
    """

    label: str = Field(..., description="Human-readable label for this pair")
    selection_a: str = Field(..., description="First atom/point selection")
    selection_b: str = Field(..., description="Second atom/point selection")

    @classmethod
    def analysis_type(cls) -> str:
        """Identifier for this settings type."""
        return "triad_pair"

    def to_analysis_yaml_dict(self) -> dict[str, Any]:
        """Build the analysis.yaml-compatible mapping for this pair."""
        exported = ("label", "selection_a", "selection_b")
        return {key: getattr(self, key) for key in exported}
@AnalysisSettingsRegistry.register("catalytic_triad")
class CatalyticTriadAnalysisSettings(BaseAnalysisSettings):
    """Settings for the catalytic triad/active site analysis.

    Attributes
    ----------
    name : str
        Name of the triad/active site (e.g., "LipA_catalytic_triad").
    pairs : list[TriadPairSettings]
        Distance pairs to monitor.
    threshold : float
        Distance threshold for contact/H-bond analysis (Angstroms).
    description : str, optional
        Free-text description of the active site.
    """

    name: str = Field(..., description="Name of the catalytic triad/active site")
    pairs: list[TriadPairSettings] = Field(..., description="Distance pairs to monitor")
    threshold: float = Field(
        default=DEFAULT_DISTANCE_THRESHOLD,
        description="Distance threshold for contact analysis (Angstroms)",
    )
    description: Optional[str] = Field(default=None, description="Description of the active site")

    @classmethod
    def analysis_type(cls) -> str:
        """Identifier under which this settings class is registered."""
        return "catalytic_triad"

    @field_validator("pairs", mode="after")
    @classmethod
    def validate_pairs(cls, v: list[TriadPairSettings]) -> list[TriadPairSettings]:
        """Require a non-empty list of pairs.

        Raises
        ------
        ValueError
            If ``v`` contains no pairs.
        """
        if not v:
            raise ValueError("At least one distance pair must be defined")
        return v

    @property
    def n_pairs(self) -> int:
        """How many distance pairs are configured."""
        return len(self.pairs)

    def get_pair_selections(self) -> list[tuple[str, str]]:
        """Return one ``(selection_a, selection_b)`` tuple per pair."""
        return [(pair.selection_a, pair.selection_b) for pair in self.pairs]

    def get_pair_labels(self) -> list[str]:
        """Return the label of every pair, in order."""
        return [pair.label for pair in self.pairs]

    def to_analysis_yaml_dict(self) -> dict[str, Any]:
        """Build the analysis.yaml-compatible mapping for this section.

        ``description`` is written only when it is a non-empty string.
        """
        payload: dict[str, Any] = {
            "enabled": True,
            "name": self.name,
            "threshold": self.threshold,
            "pairs": [pair.to_analysis_yaml_dict() for pair in self.pairs],
        }
        if self.description:
            payload["description"] = self.description
        return payload
@ComparisonSettingsRegistry.register("catalytic_triad")
class CatalyticTriadComparisonSettings(BaseComparisonSettings):
    """Catalytic triad comparison settings (no triad-specific fields yet).

    Everything is inherited from ``BaseComparisonSettings``. The class is
    kept as an extension point so that triad-specific comparison parameters
    (e.g., functional distance thresholds) can be added later without
    touching the orchestrator or the other comparison types.
    """

    @classmethod
    def analysis_type(cls) -> str:
        """Identifier under which this settings class is registered."""
        return "catalytic_triad"
# ============================================================================ # Polymer-Protein Contacts Settings # ============================================================================
class BindingPreferenceFieldsMixin(BaseAnalysisSettings):
    """Shared fields for experimental binding-preference-derived analyses.

    Both ``ContactsAnalysisSettings`` and ``BindingFreeEnergyAnalysisSettings``
    need identical fields for surface exposure, protein grouping, and polymer
    type selection. This mixin provides them once, keeping defaults in sync.

    The partition consistency check lives here as well, so every subclass
    that inherits ``protein_partitions`` enforces the rules the field
    description promises (known groups only, no overlap inside a partition).

    Attributes
    ----------
    surface_exposure_threshold : float
        Relative SASA threshold for surface exposure (0.0-1.0).
    enzyme_pdb_for_sasa : str, optional
        Path to enzyme PDB for SASA calculation.
    include_default_aa_groups : bool
        Include default AA class groupings (aromatic, polar, etc.).
    protein_groups : dict[str, list[int]], optional
        Custom protein groups as {name: [resid1, resid2, ...]}.
    protein_partitions : dict[str, list[str]], optional
        Custom partitions for system coverage comparison.
    polymer_type_selections : dict[str, str], optional
        Custom polymer type selections as {name: "MDAnalysis selection"}.
    """

    surface_exposure_threshold: float = Field(
        default=DEFAULT_SURFACE_EXPOSURE_THRESHOLD,
        ge=0.0,
        le=1.0,
        description=("Experimental binding-preference threshold for surface exposure (0.2 = 20%)"),
    )
    enzyme_pdb_for_sasa: Optional[str] = Field(
        default=None,
        description="Path to enzyme PDB for SASA calculation (relative to comparison.yaml)",
    )
    include_default_aa_groups: bool = Field(
        default=True,
        description="Include default AA class groupings (aromatic, polar, nonpolar, charged)",
    )
    protein_groups: Optional[dict[str, list[int]]] = Field(
        default=None,
        description="Custom protein groups as {name: [resid1, resid2, ...]}",
    )
    protein_partitions: Optional[dict[str, list[str]]] = Field(
        default=None,
        description=(
            "Custom partitions for system coverage comparison. "
            "Each partition defines a mutually exclusive set of protein groups "
            "that will generate one comparison plot. Format: {partition_name: [group1, group2, ...]}. "
            "Groups must be defined in protein_groups. "
            "If groups don't cover all protein residues, "
            "'rest_of_protein' is auto-added. Overlapping groups within a partition cause validation error."
        ),
    )
    polymer_type_selections: Optional[dict[str, str]] = Field(
        default=None,
        description="Custom polymer type selections as {name: 'MDAnalysis selection'}",
    )

    @classmethod
    def analysis_type(cls) -> str:
        """Return the analysis type identifier (override in subclass).

        Raises
        ------
        NotImplementedError
            Always; the mixin itself has no analysis type.
        """
        raise NotImplementedError

    @model_validator(mode="after")
    def validate_protein_partitions(self) -> "BindingPreferenceFieldsMixin":
        """Validate protein_partitions references and mutual exclusivity.

        Nothing is checked when no partitions are defined. Otherwise:

        1. ``protein_groups`` must be defined.
        2. No partition may be empty.
        3. Every group named in a partition must exist in ``protein_groups``.
        4. Groups inside one partition must not share a residue id.

        Raises
        ------
        ValueError
            If any of the rules above is violated.
        """
        if not self.protein_partitions:
            return self

        if not self.protein_groups:
            raise ValueError(
                "protein_partitions requires protein_groups to be defined. "
                "Define the groups first, then reference them in partitions."
            )

        protein_groups = self.protein_groups
        for partition_name, group_names in self.protein_partitions.items():
            if not group_names:
                raise ValueError(
                    f"Partition '{partition_name}' is empty. "
                    "Each partition must contain at least one group."
                )

            # All references are resolved before any overlap is examined, so
            # an unknown group is reported ahead of an overlap.
            for group_name in group_names:
                if group_name not in protein_groups:
                    available = ", ".join(sorted(protein_groups.keys()))
                    raise ValueError(
                        f"Partition '{partition_name}' references undefined group '{group_name}'. "
                        f"Available groups: {available}"
                    )

            # resid -> first group in this partition that contains it
            seen_resids: dict[int, str] = {}
            for group_name in group_names:
                for resid in protein_groups[group_name]:
                    if resid in seen_resids:
                        raise ValueError(
                            f"Partition '{partition_name}' has overlapping groups: "
                            f"residue {resid} is in both '{seen_resids[resid]}' and '{group_name}'. "
                            "Groups within a partition must be mutually exclusive."
                        )
                    seen_resids[resid] = group_name

        return self
@AnalysisSettingsRegistry.register("contacts")
class ContactsAnalysisSettings(BindingPreferenceFieldsMixin):
    """Settings for the polymer-protein contact analysis.

    The binding preference fields (surface_exposure_threshold,
    enzyme_pdb_for_sasa, include_default_aa_groups, protein_groups,
    protein_partitions, polymer_type_selections) come from
    ``BindingPreferenceFieldsMixin``.

    Attributes
    ----------
    polymer_selection : str
        MDAnalysis selection for polymer atoms.
    protein_selection : str
        MDAnalysis selection for protein atoms.
    cutoff : float
        Distance cutoff for contacts in Angstroms.
    polymer_types : list[str], optional
        Filter contacts by polymer residue names.
    grouping : str
        How protein residues are grouped: aa_class, secondary_structure,
        or none.
    compute_residence_times : bool
        If True, compute residence time statistics.
    compute_binding_preference : bool
        If True, compute binding preference enrichment analysis.
    enrichment_normalization : str
        **DEPRECATED** (kept for backward compatibility). Enrichment is now
        always normalized by protein surface availability. This field is
        ignored.
    """

    polymer_selection: str = Field(
        default="chainID C", description="MDAnalysis selection for polymer atoms"
    )
    protein_selection: str = Field(
        default="protein", description="MDAnalysis selection for protein atoms"
    )
    cutoff: float = Field(
        default=DEFAULT_CONTACT_CUTOFF, description="Contact distance cutoff in Angstroms"
    )
    polymer_types: Optional[list[str]] = Field(
        default=None, description="Filter by polymer residue names"
    )
    grouping: str = Field(
        default="aa_class", description="Group by: aa_class, secondary_structure, or none"
    )
    compute_residence_times: bool = Field(
        default=True, description="Compute residence time statistics"
    )

    # Binding preference settings
    compute_binding_preference: bool = Field(
        default=False,
        description=("Compute the experimental binding preference enrichment analysis"),
    )
    enrichment_normalization: str = Field(
        default="residue",
        description=(
            "DEPRECATED: Enrichment is now always normalized by protein surface "
            "availability. This field is ignored."
        ),
    )

    @classmethod
    def analysis_type(cls) -> str:
        """Identifier under which this settings class is registered."""
        return "contacts"

    @field_validator("grouping", mode="after")
    @classmethod
    def validate_grouping(cls, v: str) -> str:
        """Reject any grouping mode outside the supported set.

        Raises
        ------
        ValueError
            If ``v`` is not aa_class, secondary_structure, or none.
        """
        allowed = {"aa_class", "secondary_structure", "none"}
        if v in allowed:
            return v
        raise ValueError(f"grouping must be one of {allowed}, got '{v}'")

    @field_validator("enrichment_normalization", mode="after")
    @classmethod
    def validate_enrichment_normalization(cls, v: str) -> str:
        """Reject any normalization method other than residue or atoms.

        Raises
        ------
        ValueError
            If ``v`` is neither 'residue' nor 'atoms'.
        """
        allowed = {"residue", "atoms"}
        if v in allowed:
            return v
        raise ValueError(f"enrichment_normalization must be one of {allowed}, got '{v}'")

    @model_validator(mode="after")
    def validate_protein_partitions(self) -> "ContactsAnalysisSettings":
        """Check partition references and mutual exclusivity.

        Skipped when no partitions are defined. Otherwise every group named
        in a partition must exist in ``protein_groups``, no partition may be
        empty, and groups within one partition must not share a residue id.

        Raises
        ------
        ValueError
            If any of those rules is violated.
        """
        partitions = self.protein_partitions
        if not partitions:
            return self

        # Partitions only make sense on top of defined groups.
        known_groups = self.protein_groups
        if not known_groups:
            raise ValueError(
                "protein_partitions requires protein_groups to be defined. "
                "Define the groups first, then reference them in partitions."
            )

        for partition_name, members in partitions.items():
            if not members:
                raise ValueError(
                    f"Partition '{partition_name}' is empty. "
                    "Each partition must contain at least one group."
                )

            # Resolve every reference first; unknown names are reported
            # before any overlap.
            for group_name in members:
                if group_name in known_groups:
                    continue
                available = ", ".join(sorted(known_groups.keys()))
                raise ValueError(
                    f"Partition '{partition_name}' references undefined group '{group_name}'. "
                    f"Available groups: {available}"
                )

            # Maps each residue id to the first group of this partition that
            # claimed it.
            owner_of: dict[int, str] = {}
            for group_name in members:
                for resid in known_groups[group_name]:
                    first_owner = owner_of.get(resid)
                    if first_owner is not None:
                        raise ValueError(
                            f"Partition '{partition_name}' has overlapping groups: "
                            f"residue {resid} is in both '{first_owner}' and '{group_name}'. "
                            "Groups within a partition must be mutually exclusive."
                        )
                    owner_of[resid] = group_name

        return self

    def to_analysis_yaml_dict(self) -> dict[str, Any]:
        """Build the analysis.yaml-compatible mapping for this section.

        Binding preference keys are written only when
        ``compute_binding_preference`` is enabled; optional values are
        skipped when empty or unset.
        """
        payload: dict[str, Any] = {
            "enabled": True,
            "polymer_selection": self.polymer_selection,
            "protein_selection": self.protein_selection,
            "cutoff": self.cutoff,
            "grouping": self.grouping,
            "compute_residence_times": self.compute_residence_times,
        }
        if self.polymer_types:
            payload["polymer_types"] = self.polymer_types

        if not self.compute_binding_preference:
            return payload

        # Binding preference settings (only included when enabled).
        # Note: enrichment_normalization is deprecated and no longer included.
        payload["compute_binding_preference"] = True
        payload["surface_exposure_threshold"] = self.surface_exposure_threshold
        payload["include_default_aa_groups"] = self.include_default_aa_groups
        optional_keys = (
            "enzyme_pdb_for_sasa",
            "protein_groups",
            "protein_partitions",
            "polymer_type_selections",
        )
        for key in optional_keys:
            value = getattr(self, key)
            if value:
                payload[key] = value
        return payload
@ComparisonSettingsRegistry.register("contacts")
class ContactsComparisonSettings(BaseComparisonSettings):
    """How polymer-protein contact results are compared.

    Attributes
    ----------
    fdr_alpha : float
        False discovery rate alpha for Benjamini-Hochberg correction.
    min_effect_size : float
        Minimum Cohen's d effect size to highlight in reports.
    top_residues : int
        Number of top residues (by effect size) to display in console.
    """

    fdr_alpha: float = Field(
        default=0.05, description="FDR alpha for Benjamini-Hochberg correction"
    )
    min_effect_size: float = Field(
        default=0.5,
        description="Minimum Cohen's d to highlight (0.2=small, 0.5=medium, 0.8=large)",
    )
    top_residues: int = Field(
        default=10, description="Number of top residues to display in console"
    )

    @classmethod
    def analysis_type(cls) -> str:
        """Identifier under which this settings class is registered."""
        return "contacts"

    @field_validator("fdr_alpha", mode="after")
    @classmethod
    def validate_fdr_alpha(cls, v: float) -> float:
        """Require ``fdr_alpha`` to lie strictly between 0 and 1.

        Raises
        ------
        ValueError
            If ``v`` is not inside the open interval (0, 1).
        """
        if 0 < v < 1:
            return v
        raise ValueError(f"fdr_alpha must be between 0 and 1, got {v}")
# ============================================================================ # Utility Functions # ============================================================================ # ============================================================================ # Exposure Dynamics Settings # ============================================================================
@AnalysisSettingsRegistry.register("exposure")
class ExposureAnalysisSettings(BaseAnalysisSettings):
    """Experimental exposure dynamics settings (dynamic SASA-based chaperone analysis).

    Attributes
    ----------
    protein_selection : str
        MDAnalysis selection for protein atoms.
    polymer_selection : str
        MDAnalysis selection for polymer atoms (chain C by default).
    exposure_threshold : float
        Relative SASA threshold for classifying a residue as exposed.
    transient_lower : float
        Lower bound of exposure fraction for "transient" classification.
        Must not exceed ``transient_upper``.
    transient_upper : float
        Upper bound of exposure fraction for "transient" classification.
    min_event_length : int
        Minimum exposed-window length (frames) to count as an event.
    probe_radius_nm : float
        Probe radius for MDTraj shrake_rupley, in nm.
    n_sphere_points : int
        Number of sphere points for shrake_rupley.
    protein_chain : str
        Chain letter for protein (default "A").
    polymer_resnames : list[str], optional
        Subset of polymer monomer resnames to include. If None, all detected.
    """

    protein_selection: str = Field(
        default="protein", description="MDAnalysis selection for protein"
    )
    polymer_selection: str = Field(
        default="chainID C", description="MDAnalysis selection for polymer"
    )
    exposure_threshold: float = Field(
        default=DEFAULT_SURFACE_EXPOSURE_THRESHOLD,
        ge=0.0,
        le=1.0,
        description="Experimental relative SASA threshold for exposed classification",
    )
    transient_lower: float = Field(
        default=0.2,
        ge=0.0,
        le=1.0,
        description="Lower exposure fraction bound for 'transient' residues",
    )
    transient_upper: float = Field(
        default=0.8,
        ge=0.0,
        le=1.0,
        description="Upper exposure fraction bound for 'transient' residues",
    )
    min_event_length: int = Field(
        default=1,
        ge=1,
        description="Minimum exposed-window length (frames) to count as event",
    )
    probe_radius_nm: float = Field(default=0.14, description="Probe radius for SASA in nm")
    n_sphere_points: int = Field(
        default=960, description="Number of sphere points for shrake_rupley"
    )
    protein_chain: str = Field(default="A", description="Chain letter for protein")
    polymer_resnames: Optional[list[str]] = Field(
        default=None,
        description="Subset of polymer resnames to analyze. If None, all detected.",
    )

    @classmethod
    def analysis_type(cls) -> str:
        """Return the analysis type identifier."""
        return "exposure"

    @model_validator(mode="after")
    def validate_transient_bounds(self) -> "ExposureAnalysisSettings":
        """Ensure the transient window is not inverted.

        Each bound is range-checked on its own by its field constraints;
        this check covers the relationship between the two.

        Raises
        ------
        ValueError
            If ``transient_lower`` is greater than ``transient_upper``.
        """
        if self.transient_lower > self.transient_upper:
            raise ValueError(
                "transient_lower must not exceed transient_upper, got "
                f"transient_lower={self.transient_lower} and "
                f"transient_upper={self.transient_upper}"
            )
        return self

    def to_analysis_yaml_dict(self) -> dict[str, Any]:
        """Convert to analysis.yaml-compatible dictionary.

        ``polymer_resnames`` is written only when it is a non-empty list.
        """
        result: dict[str, Any] = {
            "enabled": True,
            "exposure_threshold": self.exposure_threshold,
            "transient_lower": self.transient_lower,
            "transient_upper": self.transient_upper,
            "min_event_length": self.min_event_length,
            "protein_chain": self.protein_chain,
        }
        if self.polymer_resnames:
            result["polymer_resnames"] = self.polymer_resnames
        return result
@ComparisonSettingsRegistry.register("exposure")
class ExposureComparisonSettings(BaseComparisonSettings):
    """Exposure dynamics comparison settings (no exposure-specific fields yet).

    Everything is inherited from ``BaseComparisonSettings``. The class is
    kept as an extension point so that exposure-specific comparison
    parameters (e.g., transient classification thresholds) can be added
    later without touching the orchestrator or the other comparison types.
    """

    @classmethod
    def analysis_type(cls) -> str:
        """Identifier under which this settings class is registered."""
        return "exposure"
# ============================================================================ # Binding Free Energy Settings # ============================================================================
@AnalysisSettingsRegistry.register("binding_free_energy")
class BindingFreeEnergyAnalysisSettings(BindingPreferenceFieldsMixin):
    """Experimental settings for binding free energy analysis via Boltzmann inversion.

    Computes the selectivity free energy:

        ΔG_sel = -k_B·T · ln(contact_share / expected_share)

    where:

    - contact_share = fraction of polymer contacts directed at an AA group
    - expected_share = fraction of exposed surface belonging to that AA group
    - T = simulation temperature (from SimulationConfig)

    This is a post-processing analysis that consumes binding preference results
    from the contacts analysis layer (no new per-frame computation is needed).

    Inherits binding preference fields (surface_exposure_threshold,
    enzyme_pdb_for_sasa, include_default_aa_groups, protein_groups,
    protein_partitions, polymer_type_selections) from
    ``BindingPreferenceFieldsMixin``.

    Attributes
    ----------
    units : str
        Energy units for output. One of "kT" (dimensionless, in units of
        k_bT — the thermal energy), "kcal/mol", or "kJ/mol".
    compute_binding_preference : bool
        Compute binding preference from contacts data when cached results
        are not found.
    """

    units: str = Field(
        default="kT",
        description=(
            "Experimental output units: 'kT' (default, dimensionless), 'kcal/mol', or 'kJ/mol'"
        ),
    )
    compute_binding_preference: bool = Field(
        default=True,
        description=(
            "Compute experimental binding preference from contacts data when "
            "cached results are not found. Set to False to only load pre-existing "
            "results."
        ),
    )

    @field_validator("units")
    @classmethod
    def validate_units(cls, v: str) -> str:
        """Validate energy units.

        Raises
        ------
        ValueError
            If ``v`` is not 'kT', 'kcal/mol', or 'kJ/mol'.
        """
        allowed = {"kT", "kcal/mol", "kJ/mol"}
        if v not in allowed:
            raise ValueError(f"units must be one of {sorted(allowed)}, got '{v}'")
        return v

    @classmethod
    def analysis_type(cls) -> str:
        """Return the analysis type identifier."""
        return "binding_free_energy"

    def k_b(self) -> float:
        """Return k_B in the selected energy units.

        Returns
        -------
        float
            Boltzmann constant in kcal/(mol·K) or kJ/(mol·K).
            When units='kT', returns 0.0 — callers should use kT=1.0
            directly instead of k_b() * T.
        """
        if self.units == "kT":
            return 0.0  # Not used; comparator sets kT=1.0 directly
        if self.units == "kJ/mol":
            return 0.0083144626  # kJ/(mol·K)
        return 0.0019872041  # kcal/(mol·K) [default]

    def to_analysis_yaml_dict(self) -> dict[str, Any]:
        """Convert to analysis.yaml-compatible dictionary.

        Returns
        -------
        dict
            Dictionary suitable for writing to analysis.yaml. Optional
            fields are included only when they are not ``None``.
        """
        result: dict[str, Any] = {
            "enabled": True,
            "units": self.units,
            "compute_binding_preference": self.compute_binding_preference,
            "surface_exposure_threshold": self.surface_exposure_threshold,
            # Serialized so that an explicit ``False`` is not silently lost;
            # PolymerAffinityScoreSettings writes the same key.
            # NOTE(review): confirm the analysis.yaml schema for this
            # section accepts the key.
            "include_default_aa_groups": self.include_default_aa_groups,
        }
        if self.enzyme_pdb_for_sasa is not None:
            result["enzyme_pdb_for_sasa"] = self.enzyme_pdb_for_sasa
        if self.protein_groups is not None:
            result["protein_groups"] = self.protein_groups
        if self.protein_partitions is not None:
            result["protein_partitions"] = self.protein_partitions
        if self.polymer_type_selections is not None:
            result["polymer_type_selections"] = self.polymer_type_selections
        return result
@ComparisonSettingsRegistry.register("binding_free_energy")
class BindingFreeEnergyComparisonSettings(BaseComparisonSettings):
    """Comparison settings for binding free energy analysis.

    Attributes
    ----------
    fdr_alpha : float
        False discovery rate alpha for Benjamini-Hochberg correction of
        p-values across (polymer_type, AA_group) pairs. Must lie strictly
        between 0 and 1.
    """

    fdr_alpha: float = Field(
        default=0.05,
        description="FDR alpha for Benjamini-Hochberg correction",
    )

    @classmethod
    def analysis_type(cls) -> str:
        """Return the analysis type identifier."""
        return "binding_free_energy"

    @field_validator("fdr_alpha", mode="after")
    @classmethod
    def validate_fdr_alpha(cls, v: float) -> float:
        """Validate FDR alpha is in valid range.

        Mirrors the check on ``ContactsComparisonSettings`` so both
        comparison types reject the same out-of-range values.

        Raises
        ------
        ValueError
            If ``v`` is not inside the open interval (0, 1).
        """
        if not 0 < v < 1:
            raise ValueError(f"fdr_alpha must be between 0 and 1, got {v}")
        return v
# ============================================================================
# Polymer Affinity Score Settings
# ============================================================================
@AnalysisSettingsRegistry.register("polymer_affinity")
class PolymerAffinityScoreSettings(BindingPreferenceFieldsMixin):
    """Experimental settings for the polymer affinity score analysis.

    The polymer affinity score is a comparative metric quantifying total
    polymer-protein interaction strength::

        S = Σ_{p,g} N_{p,g} × ΔG_sel_{p,g}    [kT]

    where::

        N      = mean_contact_fraction × n_exposed_in_group
        ΔG_sel = -ln(contact_share / expected_share)

    It is a post-processing analysis: it consumes binding preference
    results from the contacts analysis layer, so no new per-frame
    computation is needed. All scores are in kT (dimensionless); the
    temperature factor cancels in the Boltzmann inversion ratio.

    .. important::
        This metric assumes thermodynamic independence of contacts. The
        absolute values are NOT rigorous binding free energies. Only
        relative differences between polymer compositions are meaningful
        (comparative ranking).

    The binding preference fields (surface_exposure_threshold,
    enzyme_pdb_for_sasa, include_default_aa_groups, protein_groups,
    protein_partitions, polymer_type_selections) are inherited from
    ``BindingPreferenceFieldsMixin``.

    Attributes
    ----------
    compute_binding_preference : bool
        Compute binding preference from contacts data when cached results
        are not found. Defaults to True.
    """

    compute_binding_preference: bool = Field(
        description=(
            "Compute experimental binding preference from contacts data when "
            "cached results are not found. Set to False to only load pre-existing "
            "results."
        ),
        default=True,
    )

    @classmethod
    def analysis_type(cls) -> str:
        """Return the identifier ``"polymer_affinity"``.

        This is the same name the class is registered under.
        """
        return "polymer_affinity"

    def to_analysis_yaml_dict(self) -> dict[str, Any]:
        """Build the analysis.yaml representation of these settings.

        ``enabled`` (set to ``True``), ``compute_binding_preference``,
        ``surface_exposure_threshold`` and ``include_default_aa_groups``
        are always present. ``enzyme_pdb_for_sasa``, ``protein_groups``,
        ``protein_partitions`` and ``polymer_type_selections`` are added
        only when they are not ``None``.

        Returns
        -------
        dict
            Dictionary suitable for writing to analysis.yaml.
        """
        payload: dict[str, Any] = {
            "enabled": True,
            "compute_binding_preference": self.compute_binding_preference,
            "surface_exposure_threshold": self.surface_exposure_threshold,
            "include_default_aa_groups": self.include_default_aa_groups,
        }
        # Optional settings are written only when set; tuple order fixes key order.
        optional_fields = (
            "enzyme_pdb_for_sasa",
            "protein_groups",
            "protein_partitions",
            "polymer_type_selections",
        )
        for field_name in optional_fields:
            value = getattr(self, field_name)
            if value is not None:
                payload[field_name] = value
        return payload
@ComparisonSettingsRegistry.register("polymer_affinity")
class PolymerAffinityScoreComparisonSettings(BaseComparisonSettings):
    """Comparison-stage options for the polymer affinity score analysis.

    Attributes
    ----------
    fdr_alpha : float
        False discovery rate alpha for the Benjamini-Hochberg correction
        of pairwise p-values across conditions. Defaults to 0.05.
    """

    fdr_alpha: float = Field(
        description="FDR alpha for Benjamini-Hochberg correction",
        default=0.05,
    )

    @classmethod
    def analysis_type(cls) -> str:
        """Return the identifier ``"polymer_affinity"``.

        This is the same name the class is registered under.
        """
        return "polymer_affinity"
# ============================================================================
# Secondary Structure Settings
# ============================================================================
@AnalysisSettingsRegistry.register("secondary_structure")
class SecondaryStructureAnalysisSettings(BaseAnalysisSettings):
    """Settings for the secondary structure (DSSP) analysis.

    Attributes
    ----------
    chain_id : str
        Chain letter of the protein to analyze. Defaults to ``"A"``.
    """

    chain_id: str = Field(
        description="Chain letter for the protein to analyze",
        default="A",
    )

    @classmethod
    def analysis_type(cls) -> str:
        """Return the identifier ``"secondary_structure"``.

        This is the same name the class is registered under.
        """
        return "secondary_structure"

    def to_analysis_yaml_dict(self) -> dict[str, Any]:
        """Build the analysis.yaml representation of these settings.

        Returns
        -------
        dict
            Mapping with ``enabled`` set to ``True`` and the configured
            ``chain_id``.
        """
        payload: dict[str, Any] = {"enabled": True}
        payload["chain_id"] = self.chain_id
        return payload
@ComparisonSettingsRegistry.register("secondary_structure")
class SecondaryStructureComparisonSettings(BaseComparisonSettings):
    """Comparison-stage options for the secondary structure analysis.

    The class declares no fields of its own: all secondary structure
    comparison behavior uses the defaults from ``BaseComparisonSettings``.
    It exists as an extension point, so that SS-specific comparison
    parameters can be added here later without modifying the orchestrator.
    """

    @classmethod
    def analysis_type(cls) -> str:
        """Return the identifier ``"secondary_structure"``.

        This is the same name the class is registered under.
        """
        return "secondary_structure"
# ============================================================================
# Utility Functions
# ============================================================================
def get_all_analysis_types() -> list[str]:
    """List the analysis types known to ``AnalysisSettingsRegistry``.

    Returns
    -------
    list[str]
        Registered analysis type names, exactly as returned by
        ``AnalysisSettingsRegistry.list_available()``.
    """
    registered_names = AnalysisSettingsRegistry.list_available()
    return registered_names
def get_all_comparison_types() -> list[str]:
    """List the comparison settings types known to ``ComparisonSettingsRegistry``.

    Returns
    -------
    list[str]
        Registered comparison type names, exactly as returned by
        ``ComparisonSettingsRegistry.list_available()``.
    """
    registered_names = ComparisonSettingsRegistry.list_available()
    return registered_names