Source code for polyzymd.analyses.shared.selectors.solvent

"""Solvent, cosolvent, and substrate selectors.

This module provides selectors for non-protein, non-polymer molecules:

- SolventMolecules: Select water molecules
- CosolventMolecules: Select cosolvent (e.g., DMSO)
- SubstrateMolecule: Select substrate/ligand molecules
- IonSelector: Select ions (e.g., Na+, Cl-)

Examples
--------
>>> # Select water molecules
>>> selector = SolventMolecules()
>>>
>>> # Select DMSO cosolvent
>>> selector = CosolventMolecules(residue_names=["DMSO", "DMS"])
>>>
>>> # Select substrate by residue name
>>> selector = SubstrateMolecule(residue_name="RBU")  # Resorufin butyrate
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from polyzymd.analyses.shared.selectors.base import MolecularSelector, SelectionResult

if TYPE_CHECKING:
    from MDAnalysis.core.universe import Universe


# Common water residue names across force fields.
# SolventMolecules falls back to this list when it is constructed without
# explicit ``residue_names``.
DEFAULT_WATER_RESNAMES = [
    "HOH",  # PDB standard
    "WAT",  # Common
    "TIP3",  # TIP3P
    "TIP4",  # TIP4P
    "TIP5",  # TIP5P
    "SOL",  # GROMACS
    "SPC",  # SPC water
    "T3P",  # TIP3P variant
    "W",  # Coarse-grained
]

# Common cosolvent residue names.
# CosolventMolecules falls back to this list when it is constructed without
# explicit ``residue_names``.
# NOTE: ACE is intentionally excluded - it's commonly used for N-terminal
# acetyl caps on proteins, not acetone cosolvent. Use "ACO" for acetone.
DEFAULT_COSOLVENT_RESNAMES = [
    "DMSO",  # Dimethyl sulfoxide
    "DMS",  # DMSO variant
    "ACN",  # Acetonitrile
    "ACO",  # Acetone (not ACE - that's often acetyl cap)
    "MeOH",  # Methanol
    "MEOH",  # Methanol (upper-case spelling)
    "EtOH",  # Ethanol
    "ETOH",  # Ethanol (upper-case spelling)
    "THF",  # Tetrahydrofuran
    "GLYC",  # Glycerol
    "GOL",  # Glycerol variant
]


class SolventMolecules(MolecularSelector):
    """Select solvent (water) molecules.

    Parameters
    ----------
    residue_names : list[str], optional
        Residue names for water. ``None`` or an empty list falls back to
        ``DEFAULT_WATER_RESNAMES``. The names are copied, so later changes
        to ``selector.residue_names`` never affect the caller's list or
        the module-level defaults.
    exclude_near : str, optional
        Exclude waters within a cutoff of this selection.
        E.g., "protein" to exclude waters in first hydration shell.
    exclude_cutoff : float, optional
        Cutoff in Angstroms for exclude_near. Default 3.0.

    Examples
    --------
    >>> # Select all water
    >>> selector = SolventMolecules()
    >>>
    >>> # Select bulk water (exclude first shell around protein)
    >>> selector = SolventMolecules(
    ...     exclude_near="protein",
    ...     exclude_cutoff=5.0
    ... )
    """

    def __init__(
        self,
        residue_names: list[str] | None = None,
        exclude_near: str | None = None,
        exclude_cutoff: float = 3.0,
    ):
        # Copy instead of aliasing: storing DEFAULT_WATER_RESNAMES itself
        # would let a mutation on one selector leak into every other one.
        self.residue_names = list(residue_names or DEFAULT_WATER_RESNAMES)
        self.exclude_near = exclude_near
        self.exclude_cutoff = exclude_cutoff

    def select(self, universe: "Universe") -> SelectionResult:
        """Select water molecules.

        Parameters
        ----------
        universe : Universe
            Universe whose ``select_atoms`` method is queried.

        Returns
        -------
        SelectionResult
            Matching atoms and their residues, this selector's label, and
            metadata recording the selection parameters and the number of
            residues found.

        Raises
        ------
        ValueError
            If the selection matches no atoms.
        """
        resname_str = " ".join(self.residue_names)
        selection = f"resname {resname_str}"

        if self.exclude_near:
            # Exclude waters near the specified selection.
            # NOTE(review): `around` appears to be evaluated per atom, so a
            # water with only some atoms inside the cutoff may be partially
            # selected - confirm whether `byres` is wanted here.
            selection = (
                f"({selection}) and not (around {self.exclude_cutoff} ({self.exclude_near}))"
            )

        atoms = universe.select_atoms(selection)

        if len(atoms) == 0:
            message = f"No solvent molecules found with residue names: {self.residue_names}"
            if self.exclude_near:
                # The exclusion may be what emptied the selection, so say so
                # rather than blaming the residue names alone.
                message += (
                    f" (after excluding atoms within {self.exclude_cutoff} A "
                    f"of selection '{self.exclude_near}')"
                )
            raise ValueError(message)

        return SelectionResult(
            atoms=atoms,
            residues=atoms.residues,
            label=self.label,
            metadata={
                "residue_names": self.residue_names,
                "exclude_near": self.exclude_near,
                "exclude_cutoff": self.exclude_cutoff,
                "n_molecules": len(atoms.residues),
            },
        )

    @property
    def label(self) -> str:
        """Return ``"water"``, or a cutoff-tagged bulk-water label when excluding."""
        if self.exclude_near:
            return f"bulk_water_{self.exclude_cutoff:.1f}A"
        return "water"
class CosolventMolecules(MolecularSelector):
    """Select cosolvent molecules (e.g., DMSO, acetonitrile).

    Parameters
    ----------
    residue_names : list[str], optional
        Residue names for cosolvent. ``None`` or an empty list falls back
        to ``DEFAULT_COSOLVENT_RESNAMES``. You should typically specify
        this for your system. The names are copied, so later changes to
        ``selector.residue_names`` never affect the caller's list or the
        module-level defaults.

    Examples
    --------
    >>> # Select DMSO molecules
    >>> selector = CosolventMolecules(residue_names=["DMSO", "DMS"])
    """

    def __init__(
        self,
        residue_names: list[str] | None = None,
    ):
        # Copy instead of aliasing: storing DEFAULT_COSOLVENT_RESNAMES itself
        # would let a mutation on one selector leak into every other one.
        self.residue_names = list(residue_names or DEFAULT_COSOLVENT_RESNAMES)

    def select(self, universe: "Universe") -> SelectionResult:
        """Select cosolvent molecules.

        Parameters
        ----------
        universe : Universe
            Universe whose ``select_atoms`` method is queried.

        Returns
        -------
        SelectionResult
            Matching atoms and their residues, the ``"cosolvent"`` label,
            and metadata with the residue names and residue count.

        Raises
        ------
        ValueError
            If no atoms match any of the residue names.
        """
        resname_str = " ".join(self.residue_names)
        selection = f"resname {resname_str}"

        atoms = universe.select_atoms(selection)

        if len(atoms) == 0:
            raise ValueError(
                f"No cosolvent molecules found with residue names: {self.residue_names}. "
                "Specify the correct residue names for your cosolvent."
            )

        return SelectionResult(
            atoms=atoms,
            residues=atoms.residues,
            label=self.label,
            metadata={
                "residue_names": self.residue_names,
                "n_molecules": len(atoms.residues),
            },
        )

    @property
    def label(self) -> str:
        """Return the fixed label ``"cosolvent"``."""
        return "cosolvent"
class SubstrateMolecule(MolecularSelector):
    """Selector for substrate or ligand molecules identified by residue name.

    Parameters
    ----------
    residue_name : str
        Residue name of the substrate.
    n_molecules : int, optional
        Expected number of substrate molecules. When given, selection
        fails unless exactly this many residues are found. Default None
        (no validation).

    Examples
    --------
    >>> # Select resorufin butyrate substrate
    >>> selector = SubstrateMolecule(residue_name="RBU")
    >>>
    >>> # Select single substrate, validate count
    >>> selector = SubstrateMolecule(residue_name="RBU", n_molecules=1)
    """

    def __init__(
        self,
        residue_name: str,
        n_molecules: int | None = None,
    ):
        self.residue_name = residue_name
        self.n_molecules = n_molecules

    def select(self, universe: "Universe") -> SelectionResult:
        """Select substrate molecules.

        Raises
        ------
        ValueError
            If no atoms carry the residue name, or if ``n_molecules`` was
            given and the number of residues found differs from it.
        """
        matched = universe.select_atoms(f"resname {self.residue_name}")
        if not len(matched):
            raise ValueError(f"No substrate molecules found with residue name: {self.residue_name}")

        matched_residues = matched.residues
        n_found = len(matched_residues)

        # Optional sanity check on the number of substrate copies.
        expected = self.n_molecules
        if expected is not None and n_found != expected:
            raise ValueError(
                f"Expected {self.n_molecules} substrate molecule(s), but found {n_found}"
            )

        summary = {
            "residue_name": self.residue_name,
            "n_molecules": n_found,
        }
        return SelectionResult(
            atoms=matched,
            residues=matched_residues,
            label=self.label,
            metadata=summary,
        )

    @property
    def label(self) -> str:
        """Label of the form ``substrate_<residue_name>``."""
        return f"substrate_{self.residue_name}"
class IonSelector(MolecularSelector):
    """Select ion molecules (Na+, Cl-, etc.).

    Parameters
    ----------
    residue_names : list[str], optional
        Residue names for ions. Default includes common ions. When given,
        these names are used as-is and ``ion_type`` only affects the
        label and metadata.
    ion_type : {"all", "cations", "anions"}, optional
        Filter to specific ion types. Default "all". Only chooses among
        the built-in defaults; it does not filter explicit
        ``residue_names``.

    Raises
    ------
    ValueError
        If ``ion_type`` is not one of the accepted values.

    Examples
    --------
    >>> # Select all ions
    >>> selector = IonSelector()
    >>>
    >>> # Select only sodium ions
    >>> selector = IonSelector(residue_names=["NA", "Na+", "SOD"])
    """

    # Common ion residue names
    DEFAULT_CATIONS = ["NA", "Na+", "SOD", "K", "K+", "POT", "MG", "Mg2+", "CA", "Ca2+"]
    DEFAULT_ANIONS = ["CL", "Cl-", "CLA", "BR", "Br-"]

    def __init__(
        self,
        residue_names: list[str] | None = None,
        ion_type: str = "all",
    ):
        if ion_type not in ("all", "cations", "anions"):
            raise ValueError(f"ion_type must be 'all', 'cations', or 'anions', got {ion_type}")

        self.ion_type = ion_type

        # Always store a fresh list: aliasing the class-level defaults (or
        # the caller's list) would let a mutation on one selector change
        # the defaults seen by every other instance.
        if residue_names is not None:
            self.residue_names = list(residue_names)
        elif ion_type == "cations":
            self.residue_names = list(self.DEFAULT_CATIONS)
        elif ion_type == "anions":
            self.residue_names = list(self.DEFAULT_ANIONS)
        else:
            self.residue_names = self.DEFAULT_CATIONS + self.DEFAULT_ANIONS

    def select(self, universe: "Universe") -> SelectionResult:
        """Select ion molecules.

        Parameters
        ----------
        universe : Universe
            Universe whose ``select_atoms`` method is queried.

        Returns
        -------
        SelectionResult
            Matching atoms and their residues, this selector's label, and
            metadata with the residue names, ion type and residue count.

        Raises
        ------
        ValueError
            If no atoms match any of the residue names.
        """
        resname_str = " ".join(self.residue_names)
        selection = f"resname {resname_str}"

        atoms = universe.select_atoms(selection)

        if len(atoms) == 0:
            raise ValueError(f"No ions found with residue names: {self.residue_names}")

        return SelectionResult(
            atoms=atoms,
            residues=atoms.residues,
            label=self.label,
            metadata={
                "residue_names": self.residue_names,
                "ion_type": self.ion_type,
                "n_ions": len(atoms.residues),
            },
        )

    @property
    def label(self) -> str:
        """Label of the form ``ions_<ion_type>``."""
        return f"ions_{self.ion_type}"