Source code for polyzymd.analyses.shared.selectors.polymer

"""Polymer chain and residue selectors.

This module provides selectors for polymer chains and residues:

- PolymerChains: Select all polymer chains
- PolymerResiduesByType: Select polymer residues by residue name (monomer type)
- PolymerSegments: Select individual residues (monomer units) within polymer chains

For systems built with PolyzyMD, use chain_id="C" (the default) to select
polymers based on the PolyzyMD chain convention:
- Chain A: Protein/Enzyme
- Chain B: Substrate/Ligand
- Chain C: Polymers
- Chain D+: Solvent (water, ions, co-solvents)

Examples
--------
>>> # Select polymer chain C (PolyzyMD default)
>>> selector = PolymerChains()
>>> result = selector.select(universe)
>>>
>>> # Select by residue names (for non-PolyzyMD systems)
>>> selector = PolymerChains(chain_id=None, residue_names=["SBM", "EGP"])
>>>
>>> # Select specific monomer types by residue name (not restricted to chain C)
>>> selector = PolymerResiduesByType(residue_names=["SBM"])
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from polyzymd.analyses.shared.selectors.base import MolecularSelector, SelectionResult

if TYPE_CHECKING:
    from MDAnalysis.core.universe import Universe


# Chain identifier that the PolyzyMD chain convention reserves for polymers.
POLYZYMD_POLYMER_CHAIN_ID = "C"

# Residue names commonly used for polymer repeat units in PolyzyMD
# simulations. This is only a default: callers may extend it or pass their
# own list to the selectors below.
DEFAULT_POLYMER_RESNAMES = [
    "SBM", "SBMA", "SB",  # sulfobetaine methacrylate variants
    "EGM", "EGMA", "EGP", "EGPMA", "OEGMA",  # ethylene glycol methacrylate variants
    "MPC", "PC",  # phosphorylcholine
    "MON", "POL", "PLY",  # generic monomer / polymer names
]


class PolymerChains(MolecularSelector):
    """Select polymer chains from the system.

    For PolyzyMD-built systems, polymers are assigned to Chain C by
    convention. This selector uses chain ID selection by default, which is
    more reliable than residue name matching.

    Parameters
    ----------
    chain_id : str, optional
        Chain ID for polymer selection. Default "C" (PolyzyMD convention).
        Set to None to use residue_names instead.
    residue_names : list[str], optional
        Residue names that identify polymer residues. Only used when
        chain_id is None, or as a filter within the chain. Default uses
        common PolyzyMD polymer names.
    chain_indices : list[int], optional
        If provided, select only these polymer chain indices (0-indexed)
        from within the selected atoms. Useful when analyzing specific
        polymer chains in multi-chain systems. In-range negative indices
        count from the last chain, as in normal Python indexing. ``None``
        or an empty list applies no chain filtering.
    segids : list[str], optional
        If provided, select only polymers with these segment IDs.

    Notes
    -----
    The PolyzyMD chain convention is:

    - Chain A: Protein/Enzyme
    - Chain B: Substrate/Ligand
    - Chain C: Polymers
    - Chain D+: Solvent (water, ions, co-solvents)

    For systems not built with PolyzyMD, set chain_id=None and provide
    residue_names explicitly.

    Examples
    --------
    >>> # PolyzyMD system (recommended)
    >>> selector = PolymerChains()  # Uses chain C
    >>>
    >>> # Non-PolyzyMD system
    >>> selector = PolymerChains(chain_id=None, residue_names=["SBM", "EGM"])
    >>>
    >>> # PolyzyMD system with specific polymer types
    >>> selector = PolymerChains(residue_names=["SBM"])  # SBM in chain C only
    """

    def __init__(
        self,
        chain_id: str | None = POLYZYMD_POLYMER_CHAIN_ID,
        residue_names: list[str] | None = None,
        chain_indices: list[int] | None = None,
        segids: list[str] | None = None,
    ):
        self.chain_id = chain_id
        # Copy the defaults so that mutating ``self.residue_names`` can never
        # alter the shared module-level list. The copy still compares equal
        # to the defaults, which ``select`` relies on.
        self.residue_names = residue_names or list(DEFAULT_POLYMER_RESNAMES)
        self.chain_indices = chain_indices
        self.segids = segids

    def select(self, universe: "Universe") -> SelectionResult:
        """Select polymer atoms/residues.

        Parameters
        ----------
        universe : Universe
            MDAnalysis universe to select from.

        Returns
        -------
        SelectionResult
            Selected atoms and their residues, this selector's label, and
            metadata echoing the selector settings plus ``n_chains``.

        Raises
        ------
        ValueError
            If the selection matches no atoms, or if an entry of
            ``chain_indices`` is outside the range of available chains.
        """
        selection_parts = []

        # Primary selection: by chain ID or residue names
        if self.chain_id is not None:
            selection_parts.append(f"chainID {self.chain_id}")
            # Residue names only narrow the chain when they differ from the
            # defaults, i.e. when the caller supplied their own list.
            if self.residue_names and self.residue_names != DEFAULT_POLYMER_RESNAMES:
                resname_str = " ".join(self.residue_names)
                selection_parts.append(f"resname {resname_str}")
        else:
            # Fall back to residue name selection
            resname_str = " ".join(self.residue_names)
            selection_parts.append(f"resname {resname_str}")

        if self.segids:
            segid_str = " ".join(self.segids)
            selection_parts.append(f"segid {segid_str}")

        selection = " and ".join(f"({part})" for part in selection_parts)
        atoms = universe.select_atoms(selection)

        if len(atoms) == 0:
            if self.chain_id is not None:
                raise ValueError(
                    f"No polymer atoms found in chain '{self.chain_id}'. "
                    "If this is not a PolyzyMD system, use chain_id=None and "
                    "provide residue_names explicitly."
                )
            raise ValueError(
                f"No polymer atoms found with residue names: {self.residue_names}. "
                "Check that polymer residue names match your topology."
            )

        # An empty list is treated like None (no chain filtering), matching
        # how ``label``, ``segids`` and ``residue_names`` treat empty values.
        if self.chain_indices:
            # Each fragment (bonded component) is treated as one chain.
            fragments = atoms.fragments
            if not fragments:
                # Fallback: treat the whole selection as a single chain
                fragments = [atoms]
            n_fragments = len(fragments)

            selected_atoms = None
            for idx in self.chain_indices:
                # Check both bounds so an out-of-range negative index raises
                # the same ValueError instead of a bare IndexError.
                if not -n_fragments <= idx < n_fragments:
                    raise ValueError(
                        f"Chain index {idx} out of range. Found {n_fragments} polymer chains."
                    )
                # A fragment may contain atoms outside ``atoms``; intersect so
                # the chain/resname/segid filters above are still honoured.
                chain_atoms = fragments[idx] & atoms
                if selected_atoms is None:
                    selected_atoms = chain_atoms
                else:
                    selected_atoms = selected_atoms | chain_atoms
            atoms = selected_atoms

        # Evaluate the fragments once; fall back to 1 when none are reported.
        n_chains = len(atoms.fragments) or 1

        return SelectionResult(
            atoms=atoms,
            residues=atoms.residues,
            label=self.label,
            metadata={
                "chain_id": self.chain_id,
                "residue_names": self.residue_names,
                "chain_indices": self.chain_indices,
                "segids": self.segids,
                "n_chains": n_chains,
            },
        )

    @property
    def label(self) -> str:
        """Short identifier for this selection.

        Built from ``chain_indices`` when given, otherwise from ``chain_id``,
        otherwise the plain string ``"polymer"``.
        """
        if self.chain_indices:
            return f"polymer_chains_{'-'.join(str(i) for i in self.chain_indices)}"
        if self.chain_id:
            return f"polymer_chain{self.chain_id}"
        return "polymer"
class PolymerResiduesByType(MolecularSelector):
    """Select polymer residues by monomer type (residue name).

    Residues are matched purely on their residue names, which makes it
    possible to analyse one monomer type of a copolymer at a time.

    Parameters
    ----------
    residue_names : list[str]
        Residue names to select (e.g., ["SBM", "EGP"] for SBMA-EGMA copolymer).
        Must not be empty.
    exclude : bool, optional
        If True, select residues whose name is in ``DEFAULT_POLYMER_RESNAMES``
        but NOT in ``residue_names``. Default False.

    Examples
    --------
    >>> # Select SBMA monomers only
    >>> selector = PolymerResiduesByType(residue_names=["SBM", "SBMA"])
    >>>
    >>> # Select non-SBMA monomers
    >>> selector = PolymerResiduesByType(residue_names=["SBM", "SBMA"], exclude=True)
    """

    def __init__(
        self,
        residue_names: list[str],
        exclude: bool = False,
    ):
        if not residue_names:
            raise ValueError("residue_names cannot be empty")
        self.exclude = exclude
        self.residue_names = residue_names

    def select(self, universe: "Universe") -> SelectionResult:
        """Select polymer residues by type.

        Raises
        ------
        ValueError
            If the selection matches no atoms.
        """
        wanted = " ".join(self.residue_names)

        if not self.exclude:
            query = f"resname {wanted}"
        else:
            # Inverse mode: start from every default polymer residue name
            # and drop the requested ones.
            known = " ".join(DEFAULT_POLYMER_RESNAMES)
            query = f"resname {known} and not resname {wanted}"

        matched = universe.select_atoms(query)
        if len(matched) == 0:
            mode = "excluding" if self.exclude else "with"
            raise ValueError(f"No polymer residues found {mode} names: {self.residue_names}")

        details = {
            "residue_names": self.residue_names,
            "exclude": self.exclude,
        }
        return SelectionResult(
            atoms=matched,
            residues=matched.residues,
            label=self.label,
            metadata=details,
        )

    @property
    def label(self) -> str:
        """Residue names joined by underscores, prefixed with ``not_`` when excluding."""
        joined = "_".join(self.residue_names)
        if self.exclude:
            return f"not_{joined}"
        return f"{joined}"
class PolymerSegments(MolecularSelector):
    """Select individual segments (residues) within polymer chains.

    This selector provides fine-grained access to polymer segments, useful
    for per-segment contact analysis.

    Parameters
    ----------
    residue_names : list[str], optional
        Residue names that identify polymer residues. Defaults to the
        common PolyzyMD polymer names.
    chain_index : int, optional
        Specific chain to select segments from (0-indexed). If None,
        selects from all chains. In-range negative values count from the
        last chain, as in normal Python indexing.
    segment_indices : list[int], optional
        Specific segment indices within chains to select. Uses 0-indexed
        positions within each chain. Indices that fall outside a chain are
        skipped for that chain.

    Notes
    -----
    A "segment" in this context refers to a single residue/monomer unit
    within a polymer chain, not MDAnalysis segments.
    """

    def __init__(
        self,
        residue_names: list[str] | None = None,
        chain_index: int | None = None,
        segment_indices: list[int] | None = None,
    ):
        # Copy the defaults so that mutating ``self.residue_names`` can never
        # alter the shared module-level list.
        self.residue_names = residue_names or list(DEFAULT_POLYMER_RESNAMES)
        self.chain_index = chain_index
        self.segment_indices = segment_indices

    def select(self, universe: "Universe") -> SelectionResult:
        """Select polymer segments.

        Parameters
        ----------
        universe : Universe
            MDAnalysis universe to select from.

        Returns
        -------
        SelectionResult
            Atoms of the chosen residues, their residues, this selector's
            label, and metadata including ``n_segments``.

        Raises
        ------
        ValueError
            If no atoms match ``residue_names``, if ``chain_index`` is out
            of range, or if no residue survives the ``segment_indices``
            filter.
        """
        # First get all polymer atoms
        resname_str = " ".join(self.residue_names)
        all_polymer = universe.select_atoms(f"resname {resname_str}")

        if len(all_polymer) == 0:
            raise ValueError(f"No polymer atoms found with residue names: {self.residue_names}")

        # Get fragments (chains)
        # NOTE(review): MDAnalysis documents that fragments may extend beyond
        # the group they were requested from, so a chain can include residues
        # whose names are not in ``residue_names`` - confirm this is intended.
        fragments = all_polymer.fragments
        if not fragments:
            fragments = [all_polymer]

        # Select specific chain if requested
        if self.chain_index is not None:
            n_chains = len(fragments)
            # Check both bounds so an out-of-range negative index raises the
            # same ValueError instead of a bare IndexError.
            if not -n_chains <= self.chain_index < n_chains:
                raise ValueError(
                    f"Chain index {self.chain_index} out of range. Found {n_chains} chains."
                )
            fragments = [fragments[self.chain_index]]

        # Collect residues, optionally filtering by segment index
        selected_residues = []
        for frag in fragments:
            residues = frag.residues
            if self.segment_indices is None:
                selected_residues.extend(residues)
                continue
            n_residues = len(residues)
            # Chains can differ in length, so an index missing from this
            # chain is skipped rather than treated as an error; this covers
            # out-of-range negative indices too.
            selected_residues.extend(
                residues[idx] for idx in self.segment_indices if -n_residues <= idx < n_residues
            )

        if not selected_residues:
            raise ValueError("No polymer segments matched the selection criteria")

        # Combine into single AtomGroup
        atoms = selected_residues[0].atoms
        for res in selected_residues[1:]:
            atoms = atoms | res.atoms

        return SelectionResult(
            atoms=atoms,
            residues=atoms.residues,
            label=self.label,
            metadata={
                "residue_names": self.residue_names,
                "chain_index": self.chain_index,
                "segment_indices": self.segment_indices,
                "n_segments": len(selected_residues),
            },
        )

    @property
    def label(self) -> str:
        """Return ``"polymer_segments"``, with ``_chain<N>`` appended when ``chain_index`` is set."""
        parts = ["polymer_segments"]
        if self.chain_index is not None:
            parts.append(f"chain{self.chain_index}")
        return "_".join(parts)