Source code for polyzymd.analyses.shared.selectors.polymer

"""Polymer chain and residue selectors.

This module provides selectors for polymer chains and residues:

- PolymerChains: Select all polymer chains
- PolymerResiduesByType: Select polymer residues by residue name (monomer type)
- PolymerSegments: Select individual monomer residues (segments) within polymer chains

For systems built with PolyzyMD, use chain_id="C" (the default) to select
polymers based on the PolyzyMD chain convention:
- Chain A: Protein/Enzyme
- Chain B: Substrate/Ligand
- Chain C: Polymers
- Chain D+: Solvent (water, ions, co-solvents)

Examples
--------
>>> # Select polymer chain C (PolyzyMD default)
>>> selector = PolymerChains()
>>> result = selector.select(universe)
>>>
>>> # Select by residue names (for non-PolyzyMD systems)
>>> selector = PolymerChains(chain_id=None, residue_names=["SBM", "EGP"])
>>>
>>> # Select specific monomer types by residue name (not restricted to chain C)
>>> selector = PolymerResiduesByType(residue_names=["SBM"])
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from polyzymd.analyses.shared.selectors.base import MolecularSelector, SelectionResult

if TYPE_CHECKING:
    from MDAnalysis.core.universe import Universe


# PolyzyMD chain convention: polymers live in chain "C".
# Used as the default ``chain_id`` of PolymerChains.
POLYZYMD_POLYMER_CHAIN_ID: str = "C"

# Common polymer residue names used in PolyzyMD simulations
# Users can extend this or provide their own lists
#
# This list is the fallback for PolymerChains and PolymerSegments when no
# ``residue_names`` are given, and the candidate pool that
# PolymerResiduesByType(exclude=True) subtracts from.
# NOTE: PolymerChains.select compares its residue names against this object
# with ``!=`` to decide whether to add a resname filter, so it must stay a
# list (a tuple with the same items would never compare equal to a list).
DEFAULT_POLYMER_RESNAMES: list[str] = [
    # Sulfobetaine methacrylate variants
    "SBM",
    "SBMA",
    "SB",
    # Ethylene glycol methacrylate variants
    "EGM",
    "EGMA",
    "EGP",
    "EGPMA",
    "OEGMA",
    # Phosphorylcholine
    "MPC",
    "PC",
    # Generic polymer names
    "MON",  # Monomer
    "POL",  # Polymer
    "PLY",
]



[docs]
class PolymerChains(MolecularSelector):
    """Select polymer chains from the system.

    For PolyzyMD-built systems, polymers are assigned to Chain C by convention.
    This selector uses chain ID selection by default, which is more reliable
    than residue name matching.

    Parameters
    ----------
    chain_id : str, optional
        Chain ID for polymer selection. Default "C" (PolyzyMD convention).
        Set to None to use residue_names instead.
    residue_names : list[str], optional
        Residue names that identify polymer residues.
        Only used when chain_id is None, or as a filter within the chain.
        Default uses common PolyzyMD polymer names. When chain_id is set,
        the filter is only applied if the names differ from the defaults.
    chain_indices : list[int], optional
        If provided, select only these polymer chain indices (0-indexed)
        from within the selected atoms. Useful when analyzing specific
        polymer chains in multi-chain systems. Chains are the bonded
        fragments of the selected atoms; negative indices count from the
        end, as in regular Python indexing. Must not be an empty list
        (use None to keep every chain).
    segids : list[str], optional
        If provided, select only polymers with these segment IDs.

    Notes
    -----
    The PolyzyMD chain convention is:
    - Chain A: Protein/Enzyme
    - Chain B: Substrate/Ligand
    - Chain C: Polymers
    - Chain D+: Solvent (water, ions, co-solvents)

    For systems not built with PolyzyMD, set chain_id=None and provide
    residue_names explicitly.

    Examples
    --------
    >>> # PolyzyMD system (recommended)
    >>> selector = PolymerChains()  # Uses chain C
    >>>
    >>> # Non-PolyzyMD system
    >>> selector = PolymerChains(chain_id=None, residue_names=["SBM", "EGM"])
    >>>
    >>> # PolyzyMD system with specific polymer types
    >>> selector = PolymerChains(residue_names=["SBM"])  # SBM in chain C only
    """

    def __init__(
        self,
        chain_id: str | None = POLYZYMD_POLYMER_CHAIN_ID,
        residue_names: list[str] | None = None,
        chain_indices: list[int] | None = None,
        segids: list[str] | None = None,
    ):
        self.chain_id = chain_id
        # Store a copy: the instance (and the metadata it hands out) must not
        # alias the module-level default list or the caller's own list.
        self.residue_names = list(residue_names or DEFAULT_POLYMER_RESNAMES)
        self.chain_indices = chain_indices
        self.segids = segids

    def select(self, universe: "Universe") -> SelectionResult:
        """Select polymer atoms/residues.

        Parameters
        ----------
        universe : Universe
            MDAnalysis universe to select from.

        Returns
        -------
        SelectionResult
            Selected atoms and their residues, plus metadata echoing the
            selector settings and the number of chains (``n_chains``).

        Raises
        ------
        ValueError
            If no atoms match the selection, if ``chain_indices`` is an
            empty list, or if a chain index is out of range.
        """
        atoms = universe.select_atoms(self._selection_string())

        if len(atoms) == 0:
            if self.chain_id is not None:
                raise ValueError(
                    f"No polymer atoms found in chain '{self.chain_id}'. "
                    "If this is not a PolyzyMD system, use chain_id=None and "
                    "provide residue_names explicitly."
                )
            raise ValueError(
                f"No polymer atoms found with residue names: {self.residue_names}. "
                "Check that polymer residue names match your topology."
            )

        # If chain_indices specified, filter to those chains
        if self.chain_indices is not None:
            atoms = self._restrict_to_chain_indices(atoms)

        return SelectionResult(
            atoms=atoms,
            residues=atoms.residues,
            label=self.label,
            metadata={
                "chain_id": self.chain_id,
                "residue_names": self.residue_names,
                "chain_indices": self.chain_indices,
                "segids": self.segids,
                "n_chains": len(atoms.fragments) if atoms.fragments else 1,
            },
        )

    def _selection_string(self) -> str:
        """Build the MDAnalysis selection string from the selector settings."""
        resname_clause = f"resname {' '.join(self.residue_names)}"
        selection_parts = []

        # Primary selection: by chain ID or residue names
        if self.chain_id is not None:
            selection_parts.append(f"chainID {self.chain_id}")
            # Non-default residue names act as an additional filter
            if self.residue_names and self.residue_names != DEFAULT_POLYMER_RESNAMES:
                selection_parts.append(resname_clause)
        else:
            # Fall back to residue name selection
            selection_parts.append(resname_clause)

        if self.segids:
            selection_parts.append(f"segid {' '.join(self.segids)}")

        return " and ".join(f"({part})" for part in selection_parts)

    def _restrict_to_chain_indices(self, atoms):
        """Return the atoms of ``atoms`` that belong to the requested chains."""
        if not self.chain_indices:
            # Previously this fell through and crashed with an AttributeError.
            raise ValueError(
                "chain_indices cannot be empty; use None to select all polymer chains."
            )

        # Chains are the bonded fragments (connected components)
        fragments = atoms.fragments
        if not fragments:
            # Fallback: treat the whole selection as a single chain
            fragments = [atoms]
        n_fragments = len(fragments)

        selected_atoms = None
        for idx in self.chain_indices:
            # Check both bounds so an invalid negative index raises the same
            # ValueError as an invalid positive one (not a bare IndexError).
            if not -n_fragments <= idx < n_fragments:
                raise ValueError(
                    f"Chain index {idx} out of range. Found {n_fragments} polymer chains."
                )
            # A fragment is the whole bonded molecule and may contain atoms
            # outside the current selection; keep only the selected ones.
            chain_atoms = fragments[idx] & atoms
            if selected_atoms is None:
                selected_atoms = chain_atoms
            else:
                selected_atoms = selected_atoms | chain_atoms

        return selected_atoms

    @property
    def label(self) -> str:
        """Short identifier derived from the chain indices or chain ID."""
        if self.chain_indices:
            return f"polymer_chains_{'-'.join(str(i) for i in self.chain_indices)}"
        if self.chain_id:
            return f"polymer_chain{self.chain_id}"
        return "polymer"




[docs]
class PolymerResiduesByType(MolecularSelector):
    """Select polymer residues by monomer type (residue name).

    Picks out residues with the given residue names so that individual
    monomer types of a copolymer can be analysed separately. Matching is
    done on residue name only.

    Parameters
    ----------
    residue_names : list[str]
        Residue names to select (e.g., ["SBM", "EGP"] for SBMA-EGMA copolymer).
        Must not be empty.
    exclude : bool, optional
        If True, select polymer residues NOT matching these names; the pool
        of polymer residues is then defined by ``DEFAULT_POLYMER_RESNAMES``.
        Default False.

    Examples
    --------
    >>> # Select SBMA monomers only
    >>> selector = PolymerResiduesByType(residue_names=["SBM", "SBMA"])
    >>>
    >>> # Select non-SBMA monomers
    >>> selector = PolymerResiduesByType(residue_names=["SBM", "SBMA"], exclude=True)
    """

    def __init__(
        self,
        residue_names: list[str],
        exclude: bool = False,
    ):
        if not residue_names:
            raise ValueError("residue_names cannot be empty")

        self.exclude = exclude
        self.residue_names = residue_names

    def select(self, universe: "Universe") -> SelectionResult:
        """Select polymer residues by type.

        Raises
        ------
        ValueError
            If the selection matches no atoms.
        """
        wanted = " ".join(self.residue_names)

        if not self.exclude:
            query = f"resname {wanted}"
        else:
            # Everything in the default polymer pool except the named types
            pool = " ".join(DEFAULT_POLYMER_RESNAMES)
            query = f"resname {pool} and not resname {wanted}"

        matched = universe.select_atoms(query)

        if len(matched) == 0:
            mode = "with"
            if self.exclude:
                mode = "excluding"
            raise ValueError(f"No polymer residues found {mode} names: {self.residue_names}")

        info = {
            "residue_names": self.residue_names,
            "exclude": self.exclude,
        }
        return SelectionResult(
            atoms=matched,
            residues=matched.residues,
            label=self.label,
            metadata=info,
        )

    @property
    def label(self) -> str:
        """Residue names joined by underscores, prefixed with ``not_`` when excluding."""
        joined = "_".join(self.residue_names)
        if self.exclude:
            return f"not_{joined}"
        return joined




[docs]
class PolymerSegments(MolecularSelector):
    """Select individual segments (residues) within polymer chains.

    This selector provides fine-grained access to polymer segments,
    useful for per-segment contact analysis.

    Parameters
    ----------
    residue_names : list[str], optional
        Residue names that identify polymer residues.
        Default uses common PolyzyMD polymer names.
    chain_index : int, optional
        Specific chain to select segments from (0-indexed).
        If None, selects from all chains. Negative values count from the
        end, as in regular Python indexing.
    segment_indices : list[int], optional
        Specific segment indices within chains to select.
        Uses 0-indexed positions within each chain; negative values count
        from the end of the chain. Indices that do not exist in a given
        chain are skipped for that chain.

    Notes
    -----
    A "segment" in this context refers to a single residue/monomer unit
    within a polymer chain, not MDAnalysis segments.
    """

    def __init__(
        self,
        residue_names: list[str] | None = None,
        chain_index: int | None = None,
        segment_indices: list[int] | None = None,
    ):
        # Store a copy: the instance (and the metadata it hands out) must not
        # alias the module-level default list or the caller's own list.
        self.residue_names = list(residue_names or DEFAULT_POLYMER_RESNAMES)
        self.chain_index = chain_index
        self.segment_indices = segment_indices

    def select(self, universe: "Universe") -> SelectionResult:
        """Select polymer segments.

        Parameters
        ----------
        universe : Universe
            MDAnalysis universe to select from.

        Returns
        -------
        SelectionResult
            Atoms of the selected residues, with ``n_segments`` in the
            metadata giving the number of residues collected.

        Raises
        ------
        ValueError
            If no atoms match ``residue_names``, if ``chain_index`` is out of
            range, or if no residue survives the ``segment_indices`` filter.
        """
        # First get all polymer atoms
        resname_str = " ".join(self.residue_names)
        all_polymer = universe.select_atoms(f"resname {resname_str}")

        if len(all_polymer) == 0:
            raise ValueError(f"No polymer atoms found with residue names: {self.residue_names}")

        # Get fragments (chains)
        # NOTE(review): MDAnalysis fragments are whole bonded molecules, so a
        # chain may contain residues whose names are not in residue_names
        # (e.g. end caps) - confirm this is the intended indexing basis.
        fragments = all_polymer.fragments
        if not fragments:
            fragments = [all_polymer]

        # Select specific chain if requested
        if self.chain_index is not None:
            n_chains = len(fragments)
            # Check both bounds so an invalid negative index raises the same
            # ValueError as an invalid positive one (not a bare IndexError).
            if not -n_chains <= self.chain_index < n_chains:
                raise ValueError(
                    f"Chain index {self.chain_index} out of range. Found {len(fragments)} chains."
                )
            fragments = [fragments[self.chain_index]]

        # Collect residues, optionally filtering by segment index
        selected_residues = []
        for frag in fragments:
            residues = frag.residues
            if self.segment_indices is None:
                selected_residues.extend(residues)
                continue
            n_residues = len(residues)
            # Chains can differ in length: skip positions a chain does not
            # have, for negative indices as well as positive ones.
            selected_residues.extend(
                residues[idx]
                for idx in self.segment_indices
                if -n_residues <= idx < n_residues
            )

        if not selected_residues:
            raise ValueError("No polymer segments matched the selection criteria")

        # Combine into single AtomGroup
        atoms = selected_residues[0].atoms
        for res in selected_residues[1:]:
            atoms = atoms | res.atoms

        return SelectionResult(
            atoms=atoms,
            residues=atoms.residues,
            label=self.label,
            metadata={
                "residue_names": self.residue_names,
                "chain_index": self.chain_index,
                "segment_indices": self.segment_indices,
                "n_segments": len(selected_residues),
            },
        )

    @property
    def label(self) -> str:
        """Identifier ``polymer_segments``, suffixed with the chain index if set."""
        parts = ["polymer_segments"]
        if self.chain_index is not None:
            parts.append(f"chain{self.chain_index}")
        return "_".join(parts)