Source code for polyzymd.analyses.shared.selectors.protein

"""Protein residue selectors.

This module provides selectors for protein residues:

- ProteinResidues: Select all protein residues
- ProteinResiduesByGroup: Select protein residues by amino acid classification
- ProteinResiduesNearReference: Select residues within cutoff of reference atoms

Examples
--------
>>> # Select all protein residues
>>> selector = ProteinResidues()
>>> result = selector.select(universe)
>>>
>>> # Select aromatic residues only
>>> selector = ProteinResiduesByGroup(
...     grouping=ProteinAAClassification(),
...     groups=["aromatic"]
... )
>>>
>>> # Select residues near catalytic triad
>>> selector = ProteinResiduesNearReference(
...     reference_selection="resid 77 133 156",
...     cutoff=5.0
... )
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from polyzymd.analyses.shared.selectors.base import MolecularSelector, SelectionResult

if TYPE_CHECKING:
    from MDAnalysis.core.universe import Universe

    from polyzymd.analyses.shared.groupings.base import ResidueGrouping


class ProteinResidues(MolecularSelector):
    """Select all protein residues.

    Uses the MDAnalysis ``protein`` selection keyword, optionally narrowed
    by an additional selection expression.

    Parameters
    ----------
    selection_modifier : str, optional
        Additional selection criteria to AND with ``protein``, e.g.
        ``"not name H*"`` to exclude hydrogens. The conjunction is inserted
        automatically; a leading ``and`` (``"and not name H*"``) is accepted
        and dropped so that both spellings produce a valid selection.
    """

    def __init__(self, selection_modifier: str | None = None):
        # Stored verbatim; normalisation happens lazily in ``select``.
        self.selection_modifier = selection_modifier

    @staticmethod
    def _normalize_modifier(modifier: str | None) -> str:
        """Return ``modifier`` stripped of whitespace and of a leading ``and``.

        ``None`` and whitespace-only modifiers normalise to the empty string.
        """
        text = (modifier or "").strip()
        words = text.split(None, 1)
        if words and words[0] == "and":
            # ``select`` already joins with "and"; keeping the caller's copy
            # would yield the invalid expression "(protein) and (and ...)".
            text = words[1].strip() if len(words) > 1 else ""
        return text

    def select(self, universe: "Universe") -> SelectionResult:
        """Select all protein atoms/residues.

        Parameters
        ----------
        universe : Universe
            MDAnalysis universe to select from.

        Returns
        -------
        SelectionResult
            The matched atoms, their residues, this selector's label, and
            the final selection string under ``metadata["selection"]``.

        Raises
        ------
        ValueError
            If the selection matches no atoms.
        """
        selection = "protein"
        modifier = self._normalize_modifier(self.selection_modifier)
        if modifier:
            selection = f"({selection}) and ({modifier})"

        atoms = universe.select_atoms(selection)
        if len(atoms) == 0:
            raise ValueError(
                f"Selection '{selection}' matched no atoms. "
                "Check that the topology contains protein residues."
            )

        return SelectionResult(
            atoms=atoms,
            residues=atoms.residues,
            label=self.label,
            metadata={"selection": selection},
        )

    @property
    def label(self) -> str:
        """Fixed label identifying this selector's output."""
        return "protein"
class ProteinResiduesByGroup(MolecularSelector):
    """Select protein residues by amino acid group classification.

    Uses a ResidueGrouping to classify amino acids (e.g., aromatic, charged,
    polar, nonpolar) and selects only residues in the specified groups.

    Residues are matched as residue objects rather than by residue number,
    so residues that share a resid (for example in different chains) are
    classified and selected independently.

    Parameters
    ----------
    grouping : ResidueGrouping
        Classification scheme for amino acids
    groups : list[str]
        Names of groups to include (e.g., ["aromatic", "charged_positive"])
    exclude : bool, optional
        If True, select residues NOT in the specified groups. Default False.

    Examples
    --------
    >>> from polyzymd.analyses.shared.groupings import ProteinAAClassification
    >>>
    >>> # Select aromatic residues
    >>> grouping = ProteinAAClassification()
    >>> selector = ProteinResiduesByGroup(grouping, groups=["aromatic"])
    >>>
    >>> # Select all charged residues
    >>> selector = ProteinResiduesByGroup(
    ...     grouping,
    ...     groups=["charged_positive", "charged_negative"]
    ... )
    """

    def __init__(
        self,
        grouping: "ResidueGrouping",
        groups: list[str],
        exclude: bool = False,
    ):
        self.grouping = grouping
        self.groups = groups
        self.exclude = exclude

    def select(self, universe: "Universe") -> SelectionResult:
        """Select protein residues matching the specified groups.

        Parameters
        ----------
        universe : Universe
            MDAnalysis universe to select from.

        Returns
        -------
        SelectionResult
            Atoms and residues of the matching protein residues; metadata
            records the groups, the exclude flag and the number of matching
            residues (``n_matching``).

        Raises
        ------
        ValueError
            If the universe has no protein atoms, or no protein residue
            satisfies the group filter.
        """
        protein_atoms = universe.select_atoms("protein")
        if len(protein_atoms) == 0:
            raise ValueError("No protein atoms found in universe")

        protein_residues = protein_atoms.residues
        want_in_group = not self.exclude

        # Record positions inside ``protein_residues`` instead of resids:
        # resids can repeat across chains/segments, so re-selecting by
        # "resid ..." would also pull in same-numbered residues that belong
        # to a different group.
        matching_indices = [
            index
            for index, res in enumerate(protein_residues)
            if (self.grouping.classify(res.resname) in self.groups) == want_in_group
        ]

        if not matching_indices:
            group_str = ", ".join(self.groups)
            mode = "excluding" if self.exclude else "in"
            raise ValueError(
                f"No protein residues found {mode} groups: {group_str}. "
                f"Available groups: {self.grouping.available_groups}"
            )

        atoms = protein_residues[matching_indices].atoms

        return SelectionResult(
            atoms=atoms,
            residues=atoms.residues,
            label=self.label,
            metadata={
                "groups": self.groups,
                "exclude": self.exclude,
                "n_matching": len(matching_indices),
            },
        )

    @property
    def label(self) -> str:
        """Label built from the group names, prefixed ``not_`` when excluding."""
        prefix = "not_" if self.exclude else ""
        return f"protein_{prefix}{'_'.join(self.groups)}"
class ProteinResiduesNearReference(MolecularSelector):
    """Select protein residues within a cutoff distance of reference atoms.

    Useful for selecting residues near active sites, binding pockets,
    or other regions of interest.

    Parameters
    ----------
    reference_selection : str
        MDAnalysis selection string for reference atoms (e.g., "resid 77 133 156")
    cutoff : float
        Distance cutoff in Angstroms. Residues with any atom within this
        distance of any reference atom are selected.
    include_reference : bool, optional
        Whether to include the reference residues themselves (when they are
        protein). If False, every residue touched by the reference selection
        is removed from the result, even when the selection names only some
        of its atoms. Default True.
    frame : int, optional
        Index of the trajectory frame used for the distance calculation.
        Default 0. The universe is returned to the frame it was on before
        the call.

    Notes
    -----
    In the returned result, ``atoms`` holds the protein atoms that lie within
    the cutoff (plus the atoms of the reference residues when
    ``include_reference`` is True), while ``residues`` holds the residues
    those atoms belong to.

    Examples
    --------
    >>> # Select residues within 5A of catalytic triad
    >>> selector = ProteinResiduesNearReference(
    ...     reference_selection="resid 77 133 156",
    ...     cutoff=5.0,
    ... )
    >>>
    >>> # Select residues near substrate binding site (not including the site itself)
    >>> selector = ProteinResiduesNearReference(
    ...     reference_selection="resname LIG",
    ...     cutoff=4.0,
    ...     include_reference=False,
    ... )
    """

    def __init__(
        self,
        reference_selection: str,
        cutoff: float,
        include_reference: bool = True,
        frame: int = 0,
    ):
        self.reference_selection = reference_selection
        self.cutoff = cutoff
        self.include_reference = include_reference
        self.frame = frame

    def select(self, universe: "Universe") -> SelectionResult:
        """Select protein residues near the reference atoms.

        Parameters
        ----------
        universe : Universe
            MDAnalysis universe to select from. Its trajectory is moved to
            ``self.frame`` for the calculation and restored afterwards.

        Returns
        -------
        SelectionResult
            Nearby protein atoms and their residues; metadata records the
            reference selection, cutoff, include flag, frame and the number
            of reference atoms.

        Raises
        ------
        ValueError
            If the reference selection matches no atoms, or no protein atoms
            remain after applying the cutoff and the include/exclude rule.
        """
        original_frame = universe.trajectory.frame
        try:
            # Distances depend on coordinates, so evaluate at the requested frame.
            universe.trajectory[self.frame]

            ref_atoms = universe.select_atoms(self.reference_selection)
            if len(ref_atoms) == 0:
                raise ValueError(
                    f"Reference selection '{self.reference_selection}' matched no atoms"
                )

            # "around" returns atoms within the cutoff of the reference
            # atoms, excluding the reference atoms themselves.
            nearby_selection = (
                f"protein and around {self.cutoff} ({self.reference_selection})"
            )
            nearby_atoms = universe.select_atoms(nearby_selection)

            # All protein atoms of every residue touched by the reference.
            ref_protein = universe.select_atoms(
                f"protein and same residue as ({self.reference_selection})"
            )
            if self.include_reference:
                nearby_atoms = nearby_atoms | ref_protein
            else:
                # A reference naming only part of a residue leaves that
                # residue's remaining atoms inside the cutoff; remove them so
                # the reference residues are really excluded.
                nearby_atoms = nearby_atoms - ref_protein

            if len(nearby_atoms) == 0:
                raise ValueError(
                    f"No protein atoms found within {self.cutoff}A of '{self.reference_selection}'"
                )

            return SelectionResult(
                atoms=nearby_atoms,
                residues=nearby_atoms.residues,
                label=self.label,
                metadata={
                    "reference_selection": self.reference_selection,
                    "cutoff": self.cutoff,
                    "include_reference": self.include_reference,
                    "frame": self.frame,
                    "n_reference_atoms": len(ref_atoms),
                },
            )
        finally:
            # Always put the trajectory back where the caller left it.
            universe.trajectory[original_frame]

    @property
    def label(self) -> str:
        """Label encoding the cutoff to one decimal place."""
        return f"near_ref_{self.cutoff:.1f}A"