Source code for polyzymd.analyses.shared.groupings.base

"""Base classes for residue grouping/classification.

This module provides the abstract base class for residue classification
schemes and concrete implementations for protein amino acids.

The Strategy pattern allows users to define custom classification schemes
for polymers, modified residues, or other systems.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any


class ResidueGrouping(ABC):
    """Interface for schemes that sort residue names into labelled groups.

    A concrete scheme supplies two things: ``classify()``, which maps one
    residue name to a group label, and ``available_groups``, which lists
    every label the scheme can produce.

    ``get_residues_in_group()`` and ``to_dict()`` enumerate the names
    reported by ``_all_resnames``.  The base implementation of that property
    returns an empty list, so a subclass must override it for those two
    methods to list any members.

    Examples
    --------
    >>> class MyPolymerGrouping(ResidueGrouping):
    ...     def classify(self, resname: str) -> str:
    ...         if resname in ["SBM", "SBMA"]:
    ...             return "zwitterionic"
    ...         elif resname in ["EGP", "EGMA"]:
    ...             return "hydrophilic"
    ...         return "unknown"
    ...
    ...     @property
    ...     def available_groups(self) -> list[str]:
    ...         return ["zwitterionic", "hydrophilic", "unknown"]
    """

    @abstractmethod
    def classify(self, resname: str) -> str:
        """Return the group label assigned to a residue name.

        Parameters
        ----------
        resname : str
            Residue name (3-letter code for amino acids)

        Returns
        -------
        str
            Label of the group this residue type belongs to
        """

    @property
    @abstractmethod
    def available_groups(self) -> list[str]:
        """Every group label this scheme can assign."""

    def get_residues_in_group(self, group: str) -> list[str]:
        """Collect the known residue names that classify into ``group``.

        Parameters
        ----------
        group : str
            Group label to look up

        Returns
        -------
        list[str]
            Names from ``_all_resnames`` whose classification equals
            ``group``, in the order ``_all_resnames`` yields them

        Raises
        ------
        ValueError
            If ``group`` is not one of ``available_groups``
        """
        known_groups = self.available_groups
        if group in known_groups:
            members = []
            for name in self._all_resnames:
                if self.classify(name) == group:
                    members.append(name)
            return members
        raise ValueError(f"Unknown group '{group}'. Available: {known_groups}")

    @property
    def _all_resnames(self) -> list[str]:
        """Residue names this scheme knows about; empty unless overridden."""
        return []

    def to_dict(self) -> dict[str, Any]:
        """Serialize the scheme as a plain dictionary.

        Returns
        -------
        dict[str, Any]
            ``"type"`` holds the concrete class name and ``"groups"`` maps
            each available group label to its list of member residue names.
        """
        members_by_group = {}
        for label in self.available_groups:
            members_by_group[label] = self.get_residues_in_group(label)
        return {"type": type(self).__name__, "groups": members_by_group}
class ProteinAAClassification(ResidueGrouping):
    """Standard amino acid classification.

    Groups amino acids into:
    - aromatic: PHE, TRP, TYR, HIS
    - charged_positive: ARG, LYS
    - charged_negative: ASP, GLU
    - polar: ASN, CYS, GLN, SER, THR
    - nonpolar: ALA, GLY, ILE, LEU, MET, PRO, VAL

    Any name that is neither one of these nor a recognised protonation-state
    variant of one of them is classified as ``"unknown"``.

    This classification matches the scaffold notebooks and common
    biochemistry conventions.

    Parameters
    ----------
    include_his_aromatic : bool, optional
        Whether to classify HIS as aromatic (default True).
        Some classifications put HIS with charged_positive; passing False
        selects that alternative.

    Examples
    --------
    >>> grouping = ProteinAAClassification()
    >>> grouping.classify("PHE")
    'aromatic'
    >>> grouping.classify("LYS")
    'charged_positive'
    >>> grouping.get_residues_in_group("aromatic")
    ['PHE', 'TRP', 'TYR', 'HIS']
    """

    # Standard amino acid classification
    # Based on scaffold notebooks: Contact_Analysis_SBMA_EGMA_EGPMA_per_Residue.ipynb
    _CLASSIFICATION: dict[str, str] = {
        # Aromatic
        "PHE": "aromatic",
        "TRP": "aromatic",
        "TYR": "aromatic",
        "HIS": "aromatic",  # Can be overridden per instance
        # Charged positive
        "ARG": "charged_positive",
        "LYS": "charged_positive",
        # Charged negative
        "ASP": "charged_negative",
        "GLU": "charged_negative",
        # Polar
        "ASN": "polar",
        "CYS": "polar",
        "GLN": "polar",
        "SER": "polar",
        "THR": "polar",
        # Nonpolar
        "ALA": "nonpolar",
        "GLY": "nonpolar",
        "ILE": "nonpolar",
        "LEU": "nonpolar",
        "MET": "nonpolar",
        "PRO": "nonpolar",
        "VAL": "nonpolar",
    }

    _ALL_GROUPS: list[str] = [
        "aromatic",
        "charged_positive",
        "charged_negative",
        "polar",
        "nonpolar",
        "unknown",
    ]

    # Protonation-state / naming variants mapped to their canonical residue.
    # Defined once at class level so classify() does not rebuild the table
    # on every call.
    _VARIANTS: dict[str, str] = {
        "HIE": "HIS",
        "HID": "HIS",
        "HIP": "HIS",  # Protonated histidine
        "HSE": "HIS",
        "HSD": "HIS",
        "HSP": "HIS",
        "CYSH": "CYS",
        "CYX": "CYS",  # Disulfide bonded
        "ASH": "ASP",  # Protonated aspartate
        "GLH": "GLU",  # Protonated glutamate
        "LYN": "LYS",  # Neutral lysine
    }

    def __init__(self, include_his_aromatic: bool = True):
        self.include_his_aromatic = include_his_aromatic
        # Work on a per-instance copy so the HIS override below never leaks
        # into the shared class-level table.
        self._classification = self._CLASSIFICATION.copy()
        if not include_his_aromatic:
            # Move HIS to charged_positive (alternative classification)
            self._classification["HIS"] = "charged_positive"

    def classify(self, resname: str) -> str:
        """Classify amino acid by residue name.

        The name is upper-cased and stripped of surrounding whitespace, then
        any recognised protonation-state variant is mapped to its canonical
        residue before the lookup.

        Parameters
        ----------
        resname : str
            Residue name; case and surrounding whitespace are ignored.

        Returns
        -------
        str
            Group label, or ``"unknown"`` when the name is not recognised.
        """
        canonical = resname.upper().strip()
        canonical = self._VARIANTS.get(canonical, canonical)
        return self._classification.get(canonical, "unknown")

    @property
    def available_groups(self) -> list[str]:
        """All group labels of this scheme, including ``"unknown"``."""
        # Hand out a copy: _ALL_GROUPS is shared by every instance, so a
        # caller mutating the returned list must not be able to alter it.
        return list(self._ALL_GROUPS)

    @property
    def _all_resnames(self) -> list[str]:
        """Canonical residue names known to this scheme."""
        return list(self._CLASSIFICATION)

    def get_charged_groups(self) -> list[str]:
        """Convenience: get both charged group names."""
        return ["charged_positive", "charged_negative"]

    def get_hydrophobic_groups(self) -> list[str]:
        """Convenience: groups typically considered hydrophobic."""
        return ["aromatic", "nonpolar"]

    def get_hydrophilic_groups(self) -> list[str]:
        """Convenience: groups typically considered hydrophilic."""
        return ["charged_positive", "charged_negative", "polar"]
class CustomGrouping(ResidueGrouping):
    """User-defined residue classification.

    Allows arbitrary mapping from residue names to group labels.  Residue
    names are matched case-insensitively and with surrounding whitespace
    ignored, both when the mapping is stored and when a name is classified.

    Parameters
    ----------
    classification : dict[str, str]
        Mapping from residue name to group label.  Names that become equal
        after normalization collapse to one entry; the later one wins.
    default_group : str, optional
        Group label for unclassified residues. Default "other".

    Examples
    --------
    >>> # Custom polymer classification
    >>> grouping = CustomGrouping({
    ...     "SBM": "zwitterionic",
    ...     "SBMA": "zwitterionic",
    ...     "EGP": "peg_like",
    ...     "EGMA": "peg_like",
    ... }, default_group="unknown")
    >>> grouping.classify("SBM")
    'zwitterionic'
    """

    def __init__(
        self,
        classification: dict[str, str],
        default_group: str = "other",
    ):
        self._classification = {
            self._normalize(name): label for name, label in classification.items()
        }
        self.default_group = default_group

        # Available groups: the distinct labels in sorted order, with the
        # default label appended last when no residue maps to it explicitly.
        self._groups = sorted(set(self._classification.values()))
        if default_group not in self._groups:
            self._groups.append(default_group)

    @staticmethod
    def _normalize(resname: str) -> str:
        """Return the lookup key for a residue name (upper-cased, stripped)."""
        return resname.upper().strip()

    def classify(self, resname: str) -> str:
        """Classify residue by name using custom mapping.

        Parameters
        ----------
        resname : str
            Residue name; case and surrounding whitespace are ignored.

        Returns
        -------
        str
            The mapped group label, or ``default_group`` when the name is
            not in the mapping.
        """
        return self._classification.get(self._normalize(resname), self.default_group)

    @property
    def available_groups(self) -> list[str]:
        """All group labels, including the default group."""
        # Return a copy so callers cannot mutate the instance's group list.
        return list(self._groups)

    @property
    def _all_resnames(self) -> list[str]:
        """Normalized residue names present in the mapping."""
        return list(self._classification)

    @classmethod
    def from_groups(
        cls, groups: dict[str, list[str]], default_group: str = "other"
    ) -> "CustomGrouping":
        """Create grouping from group -> residue list mapping.

        Parameters
        ----------
        groups : dict[str, list[str]]
            Mapping from group name to list of residue names.  If a residue
            is listed under more than one group, the group that comes later
            in ``groups`` wins.  A group with an empty residue list adds no
            entries and therefore does not appear in ``available_groups``.
        default_group : str
            Group for unlisted residues

        Returns
        -------
        CustomGrouping

        Examples
        --------
        >>> grouping = CustomGrouping.from_groups({
        ...     "zwitterionic": ["SBM", "SBMA"],
        ...     "peg_like": ["EGP", "EGMA", "OEGMA"],
        ... })
        """
        # Invert group -> [residues] into the residue -> group mapping that
        # the constructor expects.
        classification = {}
        for group_name, resnames in groups.items():
            for resname in resnames:
                classification[resname] = group_name
        return cls(classification, default_group=default_group)