Source code for polyzymd.analyses.shared.binding_preference._polymer

"""Polymer composition extraction from topology selections."""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING

from ._models import PolymerComposition

if TYPE_CHECKING:
    from MDAnalysis.core.universe import Universe

logger = logging.getLogger(__name__)


[docs] def extract_polymer_composition( universe: "Universe", polymer_type_selections: dict[str, str] | None = None, polymer_chain: str = "C", ) -> PolymerComposition: """Extract polymer composition (residue and heavy atom counts) from trajectory. This function counts the number of residues and heavy atoms for each polymer type in the system. The composition data is used for dual normalization of binding preference enrichment ratios. Parameters ---------- universe : Universe MDAnalysis Universe with loaded topology polymer_type_selections : dict[str, str], optional Mapping of type name to MDAnalysis selection string. If None, auto-detects from ``chainID <polymer_chain>`` using unique resnames. polymer_chain : str Chain ID for auto-detection when *polymer_type_selections* is None. Defaults to ``"C"`` (PolyzyMD chain convention). Returns ------- PolymerComposition Composition data with residue_counts and heavy_atom_counts per type. Heavy atoms are defined as all atoms with element != 'H'. Notes ----- For auto-detection (polymer_type_selections=None), the function: 1. Selects all atoms in the specified polymer chain 2. Groups residues by resname 3. Counts residues and heavy atoms per resname For explicit selections, each selection string defines which atoms belong to that polymer type. The function counts residues (unique resids) and heavy atoms within each selection. Examples -------- >>> # Auto-detect from chain C >>> composition = extract_polymer_composition(universe) >>> print(composition.residue_counts) {'SBM': 50, 'EGM': 50} >>> print(composition.heavy_atom_counts) {'SBM': 750, 'EGM': 400} >>> # Explicit selections >>> selections = {"SBMA": "chainID C and resname SBM"} >>> composition = extract_polymer_composition(universe, selections) """ residue_counts: dict[str, int] = {} heavy_atom_counts: dict[str, int] = {} if polymer_type_selections is not None: # Use explicit selections for type_name, selection in polymer_type_selections.items(): try: atoms = universe.select_atoms(selection) if len(atoms) == 0: logger.warning( f"Polymer type '{type_name}' selection matched no atoms: {selection}" ) continue # Count unique residues n_residues = len(atoms.residues) residue_counts[type_name] = n_residues # Count heavy atoms (non-hydrogen) # MDAnalysis stores element info - filter out hydrogens try: heavy_atoms = atoms.select_atoms("not element H") n_heavy = len(heavy_atoms) except (AttributeError, ValueError, TypeError): # Fallback: count atoms with mass > 1.1 (heavier than H) n_heavy = sum(1 for a in atoms if a.mass > 1.1) heavy_atom_counts[type_name] = n_heavy logger.debug( f"Polymer type '{type_name}': {n_residues} residues, {n_heavy} heavy atoms" ) except (ValueError, AttributeError, TypeError) as e: logger.warning(f"Failed to extract composition for '{type_name}': {e}") else: # Auto-detect from polymer chain try: polymer_atoms = universe.select_atoms(f"chainID {polymer_chain}") if len(polymer_atoms) == 0: logger.warning(f"No atoms found in chainID {polymer_chain} for polymer composition") return PolymerComposition() # Group by resname resnames = set(polymer_atoms.residues.resnames) for resname in resnames: # Select atoms of this resname within polymer chain type_atoms = universe.select_atoms(f"chainID {polymer_chain} and resname {resname}") # Count residues n_residues = len(type_atoms.residues) residue_counts[resname] = n_residues # Count heavy atoms try: heavy_atoms = type_atoms.select_atoms("not element H") n_heavy = len(heavy_atoms) except (AttributeError, ValueError, TypeError): # Fallback: count atoms with mass > 1.1 n_heavy = sum(1 for a in type_atoms if a.mass > 1.1) heavy_atom_counts[resname] = n_heavy logger.debug( f"Auto-detected polymer '{resname}': {n_residues} residues, " f"{n_heavy} heavy atoms" ) except (ValueError, AttributeError, TypeError) as e: logger.warning(f"Failed to extract polymer composition: {e}") return PolymerComposition() composition = PolymerComposition( residue_counts=residue_counts, heavy_atom_counts=heavy_atom_counts, ) logger.info( f"Polymer composition extracted: {composition.total_residues} total residues, " f"{composition.total_heavy_atoms} total heavy atoms across {len(residue_counts)} types" ) return composition