# Source code for polyzymd.analyses.shared.binding_preference._resolution

"""Helpers for resolving protein and polymer selections."""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING

from polyzymd.analyses.shared.aa_classification import DEFAULT_AA_CLASS_SELECTIONS

if TYPE_CHECKING:
    from MDAnalysis.core.universe import Universe

    from polyzymd.analyses.shared.surface_exposure import SurfaceExposureResult

logger = logging.getLogger(__name__)


def resolve_protein_group_selections(
    universe: "Universe",
    protein_group_selections: dict[str, str] | None = None,
) -> dict[str, set[int]]:
    """Resolve protein group MDAnalysis selections to residue IDs.

    Converts MDAnalysis selection strings into sets of residue IDs at
    analysis time. This allows the user to define groups with flexible
    selections while we work with concrete residue IDs internally.

    Parameters
    ----------
    universe : Universe
        MDAnalysis Universe with loaded topology.
    protein_group_selections : dict[str, str], optional
        Mapping of group name to MDAnalysis selection string.
        If None, uses DEFAULT_AA_CLASS_SELECTIONS.

    Returns
    -------
    dict[str, set[int]]
        Mapping of group name to set of residue IDs. Every requested group
        name is present in the result; a group whose selection could not be
        evaluated maps to an empty set.

    Notes
    -----
    Resolution is best-effort: a selection that fails (malformed selection
    string, missing topology attribute, wrong argument type) is logged as a
    warning and does not prevent the remaining groups from being resolved.

    Examples
    --------
    >>> selections = {"aromatic": "protein and resname PHE TRP TYR HIS"}
    >>> groups = resolve_protein_group_selections(universe, selections)
    >>> print(groups["aromatic"])
    {12, 45, 67, 89, ...}  # Set of aromatic residue IDs
    """
    # Imported locally so that importing this module still does not pull in
    # MDAnalysis at runtime (the module-level import is TYPE_CHECKING-only).
    from MDAnalysis.exceptions import SelectionError

    if protein_group_selections is None:
        # Only read below, so the shared default mapping needs no copy.
        protein_group_selections = DEFAULT_AA_CLASS_SELECTIONS

    resolved: dict[str, set[int]] = {}
    for group_name, selection in protein_group_selections.items():
        try:
            atoms = universe.select_atoms(selection)
            # Coerce to plain ints so the result matches the declared
            # set[int] (MDAnalysis typically hands back NumPy integers).
            resids = {int(resid) for resid in atoms.residues.resids}
        except (SelectionError, ValueError, AttributeError, TypeError) as e:
            # SelectionError is what MDAnalysis raises for an unparsable
            # selection string; it is not a ValueError/AttributeError/TypeError
            # subclass, so it must be listed explicitly to reach this handler.
            logger.warning(
                f"Failed to resolve protein group '{group_name}' with selection '{selection}': {e}"
            )
            resolved[group_name] = set()
        else:
            resolved[group_name] = resids
            logger.debug(
                f"Resolved protein group '{group_name}': {len(resids)} residues ({selection})"
            )

    return resolved
def resolve_protein_groups_from_surface_exposure(
    surface_exposure: "SurfaceExposureResult",
    include_default_aa_groups: bool = True,
    custom_protein_groups: dict[str, list[int]] | None = None,
) -> dict[str, set[int]]:
    """Build protein group -> residue ID sets from a surface exposure result.

    Only the surface exposure result is consulted (its per-residue ``resid``
    and ``aa_class`` fields plus its ``all_resids`` set), so no MDAnalysis
    Universe is needed at comparison time.

    Two sources of groups are combined:

    - Default amino-acid class groups, one per distinct ``aa_class`` value
      found in ``surface_exposure.residue_exposures``. Residues whose class
      is empty or ``"unknown"`` are skipped.
    - Custom groups given as explicit resid lists. Resids that are not in
      ``surface_exposure.all_resids`` are dropped with a warning. A custom
      group that shares its name with a default group replaces it.

    Parameters
    ----------
    surface_exposure : SurfaceExposureResult
        Surface exposure analysis result containing residue data.
    include_default_aa_groups : bool, default True
        If True, include the groups derived from each residue's ``aa_class``.
    custom_protein_groups : dict[str, list[int]], optional
        User-defined protein groups as ``{group_name: [resid1, resid2, ...]}``.
        If a group name matches a default AA class, it overrides that default.

    Returns
    -------
    dict[str, set[int]]
        Mapping of group name to set of residue IDs.

    Examples
    --------
    >>> from polyzymd.analyses.shared.surface_exposure import SurfaceExposureFilter
    >>> exposure_filter = SurfaceExposureFilter(threshold=0.2)
    >>> surface_result = exposure_filter.calculate("enzyme.pdb")
    >>> # Get default AA groups + custom active_site group
    >>> groups = resolve_protein_groups_from_surface_exposure(
    ...     surface_result,
    ...     include_default_aa_groups=True,
    ...     custom_protein_groups={"active_site": [77, 133, 156]},
    ... )
    >>> print(groups.keys())
    dict_keys(['aromatic', 'polar', 'nonpolar', 'charged_positive', 'charged_negative', 'active_site'])
    """
    groups: dict[str, set[int]] = {}
    known_resids = surface_exposure.all_resids

    # Phase 1: default groups, keyed by the aa_class recorded per residue.
    if include_default_aa_groups:
        by_class: dict[str, set[int]] = {}
        for residue in surface_exposure.residue_exposures:
            cls_name = residue.aa_class
            if not cls_name or cls_name == "unknown":
                continue
            by_class.setdefault(cls_name, set()).add(residue.resid)

        for group_name, members in by_class.items():
            groups[group_name] = members
            logger.debug(
                f"Default AA group '{group_name}': {len(members)} residues from surface exposure"
            )

    # Phase 2: custom groups; these win over a default with the same name.
    for group_name, resid_list in (custom_protein_groups or {}).items():
        wanted = set(resid_list)
        missing = wanted - known_resids
        kept = wanted & known_resids

        if missing:
            logger.warning(
                f"Custom protein group '{group_name}': {len(missing)} resids not found "
                f"in enzyme structure and will be ignored: {sorted(missing)}"
            )

        if group_name in groups:
            logger.info(f"Custom protein group '{group_name}' overrides default AA class group")

        groups[group_name] = kept
        logger.debug(f"Custom protein group '{group_name}': {len(kept)} valid residues")

    return groups
def resolve_polymer_type_selections(
    universe: "Universe",
    polymer_type_selections: dict[str, str] | None = None,
    polymer_chain: str = "C",
) -> list[str]:
    """Resolve polymer type selections and return list of polymer types.

    If explicit selections are provided, validates them and returns the keys
    whose selection matches at least one atom. If None, auto-detects polymer
    types from the residue names found in the polymer chain.

    Parameters
    ----------
    universe : Universe
        MDAnalysis Universe with loaded topology.
    polymer_type_selections : dict[str, str], optional
        Mapping of type name to MDAnalysis selection string.
        If None, auto-detects from the selection ``chainID`` followed by
        *polymer_chain*.
    polymer_chain : str
        Chain ID for auto-detection when *polymer_type_selections* is None.
        Defaults to ``"C"`` (PolyzyMD chain convention).

    Returns
    -------
    list[str]
        List of polymer type names. With explicit selections these are the
        selection keys, in input order, that matched atoms; with
        auto-detection these are the sorted unique residue names of the
        polymer chain. Empty if nothing matched or the selection failed.

    Notes
    -----
    Resolution is best-effort: selections that fail or match no atoms are
    logged as warnings rather than raised.

    Examples
    --------
    >>> # Auto-detect from chain C
    >>> polymer_types = resolve_polymer_type_selections(universe, None)
    >>> print(polymer_types)
    ['SBM', 'EGM']

    >>> # Explicit selections
    >>> selections = {"SBMA": "chainID C and resname SBM"}
    >>> polymer_types = resolve_polymer_type_selections(universe, selections)
    >>> print(polymer_types)
    ['SBMA']
    """
    # Imported locally so that importing this module still does not pull in
    # MDAnalysis at runtime (the module-level import is TYPE_CHECKING-only).
    from MDAnalysis.exceptions import SelectionError

    # SelectionError is what MDAnalysis raises for an unparsable selection
    # string; it is not a ValueError/AttributeError/TypeError subclass, so it
    # has to be listed explicitly for the warning paths below to be reached.
    selection_failures = (SelectionError, ValueError, AttributeError, TypeError)

    if polymer_type_selections is not None:
        # Explicit mode: keep only the type names whose selection hits atoms.
        valid_types: list[str] = []
        for type_name, selection in polymer_type_selections.items():
            try:
                atoms = universe.select_atoms(selection)
                n_atoms = len(atoms)
            except selection_failures as e:
                logger.warning(f"Invalid selection for polymer type '{type_name}': {e}")
                continue

            if n_atoms > 0:
                valid_types.append(type_name)
                logger.debug(f"Polymer type '{type_name}': {n_atoms} atoms ({selection})")
            else:
                logger.warning(
                    f"Polymer type '{type_name}' selection matched no atoms: {selection}"
                )
        return valid_types

    # Auto-detect mode: every distinct residue name in the polymer chain.
    try:
        polymer_atoms = universe.select_atoms(f"chainID {polymer_chain}")
        if len(polymer_atoms) == 0:
            logger.warning(f"No atoms found in chainID {polymer_chain} for polymer auto-detection")
            return []
        # Normalise to plain str so the result matches the declared list[str]
        # (MDAnalysis typically hands back NumPy string scalars).
        resnames = {str(name) for name in polymer_atoms.residues.resnames}
    except selection_failures as e:
        logger.warning(f"Failed to auto-detect polymer types: {e}")
        return []

    logger.debug(f"Auto-detected polymer types from chain {polymer_chain}: {resnames}")
    return sorted(resnames)