"""Special selection syntax for distance calculations.
This module provides parsing of extended selection syntax for defining
atom positions in distance calculations. It supports:
1. Standard MDAnalysis selections: "resid 133 and name OD1"
2. Midpoint of multiple atoms: "midpoint(resid 133 and name OD1 OD2)"
3. Center of mass of a group: "com(resid 50-75)"
Examples
--------
>>> from polyzymd.analyses.shared.selections import parse_selection_string, get_position
>>>
>>> # Standard selection - single atom
>>> ag = parse_selection(universe, "resid 77 and name OG")
>>> pos = get_position(ag) # Returns position of single atom
>>>
>>> # Midpoint of Asp carboxyl oxygens
>>> ag = parse_selection(universe, "midpoint(resid 133 and name OD1 OD2)")
>>> pos = get_position(ag) # Returns midpoint of OD1 and OD2
>>>
>>> # Center of mass of lid domain
>>> ag = parse_selection(universe, "com(resid 50-75)")
>>> pos = get_position(ag) # Returns COM of residues 50-75
Notes
-----
The `midpoint()` syntax is particularly useful for catalytic residues where
the functional position is between two atoms (e.g., Asp carboxyl oxygens,
Glu carboxyl oxygens).
The `com()` syntax is useful for domain motions where you want to track
the center of mass of a group of residues (e.g., lid opening in lipases).
"""
from __future__ import annotations
import logging
import re
import sys
from dataclasses import dataclass
from enum import Enum
from typing import TYPE_CHECKING
import numpy as np
from numpy.typing import NDArray
if TYPE_CHECKING:
from MDAnalysis.core.groups import AtomGroup
from MDAnalysis.core.universe import Universe
LOGGER = logging.getLogger(__name__)
def _expected_selection_exceptions() -> tuple[type[Exception], ...]:
"""Return exception types that represent expected selection failures.
MDAnalysis exception classes are inspected only when already loaded so
selection helpers remain importable without the analysis stack installed.
"""
exceptions: list[type[Exception]] = [ValueError, AttributeError]
mda_exceptions = sys.modules.get("MDAnalysis.exceptions")
selection_error = getattr(mda_exceptions, "SelectionError", None)
if isinstance(selection_error, type) and issubclass(selection_error, Exception):
exceptions.append(selection_error)
return tuple(exceptions)
# =============================================================================
# Selection Translation (PolyzyMD → MDAnalysis)
# =============================================================================
[docs]
def translate_selection(selection: str) -> str:
"""Translate PolyzyMD selection keywords to MDAnalysis equivalents.
This allows users to use the same selection syntax in analysis as they
use in config.yaml for restraints and other atom selections.
Translations
------------
- ``pdbindex N`` → ``id N`` (PDB ATOM serial number)
The ``pdbindex`` keyword refers to the 1-indexed atom serial number
from the PDB ATOM record (column 7-11), which is what PyMOL displays
as "id". In MDAnalysis, this is accessed via the ``id`` selection keyword.
Note: MDAnalysis also has ``bynum`` which is 1-indexed *positional*
(i.e., bynum 1 = first atom, bynum 2 = second atom), but this does NOT
correspond to PDB serial numbers when there are gaps in numbering.
We use ``id`` because it matches actual PDB serial numbers.
Parameters
----------
selection : str
Selection string with possible PolyzyMD-specific keywords
Returns
-------
str
Selection string with MDAnalysis-compatible keywords
Examples
--------
>>> translate_selection("pdbindex 100 and name CA")
"id 100 and name CA"
>>> translate_selection("midpoint(pdbindex 100 and name OD1 OD2)")
"midpoint(id 100 and name OD1 OD2)"
"""
# pdbindex N → id N (PDB ATOM serial number)
translated = re.sub(r"\bpdbindex\b", "id", selection, flags=re.IGNORECASE)
if translated != selection:
LOGGER.debug("Translated selection keyword: 'pdbindex' → 'id' (PDB ATOM serial number)")
return translated
[docs]
class SelectionMode(str, Enum):
"""Mode for position calculation from atom selection."""
SINGLE = "single" # Single atom position
CENTROID = "centroid" # Center of geometry (default for multiple atoms)
MIDPOINT = "midpoint" # Explicit midpoint (same as centroid but explicit)
COM = "com" # Center of mass
[docs]
@dataclass
class ParsedSelection:
"""Result of parsing a selection string.
Attributes
----------
selection : str
The MDAnalysis selection string (without wrapper function)
mode : SelectionMode
How to compute the position
original : str
The original input string
"""
selection: str
mode: SelectionMode
original: str
def __str__(self) -> str:
return self.original
# Regex patterns for special syntax
_MIDPOINT_PATTERN = re.compile(r"^midpoint\s*\(\s*(.+)\s*\)$", re.IGNORECASE)
_COM_PATTERN = re.compile(r"^com\s*\(\s*(.+)\s*\)$", re.IGNORECASE)
[docs]
def parse_selection_string(selection: str) -> ParsedSelection:
"""Parse a selection string to extract mode and MDAnalysis selection.
Also translates PolyzyMD-specific keywords (like ``pdbindex``) to their
MDAnalysis equivalents (like ``id``).
Parameters
----------
selection : str
Selection string, possibly with special syntax:
- "resid 77 and name OG" - standard MDAnalysis
- "midpoint(resid 133 and name OD1 OD2)" - midpoint mode
- "com(resid 50-75)" - center of mass mode
- "pdbindex 100 and name CA" - PolyzyMD pdbindex (translated to id)
Returns
-------
ParsedSelection
Parsed selection with mode and clean selection string
Examples
--------
>>> parsed = parse_selection_string("midpoint(resid 133 and name OD1 OD2)")
>>> parsed.mode
<SelectionMode.MIDPOINT: 'midpoint'>
>>> parsed.selection
"resid 133 and name OD1 OD2"
>>> parsed = parse_selection_string("pdbindex 100 and name CA")
>>> parsed.selection
"id 100 and name CA"
"""
selection = selection.strip()
# Check for midpoint() syntax
midpoint_match = _MIDPOINT_PATTERN.match(selection)
if midpoint_match:
inner = midpoint_match.group(1).strip()
return ParsedSelection(
selection=translate_selection(inner),
mode=SelectionMode.MIDPOINT,
original=selection,
)
# Check for com() syntax
com_match = _COM_PATTERN.match(selection)
if com_match:
inner = com_match.group(1).strip()
return ParsedSelection(
selection=translate_selection(inner),
mode=SelectionMode.COM,
original=selection,
)
# Standard MDAnalysis selection (also translate)
return ParsedSelection(
selection=translate_selection(selection),
mode=SelectionMode.SINGLE,
original=selection,
)
[docs]
def select_atoms(universe: "Universe", selection: str) -> "AtomGroup":
"""Select atoms from universe using potentially special syntax.
Parameters
----------
universe : Universe
MDAnalysis Universe
selection : str
Selection string (standard or special syntax)
Returns
-------
AtomGroup
Selected atoms
Raises
------
ValueError
If selection matches no atoms, with diagnostic info
"""
from polyzymd.analyses.shared.diagnostics import get_selection_diagnostics
parsed = parse_selection_string(selection)
atoms = universe.select_atoms(parsed.selection)
if len(atoms) == 0:
diag = get_selection_diagnostics(universe, selection)
raise ValueError(f"Selection '{selection}' matched no atoms.\n\n{diag}")
return atoms
[docs]
def get_position(
atoms: "AtomGroup",
mode: SelectionMode = SelectionMode.SINGLE,
) -> NDArray[np.float64]:
"""Get position from atom group based on mode.
Parameters
----------
atoms : AtomGroup
MDAnalysis AtomGroup
mode : SelectionMode
How to compute position:
- SINGLE: Position of single atom (error if multiple)
- CENTROID/MIDPOINT: Center of geometry
- COM: Center of mass
Returns
-------
NDArray[np.float64]
3D position vector [x, y, z]
Raises
------
ValueError
If mode is SINGLE but multiple atoms selected
"""
if mode == SelectionMode.SINGLE:
if len(atoms) == 1:
return atoms.positions[0].astype(np.float64)
if len(atoms) > 1:
raise ValueError(
"SelectionMode.SINGLE requires exactly one atom, "
f"but selection matched {len(atoms)} atoms. "
"Use SelectionMode.MIDPOINT or SelectionMode.COM for multi-atom selections"
)
raise ValueError("Cannot compute position for empty atom selection")
elif mode in (SelectionMode.CENTROID, SelectionMode.MIDPOINT):
return atoms.center_of_geometry().astype(np.float64)
elif mode == SelectionMode.COM:
return atoms.center_of_mass().astype(np.float64)
else:
raise ValueError(f"Unknown selection mode: {mode}")
[docs]
def get_position_from_selection(
universe: "Universe",
selection: str,
) -> NDArray[np.float64]:
"""Get position from selection string in one step.
This is a convenience function that combines parsing, selection,
and position calculation.
Parameters
----------
universe : Universe
MDAnalysis Universe
selection : str
Selection string (standard or special syntax)
Returns
-------
NDArray[np.float64]
3D position vector [x, y, z]
Examples
--------
>>> # Single atom
>>> pos = get_position_from_selection(u, "resid 77 and name OG")
>>>
>>> # Midpoint of Asp carboxyl
>>> pos = get_position_from_selection(u, "midpoint(resid 133 and name OD1 OD2)")
"""
from polyzymd.analyses.shared.diagnostics import get_selection_diagnostics
parsed = parse_selection_string(selection)
atoms = universe.select_atoms(parsed.selection)
if len(atoms) == 0:
diag = get_selection_diagnostics(universe, selection)
raise ValueError(f"Selection '{selection}' matched no atoms.\n\n{diag}")
return get_position(atoms, parsed.mode)
[docs]
def validate_selection(universe: "Universe", selection: str) -> dict:
"""Validate a selection string and return diagnostic info.
Parameters
----------
universe : Universe
MDAnalysis Universe
selection : str
Selection string to validate
Returns
-------
dict
Diagnostic information:
- valid: bool
- n_atoms: int
- mode: str
- atoms: list of atom info dicts
- error: str (if invalid)
- diagnostics: str (detailed diagnostics if invalid)
"""
from polyzymd.analyses.shared.diagnostics import get_selection_diagnostics
try:
parsed = parse_selection_string(selection)
atoms = universe.select_atoms(parsed.selection)
if len(atoms) == 0:
diag = get_selection_diagnostics(universe, selection)
return {
"valid": False,
"error": f"Selection matched no atoms: {parsed.selection}",
"diagnostics": diag,
"mode": parsed.mode.value,
"n_atoms": 0,
}
atom_info = []
for atom in atoms[:10]: # Limit to first 10
atom_info.append(
{
"name": atom.name,
"resname": atom.resname,
"resid": atom.resid,
"index": atom.index,
}
)
return {
"valid": True,
"n_atoms": len(atoms),
"mode": parsed.mode.value,
"atoms": atom_info,
"truncated": len(atoms) > 10,
}
except _expected_selection_exceptions() as exc:
return {
"valid": False,
"error": str(exc),
"n_atoms": 0,
}