# Source code for polyzymd.builders.enzyme

"""
Builder for enzyme/protein components.

This module loads prepared PDB structures into OpenFF Topology objects,
for use with OpenFF force fields, and runs basic validation on the result.
"""

from __future__ import annotations

import logging
from pathlib import Path
from typing import TYPE_CHECKING, Optional, Union

from openff.toolkit import Molecule, Topology

if TYPE_CHECKING:
    from polyzymd.config.schema import EnzymeConfig

LOGGER = logging.getLogger(__name__)


class EnzymeBuilder:
    """Builder for loading and preparing enzyme structures.

    This class handles:

    - Loading PDB structures into OpenFF Topology
    - Basic validation of the loaded structure

    The PDB file should be properly prepared with:

    - Correct protonation states
    - Standard amino acid residue names
    - Sequential residue numbering

    Example:
        >>> builder = EnzymeBuilder()
        >>> topology = builder.build("path/to/enzyme.pdb")
        >>> print(f"Loaded enzyme with {topology.n_atoms} atoms")
    """

    def __init__(self) -> None:
        """Initialize the EnzymeBuilder with no structure loaded."""
        # Both attributes are set together at the end of a successful build().
        self._topology: Optional[Topology] = None
        self._pdb_path: Optional[Path] = None

    @property
    def topology(self) -> Optional[Topology]:
        """Get the loaded enzyme topology, or None before build() succeeds."""
        return self._topology

    @property
    def pdb_path(self) -> Optional[Path]:
        """Get the path to the loaded PDB file, or None before build() succeeds."""
        return self._pdb_path

    def build(self, pdb_path: Union[str, Path]) -> Topology:
        """Load an enzyme structure from a PDB file.

        On success the topology and the resolved path are stored on the
        builder and exposed through the ``topology`` and ``pdb_path``
        properties. On failure the previously stored values are left as is.

        Args:
            pdb_path: Path to the enzyme PDB file.

        Returns:
            OpenFF Topology with the enzyme structure.

        Raises:
            FileNotFoundError: If ``pdb_path`` does not exist or is not a
                regular file (for example, a directory).
        """
        pdb_path = Path(pdb_path)
        # is_file() instead of exists(): a directory "exists" but would only
        # fail later inside the PDB parser with a less helpful error.
        if not pdb_path.is_file():
            raise FileNotFoundError(f"Enzyme PDB file not found: {pdb_path}")

        LOGGER.info("Loading enzyme from %s", pdb_path)

        # Load the PDB into an OpenFF Topology.
        # Note: Topology.from_pdb() correctly preserves residue names and
        # numbers from the PDB file. We previously called partition() here,
        # but that was overwriting residue numbers with internal IDs,
        # breaking downstream analysis.
        topology = Topology.from_pdb(str(pdb_path))

        LOGGER.info(
            "Successfully loaded enzyme: %s molecule(s), %s atoms",
            topology.n_molecules,
            topology.n_atoms,
        )

        # Only record state once loading has fully succeeded.
        self._topology = topology
        self._pdb_path = pdb_path
        return topology

    def build_from_config(self, config: EnzymeConfig) -> Topology:
        """Load enzyme from a configuration object.

        Args:
            config: EnzymeConfig; its ``name`` is logged and its
                ``pdb_path`` is passed to :meth:`build`.

        Returns:
            OpenFF Topology with the enzyme structure.

        Raises:
            FileNotFoundError: Propagated from :meth:`build`.
        """
        LOGGER.info("Building enzyme: %s", config.name)
        return self.build(config.pdb_path)

    def get_molecule(self) -> Molecule:
        """Get the first (and typically only) molecule from the topology.

        Returns:
            The enzyme molecule, i.e. ``topology.molecule(0)``.

        Raises:
            RuntimeError: If no topology has been loaded.
        """
        if self._topology is None:
            raise RuntimeError("No enzyme topology loaded. Call build() first.")
        return self._topology.molecule(0)

    def validate(self) -> bool:
        """Validate the loaded enzyme topology.

        The checks are intentionally basic: the topology must contain at
        least one molecule and at least one atom.

        Returns:
            True if validation passes.

        Raises:
            RuntimeError: If no topology has been loaded.
            ValueError: If validation fails.
        """
        if self._topology is None:
            raise RuntimeError("No enzyme topology loaded. Call build() first.")

        # Check that we have at least one molecule
        if self._topology.n_molecules == 0:
            raise ValueError("Enzyme topology contains no molecules")

        # Check that the molecule has atoms
        if self._topology.n_atoms == 0:
            raise ValueError("Enzyme topology contains no atoms")

        LOGGER.info("Enzyme topology validation passed")
        return True