"""
Builder for enzyme/protein components.
This module handles loading PDB structures and partitioning them
for use with OpenFF force fields.
"""
from __future__ import annotations

import logging
from pathlib import Path
from typing import TYPE_CHECKING, Optional, Union

if TYPE_CHECKING:
    from openff.toolkit import Molecule, Topology

    from polyzymd.config.schema import EnzymeConfig
LOGGER = logging.getLogger(__name__)
[docs]
class EnzymeBuilder:
    """Builder for loading and preparing enzyme structures.

    This class handles:

    - Loading PDB structures into OpenFF Topology
    - Basic validation of the loaded structure

    The PDB file should be properly prepared with:

    - Correct protonation states
    - Standard amino acid residue names
    - Sequential residue numbering

    The most recently loaded topology and the path it came from are kept on
    the instance and exposed through the ``topology`` and ``pdb_path``
    properties. Both are ``None`` until ``build()`` has succeeded.

    Example:
        >>> builder = EnzymeBuilder()
        >>> topology = builder.build("path/to/enzyme.pdb")
        >>> print(f"Loaded enzyme with {topology.n_atoms} atoms")
    """

    def __init__(self) -> None:
        """Initialize the EnzymeBuilder with no structure loaded."""
        self._topology: Optional[Topology] = None
        self._pdb_path: Optional[Path] = None

    @property
    def topology(self) -> "Optional[Topology]":
        """Get the loaded enzyme topology, or ``None`` if nothing is loaded."""
        return self._topology

    @property
    def pdb_path(self) -> Optional[Path]:
        """Get the path to the loaded PDB file, or ``None`` if nothing is loaded."""
        return self._pdb_path

    def build(self, pdb_path: Union[str, Path]) -> "Topology":
        """Load an enzyme structure from a PDB file.

        On success the topology and the path are stored on the builder; if
        loading fails, the previously stored state is left untouched.

        Args:
            pdb_path: Path to the enzyme PDB file.

        Returns:
            OpenFF Topology with the enzyme structure.

        Raises:
            FileNotFoundError: If the PDB file does not exist.
        """
        pdb_path = Path(pdb_path)

        # Check the path before importing the toolkit so that a bad path is
        # reported immediately, without depending on the import succeeding.
        if not pdb_path.exists():
            raise FileNotFoundError(f"Enzyme PDB file not found: {pdb_path}")

        # Imported here rather than at module level; the module itself only
        # imports Topology for type checking.
        from openff.toolkit import Topology

        LOGGER.info(f"Loading enzyme from {pdb_path}")

        # Load the PDB into an OpenFF Topology
        # Note: Topology.from_pdb() correctly preserves residue names and numbers
        # from the PDB file. We previously called partition() here, but that was
        # overwriting residue numbers with internal IDs, breaking downstream analysis.
        topology = Topology.from_pdb(str(pdb_path))

        LOGGER.info(
            f"Successfully loaded enzyme: {topology.n_molecules} molecule(s), "
            f"{topology.n_atoms} atoms"
        )

        # Only record state once loading has succeeded.
        self._topology = topology
        self._pdb_path = pdb_path
        return topology

    def build_from_config(self, config: "EnzymeConfig") -> "Topology":
        """Load enzyme from a configuration object.

        Logs ``config.name`` and delegates to ``build()`` with
        ``config.pdb_path``.

        Args:
            config: EnzymeConfig with PDB path.

        Returns:
            OpenFF Topology with the enzyme structure.

        Raises:
            FileNotFoundError: If the configured PDB file does not exist.
        """
        LOGGER.info(f"Building enzyme: {config.name}")
        return self.build(config.pdb_path)

    def get_molecule(self) -> "Molecule":
        """Get the first (and typically only) molecule from the topology.

        Returns:
            The enzyme molecule, i.e. molecule index 0 of the loaded topology.

        Raises:
            RuntimeError: If no topology has been loaded.
        """
        if self._topology is None:
            raise RuntimeError("No enzyme topology loaded. Call build() first.")
        return self._topology.molecule(0)

    def validate(self) -> bool:
        """Validate the loaded enzyme topology.

        The topology must contain at least one molecule and at least one atom.

        Returns:
            True if validation passes.

        Raises:
            RuntimeError: If no topology has been loaded.
            ValueError: If validation fails.
        """
        if self._topology is None:
            raise RuntimeError("No enzyme topology loaded. Call build() first.")

        # Check that we have at least one molecule
        if self._topology.n_molecules == 0:
            raise ValueError("Enzyme topology contains no molecules")

        # Check that the molecule has atoms
        if self._topology.n_atoms == 0:
            raise ValueError("Enzyme topology contains no atoms")

        LOGGER.info("Enzyme topology validation passed")
        return True