Source code for polyzymd.builders.solvent

"""
Builder for solvent components.

This module handles solvation with water, ions, and optional co-solvents
using PACKMOL for molecular packing.

Solvent Parameterization
------------------------
This module uses pre-computed partial charges for solvent molecules to ensure:

1. **Consistency**: All copies of a solvent molecule have identical parameters
2. **Speed**: Charges are computed once, not N times for N molecules
3. **Reproducibility**: Same charges across different simulation runs

For built-in solvents (water models, DMSO, ethanol, etc.), charges are loaded
from pre-computed SDF files. For custom solvents, AM1BCC charges are computed
once and cached in ~/.polyzymd/solvent_cache/ for future use.

See Also
--------
polyzymd.data.solvent_molecules : Module providing pre-parameterized solvents
"""

from __future__ import annotations

import logging
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union

import numpy as np
from numpy.typing import NDArray
from openff.toolkit import Molecule, Topology
from openff.units import Quantity

if TYPE_CHECKING:
    from polyzymd.config.schema import SolventConfig

LOGGER = logging.getLogger(__name__)

# Water model type
WaterModelType = Literal["tip3p", "spce", "tip4p", "tip4pew", "opc"]

# Box shape type
BoxShapeType = Literal["cube", "rhombic_dodecahedron", "truncated_octahedron"]


[docs] @dataclass class CoSolvent: """Specification for a co-solvent component. Supports two specification methods: - volume_fraction: Specify as fraction (0-1), e.g., 0.30 for 30% v/v - concentration: Specify as molarity (mol/L) Note: The molecule is NOT created in __post_init__ to avoid running charge calculations prematurely. Instead, molecules are loaded via get_solvent_molecule() in SolventBuilder.solvate() which uses cached charges to ensure all copies have identical parameters. Attributes: name: Identifier for the co-solvent. smiles: SMILES string for the molecule. volume_fraction: Volume fraction (0-1), mutually exclusive with concentration. concentration: Molar concentration (mol/L), mutually exclusive with volume_fraction. density: Density in g/mL (required for volume_fraction calculation). residue_name: 3-letter residue name. molecule: OpenFF Molecule (assigned later with cached charges). """ name: str smiles: str volume_fraction: Optional[float] = None concentration: Optional[float] = None density: Optional[float] = None # g/mL residue_name: str = "COS" molecule: Optional[Molecule] = field(default=None, repr=False)
[docs] def __post_init__(self) -> None: """Validate co-solvent specification. Note: We intentionally do NOT create the molecule here. This is done later in SolventBuilder.solvate() using get_solvent_molecule() which provides proper charge caching. """ # Set residue name if not already 3 chars if len(self.residue_name) > 3: self.residue_name = self.residue_name[:3].upper() # Validate that exactly one specification method is used if self.volume_fraction is None and self.concentration is None: raise ValueError( f"CoSolvent '{self.name}': Must specify either volume_fraction or concentration" ) if self.volume_fraction is not None and self.concentration is not None: raise ValueError( f"CoSolvent '{self.name}': Cannot specify both volume_fraction and concentration" ) # Density is required for volume_fraction if self.volume_fraction is not None and self.density is None: raise ValueError( f"CoSolvent '{self.name}': density (g/mL) is required for volume_fraction calculation" )
[docs] @dataclass class SolvationCounts: """Molecule counts from solvation for PDB chain/residue assignment. This dataclass stores the number of each molecule type added during solvation. It is used by SystemBuilder to assign PDB-compliant chain IDs and residue numbers. The molecule order in the solvated topology is: [solute molecules] + [water] + [Na+] + [Cl-] + [co-solvent 1] + [co-solvent 2] + ... Attributes: water: Number of water molecules added. na: Number of Na+ ions added. cl: Number of Cl- ions added. co_solvents: List of (name, count) tuples for each co-solvent type. """ water: int na: int cl: int co_solvents: List[Tuple[str, int]] = field(default_factory=list) @property def total_solvent_molecules(self) -> int: """Total number of solvent molecules (water + ions + co-solvents).""" cosolvent_total = sum(count for _, count in self.co_solvents) return self.water + self.na + self.cl + cosolvent_total
[docs] @dataclass class SolventComposition: """Complete specification of solvent composition. Attributes: water_model: Water model to use. co_solvents: List of co-solvent specifications. nacl_concentration: NaCl concentration in mol/L. kcl_concentration: KCl concentration in mol/L. mgcl2_concentration: MgCl2 concentration in mol/L. neutralize: Whether to neutralize system charge. """ water_model: WaterModelType = "tip3p" co_solvents: List[CoSolvent] = field(default_factory=list) nacl_concentration: float = 0.1 # mol/L kcl_concentration: float = 0.0 mgcl2_concentration: float = 0.0 neutralize: bool = True @property def water_volume_fraction(self) -> float: """Get the water volume fraction (1 - sum of co-solvent fractions). Note: Only counts co-solvents specified by volume_fraction. Co-solvents specified by concentration don't reduce water volume. """ total_cosolvent = sum( cs.volume_fraction for cs in self.co_solvents if cs.volume_fraction is not None ) return 1.0 - total_cosolvent
[docs] class SolventBuilder: """Builder for solvating molecular systems. This class handles: - Water solvation with different water models - Ion addition for neutralization and ionic strength - Co-solvent addition with specified volume fractions - Box shape and padding configuration Example: >>> builder = SolventBuilder() >>> composition = SolventComposition( ... water_model="tip3p", ... nacl_concentration=0.15, ... co_solvents=[CoSolvent("dmso", "CS(=O)C", 0.1)] ... ) >>> solvated = builder.solvate( ... topology=solute_topology, ... composition=composition, ... padding=1.2, # nm ... ) """
[docs] def __init__(self) -> None: """Initialize the SolventBuilder.""" self._solvated_topology: Optional[Topology] = None self._box_vectors: Optional[NDArray] = None self._composition: Optional[SolventComposition] = None self._solvation_counts: Optional[SolvationCounts] = None
@property def solvated_topology(self) -> Optional[Topology]: """Get the solvated topology.""" return self._solvated_topology @property def box_vectors(self) -> Optional[NDArray]: """Get the box vectors of the solvated system.""" return self._box_vectors @property def solvation_counts(self) -> Optional[SolvationCounts]: """Get the molecule counts from solvation. Returns: SolvationCounts with water, ion, and co-solvent molecule counts, or None if solvate() has not been called. """ return self._solvation_counts
[docs] def solvate( self, topology: Topology, composition: Optional[SolventComposition] = None, padding: float = 1.2, box_shape: BoxShapeType = "rhombic_dodecahedron", target_density: float = 1.0, tolerance: float = 2.0, ) -> Topology: """Solvate a topology with water, ions, and optional co-solvents. Args: topology: OpenFF Topology to solvate. composition: Solvent composition specification. padding: Distance from solute to box edge in nm. box_shape: Box geometry. target_density: Target density in g/mL. tolerance: Minimum molecular spacing for PACKMOL in Angstrom. Returns: Solvated OpenFF Topology. """ from polyzymd.data.solvent_molecules import get_solvent_molecule from polyzymd.utils import boxvectors from polyzymd.utils.packmol import solvate_with_packmol if composition is None: composition = SolventComposition() self._composition = composition LOGGER.info( f"Solvating system with {composition.water_model} water, " f"padding={padding} nm, shape={box_shape}" ) # Get box shape matrix box_shape_matrix = self._get_box_shape_matrix(box_shape) # Calculate box vectors from solute bounding box + padding padding_qty = Quantity(padding, "nanometer") min_box_vecs = boxvectors.get_topology_bbox(topology) box_vecs = boxvectors.pad_box_vectors_uniform(min_box_vecs, padding_qty) # Apply box shape transformation box_vecs = box_shape_matrix @ box_vecs LOGGER.info(f"Computed box vectors: {box_vecs}") # Center topology in box self._center_topology_in_box(topology, box_vecs) # Calculate box volume and target masses box_vol = boxvectors.get_box_volume(box_vecs, units_as_openmm=False) target_density_qty = Quantity(target_density, "gram / milliliter") target_mass = box_vol * target_density_qty solute_mass = self._calculate_topology_mass(topology) solvent_mass = target_mass - solute_mass LOGGER.info(f"Target solvent mass: {solvent_mass}") # Get water molecule with charges water = self._get_water_molecule(composition.water_model) # Calculate numbers of each component water_mass = sum(atom.mass for atom in water.atoms) molarity_pure_water = Quantity(55.5, "mole / liter") # Calculate ions na = Molecule.from_smiles("[Na+]") cl = Molecule.from_smiles("[Cl-]") nacl_mass = sum(atom.mass for atom in na.atoms) + sum(atom.mass for atom in cl.atoms) nacl_conc = Quantity(composition.nacl_concentration, "mole / liter") nacl_mass_fraction = (nacl_conc * nacl_mass) / (molarity_pure_water * water_mass) nacl_mass_to_add = solvent_mass * nacl_mass_fraction nacl_to_add = (nacl_mass_to_add / nacl_mass).m_as("dimensionless").round() # Calculate water to add water_mass_to_add = solvent_mass - nacl_mass_to_add water_to_add = (water_mass_to_add / water_mass).m_as("dimensionless").round() # Adjust for co-solvents (reduce water proportionally) if composition.co_solvents: water_fraction = composition.water_volume_fraction water_to_add = int(water_to_add * water_fraction) # Neutralize system solute_charge = sum(mol.total_charge for mol in topology.molecules) if composition.neutralize: na_to_add = int(np.ceil(nacl_to_add - solute_charge.m / 2.0)) cl_to_add = int(np.floor(nacl_to_add + solute_charge.m / 2.0)) else: na_to_add = int(nacl_to_add) cl_to_add = int(nacl_to_add) LOGGER.info(f"Adding {int(water_to_add)} water, {na_to_add} Na+, {cl_to_add} Cl-") # Build molecule and count lists solvent_molecules = [water, na, cl] solvent_counts = [int(water_to_add), na_to_add, cl_to_add] # Track co-solvent counts for SolvationCounts cosolvent_counts_list: List[Tuple[str, int]] = [] # Add co-solvents (using cached charges for consistency) for cosolvent in composition.co_solvents: if cosolvent.molecule is None: # Load molecule with pre-computed charges to ensure all copies # of the same co-solvent have identical parameters cosolvent.molecule = get_solvent_molecule( name=cosolvent.name, smiles=cosolvent.smiles, residue_name=cosolvent.residue_name, ) # Get molar mass of co-solvent cosolvent_molar_mass = sum(atom.mass for atom in cosolvent.molecule.atoms) if cosolvent.volume_fraction is not None: # ============================================================= # VOLUME FRACTION METHOD # ============================================================= # Formula: n = (V_box × φ × ρ) / M # Where: # V_box = simulation box volume # φ = volume fraction (e.g., 0.30 for 30%) # ρ = co-solvent density (g/mL) # M = molar mass (g/mol) # # This assumes ideal mixing (volumes are additive). # ============================================================= cosolvent_density = Quantity(cosolvent.density, "gram / milliliter") # Volume of co-solvent = box_volume × volume_fraction cosolvent_volume = box_vol * cosolvent.volume_fraction # Mass of co-solvent = volume × density cosolvent_mass_to_add = cosolvent_volume * cosolvent_density # Number of molecules = mass / molar_mass n_cosolvent = int( (cosolvent_mass_to_add / cosolvent_molar_mass).m_as("dimensionless").round() ) LOGGER.info( f"Adding {n_cosolvent} {cosolvent.name} molecules " f"({cosolvent.volume_fraction * 100:.1f}% v/v, ρ={cosolvent.density} g/mL)" ) elif cosolvent.concentration is not None: # ============================================================= # CONCENTRATION METHOD # ============================================================= # Formula: n = C × V × N_A # Where: # C = concentration (mol/L) # V = box volume (L) # N_A = Avogadro's number (implicit in unit conversion) # # This directly gives the number of moles, then molecules. # ============================================================= conc = Quantity(cosolvent.concentration, "mole / liter") n_moles = conc * box_vol n_cosolvent = int(n_moles.m_as("dimensionless").round()) LOGGER.info( f"Adding {n_cosolvent} {cosolvent.name} molecules ({cosolvent.concentration} M)" ) else: # Should not reach here due to validation in CoSolvent.__post_init__ raise ValueError( f"CoSolvent '{cosolvent.name}' has neither volume_fraction nor concentration" ) solvent_molecules.append(cosolvent.molecule) solvent_counts.append(n_cosolvent) cosolvent_counts_list.append((cosolvent.name, n_cosolvent)) # Pack the box using PACKMOL (with CONECT-record overflow protection) solvated_top = solvate_with_packmol( molecules=solvent_molecules, number_of_copies=solvent_counts, solute=topology, box_vectors=box_vecs, tolerance_angstrom=tolerance, ) # Set residue names self._set_solvent_residue_names(solvated_top, composition) self._solvated_topology = solvated_top self._box_vectors = box_vecs # Store solvation counts for PDB chain/residue assignment self._solvation_counts = SolvationCounts( water=int(water_to_add), na=na_to_add, cl=cl_to_add, co_solvents=cosolvent_counts_list, ) LOGGER.info( f"Solvation complete: {solvated_top.n_molecules} molecules, " f"{solvated_top.n_atoms} atoms" ) return solvated_top
[docs] def solvate_from_config( self, topology: Topology, config: "SolventConfig", ) -> Topology: """Solvate using configuration object. Args: topology: OpenFF Topology to solvate. config: SolventConfig with solvent settings. Returns: Solvated OpenFF Topology. """ # Build composition from config co_solvents = [ CoSolvent( name=cs.name, smiles=cs.smiles, # type: ignore[arg-type] # Validated in schema volume_fraction=cs.volume_fraction, concentration=cs.concentration, density=cs.density, residue_name=cs.residue_name or cs.name[:3].upper(), ) for cs in config.co_solvents ] composition = SolventComposition( water_model=config.primary.model.value, co_solvents=co_solvents, nacl_concentration=config.ions.nacl_concentration, kcl_concentration=config.ions.kcl_concentration, mgcl2_concentration=config.ions.mgcl2_concentration, neutralize=config.ions.neutralize, ) return self.solvate( topology=topology, composition=composition, padding=config.box.padding, box_shape=config.box.shape.value, target_density=config.box.target_density, tolerance=config.box.tolerance, )
def _get_box_shape_matrix(self, shape: BoxShapeType) -> NDArray: """Get the transformation matrix for the box shape. Args: shape: Box shape identifier. Returns: 3x3 transformation matrix. """ from openff.interchange.components import _packmol as packmol if shape == "cube": return packmol.UNIT_CUBE elif shape == "rhombic_dodecahedron": return packmol.RHOMBIC_DODECAHEDRON elif shape == "truncated_octahedron": return packmol.TRUNCATED_OCTAHEDRON else: LOGGER.warning(f"Unknown box shape '{shape}', using cube") return packmol.UNIT_CUBE def _get_water_molecule(self, model: WaterModelType) -> Molecule: """Get a water molecule with the appropriate charges. Uses pre-computed charges from the solvent cache for consistency. Args: model: Water model identifier. Returns: OpenFF Molecule for water with correct partial charges. """ from polyzymd.data.solvent_molecules import get_solvent_molecule # Map water model name to canonical form # get_solvent_molecule handles: tip3p, spce, tip4pew, opc, etc. return get_solvent_molecule(model) def _center_topology_in_box(self, topology: Topology, box_vecs: NDArray) -> None: """Center the topology at the center of the box. Args: topology: Topology to center. box_vecs: Box vectors (3x3 array with units). """ from openff.toolkit import unit # Calculate center of mass com = self._calculate_center_of_mass(topology) # Calculate box center box_center = np.sum(box_vecs, axis=0) / 2 # Translation vector trans_vec = box_center - com # Apply translation old_positions = topology.get_positions() new_positions = old_positions + trans_vec topology.set_positions(new_positions) LOGGER.debug(f"Centered topology, translation: {trans_vec}") def _calculate_center_of_mass(self, topology: Topology) -> NDArray: """Calculate the center of mass of a topology. Args: topology: Topology to analyze. Returns: Center of mass as array with units. """ from openff.toolkit import unit all_coords = [] all_masses = [] for molecule in topology.molecules: if molecule.conformers: coords = molecule.conformers[0].m_as(unit.angstrom) all_coords.append(coords) for atom in molecule.atoms: all_masses.append(atom.mass.magnitude) if not all_coords: return np.array([0.0, 0.0, 0.0]) * unit.angstrom positions = np.concatenate(all_coords, axis=0) masses = np.array(all_masses) com = np.average(positions, axis=0, weights=masses) * unit.angstrom return com def _calculate_topology_mass(self, topology: Topology) -> Quantity: """Calculate the total mass of a topology. Args: topology: Topology to analyze. Returns: Total mass with units. """ total_mass = sum(sum(atom.mass for atom in mol.atoms) for mol in topology.molecules) return total_mass def _set_solvent_residue_names( self, topology: Topology, composition: SolventComposition ) -> None: """Set residue names for solvent molecules. Args: topology: Solvated topology. composition: Solvent composition info. """ # Build SMILES to residue name mapping smiles_to_residue = { "[H][O][H]": "HOH", "[Na+]": "Na+", "[Cl-]": "Cl-", } # Add co-solvents for cs in composition.co_solvents: if cs.molecule: smiles_to_residue[cs.molecule.to_smiles()] = cs.residue_name # Apply to topology for mol in topology.molecules: mol_smiles = mol.to_smiles() if mol_smiles in smiles_to_residue: residue_name = smiles_to_residue[mol_smiles] for atom in mol.atoms: atom.metadata["residue_name"] = residue_name
[docs] def validate(self) -> bool: """Validate the solvated topology. Returns: True if validation passes. Raises: RuntimeError: If no topology has been solvated. ValueError: If validation fails. """ if self._solvated_topology is None: raise RuntimeError("No solvated topology. Call solvate() first.") if self._solvated_topology.n_atoms == 0: raise ValueError("Solvated topology contains no atoms") if self._box_vectors is None: raise ValueError("No box vectors defined") LOGGER.info("Solvated topology validation passed") return True