Source code for reforge.mdsystem.mmmd

"""
Module mmmd
===========

This module defines classes and functions to set up and run coarse-grained (CG)
molecular dynamics simulations using OpenMM and associated tools. It includes
the system preparation, file management, and MD run routines.

Classes
-------
MmSystem
    Sets up and analyzes protein-nucleotide-lipid systems for MD simulations.
MdRun
    Inherits from MmSystem to perform an MD simulation run, including energy
    minimization, equilibration, and production.
    
Helper Functions
----------------
sort_uld(alist)
    Sorts characters in a list so that uppercase letters come first, then lowercase, 
    followed by digits.
"""

import os
import sys
import importlib.resources
import shutil
import openmm as mm
from openmm import app
from openmm.unit import nanometer, molar
from reforge import pdbtools


################################################################################
# CG system class
################################################################################

[docs] class MmSystem: """Set up and analyze protein–nucleotide–lipid systems for CG MD simulation. This class initializes file paths and directories required for running CG molecular dynamics simulations. Parameters ---------- sysdir : str Directory for the system files. sysname : str Name of the system. **kwargs : dict, optional Additional keyword arguments (currently unused). Attributes ---------- sysname : str Name of the system. sysdir : str Absolute path to the system directory. wdir : str Working directory (sysdir joined with sysname). inpdb : str Path to the input PDB file. syspdb : str Path to the system PDB file. sysxml : str Path to the system XML file. mdcpdb : str Path to the MD configuration PDB file. trjpdb : str Path to the trajectory PDB file. prodir : str Directory for protein files. nucdir : str Directory for nucleotide files. iondir : str Directory for ion files. ionpdb : str Path to the ion PDB file. topdir : str Directory for topology files. mapdir : str Directory for mapping files. mdpdir : str Directory for MD parameter files. cgdir : str Directory for CG PDB files. mddir : str Directory for MD run files. datdir : str Directory for data files. pngdir : str Directory for PNG figures. _chains : list Cached list of chain identifiers. _mdruns : list Cached list of MD run directories. """ MDATDIR = importlib.resources.files("reforge") / "martini" / "data" MITPDIR = importlib.resources.files("reforge") / "martini" / "itp" NUC_RESNAMES = [ "A", "C", "G", "U", "RA3", "RA5", "RC3", "RC5", "RG3", "RG5", "RU3", "RU5", ] def __init__(self, sysdir, sysname, **kwargs): """Initialize the MD system with required directories and file paths. Parameters ---------- sysdir : str Directory for the system files. sysname : str Name of the system. **kwargs : dict, optional Additional keyword arguments. Notes ----- Sets up paths to various files required for CG MD simulation. """ self.sysname = sysname self.sysdir = os.path.abspath(sysdir) self.wdir = os.path.join(self.sysdir, sysname) self.inpdb = os.path.join(self.wdir, "inpdb.pdb") self.syspdb = os.path.join(self.wdir, "system.pdb") self.sysxml = os.path.join(self.wdir, "system.xml") self.mdcpdb = os.path.join(self.wdir, "mdc.pdb") self.trjpdb = os.path.join(self.wdir, "traj.pdb") self.prodir = os.path.join(self.wdir, "proteins") self.nucdir = os.path.join(self.wdir, "nucleotides") self.iondir = os.path.join(self.wdir, "ions") self.ionpdb = os.path.join(self.iondir, "ions.pdb") self.topdir = os.path.join(self.wdir, "topol") self.mapdir = os.path.join(self.wdir, "map") self.mdpdir = os.path.join(self.wdir, "mdp") self.cgdir = os.path.join(self.wdir, "cgpdb") self.mddir = os.path.join(self.wdir, "mdruns") self.datdir = os.path.join(self.wdir, "data") self.pngdir = os.path.join(self.wdir, "png") self._chains = [] self._mdruns = [] @property def chains(self): """Retrieve the chain identifiers from the system PDB file. Returns ------- list List of chain identifiers. Notes ----- If chain IDs have already been extracted, returns the cached list. Otherwise, parses the system PDB file. """ if self._chains: return self._chains chain_names = set() with open(self.syspdb, "r", encoding="utf-8") as file: for line in file: if line.startswith(("ATOM", "HETATM")): chain_id = line[21].strip() # Chain ID is at column 22 (index 21) if chain_id: chain_names.add(chain_id) self._chains = sort_uld(chain_names) return self._chains @chains.setter def chains(self, chains): self._chains = chains @property def mdruns(self): """Retrieve the list of MD run directories. Returns ------- list List of MD run directory names. Notes ----- If the list is already cached, returns it. Otherwise, looks up the directories in the MD runs folder. """ if self._mdruns: return self._mdruns if not os.path.isdir(self.mddir): return self._mdruns for adir in sorted(os.listdir(self.mddir)): dir_path = os.path.join(self.mddir, adir) if os.path.isdir(dir_path): self._mdruns.append(adir) return self._mdruns @mdruns.setter def mdruns(self, mdruns): self._mdruns = mdruns
[docs] def prepare_files(self): """Prepare simulation directories and copy necessary input files. Notes ----- Creates directories (proteins, nucleotides, topology, mapping, MD parameters, CG PDB, MD runs, data, PNG) and copies files from source directories. """ print("Preparing files and directories", file=sys.stderr) os.makedirs(self.prodir, exist_ok=True) os.makedirs(self.nucdir, exist_ok=True) os.makedirs(self.topdir, exist_ok=True) os.makedirs(self.mapdir, exist_ok=True) os.makedirs(self.mdpdir, exist_ok=True) os.makedirs(self.cgdir, exist_ok=True) os.makedirs(self.wdir, exist_ok=True) # In case working directory is not present os.makedirs(self.datdir, exist_ok=True) os.makedirs(self.pngdir, exist_ok=True) # Create additional directory (e.g. grodir) if needed grodir = os.path.join(self.wdir, "gro") os.makedirs(grodir, exist_ok=True) for file in os.listdir(self.MDATDIR): if file.endswith(".mdp"): fpath = os.path.join(self.MDATDIR, file) outpath = os.path.join(self.mdpdir, file) shutil.copy(fpath, outpath) shutil.copy(os.path.join(self.MDATDIR, "water.gro"), self.wdir) for file in os.listdir(self.MITPDIR): if file.endswith(".itp"): fpath = os.path.join(self.MITPDIR, file) outpath = os.path.join(self.topdir, file) shutil.copy(fpath, outpath)
[docs] def clean_pdb(self, pdb_file, **kwargs): """Clean the starting PDB file using PDBfixer by OpenMM. Parameters ---------- pdb_file : str Path to the input PDB file. **kwargs : dict, optional Additional keyword arguments for the cleaning routine. """ print("Cleaning the PDB", file=sys.stderr) pdbtools.clean_pdb(pdb_file, self.inpdb, **kwargs)
[docs] @staticmethod def forcefield(force_field="amber14-all.xml", water_model="amber14/tip3p.xml", **kwargs): """Create and return an OpenMM ForceField object. Parameters ---------- force_field : str, optional Force field file or identifier (default: 'amber14-all.xml'). water_model : str, optional Water model file or identifier (default: 'amber14/tip3p.xml'). **kwargs : dict, optional Additional keyword arguments for the ForceField constructor. Returns ------- openmm.app.ForceField The constructed ForceField object. """ forcefield = app.ForceField(force_field, water_model, **kwargs) return forcefield
[docs] @staticmethod def modeller(inpdb, forcefield, **kwargs): """Generate a modeller object with added solvent. Parameters ---------- inpdb : str Path to the input PDB file. forcefield : openmm.app.ForceField The force field object to be used. **kwargs : dict, optional Additional keyword arguments for solvent addition. Default values include: model : 'tip3p' boxShape : 'dodecahedron' padding : 1.0 * nanometer positiveIon : 'Na+' negativeIon : 'Cl-' ionicStrength : 0.1 * molar Returns ------- openmm.app.Modeller The modeller object with solvent added. """ kwargs.setdefault("model", "tip3p") kwargs.setdefault("boxShape", "dodecahedron") kwargs.setdefault("padding", 1.0 * nanometer) kwargs.setdefault("positiveIon", "Na+") kwargs.setdefault("negativeIon", "Cl-") kwargs.setdefault("ionicStrength", 0.1 * molar) pdb_file = app.PDBFile(inpdb) modeller_obj = app.Modeller(pdb_file.topology, pdb_file.positions) modeller_obj.addSolvent(forcefield, **kwargs) return modeller_obj
[docs] def model(self, forcefield, modeller_obj, barostat=None, thermostat=None, **kwargs): """Create a simulation model using the specified force field and modeller. Parameters ---------- forcefield : openmm.app.ForceField The force field object. modeller_obj : openmm.app.Modeller The modeller object with the prepared topology. barostat : openmm.Force, optional Barostat force to add (default: None). thermostat : openmm.Force, optional Thermostat force to add (default: None). **kwargs : dict, optional Additional keyword arguments for creating the system. Defaults include: nonbondedMethod : app.PME nonbondedCutoff : 1.0 * nanometer constraints : app.HBonds Returns ------- openmm.System The simulation system created by the force field. """ kwargs.setdefault("nonbondedMethod", app.PME) kwargs.setdefault("nonbondedCutoff", 1.0 * nanometer) kwargs.setdefault("constraints", app.HBonds) model_obj = forcefield.createSystem(modeller_obj.topology, **kwargs) if barostat: model_obj.addForce(barostat) if thermostat: model_obj.addForce(thermostat) with open(self.syspdb, "w", encoding="utf-8") as file: app.PDBFile.writeFile(modeller_obj.topology, modeller_obj.positions, file, keepIds=True) with open(self.sysxml, "w", encoding="utf-8") as file: file.write(mm.XmlSerializer.serialize(model_obj)) return model_obj
################################################################################ # MDRun class ################################################################################
[docs] class MdRun(MmSystem): """Run molecular dynamics (MD) simulation using specified input files. This class extends MmSystem to perform an MD simulation run including energy minimization, equilibration, and production. Parameters ---------- sysdir : str Directory for the system files. sysname : str Name of the system. runname : str Name of the MD run. Attributes ---------- rundir : str Directory for the run files. rmsdir : str Directory for RMS analysis. covdir : str Directory for covariance analysis. lrtdir : str Directory for LRT analysis. cludir : str Directory for clustering. pngdir : str Directory for output figures. """ def __init__(self, sysdir, sysname, runname): """Initialize the MD run with required directories and files. Parameters ---------- sysdir : str Directory for the system files. sysname : str Name of the system. runname : str Name of the MD run. """ super().__init__(sysdir, sysname) self.runname = runname self.rundir = os.path.join(self.mddir, self.runname) self.rmsdir = os.path.join(self.rundir, "rms_analysis") self.covdir = os.path.join(self.rundir, "cov_analysis") self.lrtdir = os.path.join(self.rundir, "lrt_analysis") self.cludir = os.path.join(self.rundir, "clusters") self.pngdir = os.path.join(self.rundir, "png")
[docs] def prepare_files(self): """Create directories required for the MD run. Notes ----- Creates directories for run outputs including RMS, covariance, LRT, clustering, and figures. """ os.makedirs(self.rundir, exist_ok=True) os.makedirs(self.rmsdir, exist_ok=True) os.makedirs(self.covdir, exist_ok=True) os.makedirs(self.lrtdir, exist_ok=True) os.makedirs(self.cludir, exist_ok=True) os.makedirs(self.pngdir, exist_ok=True)
[docs] def build_modeller(self): """Generate a modeller object from the system PDB file. Returns ------- openmm.app.Modeller The modeller object initialized with the system topology and positions. """ pdb_file = app.PDBFile(self.syspdb) modeller_obj = app.Modeller(pdb_file.topology, pdb_file.positions) return modeller_obj
[docs] def simulation(self, modeller_obj, integrator): """Initialize and return a simulation object for the MD run. Parameters ---------- modeller_obj : openmm.app.Modeller The modeller object with prepared topology and positions. integrator : openmm.Integrator The integrator for the simulation. Returns ------- openmm.app.Simulation The initialized simulation object. """ simulation_obj = app.Simulation(modeller_obj.topology, self.sysxml, integrator) simulation_obj.context.setPositions(modeller_obj.positions) return simulation_obj
[docs] def save_state(self, simulation_obj, file_prefix="sim"): """Save the current simulation state to XML and PDB files. Parameters ---------- simulation_obj : openmm.app.Simulation The simulation object. file_prefix : str, optional Prefix for the state files (default: 'sim'). Notes ----- Saves the simulation state as an XML file and writes the current positions to a PDB file. """ pdb_file = os.path.join(self.rundir, file_prefix + ".pdb") xml_file = os.path.join(self.rundir, file_prefix + ".xml") simulation_obj.saveState(xml_file) state = simulation_obj.context.getState(getPositions=True) positions = state.getPositions() with open(pdb_file, "w", encoding="utf-8") as file: app.PDBFile.writeFile(simulation_obj.topology, positions, file, keepIds=True)
[docs] def em(self, simulation_obj, tolerance=100, max_iterations=1000): """Perform energy minimization for the simulation. Parameters ---------- simulation_obj : openmm.app.Simulation The simulation object. tolerance : float, optional Tolerance for energy minimization (default: 100). max_iterations : int, optional Maximum number of iterations (default: 1000). Notes ----- Minimizes the energy, saves the minimized state, and logs progress. """ print("Minimizing energy...", file=sys.stderr) log_file = os.path.join(self.rundir, "em.log") reporter = app.StateDataReporter( log_file, 100, step=True, potentialEnergy=True, temperature=True ) simulation_obj.reporters.append(reporter) simulation_obj.minimizeEnergy(tolerance, max_iterations) self.save_state(simulation_obj, "em") print("Minimization complete.", file=sys.stderr)
[docs] def eq(self, simulation_obj, nsteps=10000, nlog=10000, **kwargs): """Run equilibration simulation. Parameters ---------- simulation_obj : openmm.app.Simulation The simulation object. nsteps : int, optional Number of steps for equilibration (default: 10000). nlog : int, optional Logging frequency (default: 10000). **kwargs : dict, optional Additional keyword arguments for the StateDataReporter. Notes ----- Loads the minimized state, runs equilibration, and saves the equilibrated state. """ print("Starting equilibration...") kwargs.setdefault("step", True) kwargs.setdefault("potentialEnergy", True) kwargs.setdefault("temperature", True) kwargs.setdefault("density", True) em_xml = os.path.join(self.rundir, "em.xml") log_file = os.path.join(self.rundir, "eq.log") reporter = app.StateDataReporter(log_file, nlog, **kwargs) simulation_obj.loadState(em_xml) simulation_obj.reporters.append(reporter) simulation_obj.step(nsteps) self.save_state(simulation_obj, "eq") print("Equilibration complete.")
[docs] def md(self, simulation_obj, nsteps=100000, nout=1000, nlog=10000, nchk=10000, **kwargs): """Run production MD simulation. Parameters ---------- simulation_obj : openmm.app.Simulation The simulation object. nsteps : int, optional Number of production steps (default: 100000). nout : int, optional Frequency for writing trajectory frames (default: 1000). nlog : int, optional Logging frequency (default: 10000). nchk : int, optional Checkpoint frequency (default: 10000). **kwargs : dict, optional Additional keyword arguments for the StateDataReporter. Notes ----- Loads the equilibrated state, runs production, and saves the final simulation state. """ print("Production run...") kwargs.setdefault("step", True) kwargs.setdefault("time", True) kwargs.setdefault("potentialEnergy", True) kwargs.setdefault("temperature", True) kwargs.setdefault("density", False) eq_xml = os.path.join(self.rundir, "eq.xml") trj_file = os.path.join(self.rundir, "md.dcd") log_file = os.path.join(self.rundir, "md.log") xml_file = os.path.join(self.rundir, "md.xml") pdb_file = os.path.join(self.rundir, "md.pdb") trj_reporter = app.DCDReporter(trj_file, nout, append=False) log_reporter = app.StateDataReporter(log_file, nlog, **kwargs) xml_reporter = app.CheckpointReporter(xml_file, nchk, writeState=True) # If PDBReporter is not used, remove the assignment to avoid unused variable warnings. reporters = [trj_reporter, log_reporter, xml_reporter] simulation_obj.loadState(eq_xml) simulation_obj.reporters.extend(reporters) simulation_obj.step(nsteps) self.save_state(simulation_obj, "md") print("Production complete.")
################################################################################ # Helper functions ################################################################################
[docs] def sort_uld(alist): """Sort characters in a list in a specific order. Parameters ---------- alist : list of str List of characters to sort. Returns ------- list of str Sorted list with uppercase letters first, then lowercase letters, and digits last. Notes ----- This function is used to help organize GROMACS multichain files. """ slist = sorted(alist, key=lambda x: (x.isdigit(), x.islower(), x.isupper(), x)) return slist