Source code for reforge.mdsystem.mmmd

"""
Module mmmd
===========

This module defines classes and functions to set up and run coarse-grained (CG)
molecular dynamics simulations using OpenMM and associated tools. It includes
the system preparation, file management, and MD run routines.

Classes
-------
MmSystem
    Sets up and analyzes protein-nucleotide-lipid systems for MD simulations.
MdRun
    Inherits from MmSystem to perform an MD simulation run, including energy
    minimization, equilibration, and production.
    
Helper Functions
----------------
sort_uld(alist)
    Sorts characters in a list so that uppercase letters come first, then lowercase, 
    followed by digits.
"""

import os
import sys
import importlib.resources
import shutil
import openmm as mm
from openmm import app
from openmm.unit import nanometer, molar
from reforge import pdbtools


################################################################################
# CG system class
################################################################################


[docs]
class MmSystem:
    """Set up and analyze protein–nucleotide–lipid systems for CG MD simulation.

    This class initializes file paths and directories required for running CG
    molecular dynamics simulations.

    Parameters
    ----------
    sysdir : str
        Directory for the system files.
    sysname : str
        Name of the system.
    **kwargs : dict, optional
        Additional keyword arguments (currently unused).

    Attributes
    ----------
    sysname : str
        Name of the system.
    sysdir : str
        Absolute path to the system directory.
    wdir : str
        Working directory (sysdir joined with sysname).
    inpdb : str
        Path to the input PDB file.
    syspdb : str
        Path to the system PDB file.
    sysxml : str
        Path to the system XML file.
    mdcpdb : str
        Path to the MD configuration PDB file.
    trjpdb : str
        Path to the trajectory PDB file.
    prodir : str
        Directory for protein files.
    nucdir : str
        Directory for nucleotide files.
    iondir : str
        Directory for ion files.
    ionpdb : str
        Path to the ion PDB file.
    topdir : str
        Directory for topology files.
    mapdir : str
        Directory for mapping files.
    mdpdir : str
        Directory for MD parameter files.
    cgdir : str
        Directory for CG PDB files.
    mddir : str
        Directory for MD run files.
    datdir : str
        Directory for data files.
    pngdir : str
        Directory for PNG figures.
    _chains : list
        Cached list of chain identifiers.
    _mdruns : list
        Cached list of MD run directories.
    """

    MDATDIR = importlib.resources.files("reforge") / "martini" / "data"
    MITPDIR = importlib.resources.files("reforge") / "martini" / "itp"
    NUC_RESNAMES = [
        "A",
        "C",
        "G",
        "U",
        "RA3",
        "RA5",
        "RC3",
        "RC5",
        "RG3",
        "RG5",
        "RU3",
        "RU5",
    ]

    def __init__(self, sysdir, sysname, **kwargs):
        """Initialize the MD system with required directories and file paths.

        Parameters
        ----------
        sysdir : str
            Directory for the system files.
        sysname : str
            Name of the system.
        **kwargs : dict, optional
            Additional keyword arguments.
            
        Notes
        -----
        Sets up paths to various files required for CG MD simulation.
        """
        self.sysname = sysname
        self.sysdir = os.path.abspath(sysdir)
        self.wdir = os.path.join(self.sysdir, sysname)
        self.inpdb = os.path.join(self.wdir, "inpdb.pdb")
        self.syspdb = os.path.join(self.wdir, "system.pdb")
        self.sysxml = os.path.join(self.wdir, "system.xml")
        self.mdcpdb = os.path.join(self.wdir, "mdc.pdb")
        self.trjpdb = os.path.join(self.wdir, "traj.pdb")
        self.prodir = os.path.join(self.wdir, "proteins")
        self.nucdir = os.path.join(self.wdir, "nucleotides")
        self.iondir = os.path.join(self.wdir, "ions")
        self.ionpdb = os.path.join(self.iondir, "ions.pdb")
        self.topdir = os.path.join(self.wdir, "topol")
        self.mapdir = os.path.join(self.wdir, "map")
        self.mdpdir = os.path.join(self.wdir, "mdp")
        self.cgdir = os.path.join(self.wdir, "cgpdb")
        self.mddir = os.path.join(self.wdir, "mdruns")
        self.datdir = os.path.join(self.wdir, "data")
        self.pngdir = os.path.join(self.wdir, "png")
        self._chains = []
        self._mdruns = []

    @property
    def chains(self):
        """Retrieve the chain identifiers from the system PDB file.

        Returns
        -------
        list
            List of chain identifiers.
            
        Notes
        -----
        If chain IDs have already been extracted, returns the cached list.
        Otherwise, parses the system PDB file.
        """
        if self._chains:
            return self._chains
        chain_names = set()
        with open(self.syspdb, "r", encoding="utf-8") as file:
            for line in file:
                if line.startswith(("ATOM", "HETATM")):
                    chain_id = line[21].strip()  # Chain ID is at column 22 (index 21)
                    if chain_id:
                        chain_names.add(chain_id)
        self._chains = sort_uld(chain_names)
        return self._chains

    @chains.setter
    def chains(self, chains):
        self._chains = chains

    @property
    def mdruns(self):
        """Retrieve the list of MD run directories.

        Returns
        -------
        list
            List of MD run directory names.
            
        Notes
        -----
        If the list is already cached, returns it. Otherwise, looks up the directories
        in the MD runs folder.
        """
        if self._mdruns:
            return self._mdruns
        if not os.path.isdir(self.mddir):
            return self._mdruns
        for adir in sorted(os.listdir(self.mddir)):
            dir_path = os.path.join(self.mddir, adir)
            if os.path.isdir(dir_path):
                self._mdruns.append(adir)
        return self._mdruns

    @mdruns.setter
    def mdruns(self, mdruns):
        self._mdruns = mdruns


[docs]
    def prepare_files(self):
        """Prepare simulation directories and copy necessary input files.

        Notes
        -----
        Creates directories (proteins, nucleotides, topology, mapping, MD parameters,
        CG PDB, MD runs, data, PNG) and copies files from source directories.
        """
        print("Preparing files and directories", file=sys.stderr)
        os.makedirs(self.prodir, exist_ok=True)
        os.makedirs(self.nucdir, exist_ok=True)
        os.makedirs(self.topdir, exist_ok=True)
        os.makedirs(self.mapdir, exist_ok=True)
        os.makedirs(self.mdpdir, exist_ok=True)
        os.makedirs(self.cgdir, exist_ok=True)
        os.makedirs(self.wdir, exist_ok=True)  # In case working directory is not present
        os.makedirs(self.datdir, exist_ok=True)
        os.makedirs(self.pngdir, exist_ok=True)
        # Create additional directory (e.g. grodir) if needed
        grodir = os.path.join(self.wdir, "gro")
        os.makedirs(grodir, exist_ok=True)
        for file in os.listdir(self.MDATDIR):
            if file.endswith(".mdp"):
                fpath = os.path.join(self.MDATDIR, file)
                outpath = os.path.join(self.mdpdir, file)
                shutil.copy(fpath, outpath)
        shutil.copy(os.path.join(self.MDATDIR, "water.gro"), self.wdir)
        for file in os.listdir(self.MITPDIR):
            if file.endswith(".itp"):
                fpath = os.path.join(self.MITPDIR, file)
                outpath = os.path.join(self.topdir, file)
                shutil.copy(fpath, outpath)



[docs]
    def clean_pdb(self, pdb_file, **kwargs):
        """Clean the starting PDB file using PDBfixer by OpenMM.

        Parameters
        ----------
        pdb_file : str
            Path to the input PDB file.
        **kwargs : dict, optional
            Additional keyword arguments for the cleaning routine.
        """
        print("Cleaning the PDB", file=sys.stderr)
        pdbtools.clean_pdb(pdb_file, self.inpdb, **kwargs)



[docs]
    @staticmethod
    def forcefield(force_field="amber14-all.xml", water_model="amber14/tip3p.xml", **kwargs):
        """Create and return an OpenMM ForceField object.

        Parameters
        ----------
        force_field : str, optional
            Force field file or identifier (default: 'amber14-all.xml').
        water_model : str, optional
            Water model file or identifier (default: 'amber14/tip3p.xml').
        **kwargs : dict, optional
            Additional keyword arguments for the ForceField constructor.

        Returns
        -------
        openmm.app.ForceField
            The constructed ForceField object.
        """
        forcefield = app.ForceField(force_field, water_model, **kwargs)
        return forcefield



[docs]
    @staticmethod
    def modeller(inpdb, forcefield, **kwargs):
        """Generate a modeller object with added solvent.

        Parameters
        ----------
        inpdb : str
            Path to the input PDB file.
        forcefield : openmm.app.ForceField
            The force field object to be used.
        **kwargs : dict, optional
            Additional keyword arguments for solvent addition. Default values include:
                model : 'tip3p'
                boxShape : 'dodecahedron'
                padding : 1.0 * nanometer
                positiveIon : 'Na+'
                negativeIon : 'Cl-'
                ionicStrength : 0.1 * molar

        Returns
        -------
        openmm.app.Modeller
            The modeller object with solvent added.
        """
        kwargs.setdefault("model", "tip3p")
        kwargs.setdefault("boxShape", "dodecahedron")
        kwargs.setdefault("padding", 1.0 * nanometer)
        kwargs.setdefault("positiveIon", "Na+")
        kwargs.setdefault("negativeIon", "Cl-")
        kwargs.setdefault("ionicStrength", 0.1 * molar)
        pdb_file = app.PDBFile(inpdb)
        modeller_obj = app.Modeller(pdb_file.topology, pdb_file.positions)
        modeller_obj.addSolvent(forcefield, **kwargs)
        return modeller_obj



[docs]
    def model(self, forcefield, modeller_obj, barostat=None, thermostat=None, **kwargs):
        """Create a simulation model using the specified force field and modeller.

        Parameters
        ----------
        forcefield : openmm.app.ForceField
            The force field object.
        modeller_obj : openmm.app.Modeller
            The modeller object with the prepared topology.
        barostat : openmm.Force, optional
            Barostat force to add (default: None).
        thermostat : openmm.Force, optional
            Thermostat force to add (default: None).
        **kwargs : dict, optional
            Additional keyword arguments for creating the system. Defaults include:
                nonbondedMethod : app.PME
                nonbondedCutoff : 1.0 * nanometer
                constraints : app.HBonds

        Returns
        -------
        openmm.System
            The simulation system created by the force field.
        """
        kwargs.setdefault("nonbondedMethod", app.PME)
        kwargs.setdefault("nonbondedCutoff", 1.0 * nanometer)
        kwargs.setdefault("constraints", app.HBonds)
        model_obj = forcefield.createSystem(modeller_obj.topology, **kwargs)
        if barostat:
            model_obj.addForce(barostat)
        if thermostat:
            model_obj.addForce(thermostat)
        with open(self.syspdb, "w", encoding="utf-8") as file:
            app.PDBFile.writeFile(modeller_obj.topology, modeller_obj.positions, file, keepIds=True)
        with open(self.sysxml, "w", encoding="utf-8") as file:
            file.write(mm.XmlSerializer.serialize(model_obj))
        return model_obj




################################################################################
# MDRun class
################################################################################


[docs]
class MdRun(MmSystem):
    """Run molecular dynamics (MD) simulation using specified input files.

    This class extends MmSystem to perform an MD simulation run including
    energy minimization, equilibration, and production.

    Parameters
    ----------
    sysdir : str
        Directory for the system files.
    sysname : str
        Name of the system.
    runname : str
        Name of the MD run.

    Attributes
    ----------
    rundir : str
        Directory for the run files.
    rmsdir : str
        Directory for RMS analysis.
    covdir : str
        Directory for covariance analysis.
    lrtdir : str
        Directory for LRT analysis.
    cludir : str
        Directory for clustering.
    pngdir : str
        Directory for output figures.
    """

    def __init__(self, sysdir, sysname, runname):
        """Initialize the MD run with required directories and files.

        Parameters
        ----------
        sysdir : str
            Directory for the system files.
        sysname : str
            Name of the system.
        runname : str
            Name of the MD run.
        """
        super().__init__(sysdir, sysname)
        self.runname = runname
        self.rundir = os.path.join(self.mddir, self.runname)
        self.rmsdir = os.path.join(self.rundir, "rms_analysis")
        self.covdir = os.path.join(self.rundir, "cov_analysis")
        self.lrtdir = os.path.join(self.rundir, "lrt_analysis")
        self.cludir = os.path.join(self.rundir, "clusters")
        self.pngdir = os.path.join(self.rundir, "png")


[docs]
    def prepare_files(self):
        """Create directories required for the MD run.

        Notes
        -----
        Creates directories for run outputs including RMS, covariance, LRT,
        clustering, and figures.
        """
        os.makedirs(self.rundir, exist_ok=True)
        os.makedirs(self.rmsdir, exist_ok=True)
        os.makedirs(self.covdir, exist_ok=True)
        os.makedirs(self.lrtdir, exist_ok=True)
        os.makedirs(self.cludir, exist_ok=True)
        os.makedirs(self.pngdir, exist_ok=True)



[docs]
    def build_modeller(self):
        """Generate a modeller object from the system PDB file.

        Returns
        -------
        openmm.app.Modeller
            The modeller object initialized with the system topology and positions.
        """
        pdb_file = app.PDBFile(self.syspdb)
        modeller_obj = app.Modeller(pdb_file.topology, pdb_file.positions)
        return modeller_obj



[docs]
    def simulation(self, modeller_obj, integrator):
        """Initialize and return a simulation object for the MD run.

        Parameters
        ----------
        modeller_obj : openmm.app.Modeller
            The modeller object with prepared topology and positions.
        integrator : openmm.Integrator
            The integrator for the simulation.

        Returns
        -------
        openmm.app.Simulation
            The initialized simulation object.
        """
        simulation_obj = app.Simulation(modeller_obj.topology, self.sysxml, integrator)
        simulation_obj.context.setPositions(modeller_obj.positions)
        return simulation_obj



[docs]
    def save_state(self, simulation_obj, file_prefix="sim"):
        """Save the current simulation state to XML and PDB files.

        Parameters
        ----------
        simulation_obj : openmm.app.Simulation
            The simulation object.
        file_prefix : str, optional
            Prefix for the state files (default: 'sim').

        Notes
        -----
        Saves the simulation state as an XML file and writes the current positions to a PDB file.
        """
        pdb_file = os.path.join(self.rundir, file_prefix + ".pdb")
        xml_file = os.path.join(self.rundir, file_prefix + ".xml")
        simulation_obj.saveState(xml_file)
        state = simulation_obj.context.getState(getPositions=True)
        positions = state.getPositions()
        with open(pdb_file, "w", encoding="utf-8") as file:
            app.PDBFile.writeFile(simulation_obj.topology, positions, file, keepIds=True)



[docs]
    def em(self, simulation_obj, tolerance=100, max_iterations=1000):
        """Perform energy minimization for the simulation.

        Parameters
        ----------
        simulation_obj : openmm.app.Simulation
            The simulation object.
        tolerance : float, optional
            Tolerance for energy minimization (default: 100).
        max_iterations : int, optional
            Maximum number of iterations (default: 1000).

        Notes
        -----
        Minimizes the energy, saves the minimized state, and logs progress.
        """
        print("Minimizing energy...", file=sys.stderr)
        log_file = os.path.join(self.rundir, "em.log")
        reporter = app.StateDataReporter(
            log_file, 100, step=True, potentialEnergy=True, temperature=True
        )
        simulation_obj.reporters.append(reporter)
        simulation_obj.minimizeEnergy(tolerance, max_iterations)
        self.save_state(simulation_obj, "em")
        print("Minimization complete.", file=sys.stderr)



[docs]
    def eq(self, simulation_obj, nsteps=10000, nlog=10000, **kwargs):
        """Run equilibration simulation.

        Parameters
        ----------
        simulation_obj : openmm.app.Simulation
            The simulation object.
        nsteps : int, optional
            Number of steps for equilibration (default: 10000).
        nlog : int, optional
            Logging frequency (default: 10000).
        **kwargs : dict, optional
            Additional keyword arguments for the StateDataReporter.

        Notes
        -----
        Loads the minimized state, runs equilibration, and saves the equilibrated state.
        """
        print("Starting equilibration...")
        kwargs.setdefault("step", True)
        kwargs.setdefault("potentialEnergy", True)
        kwargs.setdefault("temperature", True)
        kwargs.setdefault("density", True)
        em_xml = os.path.join(self.rundir, "em.xml")
        log_file = os.path.join(self.rundir, "eq.log")
        reporter = app.StateDataReporter(log_file, nlog, **kwargs)
        simulation_obj.loadState(em_xml)
        simulation_obj.reporters.append(reporter)
        simulation_obj.step(nsteps)
        self.save_state(simulation_obj, "eq")
        print("Equilibration complete.")



[docs]
    def md(self, simulation_obj, nsteps=100000, nout=1000, nlog=10000, nchk=10000, **kwargs):
        """Run production MD simulation.

        Parameters
        ----------
        simulation_obj : openmm.app.Simulation
            The simulation object.
        nsteps : int, optional
            Number of production steps (default: 100000).
        nout : int, optional
            Frequency for writing trajectory frames (default: 1000).
        nlog : int, optional
            Logging frequency (default: 10000).
        nchk : int, optional
            Checkpoint frequency (default: 10000).
        **kwargs : dict, optional
            Additional keyword arguments for the StateDataReporter.

        Notes
        -----
        Loads the equilibrated state, runs production, and saves the final simulation state.
        """
        print("Production run...")
        kwargs.setdefault("step", True)
        kwargs.setdefault("time", True)
        kwargs.setdefault("potentialEnergy", True)
        kwargs.setdefault("temperature", True)
        kwargs.setdefault("density", False)
        eq_xml = os.path.join(self.rundir, "eq.xml")
        trj_file = os.path.join(self.rundir, "md.dcd")
        log_file = os.path.join(self.rundir, "md.log")
        xml_file = os.path.join(self.rundir, "md.xml")
        pdb_file = os.path.join(self.rundir, "md.pdb")
        trj_reporter = app.DCDReporter(trj_file, nout, append=False)
        log_reporter = app.StateDataReporter(log_file, nlog, **kwargs)
        xml_reporter = app.CheckpointReporter(xml_file, nchk, writeState=True)
        # If PDBReporter is not used, remove the assignment to avoid unused variable warnings.
        reporters = [trj_reporter, log_reporter, xml_reporter]
        simulation_obj.loadState(eq_xml)
        simulation_obj.reporters.extend(reporters)
        simulation_obj.step(nsteps)
        self.save_state(simulation_obj, "md")
        print("Production complete.")




################################################################################
# Helper functions
################################################################################


[docs]
def sort_uld(alist):
    """Sort characters in a list in a specific order.

    Parameters
    ----------
    alist : list of str
        List of characters to sort.

    Returns
    -------
    list of str
        Sorted list with uppercase letters first, then lowercase letters, and digits last.

    Notes
    -----
    This function is used to help organize GROMACS multichain files.
    """
    slist = sorted(alist, key=lambda x: (x.isdigit(), x.islower(), x.isupper(), x))
    return slist