"""File: gmxmd.py
Description:
This module provides classes and functions for setting up, running, and
analyzing molecular dynamics (MD) simulations using GROMACS. The main
classes include:
- GmxSystem: Provides methods to prepare simulation files, process PDB
files, run GROMACS commands, and perform various analyses on MD data.
- MDRun: A subclass of GmxSystem dedicated to executing MD simulations and
performing post-processing tasks (e.g., RMSF, RMSD, covariance analysis).
Usage:
Import this module and instantiate the GmxSystem or MDRun classes to set up
and run your MD simulations.
Requirements:
- Python 3.x
- MDAnalysis
- NumPy
- Pandas
- GROMACS (with CLI tools such as gmx_mpi, gmx, etc.)
- The reforge package and its dependencies
Author: DY
Date: 2025-02-27
"""
from pathlib import Path
import importlib.resources
import shutil
import subprocess as sp
from reforge import cli, pdbtools, io
from reforge.pdbtools import AtomList
from reforge.utils import cd, clean_dir, logger
from reforge.mdsystem.mdsystem import MDSystem, MDRun
################################################################################
# GMX system class
################################################################################
[docs]
class GmxSystem(MDSystem):
"""Class to set up and analyze protein-nucleotide-lipid systems for MD
simulations using GROMACS.
Most attributes are paths to files and directories needed to set up
and run the MD simulation.
"""
MDATDIR = importlib.resources.files("reforge") / "martini" / "datdir"
MMDPDIR = importlib.resources.files("reforge") / "martini" / "datdir" / "mdp"
MITPDIR = importlib.resources.files("reforge") / "martini" / "itp"
def __init__(self, sysdir, sysname):
"""Initializes the MD system with required directories and file paths.
Parameters
----------
sysdir (str): Base directory for the system files.
sysname (str): Name of the MD system.
Sets up paths for various files required for coarse-grained MD simulation.
"""
super().__init__(sysdir, sysname)
self.sysgro = self.root / "system.gro"
self.systop = self.root / "system.top"
self.sysndx = self.root / "system.ndx"
self.mdpdir = self.root / "mdp"
# Define gro directory (used in make_gro_file)
self.grodir = self.root / "gro"
[docs]
def gmx(self, command="-h", clinput=None, clean_wdir=True, **kwargs):
"""Executes a GROMACS command from the system's root directory.
Parameters
----------
command (str): The GROMACS command to run (default: "-h").
clinput (str, optional): Input to pass to the command's stdin.
clean_wdir (bool, optional): If True, cleans the working directory after execution.
kwargs: Additional keyword arguments to pass to gromacs.
"""
with cd(self.root):
cli.gmx(command, clinput=clinput, **kwargs)
if clean_wdir:
clean_dir()
[docs]
def prepare_files(self):
"""Prepares the simulation by creating necessary directories and copying input files.
The method:
- Creates directories for proteins, nucleotides, topologies, maps, mdp files,
coarse-grained PDBs, GRO files, MD runs, data, and PNG outputs.
- Copies .mdp files from the master MDP directory.
- Copies 'water.gro' and 'atommass.dat' from the master data directory.
- Copies .itp files from the master ITP directory to the system topology directory.
"""
logger.info("Preparing files and directories")
self.prodir.mkdir(parents=True, exist_ok=True)
self.nucdir.mkdir(parents=True, exist_ok=True)
self.topdir.mkdir(parents=True, exist_ok=True)
self.mapdir.mkdir(parents=True, exist_ok=True)
self.mdpdir.mkdir(parents=True, exist_ok=True)
self.cgdir.mkdir(parents=True, exist_ok=True)
self.datdir.mkdir(parents=True, exist_ok=True)
self.pngdir.mkdir(parents=True, exist_ok=True)
# Copy .mdp files from master MDP directory
for file in self.MMDPDIR.iterdir():
if file.name.endswith(".mdp"):
outpath = self.mdpdir / file.name
shutil.copy(file, outpath)
# Copy water.gro and atommass.dat from master data directory
shutil.copy(self.MDATDIR / "water.gro", self.root)
shutil.copy(self.MDATDIR / "atommass.dat", self.root)
# Copy .itp files from master ITP directory
for file in self.MITPDIR.iterdir():
if file.name.endswith(".itp"):
outpath = self.topdir / file.name
shutil.copy(file, outpath)
[docs]
def make_cg_structure(self, **kwargs):
"""Merges coarse-grained PDB files into a single solute PDB file."""
logger.info("Merging CG PDB files into a single solute PDB...")
with cd(self.root):
cg_pdb_files = [p.name for p in self.cgdir.iterdir()]
# cg_pdb_files = pdbtools.sort_uld(cg_pdb_files)
cg_pdb_files = sort_for_gmx(cg_pdb_files)
cg_pdb_files = [self.cgdir / fname for fname in cg_pdb_files]
all_atoms = AtomList()
for file in cg_pdb_files:
atoms = pdbtools.pdb2atomlist(file)
all_atoms.extend(atoms)
all_atoms.renumber()
all_atoms.write_pdb(self.solupdb)
[docs]
def make_box(self, **kwargs):
"""Sets up simulation box with GROMACS editconf command.
Parameters
----------
kwargs: Additional keyword arguments for the GROMACS editconf command. Defaults:
- d: Distance parameter (default: 1.0)
- bt: Box type (default: "dodecahedron").
"""
kwargs.setdefault("d", 1.0)
kwargs.setdefault("bt", "dodecahedron")
with cd(self.root):
cli.gmx("editconf", f=self.solupdb, o=self.solupdb, **kwargs)
[docs]
def make_cg_topology(self, add_resolved_ions=False, prefix="chain"):
"""Creates the system topology file by including all relevant ITP files and
defining the system and molecule sections.
Parameters
----------
add_resolved_ions (bool, optional): If True, counts and includes resolved ions.
prefix (str, optional): Prefix for ITP files to include (default: "chain").
Writes the topology file (self.systop) with include directives and molecule counts.
"""
logger.info("Writing system topology...")
itp_files = [p.name for p in self.topdir.glob(f'{prefix}*itp')]
# itp_files = pdbtools.sort_uld(itp_files)
itp_files = sort_for_gmx(itp_files)
with self.systop.open("w") as f:
# Include section
f.write('#define GO_VIRT"\n')
f.write("#define RUBBER_BANDS\n")
f.write('#include "topol/martini_v3.0.0.itp"\n')
f.write('#include "topol/martini_v3.0.0_rna.itp"\n')
f.write('#include "topol/martini_ions.itp"\n')
if any(p.name == "go_atomtypes.itp" for p in self.topdir.iterdir()):
f.write('#include "topol/go_atomtypes.itp"\n')
f.write('#include "topol/go_nbparams.itp"\n')
f.write('#include "topol/martini_v3.0.0_solvents_v1.itp"\n')
f.write('#include "topol/martini_v3.0.0_phospholipids_v1.itp"\n')
f.write('#include "topol/martini_v3.0.0_ions_v1.itp"\n')
f.write("\n")
for filename in itp_files:
f.write(f'#include "topol/{filename}"\n')
# System name and molecule count
f.write("\n[ system ]\n")
f.write(f"Martini system for {self.sysname}\n")
f.write("\n[molecules]\n")
f.write("; name\t\tnumber\n")
for filename in itp_files:
molecule_name = Path(filename).stem
f.write(f"{molecule_name}\t\t1\n")
# Add resolved ions if requested.
if add_resolved_ions:
ions = self.count_resolved_ions()
for ion, count in ions.items():
if count > 0:
f.write(f"{ion} {count}\n")
[docs]
def make_gro_file(self, d=1.25, bt="dodecahedron"):
"""Generates the final GROMACS GRO file from coarse-grained PDB files.
Parameters
----------
d (float, optional): Distance parameter for the editconf command (default: 1.25).
bt (str, optional): Box type for the editconf command (default: "dodecahedron").
Converts PDB files to GRO files, merges them, and adjusts the system box.
"""
with cd(self.root):
cg_pdb_files = pdbtools.sort_uld([p.name for p in self.cgdir.iterdir()])
for file in cg_pdb_files:
if file.endswith(".pdb"):
pdb_file = self.cgdir / file
# Replace suffix and directory component as in the original string manipulation
gro_file_str = str(pdb_file).replace(".pdb", ".gro").replace("cgpdb", "gro")
gro_file = Path(gro_file_str)
command = f"gmx_mpi editconf -f {pdb_file} -o {gro_file}"
sp.run(command.split(), check=True)
# Merge all .gro files.
gro_files = sorted([p.name for p in self.grodir.iterdir()])
total_count = 0
for filename in gro_files:
if filename.endswith(".gro"):
filepath = self.grodir / filename
with filepath.open("r") as in_f:
lines = in_f.readlines()
atom_count = int(lines[1].strip())
total_count += atom_count
with self.sysgro.open("w") as out_f:
out_f.write(f"{self.sysname} \n")
out_f.write(f" {total_count}\n")
for filename in gro_files:
if filename.endswith(".gro"):
filepath = self.grodir / filename
with filepath.open("r") as in_f:
lines = in_f.readlines()[2:-1]
for line in lines:
out_f.write(line)
out_f.write("10.00000 10.00000 10.00000\n")
command = f"gmx_mpi editconf -f {self.sysgro} -d {d} -bt {bt} -o {self.sysgro}"
sp.run(command.split(), check=True)
[docs]
def solvate(self, **kwargs):
"""Solvates the system using GROMACS solvate command.
Parameters
----------
kwargs: Additional parameters for the solvate command. Defaults:
- cp: "solute.pdb"
- cs: "water.gro"
"""
kwargs.setdefault("cp", "solute.pdb")
kwargs.setdefault("cs", "water.gro")
self.gmx("solvate", p=self.systop, o=self.syspdb, **kwargs)
[docs]
def add_bulk_ions(self, solvent="W", **kwargs):
""" Adds bulk ions to neutralize the system using GROMACS genion.
Parameters
----------
solvent (str, optional):
Solvent residue name (default: "W").
kwargs (dict):
Additional parameters for genion. Defaults include:
- conc: 0.15
- pname: "NA"
- nname: "CL"
- neutral: ""
"""
kwargs.setdefault("conc", 0.15)
kwargs.setdefault("pname", "NA")
kwargs.setdefault("nname", "CL")
kwargs.setdefault("neutral", "")
self.gmx("grompp",
f=self.mdpdir / "ions.mdp", c=self.syspdb, p=self.systop, o="ions.tpr")
self.gmx("genion", clinput=f"{solvent}\n", # clinput=f"{solvent}\n",
s="ions.tpr", p=self.systop, o=self.syspdb, **kwargs)
self.gmx("editconf", f=self.syspdb, o=self.sysgro)
clean_dir(self.root, "ions.tpr")
[docs]
def make_system_ndx(self, backbone_atoms=("CA", "P", "C1'"), water_resname='W'):
"""Creates an index (NDX) file for the system, separating solute,
backbone, solvent, and individual chains.
Parameters
----------
backbone_atoms : list, optional
List of atom names to include in the backbone (default: ["CA", "P", "C1'"]).
"""
logger.info("Making index file from %s...", self.syspdb)
system = pdbtools.pdb2atomlist(self.syspdb)
solute = pdbtools.pdb2atomlist(self.solupdb)
solvent = AtomList(system[len(solute):])
backbone = solute.mask(backbone_atoms, mode="name")
not_water = system.mask_out(water_resname, mode='resname')
system.write_ndx(self.sysndx, header="[ System ]", append=False, wrap=15)
solute.write_ndx(self.sysndx, header="[ Solute ]", append=True, wrap=15)
backbone.write_ndx(self.sysndx, header="[ Backbone ]", append=True, wrap=15)
solvent.write_ndx(self.sysndx, header="[ Solvent ]", append=True, wrap=15)
not_water.write_ndx(self.sysndx, header="[ Not_Water ]", append=True, wrap=15)
chids = self.chains
for chid in chids:
chain = solute.mask(chid, mode="chid")
chain.write_ndx(self.sysndx, header=f"[ chain_{chid} ]", append=True, wrap=15)
logger.info("Written index to %s", self.sysndx)
[docs]
def initmd(self, runname):
"""Initializes a new GMX MD run.
Parameters
----------
runname (str): Name for the MD run.
Returns:
MDRun: An instance of the MDRun class for the specified run.
"""
mdrun = MDRun(self.sysdir, self.sysname, runname)
return mdrun
[docs]
class GmxRun(MDRun):
"""Subclass of GmxSystem for executing molecular dynamics (MD) simulations
and performing post-processing analyses.
"""
def __init__(self, sysdir, sysname, runname):
"""Initializes the MD run environment with additional directories for analysis.
Parameters
----------
sysdir (str): Base directory for the system.
sysname (str): Name of the MD system.
runname (str): Name for the MD run.
"""
super().__init__(sysdir, sysname, runname)
self.sysgro = self.root / "system.gro"
self.systop = self.root / "system.top"
self.sysndx = self.root / "system.ndx"
self.mdpdir = self.root / "mdp"
self.str = self.rundir / "mdc.pdb" # Structure file
self.trj = self.rundir / "mdc.trr" # Trajectory file
self.trj = self.trj if self.trj.exists() else self.rundir / "mdc.xtc"
[docs]
def gmx(self, command="-h", clinput=None, clean_wdir=True, **kwargs):
"""Executes a GROMACS command from the run's root directory.
Parameters
----------
command (str): The GROMACS command to run (default: "-h").
clinput (str, optional): Input to pass to the command's stdin.
clean_wdir (bool, optional): If True, cleans the working directory after execution.
kwargs: Additional keyword arguments to pass to gromacs.
"""
with cd(self.rundir):
cli.gmx(command, clinput=clinput, **kwargs)
if clean_wdir:
clean_dir()
[docs]
def empp(self, **kwargs):
"""Prepares the energy minimization run using GROMACS grompp.
Parameters
----------
kwargs: Additional parameters for grompp. Defaults include:
- f: Path to .mdp file.
- c: Structure file.
- r: Reference structure.
- p: Topology file.
- n: Index file.
- o: Output TPR file ("em.tpr").
"""
kwargs.setdefault("f", self.mdpdir / "em_cg.mdp")
kwargs.setdefault("c", self.sysgro)
kwargs.setdefault("r", self.sysgro)
kwargs.setdefault("p", self.systop)
kwargs.setdefault("n", self.sysndx)
kwargs.setdefault("o", "em.tpr")
self.gmx('grompp', **kwargs)
[docs]
def hupp(self, **kwargs):
"""Prepares the heat-up phase using GROMACS grompp.
Parameters
----------
kwargs: Additional parameters for grompp. Defaults include:
- f: Path to .mdp file
- c: Starting structure ("em.gro").
- r: Reference structure ("em.gro").
- p: Topology file.
- n: Index file.
- o: Output TPR file ("hu.tpr").
"""
kwargs.setdefault("f", self.mdpdir / "hu_cg.mdp")
kwargs.setdefault("c", "em.gro")
kwargs.setdefault("r", "em.gro")
kwargs.setdefault("p", self.systop)
kwargs.setdefault("n", self.sysndx)
kwargs.setdefault("o", "hu.tpr")
self.gmx('grompp', **kwargs)
[docs]
def eqpp(self, **kwargs):
"""Prepares the equilibration phase using GROMACS grompp.
Parameters
----------
kwargs: Additional parameters for grompp. Defaults include:
- f: Path to .mdp file.
- c: Starting structure ("hu.gro").
- r: Reference structure ("hu.gro").
- p: Topology file.
- n: Index file.
- o: Output TPR file ("eq.tpr").
"""
kwargs.setdefault("f", self.mdpdir / "eq_cg.mdp")
kwargs.setdefault("c", "hu.gro")
kwargs.setdefault("r", "hu.gro")
kwargs.setdefault("p", self.systop)
kwargs.setdefault("n", self.sysndx)
kwargs.setdefault("o", "eq.tpr")
self.gmx('grompp', **kwargs)
[docs]
def mdpp(self, **kwargs):
"""Prepares the production MD run using GROMACS grompp.
Parameters
----------
kwargs: Additional parameters for grompp. Defaults include:
- f: Path to .mdp file.
- c: Starting structure ("eq.gro").
- r: Reference structure ("eq.gro").
- p: Topology file.
- n: Index file.
- o: Output TPR file ("md.tpr").
"""
kwargs.setdefault("f", self.mdpdir / "md_cg.mdp")
kwargs.setdefault("c", "eq.gro")
kwargs.setdefault("r", "eq.gro")
kwargs.setdefault("p", self.systop)
kwargs.setdefault("n", self.sysndx)
kwargs.setdefault("o", "md.tpr")
self.gmx('grompp', **kwargs)
[docs]
def mdrun(self, **kwargs):
"""Executes the production MD run using GROMACS mdrun.
Parameters
----------
kwargs: Additional parameters for mdrun. Defaults include:
- deffnm: "md"
- nsteps: "-2"
- ntomp: "8"
"""
kwargs.setdefault("deffnm", "md")
kwargs.setdefault("nsteps", "-2")
kwargs.setdefault("ntomp", "8")
kwargs.setdefault("pin", "on")
kwargs.setdefault("pinstride", "1")
self.gmx('mdrun', **kwargs)
[docs]
def trjconv(self, clinput=None, **kwargs):
"""Converts trajectories using GROMACS trjconv.
Parameters
----------
clinput (str, optional): Input to be passed to trjconv.
kwargs: Additional parameters for trjconv.
"""
self.gmx('trjconv', clinput=clinput, **kwargs)
[docs]
def convert_tpr(self, clinput=None, **kwargs):
"""Converts TPR files using GROMACS convert-tpr."""
self.gmx('convert-tpr', clinput=clinput, **kwargs)
[docs]
def rmsf(self, clinput=None, **kwargs):
"""Calculates RMSF using GROMACS rmsf.
Parameters
----------
clinput (str, optional): Input for the rmsf command.
kwargs: Additional parameters for rmsf. Defaults include:
- s: Structure file.
- f: Trajectory file.
- o: Output xvg file.
"""
xvg_file = self.rmsdir / "rmsf.xvg"
npy_file = self.rmsdir / "rmsf.npy"
kwargs.setdefault("s", self.str)
kwargs.setdefault("f", self.trj)
kwargs.setdefault("o", xvg_file)
kwargs.setdefault("xvg", "none")
self.gmx('rmsf', clinput=clinput, **kwargs)
io.xvg2npy(xvg_file, npy_file, usecols=[1])
[docs]
def rmsd(self, clinput=None, **kwargs):
"""Calculates RMSD using GROMACS rms.
Parameters
----------
clinput (str, optional): Input for the rms command.
kwargs: Additional parameters for rms. Defaults include:
- s: Structure file.
- f: Trajectory file.
- o: Output xvg file.
"""
xvg_file = self.rmsdir / "rmsd.xvg"
npy_file = self.rmsdir / "rmsd.npy"
kwargs.setdefault("s", self.str)
kwargs.setdefault("f", self.trj)
kwargs.setdefault("o", xvg_file)
kwargs.setdefault("xvg", "none")
self.gmx('rms', clinput=clinput, **kwargs)
io.xvg2npy(xvg_file, npy_file, usecols=[0, 1])
[docs]
def rdf(self, clinput=None, **kwargs):
"""Calculates the radial distribution function using GROMACS rdf.
Parameters
----------
clinput (str, optional): Input for the rdf command.
kwargs: Additional parameters for rdf. Defaults include:
- f: Trajectory file.
- s: Structure file.
- n: Index file.
"""
kwargs.setdefault("f", "mdc.xtc")
kwargs.setdefault("s", "mdc.pdb")
kwargs.setdefault("n", self.sysndx)
kwargs.setdefault("xvg", "none")
self.gmx('rdf', clinput=clinput, **kwargs)
[docs]
def cluster(self, clinput=None, **kwargs):
"""Performs clustering using GROMACS cluster.
Parameters
----------
clinput (str, optional): Input for the clustering command.
kwargs: Additional parameters for cluster.
"""
kwargs.setdefault("s", self.str)
kwargs.setdefault("f", self.trj)
with cd(self.cludir):
cli.gmx('cluster', clinput=clinput, **kwargs)
[docs]
def covar(self, clinput=None, **kwargs):
"""Calculates and diagonalizes the covariance matrix using GROMACS covar.
Parameters
----------
clinput (str, optional): Input for the covar command.
kwargs: Additional parameters for covar. Defaults include:
- f: Trajectory file.
- s: Structure file.
- n: Index file.
"""
kwargs.setdefault("f", self.trj)
kwargs.setdefault("s", self.str)
with cd(self.covdir):
cli.gmx('covar', clinput=clinput, **kwargs)
[docs]
def anaeig(self, clinput=None, **kwargs):
r"""Analyzes eigenvectors using GROMACS anaeig.
Parameters
----------
clinput (str, optional): Input for the anaeig command.
kwargs: Additional parameters for anaeig. Defaults include:
- f: Trajectory file.
- s: Structure file.
- v: Output eigenvector file.
"""
kwargs.setdefault("f", self.trj)
kwargs.setdefault("s", self.str)
kwargs.setdefault("v", "eigenvec.trr")
with cd(self.covdir):
cli.gmx('anaeig', clinput=clinput, **kwargs)
[docs]
def make_edi(self, clinput=None, **kwargs):
"""Prepares files for essential dynamics analysis using GROMACS make-edi.
Parameters
----------
clinput (str, optional): Input for the make-edi command.
kwargs: Additional parameters for make-edi. Defaults include:
- f: Eigenvector file.
- s: Structure file.
"""
kwargs.setdefault("f", "eigenvec.trr")
kwargs.setdefault("s", self.str)
with cd(self.covdir):
cli.gmx('make_edi', clinput=clinput, **kwargs)
[docs]
def get_rmsf_by_chain(self, **kwargs):
"""Calculates RMSF for each chain in the system using GROMACS rmsf.
Parameters
----------
kwargs: Additional parameters for the rmsf command. Defaults include:
- f: Trajectory file.
- s: Structure file.
- n: Index file.
- res: Whether to output per-residue RMSF (default: "no").
- fit: Whether to fit the trajectory (default: "yes").
"""
kwargs.setdefault("s", self.str)
kwargs.setdefault("f", self.trj)
kwargs.setdefault("n", self.sysndx)
kwargs.setdefault("res", "yes")
kwargs.setdefault("fit", "yes")
kwargs.setdefault("xvg", "none")
for idx, chain in enumerate(self.chains):
idx = idx + 5
xvg_file = self.rmsdir / f"crmsf_{chain}.xvg"
npy_file = self.rmsdir / f"crmsf_{chain}.npy"
self.gmx('rmsf', clinput=f"{idx}\n{idx}\n", o=xvg_file, **kwargs)
io.xvg2npy(xvg_file, npy_file, usecols=[1])
[docs]
def get_rmsd_by_chain(self, **kwargs):
"""Calculates RMSD for each chain in the system using GROMACS rmsd.
Parameters
----------
kwargs: Additional parameters for the rmsd command. Defaults include:
- f: Trajectory file.
- s: Structure file.
- n: Index file.
"""
kwargs.setdefault("s", self.str)
kwargs.setdefault("f", self.trj)
kwargs.setdefault("n", self.sysndx)
kwargs.setdefault("fit", "rot+trans")
kwargs.setdefault("xvg", "none")
for idx, chain in enumerate(self.chains):
idx = idx + 5
xvg_file = self.rmsdir / f"crmsd_{chain}.xvg"
npy_file = self.rmsdir / f"crmsd_{chain}.npy"
self.gmx('rms', clinput=f"{idx}\n{idx}\n", o=xvg_file, **kwargs)
io.xvg2npy(xvg_file, npy_file, usecols=[1])
#######################
[docs]
def sort_for_gmx(data):
return sorted(data, key=lambda x: (
0 if x.split('_')[1][0].isupper() else 1 if x.split('_')[1][0].islower() else 2,
x.split('_')[1]
))