Source code for reforge.itpio

"""IO for GROMACS topology .itp files.

Description:
    This module provides functions for reading, parsing, formatting, and writing
    GROMACS ITP files. It includes utilities to extract and format different sections
    (such as bonds, angles, atoms, etc.) and higher-level functions tailored for
    Martini RNA and ion topologies.

Requirements:
    - Python 3.x
    - Standard library modules: shutil, typing
    - (Optional) Additional modules for extended functionality.

Author: DY
Date: 2025-02-27
"""

import shutil as sh
from typing import List, Tuple

###################################
# Generic functions
###################################


[docs]
def read_itp(filename):
    """Read a Gromacs ITP file and organize its contents by section.

    Parameters
    ----------
    filename : str
        The path to the ITP file.

    Returns
    -------
    dict
        A dictionary where keys are section names and values are lists of entries,
        each entry being a list of [connectivity, parameters, comment].
    """
    itp_data = {}
    with open(filename, "r", encoding="utf-8") as file:
        for line in file:
            # Skip comments and empty lines
            if line.strip() == "" or line.strip().startswith(";"):
                continue
            # Detect section headers; break long condition into multiple lines.
            if (line.startswith("[") and line.endswith("]\n")):
                tag = line.strip()[2:-2]
                itp_data[tag] = []
            else:
                connectivity, parameters, comment = line2bond(line, tag)
                itp_data[tag].append([connectivity, parameters, comment])
    return itp_data




[docs]
def line2bond(line, tag):
    """Parse a line from an ITP file and return connectivity, parameters, and comment.

    Parameters
    ----------
    line : str
        A line from the ITP file.
    tag : str
        The section tag (e.g. 'bonds', 'angles', etc.).

    Returns
    -------
    tuple
        A tuple (connectivity, parameters, comment) where connectivity is a tuple of ints,
        parameters is a tuple of numbers (first as int, rest as floats), and comment is a string.
    """
    data, _, comment = line.partition(";")
    data = data.split()
    comment = comment.strip()
    if tag == "bonds" or tag == "constraints":
        connectivity = data[:2]
        parameters = data[2:]
    elif tag == "angles":
        connectivity = data[:3]
        parameters = data[3:]
    elif tag == "dihedrals":
        connectivity = data[:4]
        parameters = data[4:]
    elif tag == "virtual_sites3":
        connectivity = data[:4]
        parameters = data[4:]
    else:
        connectivity = data
        parameters = []
    if parameters:
        parameters[0] = int(parameters[0])
        parameters[1:] = [float(i) for i in parameters[1:]]
    connectivity = tuple(int(i) for i in connectivity)
    parameters = tuple(parameters)
    return connectivity, parameters, comment




[docs]
def bond2line(connectivity=None, parameters="", comment=""):
    """Format a bond entry into a string for a Gromacs ITP file.

    Parameters
    ----------
    connectivity : tuple, optional
        Connectivity indices.
    parameters : tuple, optional
        Bond parameters.
    comment : str, optional
        Optional comment.

    Returns
    -------
    str
        A formatted string representing the bond entry.
    """
    connectivity_str = "   ".join(f"{int(atom):5d}" for atom in connectivity)
    type_str = ""
    parameters_str = ""
    if parameters:
        type_str = f"{int(parameters[0]):2d}"
        parameters_str = "   ".join(f"{float(param):7.4f}" for param in parameters[1:])
    line = connectivity_str + "   " + type_str + "   " + parameters_str
    if comment:
        line += " ; " + comment
    line += "\n"
    return line




[docs]
def format_header(molname="molecule", forcefield="", arguments="") -> List[str]:
    """Format the header of the topology file.

    Parameters
    ----------
    molname : str, optional
        Molecule name. Default is "molecule".
    forcefield : str, optional
        Force field identifier.
    arguments : str, optional
        Command-line arguments used.

    Returns
    -------
    List[str]
        A list of header lines.
    """
    lines = [f'; MARTINI ({forcefield}) Coarse Grained topology file for "{molname}"\n']
    lines.append("; Created using the following options:\n")
    lines.append(f"; {arguments}\n")
    lines.append("; " + "#" * 100 + "\n")
    return lines




[docs]
def format_sequence_section(sequence, secstruct) -> List[str]:
    """Format the sequence section.

    Parameters
    ----------
    sequence : iterable
        Sequence characters.
    secstruct : iterable
        Secondary structure characters.

    Returns
    -------
    List[str]
        Formatted lines for the sequence section.
    """
    sequence_str = "".join(sequence)
    secstruct_str = "".join(secstruct)
    lines = ["; Sequence:\n"]
    lines.append(f"; {sequence_str}\n")
    lines.append("; Secondary Structure:\n")
    lines.append(f"; {secstruct_str}\n")
    return lines




[docs]
def format_moleculetype_section(molname="molecule", nrexcl=1) -> List[str]:
    """Format the moleculetype section.

    Parameters
    ----------
    molname : str, optional
        Molecule name. Default is "molecule".
    nrexcl : int, optional
        Number of exclusions. Default is 1.

    Returns
    -------
    List[str]
        Formatted lines for the moleculetype section.
    """
    lines = ["\n[ moleculetype ]\n"]
    lines.append("; Name         Exclusions\n")
    lines.append(f"{molname:<15s} {nrexcl:3d}\n")
    return lines




[docs]
def format_atoms_section(atoms: List[Tuple]) -> List[str]:
    """Format the atoms section for a Gromacs ITP file.

    Parameters
    ----------
    atoms : List[Tuple]
        List of atom records.

    Returns
    -------
    List[str]
        A list of formatted lines.
    """
    lines = ["\n[ atoms ]\n"]
    fs8 = "%5d %5s %5d %5s %5s %5d %7.4f ; %s"
    fs9 = "%5d %5s %5d %5s %5s %5d %7.4f %7.4f ; %s"
    for atom in atoms:
        atom = tuple(atom)
        line = fs9 % atom if len(atom) == 9 else fs8 % atom
        line += "\n"
        lines.append(line)
    return lines




[docs]
def format_bonded_section(header: str, bonds: List[List]) -> List[str]:
    """Format a bonded section (e.g., bonds, angles) for a Gromacs ITP file.

    Parameters
    ----------
    header : str
        Section header.
    bonds : List[List]
        List of bond entries.

    Returns
    -------
    List[str]
        A list of formatted lines.
    """
    lines = [f"\n[ {header} ]\n"]
    for bond in bonds:
        line = bond2line(*bond)
        lines.append(line)
    return lines




[docs]
def format_posres_section(atoms: List[Tuple], posres_fc=500, 
                          selection: List[str] = None) -> List[str]:
    """Format the position restraints section.

    Parameters
    ----------
    atoms : List[Tuple]
        List of atom records.
    posres_fc : float, optional
        Force constant for restraints. Default is 500.
    selection : List[str], optional
        Atom names to select. Defaults to ["BB1", "BB2", "BB3", "SC1"] if not provided.

    Returns
    -------
    List[str]
        A list of formatted lines.
    """
    if selection is None:
        selection = ["BB1", "BB2", "BB3", "SC1"]
    lines = [
        "\n#ifdef POSRES\n",
        f"#define POSRES_FC {posres_fc:.2f}\n",
        " [ position_restraints ]\n",
    ]
    for atom in atoms:
        if atom[4] in selection:
            lines.append(f"  {atom[0]:5d}    1    POSRES_FC    POSRES_FC    POSRES_FC\n")
    lines.append("#endif")
    return lines




[docs]
def write_itp(filename, lines):
    """Write a list of lines to an ITP file.

    Parameters
    ----------
    filename : str
        Output file path.
    lines : List[str]
        Lines to write.
    """
    with open(filename, "w", encoding="utf-8") as file:
        for line in lines:
            file.write(line)



###################################
# HL functions for martini_rna
###################################


[docs]
def make_in_terms(input_file, output_file, dict_of_names):
    """Generate a Martini ITP file using input terms and a dictionary of names.

    Parameters
    ----------
    input_file : str
        Path to the input ITP file.
    output_file : str
        Path to the output ITP file.
    dict_of_names : dict
        Dictionary mapping keys to desired names.
    """
    tag = None
    pairs = []

    def get_sigma(b1, b2):
        list_of_pairs_1 = {("TA4", "TU3"), ("TA5", "TU4"),
                           ("TG3", "TY2"), ("TG4", "TY3"), ("TG5", "TY4")}
        list_of_pairs_2 = {("TA4", "TU2"), ("TA4", "TU4"),
                           ("TA5", "TU3"), ("TG3", "TY3"), ("TG4", "TY2"),
                           ("TG4", "TY4"), ("TG5", "TY3")}
        list_of_pairs_3 = {("TA4", "TY3"), ("TA5", "TY4"), ("TA4", "TY2"),
                           ("TA4", "TY4"), ("TA5", "TY3"), ("TG3", "TU2"),
                           ("TG4", "TU3"), ("TG5", "TU4"), ("TG3", "TU3"),
                           ("TG4", "TU2"), ("TG4", "TU4"), ("TG5", "TU3")}
        if (b1, b2) in list_of_pairs_1 or (b2, b1) in list_of_pairs_1:
            sigma = "2.75000e-01"
        elif (b1, b2) in list_of_pairs_2 or (b2, b1) in list_of_pairs_2:
            sigma = "2.750000e-01"
        elif (b1, b2) in list_of_pairs_3 or (b2, b1) in list_of_pairs_3:
            sigma = "2.750000e-01"
        else:
            sigma = "3.300000e-01"
        return sigma

    with open(output_file, "w", encoding="utf-8") as file:
        file.write("[ atomtypes ]\n")
        dict_of_vdw = {
            "TA1": ("3.250000e-01", "1.000000e-01"),
            "TA2": ("3.250000e-01", "1.000000e-01"),
            "TA3": ("3.250000e-01", "1.000000e-01"),
            "TA4": ("2.800000e-01", "1.368000e-01"),
            "TA5": ("2.800000e-01", "1.368000e-01"),
            "TA6": ("3.250000e-01", "1.000000e-01"),
            "TY1": ("3.250000e-01", "1.000000e-01"),
            "TY2": ("2.800000e-01", "1.000000e-01"),
            "TY3": ("2.800000e-01", "1.000000e-01"),
            "TY4": ("2.800000e-01", "1.000000e-01"),
            "TY5": ("3.250000e-01", "1.000000e-01"),
            "TG1": ("3.250000e-01", "1.000000e-01"),
            "TG2": ("3.250000e-01", "1.000000e-01"),
            "TG3": ("2.800000e-01", "1.368000e-01"),
            "TG4": ("2.800000e-01", "1.368000e-01"),
            "TG5": ("2.800000e-01", "1.368000e-01"),
            "TG6": ("3.250000e-01", "1.000000e-01"),
            "TG7": ("0.400000e-01", "1.368000e-01"),
            "TG8": ("3.250000e-01", "1.000000e-01"),
            "TU1": ("3.250000e-01", "1.000000e-01"),
            "TU2": ("2.800000e-01", "1.000000e-01"),
            "TU3": ("2.800000e-01", "1.368000e-01"),
            "TU4": ("2.800000e-01", "1.000000e-01"),
            "TU5": ("3.250000e-01", "1.000000e-01"),
            "TU6": ("0.400000e-01", "1.368000e-01"),
            "TU7": ("3.250000e-01", "1.000000e-01"),
        }
        for key in dict_of_names.keys():
            file.write(f"{key}  45.000  0.000  A  {dict_of_vdw[key][0]}  {dict_of_vdw[key][1]}\n")
        file.write("\n[ nonbond_params ]\n")

    with open(input_file, "r", encoding="utf-8") as file:
        lines = file.readlines()
    with open(output_file, "a", encoding="utf-8") as file:
        for line in lines:
            if line.startswith(";") or len(line.split()) < 2:
                continue
            parts = line.split()
            atom_name_1 = parts[0].strip()
            atom_name_2 = parts[1].strip()
            if (atom_name_1 in dict_of_names.values() and 
                atom_name_2 in dict_of_names.values()):
                keys_1 = [key for key, value in dict_of_names.items() if value == atom_name_1]
                keys_2 = [key for key, value in dict_of_names.items() if value == atom_name_2]
                for key_1 in keys_1:
                    for key_2 in keys_2:
                        if (key_1, key_2) in pairs or (key_2, key_1) in pairs:
                            continue
                        pairs.append((key_1, key_2))
                        parts[0] = key_1
                        parts[1] = key_2
                        parts[3] = get_sigma(key_1, key_2)
                        file.write("  ".join(parts) + "\n")




[docs]
def make_cross_terms(input_file, output_file, old_name, new_name):
    """Append cross-term entries to an ITP file by replacing an old name with a new one.

    Parameters
    ----------
    input_file : str
        Path to the input ITP file.
    output_file : str
        Path to the output ITP file.
    old_name : str
        The name to be replaced.
    new_name : str
        The replacement name.
    """
    switch = False
    with open(input_file, "r", encoding="utf-8") as file:
        lines = file.readlines()
    with open(output_file, "a", encoding="utf-8") as file:
        for line in lines:
            if line.startswith(";") or len(line.split()) < 2:
                continue
            if line.startswith("[ nonbond_params ]"):
                switch = True
            if switch:
                parts = line.split()
                atom_name_1 = parts[0].strip()
                atom_name_2 = parts[1].strip()
                if atom_name_1 == old_name:
                    parts[0] = new_name
                    file.write("  ".join(parts) + "\n")
                    continue
                if atom_name_2 == old_name:
                    parts[1] = new_name
                    file.write("  ".join(parts) + "\n")
                    continue
            else:
                continue




[docs]
def make_marnatini_itp():
    """Generate and copy a Martini RNA ITP file.

    This high-level function processes a base Martini ITP file using defined name mappings
    and copies the resulting file to several target directories.
    """
    dict_of_names = {
        "TA1": "SC5",
        "TA2": "TN1a",
        "TA3": "TC6",
        "TA4": "TN4r",
        "TA5": "TN4r",
        "TA6": "TN1a",
        "TY1": "SC3",
        "TY2": "TN3",
        "TY3": "TN4",
        "TY4": "TN3",
        "TY5": None,
        "TG1": "SC5",
        "TG2": "TN1a",
        "TG3": "TN3r",
        "TG4": "TN4r",
        "TG5": "TN4r",
        "TG6": "TN1a",
        "TG7": None,
        "TG8": None,
        "TU1": "SC3",
        "TU2": "TN3",
        "TU3": "TN4",
        "TU4": "TN3",
        "TU5": None,
        "TU6": None,
        "TU7": None,
    }
    out_file = "reforge/itp/martini_RNA.itp"
    make_in_terms("reforge/itp/martini.itp", out_file, dict_of_names)
    for new_name, old_name in dict_of_names.items():
        make_cross_terms("reforge/itp/martini.itp", out_file, old_name, new_name)
    sh.copy(out_file, "/scratch/dyangali/reforge/systems/dsRNA/topol/martini_v3.0.0_rna.itp")
    sh.copy(out_file, "/scratch/dyangali/reforge/systems/ssRNA/topol/martini_v3.0.0_rna.itp")
    sh.copy(out_file, "/scratch/dyangali/maRNAtini_sims/dimerization_pmf_us/topol/martini_RNA.itp")
    sh.copy(out_file, "/scratch/dyangali/maRNAtini_sims/angled_dimerization_pmf_us/topol/martini_RNA.itp")




[docs]
def make_ions_itp():
    """Generate an ITP file for ions.

    This function modifies a base Martini ITP file, adjusts parameters, and writes
    the final Martini ions ITP file.
    """
    import pandas as pd

    dict_of_names = {"TMG": "TD"}
    out_file = "reforge/itp/ions.itp"
    for new_name, old_name in dict_of_names.items():
        make_cross_terms("reforge/itp/martini_v3.0.0.itp", out_file, old_name, new_name)
    df = pd.read_csv(out_file, sep="\\s+", header=None)
    df[3] -= 0.08
    tmp_file = "reforge/itp/ions_tmp.itp"
    df.to_csv(tmp_file, sep=" ", header=None, index=False, float_format="%.6e")
    out_file = "reforge/itp/martini_ions.itp"
    new_lines = [
        "[ atomtypes ]\n",
        "TMG  45.000  0.000  A  0.0  0.0\n\n",
        "[ nonbond_params ]\n",
        "TMG TMG 1 3.580000e-01 1.100000e+00\n",
    ]
    with open(tmp_file, "r", encoding="utf-8") as file:
        original_content = file.readlines()
    with open(out_file, "w+", encoding="utf-8") as file:
        file.writelines(new_lines + original_content)




[docs]
def count_itp_atoms(file_path):
    """Count the number of atom entries in the [ atoms ] section of an ITP file.

    Parameters
    ----------
    file_path : str
        Path to the ITP file.

    Returns
    -------
    int
        The atom count, or 0 if an error occurs.
    """
    in_atoms_section = False
    atom_count = 0
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            for line in file:
                line = line.strip()
                if not line or line.startswith(";"):
                    continue
                if line.startswith("[ atoms ]"):
                    in_atoms_section = True
                    continue
                if in_atoms_section and line.startswith("["):
                    break
                if in_atoms_section:
                    atom_count += 1
        return atom_count
    except FileNotFoundError:
        print(f"Error: File {file_path} not found.")
        return 0
    except Exception as e:
        print(f"An error occurred: {e}")
        return 0



if __name__ == "__main__":
    pass
    # make_ions_itp()
    # make_marnatini_itp()