Source code for reforge.forge.topology

#!/usr/bin/env python3
"""Topology Module

Description:
    This module provides classes and functions to construct a coarse-grained
    topology from force field parameters. It defines the Topology class along with
    a helper BondList class. Methods are provided to process atoms, bonds, and
    connectivity information, and to generate topology files for coarse-grained
    simulations.

Usage Example:
    >>> from topology import Topology
    >>> from reforge.forcefield import NucleicForceField
    >>> ff = NucleicForceField(...)  # Initialize the force field instance
    >>> topo = Topology(ff, sequence=['A', 'T', 'G', 'C'])
    >>> topo.from_sequence(['A', 'T', 'G', 'C'])
    >>> topo.write_to_itp('output.itp')

Requirements:
    - Python 3.x
    - NumPy
    - reForge utilities and force field modules

Author: DY
Date: YYYY-MM-DD
"""

import logging
from typing import List
import numpy as np
from reforge import itpio

############################################################
# Helper class for working with bonds
############################################################

[docs] class BondList(list): """BondList Class Description: A helper subclass of the built-in list for storing bond information. Each element represents a bond in the form [connectivity, parameters, comment]. Attributes ---------- (inherited from list) Holds individual bond representations. Usage Example: >>> bonds = BondList([['C1-C2', [1.0, 1.5], 'res1 bead1'], ... ['C2-O1', [1.1, 1.6], 'res2 bead2']]) >>> print(bonds.conns) ['C1-C2', 'C2-O1'] >>> bonds.conns = ['C1-C2_mod', 'C2-O1_mod'] >>> print(bonds.conns) ['C1-C2_mod', 'C2-O1_mod'] """ def __add__(self, other): """Implement addition of two BondList objects. Returns ------- BondList A new BondList containing the bonds from self and other. """ return BondList(super().__add__(other)) @property def conns(self): """Get connectivity values from each bond. Returns ------- list List of connectivity values (index 0 of each bond). """ return [bond[0] for bond in self] @conns.setter def conns(self, new_conns): """Set new connectivity values for each bond. Parameters ---------- new_conns : iterable A list-like object of new connectivity values. Must match the length of the BondList. """ if len(new_conns) != len(self): raise ValueError("Length of new connectivity list must match the number of bonds") for i, new_conn in enumerate(new_conns): bond = list(self[i]) bond[0] = new_conn self[i] = bond @property def params(self): """Get parameters from each bond. Returns ------- list List of parameter values (index 1 of each bond). """ return [bond[1] for bond in self] @params.setter def params(self, new_params): """Set new parameter values for each bond. Parameters ---------- new_params : iterable A list-like object of new parameter values. Must match the length of the BondList. """ if len(new_params) != len(self): raise ValueError("Length of new parameters list must match the number of bonds") for i, new_param in enumerate(new_params): bond = list(self[i]) bond[1] = new_param self[i] = bond @property def comms(self): """Get comments from each bond. Returns ------- list List of comments (index 2 of each bond). """ return [bond[2] for bond in self] @comms.setter def comms(self, new_comms): """Set new comments for each bond. Parameters ---------- new_comms : iterable A list-like object of new comment values. Must match the length of the BondList. """ if len(new_comms) != len(self): raise ValueError("Length of new comments list must match the number of bonds") for i, new_comm in enumerate(new_comms): bond = list(self[i]) bond[2] = new_comm self[i] = bond @property def measures(self): """Get measure values from each bond. Returns ------- list List of measures extracted from the second element of the bond parameters. """ return [bond[1][1] for bond in self] @measures.setter def measures(self, new_measures): """Set new measure values for each bond. Parameters ---------- new_measures : iterable A list-like object of new measure values. Must match the length of the BondList. """ if len(new_measures) != len(self): raise ValueError("Length of new measures list must match the number of bonds") for i, new_measure in enumerate(new_measures): bond = list(self[i]) param = list(bond[1]) param[1] = new_measure bond[1] = param self[i] = bond
[docs] def categorize(self): """Categorize bonds based on their comments. Description: Uses the stripped comment (index 2) as a key to group bonds into a dictionary. Returns ------- dict A dictionary mapping each unique comment to a BondList of bonds. """ keys = sorted({comm.strip() for comm in self.comms}) adict = {key: BondList() for key in keys} for bond in self: key = bond[2].strip() adict[key].append(bond) return adict
[docs] def filter(self, condition, bycomm=True): """Filter bonds based on a provided condition. Description: Selects bonds for which the condition (a callable) returns True. By default, the condition is applied to the comment field. Parameters ---------- condition : callable A function that takes a bond (or its comment) as input and returns True if the bond should be included. bycomm : bool, optional If True, the condition is applied to the comment (default is True). Returns ------- BondList A new BondList containing the bonds that meet the condition. """ if bycomm: return BondList([bond for bond in self if condition(bond[2])]) return BondList([bond for bond in self if condition(bond)])
############################################################ # Topology Class ############################################################
[docs] class Topology: """Topology Class Description: Constructs a coarse-grained topology from force field parameters. Provides methods for processing atoms, bonds, and connectivity, and for generating a topology file for coarse-grained simulations. Attributes ---------- ff : object Force field instance. sequence : list List of residue names. name : str Molecule name. nrexcl : int Exclusion parameter. atoms : list List of atom records. bonds, angles, dihs, cons, excls, pairs, vs3s, posres, elnet : BondList BondList instances for various bonded interactions. blist : list List containing all bond-type BondLists. secstruct : list Secondary structure as a list of characters. natoms : int Total number of atoms. """ def __init__(self, forcefield, sequence: List = None, secstruct: List = None, **kwargs) -> None: """Initialize a Topology instance. Description: Initialize a topology with force field parameters, sequence, and optional secondary structure. Atom records and bond lists are initialized here. Parameters ---------- forcefield : object An instance of the nucleic force field class. sequence : list, optional List of residue names. secstruct : list, optional Secondary structure as a list of characters. If not provided, defaults to 'F' for each residue. **kwargs : Additional keyword arguments. Recognized options include: molname : str Molecule name (default: "molecule"). nrexcl : int Exclusion parameter (default: 1). """ molname = kwargs.pop("molname", "molecule") nrexcl = kwargs.pop("nrexcl", 1) self.ff = forcefield self.sequence = sequence self.name = molname self.nrexcl = nrexcl self.atoms: List = [] self.bonds = BondList() self.angles = BondList() self.dihs = BondList() self.cons = BondList() self.excls = BondList() self.pairs = BondList() self.vs3s = BondList() self.posres = BondList() self.elnet = BondList() self.mapping: List = [] self.natoms = len(self.atoms) self.blist = [self.bonds, self.angles, self.dihs, self.cons, self.excls, self.pairs, self.vs3s] self.secstruct = secstruct if secstruct is not None else ["F"] * len(self.sequence) def __iadd__(self, other) -> "Topology": """Implement in-place addition of another Topology instance. Description: Merges another Topology into this one by updating atom numbers and connectivity. Parameters ---------- other : Topology Another Topology instance to merge with. Returns ------- Topology The merged topology (self). """ def update_atom(atom, atom_shift, residue_shift): atom[0] += atom_shift # Update atom id atom[2] += residue_shift # Update residue id atom[5] += atom_shift # Update charge group number return atom def update_bond(bond, atom_shift): conn = bond[0] conn = [idx + atom_shift for idx in conn] return [conn, bond[1], bond[2]] atom_shift = self.natoms residue_shift = len(self.sequence) new_atoms = [update_atom(atom, atom_shift, residue_shift) for atom in other.atoms] self.atoms.extend(new_atoms) for self_attrib, other_attrib in zip(self.blist, other.blist): updated_bonds = [update_bond(bond, atom_shift) for bond in other_attrib] self_attrib.extend(updated_bonds) return self def __add__(self, other) -> "Topology": """Implement addition of two Topology objects. Description: Returns a new Topology that is the merger of self and other. Parameters ---------- other : Topology Another Topology instance. Returns ------- Topology A new Topology instance resulting from the merge. """ new_top = self new_top += other return new_top
[docs] def lines(self) -> list: """Generate the topology file as a list of lines. Returns ------- list A list of strings, each representing a line in the topology file. """ lines = itpio.format_header(molname=self.name, forcefield=self.ff.name, arguments="") lines += itpio.format_sequence_section(self.sequence, self.secstruct) lines += itpio.format_moleculetype_section(molname=self.name, nrexcl=1) lines += itpio.format_atoms_section(self.atoms) lines += itpio.format_bonded_section("bonds", self.bonds) lines += itpio.format_bonded_section("angles", self.angles) lines += itpio.format_bonded_section("dihedrals", self.dihs) lines += itpio.format_bonded_section("constraints", self.cons) lines += itpio.format_bonded_section("exclusions", self.excls) lines += itpio.format_bonded_section("pairs", self.pairs) lines += itpio.format_bonded_section("virtual_sites3", self.vs3s) lines += itpio.format_bonded_section("bonds", self.elnet) lines += itpio.format_posres_section(self.atoms) logging.info("Created coarsegrained topology") return lines
[docs] def write_to_itp(self, filename: str): """Write the topology to an ITP file. Parameters ---------- filename : str The output file path. """ with open(filename, "w", encoding="utf-8") as file: for line in self.lines(): file.write(line)
@staticmethod def _update_bb_connectivity(conn, atid, reslen, prevreslen=None): """Update backbone connectivity indices for a residue. Description: Adjusts atom indices based on the length of the current residue and, if provided, the previous residue for dihedral definitions. Parameters ---------- conn : list of int Connectivity indices from the force field. Negative indices indicate connections relative to the previous residue. atid : int Atom ID of the first atom in the current residue. reslen : int Number of atoms in the current residue. prevreslen : int, optional Number of atoms in the previous residue. If None, negative indices are not updated. Returns ------- list A list of updated connectivity indices. Example ------- >>> conn = [0, 1, -1] >>> Topology._update_bb_connectivity(conn, 10, 5, prevreslen=4) [10, 11, 8] """ result = [] prev = -1 for idx in conn: if idx < 0: if prevreslen is not None: result.append(atid - prevreslen + idx + 3) continue return list(conn) if idx > prev: result.append(atid + idx) else: result.append(atid + idx + reslen) atid += reslen prev = idx return result @staticmethod def _update_sc_connectivity(conn, atid): """Update sidechain connectivity indices for a residue. Description: Adjusts sidechain connectivity by adding the starting atom index. Parameters ---------- conn : list of int Connectivity indices for the sidechain. atid : int Starting atom id for the residue. Returns ------- list A list of updated connectivity indices. """ return [atid + idx for idx in conn] def _check_connectivity(self, conn): """Check if the connectivity indices are within valid boundaries. Parameters ---------- conn : list of int Connectivity indices to check. Returns ------- bool True if all indices are between 1 and natoms, False otherwise. """ for idx in conn: if idx < 1 or idx > self.natoms: return False return True
[docs] def process_atoms(self, start_atom: int = 0, start_resid: int = 1): """Process atoms based on the sequence and force field definitions. Description: For each residue in the sequence, constructs atom records using both backbone and sidechain definitions from the force field. Parameters ---------- start_atom : int, optional Starting atom ID (default is 0). start_resid : int, optional Starting residue ID (default is 1). """ atid = start_atom resid = start_resid for resname in self.sequence: ff_atoms = self.ff.bb_atoms + self.ff.sc_atoms(resname) reslen = len(ff_atoms) for ffatom in ff_atoms: atom = [ ffatom[0] + atid, # atom id ffatom[1], # type resid, # residue id resname, # residue name ffatom[2], # name ffatom[3] + atid, # charge group ffatom[4], # charge ffatom[5], # mass "", ] self.atoms.append(atom) atid += reslen resid += 1 self.atoms.pop(0) # Remove dummy atom self.natoms = len(self.atoms)
[docs] def process_bb_bonds(self, start_atom: int = 0, start_resid: int = 1): """Process backbone bonds using force field definitions. Parameters ---------- start_atom : int, optional Starting atom ID. start_resid : int, optional Starting residue ID. """ logging.debug(self.sequence) atid = start_atom resid = start_resid prevreslen = None for resname in self.sequence: reslen = len(self.ff.bb_atoms) + len(self.ff.sc_atoms(resname)) ff_blist = self.ff.bb_blist for btype, ff_btype in zip(self.blist, ff_blist): for bond in ff_btype: if bond: connectivity = bond[0] parameters = bond[1] comment = bond[2] upd_conn = self._update_bb_connectivity(connectivity, atid, reslen, prevreslen) if self._check_connectivity(upd_conn): upd_bond = [list(upd_conn), list(parameters), comment] btype.append(upd_bond) prevreslen = reslen atid += reslen resid += 1
[docs] def process_sc_bonds(self, start_atom: int = 0, start_resid: int = 1): """Process sidechain bonds using force field definitions. Parameters ---------- start_atom : int, optional Starting atom ID. start_resid : int, optional Starting residue ID. """ atid = start_atom resid = start_resid for resname in self.sequence: reslen = len(self.ff.bb_atoms) + len(self.ff.sc_atoms(resname)) ff_blist = self.ff.sc_blist(resname) for btype, ff_btype in zip(self.blist, ff_blist): for bond in ff_btype: if bond: connectivity = bond[0] parameters = bond[1] comment = bond[2] upd_conn = self._update_sc_connectivity(connectivity, atid) if self._check_connectivity(upd_conn): upd_bond = [list(upd_conn), list(parameters), comment] btype.append(upd_bond) atid += reslen resid += 1 logging.info("Finished nucleic acid topology construction.")
[docs] def elastic_network(self, atoms, anames: List[str] = None, el: float = 0.5, eu: float = 1.1, ef: float = 500): """Construct an elastic network between selected atoms. Parameters ---------- atoms : list List of PDB atom objects. anames : list of str, optional Atom names to include (default: ["BB1", "BB3"]). el : float, optional Lower distance cutoff. eu : float, optional Upper distance cutoff. ef : float, optional Force constant. """ if anames is None: anames = ["BB1", "BB3"] def get_distance(v1, v2): return 0.1 * np.linalg.norm(np.array(v1) - np.array(v2)) selected = [atom for atom in atoms if atom.name in anames] for a1 in selected: for a2 in selected: if a2.atid - a1.atid > 3: v1 = a1.vec v2 = a2.vec d = get_distance(v1, v2) if el < d < eu: comment = f"{a1.resname}{a1.atid}-{a2.resname}{a2.atid}" self.elnet.append([[a1.atid, a2.atid], [6, d, ef], comment])
[docs] def from_sequence(self, sequence, secstruc=None): """Build topology from a given sequence. Parameters ---------- sequence : list Nucleic acid sequence (list of residue names). secstruc : list, optional Secondary structure. If not provided, defaults to all 'F'. """ self.sequence = sequence self.process_atoms() self.process_bb_bonds() self.process_sc_bonds()
[docs] def from_chain(self, chain, secstruc=None): """Build topology from a chain instance. Parameters ---------- chain : object Chain object containing residues. secstruc : list, optional Secondary structure. """ sequence = [residue.resname for residue in chain] self.from_sequence(sequence, secstruc=secstruc)
[docs] @staticmethod def merge_topologies(topologies): """Merge multiple Topology instances into one. Parameters ---------- topologies : list List of Topology objects. Returns ------- Topology A merged Topology instance. """ top = topologies.pop(0) if topologies: for new_top in topologies: top += new_top return top