#!/usr/bin/env python
version = "3.1"
authors = ["me and others"]
forcefields = ["martini30nucleic", "martini31nucleic"]
import importlib.resources
import os
import sys
from reforge import itpio
rna_system = "test"
# For working versions, the sections are in different modules
#
# Index of this file:
#
# 1. Options and documentation @DOC
# 2. Description, options and command line parsing @CMD
# 3. Helper functions and macros @FUNC
# 4. Finegrained to coarsegrained mapping @MAP
# 5. Secondary structure determination and interpretation @SS
# 6. Force field parameters (MARTINI/ELNEDYN) @FF
# 7. Elastic network @ELN
# 8. Structure I/O @IO
# 9. Topology generation @TOP
# 10. Main @MAIN
###################################
## 1 # OPTIONS AND DOCUMENTATION ## -> @DOC <-
###################################
# This is a simple and versatily option class that allows easy
# definition and parsing of options.
[docs]
class Option:
def __init__(self, func=str, num=1, default=None, description=""):
self.func = func
self.num = num
self.value = default
self.description = description
def __nonzero__(self):
if self.func == bool:
return self.value != False
return bool(self.value)
def __str__(self):
return self.value and str(self.value) or ""
[docs]
def setvalue(self, v):
if len(v) == 1:
self.value = self.func(v[0])
else:
self.value = [self.func(i) for i in v]
# Lists for gathering arguments to options that can be specified
# multiple times on the command line.
lists = {
"cystines": [],
"merges": [],
"links": [],
"multi": [],
}
# List of Help text and options.
# This way we can simply print this list if the user wants help.
options = [
# NOTE: Options marked with (+) can be given multiple times on the command line
# option type number default description
"""
========================================================================\n
""",
("-f", Option(str, 1, None, "Input file (PDB|GRO)")),
("-o", Option(str, 1, None, "Output topology (TOP)")),
("-x", Option(str, 1, None, "Output coarse grained structure (PDB)")),
("-seq", Option(str, 1, None, "Output list of bead numbers.")),
("-bmap", Option(str, 1, None, "Output index file containing bonded terms.")),
("-v", Option(bool, 0, False, "Verbose. Be load and noisy.")),
("-h", Option(bool, 0, False, "Display this help.")),
("-ss", Option(str, 1, None, "Secondary structure (File or string)")),
(
"-ssc",
Option(
float, 1, 0.5, "Cutoff fraction for ss in case of ambiguity (default: 0.5)."
),
),
("-dssp", Option(str, 1, None, "DSSP executable for determining structure")),
("-collagen", Option(bool, 0, False, "Use collagen parameters")),
(
"-his",
Option(bool, 0, False, "Interactively set the charge of each His-residue."),
),
("-nt", Option(bool, 0, False, "Set neutral termini (charged is default)")),
("-cb", Option(bool, 0, False, "Set charges at chain breaks (neutral is default)")),
("-cys", Option(lists["cystines"].append, 1, None, "Disulphide bond (+)")),
("-link", Option(lists["links"].append, 1, None, "Link (+)")),
(
"-merge",
Option(lists["merges"].append, 1, None, "Merge chains: e.g. -merge A,B,C (+)"),
),
("-name", Option(str, 1, None, "Moleculetype name")),
(
"-p",
Option(
str,
1,
"None",
"Output position restraints (None/All/Backbone) (default: None)",
),
),
(
"-pf",
Option(
float,
1,
1000,
"Position restraints force constant (default: 1000 kJ/mol/nm^2)",
),
),
(
"-ed",
Option(
bool,
0,
False,
"Use dihedrals for extended regions rather than elastic bonds)",
),
),
("-sep", Option(bool, 0, False, "Write separate topologies for identical chains.")),
(
"-ff",
Option(
str,
1,
"martini30nucleic",
"Which forcefield to use: " + " ,".join(n for n in forcefields[:-1]),
),
),
("-elastic", Option(bool, 0, True, "Write elastic bonds")),
("-ef", Option(float, 1, 500, "Elastic bond force constant Fc")),
("-el", Option(float, 1, 0.5, "Elastic bond lower cutoff: F = Fc if rij < lo")),
("-eu", Option(float, 1, 1.2, "Elastic bond upper cutoff: F = 0 if rij > up")),
("-ea", Option(float, 1, 0, "Elastic bond decay factor a")),
("-ep", Option(float, 1, 1, "Elastic bond decay power p")),
(
"-em",
Option(float, 1, 0, "Remove elastic bonds with force constant lower than this"),
),
(
"-eb",
Option(str, 1, "BB", "Comma separated list of bead names for elastic bonds"),
),
(
"-type",
Option(
str,
1,
"ss",
"Type of DNA/RNA topology (ss/ds-stiff/ds-soft) to create. (default: ss)",
),
),
(
"-multi",
Option(
lists["multi"].append, 1, None, "Chain to be set up for multiscaling (+)"
),
),
("-sys", Option(str, 1, "1RNA", "AA SYS")),
]
## Martini Quotes
martiniq = [
(
"Robert Benchley",
"Why don't you get out of that wet coat and into a dry martini?",
),
("James Thurber", "One martini is all right, two is two many, three is not enough"),
(
"Philip Larkin",
"The chromatic scale is what you use to give the effect of drinking a quinine martini and having an enema simultaneously.",
),
(
"William Emerson, Jr.",
"And when that first martini hits the liver like a silver bullet, there is a sigh of contentment that can be heard in Dubuque.",
),
(
"Alec Waugh",
"I am prepared to believe that a dry martini slightly impairs the palate, but think what it does for the soul.",
),
(
"Gerald R. Ford",
"The three-martini lunch is the epitome of American efficiency. Where else can you get an earful, a bellyful and a snootful at the same time?",
),
("P. G. Wodehouse", "He was white and shaken, like a dry martini."),
]
## DNA elastic network list
enStrandLengths = []
desc = ""
[docs]
def help():
"""Print help text and list of options and end the program."""
import sys
for item in options:
if type(item) == str:
print(item)
for item in options:
if type(item) != str:
print("%10s %s" % (item[0], item[1].description))
print
sys.exit()
##############################
## 2 # COMMAND LINE PARSING ## -> @CMD <-
##############################
import sys, logging
# Helper function to parse atom strings given on the command line:
# resid
# resname/resid
# chain/resname/resid
# resname/resid/atom
# chain/resname/resid/atom
# chain//resid
# chain/resname/atom
[docs]
def str2atom(a):
a = a.split("/")
if len(a) == 1: # Only a residue number:
return (None, None, int(a[0]), None)
if len(a) == 2: # Residue name and number (CYS/123):
return (None, a[0], int(a[1]), None)
if len(a) == 3:
if a[2].isdigit(): # Chain, residue name, residue number
return (None, a[1], int(a[2]), a[0])
else: # Residue name, residue number, atom name
return (a[2], a[0], int(a[1]), None)
return (a[3], a[1], int(a[2]), a[0])
[docs]
def option_parser(args, options, lists, version=0):
# Check whether there is a request for help
if "-h" in args or "--help" in args:
help()
# Convert the option list to a dictionary, discarding all comments
options = dict([i for i in options if not type(i) == str])
# This information we would like to print to some files, so let's put it in our information class
options["Version"] = version
options["Arguments"] = args[:]
while args:
ar = args.pop(0)
options[ar].setvalue([args.pop(0) for i in range(options[ar].num)])
## LOGGING ##
# Set the log level and communicate which options are set and what is happening
# If 'Verbose' is set, change the logger level
logLevel = options["-v"] and logging.DEBUG or logging.INFO
logging.basicConfig(format="%(levelname)-7s %(message)s", level=logLevel)
logging.info("MARTINIZE, script version %s" % version)
# logging.info('If you use this script please cite:')
# logging.info('de Jong et al., J. Chem. Theory Comput., 2013, DOI:10.1021/ct300646g')
# Write options based on selected topology type. @net
options["type"] = options["-type"].value
if options["type"] == "ss":
options["-ff"].setvalue(["martini30nucleic"])
elif options["type"] == "ds":
options["-ff"].setvalue(["martini30nucleic"])
lists["merges"].append("A, B")
options["-eu"].setvalue(["1.8"])
options["-ef"].setvalue(["200"])
options["-eb"].setvalue(["BB1", "BB3"])
elif options["type"] == "pol":
options["-ff"].setvalue(["martini31nucleic"])
else:
logging.error("Undefined topology type. Giving up...")
sys.exit()
options["-eb"].setvalue(["BB2"])
# The make the program flexible, the forcefield parameters are defined
# for multiple forcefield. Check if a existing one is defined:
try:
# Try to load the forcefield class from a different file
_tmp = __import__(options["-ff"].value.lower())
options["ForceField"] = getattr(_tmp, options["-ff"].value.lower())()
except:
# Try to load the forcefield class from the current file
options["ForceField"] = globals()[options["-ff"].value.lower()]()
# Process the raw options from the command line
# Boolean options are set to more intuitive variables
options["Collagen"] = options["-collagen"]
options["chHIS"] = options["-his"]
options["ChargesAtBreaks"] = options["-cb"]
options["NeutralTermini"] = options["-nt"]
# options['ExtendedDihedrals'] = options['-ed']
# options['RetainHETATM'] = False # options['-hetatm']
options["SeparateTop"] = options["-sep"]
options["MixedChains"] = False # options['-mixed']
options["ElasticNetwork"] = options["-elastic"]
# Parsing of some other options into variables
options["ElasticMaximumForce"] = options["-ef"].value
options["ElasticMinimumForce"] = options["-em"].value
options["ElasticLowerBound"] = options["-el"].value
options["ElasticUpperBound"] = options["-eu"].value
options["ElasticDecayFactor"] = options["-ea"].value
options["ElasticDecayPower"] = options["-ep"].value
options["ElasticBeads"] = options["-eb"].value.split(",")
options["PosResForce"] = options["-pf"].value
options["PosRes"] = [i.lower() for i in options["-p"].value.split(",")]
if "none" in options["PosRes"]:
options["PosRes"] = []
if "backbone" in options["PosRes"]:
options["PosRes"].append("BB")
if options["ForceField"].ElasticNetwork:
# Some forcefields, like elnedyn, always use an elatic network. This is set in the
# forcefield file, with the parameter ElasticNetwork.
options["ElasticNetwork"] = True
# Merges, links and cystines
options["mergeList"] = (
"all" in lists["merges"] and ["all"] or [i.split(",") for i in lists["merges"]]
)
# Process links
linkList = []
linkListCG = []
for i in lists["links"]:
ln = i.split(",")
a, b = str2atom(ln[0]), str2atom(ln[1])
if len(ln) > 3: # Bond with given length and force constant
bl, fc = (ln[2] and float(ln[2]) or None, float(ln[3]))
elif len(a) == 3: # Constraint at given distance
bl, fc = float(a[2]), None
else: # Constraint at distance in structure
bl, fc = None, None
# Store the link, but do not list the atom name in the
# atomistic link list. Otherwise it will not get noticed
# as a valid link when checking for merging chains
linkList.append(((None, a[1], a[2], a[3]), (None, b[1], b[2], b[3])))
linkListCG.append((a, b, bl, fc))
# Cystines -- This should be done for all special bonds listed in the _special_ dictionary
CystineCheckBonds = False # By default, do not detect cystine bridges
CystineMaxDist2 = (10 * 0.22) ** 2 # Maximum distance (A) for detection of SS bonds
for i in lists["cystines"]:
if i.lower() == "auto":
CystineCheckBonds = True
elif i.replace(".", "").isdigit():
CystineCheckBonds = True
CystineMaxDist2 = (10 * float(i)) ** 2
else:
# This item should be a pair of cysteines
cysA, cysB = [str2atom(j) for j in i.split(",")]
# Internally we handle the residue number shifted by ord(' ')<<20. We have to add this to the
# cys-residue numbers given here as well.
constant = 32 << 20
linkList.append(
(
("SG", "CYS", cysA[2] + constant, cysA[3]),
("SG", "CYS", cysB[2] + constant, cysB[3]),
)
)
linkListCG.append(
(
("SC1", "CYS", cysA[2] + constant, cysA[3]),
("SC1", "CYS", cysB[2] + constant, cysB[3]),
-1,
-1,
)
)
# Now we have done everything to it, we can add Link/cystine related stuff to options
# 'multi' is not stored anywhere else, so that we also add
options["linkList"] = linkList
options["linkListCG"] = linkListCG
options["CystineCheckBonds"] = CystineCheckBonds
options["CystineMaxDist2"] = CystineMaxDist2
options["multi"] = lists["multi"]
if "ForceField" in options.keys():
logging.info("The %s forcefield will be used." % (options["ForceField"].name))
else:
logging.error("Forcefield '%s' has not been implemented." % (options["-ff"]))
sys.exit()
if options["PosRes"]:
logging.info("Position restraints will be generated.")
logging.warning(
"Position restraints are only enabled if -DPOSRES is set in the MDP file"
)
return options
#################################################
## 3 # HELPER FUNCTIONS, CLASSES AND SHORTCUTS ## -> @FUNC <-
#################################################
import math
# ----+------------------+
## A | STRING FUNCTIONS |
# ----+------------------+
# Split a string
[docs]
def spl(x):
return x.split()
# Split each argument in a list
[docs]
def nsplit(*x):
return [i.split() for i in x]
# Make a dictionary from two lists
[docs]
def hash(x, y):
return dict(zip(x, y))
# Function to reformat pattern strings
[docs]
def pat(x, c="."):
return x.replace(c, "\x00").split()
# Function to generate formatted strings according to the argument type
# ----+----------------+
## B | MATH FUNCTIONS |
# ----+----------------+
[docs]
def cos_angle(a, b):
p = sum([i * j for i, j in zip(a, b)])
q = math.sqrt(sum([i * i for i in a]) * sum([j * j for j in b]))
return min(max(-1, p / q), 1)
[docs]
def norm2(a):
return sum([i * i for i in a])
[docs]
def norm(a):
return math.sqrt(norm2(a))
[docs]
def distance2(a, b):
return (a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2 + (a[2] - b[2]) ** 2
##########################
## 4 # FG -> CG MAPPING ## -> @MAP <-
##########################
dnares3 = ["DA", "DC", "DG", "DT"]
dnares1 = ["dA", "dC", "dG", "dT"]
rnares3 = ["A", "C", "G", "U"]
rnares1 = ["A", "C", "G", "U"] #
# Residue classes:
nucleic = spl(
"DA DC DG DT A C G U \
RA3 RA5 RC3 RC5 RG3 RG5 RU3 RU5 \
DMA DHU MRA MRC MRG MRU NMC PSU SPA RAP \
1MG 2MA 2MG 3MP 3MU 4SU 5MC 5MG 5MU 6MA 7MG"
)
rnares3 = nucleic
rnares1 = nucleic
# Amino acid nucleic acid codes:
# The naming (AA and '3') is not strictly correct when adding DNA/RNA, but we keep it like this for consistincy.
AA3 = dnares3 + rnares3 # @#
AA1 = dnares1 + rnares1 # @#
# Dictionaries for conversion from one letter code to three letter code v.v.
AA123, AA321 = hash(AA1, AA3), hash(AA3, AA1)
residueTypes = dict([(i, "Nucleic") for i in nucleic])
[docs]
class CoarseGrained:
# Class for mapping an atomistic residue list to a coarsegrained one
# Should get an __init__ function taking a residuelist, atomlist, Pymol selection or ChemPy model
# The result should be stored in a list-type attribute
# The class should have pdbstr and grostr methods
# Standard mapping groups
dna_bb = nsplit("P OP1 OP2 O5' O3'", "C5' O4' C4'", "C3' C2' C1'")
rna_bb = nsplit("P OP1 OP2 O5' O3'", "C5' O4' C4'", "C3' C2' O2' C1'")
# Generic names for DNA and RNA beads
residue_bead_names_dna = spl("BB1 BB2 BB3 SC1 SC2 SC3 SC4 SC5 SC6 SC7 SC8")
residue_bead_names_rna = spl("BB1 BB2 BB3 SC1 SC2 SC3 SC4 SC5 SC6 SC7 SC8")
# This dictionary contains the bead names for all residues,
# following the order in 'mapping'
names = {}
# Add the default bead names for all DNA/RNA nucleic acids
names.update(
[
(
i,
(
"BB1",
"BB2",
"BB3",
"SC1",
"SC2",
"SC3",
"SC4",
"SC5",
"SC6",
"SC7",
"SC8",
),
)
for i in nucleic
]
)
# Crude mass for weighted average. No consideration of united atoms.
# This will probably give only minor deviations, while also giving less headache
# mass = {'H': 1,'C': 12,'N': 14,'O': 16,'S': 32,'P': 31,'M': 0}
mass = {"H": 1, "C": 1, "N": 1, "O": 1, "S": 1, "P": 1, "M": 0}
# Determine average position for a set of weights and coordinates
# This is a rather specific function that requires a list of items
# [(m,(x,y,z),id),..] and returns the weighted average of the
# coordinates and the list of ids mapped to this bead
[docs]
def aver(b):
# print(b)
mwx, ids = zip(
*[((m * x, m * y, m * z), i) for m, (x, y, z), i in b]
) # Weighted coordinates
# tm = sum(zip(*b)[0])
total_mass = sum(x[0] for x in b) # Sum of weights
return [sum(i) / total_mass for i in zip(*mwx)], ids # Centre of mass
# Return the CG beads for an atomistic residue, using the mapping specified above
# The residue 'r' is simply a list of atoms, and each atom is a list:
# [ name, resname, resid, chain, x, y, z ]
[docs]
def map(r, ff):
"""
ff - chosen force field for the mapping
"""
p = ff.mapping[r[0][1]] # Mapping for this residue
# Get the name, mass and coordinates for all atoms in the residue
a = [(i[0], CoarseGrained.mass.get(i[0][0], 0), i[4:]) for i in r]
# Store weight, coordinate and index for atoms that match a bead
q = [
[(m, coord, a.index((atom, m, coord))) for atom, m, coord in a if atom in i]
for i in p
]
# Bead positions
return zip(*[aver(i) for i in q])
# Mapping for index file
[docs]
def mapIndex(r, ff):
p = ff.mapping[r[0][1]] # Mapping for this residue
# Get the name, mass and coordinates for all atoms in the residue
a = [(i[0], CoarseGrained.mass.get(i[0][0], 0), i[4:]) for i in r]
# Store weight, coordinate and index for atoms that match a bead
return [
[(m, coord, a.index((atom, m, coord))) for atom, m, coord in a if atom in i]
for i in p
]
#############################
## 5 # SECONDARY STRUCTURE ## -> @SS <-
#############################
import logging, os, sys
import subprocess as subp
# ----+--------------------------------------+
## A | SECONDARY STRUCTURE TYPE DEFINITIONS |
# ----+--------------------------------------+
bbss = spl(" F E H 1 2 3 T S C") # SS one letter
# The following dictionary contains secondary structure types as assigned by
# different programs. The corresponding Martini secondary structure types are
# listed in cgss
#
# NOTE:
# Each list of letters in the dictionary ss should exactly match the list
# in cgss.
#
ssdefs = {
"dssp": list(".HGIBETSC~"), # DSSP one letter secondary structure code #@#
"pymol": list(".H...S...L"), # Pymol one letter secondary structure code #@#
"gmx": list(".H...ETS.C"), # Gromacs secondary structure dump code #@#
"self": list("FHHHEETSCC"), # Internal CG secondary structure codes #@#
}
cgss = list("FHHHEETSCC") # Corresponding CG secondary structure types #@#
# ----+-------------------------------------------+
## B | SECONDARY STRUCTURE PATTERN SUBSTITUTIONS |
# ----+-------------------------------------------+
# For all structure types specific dihedrals may be used if four or
# more consecutive residues are assigned that type.
# Helix start and end regions are special and require assignment of
# specific types. The following pattern substitutions are applied
# (in the given order). A dot matches any other type.
# Patterns can be added to the dictionaries. This only makes sense
# if for each key in patterns there is a matching key in pattypes.
patterns = {
"H": pat(".H. .HH. .HHH. .HHHH. .HHHHH. .HHHHHH. .HHHHHHH. .HHHH HHHH.") # @#
}
pattypes = {
"H": pat(".3. .33. .333. .3333. .13332. .113322. .1113222. .1111 2222.") # @#
}
# ----+----------+
## C | INTERNAL |
# ----+----------+
# Pymol Colors
# F E H 1 2 3 T S C
ssnum = (13, 4, 2, 2, 2, 2, 6, 22, 0) # @#
# Dictionary returning a number for a given type of secondary structure
# This can be used for setting the b-factor field for coloring
ss2num = hash(bbss, ssnum)
# List of programs for which secondary structure definitions can be processed
programs = ssdefs.keys()
# Dictionaries mapping ss types to the CG ss types
ssd = dict([(i, hash(ssdefs[i], cgss)) for i in programs])
# From the secondary structure dictionaries we create translation tables
# with which all secondary structure types can be processed. Anything
# not listed above will be mapped to C (coil).
# Note, a translation table is a list of 256 characters to map standard
# ascii characters to.
[docs]
def tt(program):
return "".join([ssd[program].get(chr(i), "C") for i in range(256)])
# The translation table depends on the program used to obtain the
# secondary structure definitions
sstt = dict([(i, tt(i)) for i in programs])
# The following translation tables are used to identify stretches of
# a certain type of secondary structure. These translation tables have
# every character, except for the indicated secondary structure, set to
# \x00. This allows summing the sequences after processing to obtain
# a single sequence summarizing all the features.
null = "\x00"
sstd = dict([(i, ord(i) * null + i + (255 - ord(i)) * null) for i in cgss])
# Pattern substitutions
[docs]
def typesub(seq, patterns, types):
seq = null + seq + null
for i, j in zip(patterns, types):
seq = seq.replace(i, j)
return seq[1:-1]
# The following function translates a string encoding the secondary structure
# to a string of corresponding Martini types, taking the origin of the
# secondary structure into account, and replacing termini if requested.
[docs]
def ssClassification(ss, program="self"):
# Translate dssp/pymol/gmx ss to Martini ss
ss = ss.translate(sstt[program])
# Separate the different secondary structure types
sep = dict([(i, ss.translate(sstd[i])) for i in sstd.keys()])
# Do type substitutions based on patterns
# If the ss type is not in the patterns lists, do not substitute
# (use empty lists for substitutions)
typ = [
typesub(sep[i], patterns.get(i, []), pattypes.get(i, [])) for i in sstd.keys()
]
# Translate all types to numerical values
typ = [[ord(j) for j in list(i)] for i in typ]
# Sum characters back to get a full typed sequence
typ = "".join([chr(sum(i)) for i in zip(*typ)])
# Return both the actual as well as the fully typed sequence
return ss, typ
###################################
## 5 # FORCE FIELDS ## -> @FF <-
###################################
[docs]
class ForceField:
[docs]
@staticmethod
def read_itp(itp_file):
itp_data = itpio.read_itp(itp_file)
return itp_data
[docs]
@staticmethod
def read_itps(mol, directory, version):
# itpdir = os.path.abspath(f'/scratch/dyangali/reforge/reforge/itp/{directory}')
itpdir = importlib.resources.files("reforge") / "martini" / "itp"
file = os.path.join(itpdir, f"{directory}", f"{mol}_A_{version}.itp")
a_itp_data = ForceField.read_itp(file)
file = os.path.join(itpdir, f"{directory}", f"{mol}_C_{version}.itp")
c_itp_data = ForceField.read_itp(file)
file = os.path.join(itpdir, f"{directory}", f"{mol}_G_{version}.itp")
g_itp_data = ForceField.read_itp(file)
file = os.path.join(itpdir, f"{directory}", f"{mol}_U_{version}.itp")
u_itp_data = ForceField.read_itp(file)
return a_itp_data, c_itp_data, g_itp_data, u_itp_data
[docs]
@staticmethod
def itp_to_indata(itp_data):
keys = [
"bonds",
"angles",
"dihedrals",
"impropers",
"virtual_sites3",
"exclusions",
"pairs",
]
connectivity = [itp_data[key].keys() if key in itp_data else [] for key in keys]
parameters = [itp_data[key].values() if key in itp_data else [] for key in keys]
return connectivity, parameters
def __init__(self):
# By default use an elastic network
self.ElasticNetwork = False
# Elastic networks bond shouldn't lead to exclusions (type 6)
# But Elnedyn has been parametrized with type 1.
self.EBondType = 6
## DNA DICTIONARIES ##
# Dictionary for the connectivities and parameters of bonds between DNA backbone beads
self.dnaBbBondDictC = dict(zip(self.dna_con["bond"], self.dna_bb["bond"]))
# Dictionary for the connectivities and parameters of angles between DNA backbone beads
self.dnaBbAngleDictC = dict(zip(self.dna_con["angle"], self.dna_bb["angle"]))
# Dictionary for the connectivities and parameters of dihedrals between DNA backbone beads
self.dnaBbDihDictC = dict(zip(self.dna_con["dih"], self.dna_bb["dih"]))
# Dictionary for exclusions for DNA backbone beads
self.dnaBbExclDictC = dict(zip(self.dna_con["excl"], self.dna_bb["excl"]))
# Dictionary for pairs for DNA backbone beads
self.dnaBbPairDictC = dict(zip(self.dna_con["pair"], self.dna_bb["pair"]))
## RNA DICTIONARIES ##
# Dictionary for the connectivities and parameters of bonds between rna backbone beads
self.rnaBbBondDictC = dict(zip(self.rna_con["bond"], self.rna_bb["bond"]))
# Dictionary for the connectivities and parameters of angles between rna backbone beads
self.rnaBbAngleDictC = dict(zip(self.rna_con["angle"], self.rna_bb["angle"]))
# Dictionary for the connectivities and parameters of dihedrals between rna backbone beads
self.rnaBbDihDictC = dict(zip(self.rna_con["dih"], self.rna_bb["dih"]))
# Dictionary for exclusions for rna backbone beads
self.rnaBbExclDictC = dict(zip(self.rna_con["excl"], self.rna_bb["excl"]))
# Dictionary for pairs for rna backbone beads
self.rnaBbPairDictC = dict(zip(self.rna_con["pair"], self.rna_bb["pair"]))
# The following function returns the backbone bead for a given residue and
# secondary structure type.
# 1. Check if the residue is DNA/RNA and return the whole backbone for those
# 2. Look up the proper dictionary for the residue
# 3. Get the proper type from it for the secondary structure
# If the residue is not in the dictionary of specials, use the default
# If the secondary structure is not listed (in the residue specific
# dictionary) revert to the default.
[docs]
def bbGetBead(self, r1, ss="F"):
if r1 in dnares3:
return self.dna_bb["atom"]
elif r1 in rnares3:
return self.rna_bb["atom"]
else:
sys.exit("This script supports only DNA or RNA.")
[docs]
def bbGetBond(self, r, ca, ss):
# Retrieve parameters for each residue from tables defined above
if r[0] in dnares3:
return ca in self.dnaBbBondDictC.keys() and self.dnaBbBondDictC[ca] or None
elif r[0] in rnares3:
return ca in self.rnaBbBondDictC.keys() and self.rnaBbBondDictC[ca] or None
else:
sys.exit("This script supports only DNA or RNA.")
[docs]
def bbGetAngle(self, r, ca, ss):
if r[0] in dnares3:
return (
ca in self.dnaBbAngleDictC.keys() and self.dnaBbAngleDictC[ca] or None
)
elif r[0] in rnares3:
return (
ca in self.rnaBbAngleDictC.keys() and self.rnaBbAngleDictC[ca] or None
)
else:
sys.exit("This script supports only DNA or RNA.")
[docs]
def bbGetExclusion(self, r, ca, ss):
if r[0] in dnares3:
return ca in self.dnaBbExclDictC.keys() and " " or None
elif r[0] in rnares3:
return ca in self.rnaBbExclDictC.keys() and " " or None
else:
sys.exit("This script supports only DNA or RNA.")
[docs]
def bbGetPair(self, r, ca, ss):
if r[0] in dnares3:
return ca in self.dnaBbPairDictC.keys() and " " or None
elif r[0] in rnares3:
return ca in self.rnaBbPairDictC.keys() and " " or None
else:
sys.exit("This script supports only DNA or RNA.")
[docs]
def bbGetDihedral(self, r, ca, ss):
# Retrieve parameters for each residue from table defined above
if r[0] in dnares3:
return ca in self.dnaBbDihDictC.keys() and self.dnaBbDihDictC[ca] or None
elif r[0] in rnares3:
return ca in self.rnaBbDihDictC.keys() and self.rnaBbDihDictC[ca] or None
else:
sys.exit("This script supports only DNA or RNA.")
[docs]
def getCharge(self, atype, aname):
return self.charges.get(atype, self.bbcharges.get(aname, 0))
[docs]
def messages(self):
"""Prints any force-field specific logging messages."""
import logging
pass
[docs]
def update_adenine(self, mapping, connectivity, itp_params):
parameters = mapping + itp_params
self.bases.update({"A": parameters})
self.base_connectivity.update({"A": connectivity})
self.bases.update({"RA3": parameters})
self.base_connectivity.update({"RA3": connectivity})
self.bases.update({"RA5": parameters})
self.base_connectivity.update({"RA5": connectivity})
self.bases.update({"2MA": parameters})
self.base_connectivity.update({"2MA": connectivity})
self.bases.update({"DMA": parameters})
self.base_connectivity.update({"DMA": connectivity})
self.bases.update({"SPA": parameters})
self.base_connectivity.update({"SPA": connectivity})
self.bases.update({"RAP": parameters})
self.base_connectivity.update({"RAP": connectivity})
self.bases.update({"6MA": parameters})
self.base_connectivity.update({"6MA": connectivity})
[docs]
def update_cytosine(self, mapping, connectivity, itp_params):
parameters = mapping + itp_params
self.bases.update({"C": parameters})
self.base_connectivity.update({"C": connectivity})
self.bases.update({"RC3": parameters})
self.base_connectivity.update({"RC3": connectivity})
self.bases.update({"RC5": parameters})
self.base_connectivity.update({"RC5": connectivity})
self.bases.update({"MRC": parameters})
self.base_connectivity.update({"MRC": connectivity})
self.bases.update({"5MC": parameters})
self.base_connectivity.update({"5MC": connectivity})
self.bases.update({"NMC": parameters})
self.base_connectivity.update({"NMC": connectivity})
[docs]
def update_guanine(self, mapping, connectivity, itp_params):
parameters = mapping + itp_params
self.bases.update({"G": parameters})
self.base_connectivity.update({"G": connectivity})
self.bases.update({"RG3": parameters})
self.base_connectivity.update({"RG3": connectivity})
self.bases.update({"RG5": parameters})
self.base_connectivity.update({"RG5": connectivity})
self.bases.update({"MRG": parameters})
self.base_connectivity.update({"MRG": connectivity})
self.bases.update({"1MG": parameters})
self.base_connectivity.update({"1MG": connectivity})
self.bases.update({"2MG": parameters})
self.base_connectivity.update({"2MG": connectivity})
self.bases.update({"7MG": parameters})
self.base_connectivity.update({"7MG": connectivity})
[docs]
def update_uracil(self, mapping, connectivity, itp_params):
parameters = mapping + itp_params
self.bases.update({"U": parameters})
self.base_connectivity.update({"U": connectivity})
self.bases.update({"RU3": parameters})
self.base_connectivity.update({"RU3": connectivity})
self.bases.update({"RU5": parameters})
self.base_connectivity.update({"RU5": connectivity})
self.bases.update({"MRU": parameters})
self.base_connectivity.update({"MRU": connectivity})
self.bases.update({"DHU": parameters})
self.base_connectivity.update({"DHU": connectivity})
self.bases.update({"PSU": parameters})
self.base_connectivity.update({"PSU": connectivity})
self.bases.update({"3MP": parameters})
self.base_connectivity.update({"3MP": connectivity})
self.bases.update({"3MU": parameters})
self.base_connectivity.update({"3MU": connectivity})
self.bases.update({"4SU": parameters})
self.base_connectivity.update({"4SU": connectivity})
self.bases.update({"5MU": parameters})
self.base_connectivity.update({"5MU": connectivity})
[docs]
@staticmethod
def update_non_standard_mapping(mapping):
mapping.update(
{
"RA3": mapping["A"],
"RA5": mapping["A"],
"2MA": mapping["A"],
"6MA": mapping["A"],
"RAP": mapping["A"],
"DMA": mapping["A"],
"DHA": mapping["A"],
"SPA": mapping["A"],
"RC3": mapping["C"],
"RC5": mapping["C"],
"5MC": mapping["C"],
"3MP": mapping["C"],
"MRC": mapping["C"],
"NMC": mapping["C"],
"RG3": mapping["G"],
"RG5": mapping["G"],
"1MG": mapping["G"],
"2MG": mapping["G"],
"7MG": mapping["G"],
"MRG": mapping["G"],
"RU3": mapping["U"],
"RU5": mapping["U"],
"4SU": mapping["U"],
"DHU": mapping["U"],
"PSU": mapping["U"],
"5MU": mapping["U"],
"3MU": mapping["U"],
"3MP": mapping["U"],
"MRU": mapping["U"],
}
)
[docs]
class martini30nucleic(ForceField):
# FF mapping
bb_mapping = nsplit(
"P OP1 OP2 O5' O3' O1P O2P",
"C5' 1H5' 2H5' H5' H5'' C4' H4' O4' C3' H3'",
"C1' C2' O2' O4'",
)
mapping = {
"A": bb_mapping
+ nsplit("N9 C8 H8", "N3 C4", "N1 C2 H2", "N6 C6 H61 H62", "N7 C5"),
"C": bb_mapping + nsplit("N1 C5 C6", "C2 O2", "N3", "N4 C4 H41 H42"),
"G": bb_mapping
+ nsplit("C8 H8 N9", "C4 N3", "C2 N2 H21 H22", "N1", "C6 O6", "C5 N7"),
"U": bb_mapping + nsplit("N1 C5 C6", "C2 O2", "N3", "C4 O4"),
}
ForceField.update_non_standard_mapping(mapping)
def __init__(self):
# parameters are defined here for the following (protein) forcefields:
self.name = "martini30nucleic"
# Charged types:
self.charges = {
"Qd": 1,
"Qa": -1,
"SQd": 1,
"SQa": -1,
"RQd": 1,
"AQa": -1,
} # @#
self.bbcharges = {"BB1": -1} # @#
# Not all (eg Elnedyn) forcefields use backbone-backbone-sidechain angles and BBBB-dihedrals.
self.UseBBSAngles = False
self.UseBBBBDihedrals = False
##################
# DNA PARAMETERS #
##################
# DNA BACKBONE PARAMETERS
self.dna_bb = {
"atom": spl("Q0 SN0 SC2"),
"bond": [],
"angle": [],
"dih": [],
"excl": [],
"pair": [],
}
# DNA BACKBONE CONNECTIVITY
self.dna_con = {
"bond": [],
"angle": [],
"dih": [],
"excl": [],
"pair": [],
}
self.bases = {}
self.base_connectivity = {}
##################
# RNA PARAMETERS # @ff
##################
# RNA BACKBONE PARAMETERS TUT
self.rna_bb = {
"atom": spl("Q1n C6 N2"),
"bond": [
(1, 0.350, 25000),
(1, 0.378, 12000),
(1, 0.239, 25000),
(1, 0.412, 12000),
],
"angle": [
(10, 110.0, 50), # 2, 117.0, 140
(10, 121.0, 180),
(10, 143.0, 300),
],
"dih": [
(1, 0.0, 25.0, 1),
(1, 0.0, 25.0, 1),
(1, -112.0, 15.0, 1),
],
"excl": [(), (), ()],
"pair": [],
}
# RNA BACKBONE CONNECTIVITY
self.rna_con = {
"bond": [(0, 1), (1, 0), (1, 2), (2, 0)],
"angle": [
(0, 1, 0),
(1, 0, 1),
(0, 1, 2),
],
"dih": [
(0, 1, 0, 1),
(1, 0, 1, 0),
(1, 0, 1, 2),
],
"excl": [
(2, 0),
(0, 2),
],
"pair": [],
}
# For bonds, angles, and dihedrals the first parameter should always
# be the type. It is pretty annoying to check the connectivity from
# elsewhere so we update these one base at a time.
a_itp, c_itp, g_itp, u_itp = ForceField.read_itps(rna_system, "regular", "new")
# ADENINE
mapping = [spl("TA0 TA1 TA2 TA3 TA4")]
connectivity, itp_params = martini30nucleic.itp_to_indata(a_itp)
self.update_adenine(mapping, connectivity, itp_params)
# CYTOSINE
mapping = [spl("TY0 TY1 TY2 TY3")]
connectivity, itp_params = martini30nucleic.itp_to_indata(c_itp)
self.update_cytosine(mapping, connectivity, itp_params)
# GUANINE
mapping = [spl("TG0 TG1 TG2 TG3 TG4 TG5")]
connectivity, itp_params = martini30nucleic.itp_to_indata(g_itp)
parameters = mapping + itp_params
self.update_guanine(mapping, connectivity, itp_params)
# URACIL
mapping = [spl("TU0 TU1 TU2 TU3")]
connectivity, itp_params = martini30nucleic.itp_to_indata(u_itp)
parameters = mapping + itp_params
self.update_uracil(mapping, connectivity, itp_params)
super().__init__()
[docs]
class martini31nucleic(ForceField):
# FF mapping
bb_mapping = nsplit(
"P OP1 OP2 O5' O3' O1P O2P",
"C5' 1H5' 2H5' H5' H5'' C4' H4' O4' C3' H3'",
"C1' C2' O2' O4'",
)
mapping = {
"A": bb_mapping
+ nsplit(
"C8",
"N3 C4",
"C2",
"N1",
"N6 C6 H61 H62",
"N7 C5",
),
"C": bb_mapping + nsplit("C6", "O2", "N3", "N4 C4 H41 H42", "C2"),
"G": bb_mapping
+ nsplit("C8", "C4 N3", "C2 N2 H22 H21", "N1", "O6", "C5 N7", "H1", "C6"),
"U": bb_mapping
+ nsplit(
"C6",
"O2",
"N3",
"O4",
"C2",
"H3",
"C4",
),
}
ForceField.update_non_standard_mapping(mapping)
def __init__(self):
# parameters are defined here for the following (protein) forcefields:
self.name = "martini31nucleic"
# Charged types:
charges = {
"TDU": 0.5,
"TA1": 0.4,
"TA2": -0.3,
"TA3": 0.5,
"TA4": -0.8,
"TA5": 0.6,
"TA6": -0.4,
"TY1": 0.0,
"TY2": -0.5,
"TY3": -0.6,
"TY4": 0.6,
"TY5": 0.5,
"TG1": 0.3,
"TG2": 0.0,
"TG3": 0.3,
"TG4": -0.3,
"TG5": -0.5,
"TG6": -0.6,
"TG7": 0.3,
"TG8": 0.5,
"TU1": 0.0,
"TU2": -0.5,
"TU3": -0.5,
"TU4": -0.5,
"TU5": 0.5,
"TU6": 0.5,
"TU7": 0.5,
}
self.charges = {key: value * 1.8 for key, value in charges.items()}
self.bbcharges = {"BB1": -1}
# Not all (eg Elnedyn) forcefields use backbone-backbone-sidechain angles and BBBB-dihedrals.
self.UseBBSAngles = False
self.UseBBBBDihedrals = False
##################
# DNA PARAMETERS #
##################
# DNA BACKBONE PARAMETERS
self.dna_bb = {
"atom": spl("Q0 SN0 SC2"),
"bond": [],
"angle": [],
"dih": [],
"excl": [],
"pair": [],
}
# DNA BACKBONE CONNECTIVITY
self.dna_con = {
"bond": [],
"angle": [],
"dih": [],
"excl": [],
"pair": [],
}
self.bases = {}
self.base_connectivity = {}
##################
# RNA PARAMETERS # @ff
##################
# RNA BACKBONE PARAMETERS TUT
self.rna_bb = {
"atom": spl("Q1 N4 N6"),
"bond": [
(1, 0.349, 18000),
(1, 0.377, 12000),
(1, 0.240, 18000),
(1, 0.412, 12000),
],
"angle": [(10, 119.0, 27), (10, 118.0, 140), (10, 138.0, 180)],
"dih": [
(
3,
13,
-7,
-25,
-6,
25,
-2,
), # (3, 10, -8, 22, 8, -26, -6) # (3, 4, -3, 9, 3, -10, -3)
(1, 0, 6, 1),
(1, -112.0, 15, 1),
], # (1, 15.0, 5, 1)
"excl": [(), (), ()],
"pair": [],
}
# RNA BACKBONE CONNECTIVITY
self.rna_con = {
"bond": [(0, 1), (1, 0), (1, 2), (2, 0)],
"angle": [
(0, 1, 0),
(1, 0, 1),
(0, 1, 2),
],
"dih": [
(0, 1, 0, 1),
(1, 0, 1, 0),
(1, 0, 1, 2),
],
"excl": [
(2, 0),
(0, 2),
],
"pair": [],
}
a_itp, c_itp, g_itp, u_itp = ForceField.read_itps(rna_system, "polar", "new")
# ADENINE
mapping = [spl("TA1 TA2 TA3 TA4 TA5 TA6")]
connectivity, itp_params = martini31nucleic.itp_to_indata(a_itp)
parameters = mapping + itp_params
self.update_adenine(mapping, connectivity, itp_params)
# CYTOSINE
mapping = [spl("TY1 TY2 TY3 TY4 TY5")]
connectivity, itp_params = martini31nucleic.itp_to_indata(c_itp)
parameters = mapping + itp_params
self.update_cytosine(mapping, connectivity, itp_params)
# GUANINE
mapping = [spl("TG1 TG2 TG3 TG4 TG5 TG6 TG7 TG8")]
connectivity, itp_params = martini31nucleic.itp_to_indata(g_itp)
parameters = mapping + itp_params
self.update_guanine(mapping, connectivity, itp_params)
# URACIL
mapping = [spl("TU1 TU2 TU3 TU4 TU5 TU6 TU7")]
connectivity, itp_params = martini31nucleic.itp_to_indata(u_itp)
parameters = mapping + itp_params
self.update_uracil(mapping, connectivity, itp_params)
super().__init__()
#########################
## 7 # ELASTIC NETWORK ## -> @ELN <-
#########################
import math
## ELASTIC NETWORK ##
# Only the decay function is defined here, the network
# itself is set up through the Topology class
# The function to determine the decay scaling factor for the elastic network
# force constant, based on the distance and the parameters provided.
# This function is very versatile and can be fitted to most commonly used
# profiles, including a straight line (rate=0)
[docs]
def decayFunction(distance, shift, rate, power):
return math.exp(-rate * math.pow(distance - shift, power))
[docs]
def rubberBands(
atomList,
lowerBound,
upperBound,
decayFactor,
decayPower,
forceConstant,
minimumForce,
):
out = []
u2 = upperBound**2
while len(atomList) > 3:
bi, xi = atomList.pop(0)
# This is a bit weird (=wrong I think) way of doing the cutoff...
# for bj,xj in atomList[2:]:
for bj, xj in atomList:
# Mind the nm/A conversion -- This has to be standardized! Global use of nm?
d2 = distance2(xi, xj) / 100
if d2 < u2:
dij = math.sqrt(d2)
fscl = decayFunction(dij, lowerBound, decayFactor, decayPower)
if fscl * forceConstant > minimumForce:
out.append(
{"atoms": (bi, bj), "parameters": (dij, "RUBBER_FC*%f" % fscl)}
)
return out
#######################
## 8 # STRUCTURE I/O ## -> @IO <-
#######################
import logging, math, random, sys
# ----+---------+
## A | PDB I/O |
# ----+---------+
d2r = 3.14159265358979323846264338327950288 / 180
# Reformatting of lines in structure file
pdbAtomLine = "ATOM %5d %4s%4s %1s%4d%1s %8.3f%8.3f%8.3f%6.2f%6.2f\n"
pdbBoxLine = "CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1 1\n"
[docs]
def pdbBoxString(box):
# Box vectors
u, v, w = box[0:3], box[3:6], box[6:9]
# Box vector lengths
nu, nv, nw = [math.sqrt(norm2(i)) for i in (u, v, w)]
# Box vector angles
alpha = nv * nw == 0 and 90 or math.acos(cos_angle(v, w)) / d2r
beta = nu * nw == 0 and 90 or math.acos(cos_angle(u, w)) / d2r
gamma = nu * nv == 0 and 90 or math.acos(cos_angle(u, v)) / d2r
return pdbBoxLine % (10 * norm(u), 10 * norm(v), 10 * norm(w), alpha, beta, gamma)
[docs]
def pdbAtom(a):
##01234567890123456789012345678901234567890123456789012345678901234567890123456789
##ATOM 2155 HH11 ARG C 203 116.140 48.800 6.280 1.00 0.00
if a.startswith("TER"):
return 0
# NOTE: The 27th field of an ATOM line in the PDB definition can contain an
# insertion code. We shift that 20 bits and add it to the residue number
# to ensure that residue numbers will be unique.
## ===> atom name, res name, res id, chain,
return (
a[12:16].strip(),
a[17:20].strip(),
int(a[22:26]) + (ord(a[26]) << 20),
a[21],
## x, y, z
float(a[30:38]),
float(a[38:46]),
float(a[46:54]),
)
[docs]
def pdbOut(atom, i=1):
insc = atom[2] >> 20
resi = atom[2] - (insc << 20)
pdbline = (
"ATOM %5i %-3s %3s%2s%4i%1s %8.3f%8.3f%8.3f%6.2f%6.2f %1s \n"
)
return pdbline % (
(i, atom[0][:3], atom[1], atom[3], resi, chr(insc))
+ atom[4:]
+ (1, 40, atom[0][0])
)
[docs]
def isPdbAtom(a):
return (
a.startswith("ATOM")
or (options["-hetatm"] and a.startswith("HETATM"))
or a.startswith("TER")
)
[docs]
def pdbBoxRead(a):
fa, fb, fc, aa, ab, ac = [float(i) for i in a.split()[1:7]]
ca, cb, cg, sg = (
math.cos(d2r * aa),
math.cos(d2r * ab),
math.cos(d2r * ac),
math.sin(d2r * ac),
)
wx, wy = 0.1 * fc * cb, 0.1 * fc * (ca - cb * cg) / sg
wz = math.sqrt(0.01 * fc * fc - wx * wx - wy * wy)
return [0.1 * fa, 0, 0, 0.1 * fb * cg, 0.1 * fb * sg, 0, wx, wy, wz]
# Function for splitting a PDB file in chains, based
# on chain identifiers and TER statements
[docs]
def pdbChains(pdbAtomList):
chain = []
for atom in pdbAtomList:
if not atom: # Was a "TER" statement
if chain:
yield chain
else:
logging.info("Skipping empty chain definition")
chain = []
continue
if not chain or chain[-1][3] == atom[3]:
chain.append(atom)
else:
yield chain
chain = [atom]
if chain:
yield chain
# Simple PDB iterator
[docs]
def pdbFrameIterator(streamIterator):
title, atoms, box = [], [], []
for i in streamIterator:
if i.startswith("ENDMDL"):
yield "".join(title), atoms, box
title, atoms, box = [], [], []
elif i.startswith("TITLE"):
title.append(i)
elif i.startswith("CRYST1"):
box = pdbBoxRead(i)
elif i.startswith("ATOM") or i.startswith("HETATM"):
atoms.append(pdbAtom(i))
if atoms:
yield "".join(title), atoms, box
# ----+-------------+
## C | GENERAL I/O |
# ----+-------------+
# It is not entirely clear where this fits in best.
# Called from main.
[docs]
def getChargeType(resname, resid, choices):
"""Get user input for the charge of residues, based on list with
choises."""
print("Which %s type do you want for residue %s:" % (resname, resid + 1))
for i, choice in choices.iteritems():
print("%s. %s" % (i, choice))
choice = None
while choice not in choices.keys():
choice = input("Type a number:")
return choices[choice]
# *NOTE*: This should probably be a CheckableStream class that
# reads in lines until either of a set of specified conditions
# is met, then setting the type and from thereon functioning as
# a normal stream.
[docs]
def streamTag(stream):
# Tag the stream with the type of structure file
# If necessary, open the stream, taking care of
# opening using gzip for gzipped files
# First check whether we have have an open stream or a file
# If it's a file, check whether it's zipped and open it
if type(stream) == str:
if stream.endswith("gz"):
logging.info("Read input structure from zipped file.")
s = gzip.open(stream)
else:
logging.info("Read input structure from file.")
s = open(stream)
else:
logging.info("Read input structure from command-line")
s = stream
# Read a few lines, but save them
x = [s.readline(), s.readline()]
if x[-1].strip().isdigit():
# Must be a GRO file
logging.info(
"Input structure is a GRO file. Chains will be labeled consecutively."
)
yield "GRO"
else:
# Must be a PDB file then
# Could wind further to see if we encounter an "ATOM" record
logging.info("Input structure is a PDB file.")
yield "PDB"
# Hand over the lines that were stored
for i in x:
yield i
# Now give the rest of the lines from the stream
for i in s:
yield i
# ----+-----------------+
## D | STRUCTURE STUFF |
# ----+-----------------+
# This list allows to retrieve atoms based on the name or the index
# If standard, dictionary type indexing is used, only exact matches are
# returned. Alternatively, partial matching can be achieved by setting
# a second 'True' argument.
[docs]
class Residue(list):
def __getitem__(self, tag):
if type(tag) == int:
# Call the parent class __getitem__
return list.__getitem__(self, tag)
if type(tag) == str:
for i in self:
if i[0] == tag:
return i
else:
return
if tag[1]:
return [i for i in self if tag[0] in i[0]] # Return partial matches
else:
return [i for i in self if i[0] == tag[0]] # Return exact matches only
[docs]
def residues(atomList):
residue = [atomList[0]]
for atom in atomList[1:]:
if (
atom[1] == residue[-1][1] # Residue name check
and atom[2] == residue[-1][2] # Residue id check
and atom[3] == residue[-1][3]
): # Chain id check
residue.append(atom)
else:
yield Residue(residue)
residue = [atom]
yield Residue(residue)
[docs]
def residueDistance2(r1, r2):
return min([distance2(i, j) for i in r1 for j in r2])
# Increased the cut-off from 2.5 to 3.0 for DNA. Should check that no problems arise for proteins.
[docs]
def breaks(
residuelist, selection=("P", "C2'", "C3'", "O3'", "C4'", "C5'", "O5'"), cutoff=3.0
):
# Extract backbone atoms coordinates
bb = [
[atom[4:] for atom in residue if atom[0] in selection]
for residue in residuelist
]
# Needed to remove waters residues from mixed residues.
bb = [res for res in bb if res != []]
# We cannot rely on some standard order for the backbone atoms.
# Therefore breaks are inferred from the minimal distance between
# backbone atoms from adjacent residues.
breaks = [
i + 1 for i in range(len(bb) - 1) if residueDistance2(bb[i], bb[i + 1]) > cutoff
]
return breaks
[docs]
def add_dummy(beads, dist=0.11, n=2):
# Generate a random vector in a sphere of -1 to +1, to add to the bead position
v = [
random.random() * 2.0 - 1,
random.random() * 2.0 - 1,
random.random() * 2.0 - 1,
]
# Calculated the length of the vector and divide by the final distance of the dummy bead
norm_v = norm(v) / dist
# Resize the vector
vn = [i / norm_v for i in v]
# m sets the direction of the added vector, currently only works when adding one or two beads.
m = 1
for j in range(n):
newName = "SCD"
newBead = (
newName,
tuple([i + (m * j) for i, j in zip(beads[-1][1], vn)]),
beads[-1][2],
)
beads.append(newBead)
m *= -2
return beads
[docs]
def check_merge(chains, m_list=[], l_list=[], ss_cutoff=0):
chainIndex = range(len(chains))
if "all" in m_list:
logging.info("All chains will be merged in a single moleculetype.")
return chainIndex, [chainIndex]
chainID = [chain._id for chain in chains]
# Mark the combinations of chains that need to be merged
merges = []
if m_list:
# Build a dictionary of chain IDs versus index
# To give higher priority to top chains the lists are reversed
# before building the dictionary
chainIndex = list(reversed(chainIndex))
chainID = list(reversed(chainID))
dct = dict(zip(chainID, chainIndex))
chainIndex = list(reversed(chainIndex))
# Convert chains in the merge_list to numeric, if necessary
# NOTE The internal numbering is zero-based, while the
# command line chain indexing is one-based. We have to add
# one to the number in the dictionary to bring it on par with
# the numbering from the command line, but then from the
# result we need to subtract one again to make indexing
# zero-based
# merges = [[(i.isdigit() and int(i) or dct[i]+1)-1 for i in j] for j in m_list]
merges = [[0, 1]]
for i in merges:
i.sort()
# Rearrange merge list to a list of pairs
pairs = [
(i[j], i[k])
for i in merges
for j in range(len(i) - 1)
for k in range(j + 1, len(i))
]
# Check each combination of chains for connections based on
# ss-bridges, links and distance restraints
for i in chainIndex[:-1]:
for j in chainIndex[i + 1 :]:
if (i, j) in pairs:
continue
# Check whether any link links these two groups
for a, b in l_list:
if (a in chains[i] and b in chains[j]) or (
a in chains[j] and b in chains[i]
):
logging.info(
"Merging chains %d and %d to allow link %s"
% (i + 1, j + 1, str((a, b)))
)
pairs.append(i < j and (i, j) or (j, i))
break
if (i, j) in pairs:
continue
# Sort the combinations
pairs.sort(reverse=True)
merges = []
while pairs:
merges.append(set([pairs[-1][0]]))
for i in range(len(pairs) - 1, -1, -1):
if pairs[i][0] in merges[-1]:
merges[-1].add(pairs.pop(i)[1])
elif pairs[i][1] in merges[-1]:
merges[-1].add(pairs.pop(i)[0])
merges = [list(i) for i in merges]
for i in merges:
i.sort()
order = [j for i in merges for j in i]
if merges:
logging.warning("Merging chains.")
logging.warning(
"This may change the order of atoms and will change the number of topology files."
)
logging.info("Merges: " + ", ".join([str([j + 1 for j in i]) for i in merges]))
if len(merges) == 1 and len(merges[0]) > 1 and set(merges[0]) == set(chainIndex):
logging.info("All chains will be merged in a single moleculetype")
# Determine the order for writing; merged chains go first
merges.extend([[j] for j in chainIndex if not j in order])
order.extend([j for j in chainIndex if not j in order])
return order, merges
## !! NOTE !! ##
## XXX The chain class needs to be simplified by extracting things to separate functions/classes
[docs]
class Chain:
# Attributes defining a chain
# When copying a chain, or slicing, the attributes in this list have to
# be handled accordingly.
_attributes = ("residues", "sequence", "seq", "ss", "ssclass", "sstypes")
def __init__(self, options, residuelist=[], name=None, multiscale=False):
self.residues = residuelist
self._atoms = [atom[:3] for residue in residuelist for atom in residue]
self.sequence = [residue[0][1] for residue in residuelist]
# *NOTE*: Check for unknown residues and remove them if requested
# before proceeding.
self.seq = "".join([AA321.get(i, "X") for i in self.sequence])
self.ss = ""
self.ssclass = ""
self.sstypes = ""
self.mapping = []
self.multiscale = False
self.options = options
# Unknown residues
self.unknowns = "X" in self.seq
# Determine the type of chain
self._type = ""
self.type()
# Determine number of atoms
self.natoms = len(self._atoms)
# BREAKS: List of indices of residues where a new fragment starts
# Only when polymeric (protein, DNA, RNA, ...)
# For now, let's remove it for the Nucleic acids...
# self.breaks = self.type() in ("Protein", "Mixed", "Nucleic") and breaks(self.residues) or []
self.breaks = breaks(self.residues)
# LINKS: List of pairs of pairs of indices of linked residues/atoms
# This list is used for cysteine bridges and peptide bonds involving side chains
# The list has items like ((#resi1, #atid1), (#resi2, #atid2))
# When merging chains, the residue number needs ot be update, but the atom id
# remains unchanged.
# For the coarse grained system, it needs to be checked which beads the respective
# atoms fall in, and bonded terms need to be added there.
self.links = []
# Chain identifier; try to read from residue definition if no name is given
self._id = name or residuelist and residuelist[0][0][3] or ""
# Container for coarse grained beads
self._cg = None
def __len__(self):
# Return the number of residues
# DNA/RNA contain non-CAP d/r to indicate type. We remove those first.
return len("".join(i for i in self.seq if i.isupper()))
def __add__(self, other):
newchain = Chain(name=self._id + "+" + other._id)
# Combine the chain items that can be simply added
for attr in self._attributes:
setattr(newchain, attr, getattr(self, attr) + getattr(other, attr))
# Set chain items, shifting the residue numbers
shift = len(self)
newchain.breaks = self.breaks + [shift] + [i + shift for i in other.breaks]
newchain.links = self.links + [
((i[0] + shift, i[1]), (j[0] + shift, j[1])) for i, j in other.links
]
newchain.natoms = len(newchain.atoms())
newchain.multiscale = False
# Return the merged chain
print(newchain.breaks)
return newchain
def __eq__(self, other):
return (
self.seq == other.seq
and self.ss == other.ss
and self.breaks == other.breaks
and self.links == other.links
and self.multiscale == False
)
# Extract a residue by number or the list of residues of a given type
# This facilitates selecting residues for links, like chain["CYS"]
def __getitem__(self, other):
if type(other) == str:
if not other in self.sequence:
return []
return [i for i in self.residues if i[0][1] == other]
elif type(other) == tuple:
# This functionality is set up for links
# between coarse grained beads. So these are
# checked first,
for i in self.cg():
if other == i[:4]:
return i
else:
for i in self.atoms():
if other[:3] == i[:3]:
return i
else:
return []
return self.sequence[other]
# Extract a piece of a chain as a new chain
def __getslice__(self, i, j):
newchain = Chain(self.options, name=self._id)
# Extract the slices from all lists
for attr in self._attributes:
setattr(newchain, attr, getattr(self, attr)[i:j])
# Breaks that fall within the start and end of this chain need to be passed on.
# Residue numbering is increased by 20 bits!!
# XXX I don't know if this works.
ch_sta, ch_end = newchain.residues[0][0][2], newchain.residues[-1][0][2]
newchain.breaks = [
crack for crack in self.breaks if ch_sta < (crack << 20) < ch_end
]
newchain.links = [link for link in self.links if ch_sta < (link << 20) < ch_end]
newchain.multiscale = False
newchain.natoms = len(newchain.atoms())
newchain.type()
# Return the chain slice
return newchain
def _contains(self, atomlist, atom):
atnm, resn, resi, chn = atom
# If the chain does not match, bail out
if chn != self._id:
return False
# Check if the whole tuple is in
if atnm and resn and resi:
return (atnm, resn, resi) in self.atoms()
# Fetch atoms with matching residue id
match = (not resi) and atomlist or [j for j in atomlist if j[2] == resi]
if not match:
return False
# Select atoms with matching residue name
match = (not resn) and match or [j for j in match if j[1] == resn]
if not match:
return False
# Check whether the atom is given and listed
if not atnm or [j for j in match if j[0] == atnm]:
return True
# It just is not in the list!
return False
def __contains__(self, other):
return self._contains(self.atoms(), other) or self._contains(self.cg(), other)
def __hash__(self):
return id(self)
[docs]
def atoms(self):
if not self._atoms:
self._atoms = [atom[:3] for residue in self.residues for atom in residue]
return self._atoms
# Split a chain based on residue types; each subchain can have only one type
[docs]
def split(self):
chains = []
chainStart = 0
for i in range(len(self.sequence) - 1):
if residueTypes.get(self.sequence[i], "Unknown") != residueTypes.get(
self.sequence[i + 1], "Unknown"
):
# Use the __getslice__ method to take a part of the chain.
chains.append(self[chainStart : i + 1])
chainStart = i + 1
if chains:
logging.debug(
"Splitting chain %s in %s chains" % (self._id, len(chains) + 1)
)
return chains + [self[chainStart:]]
[docs]
def getname(self, basename=None):
name = []
if basename:
name.append(basename)
if self.type() and not basename:
name.append(self.type())
if type(self._id) == int:
name.append(chr(64 + self._id))
elif self._id.strip():
name.append(str(self._id))
return "_".join(name)
[docs]
def set_ss(self, ss, source="self"):
if len(ss) == 1:
self.ss = len(self) * ss
else:
self.ss = ss
# Infer the Martini backbone secondary structure types
self.ssclass, self.sstypes = ssClassification(self.ss, source)
[docs]
def dss(self, method=None, executable=None):
# The method should take a list of atoms and return a
# string of secondary structure classifications
if self.type() == "Protein":
if method:
atomlist = [atom for residue in self.residues for atom in residue]
self.set_ss(
ssDetermination[method](self, atomlist, executable), source=method
)
else:
self.set_ss(len(self) * "C")
else:
self.set_ss(len(self.sequence) * "-")
return self.ss
[docs]
def type(self, other=None):
if other:
self._type = other
elif not self._type and len(self):
# Determine the type of chain
self._type = set(
[residueTypes.get(i, "Unknown") for i in set(self.sequence)]
)
self._type = len(self._type) > 1 and "Mixed" or list(self._type)[0]
return self._type
# XXX The following (at least the greater part of it) should be made a separate function, put under "MAPPING"
[docs]
def cg(self, force=False, com=False, dna=False):
# Generate the coarse grained structure
# Set the b-factor field to something that reflects the secondary structure
# If the coarse grained structure is set already, just return,
# unless regeneration is forced.
if self._cg and not force:
return self._cg
self._cg = []
atid = 1
bb = [1]
fail = False
previous = ""
for residue, rss, resname in zip(self.residues, self.sstypes, self.sequence):
# For DNA we need to get the O3' to the following residue when calculating COM
# The force and com options ensure that this part does not affect itp generation or anything else
if com:
# Just an initialization, this should complain if it isn't updated in the loop
store = 0
for ind, i in enumerate(residue):
if i[0] == "O3'":
if previous != "":
residue[ind] = previous
previous = i
else:
store = ind
previous = i
# We couldn't remove the O3' from the 5' end residue during the loop so we do it now
if store > 0:
del residue[store]
# Check if residues names has changed, for example because user has set residues interactively.
residue = [(atom[0], resname) + atom[2:] for atom in residue]
if residue[0][1] in ("SOL", "HOH", "TIP"):
continue
str2ff = {
"martini30nucleic": martini30nucleic,
"martini31nucleic": martini31nucleic,
}
ffname = options["ForceField"].name
ff = str2ff[ffname]
if not residue[0][1] in ff.mapping.keys():
logging.warning("Skipped unknown residue %s\n" % residue[0][1])
continue
# Get the mapping for this residue
# CG.map returns bead coordinates and mapped atoms
# This will fail if there are (too many) atoms missing, which is
# only problematic if a mapped structure is written; the topology
# is inferred from the sequence. So this is the best place to raise
# an error
try:
# The last residue, in the case of a polBB has only a CA-positioned bead.
beads, ids = map(residue, ff)
beads = zip(CoarseGrained.names[residue[0][1]], beads, ids)
except ValueError:
logging.error(
"Too many atoms missing from residue %s %d(ch:%s):",
residue[0][1],
residue[0][2] >> 20,
residue[0][3],
)
logging.error(repr([i[0] for i in residue]))
fail = True
for name, (x, y, z), ids in beads:
# Add the bead with coordinates and secondary structure id to the list
self._cg.append(
(
name,
residue[0][1][:3],
residue[0][2],
residue[0][3],
x,
y,
z,
ss2num[rss],
)
)
# Add the ids to the list, after converting them to indices to the list of atoms
self.mapping.append([atid + i for i in ids])
# Increment the atom id; This pertains to the atoms that are included in the output.
atid += len(residue)
# Keep track of the numbers for CONECTing
bb.append(bb[-1] + len(list(beads)))
if fail:
logging.error(
"Unable to generate coarse grained structure due to missing atoms."
)
sys.exit(1)
return self._cg
[docs]
def conect(self):
# Return pairs of numbers that should be CONECTed
# First extract the backbone IDs
cg = self.cg()
bb = [i + 1 for i, j in zip(range(len(cg)), cg) if j[0] == "BB"]
bb = zip(bb, bb[1:] + [len(bb)])
# Set the backbone CONECTs (check whether the distance is consistent with binding)
conect = [
(i, j) for i, j in bb[:-1] if distance2(cg[i - 1][4:7], cg[j - 1][4:7]) < 14
]
# Now add CONECTs for sidechains
for i, j in bb:
nsc = j - i - 1
##################
## 7 # TOPOLOGY ## -> @TOP <-
##################
import logging, math
# This is a generic class for Topology Bonded Type definitions
[docs]
class Bonded:
# The init method is generic to the bonded types,
# but may call the set method if atoms are given
# as (ID, ResidueName, SecondaryStructure) tuples
# The set method is specific to the different types.
def __init__(self, other=None, options=None, **kwargs):
self.atoms = []
self.type = -1
self.parameters = []
self.comments = []
self.category = None
if options and type(options) == dict:
self.options = options
if other:
# If other is given, then copy the attributes
# if it is of the same class or set the
# attributes according to the key names if
# it is a dictionary
if other.__class__ == self.__class__:
for attr in dir(other):
if not attr[0] == "_":
setattr(self, attr, getattr(other, attr))
elif type(other) == dict:
for attr in other.keys():
setattr(self, attr, other[attr])
elif type(other) in (list, tuple):
self.atoms = other
# For every item in the kwargs keys, set the attribute
# with the same name. This can be used to specify the
# attributes directly or to override attributes
# copied from the 'other' argument.
for key in kwargs:
setattr(self, key, kwargs[key])
# If atoms are given as tuples of
# (ID, ResidueName[, SecondaryStructure])
# then determine the corresponding parameters
# from the lists above
if self.atoms and type(self.atoms[0]) == tuple:
self.set(self.atoms, **kwargs)
def __nonzero__(self):
return bool(self.atoms) and bool(self.parameters)
def __str__(self):
if not self.atoms or not self.parameters:
return ""
s = ["%5d" % i for i in self.atoms]
# For exclusions, no type is defined, which equals -1
if self.type != -1:
s.append(" %5d " % self.type)
# Print integers and floats in proper format and neglect None terms
s.extend([formatString(i) for i in self.parameters if i != None])
if self.comments:
s.append(";")
if type(self.comments) == str:
s.append(self.comments)
else:
s.extend([str(i) for i in self.comments])
return " ".join(s)
def __iadd__(self, num):
self.atoms = tuple([i + int(num) for i in self.atoms])
return self
def __add__(self, num):
out = self.__class__(self)
out += num
return out
def __eq__(self, other):
if type(other) in (list, tuple):
return self.atoms == other
else:
return (
self.atoms == other.atoms
and self.type == other.type
and self.parameters == other.parameters
)
# This function needs to be overridden for descendents
[docs]
def set(self, atoms, **kwargs):
pass
# The set method of this class will look up parameters for backbone beads
# Side chain bonds ought to be set directly, using the constructor
# providing atom numbers, bond type, and parameters
# Constraints are bonds with kb = 100000.00000, which can be extracted
# using the category
[docs]
class Bond(Bonded):
[docs]
def set(self, atoms, **kwargs):
ids, r, ss, ca = zip(*atoms)
self.atoms = ids
self.type = 1
self.positionCa = ca
self.comments = "%s(%s)-%s(%s)" % (r[0], ss[0], r[1], ss[1])
# The category can be used to keep bonds sorted
self.category = kwargs.get("category")
if not self.parameters:
self.parameters = self.options["ForceField"].bbGetBond(r, ca, ss)
# If there are more than two parameters given, user HAS TO define the bond type as the first one.
# This is to allow use of any gromacs bond type.
if self.parameters and len(self.parameters) > 2:
self.type, self.parameters = self.parameters[0], self.parameters[1:]
# Backbone bonds also can be constraints. We could change the type further on, but this is more general.
# Even better would be to add a new type: BB-Constraint
if self.parameters and self.parameters[1] == "100000.00000":
self.category = "Constraint"
# Overriding __str__ method to suppress printing of bonds with Fc of 0
# def __str__(self):
# if not self.parameters:
# return ""
# return Bonded.__str__(self)
# Similar to the preceding class
[docs]
class Angle(Bonded):
[docs]
def set(self, atoms, **kwargs):
ids, r, ss, ca = zip(*atoms)
self.atoms = ids
self.type = 2
self.positionCa = ca
self.comments = "%s(%s)-%s(%s)-%s(%s)" % (r[0], ss[0], r[1], ss[1], r[2], ss[2])
self.category = kwargs.get("category")
self.parameters = self.options["ForceField"].bbGetAngle(r, ca, ss)
# If there are more than two parameters given, user HAS TO define the angle type as the first one.
# This is to allow use of any gromacs angle type.
if self.parameters and len(self.parameters) > 2:
self.type, self.parameters = self.parameters[0], self.parameters[1:]
# Similar to the preceding class
[docs]
class Vsite(Bonded):
[docs]
def set(self, atoms, **kwargs):
ids, r, ss, ca = zip(*atoms)
self.atoms = ids
self.type = 2
self.positionCa = ca
self.comments = "%s" % (r[0])
self.category = kwargs.get("category")
parameters = kwargs.get("parameters")
def __str__(self):
if not self.atoms:
return ""
# s = ["%5d" % i for i in self.atoms]
s = [
formatString(self.atoms[0]),
formatString(self.atoms[1]),
formatString(self.atoms[2]),
formatString(self.atoms[3]),
formatString(self.parameters[0]),
formatString(self.parameters[1]),
formatString(self.parameters[2]),
]
if self.comments:
s.append(";")
if type(self.comments) == str:
s.append(self.comments)
else:
s.extend([str(i) for i in self.comments])
return " ".join(s)
# Similar to the preceding class
[docs]
class Exclusion(Bonded):
[docs]
def set(self, atoms, **kwargs):
ids, r, ss, ca = zip(*atoms)
self.atoms = ids
self.positionCa = ca
self.comments = "%s" % (r[0])
self.category = kwargs.get("category")
self.parameters = kwargs.get("parameters")
if not self.parameters:
self.parameters = self.options["ForceField"].bbGetExclusion(r, ca, ss)
# Similar to the preceding class
[docs]
class Pair(Bonded):
[docs]
def set(self, atoms, **kwargs):
ids, r, ss, ca = zip(*atoms)
self.atoms = ids
self.positionCa = ca
self.comments = "%s(%s)-%s(%s)" % (r[0], ss[0], r[1], ss[1])
self.category = kwargs.get("category")
self.type = kwargs.get("type") or 1
if not self.parameters:
self.parameters = self.options["ForceField"].bbGetPair(r, ca, ss)
# Similar to the preceding class
[docs]
class Dihedral(Bonded):
[docs]
def set(self, atoms, **kwargs):
ids, r, ss, ca = zip(*atoms)
self.atoms = ids
self.type = 1
self.positionCa = ca
self.comments = "%s(%s)-%s(%s)-%s(%s)-%s(%s)" % (
r[0],
ss[0],
r[1],
ss[1],
r[2],
ss[2],
r[3],
ss[3],
)
self.category = kwargs.get("category")
if not self.parameters:
self.parameters = self.options["ForceField"].bbGetDihedral(r, ca, ss)
# If there are more than two parameters given, user HAS TO define the diheral type as the first one.
# This is to allow use of any gromacs dihedral type.
if self.parameters and len(self.parameters) > 2:
self.type, self.parameters = self.parameters[0], self.parameters[1:]
# This list allows to retrieve Bonded class items based on the category
# If standard, dictionary type indexing is used, only exact matches are
# returned. Alternatively, partial matching can be achieved by setting
# a second 'True' argument.
[docs]
class CategorizedList(list):
def __getitem__(self, tag):
if type(tag) == int:
# Call the parent class __getitem__
return list.__getitem__(self, tag)
if type(tag) == str:
return [i for i in self if i.category == tag]
if tag[1]:
return [i for i in self if tag[0] in i.category]
else:
return [i for i in self if i.category == tag[0]]
[docs]
class Topology:
def __init__(self, other=None, options=None, name=""):
self.name = ""
self.nrexcl = 1
self.atoms = CategorizedList()
self.pairs = CategorizedList()
self.vsites = CategorizedList()
self.exclusions = CategorizedList()
self.bonds = CategorizedList()
self.angles = CategorizedList()
self.dihedrals = CategorizedList()
self.impropers = CategorizedList()
self.constraints = CategorizedList()
self.posres = CategorizedList()
self.sequence = []
self.secstruc = ""
# Okay, this is sort of funny; we will add a
# #define mapping virtual_sites3
# to the topology file, followed by a header
# [ mapping ]
self.mapping = []
# For multiscaling we have to keep track of the number of
# real atoms that correspond to the beads in the topology
self.natoms = 0
self.multiscale = False
if options:
self.options = options
else:
self.options = {}
if not other:
# Returning an empty instance
return
elif isinstance(other, Topology):
for attrib in [
"atoms",
"vsites",
"bonds",
"angles",
"dihedrals",
"impropers",
"constraints",
"posres",
]:
setattr(self, attrib, getattr(other, attrib, []))
elif isinstance(other, Chain):
if other.type() == "Protein" and other.options["ForceField"].name in [
"polbb"
]:
self.fromAminoAcidSequencePolBB(other)
elif other.type() == "Protein":
self.fromAminoAcidSequence(other)
elif other.type() == "Nucleic":
self.fromNucleicAcidSequence(other)
else:
# This chain should not be polymeric, but a collection of molecules
# For each unique residue type fetch the proper moleculetype
self.fromMoleculeList(other)
if name:
self.name = name
def __iadd__(self, other):
if not isinstance(other, Topology):
other = Topology(other)
shift = len(self.atoms)
last = self.atoms[-1]
atoms = list(zip(*other.atoms))
atoms[0] = [i + shift for i in atoms[0]] # Update atom numbers
atoms[2] = [i + last[2] for i in atoms[2]] # Update residue numbers
atoms[5] = [i + last[5] for i in atoms[5]] # Update charge group numbers
atoms = list(zip(*atoms))
# The zippings above kills all atoms with specified mass (9 long lists)
# Let's add some band aid to fix it...
# This of course doesn't work if there was a secondary structure to keep
# but think it affects at this stage only the output comment.
for i in range(len(atoms)):
if atoms[i][-1] == 0:
atoms[i] = atoms[i] + ("c",)
self.atoms.extend(atoms)
for attrib in [
"bonds",
"vsites",
"exclusions",
"angles",
"dihedrals",
"impropers",
"constraints",
"posres",
]:
getattr(self, attrib).extend(
[source + shift for source in getattr(other, attrib)]
)
return self
def __add__(self, other):
out = Topology(self)
if not isinstance(other, Topology):
other = Topology(other)
out += other
return out
def __str__(self):
string = '; MARTINI (%s) Coarse Grained topology file for "%s"' % (
self.options["ForceField"].name,
self.name,
)
string += "\n; Created by py version %s \n; Using the following options: " % (
self.options["Version"]
)
string += " ".join(self.options["Arguments"])
string += "\n; #####################################################################################################"
string += "\n; This topology is based on development beta of Martini DNA and should NOT be used for production runs."
string += "\n; #####################################################################################################"
out = [string]
if self.sequence:
out += [
"; Sequence:",
"; " + "".join([AA321.get(AA) for AA in self.sequence]),
"; Secondary Structure:",
"; " + self.secstruc,
]
# Do not print a molecule name when multiscaling
# In that case, the topology created here needs to be appended
# at the end of an atomistic moleculetype
if not self.multiscale:
out += [
"\n[ moleculetype ]",
"; Name Exclusions",
"%-15s %3d" % (self.name, self.nrexcl),
]
out.append("\n[ atoms ]")
# For virtual sites and dummy beads we have to be able to specify the mass.
# Thus we need two different format strings:
fs8 = "%5d %5s %5d %5s %5s %5d %7.4f ; %s"
fs9 = "%5d %5s %5d %5s %5s %5d %7.4f %7.4f ; %s"
out.extend([len(i) == 9 and fs9 % i or fs8 % i for i in self.atoms])
# Print the pairs.
pairs = [str(i) for i in self.pairs if str(i)]
if pairs:
out.append("\n[ pairs ]")
out.extend(pairs)
# Print out the vsites only if they excist. Right now it can only be type 1 virual sites.
# TODO: This needs to be generalized for all virtual site types.
vsitesBB = [str(i) for i in self.vsites["BB"] if str(i)]
vsitesSC = [str(i) for i in self.vsites["SC"] if str(i)]
if vsitesBB or vsitesSC:
out.append("\n[ virtual_sites3 ]")
if vsitesBB:
out.append("; Backbone virtual sites.")
out.extend(vsitesBB)
if vsitesSC:
out.append("; Sidechain virtual sites.")
out.extend(vsitesSC)
# Bonds in order: backbone, backbone-sidechain, sidechain, short elastic, long elastic
out.append("\n[ bonds ]")
# Backbone-backbone
bonds = [str(i) for i in self.bonds["BB"] if str(i)]
if bonds:
out.append("; Backbone bonds")
out.extend(bonds)
# Rubber Bands
bonds = [str(i) for i in self.bonds["Rubber", True] if str(i)]
if bonds:
# Add a CPP style directive to allow control over the elastic network
out.append("#ifdef RUBBER_BANDS")
out.append(
"#ifndef RUBBER_FC\n#define RUBBER_FC %f\n#endif"
% self.options["ElasticMaximumForce"]
)
out.extend(bonds)
out.append("#endif")
# Backbone-Sidechain/Sidechain-Sidechain
bonds = [str(i) for i in self.bonds["SC"] if str(i)]
if bonds:
out.append("; Sidechain bonds")
out.extend(bonds)
# Short elastic/Long elastic
bonds = [str(i) for i in self.bonds["Elastic short"] if str(i)]
if bonds:
out.append("; Short elastic bonds for extended regions")
out.extend(bonds)
bonds = [str(i) for i in self.bonds["Elastic long"] if str(i)]
if bonds:
out.append("; Long elastic bonds for extended regions")
out.extend(bonds)
# Cystine bridges
bonds = [str(i) for i in self.bonds["Cystine"] if str(i)]
if bonds:
out.append("; Cystine bridges")
out.extend(bonds)
# Other links
bonds = [str(i) for i in self.bonds["Link"] if str(i)]
if bonds:
out.append("; Links/Cystine bridges")
out.extend(bonds)
# Constraints
out.append("\n[ constraints ]")
out.extend([str(i) for i in self.bonds["Constraint"]])
# Print out the exclusions only if they excist.
exclusions = [str(i) for i in self.exclusions if str(i)]
if exclusions:
out.append("\n[ exclusions ]")
out.extend(exclusions)
# Angles
out.append("\n[ angles ]")
out.append("; Backbone angles")
out.extend([str(i) for i in self.angles["BBB"] if str(i)])
out.append("; Backbone-sidechain angles")
out.extend([str(i) for i in self.angles["BBS"] if str(i)])
out.append("; Sidechain angles")
out.extend([str(i) for i in self.angles["SC"] if str(i)])
# Dihedrals
out.append("\n[ dihedrals ]")
out.append("; Backbone dihedrals")
out.extend([str(i) for i in self.dihedrals["BBBB"] if i.parameters])
out.append("; Sidechain dihedrals")
out.extend([str(i) for i in self.dihedrals["BSC"] if i.parameters])
out.append("; Sidechain improper dihedrals")
out.extend([str(i) for i in self.dihedrals["SC"] if i.parameters])
# Postition Restraints
if self.posres:
out.append("\n#ifdef POSRES")
out.append(
"#ifndef POSRES_FC\n#define POSRES_FC %.2f\n#endif"
% self.options["PosResForce"]
)
out.append(" [ position_restraints ]")
out.extend(
[
" %5d 1 POSRES_FC POSRES_FC POSRES_FC" % i
for i in self.posres
]
)
out.append("#endif")
logging.info("Created coarsegrained topology")
return "\n".join(out)
[docs]
def fromNucleicAcidSequence(
self,
sequence,
secstruc=None,
links=None,
breaks=None,
mapping=None,
rubber=False,
multi=False,
):
# Shift for the atom numbers of the atomistic part in a chain
# that is being multiscaled
shift = 0
# First check if we get a sequence or a Chain instance
if isinstance(sequence, Chain):
chain = sequence
links = chain.links
breaks = chain.breaks
# If the mapping is not specified, the actual mapping is taken,
# used to construct the coarse grained system from the atomistic one.
# The function argument "mapping" could be used to use a default
# mapping scheme in stead, like the mapping for the GROMOS96 force field.
mapping = mapping or chain.mapping
multi = False
self.secstruc = chain.sstypes or len(chain) * "F"
self.sequence = chain.sequence
self.multiscale = False
logging.debug(self.secstruc)
logging.debug(self.sequence)
# Fetch the base information
# Pad with empty lists for atoms, bonds, angles
# and dihedrals, and take the first five lists out
# This will avoid errors for residues for which
# these are not defined.
sc = [
(self.options["ForceField"].bases[res] + 8 * [[]])[:8]
for res in self.sequence
]
# ID of the first atom/residue
# The atom number and residue number follow from the last
# atom c.q. residue id in the list processed in the topology
# thus far. In the case of multiscaling, the real atoms need
# also be accounted for.
startAtom = self.natoms + 1
startResi = self.atoms and self.atoms[-1][2] + 1 or 1
# Backbone bead atom IDs
bbid = [[startAtom, startAtom + 1, startAtom + 2]]
for i in sc:
bbid1 = bbid[-1][0] + len(i[0]) + 3
bbid.append([bbid1, bbid1 + 1, bbid1 + 2])
# Residue numbers for this moleculetype topology
resid = range(startResi, startResi + len(self.sequence))
# This contains the information for deriving backbone bead types,
# bb bond types, bbb/bbs angle types, and bbbb dihedral types.
seqss = list(zip(bbid, self.sequence, self.secstruc))
# Fetch the proper backbone beads
# Since there are three beads we need to split these to the list
bb = [self.options["ForceField"].bbGetBead(res, typ) for num, res, typ in seqss]
bbMulti = [i for j in bb for i in j]
# For backbone parameters, iterate over fragments, inferred from breaks
for i, j in zip([0] + breaks, breaks + [-1]):
# Extract the fragment
frg = seqss[i:] if j == -1 else seqss[i:j]
# Expand the 3 bb beads per residue into one long list
# Resulting list contains three tuples per residue
# We use the useless ca parameter to get the correct backbone bond from bbGetBond
# The i is used to define ca because it conveniently here gives the right sequence 0,1,2,0,1,...
frg = [(j[0][i], j[1], j[2], i) for j in frg for i in range(len(j[0]))]
# Iterate over backbone bonds, two loops are needed because there are multipe cross bonds in the BB.
# Since bonded interactions can return None, we first collect them separately and then
# add the non-None ones to the list
# This part assumes you have a linear backbone (checks only combinations that make sense for such)
for ind, k in enumerate(frg):
# Exclusions iterate over the second atom.
# and then checked for each pair.
for l in frg[ind : ind + 3]:
# excl = Exclusion((k,l), category="BB", options=self.options,)
# if excl:
# self.exclusions.append(excl)
if l > k:
excl = Exclusion(
(k, l),
category="BB",
options=self.options,
)
self.exclusions.append(excl)
# Pairs iterate over the second atom.
# and then checked for each pair.
for l in frg[ind : ind + 3]:
pair = Pair(
(k, l),
category="BB",
options=self.options,
)
if pair:
self.pairs.append(pair)
# Bonds iterate over the second atom.
for l in frg[ind : ind + 3]:
if l > k:
bond = Bond(
(k, l),
category="BB",
options=self.options,
)
self.bonds.append(bond)
# The angles need a third loop.
for m in frg[ind : ind + 4]:
if m > l > k:
angle = Angle(
(
k,
l,
m,
),
options=self.options,
category="BBB",
)
self.angles.append(angle)
# The dihedrals need a fourth loop.
for n in frg[ind + 1 : ind + 6]:
if n > m > l > k:
dihed = Dihedral(
(
k,
l,
m,
n,
),
options=self.options,
category="BBBB",
)
if dihed.parameters == None:
continue
if dihed.parameters[1] != 0:
self.dihedrals.append(dihed)
# Now do the atom list, and take the sidechains along
#
atid = startAtom
# We need to do some trickery to get all 3 bb beads in to these lists
# This adds each element to a list three times, feel free to shorten up
residMulti = [i for i in resid for j in range(3)]
sequenceMulti = [i for i in self.sequence for j in range(3)]
scMulti = [i for i in sc for j in range(3)]
secstrucMulti = [i for i in self.secstruc for j in range(3)]
count = 0
for resi, resname, bbb, sidechn, ss in zip(
residMulti, sequenceMulti, bbMulti, scMulti, secstrucMulti
):
# We only want one side chain per three backbone beads so this skips the others
if (count % 3) == 0:
# Note added impropers in contrast to aa
(
scatoms,
bon_par,
ang_par,
dih_par,
imp_par,
vsite_par,
excl_par,
pair_par,
) = sidechn
# Side chain bonded terms
# Collect bond, angle and dihedral connectivity
# Impropers needed to be added here for DNA
bon_con, ang_con, dih_con, imp_con, vsite_con, excl_con, pair_con = (
self.options["ForceField"].base_connectivity[resname] + 7 * [[]]
)[:7]
# Ill try to fix the exclusions by using separate connectivity record for DNA
# bon_con,ang_con,dih_con,imp_con,vsite_con = (self.options['ForceField'].connectivity[resname]+5*[[]])[:5]
# Side Chain Bonds/Constraints
for atids, par in zip(bon_con, bon_par):
if par[2] == 100000.00000:
self.bonds.append(
Bond(
options=self.options,
atoms=atids,
parameters=[par[1]],
type=par[0],
comments=resname,
category="Constraint",
)
)
else:
bond = Bond(
options=self.options,
atoms=atids,
parameters=par[1:],
type=par[0],
comments=resname,
category="SC",
)
self.bonds.append(bond)
# Shift the atom numbers
self.bonds[-1] += atid
# Side Chain Angles
for atids, par in zip(ang_con, ang_par):
self.angles.append(
Angle(
options=self.options,
atoms=atids,
parameters=par[1:],
type=par[0],
comments=resname,
category="SC",
)
)
# Shift the atom numbers
self.angles[-1] += atid
# Side Chain Dihedrals
for atids, par in zip(dih_con, dih_par):
self.dihedrals.append(
Dihedral(
options=self.options,
atoms=atids,
parameters=par[1:],
type=par[0],
comments=resname,
category="BSC",
)
)
# Shift the atom numbers
self.dihedrals[-1] += atid
# Side Chain Impropers
for atids, par in zip(imp_con, imp_par):
self.dihedrals.append(
Dihedral(
options=self.options,
atoms=atids,
parameters=par[1:],
type=par[0],
comments=resname,
category="SC",
)
)
# Shift the atom numbers
self.dihedrals[-1] += atid
# Side Chain V-Sites
for atids, par in zip(vsite_con, vsite_par):
self.vsites.append(
Vsite(
options=self.options,
atoms=atids,
parameters=par,
comments=resname,
category="SC",
)
)
self.vsites[-1] += atid # Shift the atom numbers
# Side Chain Exclusions
for atids, par in zip(excl_con, excl_par):
exclusion = Exclusion(
options=self.options, atoms=atids, parameters=" ", category="SC"
)
self.exclusions.append(exclusion)
self.exclusions[-1] += atid # Shift the atom numbers
# Pairs
for atids, par in zip(pair_con, pair_par):
pair = Pair(options=self.options, atoms=atids, parameters=" ")
self.pairs.append(pair)
self.pairs[-1] += atid # Shift the atom numbers
# All residue atoms
counter = 0 # Counts over beads
# Need to tweak this to get all the backbone beads to the list with the side chain
bbbset = [bbMulti[count], bbMulti[count + 1], bbMulti[count + 2]]
for atype, aname in zip(
bbbset + list(scatoms), CoarseGrained.residue_bead_names_dna
):
# self.atoms.append((atid,atype,resi,resname,aname,atid,self.options['ForceField'].getCharge(atype,aname),ss))
if atid in [vSite.atoms[0] for vSite in self.vsites]:
charge = self.options["ForceField"].getCharge(atype, aname)
mass = 0
self.atoms.append(
(atid, atype, resi, resname, aname, atid, charge, mass, ss)
)
else:
charge = self.options["ForceField"].getCharge(atype, aname)
if aname == "BB1":
mass = 72
elif aname == "BB2" or aname == "BB3":
mass = 60
else:
mass = 36
self.atoms.append(
(atid, atype, resi, resname, aname, atid, charge, mass, ss)
)
# Doing this here saves going over all the atoms onesmore.
# Generate position restraints for all atoms or Backbone beads only. @POSRES
if "all" in self.options["PosRes"]:
if (
aname == "BB1"
or aname == "BB2"
or aname == "BB3"
or aname == "SC1"
) and atid - 1 > 1:
self.posres.append((atid - 1))
if "bb" in self.options["PosRes"]: # @POSRES
if (aname == "BB2") and atid - 1 > 1:
self.posres.append((atid - 1))
if mapping:
self.mapping.append(
(atid, [i + shift for i in mapping[counter]])
)
atid += 1
counter += 1
count += 1
# One more thing, we need to remove dihedrals (2) and an angle (1) that reach beyond the 3' end
# This is stupid to do now but the total number of atoms seems not to be available before
# This iterate the list in reverse order so that removals don't affect later checks
for i in range(len(self.dihedrals) - 1, -1, -1):
if max(self.dihedrals[i].atoms) > self.atoms[-1][0]:
del self.dihedrals[i]
for i in range(len(self.angles) - 1, -1, -1):
if max(self.angles[i].atoms) > self.atoms[-1][0]:
del self.angles[i]
for i in range(len(self.bonds) - 1, -1, -1):
if max(self.bonds[i].atoms) > self.atoms[-1][0]:
del self.bonds[i]
# The first residue of DNA (The 5' end) has a backbone that is only two beads. This is ugly
# but we'll save our headache of changing the whole internal logic.
for i in range(len(self.bonds) - 1, -1, -1):
if 1 in self.bonds[i].atoms:
del self.bonds[i]
else:
self.bonds[i].atoms = tuple([j - 1 for j in self.bonds[i].atoms])
for i in range(len(self.angles) - 1, -1, -1):
if 1 in self.angles[i].atoms:
del self.angles[i]
else:
self.angles[i].atoms = tuple([j - 1 for j in self.angles[i].atoms])
for i in range(len(self.dihedrals) - 1, -1, -1):
if 1 in self.dihedrals[i].atoms:
del self.dihedrals[i]
else:
self.dihedrals[i].atoms = tuple(
[j - 1 for j in self.dihedrals[i].atoms]
)
for i in range(len(self.vsites) - 1, -1, -1):
if 1 in self.vsites[i].atoms:
del self.vsites[i]
else:
self.vsites[i].atoms = tuple([j - 1 for j in self.vsites[i].atoms])
for i in range(len(self.exclusions) - 1, -1, -1):
if 1 in self.exclusions[i].atoms:
del self.exclusions[i]
else:
self.exclusions[i].atoms = tuple(
[j - 1 for j in self.exclusions[i].atoms]
)
for i in range(len(self.pairs) - 1, -1, -1):
if 1 in self.pairs[i].atoms:
del self.pairs[i]
else:
self.pairs[i].atoms = tuple([j - 1 for j in self.pairs[i].atoms])
# Remove the first bead and shift the numbers of all others by one.
del self.atoms[0]
for i in range(len(self.atoms)):
self.atoms[i] = tuple(
[self.atoms[i][0] - 1] + [j for j in self.atoms[i][1:]]
)
enStrandLengths.append(self.atoms[-1][0])
# Remove the last posres, faster than removing first and shifting all.
if len(self.posres) > 0:
del self.posres[-1]
# The rubber bands are best applied outside of the chain class, as that gives
# more control when chains need to be merged. The possibility to do it on the
# chain level is retained to allow building a complete chain topology in
# a straightforward manner after importing this script as module.
if rubber and chain:
rubberList = rubberBands(
[
(i[0], j[4:7])
for i, j in zip(self.atoms, chain.cg())
if i[4] in ElasticBeads
],
ElasticLowerBound,
ElasticUpperBound,
ElasticDecayFactor,
ElasticDecayPower,
ElasticMaximumForce,
ElasticMinimumForce,
)
# self.bonds.extend([Bond(i,options=self.options,type=6,category="Rubber band") for i in rubberList])
[docs]
def fromMoleculeList(self, other):
pass
#############
## 8 # MAIN # -> @MAIN <-
#############
import sys, logging, random, math, os, re
[docs]
def main(options):
# Check whether to read from a gro/pdb file or from stdin
# We use an iterator to wrap around the stream to allow
# inferring the file type, without consuming lines already
inStream = streamTag(options["-f"] and options["-f"].value or sys.stdin)
fileType = next(inStream)
frameIterator = pdbFrameIterator
## ITERATE OVER FRAMES IN STRUCTURE FILE ##
# Now iterate over the frames in the stream
# This should become a StructureFile class with a nice .next method
model = 1
cgOutPDB = None
ssTotal = []
cysteines = []
for title, atoms, box in frameIterator(inStream):
if fileType == "PDB":
# The PDB file can have chains, in which case we list and process them specifically
# TER statements are also interpreted as chain separators
# A chain may have breaks in which case the breaking residues are flagged
chains = [
Chain(options, [i for i in residues(chain)])
for chain in pdbChains(atoms)
]
# Check the chain identifiers
if model == 1 and len(chains) != len(set([i._id for i in chains])):
# Ending down here means that non-consecutive blocks of atoms in the
# PDB file have the same chain ID. The warning pertains to PDB files only,
# since chains from GRO files get a unique chain identifier assigned.
logging.warning(
"Several chains have identical chain identifiers in the PDB file."
)
n = 1
logging.info("Found %d chains:" % len(chains))
for chain in chains:
logging.info(
" %2d: %s (%s), %d atoms in %d residues."
% (n, chain._id, chain._type, chain.natoms, len(chain))
)
n += 1
# Check which chains need merging
if model == 1:
order, merge = check_merge(
chains,
options["mergeList"],
options["linkList"],
options["CystineCheckBonds"] and options["CystineMaxDist2"],
)
# Get the total length of the sequence
seqlength = sum([len(chain) for chain in chains])
logging.info("Total size of the system: %s residues." % seqlength)
## SECONDARY STRUCTURE
ss = ""
if options["Collagen"]:
for chain in chains:
chain.set_ss("F")
ss += chain.ss
# Now set the secondary structure for each of the chains
sstmp = ss
for chain in chains:
ln = min(len(sstmp), len(chain))
chain.set_ss(sstmp[:ln])
sstmp = ss[:ln]
# Collect the secondary structure classifications for different frames
ssTotal.append(ss)
# Write the coarse grained structure if requested
if options["-x"].value:
logging.info("Writing coarse grained structure.")
if cgOutPDB == None:
cgOutPDB = open(options["-x"].value, "w")
cgOutPDB.write("MODEL %8d\n" % model)
cgOutPDB.write(title)
cgOutPDB.write(pdbBoxString(box))
atid = 1
write_start = 0
for i in order:
ci = chains[i]
coarseGrained = ci.cg(com=True)
if coarseGrained:
# For DNA we need to remove the first bead on the 5' end and shift the atids.
if ci.type() == "Nucleic":
write_start = 1
else:
write_start = 0
for name, resn, resi, chain, x, y, z, ssid in coarseGrained[
write_start:
]:
insc = resi >> 20
resi -= insc << 20
cgOutPDB.write(
pdbAtomLine
% (
atid,
name,
resn[:3],
chain,
resi,
chr(insc),
x,
y,
z,
1,
ssid,
)
)
atid += 1
cgOutPDB.write("TER\n")
else:
logging.warning(
"No mapping for coarse graining chain %s (%s); chain is skipped."
% (ci._id, ci.type())
)
cgOutPDB.write("ENDMDL\n")
# Gather cysteine sulphur coordinates
cyslist = [cys["SG"] for chain in chains for cys in chain["CYS"]]
cysteines.append([cys for cys in cyslist if cys])
model += 1
# Evertything below here we only need, if we need to write a Topology
if options["-o"]:
# Collect the secondary structure stuff and decide what to do with it
# First rearrange by the residue
ssTotal = zip(*ssTotal)
ssAver = []
for i in ssTotal:
si = list(set(i))
if len(si) == 1:
# Only one type -- consensus
ssAver.append(si[0])
else:
# Transitions between secondary structure types
i = list(i)
si = [(1.0 * i.count(j) / len(i), j) for j in si]
si.sort()
if si[-1][0] > options["-ssc"].value:
ssAver.append(si[-1][1])
else:
ssAver.append(" ")
ssAver = "".join(ssAver)
logging.info(
"(Average) Secondary structure has been determined (see head of .itp-file)."
)
# Divide the secondary structure according to the division in chains
# This will set the secondary structure types to be used for the
# topology.
for chain in chains:
chain.set_ss(ssAver[: len(chain)])
ssAver = ssAver[len(chain) :]
# Now the chains are complete, each consisting of a residuelist,
# and a secondary structure designation if the chain is of type 'Protein'.
# There may be mixed chains, there may be HETATM things.
# Water has been discarded. Maybe this has to be changed at some point.
# The order in the coarse grained files matches the order in the set of chains.
#
# If there are no merges to be done, i.e. no global Elnedyn network, no
# disulphide bridges, no links, no distance restraints and no explicit merges,
# then we can write out the topology, which will match the coarse grained file.
#
# If there are merges to be done, the order of things may be changed, in which
# case the coarse grained structure will not match with the topology...
## REAL ITP STUFF ##
# Check whether we have identical chains, in which case we
# only write the ITP for one...
# This means making a distinction between chains and
# moleculetypes.
molecules = [tuple([chains[i] for i in j]) for j in merge]
# At this point we should have a list or dictionary of chains
# Each chain should be given a unique name, based on the value
# of options["-o"] combined with the chain identifier and possibly
# a number if there are chains with identical identifiers.
# For each chain we then write an ITP file using the name for
# moleculetype and name + ".itp" for the topology include file.
# In addition we write a master topology file, using the value of
# options["-o"], with an added extension ".top" if not given.
# XXX *NOTE*: This should probably be gathered in a 'Universe' class
itp = 0
moleculeTypes = {}
cumulative_atoms = 0
for mi, mol in enumerate(molecules):
# Check if the moleculetype is already listed
# If not, generate the topology from the chain definition
if not mol in moleculeTypes or options["SeparateTop"]:
# Name of the moleculetype
# NOTE: The naming should be changed; now it becomes Protein_X+Protein_Y+...
name = "+".join(
[chain.getname(options["-name"].value) for chain in mol]
)
moleculeTypes[mol] = name
# Write the molecule type topology
top = Topology(mol[0], options=options, name=name)
# This merges topologies, properties how adding happens in Topology method __iadd__
for m in mol[1:]:
top += Topology(m, options=options)
# Have to add the connections, like the connecting network
# Gather coordinates
mcg, coords = zip(
*[(j[:4], j[4:7]) for m in mol for j in m.cg(force=True)]
)
mcg = list(mcg)
# Elastic Network
# The elastic network is added after the topology is constructed, since that
# is where the correct atom list with numbering and the full set of
# coordinates for the merged chains are available.
# For Nucleic have to watch out for the missing first bead again.
# THIS IS BANDAID FOR NOW, ONLY WORKS FOR TWO CHAINS
if name.split("_")[0] == "Nucleic":
strands = len(enStrandLengths)
cuts = [enStrandLengths[0] + 1]
nucleic_coords = coords[1 : cuts[0]]
if strands != 1:
for i in range(1, strands):
cuts.append(cuts[-1] + enStrandLengths[i] + 1)
for i in range(1, strands):
nucleic_coords += coords[cuts[i - 1] + 1 : cuts[i]]
if options["ElasticNetwork"]:
rubberType = options["ForceField"].EBondType
rubberList = rubberBands(
[
(i[0], j)
for i, j in zip(top.atoms, nucleic_coords)
if i[4] in options["ElasticBeads"]
],
options["ElasticLowerBound"],
options["ElasticUpperBound"],
options["ElasticDecayFactor"],
options["ElasticDecayPower"],
options["ElasticMaximumForce"],
options["ElasticMinimumForce"],
)
top.bonds.extend(
[
Bond(
i,
options=options,
type=rubberType,
category="Rubber band",
)
for i in rubberList
]
)
else:
if options["ElasticNetwork"]:
print(options["ElasticBeads"])
print(top.atoms[0])
rubberType = options["ForceField"].EBondType
rubberList = rubberBands(
[
(i[0], j)
for i, j in zip(top.atoms, coords)
if i[4] in options["ElasticBeads"]
],
options["ElasticLowerBound"],
options["ElasticUpperBound"],
options["ElasticDecayFactor"],
options["ElasticDecayPower"],
options["ElasticMaximumForce"],
options["ElasticMinimumForce"],
)
top.bonds.extend(
[
Bond(
i,
options=options,
type=rubberType,
category="Rubber band",
)
for i in rubberList
]
)
# Write out the MoleculeType topology
destination = (
options["-o"]
and open(moleculeTypes[mol] + ".itp", "w")
or sys.stdout
)
destination.write(str(top))
itp += 1
# Check whether other chains are equal to this one
# Skip this step if we are to write all chains to separate moleculetypes
if not options["SeparateTop"]:
for j in range(mi + 1, len(molecules)):
if not molecules[j] in moleculeTypes and mol == molecules[j]:
# Molecule j is equal to a molecule mi
# Set the name of the moleculetype to the one of that molecule
moleculeTypes[molecules[j]] = moleculeTypes[mol]
# For the parameterization index files we need to update the cumulative number of atoms.
# For parameterization option SepareteTop should always be used because otherwise the numbering will fail.
cumulative_atoms += len(top.atoms)
logging.info("Written %d ITP file%s" % (itp, itp > 1 and "s" or ""))
# The following lines are always printed (if no errors occur).
print("\n\tThere you are. One MARTINI. Shaken, not stirred.\n")
Q = martiniq.pop(random.randint(0, len(martiniq) - 1))
print("\n", Q[1], "\n%80s" % ("--" + Q[0]), "\n")
if __name__ == "__main__":
import sys, logging
args = sys.argv[1:]
options, lists = options, lists
options = option_parser(args, options, lists, version)
main(options)