Source code for reforge.martini.martinize_nucleotides

#!/usr/bin/env python

version = "3.1"
authors = ["me and others"]

forcefields = ["martini30nucleic", "martini31nucleic"]

import importlib.resources
import os
import sys
from reforge import itpio

rna_system = "test"

# For working versions, the sections are in different modules
#
# Index of this file:
#
#   1. Options and documentation                             @DOC
#   2. Description, options and command line parsing         @CMD
#   3. Helper functions and macros                           @FUNC
#   4. Finegrained to coarsegrained mapping                  @MAP
#   5. Secondary structure determination and interpretation  @SS
#   6. Force field parameters (MARTINI/ELNEDYN)              @FF
#   7. Elastic network                                       @ELN
#   8. Structure I/O                                         @IO
#   9. Topology generation                                   @TOP
#  10. Main                                                  @MAIN

###################################
## 1 # OPTIONS AND DOCUMENTATION ##  -> @DOC <-
###################################


# This is a simple and versatily option class that allows easy
# definition and parsing of options.

[docs]
class Option:
    def __init__(self, func=str, num=1, default=None, description=""):
        self.func = func
        self.num = num
        self.value = default
        self.description = description

    def __nonzero__(self):
        if self.func == bool:
            return self.value != False
        return bool(self.value)

    def __str__(self):
        return self.value and str(self.value) or ""


[docs]
    def setvalue(self, v):
        if len(v) == 1:
            self.value = self.func(v[0])
        else:
            self.value = [self.func(i) for i in v]




# Lists for gathering arguments to options that can be specified
# multiple times on the command line.
lists = {
    "cystines": [],
    "merges": [],
    "links": [],
    "multi": [],
}

# List of Help text and options.
# This way we can simply print this list if the user wants help.
options = [
    #   NOTE: Options marked with (+) can be given multiple times on the command line
    #   option              type number default description
    """
========================================================================\n
""",
    ("-f", Option(str, 1, None, "Input file (PDB|GRO)")),
    ("-o", Option(str, 1, None, "Output topology (TOP)")),
    ("-x", Option(str, 1, None, "Output coarse grained structure (PDB)")),
    ("-seq", Option(str, 1, None, "Output list of bead numbers.")),
    ("-bmap", Option(str, 1, None, "Output index file containing bonded terms.")),
    ("-v", Option(bool, 0, False, "Verbose. Be load and noisy.")),
    ("-h", Option(bool, 0, False, "Display this help.")),
    ("-ss", Option(str, 1, None, "Secondary structure (File or string)")),
    (
        "-ssc",
        Option(
            float, 1, 0.5, "Cutoff fraction for ss in case of ambiguity (default: 0.5)."
        ),
    ),
    ("-dssp", Option(str, 1, None, "DSSP executable for determining structure")),
    ("-collagen", Option(bool, 0, False, "Use collagen parameters")),
    (
        "-his",
        Option(bool, 0, False, "Interactively set the charge of each His-residue."),
    ),
    ("-nt", Option(bool, 0, False, "Set neutral termini (charged is default)")),
    ("-cb", Option(bool, 0, False, "Set charges at chain breaks (neutral is default)")),
    ("-cys", Option(lists["cystines"].append, 1, None, "Disulphide bond (+)")),
    ("-link", Option(lists["links"].append, 1, None, "Link (+)")),
    (
        "-merge",
        Option(lists["merges"].append, 1, None, "Merge chains: e.g. -merge A,B,C (+)"),
    ),
    ("-name", Option(str, 1, None, "Moleculetype name")),
    (
        "-p",
        Option(
            str,
            1,
            "None",
            "Output position restraints (None/All/Backbone) (default: None)",
        ),
    ),
    (
        "-pf",
        Option(
            float,
            1,
            1000,
            "Position restraints force constant (default: 1000 kJ/mol/nm^2)",
        ),
    ),
    (
        "-ed",
        Option(
            bool,
            0,
            False,
            "Use dihedrals for extended regions rather than elastic bonds)",
        ),
    ),
    ("-sep", Option(bool, 0, False, "Write separate topologies for identical chains.")),
    (
        "-ff",
        Option(
            str,
            1,
            "martini30nucleic",
            "Which forcefield to use: " + " ,".join(n for n in forcefields[:-1]),
        ),
    ),
    ("-elastic", Option(bool, 0, True, "Write elastic bonds")),
    ("-ef", Option(float, 1, 500, "Elastic bond force constant Fc")),
    ("-el", Option(float, 1, 0.5, "Elastic bond lower cutoff: F = Fc if rij < lo")),
    ("-eu", Option(float, 1, 1.2, "Elastic bond upper cutoff: F = 0  if rij > up")),
    ("-ea", Option(float, 1, 0, "Elastic bond decay factor a")),
    ("-ep", Option(float, 1, 1, "Elastic bond decay power p")),
    (
        "-em",
        Option(float, 1, 0, "Remove elastic bonds with force constant lower than this"),
    ),
    (
        "-eb",
        Option(str, 1, "BB", "Comma separated list of bead names for elastic bonds"),
    ),
    (
        "-type",
        Option(
            str,
            1,
            "ss",
            "Type of DNA/RNA topology (ss/ds-stiff/ds-soft) to create. (default: ss)",
        ),
    ),
    (
        "-multi",
        Option(
            lists["multi"].append, 1, None, "Chain to be set up for multiscaling (+)"
        ),
    ),
    ("-sys", Option(str, 1, "1RNA", "AA SYS")),
]

## Martini Quotes
martiniq = [
    (
        "Robert Benchley",
        "Why don't you get out of that wet coat and into a dry martini?",
    ),
    ("James Thurber", "One martini is all right, two is two many, three is not enough"),
    (
        "Philip Larkin",
        "The chromatic scale is what you use to give the effect of drinking a quinine martini and having an enema simultaneously.",
    ),
    (
        "William Emerson, Jr.",
        "And when that first martini hits the liver like a silver bullet, there is a sigh of contentment that can be heard in Dubuque.",
    ),
    (
        "Alec Waugh",
        "I am prepared to believe that a dry martini slightly impairs the palate, but think what it does for the soul.",
    ),
    (
        "Gerald R. Ford",
        "The three-martini lunch is the epitome of American efficiency. Where else can you get an earful, a bellyful and a snootful at the same time?",
    ),
    ("P. G. Wodehouse", "He was white and shaken, like a dry martini."),
]

## DNA elastic network list
enStrandLengths = []

desc = ""



[docs]
def help():
    """Print help text and list of options and end the program."""
    import sys

    for item in options:
        if type(item) == str:
            print(item)
    for item in options:
        if type(item) != str:
            print("%10s  %s" % (item[0], item[1].description))
    print
    sys.exit()



##############################
## 2 # COMMAND LINE PARSING ##  -> @CMD <-
##############################
import sys, logging


# Helper function to parse atom strings given on the command line:
#   resid
#   resname/resid
#   chain/resname/resid
#   resname/resid/atom
#   chain/resname/resid/atom
#   chain//resid
#   chain/resname/atom

[docs]
def str2atom(a):
    a = a.split("/")
    if len(a) == 1:  # Only a residue number:
        return (None, None, int(a[0]), None)
    if len(a) == 2:  # Residue name and number (CYS/123):
        return (None, a[0], int(a[1]), None)
    if len(a) == 3:
        if a[2].isdigit():  # Chain, residue name, residue number
            return (None, a[1], int(a[2]), a[0])
        else:  # Residue name, residue number, atom name
            return (a[2], a[0], int(a[1]), None)
    return (a[3], a[1], int(a[2]), a[0])




[docs]
def option_parser(args, options, lists, version=0):
    # Check whether there is a request for help
    if "-h" in args or "--help" in args:
        help()

    # Convert the option list to a dictionary, discarding all comments
    options = dict([i for i in options if not type(i) == str])

    # This information we would like to print to some files, so let's put it in our information class
    options["Version"] = version
    options["Arguments"] = args[:]

    while args:
        ar = args.pop(0)
        options[ar].setvalue([args.pop(0) for i in range(options[ar].num)])

    ## LOGGING ##
    # Set the log level and communicate which options are set and what is happening
    # If 'Verbose' is set, change the logger level
    logLevel = options["-v"] and logging.DEBUG or logging.INFO
    logging.basicConfig(format="%(levelname)-7s    %(message)s", level=logLevel)

    logging.info("MARTINIZE, script version %s" % version)
    # logging.info('If you use this script please cite:')
    # logging.info('de Jong et al., J. Chem. Theory Comput., 2013, DOI:10.1021/ct300646g')

    # Write options based on selected topology type. @net
    options["type"] = options["-type"].value
    if options["type"] == "ss":
        options["-ff"].setvalue(["martini30nucleic"])
    elif options["type"] == "ds":
        options["-ff"].setvalue(["martini30nucleic"])
        lists["merges"].append("A, B")
        options["-eu"].setvalue(["1.8"])
        options["-ef"].setvalue(["200"])
        options["-eb"].setvalue(["BB1", "BB3"])
    elif options["type"] == "pol":
        options["-ff"].setvalue(["martini31nucleic"])
    else:
        logging.error("Undefined topology type. Giving up...")
        sys.exit()
    options["-eb"].setvalue(["BB2"])

    # The make the program flexible, the forcefield parameters are defined
    # for multiple forcefield. Check if a existing one is defined:
    try:
        # Try to load the forcefield class from a different file
        _tmp = __import__(options["-ff"].value.lower())
        options["ForceField"] = getattr(_tmp, options["-ff"].value.lower())()
    except:
        # Try to load the forcefield class from the current file
        options["ForceField"] = globals()[options["-ff"].value.lower()]()

    # Process the raw options from the command line
    # Boolean options are set to more intuitive variables
    options["Collagen"] = options["-collagen"]
    options["chHIS"] = options["-his"]
    options["ChargesAtBreaks"] = options["-cb"]
    options["NeutralTermini"] = options["-nt"]
    # options['ExtendedDihedrals']   = options['-ed']
    # options['RetainHETATM']        = False # options['-hetatm']
    options["SeparateTop"] = options["-sep"]
    options["MixedChains"] = False  # options['-mixed']
    options["ElasticNetwork"] = options["-elastic"]

    # Parsing of some other options into variables
    options["ElasticMaximumForce"] = options["-ef"].value
    options["ElasticMinimumForce"] = options["-em"].value
    options["ElasticLowerBound"] = options["-el"].value
    options["ElasticUpperBound"] = options["-eu"].value
    options["ElasticDecayFactor"] = options["-ea"].value
    options["ElasticDecayPower"] = options["-ep"].value
    options["ElasticBeads"] = options["-eb"].value.split(",")
    options["PosResForce"] = options["-pf"].value

    options["PosRes"] = [i.lower() for i in options["-p"].value.split(",")]
    if "none" in options["PosRes"]:
        options["PosRes"] = []
    if "backbone" in options["PosRes"]:
        options["PosRes"].append("BB")

    if options["ForceField"].ElasticNetwork:
        # Some forcefields, like elnedyn, always use an elatic network. This is set in the
        # forcefield file, with the parameter ElasticNetwork.
        options["ElasticNetwork"] = True

    # Merges, links and cystines
    options["mergeList"] = (
        "all" in lists["merges"] and ["all"] or [i.split(",") for i in lists["merges"]]
    )

    # Process links
    linkList = []
    linkListCG = []
    for i in lists["links"]:
        ln = i.split(",")
        a, b = str2atom(ln[0]), str2atom(ln[1])
        if len(ln) > 3:  # Bond with given length and force constant
            bl, fc = (ln[2] and float(ln[2]) or None, float(ln[3]))
        elif len(a) == 3:  # Constraint at given distance
            bl, fc = float(a[2]), None
        else:  # Constraint at distance in structure
            bl, fc = None, None
        # Store the link, but do not list the atom name in the
        # atomistic link list. Otherwise it will not get noticed
        # as a valid link when checking for merging chains
        linkList.append(((None, a[1], a[2], a[3]), (None, b[1], b[2], b[3])))
        linkListCG.append((a, b, bl, fc))

    # Cystines -- This should be done for all special bonds listed in the _special_ dictionary
    CystineCheckBonds = False  # By default, do not detect cystine bridges
    CystineMaxDist2 = (10 * 0.22) ** 2  # Maximum distance (A) for detection of SS bonds
    for i in lists["cystines"]:
        if i.lower() == "auto":
            CystineCheckBonds = True
        elif i.replace(".", "").isdigit():
            CystineCheckBonds = True
            CystineMaxDist2 = (10 * float(i)) ** 2
        else:
            # This item should be a pair of cysteines
            cysA, cysB = [str2atom(j) for j in i.split(",")]
            # Internally we handle the residue number shifted by ord(' ')<<20. We have to add this to the
            # cys-residue numbers given here as well.
            constant = 32 << 20
            linkList.append(
                (
                    ("SG", "CYS", cysA[2] + constant, cysA[3]),
                    ("SG", "CYS", cysB[2] + constant, cysB[3]),
                )
            )
            linkListCG.append(
                (
                    ("SC1", "CYS", cysA[2] + constant, cysA[3]),
                    ("SC1", "CYS", cysB[2] + constant, cysB[3]),
                    -1,
                    -1,
                )
            )

    # Now we have done everything to it, we can add Link/cystine related stuff to options
    # 'multi' is not stored anywhere else, so that we also add
    options["linkList"] = linkList
    options["linkListCG"] = linkListCG
    options["CystineCheckBonds"] = CystineCheckBonds
    options["CystineMaxDist2"] = CystineMaxDist2
    options["multi"] = lists["multi"]

    if "ForceField" in options.keys():
        logging.info("The %s forcefield will be used." % (options["ForceField"].name))
    else:
        logging.error("Forcefield '%s' has not been implemented." % (options["-ff"]))
        sys.exit()

    if options["PosRes"]:
        logging.info("Position restraints will be generated.")
        logging.warning(
            "Position restraints are only enabled if -DPOSRES is set in the MDP file"
        )

    return options



#################################################
## 3 # HELPER FUNCTIONS, CLASSES AND SHORTCUTS ##  -> @FUNC <-
#################################################

import math

# ----+------------------+
## A | STRING FUNCTIONS |
# ----+------------------+


# Split a string

[docs]
def spl(x):
    return x.split()



# Split each argument in a list

[docs]
def nsplit(*x):
    return [i.split() for i in x]



# Make a dictionary from two lists

[docs]
def hash(x, y):
    return dict(zip(x, y))



# Function to reformat pattern strings

[docs]
def pat(x, c="."):
    return x.replace(c, "\x00").split()



# Function to generate formatted strings according to the argument type

[docs]
def formatString(i):
    if type(i) == str:
        return i
    if type(i) == int:
        return "%5d" % i
    if type(i) == float and 0 < abs(i) < 1e-5:
        return "%2.1e" % i
    elif type(i) == float:
        return "%8.5f" % i
    else:
        return str(i)



# ----+----------------+
## B | MATH FUNCTIONS |
# ----+----------------+



[docs]
def cos_angle(a, b):
    p = sum([i * j for i, j in zip(a, b)])
    q = math.sqrt(sum([i * i for i in a]) * sum([j * j for j in b]))
    return min(max(-1, p / q), 1)




[docs]
def norm2(a):
    return sum([i * i for i in a])




[docs]
def norm(a):
    return math.sqrt(norm2(a))




[docs]
def distance2(a, b):
    return (a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2 + (a[2] - b[2]) ** 2



##########################
## 4 # FG -> CG MAPPING ##  -> @MAP <-
##########################


dnares3 = ["DA", "DC", "DG", "DT"]
dnares1 = ["dA", "dC", "dG", "dT"]
rnares3 = ["A", "C", "G", "U"]
rnares1 = ["A", "C", "G", "U"]  #

# Residue classes:
nucleic = spl(
    "DA DC DG DT A C G U \
    RA3 RA5 RC3 RC5 RG3 RG5 RU3 RU5 \
    DMA DHU MRA MRC MRG MRU NMC PSU SPA RAP \
    1MG 2MA 2MG 3MP 3MU 4SU 5MC 5MG 5MU 6MA 7MG"
)

rnares3 = nucleic
rnares1 = nucleic

# Amino acid nucleic acid codes:
# The naming (AA and '3') is not strictly correct when adding DNA/RNA, but we keep it like this for consistincy.
AA3 = dnares3 + rnares3  # @#
AA1 = dnares1 + rnares1  # @#

# Dictionaries for conversion from one letter code to three letter code v.v.
AA123, AA321 = hash(AA1, AA3), hash(AA3, AA1)

residueTypes = dict([(i, "Nucleic") for i in nucleic])



[docs]
class CoarseGrained:
    # Class for mapping an atomistic residue list to a coarsegrained one
    # Should get an __init__ function taking a residuelist, atomlist, Pymol selection or ChemPy model
    # The result should be stored in a list-type attribute
    # The class should have pdbstr and grostr methods

    # Standard mapping groups
    dna_bb = nsplit("P OP1 OP2 O5' O3'", "C5' O4' C4'", "C3' C2' C1'")
    rna_bb = nsplit("P OP1 OP2 O5' O3'", "C5' O4' C4'", "C3' C2' O2' C1'")

    # Generic names for DNA and RNA beads
    residue_bead_names_dna = spl("BB1 BB2 BB3 SC1 SC2 SC3 SC4 SC5 SC6 SC7 SC8")
    residue_bead_names_rna = spl("BB1 BB2 BB3 SC1 SC2 SC3 SC4 SC5 SC6 SC7 SC8")

    # This dictionary contains the bead names for all residues,
    # following the order in 'mapping'
    names = {}
    # Add the default bead names for all DNA/RNA nucleic acids
    names.update(
        [
            (
                i,
                (
                    "BB1",
                    "BB2",
                    "BB3",
                    "SC1",
                    "SC2",
                    "SC3",
                    "SC4",
                    "SC5",
                    "SC6",
                    "SC7",
                    "SC8",
                ),
            )
            for i in nucleic
        ]
    )

    # Crude mass for weighted average. No consideration of united atoms.
    # This will probably give only minor deviations, while also giving less headache
    # mass = {'H': 1,'C': 12,'N': 14,'O': 16,'S': 32,'P': 31,'M': 0}
    mass = {"H": 1, "C": 1, "N": 1, "O": 1, "S": 1, "P": 1, "M": 0}



# Determine average position for a set of weights and coordinates
# This is a rather specific function that requires a list of items
# [(m,(x,y,z),id),..] and returns the weighted average of the
# coordinates and the list of ids mapped to this bead

[docs]
def aver(b):
    # print(b)
    mwx, ids = zip(
        *[((m * x, m * y, m * z), i) for m, (x, y, z), i in b]
    )  # Weighted coordinates
    # tm  = sum(zip(*b)[0])
    total_mass = sum(x[0] for x in b)  # Sum of weights
    return [sum(i) / total_mass for i in zip(*mwx)], ids  # Centre of mass



# Return the CG beads for an atomistic residue, using the mapping specified above
# The residue 'r' is simply a list of atoms, and each atom is a list:
# [ name, resname, resid, chain, x, y, z ]

[docs]
def map(r, ff):
    """
    ff - chosen force field for the mapping
    """
    p = ff.mapping[r[0][1]]  # Mapping for this residue
    # Get the name, mass and coordinates for all atoms in the residue
    a = [(i[0], CoarseGrained.mass.get(i[0][0], 0), i[4:]) for i in r]
    # Store weight, coordinate and index for atoms that match a bead
    q = [
        [(m, coord, a.index((atom, m, coord))) for atom, m, coord in a if atom in i]
        for i in p
    ]
    # Bead positions
    return zip(*[aver(i) for i in q])



# Mapping for index file

[docs]
def mapIndex(r, ff):
    p = ff.mapping[r[0][1]]  # Mapping for this residue
    # Get the name, mass and coordinates for all atoms in the residue
    a = [(i[0], CoarseGrained.mass.get(i[0][0], 0), i[4:]) for i in r]
    # Store weight, coordinate and index for atoms that match a bead
    return [
        [(m, coord, a.index((atom, m, coord))) for atom, m, coord in a if atom in i]
        for i in p
    ]



#############################
## 5 # SECONDARY STRUCTURE ##  -> @SS <-
#############################
import logging, os, sys
import subprocess as subp

# ----+--------------------------------------+
## A | SECONDARY STRUCTURE TYPE DEFINITIONS |
# ----+--------------------------------------+


bbss = spl("  F     E     H     1     2     3     T     S     C")  # SS one letter


# The following dictionary contains secondary structure types as assigned by
# different programs. The corresponding Martini secondary structure types are
# listed in cgss
#
# NOTE:
#  Each list of letters in the dictionary ss should exactly match the list
#  in cgss.
#
ssdefs = {
    "dssp": list(".HGIBETSC~"),  # DSSP one letter secondary structure code     #@#
    "pymol": list(".H...S...L"),  # Pymol one letter secondary structure code    #@#
    "gmx": list(".H...ETS.C"),  # Gromacs secondary structure dump code        #@#
    "self": list("FHHHEETSCC"),  # Internal CG secondary structure codes        #@#
}
cgss = list("FHHHEETSCC")  # Corresponding CG secondary structure types   #@#


# ----+-------------------------------------------+
## B | SECONDARY STRUCTURE PATTERN SUBSTITUTIONS |
# ----+-------------------------------------------+


# For all structure types specific dihedrals may be used if four or
# more consecutive residues are assigned that type.

# Helix start and end regions are special and require assignment of
# specific types. The following pattern substitutions are applied
# (in the given order). A dot matches any other type.
# Patterns can be added to the dictionaries. This only makes sense
# if for each key in patterns there is a matching key in pattypes.
patterns = {
    "H": pat(".H. .HH. .HHH. .HHHH. .HHHHH. .HHHHHH. .HHHHHHH. .HHHH HHHH.")  # @#
}
pattypes = {
    "H": pat(".3. .33. .333. .3333. .13332. .113322. .1113222. .1111 2222.")  # @#
}


# ----+----------+
## C | INTERNAL |
# ----+----------+


# Pymol Colors
#           F   E   H   1   2   3   T   S   C
ssnum = (13, 4, 2, 2, 2, 2, 6, 22, 0)  # @#

# Dictionary returning a number for a given type of secondary structure
# This can be used for setting the b-factor field for coloring
ss2num = hash(bbss, ssnum)


# List of programs for which secondary structure definitions can be processed
programs = ssdefs.keys()


# Dictionaries mapping ss types to the CG ss types
ssd = dict([(i, hash(ssdefs[i], cgss)) for i in programs])


# From the secondary structure dictionaries we create translation tables
# with which all secondary structure types can be processed. Anything
# not listed above will be mapped to C (coil).
# Note, a translation table is a list of 256 characters to map standard
# ascii characters to.

[docs]
def tt(program):
    return "".join([ssd[program].get(chr(i), "C") for i in range(256)])



# The translation table depends on the program used to obtain the
# secondary structure definitions
sstt = dict([(i, tt(i)) for i in programs])


# The following translation tables are used to identify stretches of
# a certain type of secondary structure. These translation tables have
# every character, except for the indicated secondary structure, set to
# \x00. This allows summing the sequences after processing to obtain
# a single sequence summarizing all the features.
null = "\x00"
sstd = dict([(i, ord(i) * null + i + (255 - ord(i)) * null) for i in cgss])


# Pattern substitutions

[docs]
def typesub(seq, patterns, types):
    seq = null + seq + null
    for i, j in zip(patterns, types):
        seq = seq.replace(i, j)
    return seq[1:-1]



# The following function translates a string encoding the secondary structure
# to a string of corresponding Martini types, taking the origin of the
# secondary structure into account, and replacing termini if requested.

[docs]
def ssClassification(ss, program="self"):
    # Translate dssp/pymol/gmx ss to Martini ss
    ss = ss.translate(sstt[program])
    # Separate the different secondary structure types
    sep = dict([(i, ss.translate(sstd[i])) for i in sstd.keys()])
    # Do type substitutions based on patterns
    # If the ss type is not in the patterns lists, do not substitute
    # (use empty lists for substitutions)
    typ = [
        typesub(sep[i], patterns.get(i, []), pattypes.get(i, [])) for i in sstd.keys()
    ]
    # Translate all types to numerical values
    typ = [[ord(j) for j in list(i)] for i in typ]
    # Sum characters back to get a full typed sequence
    typ = "".join([chr(sum(i)) for i in zip(*typ)])
    # Return both the actual as well as the fully typed sequence
    return ss, typ



###################################
## 5 # FORCE FIELDS ##  -> @FF <-
###################################



[docs]
class ForceField:


[docs]
    @staticmethod
    def read_itp(itp_file):
        itp_data = itpio.read_itp(itp_file)
        return itp_data



[docs]
    @staticmethod
    def read_itps(mol, directory, version):
        # itpdir = os.path.abspath(f'/scratch/dyangali/reforge/reforge/itp/{directory}')
        itpdir = importlib.resources.files("reforge") / "martini" / "itp"
        file = os.path.join(itpdir, f"{directory}", f"{mol}_A_{version}.itp")
        a_itp_data = ForceField.read_itp(file)
        file = os.path.join(itpdir, f"{directory}", f"{mol}_C_{version}.itp")
        c_itp_data = ForceField.read_itp(file)
        file = os.path.join(itpdir, f"{directory}", f"{mol}_G_{version}.itp")
        g_itp_data = ForceField.read_itp(file)
        file = os.path.join(itpdir, f"{directory}", f"{mol}_U_{version}.itp")
        u_itp_data = ForceField.read_itp(file)
        return a_itp_data, c_itp_data, g_itp_data, u_itp_data



[docs]
    @staticmethod
    def itp_to_indata(itp_data):
        keys = [
            "bonds",
            "angles",
            "dihedrals",
            "impropers",
            "virtual_sites3",
            "exclusions",
            "pairs",
        ]
        connectivity = [itp_data[key].keys() if key in itp_data else [] for key in keys]
        parameters = [itp_data[key].values() if key in itp_data else [] for key in keys]
        return connectivity, parameters


    def __init__(self):

        # By default use an elastic network
        self.ElasticNetwork = False

        # Elastic networks bond shouldn't lead to exclusions (type 6)
        # But Elnedyn has been parametrized with type 1.
        self.EBondType = 6

        ## DNA DICTIONARIES ##
        # Dictionary for the connectivities and parameters of bonds between DNA backbone beads
        self.dnaBbBondDictC = dict(zip(self.dna_con["bond"], self.dna_bb["bond"]))
        # Dictionary for the connectivities and parameters of angles between DNA backbone beads
        self.dnaBbAngleDictC = dict(zip(self.dna_con["angle"], self.dna_bb["angle"]))
        # Dictionary for the connectivities and parameters of dihedrals between DNA backbone beads
        self.dnaBbDihDictC = dict(zip(self.dna_con["dih"], self.dna_bb["dih"]))
        # Dictionary for exclusions for DNA backbone beads
        self.dnaBbExclDictC = dict(zip(self.dna_con["excl"], self.dna_bb["excl"]))
        # Dictionary for pairs for DNA backbone beads
        self.dnaBbPairDictC = dict(zip(self.dna_con["pair"], self.dna_bb["pair"]))

        ## RNA DICTIONARIES ##
        # Dictionary for the connectivities and parameters of bonds between rna backbone beads
        self.rnaBbBondDictC = dict(zip(self.rna_con["bond"], self.rna_bb["bond"]))
        # Dictionary for the connectivities and parameters of angles between rna backbone beads
        self.rnaBbAngleDictC = dict(zip(self.rna_con["angle"], self.rna_bb["angle"]))
        # Dictionary for the connectivities and parameters of dihedrals between rna backbone beads
        self.rnaBbDihDictC = dict(zip(self.rna_con["dih"], self.rna_bb["dih"]))
        # Dictionary for exclusions for rna backbone beads
        self.rnaBbExclDictC = dict(zip(self.rna_con["excl"], self.rna_bb["excl"]))
        # Dictionary for pairs for rna backbone beads
        self.rnaBbPairDictC = dict(zip(self.rna_con["pair"], self.rna_bb["pair"]))

    # The following function returns the backbone bead for a given residue and
    # secondary structure type.
    # 1. Check if the residue is DNA/RNA and return the whole backbone for those
    # 2. Look up the proper dictionary for the residue
    # 3. Get the proper type from it for the secondary structure
    # If the residue is not in the dictionary of specials, use the default
    # If the secondary structure is not listed (in the residue specific
    # dictionary) revert to the default.


[docs]
    def bbGetBead(self, r1, ss="F"):
        if r1 in dnares3:
            return self.dna_bb["atom"]
        elif r1 in rnares3:
            return self.rna_bb["atom"]
        else:
            sys.exit("This script supports only DNA or RNA.")



[docs]
    def bbGetBond(self, r, ca, ss):
        # Retrieve parameters for each residue from tables defined above
        if r[0] in dnares3:
            return ca in self.dnaBbBondDictC.keys() and self.dnaBbBondDictC[ca] or None
        elif r[0] in rnares3:
            return ca in self.rnaBbBondDictC.keys() and self.rnaBbBondDictC[ca] or None
        else:
            sys.exit("This script supports only DNA or RNA.")



[docs]
    def bbGetAngle(self, r, ca, ss):
        if r[0] in dnares3:
            return (
                ca in self.dnaBbAngleDictC.keys() and self.dnaBbAngleDictC[ca] or None
            )
        elif r[0] in rnares3:
            return (
                ca in self.rnaBbAngleDictC.keys() and self.rnaBbAngleDictC[ca] or None
            )
        else:
            sys.exit("This script supports only DNA or RNA.")



[docs]
    def bbGetExclusion(self, r, ca, ss):
        if r[0] in dnares3:
            return ca in self.dnaBbExclDictC.keys() and " " or None
        elif r[0] in rnares3:
            return ca in self.rnaBbExclDictC.keys() and " " or None
        else:
            sys.exit("This script supports only DNA or RNA.")



[docs]
    def bbGetPair(self, r, ca, ss):
        if r[0] in dnares3:
            return ca in self.dnaBbPairDictC.keys() and " " or None
        elif r[0] in rnares3:
            return ca in self.rnaBbPairDictC.keys() and " " or None
        else:
            sys.exit("This script supports only DNA or RNA.")



[docs]
    def bbGetDihedral(self, r, ca, ss):
        # Retrieve parameters for each residue from table defined above
        if r[0] in dnares3:
            return ca in self.dnaBbDihDictC.keys() and self.dnaBbDihDictC[ca] or None
        elif r[0] in rnares3:
            return ca in self.rnaBbDihDictC.keys() and self.rnaBbDihDictC[ca] or None
        else:
            sys.exit("This script supports only DNA or RNA.")



[docs]
    def getCharge(self, atype, aname):
        return self.charges.get(atype, self.bbcharges.get(aname, 0))



[docs]
    def messages(self):
        """Prints any force-field specific logging messages."""
        import logging

        pass



[docs]
    def update_adenine(self, mapping, connectivity, itp_params):
        parameters = mapping + itp_params
        self.bases.update({"A": parameters})
        self.base_connectivity.update({"A": connectivity})
        self.bases.update({"RA3": parameters})
        self.base_connectivity.update({"RA3": connectivity})
        self.bases.update({"RA5": parameters})
        self.base_connectivity.update({"RA5": connectivity})
        self.bases.update({"2MA": parameters})
        self.base_connectivity.update({"2MA": connectivity})
        self.bases.update({"DMA": parameters})
        self.base_connectivity.update({"DMA": connectivity})
        self.bases.update({"SPA": parameters})
        self.base_connectivity.update({"SPA": connectivity})
        self.bases.update({"RAP": parameters})
        self.base_connectivity.update({"RAP": connectivity})
        self.bases.update({"6MA": parameters})
        self.base_connectivity.update({"6MA": connectivity})



[docs]
    def update_cytosine(self, mapping, connectivity, itp_params):
        parameters = mapping + itp_params
        self.bases.update({"C": parameters})
        self.base_connectivity.update({"C": connectivity})
        self.bases.update({"RC3": parameters})
        self.base_connectivity.update({"RC3": connectivity})
        self.bases.update({"RC5": parameters})
        self.base_connectivity.update({"RC5": connectivity})
        self.bases.update({"MRC": parameters})
        self.base_connectivity.update({"MRC": connectivity})
        self.bases.update({"5MC": parameters})
        self.base_connectivity.update({"5MC": connectivity})
        self.bases.update({"NMC": parameters})
        self.base_connectivity.update({"NMC": connectivity})



[docs]
    def update_guanine(self, mapping, connectivity, itp_params):
        parameters = mapping + itp_params
        self.bases.update({"G": parameters})
        self.base_connectivity.update({"G": connectivity})
        self.bases.update({"RG3": parameters})
        self.base_connectivity.update({"RG3": connectivity})
        self.bases.update({"RG5": parameters})
        self.base_connectivity.update({"RG5": connectivity})
        self.bases.update({"MRG": parameters})
        self.base_connectivity.update({"MRG": connectivity})
        self.bases.update({"1MG": parameters})
        self.base_connectivity.update({"1MG": connectivity})
        self.bases.update({"2MG": parameters})
        self.base_connectivity.update({"2MG": connectivity})
        self.bases.update({"7MG": parameters})
        self.base_connectivity.update({"7MG": connectivity})



[docs]
    def update_uracil(self, mapping, connectivity, itp_params):
        parameters = mapping + itp_params
        self.bases.update({"U": parameters})
        self.base_connectivity.update({"U": connectivity})
        self.bases.update({"RU3": parameters})
        self.base_connectivity.update({"RU3": connectivity})
        self.bases.update({"RU5": parameters})
        self.base_connectivity.update({"RU5": connectivity})
        self.bases.update({"MRU": parameters})
        self.base_connectivity.update({"MRU": connectivity})
        self.bases.update({"DHU": parameters})
        self.base_connectivity.update({"DHU": connectivity})
        self.bases.update({"PSU": parameters})
        self.base_connectivity.update({"PSU": connectivity})
        self.bases.update({"3MP": parameters})
        self.base_connectivity.update({"3MP": connectivity})
        self.bases.update({"3MU": parameters})
        self.base_connectivity.update({"3MU": connectivity})
        self.bases.update({"4SU": parameters})
        self.base_connectivity.update({"4SU": connectivity})
        self.bases.update({"5MU": parameters})
        self.base_connectivity.update({"5MU": connectivity})



[docs]
    @staticmethod
    def update_non_standard_mapping(mapping):
        mapping.update(
            {
                "RA3": mapping["A"],
                "RA5": mapping["A"],
                "2MA": mapping["A"],
                "6MA": mapping["A"],
                "RAP": mapping["A"],
                "DMA": mapping["A"],
                "DHA": mapping["A"],
                "SPA": mapping["A"],
                "RC3": mapping["C"],
                "RC5": mapping["C"],
                "5MC": mapping["C"],
                "3MP": mapping["C"],
                "MRC": mapping["C"],
                "NMC": mapping["C"],
                "RG3": mapping["G"],
                "RG5": mapping["G"],
                "1MG": mapping["G"],
                "2MG": mapping["G"],
                "7MG": mapping["G"],
                "MRG": mapping["G"],
                "RU3": mapping["U"],
                "RU5": mapping["U"],
                "4SU": mapping["U"],
                "DHU": mapping["U"],
                "PSU": mapping["U"],
                "5MU": mapping["U"],
                "3MU": mapping["U"],
                "3MP": mapping["U"],
                "MRU": mapping["U"],
            }
        )





[docs]
class martini30nucleic(ForceField):

    # FF mapping
    bb_mapping = nsplit(
        "P OP1 OP2 O5' O3' O1P O2P",
        "C5' 1H5' 2H5' H5' H5'' C4' H4' O4' C3' H3'",
        "C1' C2' O2' O4'",
    )
    mapping = {
        "A": bb_mapping
        + nsplit("N9 C8 H8", "N3 C4", "N1 C2 H2", "N6 C6 H61 H62", "N7 C5"),
        "C": bb_mapping + nsplit("N1 C5 C6", "C2 O2", "N3", "N4 C4 H41 H42"),
        "G": bb_mapping
        + nsplit("C8 H8 N9", "C4 N3", "C2 N2 H21 H22", "N1", "C6 O6", "C5 N7"),
        "U": bb_mapping + nsplit("N1 C5 C6", "C2 O2", "N3", "C4 O4"),
    }

    ForceField.update_non_standard_mapping(mapping)

    def __init__(self):

        # parameters are defined here for the following (protein) forcefields:
        self.name = "martini30nucleic"
        # Charged types:
        self.charges = {
            "Qd": 1,
            "Qa": -1,
            "SQd": 1,
            "SQa": -1,
            "RQd": 1,
            "AQa": -1,
        }  # @#
        self.bbcharges = {"BB1": -1}  # @#
        # Not all (eg Elnedyn) forcefields use backbone-backbone-sidechain angles and BBBB-dihedrals.
        self.UseBBSAngles = False
        self.UseBBBBDihedrals = False

        ##################
        # DNA PARAMETERS #
        ##################

        # DNA BACKBONE PARAMETERS
        self.dna_bb = {
            "atom": spl("Q0 SN0 SC2"),
            "bond": [],
            "angle": [],
            "dih": [],
            "excl": [],
            "pair": [],
        }
        # DNA BACKBONE CONNECTIVITY
        self.dna_con = {
            "bond": [],
            "angle": [],
            "dih": [],
            "excl": [],
            "pair": [],
        }

        self.bases = {}
        self.base_connectivity = {}

        ##################
        # RNA PARAMETERS # @ff
        ##################

        # RNA BACKBONE PARAMETERS TUT
        self.rna_bb = {
            "atom": spl("Q1n C6 N2"),
            "bond": [
                (1, 0.350, 25000),
                (1, 0.378, 12000),
                (1, 0.239, 25000),
                (1, 0.412, 12000),
            ],
            "angle": [
                (10, 110.0, 50),  # 2, 117.0, 140
                (10, 121.0, 180),
                (10, 143.0, 300),
            ],
            "dih": [
                (1, 0.0, 25.0, 1),
                (1, 0.0, 25.0, 1),
                (1, -112.0, 15.0, 1),
            ],
            "excl": [(), (), ()],
            "pair": [],
        }
        # RNA BACKBONE CONNECTIVITY
        self.rna_con = {
            "bond": [(0, 1), (1, 0), (1, 2), (2, 0)],
            "angle": [
                (0, 1, 0),
                (1, 0, 1),
                (0, 1, 2),
            ],
            "dih": [
                (0, 1, 0, 1),
                (1, 0, 1, 0),
                (1, 0, 1, 2),
            ],
            "excl": [
                (2, 0),
                (0, 2),
            ],
            "pair": [],
        }

        # For bonds, angles, and dihedrals the first parameter should always
        # be the type. It is pretty annoying to check the connectivity from
        # elsewhere so we update these one base at a time.

        a_itp, c_itp, g_itp, u_itp = ForceField.read_itps(rna_system, "regular", "new")

        # ADENINE
        mapping = [spl("TA0 TA1 TA2 TA3 TA4")]
        connectivity, itp_params = martini30nucleic.itp_to_indata(a_itp)
        self.update_adenine(mapping, connectivity, itp_params)

        # CYTOSINE
        mapping = [spl("TY0 TY1 TY2 TY3")]
        connectivity, itp_params = martini30nucleic.itp_to_indata(c_itp)
        self.update_cytosine(mapping, connectivity, itp_params)

        # GUANINE
        mapping = [spl("TG0 TG1 TG2 TG3 TG4 TG5")]
        connectivity, itp_params = martini30nucleic.itp_to_indata(g_itp)
        parameters = mapping + itp_params
        self.update_guanine(mapping, connectivity, itp_params)

        # URACIL
        mapping = [spl("TU0 TU1 TU2 TU3")]
        connectivity, itp_params = martini30nucleic.itp_to_indata(u_itp)
        parameters = mapping + itp_params
        self.update_uracil(mapping, connectivity, itp_params)

        super().__init__()




[docs]
class martini31nucleic(ForceField):

    # FF mapping
    bb_mapping = nsplit(
        "P OP1 OP2 O5' O3' O1P O2P",
        "C5' 1H5' 2H5' H5' H5'' C4' H4' O4' C3' H3'",
        "C1' C2' O2' O4'",
    )
    mapping = {
        "A": bb_mapping
        + nsplit(
            "C8",
            "N3 C4",
            "C2",
            "N1",
            "N6 C6 H61 H62",
            "N7 C5",
        ),
        "C": bb_mapping + nsplit("C6", "O2", "N3", "N4 C4 H41 H42", "C2"),
        "G": bb_mapping
        + nsplit("C8", "C4 N3", "C2 N2 H22 H21", "N1", "O6", "C5 N7", "H1", "C6"),
        "U": bb_mapping
        + nsplit(
            "C6",
            "O2",
            "N3",
            "O4",
            "C2",
            "H3",
            "C4",
        ),
    }

    ForceField.update_non_standard_mapping(mapping)

    def __init__(self):

        # parameters are defined here for the following (protein) forcefields:
        self.name = "martini31nucleic"

        # Charged types:
        charges = {
            "TDU": 0.5,
            "TA1": 0.4,
            "TA2": -0.3,
            "TA3": 0.5,
            "TA4": -0.8,
            "TA5": 0.6,
            "TA6": -0.4,
            "TY1": 0.0,
            "TY2": -0.5,
            "TY3": -0.6,
            "TY4": 0.6,
            "TY5": 0.5,
            "TG1": 0.3,
            "TG2": 0.0,
            "TG3": 0.3,
            "TG4": -0.3,
            "TG5": -0.5,
            "TG6": -0.6,
            "TG7": 0.3,
            "TG8": 0.5,
            "TU1": 0.0,
            "TU2": -0.5,
            "TU3": -0.5,
            "TU4": -0.5,
            "TU5": 0.5,
            "TU6": 0.5,
            "TU7": 0.5,
        }
        self.charges = {key: value * 1.8 for key, value in charges.items()}
        self.bbcharges = {"BB1": -1}

        # Not all (eg Elnedyn) forcefields use backbone-backbone-sidechain angles and BBBB-dihedrals.
        self.UseBBSAngles = False
        self.UseBBBBDihedrals = False

        ##################
        # DNA PARAMETERS #
        ##################

        # DNA BACKBONE PARAMETERS
        self.dna_bb = {
            "atom": spl("Q0 SN0 SC2"),
            "bond": [],
            "angle": [],
            "dih": [],
            "excl": [],
            "pair": [],
        }
        # DNA BACKBONE CONNECTIVITY
        self.dna_con = {
            "bond": [],
            "angle": [],
            "dih": [],
            "excl": [],
            "pair": [],
        }

        self.bases = {}
        self.base_connectivity = {}

        ##################
        # RNA PARAMETERS # @ff
        ##################

        # RNA BACKBONE PARAMETERS TUT
        self.rna_bb = {
            "atom": spl("Q1 N4 N6"),
            "bond": [
                (1, 0.349, 18000),
                (1, 0.377, 12000),
                (1, 0.240, 18000),
                (1, 0.412, 12000),
            ],
            "angle": [(10, 119.0, 27), (10, 118.0, 140), (10, 138.0, 180)],
            "dih": [
                (
                    3,
                    13,
                    -7,
                    -25,
                    -6,
                    25,
                    -2,
                ),  # (3,   10,  -8, 22, 8, -26, -6) # (3,   4,  -3, 9, 3, -10, -3)
                (1, 0, 6, 1),
                (1, -112.0, 15, 1),
            ],  # (1,     15.0,   5, 1)
            "excl": [(), (), ()],
            "pair": [],
        }
        # RNA BACKBONE CONNECTIVITY
        self.rna_con = {
            "bond": [(0, 1), (1, 0), (1, 2), (2, 0)],
            "angle": [
                (0, 1, 0),
                (1, 0, 1),
                (0, 1, 2),
            ],
            "dih": [
                (0, 1, 0, 1),
                (1, 0, 1, 0),
                (1, 0, 1, 2),
            ],
            "excl": [
                (2, 0),
                (0, 2),
            ],
            "pair": [],
        }

        a_itp, c_itp, g_itp, u_itp = ForceField.read_itps(rna_system, "polar", "new")

        # ADENINE
        mapping = [spl("TA1 TA2 TA3 TA4 TA5 TA6")]
        connectivity, itp_params = martini31nucleic.itp_to_indata(a_itp)
        parameters = mapping + itp_params
        self.update_adenine(mapping, connectivity, itp_params)

        # CYTOSINE
        mapping = [spl("TY1 TY2 TY3 TY4 TY5")]
        connectivity, itp_params = martini31nucleic.itp_to_indata(c_itp)
        parameters = mapping + itp_params
        self.update_cytosine(mapping, connectivity, itp_params)

        # GUANINE
        mapping = [spl("TG1 TG2 TG3 TG4 TG5 TG6 TG7 TG8")]
        connectivity, itp_params = martini31nucleic.itp_to_indata(g_itp)
        parameters = mapping + itp_params
        self.update_guanine(mapping, connectivity, itp_params)

        # URACIL
        mapping = [spl("TU1 TU2 TU3 TU4 TU5 TU6 TU7")]
        connectivity, itp_params = martini31nucleic.itp_to_indata(u_itp)
        parameters = mapping + itp_params
        self.update_uracil(mapping, connectivity, itp_params)

        super().__init__()



#########################
## 7 # ELASTIC NETWORK ##  -> @ELN <-
#########################
import math

## ELASTIC NETWORK ##

# Only the decay function is defined here, the network
# itself is set up through the Topology class


# The function to determine the decay scaling factor for the elastic network
# force constant, based on the distance and the parameters provided.
# This function is very versatile and can be fitted to most commonly used
# profiles, including a straight line (rate=0)

[docs]
def decayFunction(distance, shift, rate, power):
    return math.exp(-rate * math.pow(distance - shift, power))




[docs]
def rubberBands(
    atomList,
    lowerBound,
    upperBound,
    decayFactor,
    decayPower,
    forceConstant,
    minimumForce,
):
    out = []
    u2 = upperBound**2
    while len(atomList) > 3:
        bi, xi = atomList.pop(0)
        # This is a bit weird (=wrong I think) way of doing the cutoff...
        # for bj,xj in atomList[2:]:
        for bj, xj in atomList:
            # Mind the nm/A conversion -- This has to be standardized! Global use of nm?
            d2 = distance2(xi, xj) / 100
            if d2 < u2:
                dij = math.sqrt(d2)
                fscl = decayFunction(dij, lowerBound, decayFactor, decayPower)
                if fscl * forceConstant > minimumForce:
                    out.append(
                        {"atoms": (bi, bj), "parameters": (dij, "RUBBER_FC*%f" % fscl)}
                    )
    return out



#######################
## 8 # STRUCTURE I/O ##  -> @IO <-
#######################
import logging, math, random, sys

# ----+---------+
## A | PDB I/O |
# ----+---------+

d2r = 3.14159265358979323846264338327950288 / 180

# Reformatting of lines in structure file
pdbAtomLine = "ATOM  %5d %4s%4s %1s%4d%1s   %8.3f%8.3f%8.3f%6.2f%6.2f\n"
pdbBoxLine = "CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1           1\n"



[docs]
def pdbBoxString(box):
    # Box vectors
    u, v, w = box[0:3], box[3:6], box[6:9]
    # Box vector lengths
    nu, nv, nw = [math.sqrt(norm2(i)) for i in (u, v, w)]
    # Box vector angles
    alpha = nv * nw == 0 and 90 or math.acos(cos_angle(v, w)) / d2r
    beta = nu * nw == 0 and 90 or math.acos(cos_angle(u, w)) / d2r
    gamma = nu * nv == 0 and 90 or math.acos(cos_angle(u, v)) / d2r
    return pdbBoxLine % (10 * norm(u), 10 * norm(v), 10 * norm(w), alpha, beta, gamma)




[docs]
def pdbAtom(a):
    ##01234567890123456789012345678901234567890123456789012345678901234567890123456789
    ##ATOM   2155 HH11 ARG C 203     116.140  48.800   6.280  1.00  0.00
    if a.startswith("TER"):
        return 0
    # NOTE: The 27th field of an ATOM line in the PDB definition can contain an
    #       insertion code. We shift that 20 bits and add it to the residue number
    #       to ensure that residue numbers will be unique.
    ## ===> atom name,       res name,        res id,                        chain,
    return (
        a[12:16].strip(),
        a[17:20].strip(),
        int(a[22:26]) + (ord(a[26]) << 20),
        a[21],
        ##            x,              y,              z
        float(a[30:38]),
        float(a[38:46]),
        float(a[46:54]),
    )




[docs]
def pdbOut(atom, i=1):
    insc = atom[2] >> 20
    resi = atom[2] - (insc << 20)
    pdbline = (
        "ATOM  %5i  %-3s %3s%2s%4i%1s   %8.3f%8.3f%8.3f%6.2f%6.2f           %1s  \n"
    )
    return pdbline % (
        (i, atom[0][:3], atom[1], atom[3], resi, chr(insc))
        + atom[4:]
        + (1, 40, atom[0][0])
    )




[docs]
def isPdbAtom(a):
    return (
        a.startswith("ATOM")
        or (options["-hetatm"] and a.startswith("HETATM"))
        or a.startswith("TER")
    )




[docs]
def pdbBoxRead(a):
    fa, fb, fc, aa, ab, ac = [float(i) for i in a.split()[1:7]]
    ca, cb, cg, sg = (
        math.cos(d2r * aa),
        math.cos(d2r * ab),
        math.cos(d2r * ac),
        math.sin(d2r * ac),
    )
    wx, wy = 0.1 * fc * cb, 0.1 * fc * (ca - cb * cg) / sg
    wz = math.sqrt(0.01 * fc * fc - wx * wx - wy * wy)
    return [0.1 * fa, 0, 0, 0.1 * fb * cg, 0.1 * fb * sg, 0, wx, wy, wz]



# Function for splitting a PDB file in chains, based
# on chain identifiers and TER statements

[docs]
def pdbChains(pdbAtomList):
    chain = []
    for atom in pdbAtomList:
        if not atom:  # Was a "TER" statement
            if chain:
                yield chain
            else:
                logging.info("Skipping empty chain definition")
            chain = []
            continue
        if not chain or chain[-1][3] == atom[3]:
            chain.append(atom)
        else:
            yield chain
            chain = [atom]
    if chain:
        yield chain



# Simple PDB iterator

[docs]
def pdbFrameIterator(streamIterator):
    title, atoms, box = [], [], []
    for i in streamIterator:
        if i.startswith("ENDMDL"):
            yield "".join(title), atoms, box
            title, atoms, box = [], [], []
        elif i.startswith("TITLE"):
            title.append(i)
        elif i.startswith("CRYST1"):
            box = pdbBoxRead(i)
        elif i.startswith("ATOM") or i.startswith("HETATM"):
            atoms.append(pdbAtom(i))
    if atoms:
        yield "".join(title), atoms, box



# ----+-------------+
## C | GENERAL I/O |
# ----+-------------+


# It is not entirely clear where this fits in best.
# Called from main.

[docs]
def getChargeType(resname, resid, choices):
    """Get user input for the charge of residues, based on list with
    choises."""
    print("Which %s type do you want for residue %s:" % (resname, resid + 1))
    for i, choice in choices.iteritems():
        print("%s. %s" % (i, choice))
    choice = None
    while choice not in choices.keys():
        choice = input("Type a number:")
    return choices[choice]



# *NOTE*: This should probably be a CheckableStream class that
# reads in lines until either of a set of specified conditions
# is met, then setting the type and from thereon functioning as
# a normal stream.

[docs]
def streamTag(stream):
    # Tag the stream with the type of structure file
    # If necessary, open the stream, taking care of
    # opening using gzip for gzipped files

    # First check whether we have have an open stream or a file
    # If it's a file, check whether it's zipped and open it
    if type(stream) == str:
        if stream.endswith("gz"):
            logging.info("Read input structure from zipped file.")
            s = gzip.open(stream)
        else:
            logging.info("Read input structure from file.")
            s = open(stream)
    else:
        logging.info("Read input structure from command-line")
        s = stream

    # Read a few lines, but save them
    x = [s.readline(), s.readline()]
    if x[-1].strip().isdigit():
        # Must be a GRO file
        logging.info(
            "Input structure is a GRO file. Chains will be labeled consecutively."
        )
        yield "GRO"
    else:
        # Must be a PDB file then
        # Could wind further to see if we encounter an "ATOM" record
        logging.info("Input structure is a PDB file.")
        yield "PDB"

    # Hand over the lines that were stored
    for i in x:
        yield i

    # Now give the rest of the lines from the stream
    for i in s:
        yield i



# ----+-----------------+
## D | STRUCTURE STUFF |
# ----+-----------------+


# This list allows to retrieve atoms based on the name or the index
# If standard, dictionary type indexing is used, only exact matches are
# returned. Alternatively, partial matching can be achieved by setting
# a second 'True' argument.

[docs]
class Residue(list):
    def __getitem__(self, tag):
        if type(tag) == int:
            # Call the parent class __getitem__
            return list.__getitem__(self, tag)
        if type(tag) == str:
            for i in self:
                if i[0] == tag:
                    return i
            else:
                return
        if tag[1]:
            return [i for i in self if tag[0] in i[0]]  # Return partial matches
        else:
            return [i for i in self if i[0] == tag[0]]  # Return exact matches only




[docs]
def residues(atomList):
    residue = [atomList[0]]
    for atom in atomList[1:]:
        if (
            atom[1] == residue[-1][1]  # Residue name check
            and atom[2] == residue[-1][2]  # Residue id check
            and atom[3] == residue[-1][3]
        ):  # Chain id check
            residue.append(atom)
        else:
            yield Residue(residue)
            residue = [atom]
    yield Residue(residue)




[docs]
def residueDistance2(r1, r2):
    return min([distance2(i, j) for i in r1 for j in r2])



# Increased the cut-off from 2.5 to 3.0 for DNA. Should check that no problems arise for proteins.

[docs]
def breaks(
    residuelist, selection=("P", "C2'", "C3'", "O3'", "C4'", "C5'", "O5'"), cutoff=3.0
):
    # Extract backbone atoms coordinates
    bb = [
        [atom[4:] for atom in residue if atom[0] in selection]
        for residue in residuelist
    ]
    # Needed to remove waters residues from mixed residues.
    bb = [res for res in bb if res != []]
    # We cannot rely on some standard order for the backbone atoms.
    # Therefore breaks are inferred from the minimal distance between
    # backbone atoms from adjacent residues.
    breaks = [
        i + 1 for i in range(len(bb) - 1) if residueDistance2(bb[i], bb[i + 1]) > cutoff
    ]
    return breaks




[docs]
def contacts(atoms, cutoff=5):
    rla = range(len(atoms))
    crd = [atom[4:] for atom in atoms]
    return [
        (i, j)
        for i in rla[:-1]
        for j in rla[i + 1 :]
        if distance2(crd[i], crd[j]) < cutoff
    ]




[docs]
def add_dummy(beads, dist=0.11, n=2):
    # Generate a random vector in a sphere of -1 to +1, to add to the bead position
    v = [
        random.random() * 2.0 - 1,
        random.random() * 2.0 - 1,
        random.random() * 2.0 - 1,
    ]
    # Calculated the length of the vector and divide by the final distance of the dummy bead
    norm_v = norm(v) / dist
    # Resize the vector
    vn = [i / norm_v for i in v]
    # m sets the direction of the added vector, currently only works when adding one or two beads.
    m = 1
    for j in range(n):
        newName = "SCD"
        newBead = (
            newName,
            tuple([i + (m * j) for i, j in zip(beads[-1][1], vn)]),
            beads[-1][2],
        )
        beads.append(newBead)
        m *= -2
    return beads




[docs]
def check_merge(chains, m_list=[], l_list=[], ss_cutoff=0):
    chainIndex = range(len(chains))
    if "all" in m_list:
        logging.info("All chains will be merged in a single moleculetype.")
        return chainIndex, [chainIndex]
    chainID = [chain._id for chain in chains]
    # Mark the combinations of chains that need to be merged
    merges = []
    if m_list:
        # Build a dictionary of chain IDs versus index
        # To give higher priority to top chains the lists are reversed
        # before building the dictionary
        chainIndex = list(reversed(chainIndex))
        chainID = list(reversed(chainID))
        dct = dict(zip(chainID, chainIndex))
        chainIndex = list(reversed(chainIndex))
        # Convert chains in the merge_list to numeric, if necessary
        # NOTE The internal numbering is zero-based, while the
        # command line chain indexing is one-based. We have to add
        # one to the number in the dictionary to bring it on par with
        # the numbering from the command line, but then from the
        # result we need to subtract one again to make indexing
        # zero-based
        # merges = [[(i.isdigit() and int(i) or dct[i]+1)-1 for i in j] for j in m_list]
        merges = [[0, 1]]
        for i in merges:
            i.sort()
    # Rearrange merge list to a list of pairs
    pairs = [
        (i[j], i[k])
        for i in merges
        for j in range(len(i) - 1)
        for k in range(j + 1, len(i))
    ]
    # Check each combination of chains for connections based on
    # ss-bridges, links and distance restraints
    for i in chainIndex[:-1]:
        for j in chainIndex[i + 1 :]:
            if (i, j) in pairs:
                continue
            # Check whether any link links these two groups
            for a, b in l_list:
                if (a in chains[i] and b in chains[j]) or (
                    a in chains[j] and b in chains[i]
                ):
                    logging.info(
                        "Merging chains %d and %d to allow link %s"
                        % (i + 1, j + 1, str((a, b)))
                    )
                    pairs.append(i < j and (i, j) or (j, i))
                    break
            if (i, j) in pairs:
                continue
    # Sort the combinations
    pairs.sort(reverse=True)
    merges = []
    while pairs:
        merges.append(set([pairs[-1][0]]))
        for i in range(len(pairs) - 1, -1, -1):
            if pairs[i][0] in merges[-1]:
                merges[-1].add(pairs.pop(i)[1])
            elif pairs[i][1] in merges[-1]:
                merges[-1].add(pairs.pop(i)[0])
    merges = [list(i) for i in merges]
    for i in merges:
        i.sort()
    order = [j for i in merges for j in i]
    if merges:
        logging.warning("Merging chains.")
        logging.warning(
            "This may change the order of atoms and will change the number of topology files."
        )
        logging.info("Merges: " + ", ".join([str([j + 1 for j in i]) for i in merges]))
    if len(merges) == 1 and len(merges[0]) > 1 and set(merges[0]) == set(chainIndex):
        logging.info("All chains will be merged in a single moleculetype")
    # Determine the order for writing; merged chains go first
    merges.extend([[j] for j in chainIndex if not j in order])
    order.extend([j for j in chainIndex if not j in order])
    return order, merges



## !! NOTE !! ##
## XXX The chain class needs to be simplified by extracting things to separate functions/classes

[docs]
class Chain:
    # Attributes defining a chain
    # When copying a chain, or slicing, the attributes in this list have to
    # be handled accordingly.
    _attributes = ("residues", "sequence", "seq", "ss", "ssclass", "sstypes")

    def __init__(self, options, residuelist=[], name=None, multiscale=False):
        self.residues = residuelist
        self._atoms = [atom[:3] for residue in residuelist for atom in residue]
        self.sequence = [residue[0][1] for residue in residuelist]
        # *NOTE*: Check for unknown residues and remove them if requested
        #         before proceeding.
        self.seq = "".join([AA321.get(i, "X") for i in self.sequence])
        self.ss = ""
        self.ssclass = ""
        self.sstypes = ""
        self.mapping = []
        self.multiscale = False
        self.options = options

        # Unknown residues
        self.unknowns = "X" in self.seq

        # Determine the type of chain
        self._type = ""
        self.type()

        # Determine number of atoms
        self.natoms = len(self._atoms)

        # BREAKS: List of indices of residues where a new fragment starts
        # Only when polymeric (protein, DNA, RNA, ...)
        # For now, let's remove it for the Nucleic acids...
        # self.breaks     = self.type() in ("Protein", "Mixed", "Nucleic") and breaks(self.residues) or []
        self.breaks = breaks(self.residues)

        # LINKS:  List of pairs of pairs of indices of linked residues/atoms
        # This list is used for cysteine bridges and peptide bonds involving side chains
        # The list has items like ((#resi1, #atid1), (#resi2, #atid2))
        # When merging chains, the residue number needs ot be update, but the atom id
        # remains unchanged.
        # For the coarse grained system, it needs to be checked which beads the respective
        # atoms fall in, and bonded terms need to be added there.
        self.links = []

        # Chain identifier; try to read from residue definition if no name is given
        self._id = name or residuelist and residuelist[0][0][3] or ""

        # Container for coarse grained beads
        self._cg = None

    def __len__(self):
        # Return the number of residues
        # DNA/RNA contain non-CAP d/r to indicate type. We remove those first.
        return len("".join(i for i in self.seq if i.isupper()))

    def __add__(self, other):
        newchain = Chain(name=self._id + "+" + other._id)
        # Combine the chain items that can be simply added
        for attr in self._attributes:
            setattr(newchain, attr, getattr(self, attr) + getattr(other, attr))
        # Set chain items, shifting the residue numbers
        shift = len(self)
        newchain.breaks = self.breaks + [shift] + [i + shift for i in other.breaks]
        newchain.links = self.links + [
            ((i[0] + shift, i[1]), (j[0] + shift, j[1])) for i, j in other.links
        ]
        newchain.natoms = len(newchain.atoms())
        newchain.multiscale = False
        # Return the merged chain
        print(newchain.breaks)
        return newchain

    def __eq__(self, other):
        return (
            self.seq == other.seq
            and self.ss == other.ss
            and self.breaks == other.breaks
            and self.links == other.links
            and self.multiscale == False
        )

    # Extract a residue by number or the list of residues of a given type
    # This facilitates selecting residues for links, like chain["CYS"]
    def __getitem__(self, other):
        if type(other) == str:
            if not other in self.sequence:
                return []
            return [i for i in self.residues if i[0][1] == other]
        elif type(other) == tuple:
            # This functionality is set up for links
            # between coarse grained beads. So these are
            # checked first,
            for i in self.cg():
                if other == i[:4]:
                    return i
            else:
                for i in self.atoms():
                    if other[:3] == i[:3]:
                        return i
                else:
                    return []
        return self.sequence[other]

    # Extract a piece of a chain as a new chain
    def __getslice__(self, i, j):
        newchain = Chain(self.options, name=self._id)
        # Extract the slices from all lists
        for attr in self._attributes:
            setattr(newchain, attr, getattr(self, attr)[i:j])
        # Breaks that fall within the start and end of this chain need to be passed on.
        # Residue numbering is increased by 20 bits!!
        # XXX I don't know if this works.
        ch_sta, ch_end = newchain.residues[0][0][2], newchain.residues[-1][0][2]
        newchain.breaks = [
            crack for crack in self.breaks if ch_sta < (crack << 20) < ch_end
        ]
        newchain.links = [link for link in self.links if ch_sta < (link << 20) < ch_end]
        newchain.multiscale = False
        newchain.natoms = len(newchain.atoms())
        newchain.type()
        # Return the chain slice
        return newchain

    def _contains(self, atomlist, atom):
        atnm, resn, resi, chn = atom

        # If the chain does not match, bail out
        if chn != self._id:
            return False

        # Check if the whole tuple is in
        if atnm and resn and resi:
            return (atnm, resn, resi) in self.atoms()

        # Fetch atoms with matching residue id
        match = (not resi) and atomlist or [j for j in atomlist if j[2] == resi]
        if not match:
            return False

        # Select atoms with matching residue name
        match = (not resn) and match or [j for j in match if j[1] == resn]
        if not match:
            return False

        # Check whether the atom is given and listed
        if not atnm or [j for j in match if j[0] == atnm]:
            return True

        # It just is not in the list!
        return False

    def __contains__(self, other):
        return self._contains(self.atoms(), other) or self._contains(self.cg(), other)

    def __hash__(self):
        return id(self)


[docs]
    def atoms(self):
        if not self._atoms:
            self._atoms = [atom[:3] for residue in self.residues for atom in residue]
        return self._atoms


    # Split a chain based on residue types; each subchain can have only one type

[docs]
    def split(self):
        chains = []
        chainStart = 0
        for i in range(len(self.sequence) - 1):
            if residueTypes.get(self.sequence[i], "Unknown") != residueTypes.get(
                self.sequence[i + 1], "Unknown"
            ):
                # Use the __getslice__ method to take a part of the chain.
                chains.append(self[chainStart : i + 1])
                chainStart = i + 1
        if chains:
            logging.debug(
                "Splitting chain %s in %s chains" % (self._id, len(chains) + 1)
            )
        return chains + [self[chainStart:]]



[docs]
    def getname(self, basename=None):
        name = []
        if basename:
            name.append(basename)
        if self.type() and not basename:
            name.append(self.type())
        if type(self._id) == int:
            name.append(chr(64 + self._id))
        elif self._id.strip():
            name.append(str(self._id))
        return "_".join(name)



[docs]
    def set_ss(self, ss, source="self"):
        if len(ss) == 1:
            self.ss = len(self) * ss
        else:
            self.ss = ss
        # Infer the Martini backbone secondary structure types
        self.ssclass, self.sstypes = ssClassification(self.ss, source)



[docs]
    def dss(self, method=None, executable=None):
        # The method should take a list of atoms and return a
        # string of secondary structure classifications
        if self.type() == "Protein":
            if method:
                atomlist = [atom for residue in self.residues for atom in residue]
                self.set_ss(
                    ssDetermination[method](self, atomlist, executable), source=method
                )
            else:
                self.set_ss(len(self) * "C")
        else:
            self.set_ss(len(self.sequence) * "-")
        return self.ss



[docs]
    def type(self, other=None):
        if other:
            self._type = other
        elif not self._type and len(self):
            # Determine the type of chain
            self._type = set(
                [residueTypes.get(i, "Unknown") for i in set(self.sequence)]
            )
            self._type = len(self._type) > 1 and "Mixed" or list(self._type)[0]
        return self._type


    # XXX The following (at least the greater part of it) should be made a separate function, put under "MAPPING"

[docs]
    def cg(self, force=False, com=False, dna=False):
        # Generate the coarse grained structure
        # Set the b-factor field to something that reflects the secondary structure

        # If the coarse grained structure is set already, just return,
        # unless regeneration is forced.
        if self._cg and not force:
            return self._cg
        self._cg = []
        atid = 1
        bb = [1]
        fail = False
        previous = ""
        for residue, rss, resname in zip(self.residues, self.sstypes, self.sequence):
            # For DNA we need to get the O3' to the following residue when calculating COM
            # The force and com options ensure that this part does not affect itp generation or anything else
            if com:
                # Just an initialization, this should complain if it isn't updated in the loop
                store = 0
                for ind, i in enumerate(residue):
                    if i[0] == "O3'":
                        if previous != "":
                            residue[ind] = previous
                            previous = i
                        else:
                            store = ind
                            previous = i
                # We couldn't remove the O3' from the 5' end residue during the loop so we do it now
                if store > 0:
                    del residue[store]

            # Check if residues names has changed, for example because user has set residues interactively.
            residue = [(atom[0], resname) + atom[2:] for atom in residue]
            if residue[0][1] in ("SOL", "HOH", "TIP"):
                continue

            str2ff = {
                "martini30nucleic": martini30nucleic,
                "martini31nucleic": martini31nucleic,
            }
            ffname = options["ForceField"].name
            ff = str2ff[ffname]
            if not residue[0][1] in ff.mapping.keys():
                logging.warning("Skipped unknown residue %s\n" % residue[0][1])
                continue
            # Get the mapping for this residue
            # CG.map returns bead coordinates and mapped atoms
            # This will fail if there are (too many) atoms missing, which is
            # only problematic if a mapped structure is written; the topology
            # is inferred from the sequence. So this is the best place to raise
            # an error
            try:
                # The last residue, in the case of a polBB has only a CA-positioned bead.
                beads, ids = map(residue, ff)
                beads = zip(CoarseGrained.names[residue[0][1]], beads, ids)
            except ValueError:
                logging.error(
                    "Too many atoms missing from residue %s %d(ch:%s):",
                    residue[0][1],
                    residue[0][2] >> 20,
                    residue[0][3],
                )
                logging.error(repr([i[0] for i in residue]))
                fail = True

            for name, (x, y, z), ids in beads:
                # Add the bead with coordinates and secondary structure id to the list
                self._cg.append(
                    (
                        name,
                        residue[0][1][:3],
                        residue[0][2],
                        residue[0][3],
                        x,
                        y,
                        z,
                        ss2num[rss],
                    )
                )
                # Add the ids to the list, after converting them to indices to the list of atoms
                self.mapping.append([atid + i for i in ids])

            # Increment the atom id; This pertains to the atoms that are included in the output.
            atid += len(residue)

            # Keep track of the numbers for CONECTing
            bb.append(bb[-1] + len(list(beads)))

        if fail:
            logging.error(
                "Unable to generate coarse grained structure due to missing atoms."
            )
            sys.exit(1)

        return self._cg



[docs]
    def conect(self):
        # Return pairs of numbers that should be CONECTed
        # First extract the backbone IDs
        cg = self.cg()
        bb = [i + 1 for i, j in zip(range(len(cg)), cg) if j[0] == "BB"]
        bb = zip(bb, bb[1:] + [len(bb)])
        # Set the backbone CONECTs (check whether the distance is consistent with binding)
        conect = [
            (i, j) for i, j in bb[:-1] if distance2(cg[i - 1][4:7], cg[j - 1][4:7]) < 14
        ]
        # Now add CONECTs for sidechains
        for i, j in bb:
            nsc = j - i - 1




##################
## 7 # TOPOLOGY ##  -> @TOP <-
##################
import logging, math


# This is a generic class for Topology Bonded Type definitions

[docs]
class Bonded:
    # The init method is generic to the bonded types,
    # but may call the set method if atoms are given
    # as (ID, ResidueName, SecondaryStructure) tuples
    # The set method is specific to the different types.
    def __init__(self, other=None, options=None, **kwargs):
        self.atoms = []
        self.type = -1
        self.parameters = []
        self.comments = []
        self.category = None

        if options and type(options) == dict:
            self.options = options
        if other:
            # If other is given, then copy the attributes
            # if it is of the same class or set the
            # attributes according to the key names if
            # it is a dictionary
            if other.__class__ == self.__class__:
                for attr in dir(other):
                    if not attr[0] == "_":
                        setattr(self, attr, getattr(other, attr))
            elif type(other) == dict:
                for attr in other.keys():
                    setattr(self, attr, other[attr])
            elif type(other) in (list, tuple):
                self.atoms = other

        # For every item in the kwargs keys, set the attribute
        # with the same name. This can be used to specify the
        # attributes directly or to override attributes
        # copied from the 'other' argument.
        for key in kwargs:
            setattr(self, key, kwargs[key])

        # If atoms are given as tuples of
        # (ID, ResidueName[, SecondaryStructure])
        # then determine the corresponding parameters
        # from the lists above
        if self.atoms and type(self.atoms[0]) == tuple:
            self.set(self.atoms, **kwargs)

    def __nonzero__(self):
        return bool(self.atoms) and bool(self.parameters)

    def __str__(self):
        if not self.atoms or not self.parameters:
            return ""
        s = ["%5d" % i for i in self.atoms]
        # For exclusions, no type is defined, which equals -1
        if self.type != -1:
            s.append(" %5d " % self.type)
        # Print integers and floats in proper format and neglect None terms
        s.extend([formatString(i) for i in self.parameters if i != None])
        if self.comments:
            s.append(";")
            if type(self.comments) == str:
                s.append(self.comments)
            else:
                s.extend([str(i) for i in self.comments])
        return " ".join(s)

    def __iadd__(self, num):
        self.atoms = tuple([i + int(num) for i in self.atoms])
        return self

    def __add__(self, num):
        out = self.__class__(self)
        out += num
        return out

    def __eq__(self, other):
        if type(other) in (list, tuple):
            return self.atoms == other
        else:
            return (
                self.atoms == other.atoms
                and self.type == other.type
                and self.parameters == other.parameters
            )

    # This function needs to be overridden for descendents

[docs]
    def set(self, atoms, **kwargs):
        pass




# The set method of this class will look up parameters for backbone beads
# Side chain bonds ought to be set directly, using the constructor
# providing atom numbers, bond type, and parameters
# Constraints are bonds with kb = 100000.00000, which can be extracted
# using the category

[docs]
class Bond(Bonded):

[docs]
    def set(self, atoms, **kwargs):
        ids, r, ss, ca = zip(*atoms)
        self.atoms = ids
        self.type = 1
        self.positionCa = ca
        self.comments = "%s(%s)-%s(%s)" % (r[0], ss[0], r[1], ss[1])
        # The category can be used to keep bonds sorted
        self.category = kwargs.get("category")

        if not self.parameters:
            self.parameters = self.options["ForceField"].bbGetBond(r, ca, ss)
        # If there are more than two parameters given, user HAS TO define the bond type as the first one.
        # This is to allow use of any gromacs bond type.
        if self.parameters and len(self.parameters) > 2:
            self.type, self.parameters = self.parameters[0], self.parameters[1:]
        # Backbone bonds also can be constraints. We could change the type further on, but this is more general.
        # Even better would be to add a new type: BB-Constraint
        if self.parameters and self.parameters[1] == "100000.00000":
            self.category = "Constraint"



    # Overriding __str__ method to suppress printing of bonds with Fc of 0
    # def __str__(self):
    #     if not self.parameters:
    #         return ""
    #     return Bonded.__str__(self)


# Similar to the preceding class

[docs]
class Angle(Bonded):

[docs]
    def set(self, atoms, **kwargs):
        ids, r, ss, ca = zip(*atoms)
        self.atoms = ids
        self.type = 2
        self.positionCa = ca
        self.comments = "%s(%s)-%s(%s)-%s(%s)" % (r[0], ss[0], r[1], ss[1], r[2], ss[2])
        self.category = kwargs.get("category")

        self.parameters = self.options["ForceField"].bbGetAngle(r, ca, ss)
        # If there are more than two parameters given, user HAS TO define the angle type as the first one.
        # This is to allow use of any gromacs angle type.
        if self.parameters and len(self.parameters) > 2:
            self.type, self.parameters = self.parameters[0], self.parameters[1:]




# Similar to the preceding class

[docs]
class Vsite(Bonded):

[docs]
    def set(self, atoms, **kwargs):
        ids, r, ss, ca = zip(*atoms)
        self.atoms = ids
        self.type = 2
        self.positionCa = ca
        self.comments = "%s" % (r[0])
        self.category = kwargs.get("category")
        parameters = kwargs.get("parameters")


    def __str__(self):
        if not self.atoms:
            return ""
        # s = ["%5d" % i for i in self.atoms]
        s = [
            formatString(self.atoms[0]),
            formatString(self.atoms[1]),
            formatString(self.atoms[2]),
            formatString(self.atoms[3]),
            formatString(self.parameters[0]),
            formatString(self.parameters[1]),
            formatString(self.parameters[2]),
        ]
        if self.comments:
            s.append(";")
            if type(self.comments) == str:
                s.append(self.comments)
            else:
                s.extend([str(i) for i in self.comments])
        return " ".join(s)



# Similar to the preceding class

[docs]
class Exclusion(Bonded):

[docs]
    def set(self, atoms, **kwargs):
        ids, r, ss, ca = zip(*atoms)
        self.atoms = ids
        self.positionCa = ca
        self.comments = "%s" % (r[0])
        self.category = kwargs.get("category")
        self.parameters = kwargs.get("parameters")
        if not self.parameters:
            self.parameters = self.options["ForceField"].bbGetExclusion(r, ca, ss)




# Similar to the preceding class

[docs]
class Pair(Bonded):

[docs]
    def set(self, atoms, **kwargs):
        ids, r, ss, ca = zip(*atoms)
        self.atoms = ids
        self.positionCa = ca
        self.comments = "%s(%s)-%s(%s)" % (r[0], ss[0], r[1], ss[1])
        self.category = kwargs.get("category")
        self.type = kwargs.get("type") or 1
        if not self.parameters:
            self.parameters = self.options["ForceField"].bbGetPair(r, ca, ss)




# Similar to the preceding class

[docs]
class Dihedral(Bonded):

[docs]
    def set(self, atoms, **kwargs):
        ids, r, ss, ca = zip(*atoms)
        self.atoms = ids
        self.type = 1
        self.positionCa = ca
        self.comments = "%s(%s)-%s(%s)-%s(%s)-%s(%s)" % (
            r[0],
            ss[0],
            r[1],
            ss[1],
            r[2],
            ss[2],
            r[3],
            ss[3],
        )
        self.category = kwargs.get("category")
        if not self.parameters:
            self.parameters = self.options["ForceField"].bbGetDihedral(r, ca, ss)
        # If there are more than two parameters given, user HAS TO define the diheral type as the first one.
        # This is to allow use of any gromacs dihedral type.
        if self.parameters and len(self.parameters) > 2:
            self.type, self.parameters = self.parameters[0], self.parameters[1:]




# This list allows to retrieve Bonded class items based on the category
# If standard, dictionary type indexing is used, only exact matches are
# returned. Alternatively, partial matching can be achieved by setting
# a second 'True' argument.

[docs]
class CategorizedList(list):
    def __getitem__(self, tag):
        if type(tag) == int:
            # Call the parent class __getitem__
            return list.__getitem__(self, tag)

        if type(tag) == str:
            return [i for i in self if i.category == tag]

        if tag[1]:
            return [i for i in self if tag[0] in i.category]
        else:
            return [i for i in self if i.category == tag[0]]




[docs]
class Topology:
    def __init__(self, other=None, options=None, name=""):
        self.name = ""
        self.nrexcl = 1
        self.atoms = CategorizedList()
        self.pairs = CategorizedList()
        self.vsites = CategorizedList()
        self.exclusions = CategorizedList()
        self.bonds = CategorizedList()
        self.angles = CategorizedList()
        self.dihedrals = CategorizedList()
        self.impropers = CategorizedList()
        self.constraints = CategorizedList()
        self.posres = CategorizedList()
        self.sequence = []
        self.secstruc = ""
        # Okay, this is sort of funny; we will add a
        #   #define mapping virtual_sites3
        # to the topology file, followed by a header
        #   [ mapping ]
        self.mapping = []
        # For multiscaling we have to keep track of the number of
        # real atoms that correspond to the beads in the topology
        self.natoms = 0
        self.multiscale = False

        if options:
            self.options = options
        else:
            self.options = {}

        if not other:
            # Returning an empty instance
            return
        elif isinstance(other, Topology):
            for attrib in [
                "atoms",
                "vsites",
                "bonds",
                "angles",
                "dihedrals",
                "impropers",
                "constraints",
                "posres",
            ]:
                setattr(self, attrib, getattr(other, attrib, []))
        elif isinstance(other, Chain):
            if other.type() == "Protein" and other.options["ForceField"].name in [
                "polbb"
            ]:
                self.fromAminoAcidSequencePolBB(other)
            elif other.type() == "Protein":
                self.fromAminoAcidSequence(other)
            elif other.type() == "Nucleic":
                self.fromNucleicAcidSequence(other)
            else:
                # This chain should not be polymeric, but a collection of molecules
                # For each unique residue type fetch the proper moleculetype
                self.fromMoleculeList(other)
        if name:
            self.name = name

    def __iadd__(self, other):
        if not isinstance(other, Topology):
            other = Topology(other)
        shift = len(self.atoms)
        last = self.atoms[-1]
        atoms = list(zip(*other.atoms))
        atoms[0] = [i + shift for i in atoms[0]]  # Update atom numbers
        atoms[2] = [i + last[2] for i in atoms[2]]  # Update residue numbers
        atoms[5] = [i + last[5] for i in atoms[5]]  # Update charge group numbers
        atoms = list(zip(*atoms))
        # The zippings above kills all atoms with specified mass (9 long lists)
        # Let's add some band aid to fix it...
        # This of course doesn't work if there was a secondary structure to keep
        # but think it affects at this stage only the output comment.
        for i in range(len(atoms)):
            if atoms[i][-1] == 0:
                atoms[i] = atoms[i] + ("c",)
        self.atoms.extend(atoms)
        for attrib in [
            "bonds",
            "vsites",
            "exclusions",
            "angles",
            "dihedrals",
            "impropers",
            "constraints",
            "posres",
        ]:
            getattr(self, attrib).extend(
                [source + shift for source in getattr(other, attrib)]
            )
        return self

    def __add__(self, other):
        out = Topology(self)
        if not isinstance(other, Topology):
            other = Topology(other)
        out += other
        return out

    def __str__(self):
        string = '; MARTINI (%s) Coarse Grained topology file for "%s"' % (
            self.options["ForceField"].name,
            self.name,
        )
        string += "\n; Created by py version %s \n; Using the following options:  " % (
            self.options["Version"]
        )
        string += " ".join(self.options["Arguments"])
        string += "\n; #####################################################################################################"
        string += "\n; This topology is based on development beta of Martini DNA and should NOT be used for production runs."
        string += "\n; #####################################################################################################"
        out = [string]
        if self.sequence:
            out += [
                "; Sequence:",
                "; " + "".join([AA321.get(AA) for AA in self.sequence]),
                "; Secondary Structure:",
                "; " + self.secstruc,
            ]

        # Do not print a molecule name when multiscaling
        # In that case, the topology created here needs to be appended
        # at the end of an atomistic moleculetype
        if not self.multiscale:
            out += [
                "\n[ moleculetype ]",
                "; Name         Exclusions",
                "%-15s %3d" % (self.name, self.nrexcl),
            ]

        out.append("\n[ atoms ]")

        # For virtual sites and dummy beads we have to be able to specify the mass.
        # Thus we need two different format strings:
        fs8 = "%5d %5s %5d %5s %5s %5d %7.4f ; %s"
        fs9 = "%5d %5s %5d %5s %5s %5d %7.4f %7.4f ; %s"
        out.extend([len(i) == 9 and fs9 % i or fs8 % i for i in self.atoms])

        # Print the pairs.
        pairs = [str(i) for i in self.pairs if str(i)]
        if pairs:
            out.append("\n[ pairs ]")
            out.extend(pairs)

        # Print out the vsites only if they excist. Right now it can only be type 1 virual sites.
        # TODO: This needs to be generalized for all virtual site types.
        vsitesBB = [str(i) for i in self.vsites["BB"] if str(i)]
        vsitesSC = [str(i) for i in self.vsites["SC"] if str(i)]

        if vsitesBB or vsitesSC:
            out.append("\n[ virtual_sites3 ]")
        if vsitesBB:
            out.append("; Backbone virtual sites.")
            out.extend(vsitesBB)
        if vsitesSC:
            out.append("; Sidechain virtual sites.")
            out.extend(vsitesSC)

        # Bonds in order: backbone, backbone-sidechain, sidechain, short elastic, long elastic
        out.append("\n[ bonds ]")
        # Backbone-backbone
        bonds = [str(i) for i in self.bonds["BB"] if str(i)]
        if bonds:
            out.append("; Backbone bonds")
            out.extend(bonds)
        # Rubber Bands
        bonds = [str(i) for i in self.bonds["Rubber", True] if str(i)]
        if bonds:
            # Add a CPP style directive to allow control over the elastic network
            out.append("#ifdef RUBBER_BANDS")
            out.append(
                "#ifndef RUBBER_FC\n#define RUBBER_FC %f\n#endif"
                % self.options["ElasticMaximumForce"]
            )
            out.extend(bonds)
            out.append("#endif")
        # Backbone-Sidechain/Sidechain-Sidechain
        bonds = [str(i) for i in self.bonds["SC"] if str(i)]
        if bonds:
            out.append("; Sidechain bonds")
            out.extend(bonds)
        # Short elastic/Long elastic
        bonds = [str(i) for i in self.bonds["Elastic short"] if str(i)]
        if bonds:
            out.append("; Short elastic bonds for extended regions")
            out.extend(bonds)
        bonds = [str(i) for i in self.bonds["Elastic long"] if str(i)]
        if bonds:
            out.append("; Long elastic bonds for extended regions")
            out.extend(bonds)
        # Cystine bridges
        bonds = [str(i) for i in self.bonds["Cystine"] if str(i)]
        if bonds:
            out.append("; Cystine bridges")
            out.extend(bonds)
        # Other links
        bonds = [str(i) for i in self.bonds["Link"] if str(i)]
        if bonds:
            out.append("; Links/Cystine bridges")
            out.extend(bonds)

        # Constraints
        out.append("\n[ constraints ]")
        out.extend([str(i) for i in self.bonds["Constraint"]])

        # Print out the exclusions only if they excist.
        exclusions = [str(i) for i in self.exclusions if str(i)]
        if exclusions:
            out.append("\n[ exclusions ]")
            out.extend(exclusions)

        # Angles
        out.append("\n[ angles ]")
        out.append("; Backbone angles")
        out.extend([str(i) for i in self.angles["BBB"] if str(i)])
        out.append("; Backbone-sidechain angles")
        out.extend([str(i) for i in self.angles["BBS"] if str(i)])
        out.append("; Sidechain angles")
        out.extend([str(i) for i in self.angles["SC"] if str(i)])

        # Dihedrals
        out.append("\n[ dihedrals ]")
        out.append("; Backbone dihedrals")
        out.extend([str(i) for i in self.dihedrals["BBBB"] if i.parameters])
        out.append("; Sidechain dihedrals")
        out.extend([str(i) for i in self.dihedrals["BSC"] if i.parameters])
        out.append("; Sidechain improper dihedrals")
        out.extend([str(i) for i in self.dihedrals["SC"] if i.parameters])

        # Postition Restraints
        if self.posres:
            out.append("\n#ifdef POSRES")
            out.append(
                "#ifndef POSRES_FC\n#define POSRES_FC %.2f\n#endif"
                % self.options["PosResForce"]
            )
            out.append(" [ position_restraints ]")
            out.extend(
                [
                    "  %5d    1    POSRES_FC    POSRES_FC    POSRES_FC" % i
                    for i in self.posres
                ]
            )
            out.append("#endif")

        logging.info("Created coarsegrained topology")
        return "\n".join(out)


[docs]
    def fromNucleicAcidSequence(
        self,
        sequence,
        secstruc=None,
        links=None,
        breaks=None,
        mapping=None,
        rubber=False,
        multi=False,
    ):

        # Shift for the atom numbers of the atomistic part in a chain
        # that is being multiscaled
        shift = 0
        # First check if we get a sequence or a Chain instance
        if isinstance(sequence, Chain):
            chain = sequence
            links = chain.links
            breaks = chain.breaks
            # If the mapping is not specified, the actual mapping is taken,
            # used to construct the coarse grained system from the atomistic one.
            # The function argument "mapping" could be used to use a default
            # mapping scheme in stead, like the mapping for the GROMOS96 force field.
            mapping = mapping or chain.mapping
            multi = False
            self.secstruc = chain.sstypes or len(chain) * "F"
            self.sequence = chain.sequence
            self.multiscale = False

        logging.debug(self.secstruc)
        logging.debug(self.sequence)

        # Fetch the base information
        # Pad with empty lists for atoms, bonds, angles
        # and dihedrals, and take the first five lists out
        # This will avoid errors for residues for which
        # these are not defined.

        sc = [
            (self.options["ForceField"].bases[res] + 8 * [[]])[:8]
            for res in self.sequence
        ]

        # ID of the first atom/residue
        # The atom number and residue number follow from the last
        # atom c.q. residue id in the list processed in the topology
        # thus far. In the case of multiscaling, the real atoms need
        # also be accounted for.
        startAtom = self.natoms + 1
        startResi = self.atoms and self.atoms[-1][2] + 1 or 1

        # Backbone bead atom IDs
        bbid = [[startAtom, startAtom + 1, startAtom + 2]]
        for i in sc:
            bbid1 = bbid[-1][0] + len(i[0]) + 3
            bbid.append([bbid1, bbid1 + 1, bbid1 + 2])

        # Residue numbers for this moleculetype topology
        resid = range(startResi, startResi + len(self.sequence))

        # This contains the information for deriving backbone bead types,
        # bb bond types, bbb/bbs angle types, and bbbb dihedral types.
        seqss = list(zip(bbid, self.sequence, self.secstruc))

        # Fetch the proper backbone beads
        # Since there are three beads we need to split these to the list
        bb = [self.options["ForceField"].bbGetBead(res, typ) for num, res, typ in seqss]
        bbMulti = [i for j in bb for i in j]

        # For backbone parameters, iterate over fragments, inferred from breaks
        for i, j in zip([0] + breaks, breaks + [-1]):
            # Extract the fragment
            frg = seqss[i:] if j == -1 else seqss[i:j]

            # Expand the 3 bb beads per residue into one long list
            # Resulting list contains three tuples per residue
            # We use the useless ca parameter to get the correct backbone bond from bbGetBond
            # The i is used to define ca because it conveniently here gives the right sequence 0,1,2,0,1,...
            frg = [(j[0][i], j[1], j[2], i) for j in frg for i in range(len(j[0]))]

            # Iterate over backbone bonds, two loops are needed because there are multipe cross bonds in the BB.
            # Since bonded interactions can return None, we first collect them separately and then
            # add the non-None ones to the list
            # This part assumes you have a linear backbone (checks only combinations that make sense for such)
            for ind, k in enumerate(frg):

                # Exclusions iterate over the second atom.
                # and then checked for each pair.
                for l in frg[ind : ind + 3]:
                    # excl = Exclusion((k,l), category="BB", options=self.options,)
                    # if excl:
                    #     self.exclusions.append(excl)
                    if l > k:
                        excl = Exclusion(
                            (k, l),
                            category="BB",
                            options=self.options,
                        )
                        self.exclusions.append(excl)

                # Pairs iterate over the second atom.
                # and then checked for each pair.
                for l in frg[ind : ind + 3]:
                    pair = Pair(
                        (k, l),
                        category="BB",
                        options=self.options,
                    )
                    if pair:
                        self.pairs.append(pair)

                # Bonds iterate over the second atom.
                for l in frg[ind : ind + 3]:
                    if l > k:
                        bond = Bond(
                            (k, l),
                            category="BB",
                            options=self.options,
                        )
                        self.bonds.append(bond)

                    # The angles need a third loop.
                    for m in frg[ind : ind + 4]:
                        if m > l > k:
                            angle = Angle(
                                (
                                    k,
                                    l,
                                    m,
                                ),
                                options=self.options,
                                category="BBB",
                            )
                            self.angles.append(angle)

                        # The dihedrals need a fourth loop.
                        for n in frg[ind + 1 : ind + 6]:
                            if n > m > l > k:
                                dihed = Dihedral(
                                    (
                                        k,
                                        l,
                                        m,
                                        n,
                                    ),
                                    options=self.options,
                                    category="BBBB",
                                )
                                if dihed.parameters == None:
                                    continue
                                if dihed.parameters[1] != 0:
                                    self.dihedrals.append(dihed)

        # Now do the atom list, and take the sidechains along
        #
        atid = startAtom
        # We need to do some trickery to get all 3 bb beads in to these lists
        # This adds each element to a list three times, feel free to shorten up
        residMulti = [i for i in resid for j in range(3)]
        sequenceMulti = [i for i in self.sequence for j in range(3)]
        scMulti = [i for i in sc for j in range(3)]
        secstrucMulti = [i for i in self.secstruc for j in range(3)]
        count = 0
        for resi, resname, bbb, sidechn, ss in zip(
            residMulti, sequenceMulti, bbMulti, scMulti, secstrucMulti
        ):
            # We only want one side chain per three backbone beads so this skips the others
            if (count % 3) == 0:
                # Note added impropers in contrast to aa
                (
                    scatoms,
                    bon_par,
                    ang_par,
                    dih_par,
                    imp_par,
                    vsite_par,
                    excl_par,
                    pair_par,
                ) = sidechn

                # Side chain bonded terms
                # Collect bond, angle and dihedral connectivity
                # Impropers needed to be added here for DNA
                bon_con, ang_con, dih_con, imp_con, vsite_con, excl_con, pair_con = (
                    self.options["ForceField"].base_connectivity[resname] + 7 * [[]]
                )[:7]
                # Ill try to fix the exclusions by using separate connectivity record for DNA
                # bon_con,ang_con,dih_con,imp_con,vsite_con = (self.options['ForceField'].connectivity[resname]+5*[[]])[:5]

                # Side Chain Bonds/Constraints
                for atids, par in zip(bon_con, bon_par):
                    if par[2] == 100000.00000:
                        self.bonds.append(
                            Bond(
                                options=self.options,
                                atoms=atids,
                                parameters=[par[1]],
                                type=par[0],
                                comments=resname,
                                category="Constraint",
                            )
                        )
                    else:
                        bond = Bond(
                            options=self.options,
                            atoms=atids,
                            parameters=par[1:],
                            type=par[0],
                            comments=resname,
                            category="SC",
                        )
                        self.bonds.append(bond)
                    # Shift the atom numbers
                    self.bonds[-1] += atid

                # Side Chain Angles
                for atids, par in zip(ang_con, ang_par):
                    self.angles.append(
                        Angle(
                            options=self.options,
                            atoms=atids,
                            parameters=par[1:],
                            type=par[0],
                            comments=resname,
                            category="SC",
                        )
                    )
                    # Shift the atom numbers
                    self.angles[-1] += atid

                # Side Chain Dihedrals
                for atids, par in zip(dih_con, dih_par):
                    self.dihedrals.append(
                        Dihedral(
                            options=self.options,
                            atoms=atids,
                            parameters=par[1:],
                            type=par[0],
                            comments=resname,
                            category="BSC",
                        )
                    )
                    # Shift the atom numbers
                    self.dihedrals[-1] += atid

                # Side Chain Impropers
                for atids, par in zip(imp_con, imp_par):
                    self.dihedrals.append(
                        Dihedral(
                            options=self.options,
                            atoms=atids,
                            parameters=par[1:],
                            type=par[0],
                            comments=resname,
                            category="SC",
                        )
                    )
                    # Shift the atom numbers
                    self.dihedrals[-1] += atid

                # Side Chain V-Sites
                for atids, par in zip(vsite_con, vsite_par):
                    self.vsites.append(
                        Vsite(
                            options=self.options,
                            atoms=atids,
                            parameters=par,
                            comments=resname,
                            category="SC",
                        )
                    )
                    self.vsites[-1] += atid  # Shift the atom numbers

                # Side Chain Exclusions
                for atids, par in zip(excl_con, excl_par):
                    exclusion = Exclusion(
                        options=self.options, atoms=atids, parameters=" ", category="SC"
                    )
                    self.exclusions.append(exclusion)
                    self.exclusions[-1] += atid  # Shift the atom numbers

                # Pairs
                for atids, par in zip(pair_con, pair_par):
                    pair = Pair(options=self.options, atoms=atids, parameters=" ")
                    self.pairs.append(pair)
                    self.pairs[-1] += atid  # Shift the atom numbers

                # All residue atoms
                counter = 0  # Counts over beads
                # Need to tweak this to get all the backbone beads to the list with the side chain
                bbbset = [bbMulti[count], bbMulti[count + 1], bbMulti[count + 2]]
                for atype, aname in zip(
                    bbbset + list(scatoms), CoarseGrained.residue_bead_names_dna
                ):
                    # self.atoms.append((atid,atype,resi,resname,aname,atid,self.options['ForceField'].getCharge(atype,aname),ss))
                    if atid in [vSite.atoms[0] for vSite in self.vsites]:
                        charge = self.options["ForceField"].getCharge(atype, aname)
                        mass = 0
                        self.atoms.append(
                            (atid, atype, resi, resname, aname, atid, charge, mass, ss)
                        )
                    else:
                        charge = self.options["ForceField"].getCharge(atype, aname)
                        if aname == "BB1":
                            mass = 72
                        elif aname == "BB2" or aname == "BB3":
                            mass = 60
                        else:
                            mass = 36
                        self.atoms.append(
                            (atid, atype, resi, resname, aname, atid, charge, mass, ss)
                        )
                    # Doing this here saves going over all the atoms onesmore.
                    # Generate position restraints for all atoms or Backbone beads only. @POSRES
                    if "all" in self.options["PosRes"]:
                        if (
                            aname == "BB1"
                            or aname == "BB2"
                            or aname == "BB3"
                            or aname == "SC1"
                        ) and atid - 1 > 1:
                            self.posres.append((atid - 1))
                    if "bb" in self.options["PosRes"]:  # @POSRES
                        if (aname == "BB2") and atid - 1 > 1:
                            self.posres.append((atid - 1))
                    if mapping:
                        self.mapping.append(
                            (atid, [i + shift for i in mapping[counter]])
                        )
                    atid += 1
                    counter += 1
            count += 1

        # One more thing, we need to remove dihedrals (2) and an angle (1)  that reach beyond the 3' end
        # This is stupid to do now but the total number of atoms seems not to be available before
        # This iterate the list in reverse order so that removals don't affect later checks
        for i in range(len(self.dihedrals) - 1, -1, -1):
            if max(self.dihedrals[i].atoms) > self.atoms[-1][0]:
                del self.dihedrals[i]
        for i in range(len(self.angles) - 1, -1, -1):
            if max(self.angles[i].atoms) > self.atoms[-1][0]:
                del self.angles[i]
        for i in range(len(self.bonds) - 1, -1, -1):
            if max(self.bonds[i].atoms) > self.atoms[-1][0]:
                del self.bonds[i]

        # The first residue of DNA (The 5' end) has a backbone that is only two beads. This is ugly
        # but we'll save our headache of changing the whole internal logic.
        for i in range(len(self.bonds) - 1, -1, -1):
            if 1 in self.bonds[i].atoms:
                del self.bonds[i]
            else:
                self.bonds[i].atoms = tuple([j - 1 for j in self.bonds[i].atoms])
        for i in range(len(self.angles) - 1, -1, -1):
            if 1 in self.angles[i].atoms:
                del self.angles[i]
            else:
                self.angles[i].atoms = tuple([j - 1 for j in self.angles[i].atoms])
        for i in range(len(self.dihedrals) - 1, -1, -1):
            if 1 in self.dihedrals[i].atoms:
                del self.dihedrals[i]
            else:
                self.dihedrals[i].atoms = tuple(
                    [j - 1 for j in self.dihedrals[i].atoms]
                )
        for i in range(len(self.vsites) - 1, -1, -1):
            if 1 in self.vsites[i].atoms:
                del self.vsites[i]
            else:
                self.vsites[i].atoms = tuple([j - 1 for j in self.vsites[i].atoms])
        for i in range(len(self.exclusions) - 1, -1, -1):
            if 1 in self.exclusions[i].atoms:
                del self.exclusions[i]
            else:
                self.exclusions[i].atoms = tuple(
                    [j - 1 for j in self.exclusions[i].atoms]
                )
        for i in range(len(self.pairs) - 1, -1, -1):
            if 1 in self.pairs[i].atoms:
                del self.pairs[i]
            else:
                self.pairs[i].atoms = tuple([j - 1 for j in self.pairs[i].atoms])

        # Remove the first bead and shift the numbers of all others by one.
        del self.atoms[0]
        for i in range(len(self.atoms)):
            self.atoms[i] = tuple(
                [self.atoms[i][0] - 1] + [j for j in self.atoms[i][1:]]
            )
        enStrandLengths.append(self.atoms[-1][0])

        # Remove the last posres, faster than removing first and shifting all.
        if len(self.posres) > 0:
            del self.posres[-1]

        # The rubber bands are best applied outside of the chain class, as that gives
        # more control when chains need to be merged. The possibility to do it on the
        # chain level is retained to allow building a complete chain topology in
        # a straightforward manner after importing this script as module.
        if rubber and chain:
            rubberList = rubberBands(
                [
                    (i[0], j[4:7])
                    for i, j in zip(self.atoms, chain.cg())
                    if i[4] in ElasticBeads
                ],
                ElasticLowerBound,
                ElasticUpperBound,
                ElasticDecayFactor,
                ElasticDecayPower,
                ElasticMaximumForce,
                ElasticMinimumForce,
            )

            # self.bonds.extend([Bond(i,options=self.options,type=6,category="Rubber band") for i in rubberList])


[docs]
    def fromMoleculeList(self, other):
        pass




#############
## 8 # MAIN #  -> @MAIN <-
#############
import sys, logging, random, math, os, re



[docs]
def main(options):
    # Check whether to read from a gro/pdb file or from stdin
    # We use an iterator to wrap around the stream to allow
    # inferring the file type, without consuming lines already
    inStream = streamTag(options["-f"] and options["-f"].value or sys.stdin)
    fileType = next(inStream)
    frameIterator = pdbFrameIterator

    ## ITERATE OVER FRAMES IN STRUCTURE FILE ##

    # Now iterate over the frames in the stream
    # This should become a StructureFile class with a nice .next method
    model = 1
    cgOutPDB = None
    ssTotal = []
    cysteines = []

    for title, atoms, box in frameIterator(inStream):

        if fileType == "PDB":
            # The PDB file can have chains, in which case we list and process them specifically
            # TER statements are also interpreted as chain separators
            # A chain may have breaks in which case the breaking residues are flagged
            chains = [
                Chain(options, [i for i in residues(chain)])
                for chain in pdbChains(atoms)
            ]

        # Check the chain identifiers
        if model == 1 and len(chains) != len(set([i._id for i in chains])):
            # Ending down here means that non-consecutive blocks of atoms in the
            # PDB file have the same chain ID. The warning pertains to PDB files only,
            # since chains from GRO files get a unique chain identifier assigned.
            logging.warning(
                "Several chains have identical chain identifiers in the PDB file."
            )

        n = 1
        logging.info("Found %d chains:" % len(chains))
        for chain in chains:
            logging.info(
                "  %2d:   %s (%s), %d atoms in %d residues."
                % (n, chain._id, chain._type, chain.natoms, len(chain))
            )
            n += 1

        # Check which chains need merging
        if model == 1:
            order, merge = check_merge(
                chains,
                options["mergeList"],
                options["linkList"],
                options["CystineCheckBonds"] and options["CystineMaxDist2"],
            )

        # Get the total length of the sequence
        seqlength = sum([len(chain) for chain in chains])
        logging.info("Total size of the system: %s residues." % seqlength)

        ## SECONDARY STRUCTURE
        ss = ""
        if options["Collagen"]:
            for chain in chains:
                chain.set_ss("F")
                ss += chain.ss
            # Now set the secondary structure for each of the chains
            sstmp = ss
            for chain in chains:
                ln = min(len(sstmp), len(chain))
                chain.set_ss(sstmp[:ln])
                sstmp = ss[:ln]
        # Collect the secondary structure classifications for different frames
        ssTotal.append(ss)

        # Write the coarse grained structure if requested
        if options["-x"].value:
            logging.info("Writing coarse grained structure.")
            if cgOutPDB == None:
                cgOutPDB = open(options["-x"].value, "w")
            cgOutPDB.write("MODEL %8d\n" % model)
            cgOutPDB.write(title)
            cgOutPDB.write(pdbBoxString(box))
            atid = 1
            write_start = 0
            for i in order:
                ci = chains[i]
                coarseGrained = ci.cg(com=True)
                if coarseGrained:
                    # For DNA we need to remove the first bead on the 5' end and shift the atids.
                    if ci.type() == "Nucleic":
                        write_start = 1
                    else:
                        write_start = 0
                    for name, resn, resi, chain, x, y, z, ssid in coarseGrained[
                        write_start:
                    ]:
                        insc = resi >> 20
                        resi -= insc << 20
                        cgOutPDB.write(
                            pdbAtomLine
                            % (
                                atid,
                                name,
                                resn[:3],
                                chain,
                                resi,
                                chr(insc),
                                x,
                                y,
                                z,
                                1,
                                ssid,
                            )
                        )
                        atid += 1
                    cgOutPDB.write("TER\n")
                else:
                    logging.warning(
                        "No mapping for coarse graining chain %s (%s); chain is skipped."
                        % (ci._id, ci.type())
                    )
            cgOutPDB.write("ENDMDL\n")

        # Gather cysteine sulphur coordinates
        cyslist = [cys["SG"] for chain in chains for cys in chain["CYS"]]
        cysteines.append([cys for cys in cyslist if cys])
        model += 1

    # Evertything below here we only need, if we need to write a Topology
    if options["-o"]:
        # Collect the secondary structure stuff and decide what to do with it
        # First rearrange by the residue
        ssTotal = zip(*ssTotal)
        ssAver = []
        for i in ssTotal:
            si = list(set(i))
            if len(si) == 1:
                # Only one type -- consensus
                ssAver.append(si[0])
            else:
                # Transitions between secondary structure types
                i = list(i)
                si = [(1.0 * i.count(j) / len(i), j) for j in si]
                si.sort()
                if si[-1][0] > options["-ssc"].value:
                    ssAver.append(si[-1][1])
                else:
                    ssAver.append(" ")

        ssAver = "".join(ssAver)
        logging.info(
            "(Average) Secondary structure has been determined (see head of .itp-file)."
        )

        # Divide the secondary structure according to the division in chains
        # This will set the secondary structure types to be used for the
        # topology.
        for chain in chains:
            chain.set_ss(ssAver[: len(chain)])
            ssAver = ssAver[len(chain) :]

        # Now the chains are complete, each consisting of a residuelist,
        # and a secondary structure designation if the chain is of type 'Protein'.
        # There may be mixed chains, there may be HETATM things.
        # Water has been discarded. Maybe this has to be changed at some point.
        # The order in the coarse grained files matches the order in the set of chains.
        #
        # If there are no merges to be done, i.e. no global Elnedyn network, no
        # disulphide bridges, no links, no distance restraints and no explicit merges,
        # then we can write out the topology, which will match the coarse grained file.
        #
        # If there are merges to be done, the order of things may be changed, in which
        # case the coarse grained structure will not match with the topology...

        ## REAL ITP STUFF ##
        # Check whether we have identical chains, in which case we
        # only write the ITP for one...
        # This means making a distinction between chains and
        # moleculetypes.

        molecules = [tuple([chains[i] for i in j]) for j in merge]

        # At this point we should have a list or dictionary of chains
        # Each chain should be given a unique name, based on the value
        # of options["-o"] combined with the chain identifier and possibly
        # a number if there are chains with identical identifiers.
        # For each chain we then write an ITP file using the name for
        # moleculetype and name + ".itp" for the topology include file.
        # In addition we write a master topology file, using the value of
        # options["-o"], with an added extension ".top" if not given.

        # XXX *NOTE*: This should probably be gathered in a 'Universe' class
        itp = 0
        moleculeTypes = {}
        cumulative_atoms = 0
        for mi, mol in enumerate(molecules):
            # Check if the moleculetype is already listed
            # If not, generate the topology from the chain definition
            if not mol in moleculeTypes or options["SeparateTop"]:
                # Name of the moleculetype
                # NOTE: The naming should be changed; now it becomes Protein_X+Protein_Y+...
                name = "+".join(
                    [chain.getname(options["-name"].value) for chain in mol]
                )
                moleculeTypes[mol] = name
                # Write the molecule type topology
                top = Topology(mol[0], options=options, name=name)
                # This merges topologies, properties how adding happens in Topology method __iadd__
                for m in mol[1:]:
                    top += Topology(m, options=options)

                # Have to add the connections, like the connecting network
                # Gather coordinates
                mcg, coords = zip(
                    *[(j[:4], j[4:7]) for m in mol for j in m.cg(force=True)]
                )
                mcg = list(mcg)

                # Elastic Network
                # The elastic network is added after the topology is constructed, since that
                # is where the correct atom list with numbering and the full set of
                # coordinates for the merged chains are available.
                # For Nucleic have to watch out for the missing first bead again.
                # THIS IS BANDAID FOR NOW, ONLY WORKS FOR TWO CHAINS
                if name.split("_")[0] == "Nucleic":
                    strands = len(enStrandLengths)
                    cuts = [enStrandLengths[0] + 1]
                    nucleic_coords = coords[1 : cuts[0]]
                    if strands != 1:
                        for i in range(1, strands):
                            cuts.append(cuts[-1] + enStrandLengths[i] + 1)
                        for i in range(1, strands):
                            nucleic_coords += coords[cuts[i - 1] + 1 : cuts[i]]
                    if options["ElasticNetwork"]:
                        rubberType = options["ForceField"].EBondType
                        rubberList = rubberBands(
                            [
                                (i[0], j)
                                for i, j in zip(top.atoms, nucleic_coords)
                                if i[4] in options["ElasticBeads"]
                            ],
                            options["ElasticLowerBound"],
                            options["ElasticUpperBound"],
                            options["ElasticDecayFactor"],
                            options["ElasticDecayPower"],
                            options["ElasticMaximumForce"],
                            options["ElasticMinimumForce"],
                        )
                        top.bonds.extend(
                            [
                                Bond(
                                    i,
                                    options=options,
                                    type=rubberType,
                                    category="Rubber band",
                                )
                                for i in rubberList
                            ]
                        )
                else:
                    if options["ElasticNetwork"]:
                        print(options["ElasticBeads"])
                        print(top.atoms[0])
                        rubberType = options["ForceField"].EBondType
                        rubberList = rubberBands(
                            [
                                (i[0], j)
                                for i, j in zip(top.atoms, coords)
                                if i[4] in options["ElasticBeads"]
                            ],
                            options["ElasticLowerBound"],
                            options["ElasticUpperBound"],
                            options["ElasticDecayFactor"],
                            options["ElasticDecayPower"],
                            options["ElasticMaximumForce"],
                            options["ElasticMinimumForce"],
                        )
                        top.bonds.extend(
                            [
                                Bond(
                                    i,
                                    options=options,
                                    type=rubberType,
                                    category="Rubber band",
                                )
                                for i in rubberList
                            ]
                        )

                # Write out the MoleculeType topology
                destination = (
                    options["-o"]
                    and open(moleculeTypes[mol] + ".itp", "w")
                    or sys.stdout
                )
                destination.write(str(top))

                itp += 1

            # Check whether other chains are equal to this one
            # Skip this step if we are to write all chains to separate moleculetypes
            if not options["SeparateTop"]:
                for j in range(mi + 1, len(molecules)):
                    if not molecules[j] in moleculeTypes and mol == molecules[j]:
                        # Molecule j is equal to a molecule mi
                        # Set the name of the moleculetype to the one of that molecule
                        moleculeTypes[molecules[j]] = moleculeTypes[mol]

            # For the parameterization index files we need to update the cumulative number of atoms.
            # For parameterization option SepareteTop should always be used because otherwise the numbering will fail.
            cumulative_atoms += len(top.atoms)

        logging.info("Written %d ITP file%s" % (itp, itp > 1 and "s" or ""))

    # The following lines are always printed (if no errors occur).
    print("\n\tThere you are. One MARTINI. Shaken, not stirred.\n")
    Q = martiniq.pop(random.randint(0, len(martiniq) - 1))
    print("\n", Q[1], "\n%80s" % ("--" + Q[0]), "\n")



if __name__ == "__main__":
    import sys, logging

    args = sys.argv[1:]
    options, lists = options, lists
    options = option_parser(args, options, lists, version)
    main(options)