"""
Description:
This module computes the persistence length of a nucleic acid system by
calculating angles between consecutive bond vectors in a coarse-grained model.
It provides functions to read a structure from a PDB file, compute geometric
measures (angles, rotation matrices), and finally plot the average angles.
Author: DY
"""
import matplotlib.pyplot as plt
import numpy as np
from Bio.PDB.MMCIFParser import MMCIFParser
from Bio.PDB.PDBParser import PDBParser
[docs]
def get_structure_cif(cif_id="path/to/cif"):
"""
Read a structure from a CIF file.
Args:
cif_id (str): Path to the CIF file.
Returns:
structure: A Bio.PDB structure object.
"""
parser = MMCIFParser()
structure = parser.get_structure("structure", cif_id)
return structure
[docs]
def get_structure_pdb(pdb_id="path/to/pdb"):
"""
Read a structure from a PDB file.
Args:
pdb_id (str): Path to the PDB file.
Returns:
structure: A Bio.PDB structure object.
"""
parser = PDBParser()
structure = parser.get_structure("structure", pdb_id)
return structure
[docs]
def get_residues(model):
"""
Extract all residues from a model.
Args:
model: A Bio.PDB model object.
Returns:
list: A list of residue objects.
"""
result = []
for chain in model:
for _, residue in enumerate(chain.get_unpacked_list()):
result.append(residue)
return result
[docs]
def get_resid(residue):
"""
Extract the residue id from the string representation of a residue.
Args:
residue: A Bio.PDB residue object.
Returns:
int: The residue id.
"""
# Use the built-in repr function instead of __repr__()
rep = repr(residue)
try:
# Expecting a format like "... resid=123 ..."
return int(rep.split()[3].split("=")[1])
except (IndexError, ValueError):
raise ValueError("Could not extract residue id from: " + rep)
[docs]
def get_atoms_by_name(residues, atom_name="BB3"):
"""
Select atoms with a given name from a list of residues.
Args:
residues (list): List of residue objects.
atom_name (str): Name of the atom to select (default "BB3").
Returns:
tuple: A tuple (atoms, resids) where atoms is a list of matching atom objects,
and resids is a list of corresponding residue ids.
"""
atoms = []
resids = []
for residue in residues:
rid = get_resid(residue)
for atom in residue.get_atoms():
if atom.get_name() == atom_name:
atoms.append(atom)
resids.append(rid)
return atoms, resids
[docs]
def get_coords(atoms):
"""
Get the coordinates for a list of atoms.
Args:
atoms (list): List of atom objects.
Returns:
list: List of coordinate arrays.
"""
return [atom.get_coord() for atom in atoms]
[docs]
def get_angle(v1, v2):
"""
Compute the angle (in degrees) between two vectors.
Args:
v1: First vector.
v2: Second vector.
Returns:
float: Angle in degrees.
"""
dot_product = np.dot(v1, v2)
magnitude_v1 = np.linalg.norm(v1)
magnitude_v2 = np.linalg.norm(v2)
cos_theta = dot_product / (magnitude_v1 * magnitude_v2)
angle_radians = np.arccos(np.clip(cos_theta, -1.0, 1.0))
angle_degrees = np.degrees(angle_radians)
return angle_degrees
[docs]
def rotation_matrix_from_vectors(vec1, vec2):
"""
Find the rotation matrix that aligns vec1 to vec2.
Args:
vec1 (array-like): Source vector.
vec2 (array-like): Destination vector.
Returns:
np.ndarray: A 3x3 rotation matrix.
"""
if (vec1 == vec2).all():
return np.eye(3)
a = vec1 / np.linalg.norm(vec1)
b = vec2 / np.linalg.norm(vec2)
v = np.cross(a, b)
c = np.dot(a, b)
s = np.linalg.norm(v)
kmat = np.array([[0, -v[2], v[1]],
[v[2], 0, -v[0]],
[-v[1], v[0], 0]])
rotation_matrix = np.eye(3) + kmat + kmat.dot(kmat) * ((1 - c) / (s**2))
return rotation_matrix
[docs]
def persistence_length(structure):
"""
Calculate and plot the persistence length from a structure.
For each model in the structure, the function computes the angle between
bond vectors (taken from atoms with name "BB3") after aligning them with
a reference vector. The average angles are then plotted and saved to a file.
Args:
structure: A Bio.PDB structure object.
"""
all_angles = []
global_vec0 = None
global_rmats = None
for idx, model in enumerate(structure):
residues = get_residues(model)
atoms, _ = get_atoms_by_name(residues, atom_name="BB3")
coords = np.array(get_coords(atoms))
if len(coords) < 11:
continue # Not enough data
vecs = coords[1:] - coords[:-1]
# For the first model, set the reference vector and rotation matrices.
if idx == 0:
rvec = vecs[10]
global_vec0 = vecs[0]
global_rmats = [rotation_matrix_from_vectors(vec, rvec) for vec in vecs]
# Use global values from the first model
rvec = vecs[10]
# Apply rotation matrices to align bond vectors.
vecs_trans = np.einsum("ijk,ik->ij", global_rmats, vecs)
# Compute angles between each transformed vector and the reference vector.
angles = [get_angle(vecs_trans[i], rvec) for i in range(len(vecs_trans))]
all_angles.append(angles)
if not all_angles:
print("No valid angle data found.", file=plt.sys.stderr)
return
all_angles = np.array(all_angles)
av_angles = np.average(all_angles, axis=0)
print(av_angles)
fig = plt.figure(figsize=(16.0, 6.0))
plt.plot(np.arange(len(av_angles))[:200], np.abs(av_angles)[:200])
fig.savefig("pers_length.png")
plt.close()
[docs]
def test_rmats():
"""
Test function for rotation_matrix_from_vectors.
"""
vecs = np.array([[1, 2, 3], [3, 2, 1], [4, 2, 6]])
rmats = [rotation_matrix_from_vectors(vec, vecs[2]) for vec in vecs]
print(np.einsum("ijk,ik->ij", rmats, vecs))
[docs]
def main():
"""
Main function to compute persistence length.
"""
wdir = "systems/100bpRNA/mdrun"
structure = get_structure_pdb(f"{wdir}/md.pdb")
persistence_length(structure)
if __name__ == "__main__":
main()