Source code for continuousflex.protocols.utilities.pdb_parser

#!/usr/bin/env python3
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#
# m_inout.py  - Library for coordinate input and output
# Part of the ModeHunter package, http://modehunter.biomachina.org
#
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#
# Request to cite published literature:
#
# When using this software in scholarly work please cite the publications(s)
# posted at http://modehunter.biomachina.org/fref.html
#
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#
# Legal notice:
#
# This software is copyrighted, (c) 2009, by Joseph N. Stember and Willy Wriggers
# under the following terms:
#
# The authors hereby grant permission to use, copy, modify, and re-distribute this
# software and its documentation for any purpose, provided that existing copyright
# notices are retained in all copies and that this notice is included verbatim in
# any distributions. No written agreement, license, or royalty fee is required for
# any of the authorized uses.
#
# IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY FOR DIRECT,
# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE
# OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY DERIVATIVES THEREOF, EVEN IF THE
# AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, INCLUDING,
# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE IS PROVIDED ON AN "AS
# IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

import numpy
import sys
import copy

# global list with chemical element names
element_name = ["H", "HE", "LI", "BE", "B", "C", "N", "O", "F", "NE",
                "NA", "MG", "AL", "SI", "P", "S", "CL", "AR", "K", "CA",
                "SC", "TI", "V", "CR", "MN", "FE", "CO", "NI", "CU", "ZN"]

# global list with corresponding atom masses
element_mass = [1.008, 4.003, 6.940, 9.012, 10.810, 12.011, 14.007, 16.000, 18.998, 20.179,
                22.990, 24.305, 26.982, 28.086, 30.974, 32.060, 35.453, 39.948, 39.102, 40.080,
                44.956, 47.880, 50.942, 51.996, 54.938, 55.847, 58.933, 58.700, 63.546, 65.380]


# global PDB class with data required structure
[docs]class PDB(object):
    def __init__(self, recd="ATOM  ", serial=1, type="  ", loc="  ", alt=" ", res="    ", chain=" ", seq=1, icode=" ",
                 x=0.0, y=0.0, z=0.0, occupancy=0.0, beta=0.0, footnote=0, segid="    ", element="  ", charge="  ",
                 weight=0.0):
        self.recd = recd  # 1- 6
        self.serial = serial  # 7-11
        self.type = type  # 13-14
        self.loc = loc  # 15-16
        self.alt = alt  # 17
        self.res = res  # 18-21
        self.chain = chain  # 22
        self.seq = seq  # 23-26
        self.icode = icode  # 27
        self.x = x  # 31-38
        self.y = y  # 39-46
        self.z = z  # 47-54
        self.occupancy = occupancy  # 55-60
        self.beta = beta  # 61-66
        self.footnote = footnote  # 68-70
        self.segid = segid  # 73-76
        self.element = element  # 77-78
        self.charge = charge  # 79-80
        self.weight = weight  # mass of atom assigned by read_pdb


# functions

[docs]def m_inout_read_pdb(filename):
    "Situs 2.x style pdb parser"

    pdb_list = []
    f = open(filename, 'r')
    for line in f:
        test_recd = line[0:5]

        # if test_recd.upper().strip() == "ATOM" or test_recd.upper() == "HETATM":
        if ((test_recd.upper().strip() == "ATOM") or (test_recd.upper().strip() == "HETATM")):
            # create new atom and assign standard fields
            new_atom = PDB()
            new_atom.recd = test_recd
            try:
                new_atom.serial = int(line[4:11])
            except:
                pass
            try:
                new_atom.type = line[12:14]
            except:
                pass
            try:
                new_atom.loc = line[14:16]
            except:
                pass
            try:
                new_atom.alt = line[16]
            except:
                pass
            try:
                new_atom.res = line[17:21]
            except:
                pass
            try:
                new_atom.chain = line[21]
            except:
                pass
            try:
                new_atom.seq = int(line[22:26])
            except:
                pass
            try:
                new_atom.icode = line[26]
            except:
                pass
            try:
                new_atom.x = float(line[30:38])
            except:
                pass
            try:
                new_atom.y = float(line[38:46])
            except:
                pass
            try:
                new_atom.z = float(line[46:54])
            except:
                pass
            try:
                new_atom.occupancy = float(line[54:60])
            except:
                pass
            try:
                new_atom.beta = float(line[60:66])
            except:
                pass
            try:
                new_atom.footnote = int(line[67:70])
            except:
                pass
            try:
                new_atom.segid = line[72:76]
            except:
                pass
            try:
                new_atom.element = line[76:78]
            except:
                pass
            try:
                new_atom.charge = line[78:80]
            except:
                pass
            # compute robust Situs 2.x style mass weight and append atom
            if (new_atom.type == "QV" and new_atom.loc == "OL") or (new_atom.type == "QP" and new_atom.loc == "DB"):
                new_atom.weight = 0.0  # codebook vectors have zero mass
            elif (new_atom.type == "DE" and new_atom.loc == "NS"):
                new_atom.weight = new_atom.occupancy  # volumetric map density mass assigned from occupancy field
            elif (new_atom.type.lstrip()[0] == "H"):
                new_atom.weight = element_mass[0]  # quick hack for hydrogens, may return false positives for Hg, Hf, Ho
            elif (new_atom.type.strip in element_name):
                new_atom.weight = element_mass[element_name.index(new_atom.type.strip)]  # standard element
            elif (new_atom.type.strip()[0] in element_name):
                new_atom.weight = element_mass[
                    element_name.index(new_atom.type.strip()[0])]  # catches frequent elements, C N O P S
            else:
                # Slavica: Mass of other atoms (eg, 1H,2H,3H, 1HH1, 2HH1, 1HD2, 2HD2 etc) to be incorporated later
                # print "Warning: atom %i: unable to identify atom type %s, assigning carbon mass" % (len(pdb_list)+1,new_atom.type)
                new_atom.weight = element_mass[5]
            pdb_list.append(new_atom)

    f.close()
    return pdb_list


[docs]def m_inout_write_pdb(pdb_list, filename, remarks):
    "pdb writer, Situs 2.x convention, renumbering atoms"

    f = open(filename, 'w')
    f.write("REMARKS " + remarks + "\n")

    for i in range(len(pdb_list)):
        f.write(pdb_list[i].recd.strip()[:6].ljust(6))
        # renumber atoms, ignoring .serial
        if ((i + 1) // 100000 == 0):
            f.write("%5d" % (i + 1))
        else:
            f.write("%05d" % (i + 1 - ((i + 1) // 100000) * 100000))
        f.write(" ")
        f.write(pdb_list[i].type.strip()[:2].rjust(2))
        f.write(pdb_list[i].loc.strip()[:2].ljust(2))
        f.write(pdb_list[i].alt.strip()[:1].ljust(1))
        if (len(pdb_list[i].res) < 4):
            f.write(pdb_list[i].res.strip().rjust(3))
            f.write(" ")
        else:
            f.write(pdb_list[i].res[:4])
        f.write(pdb_list[i].chain.strip()[:1].ljust(1))
        f.write("%4d" % pdb_list[i].seq)
        f.write(pdb_list[i].icode.strip()[:1].ljust(1))
        f.write("   ")
        f.write("%8.3f" % pdb_list[i].x)
        f.write("%8.3f" % pdb_list[i].y)
        f.write("%8.3f" % pdb_list[i].z)
        f.write("%6.2f" % pdb_list[i].occupancy)
        f.write("%6.2f" % pdb_list[i].beta)
        f.write(" ")
        f.write("%3d" % pdb_list[i].footnote)
        f.write("  ")
        f.write(pdb_list[i].segid.strip()[:4].ljust(4))
        f.write(pdb_list[i].element.strip()[:2].rjust(2))
        f.write(pdb_list[i].charge.strip()[:2].rjust(2))
        f.write("\n")
    f.close()


[docs]def m_inout_write_pdb_sampled(pdb_name, filename, step, thr_mass):
    "pdb writer, Situs 2.x convention, renumbering atoms"

    f = open(filename, 'w')

    pdb_list = m_inout_read_pdb(pdb_name)

    for i in range(0, len(pdb_list), step):
        if pdb_list[i].beta > thr_mass:
            f.write(pdb_list[i].recd.strip()[:6].ljust(6))
            # renumber atoms, ignoring .serial
            if ((i + 1) // 100000 == 0):
                f.write("%5d" % (i + 1))
            else:
                f.write("%05d" % (i + 1 - ((i + 1) // 100000) * 100000))
            f.write(" ")
            f.write(pdb_list[i].type.strip()[:2].rjust(2))
            f.write(pdb_list[i].loc.strip()[:2].ljust(2))
            f.write(pdb_list[i].alt.strip()[:1].ljust(1))
            if (len(pdb_list[i].res) < 4):
                f.write(pdb_list[i].res.strip().rjust(3))
                f.write(" ")
            else:
                f.write(pdb_list[i].res[:4])
            f.write(pdb_list[i].chain.strip()[:1].ljust(1))
            f.write("%4d" % pdb_list[i].seq)
            f.write(pdb_list[i].icode.strip()[:1].ljust(1))
            f.write("   ")
            f.write("%8.3f" % pdb_list[i].x)
            f.write("%8.3f" % pdb_list[i].y)
            f.write("%8.3f" % pdb_list[i].z)
            f.write("%6.2f" % pdb_list[i].occupancy)
            f.write("%6.2f" % pdb_list[i].beta)
            f.write(" ")
            f.write("%3d" % pdb_list[i].footnote)
            f.write("  ")
            f.write(pdb_list[i].segid.strip()[:4].ljust(4))
            f.write(pdb_list[i].element.strip()[:2].rjust(2))
            f.write(pdb_list[i].charge.strip()[:2].rjust(2))
            f.write("\n")
    f.close()


[docs]def m_inout_carbon_alphas(pdb_list):
    "return 0-based indices of carbon alphas in pdb_list"

    cas = []
    for i in range(len(pdb_list)):
        if (pdb_list[i].type == " C" and pdb_list[i].loc == "A "):
            cas.append(i)
    return cas


[docs]def m_inout_import_coords(filename):
    "get x y z coords from PDB file"
    coords = numpy.array([])
    pdb_list = m_inout_read_pdb(filename)
    for i in numpy.arange(len(pdb_list)):
        coords = numpy.append(coords, [pdb_list[i].x, pdb_list[i].y, pdb_list[i].z])
    return coords


[docs]def m_inout_import_bfact(filename):
    "get Bfactor from PDB file"
    bfact = numpy.array([])
    pdb_list = m_inout_read_pdb(filename)
    for i in numpy.arange(len(pdb_list)):
        bfact = numpy.append(bfact, [pdb_list[i].beta])
    return bfact