Source code for chimera.protocols.protocol_contacts
# **************************************************************************
# *
# * Authors: Marta Martinez (mmmtnez@cnb.csic.es)
# * Roberto Marabini (roberto@cnb.csic.es)
# *
# * L'Institut de genetique et de biologie moleculaire et cellulaire (IGBMC)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307 USA
# *
# * All comments concerning this program package may be sent to the
# * e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
from pwem.protocols import EMProtocol
from pyworkflow.object import Boolean
from pwem.constants import (SYM_DIHEDRAL_X)
from ..convert import CHIMERA_LIST
from ..constants import (CHIMERA_SYM_NAME, CHIMERA_I222)
from pyworkflow.protocol.params import (EnumParam,
IntParam,
PointerParam,
StringParam,
FloatParam,
LEVEL_ADVANCED, BooleanParam)
import sqlite3
import json
import collections
import os
from pwem.viewers.viewer_chimera import Chimera
from .. import Plugin
from operator import itemgetter
from pyworkflow.utils import red
[docs]class ChimeraProtContacts(EMProtocol):
"""Identifies interatomic clashes and contacts based on van der Waals radii
"""
_label = 'contacts'
_program = ""
commandDropView = """DROP view IF EXISTS {viewName}"""
TetrahedralOrientation = ['222', 'z3']
def __init__(self, **args):
EMProtocol.__init__(self, **args)
self.SYMMETRY = Boolean(True)
def _defineParams(self, form):
form.addSection(label='Input')
# pdbFileToBeRefined name is needed by the wizard. Do not change it
form.addParam('pdbFileToBeRefined', PointerParam, pointerClass="AtomStruct",
label='Atomic Structure:', allowsNull=True,
important=True,
help="Input atomic structure.")
form.addParam('chainStructure', StringParam, default="",
label='Chain Labeling',
help="Dictionary that maps chains to labels.\n"
"Example: {'A':'h1', 'B':'h1', 'E':'h2'}\n"
"Contacts are calculated between two chains with distinct "
"labels. Two chains with the same label are considered as "
"a group. Contacts will be computed between any chain included "
"in this group and any other group/chain. However, no contacts "
"among members of the group will be calculated.")
form.addParam('applySymmetry', BooleanParam,
label="Apply symmetry:", default=True,
help="'Symmetry = Yes' indicates that symmetry will be applied, and then"
" contacts will be computed between any two chains of the "
"atomic structure (the unit cell) "
"and between a chain of the unit cell and another chain of a "
"neigbour unit cell. Output results will show only non "
"redundant contatcs, i.e., contacts than you can infer by"
" symmetry will not be shown.\n'Symmetry = No' indicates that "
"symmetry will not be applied, and then "
"contacts will only be calculated between chains within the "
"atomic structure. Output results will show all contacts between"
" any couple of interacting chains.\n")
form.addParam('symmetryGroup', EnumParam,
choices=CHIMERA_LIST,
default=CHIMERA_I222,
label="Symmetry",
condition='applySymmetry',
help="https://scipion-em.github.io/docs/release-2.0.0/docs/developer/symmetries.html?highlight=symmetry"
"Symmetry for a description of the symmetry groups "
"format in CHIMERA.\n"
"If no symmetry is present, use _c1_."
'More information: \n'
'https://www.cgl.ucsf.edu/chimera/current/docs/UsersGuide/midas/sym.html'
)
form.addParam('symmetryOrder', IntParam, default=1,
condition='applySymmetry and symmetryGroup<=%d' % SYM_DIHEDRAL_X,
label='Symmetry Order',
help='Select the order of cyclic or dihedral symmetry.')
group = form.addGroup('Fit params for clashes and contacts')
group.addParam('cutoff', FloatParam,
label="cutoff (Angstroms): ", default=-0.4,
expertLevel=LEVEL_ADVANCED,
help="Large positive cutoff identifies the more severe clashes, "
"whereas negative cutoff indicates favorable contacts:\n"
"default contact rule: -0.4 (from 0.0 to -1.0)\n"
"default clash rule: 0.6 (from 0.4 to 1.0)\n"
'More information: \n'
'https://www.cgl.ucsf.edu/chimerax/docs/user/commands/clashes.html#top'
)
group.addParam('allowance', FloatParam,
label="allowance (Angstroms): ", default=0.0,
expertLevel=LEVEL_ADVANCED,
help="default contact rule: 0.0\n"
"default clash rule: 0.4\n"
'More information: \n'
'https://www.cgl.ucsf.edu/chimerax/docs/user/commands/clashes.html#top'
)
form.addLine('')
# --------------------------- INSERT steps functions --------------------
def _insertAllSteps(self):
self.sym = CHIMERA_SYM_NAME[self.symmetryGroup.get()]
self.symOrder = self.symmetryOrder.get()
if not self.applySymmetry:
self.sym = "Cn"
self.symOrder = 1
self.SYMMETRY = Boolean(False)
elif (self.sym == "Cn" or self.sym == "Dn") and self.symOrder == 1:
self.SYMMETRY = Boolean(False)
# connect to database, delete table and recreate it
# execute chimera findclash
self._insertFunctionStep('chimeraClashesStep')
self._insertFunctionStep('postProcessStep')
self._store()
[docs] def postProcessStep(self):
c, conn = connectDB(self.getDataBaseName(), None)
self.removeDuplicates(c)
[docs] def chimeraClashesStep(self):
labelDictAux = json.loads(self.chainStructure.get(),
object_pairs_hook=collections.OrderedDict)
labelDict = collections.OrderedDict(sorted(labelDictAux.items(), key=itemgetter(1)))
# labelDict = collections.OrderedDict(sorted(list(labelDictAux.items()), key=itemgetter(1)))
pdbFileName = os.path.abspath(self.pdbFileToBeRefined.get().getFileName())
# first element of dictionary
firstValue = labelDict[list(labelDict)[0]]
outFiles = []
f = open(self.getChimeraScriptFileName1(), "w")
f.write("from chimerax.core.commands import run\n")
f.write("run(session, 'open {}')\n".format(pdbFileName))
if self.sym == "Cn" and self.symOrder != 1:
f.write("run(session,'sym #1 C%d copies t')\n" % self.symOrder)
elif self.sym == "Dn" and self.symOrder != 1:
f.write("run(session,'sym #1 d%d copies t')\n" % self.symOrder)
elif self.sym == "T222" or self.sym == "TZ3":
f.write("run(session,'sym #1 t,%s copies t')\n" % self.sym[1:])
elif self.sym == "O":
f.write("run(session,'sym #1 O copies t')\n")
elif self.sym == "I222" or self.sym == "I222r" or self.sym == "In25" or \
self.sym == "In25r" or self.sym == "I2n3" or self.sym == "I2n3r" or \
self.sym == "I2n5" or self.sym == "I2n5r":
f.write("run(session,'sym #1 i,%s copies t')\n" % self.sym[1:])
self.SYMMETRY = self.SYMMETRY.get()
if self.SYMMETRY:
f.write("run(session,'delete #2 & #1 #>3')\n")
f.write("run(session,'save {symmetrizedModelName} #2')\n".format(
symmetrizedModelName=self.getSymmetrizedModelName()))
f.write("run(session, 'close #1')\n")
f.write("run(session, 'rename #2 id #1')\n")
self.endChimeraScript(firstValue, labelDict, outFiles, f)
f.write("run(session, 'exit')\n")
f.close()
args = " --nogui --script " + self.getChimeraScriptFileName1()
self._log.info('Launching: ' + Plugin.getProgram() + ' ' + args)
Chimera.runProgram(Plugin.getProgram(), args)
if self.SYMMETRY and not os.path.exists(self.getSymmetrizedModelName()):
# When self.SYMMETRY = TRUE and no one neighbor unit cell has not been
# generated at less than 3 Angstroms, probably because the symmetry
# center is not equal to the origin of coordinates, at least we have the
# contacts that are within the unit cell.
print(red("Error: No neighbor unit cells are available. "
"Is the symmetry center equal to the origin of "
"coordinates?"))
self.SYMMETRY = False
f = open(self.getChimeraScriptFileName2(), "w")
f.write("from chimerax.core.commands import run\n")
f.write("session, run('open {}')\n".format(pdbFileName))
self.endChimeraScript(firstValue, labelDict, outFiles, f)
f.write("run(session, 'exit')\n")
f.close()
args = " --nogui --script " + self.getChimeraScriptFileName2()
self._log.info('Launching: ' + Plugin.getProgram() + ' ' + args)
Chimera.runProgram(Plugin.getProgram(), args)
# parse all files created by chimera
c, conn = self.prepareDataBase()
self.parseFiles(outFiles, c)
conn.commit()
conn.close()
# return outFiles
[docs] def prepareDataBase(self, drop=True):
if drop:
return connectDB(self.getDataBaseName(), self.getTableName())
else:
return connectDB(self.getDataBaseName())
[docs] def parseFiles(self, outFiles, c):
labelDictAux = json.loads(self.chainStructure.get(),
object_pairs_hook=collections.OrderedDict)
labelDict = collections.OrderedDict(sorted(labelDictAux.items(), key=itemgetter(1)))
# labelDict = collections.OrderedDict(sorted(list(labelDictAux.items()), key=itemgetter(1)))
d = {}
d1 = {}
d2 = {}
anyResult = False
for inFile in outFiles:
print("processing file", inFile)
if not os.path.exists(inFile):
continue
else:
anyResult = True
counter = 0
# parse contact files. Note that C1 symmetry file is different from the rest
for line in open(inFile):
if counter < 8:
# print ("skip line", line
counter += 1
else:
# if not self.SYMMETRY:
if not self.SYMMETRY or line.split()[0].startswith("/"):
# Second option (line.split()[0].startswith("/") stands for
# cases in which the result of applying symmetry is identical
# to the starting structure (see test testContactsSymC2_b
# where after deleting the #2 submodel far more than 3 A from
# the input model, the resulting model is the same as the initial one.
info = line.split() # ['/A002', 'HEM', '1', 'ND', '/A', 'HIS', '87', 'NE2', '0.620', '2.660']
d1['modelId'] = "'" + "#1" + "'"
d1['aaName'] = "'" + info[1][0] + info[1][1:].lower() + "'" # 'Hem'
d1['aaNumber'] = info[2] # '1'
d1['chainId'] = "'" + info[0].split("/")[1] + "'" # 'A002'
d1['atomId'] = "'" + info[3] + "'" # 'ND'
d1['protId'] = "'" + labelDict[info[0].split("/")[1]] + "'"
d2['modelId'] = "'" + "#1" + "'"
d2['aaName'] = "'" + info[5][0] + info[5][1:].lower() + "'" # 'His'
d2['aaNumber'] = info[6] # '87'
d2['chainId'] = "'" + info[4].split("/")[1] + "'" # 'A'
d2['protId'] = "'" + labelDict[info[4].split("/")[1]] + "'"
d2['atomId'] = "'" + info[7] + "'" # 'NE2'
d['overlap'] = info[8] # '0.620'
d['distance'] = info[9] # '2.660'
else:
info = line.split()
# 5ni1_unit_cell_HEM.cif #1.2/A002 HEM 1 ND 5ni1_unit_cell_HEM.cif #1.2/A HIS 87 NE2 0.620 2.660
d1['modelId'] = "'" + info[1].split("/")[0] + "'" # '#1.2'
d1['aaName'] = "'" + info[2][0] + info[2][1:].lower() + "'" # 'Hem'
d1['aaNumber'] = info[3] # '1'
d1['chainId'] = "'" + info[1].split("/")[1] + "'" # 'A002'
d1['atomId'] = "'" + info[4] + "'" # 'ND'
d1['protId'] = "'" + labelDict[info[1].split("/")[1]] + "'" # 'HEM_A'
d2['modelId'] = "'" + info[6].split("/")[0] + "'" # '#1.2'
d2['aaName'] = "'" + info[7][0] + info[7][1:].lower() + "'" # 'His'
d2['aaNumber'] = info[8] # '87'
d2['chainId'] = "'" + info[6].split("/")[1] + "'" # N
d2['protId'] = "'" + labelDict[info[6].split("/")[1]] + "'" # 'chainA'
d2['atomId'] = "'" + info[9] + "'" # 'NE2'
d['overlap'] = info[10] # '0.620'
d['distance'] = info[11] # '2.660'
if d1['modelId'] == d2['modelId']:
if d1['protId'] <= d2['protId']:
for k in d1.keys():
# for k in list(d1.keys()):
d[k + '_1'] = d1[k]
d[k + '_2'] = d2[k]
else:
for k in d1.keys():
# for k in list(d1.keys()):
d[k + '_1'] = d2[k]
d[k + '_2'] = d1[k]
else:
if d1['modelId'] <= d2['modelId']:
for k in d1.keys():
# for k in list(d1.keys()):
d[k + '_1'] = d1[k]
d[k + '_2'] = d2[k]
else:
for k in d1.keys():
# for k in list(d1.keys()):
d[k + '_1'] = d2[k]
d[k + '_2'] = d1[k]
command = "INSERT INTO contacts "
keys = "("
values = " ("
for key, value in d.items():
keys += key + ", "
values += str(value) + ", "
keys = keys[:-2] + ")"
values = values[:-2] + ")"
command += keys + " VALUES " + values
# print(command)
c.execute(command)
return anyResult
# --------- util functions -----
[docs] def getSymmetrizedModelName(self):
return os.path.abspath(self._getExtraPath("symModel.cif"))
# return self._getExtraPath("symModel.pdb")
[docs] def getChimeraScriptFileName1(self):
return os.path.abspath(self._getTmpPath("chimera1.cxc"))
[docs] def getChimeraScriptFileName2(self):
return os.path.abspath(self._getTmpPath("chimera2.cxc"))
[docs] def endChimeraScript(self, firstValue, labelDict, outFiles, f):
protId = firstValue
chains = ""
comma = ''
for k, v in labelDict.items():
if protId == v:
# chains += "{}/{}".format(comma, k)
chains += "{}{}".format(comma, k)
comma = ','
outFileBase = v
else:
outFile = os.path.abspath(self._getExtraPath("{}.over".format(outFileBase)))
# outFile = self._getExtraPath("{}.over".format(outFileBase))
outFiles.append(outFile)
f.write("run(session,'echo {}')\nrun(session, 'contacts #1{} "
"intersubmodel true "
"intramol False "
"restrict any "
"saveFile {} overlapCutoff {} hbondAllowance {} namingStyle simple')\n".
format(chains, chains, outFile, self.cutoff, self.allowance))
protId = v
# chains = "/{}".format(k)
chains = "{}".format(k)
outFileBase = v
chains = "/" + chains.split("/")[-1]
outFile = os.path.abspath(self._getExtraPath("{}.over".format(outFileBase)))
# outFile = self._getExtraPath("{}.over".format(outFileBase))
outFiles.append(outFile)
f.write(
"run(session,'echo {}')\nrun(session, 'contacts #1{} "
"intersubmodel true "
"intramol False "
"restrict any "
"savefile {} overlap {} hbond {} namingStyle simple')\n".format(
chains, chains, outFile, self.cutoff, self.allowance))
# f.write("run('save %s')\n" % os.path.abspath(self._getExtraPath(sessionFile)))
[docs] def removeDuplicates(self, c):
# Remove duplicate contacts
# that is, given chains A,B
# we have contact A-B and B-A
commandEliminateDuplicates = """CREATE VIEW {} AS
SELECT DISTINCT modelId_1,
protId_1,
chainId_1,
aaName_1,
aaNumber_1,
atomId_1,
modelId_2,
protId_2,
chainId_2,
aaName_2,
aaNumber_2,
atomId_2,
overlap,
distance
FROM {}
"""
commandEliminateDuplicates2 = """
CREATE VIEW {} AS
SELECT *
FROM {}
EXCEPT -- Each bound appears two times, delete one of them
SELECT ca.*
FROM {} ca, {} cb
WHERE
ca.protId_1 = cb.protId_2
AND cb.protId_1 = ca.protId_2
AND cb.chainId_1 = ca.chainId_2
AND ca.aaNumber_1 = cb.aaNumber_2
AND cb.aaNumber_1 = ca.aaNumber_2
AND ca.atomId_1 = cb.atomId_2
AND cb.atomId_1 = ca.atomId_2
AND ca.modelId_2 > cb.modelId_2
EXCEPT -- Interprotein bounds in the same model are not allowed
SELECT ca.*
FROM {} ca
WHERE ca.modelId_1 = ca.modelId_2
AND ca.protId_1 = ca.protId_2
"""
if self.SYMMETRY:
sqlCommand = """
SELECT count(*) FROM {} ca
WHERE ca.modelId_1 = '#1.1'
""".format(self.getTableName())
c.execute(sqlCommand)
row = c.fetchone()
if int(row[0]) == 0:
self.SYMMETRY = False
else:
commandEliminateDuplicates2 +="""
EXCEPT -- One of the atoms must belong to the input unit cell
SELECT ca.*
FROM {} ca
WHERE ca.modelId_1 != '#1.1' AND
ca.modelId_2 != '#1.1'
""".format(self.getView1Name())
# # Remove duplicate contacts
# that is, given chains A,B
# we have contact A.a-B.b and B.b-A.a
c.execute(self.commandDropView.format(viewName="view_ND_1"))
# TODO: remove second contacts
c.execute(commandEliminateDuplicates.format("view_ND_1",
"contacts",
"contacts",
"contacts"))
# remove duplicate contacts due to symmetry
# h1-h1p, h1-h2p
c.execute(self.commandDropView.format(viewName="view_ND_2"))
c.execute(commandEliminateDuplicates2.format("view_ND_2", "view_ND_1",
"view_ND_1", "view_ND_1",
"view_ND_1"))
def _validate(self):
errors = []
if self.symmetryOrder.get() <= 0:
errors.append("Error: Symmetry Order should be a positive integer")
return errors
[docs]def connectDB(sqliteFN, tableName=None):
conn = sqlite3.connect(sqliteFN)
c = conn.cursor()
if tableName is not None:
commandDropTable = """DROP TABLE IF EXISTS {}"""
commandCreateTable = """
CREATE TABLE {}(
id integer primary key autoincrement,
modelId_1 char(8),
protId_1 char(8),
chainId_1 char(8),
aaName_1 char(3),
aaNumber_1 int,
atomId_1 char(8),
modelId_2 char(8),
protId_2 char(8),
chainId_2 char(8),
aaName_2 char(3),
aaNumber_2 int,
atomId_2 char(8),
overlap float,
distance float
);"""
c.execute(commandDropTable.format(tableName))
c.execute(commandCreateTable.format(tableName))
return c, conn