Source code for ccp4.viewers.viewer_refmac

# **************************************************************************
# *
# * Authors:     Roberto Marabini (roberto@cnb.csic.es)
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************

import os
import sys
from tkinter.messagebox import showerror
from pwem.convert import Ccp4Header
from pyworkflow.protocol.params import LabelParam
from pyworkflow.viewer import DESKTOP_TKINTER, WEB_DJANGO, ProtocolViewer
from pyworkflow.gui.text import _open_cmd
from pwem.viewers import TableView
from pyworkflow.gui.plotter import Plotter
from pwem.viewers.viewer_chimera import Chimera
from ccp4.protocols import CCP4ProtRunRefmac


def errorWindow(tkParent, msg):
    """Report an error to the user, preferring a Tk dialog.

    :param tkParent: widget passed to ``showerror`` as ``parent``; may be
        None.
    :param msg: text of the error message.

    If the dialog cannot be shown for any ordinary reason the message is
    printed to standard output instead, so the error is never lost.
    """
    try:
        # if tkParent is None the error message may end up behind
        # other windows
        showerror("Error",  # bar title
                  msg,  # message
                  parent=tkParent)
    except Exception:
        # Best-effort fallback. ``Exception`` (rather than a bare except)
        # lets KeyboardInterrupt and SystemExit propagate.
        print("Error:", msg)
class ParseFile():
    """Parser for refmac log files.

    The whole file is parsed once, when the object is created. Results are
    kept in four dictionaries indexed by the section keys declared below
    and are read back through the ``retrieve*`` methods. Every section is
    always registered: when a section cannot be parsed its data is an
    empty list and its message explains why, so the ``retrieve*`` methods
    never raise ``KeyError``.
    """
    LASTITERATIONRESULTS = "lastIteration"
    FINALRESULTS = "finalResults"
    FOMPLOT = "fomplot"
    RFACTORPLOT = "rfactorplot"
    MLLPLOT = "-LLPlot"
    MLLFREEPLOT = "-LLfreePlot"
    GEOMETRYPLOT = "GeometryPlot"

    # Number of columns of the "stats vs cycle" table, in file order:
    # Ncyc Rfact Rfree FOM -LL -LLfree rmsBOND zBOND rmsANGL zANGL rmsCHIRAL
    _N_STATS_COLUMNS = 11

    # One entry per plot: (section key, header labels, indices of the
    # table columns that are plotted, window title). Index 0 is the cycle.
    _PLOTS = (
        (FOMPLOT, ("cycle", "fom"), (0, 3), "FOM vs Cycle"),
        (RFACTORPLOT, ("cycle", "Rfact", "Rfree"), (0, 1, 2),
         "Rfact and Rfree vs Cycle"),
        (MLLPLOT, ("cycle", "mLL"), (0, 4), "-LL vs Cycle"),
        (MLLFREEPLOT, ("cycle", "mLLfree"), (0, 5), "-LLfree vs Cycle"),
        (GEOMETRYPLOT,
         ("cycle", "rmsBOND", "zBOND", "rmsANGL", "zANGL", "rmsCHIRAL"),
         (0, 6, 7, 8, 9, 10),
         "rmsBOND, zBOND, rmsANGL, zANGL "
         "and rmsCHIRAL vs Cycle"),
    )

    def __init__(self, fileName, tkParent=None, lastIteration=0):
        """Parse *fileName*.

        :param fileName: path of the refmac log file.
        :param tkParent: Tk widget used as parent of error dialogs.
        :param lastIteration: cycle number searched for in the "last
            iteration" section; it is also the number of rows read from
            the "stats vs cycle" table.
        """
        self.headerDict = {}  # parsed headers go here
        self.dataDict = {}  # parsed data goes here
        self.msgDict = {}  # error messages go here
        self.titleDict = {}  # titles go here
        self.tkParent = tkParent
        self.fileName = fileName
        self._parsefile(lastIteration)

    @staticmethod
    def _squeeze(text):
        """Return *text* with every run of whitespace reduced to one space."""
        return " ".join(text.split())

    def _seekLine(self, filePointer, marker):
        """Move *filePointer* just past the first line containing *marker*.

        Lines and marker are compared after whitespace normalisation, so
        the match does not depend on the column padding used in the log.

        :return: True when the marker was found. False otherwise; in that
            case the file position is restored to where the search began
            so that the remaining sections can still be looked for.
        """
        start = filePointer.tell()
        wanted = self._squeeze(marker)
        while True:
            line = filePointer.readline()
            if not line:  # end of file
                filePointer.seek(start)
                return False
            if wanted in self._squeeze(line):
                return True

    def _parseFinalResults(self, filePointer):
        """Parse the "Final results" table (a header line plus up to four
        rows) and register it under FINALRESULTS."""
        headerList = []
        dataList = []
        msg = ""
        if self._seekLine(filePointer, '$TEXT:Result: $$ Final results $$'):
            words = filePointer.readline().split()
            if len(words) >= 2:
                # the first column (row label) has no title
                headerList = [" ", words[0], words[1]]
                # parse data: another 4 lines
                for _ in range(4):
                    words = filePointer.readline().split()
                    if words == ['$$'] or len(words) < 4:
                        break
                    # the row label is made of 2 words
                    dataList.append((words[0] + " " + words[1],
                                     words[2], words[3]))
        if not dataList:
            msg = 'Can not find "Final result" information in log file: %s' \
                  % self.fileName

        self.headerDict[self.FINALRESULTS] = headerList
        self.dataDict[self.FINALRESULTS] = dataList
        self.msgDict[self.FINALRESULTS] = msg

    def retrievefinalResults(self):
        """Return (header, data, message) of the final results table."""
        return self.headerDict[self.FINALRESULTS], \
            self.dataDict[self.FINALRESULTS], \
            self.msgDict[self.FINALRESULTS]

    def _parseLastIteration(self, filePointer, iteration):
        """Parse the ``name = value`` lines that follow the graphs header
        of cycle *iteration* and register them under
        LASTITERATIONRESULTS."""
        headerList = ["variable", "value"]
        dataList = []
        msg = ""
        notFound = 'Can not find "Last Iteration" information in log ' \
                   'file: %s' % self.fileName
        marker = '$GRAPHS:Cycle %d. M(Fom) v. resln :' \
                 'N:1,3,5,7,8,9,10:' % iteration
        if not self._seekLine(filePointer, marker):
            msg = notFound
        else:
            # skip everything up to the fourth line that contains $$
            pending = 4
            while pending:
                line = filePointer.readline()
                if not line:
                    msg = notFound
                    break
                if "$$" in line:
                    pending -= 1
            # the summary is looked for in the next 14 lines; lines
            # without an "=" are ignored
            for _ in range(14):
                words = filePointer.readline().strip().split("=")
                if len(words) > 1:
                    dataList.append((words[0].strip(), words[1].strip()))

        self.headerDict[self.LASTITERATIONRESULTS] = headerList
        self.dataDict[self.LASTITERATIONRESULTS] = dataList
        self.msgDict[self.LASTITERATIONRESULTS] = msg

    def retrievelastIteration(self):
        """Return (header, data, message) of the last iteration summary."""
        return self.headerDict[self.LASTITERATIONRESULTS], \
            self.dataDict[self.LASTITERATIONRESULTS], \
            self.msgDict[self.LASTITERATIONRESULTS]

    def _storePlots(self, columns, msg):
        """Register the five plots described in ``_PLOTS``.

        :param columns: list with one list of values per table column, or
            an empty list when no table could be read.
        :param msg: message stored with every plot.
        """
        hasData = bool(columns) and bool(columns[0])
        for key, header, indices, title in self._PLOTS:
            self.headerDict[key] = list(header)
            # an empty list tells the caller there is nothing to plot
            self.dataDict[key] = [columns[i] for i in indices] \
                if hasData else []
            self.msgDict[key] = msg
            self.titleDict[key] = title

    # table parse cycle
    def _parseFomPlot(self, filePointer, iteration):
        """Parse up to *iteration* rows of the "stats vs cycle" table and
        register the FOM, R-factor, -LL, -LLfree and geometry plots."""
        msg = ""
        columns = [[] for _ in range(self._N_STATS_COLUMNS)]
        if self._seekLine(filePointer,
                          'Ncyc Rfact Rfree FOM -LL '
                          '-LLfree rmsBOND zBOND rmsANGL zANGL '
                          'rmsCHIRAL $$'):
            # skip one line
            filePointer.readline()
            for _ in range(iteration):
                words = filePointer.readline().split()
                if len(words) < self._N_STATS_COLUMNS:
                    # the table is shorter than expected: keep what we have
                    break
                try:
                    values = [int(words[0])]
                    values.extend(
                        float(w) for w in words[1:self._N_STATS_COLUMNS])
                except ValueError:
                    break  # not a data row
                for column, value in zip(columns, values):
                    column.append(value)
        if not columns[0]:
            msg = 'Can not find "stats vs cycle" information in log file: ' \
                  '%s' % self.fileName
        self._storePlots(columns, msg)

    def retrieveFomPlot(self):
        """Return (header, data, message, title) of the FOM plot."""
        return self.headerDict[self.FOMPLOT], \
            self.dataDict[self.FOMPLOT], \
            self.msgDict[self.FOMPLOT], \
            self.titleDict[self.FOMPLOT]

    def retrieveRFactorPlot(self):
        """Return (header, data, message, title) of the R-factor plot."""
        return self.headerDict[self.RFACTORPLOT], \
            self.dataDict[self.RFACTORPLOT], \
            self.msgDict[self.RFACTORPLOT], \
            self.titleDict[self.RFACTORPLOT]

    def retrievemLLPlot(self):
        """Return (header, data, message, title) of the -LL plot."""
        return self.headerDict[self.MLLPLOT], \
            self.dataDict[self.MLLPLOT], \
            self.msgDict[self.MLLPLOT], \
            self.titleDict[self.MLLPLOT]

    def retrievemLLfreePlot(self):
        """Return (header, data, message, title) of the -LLfree plot."""
        return self.headerDict[self.MLLFREEPLOT], \
            self.dataDict[self.MLLFREEPLOT], \
            self.msgDict[self.MLLFREEPLOT], \
            self.titleDict[self.MLLFREEPLOT]

    def retrieveGeometryPlot(self):
        """Return (header, data, message, title) of the geometry plot."""
        return self.headerDict[self.GEOMETRYPLOT], \
            self.dataDict[self.GEOMETRYPLOT], \
            self.msgDict[self.GEOMETRYPLOT], \
            self.titleDict[self.GEOMETRYPLOT]

    def _parsefile(self, lastIteration=0):
        """ call the different functions that parse the data in the
        right order"""
        if not os.path.exists(self.fileName):
            msg = "File %s is not available. Wait until protocol has " \
                  "finished" % self.fileName
            errorWindow(self.tkParent, msg)
            # Register empty sections so that retrieve* keeps working and
            # hands the message over to the caller.
            self.headerDict[self.LASTITERATIONRESULTS] = ["variable",
                                                          "value"]
            self.dataDict[self.LASTITERATIONRESULTS] = []
            self.msgDict[self.LASTITERATIONRESULTS] = msg
            self._storePlots([], msg)
            self.headerDict[self.FINALRESULTS] = []
            self.dataDict[self.FINALRESULTS] = []
            self.msgDict[self.FINALRESULTS] = msg
            return
        with open(self.fileName, "r") as filePointer:
            # LASTITERATION
            self._parseLastIteration(filePointer, lastIteration)
            # RfactorPlot FOMPLOT, LLplot LLfreePLot GeometryPlot
            self._parseFomPlot(filePointer, lastIteration)
            # FINALRESULTS
            self._parseFinalResults(filePointer)
class CCP4ProtRunRefmacViewer(ProtocolViewer):
    """ Viewer for CCP4 program refmac

    Offers tables and plots built from the refmac log file (parsed once
    by ``ParseFile`` when the viewer is created) and ChimeraX views of
    the maps and atomic models handled by the protocol.
    """
    _label = 'Refmac Viewer'
    _environments = [DESKTOP_TKINTER, WEB_DJANGO]
    _targets = [CCP4ProtRunRefmac]
    # ROB: do we need this memory for something?
    # _memory = False

    # temporary metadata file with ctf that has some resolution greater
    # than X
    tmpMetadataFile = 'viewersTmp.sqlite'

    # matplotlib format strings, one per plotted curve, in plotting order
    _CURVE_STYLES = ('bx-', 'gx-', 'rx-', 'cx-', 'mx-')

    def __init__(self, **kwargs):
        ProtocolViewer.__init__(self, **kwargs)
        # the log table holds one row per refinement cycle plus one more
        # row, hence the "+ 1"
        self.parseFile = ParseFile(self.protocol._getExtraPath(
            self.protocol.refineLogFileName), self.getTkRoot(),
            self.protocol.nRefCycle.get() + 1)

    def _checkProtocolHasEnded(self):
        """Return True if the protocol already has an output PDB;
        otherwise show an error and return False."""
        try:
            self.protocol.outputPdb.getFileName()
            return True
        except Exception:
            # typically AttributeError: outputPdb does not exist yet
            errorWindow(None, "Protocol has not finished. Wait until it ends")
            return False

    def _defineParams(self, form):
        """Declare the viewer form; nothing is added while the protocol
        has no output."""
        if self._checkProtocolHasEnded():
            form.addSection(label='Visualization of Refmac results')
            form.addParam('displayMapModel', LabelParam,
                          label="Volume and models",
                          help="Display of input volume, input pdb that "
                               "has to be refined and final refined model "
                               "of the structure.")
            form.addParam('displayMask', LabelParam,
                          label="Display Mask",
                          help="Display of the input volume masked by the "
                               "PDB-based mask. Only available if the "
                               "protocol was executed with the mask "
                               "option.")
            form.addParam('showFinalResults', LabelParam,
                          label="Final Results Table",
                          help="Table of Final Results from refine.log "
                               "file.")
            form.addParam('showLogFile', LabelParam,
                          label="Show log file",
                          help="Open refmac log file in a text editor.")
            form.addParam('showLastIteration', LabelParam,
                          label="Results Table (last iteration)",
                          help="Table stored in log file summarizing the "
                               "last iteration.")
            form.addParam('displayRFactorPlot', LabelParam,
                          label="R-factor vs. iteration",
                          help="Plot R-factor as a function of the "
                               "iteration.")
            form.addParam('displayFOMPlot', LabelParam,
                          label="FOM vs. iteration",
                          help="Plot Figure Of Merit as a function of the "
                               "iteration.")
            form.addParam('displayLLPlot', LabelParam,
                          label="-LL vs. iteration",
                          help="Plot Log likelihood as a function of the "
                               "iteration.")
            form.addParam('displayLLfreePlot', LabelParam,
                          label="-LLfree vs. iteration",
                          help="Plot Log likelihood as a function of the "
                               "iteration")
            form.addParam('displayGeometryPlot', LabelParam,
                          label="Geometry vs. iteration",
                          help="Plot Geometry as a function of the "
                               "iteration: Geometry includes rmsBOND "
                               "(root mean square bond lengths) zBOND "
                               "(zscore of the deviation of bond lengths) "
                               "rmsANGL (root mean square bond angles) "
                               "zANGL (zscore of the deviation of bond "
                               "angles) and rmsCHIRAL (root mean square "
                               "of chiral index).")

    def _getVisualizeDict(self):
        """Map every form parameter to the method that displays it."""
        return {
            'showFinalResults': self._visualizeFinalResults,
            'showLastIteration': self._visualizeLastIteration,
            'displayMapModel': self._visualizeMapModel,
            'displayMask': self._displayMask,
            'displayRFactorPlot': self._visualizeRFactorPlot,
            'displayFOMPlot': self._visualizeFOMPlot,
            'displayLLPlot': self._visualizeLLPlot,
            'displayLLfreePlot': self._visualizeLLfreePlot,
            'displayGeometryPlot': self._visualizeGeometryPlot,
            'showLogFile': self._visualizeLogFile
        }

    def _displayMask(self, e=None):
        """Open the masked map in Chimera, or show an error if the
        protocol was run without the mask option."""
        if not self.protocol.generateMaskedVolume.get():
            errorWindow(self.getTkRoot(), "This protocol has been executed "
                                          "without the mask option")
            return []

        maskedMapFileName = os.path.abspath(self.protocol._getExtraPath(
            self.protocol._getMapMaskedByPdbBasedMaskFileName()))
        ccp4header = Ccp4Header(maskedMapFileName, readHeader=True)
        sampling, _, _ = ccp4header.getSampling()

        counter = 1
        fnCmd = self.protocol._getExtraPath("chimera_mask.cxc")
        # "with" closes the script even if a write fails
        with open(fnCmd, 'w') as f:
            f.write("open %s\n" % maskedMapFileName)
            # NOTE: the origin is not set here, there is no origin
            # information in the header
            f.write("volume #%d style surface voxelSize %f\n"
                    % (counter, sampling))

        # run in the background
        Chimera.runProgram(Chimera.getProgram(), fnCmd + "&")
        return []

    def _visualizeMapModel(self, e=None):
        """Open in Chimera the coordinate axes, the input map, the input
        atomic model and the refined atomic model."""
        bildFileName = os.path.abspath(self.protocol._getExtraPath(
            "axis_output.bild"))
        if self.protocol.inputVolume.get() is None:
            # no explicit map: use the one attached to the input structure
            _inputVol = self.protocol.inputStructure.get().getVolume()
            dim = _inputVol.getDim()[0]
            sampling = _inputVol.getSamplingRate()
        else:
            dim = self.protocol.inputVolume.get().getDim()[0]
            sampling = self.protocol.inputVolume.get().getSamplingRate()

        Chimera.createCoordinateAxisFile(dim,
                                         bildFileName=bildFileName,
                                         sampling=sampling)
        counter = 1  # model number of the reference axes
        fnCmd = self.protocol._getExtraPath("chimera_output.cxc")
        # "with" closes the script even if one of the steps below fails
        with open(fnCmd, 'w') as f:
            # reference axis
            f.write("open %s\n" % bildFileName)
            f.write("cofr 0,0,0\n")

            # input 3D map
            counter += 1  # 2
            fnVol = self.protocol._getInputVolume()
            fnVolName = os.path.abspath(fnVol.getFileName())
            if fnVolName.endswith(":mrc"):
                # drop the ":mrc" suffix, keep the plain path
                fnVolName = fnVolName.split(":")[0]
            f.write("open %s\n" % fnVolName)
            x, y, z = fnVol.getOrigin(force=True).getShifts()
            sampling = fnVol.getSamplingRate()
            f.write("volume #%d style surface voxelSize %f\nvolume #%d "
                    "origin %0.2f,%0.2f,%0.2f\n"
                    % (counter, sampling, counter, x, y, z))

            # input PDB (usually from coot)
            counter += 1  # 3
            pdbFileName = os.path.abspath(
                self.protocol.inputStructure.get().getFileName())
            f.write("open %s\n" % pdbFileName)

            # second refmac step output -> refined PDB
            counter += 1  # 4
            pdbFileName = os.path.abspath(
                self.protocol.outputPdb.getFileName())
            f.write("open %s\n" % pdbFileName)

        # run in the background
        Chimera.runProgram(Chimera.getProgram(), fnCmd + "&")
        return []

    def _visualizeFinalResults(self, e=None):
        """Show the final results table of the log file."""
        headerList, dataList, msg = self.parseFile.retrievefinalResults()
        if not dataList:
            errorWindow(self.getTkRoot(), msg)
            return

        TableView(headerList=headerList,
                  dataList=dataList,
                  mesg="Values for a good fitted 3D map.\nR factor ~ 0.3,\n"
                       "Rms BondLength ~ 0.02.",
                  title="Refmac: Final Results Summary",
                  height=len(dataList), width=250, padding=40)

    def _visualizeLogFile(self, e=None):
        """Show refmac log file."""
        refineLogFileName = self.protocol._getExtraPath(
            self.protocol.refineLogFileName)
        _open_cmd(refineLogFileName, self.getTkRoot())

    def _visualizeLastIteration(self, e=None):
        """Show the lines of the log file that summarize the last
        iteration."""
        headerList, dataList, msg = self.parseFile.retrievelastIteration()
        if not dataList:
            errorWindow(self.getTkRoot(), msg)
            return

        TableView(headerList=headerList,
                  dataList=dataList,
                  mesg=" ",
                  title="Refmac: Last Iteration summary",
                  height=len(dataList), width=200, padding=40)

    def _plotVsCycle(self, retrieve, yLabel=None):
        """Plot every data series returned by *retrieve* against the cycle.

        :param retrieve: one of the plot ``retrieve*`` methods of
            ``ParseFile``; returns (header, data, message, title) where
            data[0] is the x axis and data[1:] the curves.
        :param yLabel: label of the y axis; defaults to the header of the
            first curve.
        """
        headerList, dataList, msg, title = retrieve()
        if not dataList:
            errorWindow(self.getTkRoot(), msg)
            return

        if yLabel is None:
            yLabel = headerList[1]
        xplotter = Plotter(windowTitle=title)
        a = xplotter.createSubPlot(title, headerList[0], yLabel,
                                   yformat=False)
        # see
        # https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.plot.html
        # for plot options: plot() takes x, y, format triplets
        plotArgs = []
        for series, style in zip(dataList[1:], self._CURVE_STYLES):
            plotArgs.extend([dataList[0], series, style])
        a.plot(*plotArgs)
        xplotter.showLegend(headerList[1:])
        xplotter.show()

    def _visualizeRFactorPlot(self, e=None):
        """ Plot Rfactor and Rfree vs cycle :N:[1,3]:
        """
        self._plotVsCycle(self.parseFile.retrieveRFactorPlot, 'Rfactor')

    def _visualizeFOMPlot(self, e=None):
        """ Plot FOM vs cycle :N:1,4:
        """
        self._plotVsCycle(self.parseFile.retrieveFomPlot)

    def _visualizeLLPlot(self, e=None):
        """ Plot -LL vs cycle :N:1,5:
        """
        self._plotVsCycle(self.parseFile.retrievemLLPlot, '-LL')

    def _visualizeLLfreePlot(self, e=None):
        """ Plot -LLfree vs cycle :N:1,6:
        """
        self._plotVsCycle(self.parseFile.retrievemLLfreePlot, '-LLfree')

    def _visualizeGeometryPlot(self, e=None):
        """ Plot rmsBOND, zBOND, rmsANGL, zANGL and rmsCHIRAL vs cycle:
        N:1,7,8,9,10,11:
        """
        self._plotVsCycle(self.parseFile.retrieveGeometryPlot, 'geometry')