Source code for xmipp3.protocols.protocol_classification_gpuCorr_full

# ******************************************************************************
# *
# * Authors:     Amaya Jimenez Moreno (ajimenez@cnb.csic.es)
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# ******************************************************************************

from shutil import copy
from os.path import exists, getmtime, splitext
from datetime import datetime
from os import system, popen, mkdir, listdir, remove
from os.path import join
from random import randint
import os

from pyworkflow import VERSION_2_0
import pyworkflow.protocol.params as params
from pyworkflow.utils import prettyTime, cleanPath
from pyworkflow.protocol.constants import STATUS_NEW
import pyworkflow.protocol.constants as const

from pwem.objects import SetOfParticles,  Set
from pwem.protocols import ProtAlign2D
from pwem.constants import ALIGN_2D, ALIGN_NONE
import pwem.emlib.metadata as md

from pwem import emlib
from pwem.emlib import Image, MD_APPEND, DT_DOUBLE

from xmipp3.constants import CUDA_ALIGN_SIGNIFICANT
from xmipp3.convert import (writeSetOfParticles, xmippToLocation,
                            rowToAlignment, rowToParticle)
from xmipp3.base import isXmippCudaPresent

HASH_SIZE = 100

[docs]class HashTableDict: def __init__(self, Ndict=HASH_SIZE): self.Ndict = Ndict self.dict = [{}]*Ndict
[docs] def isItemPresent(self, idx): return idx in self.dict[idx % self.Ndict]
[docs] def pushItem(self, idx): idxDict = idx % self.Ndict if not idx in self.dict[idxDict]: self.dict[idxDict][idx]=1
[docs]class XmippProtStrGpuCrrCL2D(ProtAlign2D): """ 2D alignment in full streaming using Xmipp GPU Correlation. The set of classes will be growing whilst new particle images are received.""" _label = 'gl2d streaming' _lastUpdateVersion = VERSION_2_0 _stepsCheckSecs = 10 # --------------------------- DEFINE param functions ----------------------- def _defineAlignParams(self, form): form.addHidden(params.GPU_LIST, params.StringParam, default='0', expertLevel=const.LEVEL_ADVANCED, label="Choose GPU IDs", help="Add a list of GPU devices that can be used") form.addParam('maxShift', params.IntParam, default=10, label='Maximum shift (%):', help='Maximum shift allowed during the alignment as ' 'percentage of the input set size', expertLevel=const.LEVEL_ADVANCED) form.addParam('keepBest', params.IntParam, default=1, label='Number of best images:', help='Number of classes to assign every input image ' 'during the alignment', expertLevel=const.LEVEL_ADVANCED) form.addParam('numberOfSplitIterations', params.IntParam, default=2, label='Number of iterations in split stage:', help='Maximum number of iterations in split stage', expertLevel=const.LEVEL_ADVANCED) form.addParam('numberOfClassifyIterations', params.IntParam, default=2, label='Number of iterations in classify stage:', help='Maximum number of iterations when the classification' ' of the whole image set is carried out', expertLevel=const.LEVEL_ADVANCED) form.addParam('imageSize', params.IntParam, default=64, label='Image size', help='The image size can be downsampled to accelerate ' 'the classification', expertLevel=const.LEVEL_ADVANCED) form.addParam('threshold', params.IntParam, default=500, label='Threshold to split', help='The threshold in the number of images assigned ' 'to one class to make a spliting of that class', expertLevel=const.LEVEL_ADVANCED) form.addParam('blockSize', params.IntParam, default=5000, label='Block size', help='The inputs will be processed in a block-by-block ' 'basis of this size', expertLevel=const.LEVEL_ADVANCED) form.addParam('numClasses', params.IntParam, default=1000, label='Maximum number of classes', help='Maximum number of classes to be generated', expertLevel=const.LEVEL_ADVANCED) form.addParam('useCL2D', params.BooleanParam, default=True, label='Use CL2D', help='If you set to *Yes*, you will use CL2D (CPU) ' 'to make the split process', expertLevel=const.LEVEL_ADVANCED,) form.addParallelSection(threads=0, mpi=8) # --------------------------- INSERT steps functions ----------------------- def _insertAllSteps(self): """" Mainly prepare the command line for calling cuda corrrelation program""" self._readingCheckPoint() self.listInFn = [] self.listOutFn = [] self.listOutSplitFn = [] self.doneListFn = [] self.lastDate = 0 self.imgsExp = self._getExtraPath('imagesExp.xmd') self.listNumImgs = [] self.listNameImgs = [] self.listRefImgs = [] self.particlesToProcess = [] self.randRef = None self.htAlreadyProcessed = HashTableDict() xOrig = self.inputParticles.get().getXDim() self.maximumShift = int(self.maxShift.get() * xOrig / 100) self._loadInputList() self.changeSampling = False xO = self.listOfParticles[0].getXDim() newSize = self.imageSize.get() self.newSamplingRate = self.listOfParticles[0].getSamplingRate() if xO != newSize: self.changeSampling=True factor = float(xO) / float(newSize) self.newSamplingRate = self.newSamplingRate * factor deps = [] numBlk, rem = divmod(float(len(self.listOfParticles)), float(self.blockSize.get())) numBlk = int(numBlk) rem = int(rem) if rem > 0: numBlk += 1 for i in range(numBlk): if i==0: deps += self._insertStepsForParticles(True, False) else: deps += self._insertStepsForParticles(False, True) self._insertFunctionStep('createOutputStep', prerequisities=deps, wait=True) def _insertStepsForParticles(self, flagSplit, reclassification): inputImgs = self._getInputFn() deps = [] stepIdClassify = self._insertFunctionStep\ ('classifyStep', inputImgs, flagSplit, reclassification, prerequisites=[]) deps.append(stepIdClassify) return deps # --------------------------- STEPS functions -------------------------- def _stepsCheck(self): self._checkNewInput() self._checkNewOutput()
[docs] def createOutputStep(self): self._createFinalMetadata() self._createFinalClasses()
def _checkNewInput(self): """ Check if there are new particles to be processed and add the necessary steps.""" particlesFile = self.inputParticles.get().getFileName() now = datetime.now() self.lastCheck = getattr(self, 'lastCheck', now) mTime = datetime.fromtimestamp(getmtime(particlesFile)) self.debug('Last check: %s, modification: %s' % (prettyTime(self.lastCheck), prettyTime(mTime))) # If the input have not changed since our last check, # it does not make sense to check for new input data if self.lastCheck > mTime and hasattr(self, 'listOfParticles'): return None self.lastCheck = now outputStep = self._getFirstJoinStep() # Open input and close it as soon as possible self._loadInputList() if len(self.listOfParticles)==0: return None deps=[] numBlk, rem = divmod(float(len(self.listOfParticles)), float(self.blockSize.get())) numBlk = int(numBlk) rem = int(rem) if rem>0: numBlk+=1 for i in range(numBlk): deps += self._insertStepsForParticles(False, True) if outputStep is not None: outputStep.addPrerequisites(*deps) self.updateSteps() def _checkNewOutput(self): """ Check for already done files and update the output set. """ newDone = self._readDoneList() # We have finished when there is not more inputs (stream closed) # and the number of processed particles is equal to the number of inputs self.finished = (self.isStreamClosed == Set.STREAM_CLOSED and len(newDone) == 0) streamMode = Set.STREAM_CLOSED if self.finished else Set.STREAM_OPEN if newDone: self._updateOutputSet(streamMode) self.doneListFn += newDone elif not self.finished: # If we are not finished and no new output have been produced # it does not make sense to proceed and updated the outputs # so we exit from the function here return if self.finished: # Unlock createOutputStep if finished all jobs outputStep = self._getFirstJoinStep() if outputStep and outputStep.isWaiting(): outputStep.setStatus(STATUS_NEW)
[docs] def classifyStep(self, expImgMd, flag_split, reclassification): auxList=[] for im in range(len(self.listOfParticles)): idx = self.listOfParticles[im].getObjId() if not self.htAlreadyProcessed.isItemPresent(idx): auxList.append(self.listOfParticles[im]) self.htAlreadyProcessed.pushItem(idx) self.particlesToProcess+=auxList if len(self.particlesToProcess)==0: return if len(self.particlesToProcess)<self.blockSize.get(): particlesToProcessAux = self.particlesToProcess lastIm = len(self.particlesToProcess)-1 else: particlesToProcessAux = self.particlesToProcess[:self.blockSize.get()] lastIm = self.blockSize.get()-1 self.generateInput(expImgMd, flag_split, reclassification, particlesToProcessAux) if flag_split: refImgMd = self._getExtraPath('split_last_classes.xmd') else: refImgMd = self._getExtraPath('last_classes.xmd') i=0 while i <self.numberOfClassifyIterations: outImgs, refImgMd = self.iterationStep(refImgMd, expImgMd, i, False) if flag_split==False and i+1 < self.numberOfClassifyIterations: classesFnPrev = self._getExtraPath('last_classes.xmd') self.averageClasses(refImgMd, classesFnPrev, refImgMd, True) i += 1 self.generateOutputClasses(refImgMd, flag_split) self.checkSplit() self.lastDate = self.particlesToProcess[lastIm].getObjCreation() self._saveCreationTimeFile(self.lastDate) for p in range(len(particlesToProcessAux)): self.particlesToProcess.pop(0) self._savingCheckPoint()
# --------------------------- UTILS functions ------------------------------
[docs] def splitStep(self, expImgMd): i=0 classesOut=None while i <self.numberOfSplitIterations: outImgs,classesOut = self.iterationStep(classesOut,expImgMd,i,True) i+=1 length = md.getSize(classesOut) if length == 1: i = 0 self.generateMdForClassification(classesOut)
[docs] def generateInput(self, inputImgs, flag_split, reclassification, particlesToProcess): inputStack = inputImgs.replace('xmd','stk') newSize = self.imageSize.get() if self.changeSampling: writeSetOfParticles(particlesToProcess, inputImgs, alignType=ALIGN_NONE) args = "-i %s -o %s --save_metadata_stack --fourier %d " % ( inputImgs, inputStack, newSize) self.runJob("xmipp_image_resize", args, numberOfMpi=1) else: writeSetOfParticles(particlesToProcess, inputImgs, alignType=ALIGN_NONE) if reclassification and len(self.listNumImgs)!=0: self.randRef = randint(1, len(self.listNumImgs)) self._unionReclassification(self.randRef, inputImgs) if flag_split: self.splitStep(inputImgs)
[docs] def generateMdForClassification(self, classesOut): listNameImgs = self.listNameImgs listNumImgs = self.listNumImgs count = 1 # Construct the new classes with the renumerated old classes mdNewClasses = md.MetaData() for i in range(len(listNumImgs)): if listNumImgs[i] != -1: name = listNameImgs[i] fn = name[name.find('@') + 1:-4] + '.xmd' numRef = int(name[0:6]) mdClass = md.MetaData("classes@" + fn) for row in md.iterRows(mdClass): if mdClass.getValue(md.MDL_REF, row.getObjId()) == numRef: row.setValue(md.MDL_REF, count) row.addToMd(mdNewClasses) count += 1 # Add the two new classes to the list of renumerated classes mdClass = md.MetaData("classes@" + classesOut) rows = md.iterRows(mdClass) for row in rows: row.setValue(md.MDL_REF, count) row.addToMd(mdNewClasses) count += 1 mdNewClasses.write('classes@' + self._getExtraPath('split_last_classes.xmd'), MD_APPEND) # Generate the intermediate images and the blocks of the intermediate # classes for the unchanged classes count=1 for i in range(len(listNumImgs)): if listNumImgs[i] != -1: # Read the list of images in this class mdImgsInClass = md.MetaData(self._getExtraPath( 'dataClass%06d.xmd' % (i+1))) mdImgsInClass.fillConstant(md.MDL_REF, count) mdImgsInClass.write(self._getExtraPath('dataClass%06d.xmd' % count)) count += 1 # Add the two new classes for newRef in range(0, 2): mdImgsInClass = md.MetaData( 'class%06d_images@%s' % (newRef + 1, classesOut)) mdImgsInClass.fillConstant(md.MDL_REF, count) mdImgsInClass.write(self._getExtraPath('dataClass%06d.xmd' % count)) count+=1
[docs] def averageClasses(self, finalMetadata, lastMetadata, newMetadata, flag_iter): metadataItemLast = md.MetaData(lastMetadata) metadataItemNew = md.MetaData(newMetadata) if flag_iter: metadataFinal = md.MetaData(finalMetadata) finalName = metadataFinal.getValue(md.MDL_IMAGE, 1) finalName = finalName[7:-3]+'stk' else: finalName = finalMetadata total=[] newRef = 1 i = 1 for itemLast, itemNew in zip(metadataItemLast, metadataItemNew): listToMultiply = [] numImgsLastClasses = metadataItemLast.getValue( md.MDL_CLASS_COUNT, itemLast) nameRefLastClasses = metadataItemLast.getValue(md.MDL_IMAGE, itemLast) nameRefNewClasses = metadataItemNew.getValue(md.MDL_IMAGE, itemNew) numImgsNewClasses = metadataItemNew.getValue(md.MDL_CLASS_COUNT, itemNew) if flag_iter==False and i==self.randRef: total.append(numImgsNewClasses) else: total.append(numImgsLastClasses + numImgsNewClasses) i += 1 if (numImgsLastClasses + numImgsNewClasses)==0: continue if numImgsNewClasses==0: im1 = Image(nameRefLastClasses) im1.write('%06d@' % newRef + finalName) newRef += 1 continue if numImgsLastClasses==0: im2 = Image(nameRefNewClasses) im2.write('%06d@' % newRef + finalName) newRef += 1 continue listToMultiply.append(float(numImgsLastClasses)/ float(numImgsLastClasses + numImgsNewClasses)) listToMultiply.append(float(numImgsNewClasses)/ float(numImgsLastClasses + numImgsNewClasses)) im1 = Image(nameRefLastClasses) im2 = Image(nameRefNewClasses) im2 = emlib.image_align(im1, im2) im1.inplaceMultiply(listToMultiply[0]) im2.inplaceMultiply(listToMultiply[1]) im1.convert2DataType(DT_DOUBLE) im2.convert2DataType(DT_DOUBLE) im1.inplaceAdd(im2) # Aligned im1.write('%06d@' % newRef + finalName) newRef+=1 return total
[docs] def generateOutputClasses(self, classesOut, firstTime): if firstTime: self._saveFileDataClasses(classesOut, self._getExtraPath( 'last_classes.xmd')) # Add the two new classes for i in range(1, 3): mdImgsInClass = md.MetaData( 'class%06d_images@%s' % (i,classesOut)) mdImgsInClass.write(self._getExtraPath('dataClass%06d.xmd' % i)) return finalMetadata = self._getExtraPath('aux_classes.stk') lastMetadata = self._getExtraPath('last_classes.xmd') newMetadata = classesOut total = self.averageClasses(finalMetadata, lastMetadata, newMetadata, False) copy(self._getExtraPath('aux_classes.stk'), self._getExtraPath('last_classes.stk')) mdAll = md.MetaData() newRef=1 for i in total: if i != 0: row = md.Row() row.setValue(md.MDL_REF, newRef) row.setValue(md.MDL_IMAGE, '%06d@'%newRef + finalMetadata) row.setValue(md.MDL_CLASS_COUNT, i) row.addToMd(mdAll) newRef+=1 mdAll.write('classes@'+finalMetadata[:-3] + 'xmd', MD_APPEND) copy(self._getExtraPath('aux_classes.xmd'), self._getExtraPath('last_classes.xmd')) newRef=1 for i, val in enumerate(total): if val != 0: self._unionDataClass(classesOut, i+1, newRef) newRef+=1
[docs] def iterationStep (self, refSet, imgsExp, iter, flag_split): if flag_split: outImgs, classesOut = self._getOutputSplitFn() else: outImgs, classesOut = self._getOutputClassFn() outDirName = splitext(imgsExp)[0] if iter==0 and flag_split==True: # First step: divide the metadata input file to generate # a couple of references self._params = {'imgsExp': imgsExp, 'outDir': outDirName} args = ('-i %(imgsExp)s -n 2 --oroot %(outDir)s') self.runJob("xmipp_metadata_split", args % self._params, numberOfMpi=1) # Second step: calculate the means of the previous metadata expSet1 = outDirName + '000001.xmd' avg1 = outDirName + '_000001' expSet2 = outDirName + '000002.xmd' avg2 = outDirName + '_000002' self._params = {'imgsSet': expSet1, 'outputAvg': avg1} args = ('-i %(imgsSet)s --save_image_stats %(outputAvg)s -v 0') self.runJob("xmipp_image_statistics", args % self._params, numberOfMpi=1) self._params = {'imgsSet': expSet2, 'outputAvg': avg2} args = ('-i %(imgsSet)s --save_image_stats %(outputAvg)s -v 0') self.runJob("xmipp_image_statistics", args % self._params, numberOfMpi=1) # Third step: generate a single metadata with the two previous avgs refSet = self._getExtraPath('refSet.xmd') self._params = {'avg1': avg1 + 'average.xmp', 'avg2': avg2 + 'average.xmp', 'outputMd': refSet} args = ('-i %(avg1)s --set union %(avg2)s -o %(outputMd)s') self.runJob("xmipp_metadata_utilities", args % self._params, numberOfMpi=1) # Fourth step: calling program xmipp_cuda_align_significant metadataRef = md.MetaData(refSet) if metadataRef.containsLabel(md.MDL_REF) is False: args = ('-i %(outputMd)s --fill ref lineal 1 1 -o %(outputMd)s') self.runJob("xmipp_metadata_utilities", args % self._params, numberOfMpi=1) count = 0 GpuListCuda = '' if self.useQueueForSteps() or self.useQueue(): GpuList = os.environ["CUDA_VISIBLE_DEVICES"] GpuList = GpuList.split(",") for elem in GpuList: GpuListCuda = GpuListCuda + str(count) + ' ' count += 1 else: GpuListAux = '' for elem in self.getGpuList(): GpuListCuda = GpuListCuda + str(count) + ' ' GpuListAux = GpuListAux + str(elem) + ',' count += 1 os.environ["CUDA_VISIBLE_DEVICES"] = GpuListAux if flag_split: fileTocopy = classesOut.replace('.xmd', '_classes.xmd') fileTocopy = fileTocopy.replace('extra/', 'extra/level_00/') self._params = {'imgsRef': refSet, 'imgsExp': imgsExp, 'maxshift': self.maximumShift, 'Nrefs': md.getSize(refSet), 'outDir': self._getExtraPath(), 'outImgCuda': self._getExtraPath("images.xmd"), 'rootFn': classesOut.split('/')[-1].replace('.xmd', ''), 'keepBest': self.keepBest.get(), 'outClassesCuda': fileTocopy, } args = '-i %(imgsExp)s --ref0 %(imgsRef)s --nref %(Nrefs)d ' \ '--iter 1 --distance correlation --classicalMultiref ' \ '--maxShift %(maxshift)d --odir %(outDir)s --oroot %(' \ 'rootFn)s --dontMirrorImages ' self.runJob("xmipp_classify_CL2D", args % self._params, numberOfMpi=self.numberOfMpi.get()) copy(fileTocopy, classesOut) copy(self._getExtraPath("images.xmd"), outImgs) else: self._params = {'imgsRef': refSet, 'imgsExp': imgsExp, 'outputFile': outImgs, 'keepBest': self.keepBest.get(), 'maxshift': self.maximumShift, 'outputClassesFile': classesOut, 'device': GpuListCuda, 'outputClassesFileNoExt': splitext(classesOut)[0], 'auxOut': self._getExtraPath('flipReferences%06d.xmd'%iter), } args = '-i %(imgsExp)s -r %(imgsRef)s -o %(outputFile)s ' \ '--keepBestN 1 --oUpdatedRefs %(outputClassesFileNoExt)s --dev %(device)s ' self.runJob(CUDA_ALIGN_SIGNIFICANT, args % self._params, numberOfMpi=1) if exists(outDirName + '000001.xmd'): cleanPath(expSet1) cleanPath(expSet2) cleanPath(avg1 + 'average.xmp') cleanPath(avg2 + 'average.xmp') cleanPath(avg1 + 'stddev.xmp') cleanPath(avg2 + 'stddev.xmp') return outImgs, classesOut
[docs] def checkSplit(self): outSet = self._getExtraPath('last_classes.xmd') self.listNameImgs = [] self.listNumImgs = [] self.listRefImgs = [] metadataItem = md.MetaData(outSet) for item in metadataItem: nameImg = metadataItem.getValue(md.MDL_IMAGE, item) self.listNameImgs.append(nameImg) numImgs = metadataItem.getValue(md.MDL_CLASS_COUNT, item) self.listNumImgs.append(numImgs) refImg = metadataItem.getValue(md.MDL_REF, item) self.listRefImgs.append(refImg) i=0 auxList = sorted(self.listNumImgs, reverse=True) while i<len(self.listNumImgs): if len(self.listNumImgs) < self.numClasses.get(): #inside the while # just in case we lose some class we want to allow one more # split if auxList[i] < (1.75 * self.threshold.get()): i+=1 continue maxValue = auxList[i] maxPos = self.listNumImgs.index(maxValue) self.listNumImgs[maxPos] = -1 bestRef = self.listRefImgs[maxPos] outputMd = self._getExtraPath('dataClass%06d.xmd' % bestRef) self.splitStep(outputMd) i=0 outSet = self._getExtraPath('split_last_classes.xmd') self.listNameImgs = [] self.listNumImgs = [] self.listRefImgs = [] metadataItem = md.MetaData(outSet) for item in metadataItem: nameImg = metadataItem.getValue(md.MDL_IMAGE, item) self.listNameImgs.append(nameImg) numImgs = metadataItem.getValue(md.MDL_CLASS_COUNT, item) self.listNumImgs.append(numImgs) refImg = metadataItem.getValue(md.MDL_REF, item) self.listRefImgs.append(refImg) copy(outSet, self._getExtraPath('last_classes.xmd')) auxList = sorted(self.listNumImgs, reverse=True) i=0 else: break
def _loadInputList(self): """ Load the input set of ctfs and create a list. """ particlesSet = self._loadInputParticleSet() lastDate = self._readCreationTimeFile() self.isStreamClosed = particlesSet.getStreamState() self.listOfParticles = [] for p in particlesSet.iterItems(orderBy='creation', where="creation>'%s'" % lastDate): idx = p.getObjId() if not self.htAlreadyProcessed.isItemPresent(idx): newPart = p.clone() newPart.setObjCreation(p.getObjCreation()) self.listOfParticles.append(newPart) particlesSet.close() self.debug("Closed db.") def _loadInputParticleSet(self): partSetFn = self.inputParticles.get().getFileName() updatedSet = SetOfParticles(filename=partSetFn) copyPartSet = SetOfParticles() updatedSet.loadAllProperties() copyPartSet.copy(updatedSet) updatedSet.close() return copyPartSet def _getUniqueFn(self, basename, list): if list == []: fn = basename + "_1.xmd" else: number = int(list[-1].split("_")[-1].split(".")[0]) + 1 fn = basename + "_%s.xmd" % number list.append(fn) return fn def _getInputFn(self): basename = self._getExtraPath('imagesExp') return self._getUniqueFn(basename, self.listInFn) def _getOutputSplitFn(self): nameImages = self._getExtraPath('split_general_images') imagesFn = self._getUniqueFn(nameImages, self.listOutSplitFn) classesFn = imagesFn.replace('images', 'classes') return imagesFn, classesFn def _getOutputClassFn(self): nameImages = self._getExtraPath('class_general_images') imagesFn = self._getUniqueFn(nameImages, self.listOutFn) classesFn = imagesFn.replace('images', 'classes') return imagesFn, classesFn def _readDoneList(self): return [fn for fn in self.listOutFn if fn not in self.doneListFn] def _getFirstJoinStep(self): for s in self._steps: if s.funcName == 'createOutputStep': return s return None def _createFinalClasses(self): # Here the defineOutputs function will call the write() method outputSet = self._createSetOfClasses2D(self.inputParticles) self._fillClasses(outputSet) self._defineOutputs(**{'outputClasses': outputSet}) self._defineSourceRelation(self.inputParticles, outputSet) self._store(outputSet) # Close set databaset to avoid locking it outputSet.close() def _updateOutputSet(self, state=Set.STREAM_OPEN): setFile = self._getPath('averages.sqlite') if exists(setFile): cleanPath(setFile) inputs = self.inputParticles.get() # Here the defineOutputs function will call the write() method outputAvg = self._createSetOfAverages() outputAvg.copyInfo(inputs) self._fillAverages(outputAvg) self._defineOutputs(**{'outputAverages': outputAvg}) self._defineSourceRelation(self.inputParticles, outputAvg) self._store(outputAvg) # Close set databaset to avoid locking it outputAvg.close() def _updateParticle(self, item, row): item.setClassId(row.getValue(md.MDL_REF)) item.setTransform(rowToAlignment(row, ALIGN_2D)) def _updateClass(self, item): classId = item.getObjId() if classId in self._classesInfo: index, fn, _ = self._classesInfo[classId] item.setAlignment2D() rep = item.getRepresentative() rep.setLocation(index, fn) rep.setSamplingRate(self.newSamplingRate) def _loadClassesInfo(self, filename): """ Read some information about the produced 2D classes from the metadata file. """ self._classesInfo = {} # store classes info, indexed by class id mdClasses = md.MetaData(filename) for classNumber, row in enumerate(md.iterRows(mdClasses)): index, fn = xmippToLocation(row.getValue(md.MDL_IMAGE)) self._classesInfo[classNumber + 1] = (index, fn, row.clone()) def _fillClasses(self, clsSet): """ Create the SetOfClasses2D from a given iteration. """ myFileParticles = self._getExtraPath('last_images.xmd') myFileClasses = self._getExtraPath('last_classes.xmd') self._loadClassesInfo(myFileClasses) xmpMd = myFileParticles iterator = md.SetMdIterator(xmpMd, sortByLabel=md.MDL_ITEM_ID, updateItemCallback=self._updateParticle, skipDisabled=True) clsSet.classifyItems(updateItemCallback=iterator.updateItem, updateClassCallback=self._updateClass) def _fillAverages(self, avgSet): """ Create the SetOfAverages from a given metadata """ myFileClasses = "classes@" + self._getExtraPath('last_classes.xmd') repSet = md.MetaData(myFileClasses) for rep in md.iterRows(repSet): particle = rowToParticle(rep) repId = rep.getValue(md.MDL_REF) #rep.getObjId() particle.setObjId(repId) avgSet.append(particle) def _saveFileDataClasses(self, fn, fnSave): line = "" numLine = 0 while not line.startswith('data_class0'): numLine += 1 line = popen('sed -n %ip %s' % (numLine, fn)).read() system('head -%i %s >> %s' % (numLine - 1, fn, fnSave)) def _createFinalMetadata(self): fnClasses = self._getExtraPath('last_classes.xmd') numClasses = md.getSize(fnClasses) for i in range(numClasses): num=i+1 fn = self._getExtraPath('dataClass%06d.xmd' % num) if exists(fn): line = "" numLine = 0 while not line.startswith(' '): numLine += 1 line = popen('sed -n %ip %s' % (numLine, fn)).read() aux = self._getExtraPath('aux.xmd') system('head -%i %s >> %s' % (numLine - 1, fn, aux)) dataHeader = self._getExtraPath('dataHeader.xmd') fp = open(aux, 'r') fout = open(dataHeader, 'w') for i, line in enumerate(fp): if i<2: continue if not line.startswith('data_noname'): fout.write(line) else: line = line.replace('noname', 'class%06d_images'%num) fout.write(line) fp.close() fout.close() cleanPath(aux) fnOut = self._getExtraPath('dataImages%06d.xmd' % num) system('tail -n +%i %s >> %s' % (i+2, fn, aux)) system('cat %s %s >> %s'%(dataHeader, aux, fnOut)) if num==1: system('cat %s >> %s' % (fn, self._getExtraPath('last_images.xmd'))) else: system('cat %s >> %s' %(aux, self._getExtraPath('last_images.xmd'))) cleanPath(aux) cleanPath(dataHeader) system('cat %s >> %s' % (fnOut, self._getExtraPath('last_classes.xmd'))) def _unionDataClass(self, fn, refOld, refNew): fnNew = self._getExtraPath('dataClass%06d.xmd' % refNew) fnOld = self._getExtraPath('dataClass%06d.xmd' % refOld) if refOld != refNew and exists(fnOld): mdOld = md.MetaData(fnOld) mdOld.fillConstant(md.MDL_REF, refNew) mdOld.write(fnOld) if exists(fnOld): fnAux = self._getExtraPath('aux.xmd') mdImgsInClass = md.MetaData('class%06d_images@%s' % (refOld, fn)) if not mdImgsInClass.isEmpty(): if refOld != refNew: mdImgsInClass.fillConstant(md.MDL_REF, refNew) mdImgsInClass.write(fnAux) line = "" numLine = 0 while not line.startswith(' '): numLine += 1 line = popen('sed -n %ip %s' % (numLine, fnAux)).read() fnSave = self._getExtraPath('newDataClass%06d.xmd'%refNew) system('tail -n +%i %s >> %s' % (numLine, fnAux, fnSave)) cleanPath(fnAux) if refOld==refNew: system('cat %s >> %s' % (fnSave, fnNew)) else: if exists(fnNew): cleanPath(fnNew) system('cat %s %s >> %s' %(fnOld, fnSave, fnNew)) cleanPath(fnSave) else: if refOld!=refNew: if exists(fnNew): cleanPath(fnNew) copy(fnOld, fnNew) else: mdImgsInClass = md.MetaData('class%06d_images@%s' % (refOld, fn)) if refOld != refNew: mdImgsInClass.fillConstant(md.MDL_REF, refNew) mdImgsInClass.write(fnNew) def _unionReclassification(self, ref, inputImgs): fn1 = self._getExtraPath('dataClass%06d.xmd' % ref) line = "" numLine = 0 while not line.startswith(' '): numLine += 1 line = popen('sed -n %ip %s' % (numLine, fn1)).read() fnSave = self._getExtraPath('aux.xmd') system('tail -n +%i %s >> %s' % (numLine, fn1, fnSave)) system('cat %s >> %s' % (fnSave, inputImgs)) cleanPath(fnSave) cleanPath(fn1) def _saveCreationTimeFile(self, cTime): fn = open(self._getExtraPath('creation.txt'),'w') fn.write(cTime) fn.close() def _readCreationTimeFile(self): if exists(self._getExtraPath('creation.txt')): fn = open(self._getExtraPath('creation.txt'), 'r') cTime = fn.readline() fn.close() else: cTime = 0 return cTime def _savingCheckPoint(self): if not exists(join(self._getExtraPath(), 'checkpoint')): mkdir(join(self._getExtraPath(), 'checkpoint')) listFolder = listdir(self._getExtraPath()) for fn in listFolder: if fn.startswith('dataClass'): copy(join(self._getExtraPath(),fn), self._getExtraPath(join('checkpoint',fn))) copy(self._getExtraPath('last_classes.xmd'), self._getExtraPath(join('checkpoint', 'last_classes.xmd'))) if exists(self._getExtraPath('last_classes.stk')): copy(self._getExtraPath('last_classes.stk'), self._getExtraPath(join('checkpoint', 'last_classes.stk'))) def _readingCheckPoint(self): if exists(join(self._getExtraPath(), 'checkpoint')): listFolder = listdir(join(self._getExtraPath(), 'checkpoint')) for fn in listFolder: copy(self._getExtraPath(join('checkpoint',fn)), self._getExtraPath(fn)) # --------------------------- INFO functions ------------------------------- def _validate(self): errors = [] newSize = self.imageSize.get() x, y, _ = self.inputParticles.get().getDim() if newSize>x or newSize>y: errors.append('The image size must be smaller than the size of ' 'the input images') return errors def _summary(self): summary = [] if not hasattr(self, 'outputClasses'): summary.append("Output alignment not ready yet.") else: summary.append("Input Particles: %s" % self.inputParticles.get().getSize()) summary.append("Aligned in streaming.") return summary def _citations(self): return ['Sorzano2010a'] def _methods(self): methods = [] if not hasattr(self, 'outputClasses'): methods.append("Output alignment not ready yet.") else: methods.append( "We aligned images %s in streaming using CL2D " "[Sorzano2010a] and GPU correlation methods " % self.getObjectTag('inputParticles')) methods.append(" and produced %s images." % self.getObjectTag('outputClasses')) return methods