# ******************************************************************************
# *
# * Authors: Amaya Jimenez Moreno (ajimenez@cnb.csic.es)
# *
# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307 USA
# *
# * All comments concerning this program package may be sent to the
# * e-mail address 'scipion@cnb.csic.es'
# *
# ******************************************************************************
from shutil import copy
from os.path import exists, getmtime, splitext
from datetime import datetime
from os import system, popen, mkdir, listdir, remove
from os.path import join
from random import randint
import os
from pyworkflow import VERSION_2_0
import pyworkflow.protocol.params as params
from pyworkflow.utils import prettyTime, cleanPath
from pyworkflow.protocol.constants import STATUS_NEW
import pyworkflow.protocol.constants as const
from pwem.objects import SetOfParticles, Set
from pwem.protocols import ProtAlign2D
from pwem.constants import ALIGN_2D, ALIGN_NONE
import pwem.emlib.metadata as md
from pwem import emlib
from pwem.emlib import Image, MD_APPEND, DT_DOUBLE
from xmipp3.constants import CUDA_ALIGN_SIGNIFICANT
from xmipp3.convert import (writeSetOfParticles, xmippToLocation,
rowToAlignment, rowToParticle)
from xmipp3.base import isXmippCudaPresent
HASH_SIZE = 100


class HashTableDict:
    """Registry of integer ids spread over a fixed number of buckets.

    An id is kept in the bucket selected by ``idx % Ndict``; every bucket
    is an independent dictionary whose keys are the registered ids.
    """

    def __init__(self, Ndict=HASH_SIZE):
        """Create ``Ndict`` empty buckets."""
        self.Ndict = Ndict
        # One distinct dict per bucket: ``[{}] * Ndict`` would store the
        # same dict object in every slot, so all ids would share a bucket.
        self.dict = [{} for _ in range(Ndict)]

    def isItemPresent(self, idx):
        """Return True when ``idx`` has already been pushed."""
        return idx in self.dict[idx % self.Ndict]

    def pushItem(self, idx):
        """Register ``idx``; pushing an id that is already known is a no-op."""
        self.dict[idx % self.Ndict].setdefault(idx, 1)
[docs]class XmippProtStrGpuCrrCL2D(ProtAlign2D):
""" 2D alignment in full streaming using Xmipp GPU Correlation.
The set of classes will be growing whilst new particle images are
received."""
# Class-level settings of the protocol.
# NOTE(review): presumably _label is the name shown for the protocol and
# _stepsCheckSecs the period (in seconds) between _stepsCheck calls -
# confirm against the framework base class.
_label = 'gl2d streaming'
_lastUpdateVersion = VERSION_2_0
_stepsCheckSecs = 10
# --------------------------- DEFINE param functions -----------------------
def _defineAlignParams(self, form):
    """Declare the protocol parameters on ``form``.

    Adds the hidden GPU selector, the advanced integer parameters, the
    ``useCL2D`` switch and finally the parallel section.
    """
    advanced = const.LEVEL_ADVANCED

    form.addHidden(params.GPU_LIST, params.StringParam, default='0',
                   expertLevel=advanced,
                   label="Choose GPU IDs",
                   help="Add a list of GPU devices that can be used")

    # (name, default, label, help) of each integer parameter, listed in
    # the order in which they are added to the form.
    integerParams = (
        ('maxShift', 10, 'Maximum shift (%):',
         'Maximum shift allowed during the alignment as '
         'percentage of the input set size'),
        ('keepBest', 1, 'Number of best images:',
         'Number of classes to assign every input image '
         'during the alignment'),
        ('numberOfSplitIterations', 2,
         'Number of iterations in split stage:',
         'Maximum number of iterations in split stage'),
        ('numberOfClassifyIterations', 2,
         'Number of iterations in classify stage:',
         'Maximum number of iterations when the classification'
         ' of the whole image set is carried out'),
        ('imageSize', 64, 'Image size',
         'The image size can be downsampled to accelerate '
         'the classification'),
        ('threshold', 500, 'Threshold to split',
         'The threshold in the number of images assigned '
         'to one class to make a spliting of that class'),
        ('blockSize', 5000, 'Block size',
         'The inputs will be processed in a block-by-block '
         'basis of this size'),
        ('numClasses', 1000, 'Maximum number of classes',
         'Maximum number of classes to be generated'),
    )
    for name, default, label, helpMsg in integerParams:
        form.addParam(name, params.IntParam, default=default,
                      label=label,
                      help=helpMsg,
                      expertLevel=advanced)

    form.addParam('useCL2D', params.BooleanParam, default=True,
                  label='Use CL2D',
                  help='If you set to *Yes*, you will use CL2D (CPU) '
                       'to make the split process',
                  expertLevel=advanced)

    form.addParallelSection(threads=0, mpi=8)
# --------------------------- INSERT steps functions -----------------------
def _insertAllSteps(self):
    """Initialise the streaming state and insert the processing steps.

    The checkpoint (if any) is restored, the bookkeeping attributes are
    reset and the pending input particles are loaded. One
    ``classifyStep`` is then inserted per block of ``blockSize`` pending
    particles (only the first one in split mode), followed by a waiting
    ``createOutputStep`` that depends on all of them.
    """
    self._readingCheckPoint()

    self.listInFn = []
    self.listOutFn = []
    self.listOutSplitFn = []
    self.doneListFn = []
    self.lastDate = 0
    self.imgsExp = self._getExtraPath('imagesExp.xmd')
    self.listNumImgs = []
    self.listNameImgs = []
    self.listRefImgs = []
    self.particlesToProcess = []
    self.randRef = None
    self.htAlreadyProcessed = HashTableDict()

    xOrig = self.inputParticles.get().getXDim()
    self.maximumShift = int(self.maxShift.get() * xOrig / 100)

    self._loadInputList()

    # When no particle is pending yet, read size and sampling rate from
    # the input set itself instead of failing on an empty list.
    if self.listOfParticles:
        reference = self.listOfParticles[0]
    else:
        reference = self.inputParticles.get()
    xO = reference.getXDim()
    newSize = self.imageSize.get()
    self.newSamplingRate = reference.getSamplingRate()
    self.changeSampling = xO != newSize
    if self.changeSampling:
        factor = float(xO) / float(newSize)
        self.newSamplingRate = self.newSamplingRate * factor

    # Number of blocks needed to cover all pending particles.
    numBlk, rem = divmod(len(self.listOfParticles), self.blockSize.get())
    if rem > 0:
        numBlk += 1

    deps = []
    for i in range(numBlk):
        firstBlock = (i == 0)
        deps += self._insertStepsForParticles(firstBlock, not firstBlock)

    # The keyword must be spelled 'prerequisites' (as in
    # _insertStepsForParticles) for the dependencies to be taken.
    self._insertFunctionStep('createOutputStep', prerequisites=deps,
                             wait=True)
def _insertStepsForParticles(self, flagSplit, reclassification):
inputImgs = self._getInputFn()
deps = []
stepIdClassify = self._insertFunctionStep\
('classifyStep', inputImgs, flagSplit, reclassification,
prerequisites=[])
deps.append(stepIdClassify)
return deps
# --------------------------- STEPS functions --------------------------
def _stepsCheck(self):
self._checkNewInput()
self._checkNewOutput()
def createOutputStep(self):
    """Build the final metadata and, after that, the final classes."""
    for build in (self._createFinalMetadata, self._createFinalClasses):
        build()
def _checkNewInput(self):
    """Insert new classification steps when the input set has grown.

    Nothing is done when the input file was not modified after the
    previous check (and an input list has already been loaded), nor when
    no pending particle is found after reloading the input.
    """
    inputFn = self.inputParticles.get().getFileName()
    checkTime = datetime.now()
    self.lastCheck = getattr(self, 'lastCheck', checkTime)
    modifiedTime = datetime.fromtimestamp(getmtime(inputFn))
    self.debug('Last check: %s, modification: %s'
               % (prettyTime(self.lastCheck),
                  prettyTime(modifiedTime)))

    # An unchanged input file cannot hold new particles.
    unchanged = self.lastCheck > modifiedTime
    if unchanged and hasattr(self, 'listOfParticles'):
        return None

    self.lastCheck = checkTime
    joinStep = self._getFirstJoinStep()

    # Read the input set and release it as soon as possible.
    self._loadInputList()
    numParticles = len(self.listOfParticles)
    if numParticles == 0:
        return None

    numBlocks, leftover = divmod(numParticles, self.blockSize.get())
    if leftover > 0:
        numBlocks += 1

    newDeps = []
    for _ in range(numBlocks):
        newDeps.extend(self._insertStepsForParticles(False, True))

    if joinStep is not None:
        joinStep.addPrerequisites(*newDeps)
    self.updateSteps()
def _checkNewOutput(self):
    """Refresh the output for newly produced files and, once everything
    is finished, release the waiting ``createOutputStep``."""
    pendingFns = self._readDoneList()

    # Finished means: the input stream is closed and no output file is
    # left to be registered.
    streamClosed = self.isStreamClosed == Set.STREAM_CLOSED
    self.finished = streamClosed and len(pendingFns) == 0

    if self.finished:
        streamMode = Set.STREAM_CLOSED
    else:
        streamMode = Set.STREAM_OPEN

    if pendingFns:
        self._updateOutputSet(streamMode)
        self.doneListFn += pendingFns

    if not self.finished:
        # Nothing else to do until every job is done.
        return

    # Unlock createOutputStep now that all jobs have finished.
    joinStep = self._getFirstJoinStep()
    if joinStep and joinStep.isWaiting():
        joinStep.setStatus(STATUS_NEW)
def classifyStep(self, expImgMd, flag_split, reclassification):
    """Classify the next block of pending particles.

    Loaded particles that were never queued are appended to
    ``self.particlesToProcess``; at most ``blockSize`` of them are handed
    to ``generateInput`` and classified during
    ``numberOfClassifyIterations`` iterations. The classes are then
    updated, oversized classes are split, the creation time of the last
    processed particle is stored and a checkpoint is saved.

    :param expImgMd: file name of the experimental images metadata.
    :param flag_split: True when the references come from the split stage
        ('split_last_classes.xmd') instead of 'last_classes.xmd'.
    :param reclassification: forwarded untouched to ``generateInput``.
    """
    # Queue every loaded particle that has not been queued before.
    for particle in self.listOfParticles:
        idx = particle.getObjId()
        if not self.htAlreadyProcessed.isItemPresent(idx):
            self.particlesToProcess.append(particle)
            self.htAlreadyProcessed.pushItem(idx)

    if len(self.particlesToProcess) == 0:
        return

    blockSize = self.blockSize.get()
    if len(self.particlesToProcess) < blockSize:
        particlesToProcessAux = self.particlesToProcess
    else:
        particlesToProcessAux = self.particlesToProcess[:blockSize]
    numInBlock = len(particlesToProcessAux)

    self.generateInput(expImgMd, flag_split, reclassification,
                       particlesToProcessAux)

    if flag_split:
        refImgMd = self._getExtraPath('split_last_classes.xmd')
    else:
        refImgMd = self._getExtraPath('last_classes.xmd')

    # Use the parameter value (.get()), as done for every other
    # parameter in this protocol, instead of the parameter object.
    numIters = self.numberOfClassifyIterations.get()
    for i in range(numIters):
        _, refImgMd = self.iterationStep(refImgMd, expImgMd, i, False)
        if not flag_split and i + 1 < numIters:
            classesFnPrev = self._getExtraPath('last_classes.xmd')
            self.averageClasses(refImgMd, classesFnPrev, refImgMd, True)

    self.generateOutputClasses(refImgMd, flag_split)
    self.checkSplit()

    self.lastDate = self.particlesToProcess[numInBlock - 1].getObjCreation()
    self._saveCreationTimeFile(self.lastDate)

    # Drop the processed block from the front of the queue in one go.
    del self.particlesToProcess[:numInBlock]

    self._savingCheckPoint()
# --------------------------- UTILS functions ------------------------------
def splitStep(self, expImgMd):
    """Split the images listed in ``expImgMd`` into two classes.

    ``iterationStep`` is run in split mode ``numberOfSplitIterations``
    times; whenever the resulting classes file holds a single class the
    iteration count starts over. The result is finally merged with the
    existing classes by ``generateMdForClassification``.
    """
    # Use the parameter value (.get()), as done for every other
    # parameter in this protocol, instead of the parameter object.
    maxIters = self.numberOfSplitIterations.get()
    classesOut = None
    i = 0
    while i < maxIters:
        _, classesOut = self.iterationStep(classesOut, expImgMd, i, True)
        i += 1
        if md.getSize(classesOut) == 1:
            # The split collapsed into one class: try again from scratch.
            i = 0
    self.generateMdForClassification(classesOut)
[docs] def generateMdForClassification(self, classesOut):
# Writes the 'classes' block of 'split_last_classes.xmd' and renumbers the
# per-class 'dataClass%06d.xmd' files, using the classes kept in
# self.listNameImgs / self.listNumImgs plus the classes found in classesOut.
# Entries of listNumImgs equal to -1 are skipped (checkSplit marks the class
# being split with -1).
# NOTE(review): the indentation of this listing is not visible, so the
# nesting level of the 'count += 1' lines cannot be confirmed from here.
listNameImgs = self.listNameImgs
listNumImgs = self.listNumImgs
count = 1
# Construct the new classes with the renumerated old classes
mdNewClasses = md.MetaData()
for i in range(len(listNumImgs)):
if listNumImgs[i] != -1:
name = listNameImgs[i]
# File name: text after '@' without its last 4 characters, plus '.xmd'.
fn = name[name.find('@') + 1:-4] + '.xmd'
# The first six characters of the image name are read as the reference.
numRef = int(name[0:6])
mdClass = md.MetaData("classes@" + fn)
for row in md.iterRows(mdClass):
if mdClass.getValue(md.MDL_REF, row.getObjId()) == numRef:
row.setValue(md.MDL_REF, count)
row.addToMd(mdNewClasses)
count += 1
# Add the two new classes to the list of renumerated classes
mdClass = md.MetaData("classes@" + classesOut)
rows = md.iterRows(mdClass)
for row in rows:
row.setValue(md.MDL_REF, count)
row.addToMd(mdNewClasses)
count += 1
# MD_APPEND is passed as the write mode of the 'classes' block.
mdNewClasses.write('classes@'
+ self._getExtraPath('split_last_classes.xmd'),
MD_APPEND)
# Generate the intermediate images and the blocks of the intermediate
# classes for the unchanged classes
count=1
for i in range(len(listNumImgs)):
if listNumImgs[i] != -1:
# Read the list of images in this class
mdImgsInClass = md.MetaData(self._getExtraPath(
'dataClass%06d.xmd' % (i+1)))
mdImgsInClass.fillConstant(md.MDL_REF, count)
mdImgsInClass.write(self._getExtraPath('dataClass%06d.xmd' %
count))
count += 1
# Add the two new classes
# Blocks 'class000001_images' and 'class000002_images' of classesOut are
# stored as the next 'dataClass' files.
for newRef in range(0, 2):
mdImgsInClass = md.MetaData(
'class%06d_images@%s' % (newRef + 1, classesOut))
mdImgsInClass.fillConstant(md.MDL_REF, count)
mdImgsInClass.write(self._getExtraPath('dataClass%06d.xmd' % count))
count+=1
[docs] def averageClasses(self, finalMetadata, lastMetadata, newMetadata,
flag_iter):
# Combines, pair by pair, the class images of lastMetadata and newMetadata
# into the stack 'finalName', weighting each image by its share of the
# summed MDL_CLASS_COUNT values. Returns the list 'total' with one count
# per pair.
# With flag_iter the stack name is derived from the first image of
# finalMetadata; otherwise finalMetadata itself is used as the stack name.
# NOTE(review): the indentation of this listing is not visible; 'i += 1'
# presumably runs once per pair - confirm against the original file.
metadataItemLast = md.MetaData(lastMetadata)
metadataItemNew = md.MetaData(newMetadata)
if flag_iter:
metadataFinal = md.MetaData(finalMetadata)
finalName = metadataFinal.getValue(md.MDL_IMAGE, 1)
# Drops the first 7 characters (presumably the 'NNNNNN@' index prefix)
# and replaces the last 3 characters with 'stk'.
finalName = finalName[7:-3]+'stk'
else:
finalName = finalMetadata
total=[]
newRef = 1
i = 1
for itemLast, itemNew in zip(metadataItemLast, metadataItemNew):
listToMultiply = []
numImgsLastClasses = metadataItemLast.getValue(
md.MDL_CLASS_COUNT, itemLast)
nameRefLastClasses = metadataItemLast.getValue(md.MDL_IMAGE,
itemLast)
nameRefNewClasses = metadataItemNew.getValue(md.MDL_IMAGE,
itemNew)
numImgsNewClasses = metadataItemNew.getValue(md.MDL_CLASS_COUNT,
itemNew)
# self.randRef is set to None in _insertAllSteps; while it stays None
# this comparison is never true.
if flag_iter==False and i==self.randRef:
total.append(numImgsNewClasses)
else:
total.append(numImgsLastClasses + numImgsNewClasses)
i += 1
# Pairs without any image produce no output image.
if (numImgsLastClasses + numImgsNewClasses)==0:
continue
# Only one side has images: its reference is written unchanged.
if numImgsNewClasses==0:
im1 = Image(nameRefLastClasses)
im1.write('%06d@' % newRef + finalName)
newRef += 1
continue
if numImgsLastClasses==0:
im2 = Image(nameRefNewClasses)
im2.write('%06d@' % newRef + finalName)
newRef += 1
continue
# Weights: fraction of the images contributed by each side.
listToMultiply.append(float(numImgsLastClasses)/
float(numImgsLastClasses + numImgsNewClasses))
listToMultiply.append(float(numImgsNewClasses)/
float(numImgsLastClasses + numImgsNewClasses))
im1 = Image(nameRefLastClasses)
im2 = Image(nameRefNewClasses)
# The new reference is replaced by the result of image_align(im1, im2)
# before both images are weighted and added.
im2 = emlib.image_align(im1, im2)
im1.inplaceMultiply(listToMultiply[0])
im2.inplaceMultiply(listToMultiply[1])
im1.convert2DataType(DT_DOUBLE)
im2.convert2DataType(DT_DOUBLE)
im1.inplaceAdd(im2) # Aligned
im1.write('%06d@' % newRef + finalName)
newRef+=1
return total
def generateOutputClasses(self, classesOut, firstTime):
    """Update 'last_classes' and the per-class image lists.

    The first time, the header of ``classesOut`` is saved as
    'last_classes.xmd' and its two image blocks become the first two
    'dataClass' files. Afterwards the new classes are averaged with the
    previous ones, empty classes are dropped and the per-class image
    lists are merged under the renumbered references.
    """
    lastClassesFn = self._getExtraPath('last_classes.xmd')

    if firstTime:
        self._saveFileDataClasses(classesOut, lastClassesFn)
        # Store the two new classes
        for ref in (1, 2):
            blockMd = md.MetaData('class%06d_images@%s' % (ref, classesOut))
            blockMd.write(self._getExtraPath('dataClass%06d.xmd' % ref))
        return

    auxStack = self._getExtraPath('aux_classes.stk')
    counts = self.averageClasses(auxStack, lastClassesFn, classesOut, False)
    copy(auxStack, self._getExtraPath('last_classes.stk'))

    # (old reference, count) of every class that keeps some image; the
    # position in this list, starting at 1, is the new reference.
    kept = [(oldRef, count)
            for oldRef, count in enumerate(counts, start=1)
            if count != 0]

    mdAll = md.MetaData()
    for newRef, (_, count) in enumerate(kept, start=1):
        row = md.Row()
        row.setValue(md.MDL_REF, newRef)
        row.setValue(md.MDL_IMAGE, '%06d@' % newRef + auxStack)
        row.setValue(md.MDL_CLASS_COUNT, count)
        row.addToMd(mdAll)
    mdAll.write('classes@' + auxStack[:-3] + 'xmd', MD_APPEND)
    copy(self._getExtraPath('aux_classes.xmd'), lastClassesFn)

    for newRef, (oldRef, _) in enumerate(kept, start=1):
        self._unionDataClass(classesOut, oldRef, newRef)
[docs] def iterationStep (self, refSet, imgsExp, iter, flag_split):
# Runs one classification iteration of the images in imgsExp against the
# references in refSet and returns (outImgs, classesOut), the output file
# names obtained from _getOutputSplitFn (flag_split) or _getOutputClassFn.
# In split mode xmipp_classify_CL2D is run; otherwise the program named by
# CUDA_ALIGN_SIGNIFICANT is run.
# NOTE(review): the indentation of this listing is not visible, so it cannot
# be confirmed here which statements belong to the 'iter==0 and flag_split'
# branch. 'iter' shadows the builtin of the same name.
if flag_split:
outImgs, classesOut = self._getOutputSplitFn()
else:
outImgs, classesOut = self._getOutputClassFn()
outDirName = splitext(imgsExp)[0]
if iter==0 and flag_split==True:
# First step: divide the metadata input file to generate
# a couple of references
self._params = {'imgsExp': imgsExp,
'outDir': outDirName}
args = ('-i %(imgsExp)s -n 2 --oroot %(outDir)s')
self.runJob("xmipp_metadata_split", args % self._params,
numberOfMpi=1)
# Second step: calculate the means of the previous metadata
expSet1 = outDirName + '000001.xmd'
avg1 = outDirName + '_000001'
expSet2 = outDirName + '000002.xmd'
avg2 = outDirName + '_000002'
self._params = {'imgsSet': expSet1,
'outputAvg': avg1}
args = ('-i %(imgsSet)s --save_image_stats %(outputAvg)s -v 0')
self.runJob("xmipp_image_statistics", args % self._params,
numberOfMpi=1)
self._params = {'imgsSet': expSet2,
'outputAvg': avg2}
args = ('-i %(imgsSet)s --save_image_stats %(outputAvg)s -v 0')
self.runJob("xmipp_image_statistics", args % self._params,
numberOfMpi=1)
# Third step: generate a single metadata with the two previous avgs
refSet = self._getExtraPath('refSet.xmd')
self._params = {'avg1': avg1 + 'average.xmp',
'avg2': avg2 + 'average.xmp',
'outputMd': refSet}
args = ('-i %(avg1)s --set union %(avg2)s -o %(outputMd)s')
self.runJob("xmipp_metadata_utilities", args % self._params,
numberOfMpi=1)
# Fourth step: calling program xmipp_cuda_align_significant
metadataRef = md.MetaData(refSet)
# NOTE(review): the command below reads 'outputMd' from self._params, a key
# that is only set in the third step above - verify that this cannot be
# reached with a self._params left over from another call.
if metadataRef.containsLabel(md.MDL_REF) is False:
args = ('-i %(outputMd)s --fill ref lineal 1 1 -o %(outputMd)s')
self.runJob("xmipp_metadata_utilities", args % self._params,
numberOfMpi=1)
# GpuListCuda collects consecutive indices (0, 1, ...), one per GPU, each
# followed by a blank.
count = 0
GpuListCuda = ''
if self.useQueueForSteps() or self.useQueue():
# The device list is taken from the CUDA_VISIBLE_DEVICES variable, which
# must already be defined in this branch.
GpuList = os.environ["CUDA_VISIBLE_DEVICES"]
GpuList = GpuList.split(",")
for elem in GpuList:
GpuListCuda = GpuListCuda + str(count) + ' '
count += 1
else:
# CUDA_VISIBLE_DEVICES is overwritten with the ids returned by
# getGpuList(), each followed by a comma.
GpuListAux = ''
for elem in self.getGpuList():
GpuListCuda = GpuListCuda + str(count) + ' '
GpuListAux = GpuListAux + str(elem) + ','
count += 1
os.environ["CUDA_VISIBLE_DEVICES"] = GpuListAux
if flag_split:
# Path of the classes file to be copied once CL2D has run
# ('<root>_classes.xmd' under 'extra/level_00/').
fileTocopy = classesOut.replace('.xmd', '_classes.xmd')
fileTocopy = fileTocopy.replace('extra/', 'extra/level_00/')
self._params = {'imgsRef': refSet,
'imgsExp': imgsExp,
'maxshift': self.maximumShift,
'Nrefs': md.getSize(refSet),
'outDir': self._getExtraPath(),
'outImgCuda': self._getExtraPath("images.xmd"),
'rootFn': classesOut.split('/')[-1].replace('.xmd', ''),
'keepBest': self.keepBest.get(),
'outClassesCuda': fileTocopy,
}
args = '-i %(imgsExp)s --ref0 %(imgsRef)s --nref %(Nrefs)d ' \
'--iter 1 --distance correlation --classicalMultiref ' \
'--maxShift %(maxshift)d --odir %(outDir)s --oroot %(' \
'rootFn)s --dontMirrorImages '
self.runJob("xmipp_classify_CL2D",
args % self._params, numberOfMpi=self.numberOfMpi.get())
copy(fileTocopy, classesOut)
copy(self._getExtraPath("images.xmd"), outImgs)
else:
# Several keys of this dictionary (keepBest, maxshift, outputClassesFile,
# auxOut) do not appear in the argument string below, which passes
# '--keepBestN 1' literally.
self._params = {'imgsRef': refSet,
'imgsExp': imgsExp,
'outputFile': outImgs,
'keepBest': self.keepBest.get(),
'maxshift': self.maximumShift,
'outputClassesFile': classesOut,
'device': GpuListCuda,
'outputClassesFileNoExt': splitext(classesOut)[0],
'auxOut': self._getExtraPath('flipReferences%06d.xmd'%iter),
}
args = '-i %(imgsExp)s -r %(imgsRef)s -o %(outputFile)s ' \
'--keepBestN 1 --oUpdatedRefs %(outputClassesFileNoExt)s --dev %(device)s '
self.runJob(CUDA_ALIGN_SIGNIFICANT, args % self._params,
numberOfMpi=1)
# Clean-up of the intermediate files of the initial two-way split.
# NOTE(review): expSet1, expSet2, avg1 and avg2 are only assigned in the
# 'iter==0 and flag_split' branch - verify this check cannot succeed on a
# call that skipped that branch.
if exists(outDirName + '000001.xmd'):
cleanPath(expSet1)
cleanPath(expSet2)
cleanPath(avg1 + 'average.xmp')
cleanPath(avg2 + 'average.xmp')
cleanPath(avg1 + 'stddev.xmp')
cleanPath(avg2 + 'stddev.xmp')
return outImgs, classesOut
def checkSplit(self):
    """Split the classes that have grown beyond the allowed size.

    The classes of 'last_classes.xmd' are visited from the most to the
    least populated one. While fewer than ``numClasses`` classes exist,
    the first class holding at least 1.75 times ``threshold`` images is
    split; the class lists are then reloaded from
    'split_last_classes.xmd' (which also replaces 'last_classes.xmd') and
    the scan starts again.
    """
    self._readClassLists(self._getExtraPath('last_classes.xmd'))
    auxList = sorted(self.listNumImgs, reverse=True)

    i = 0
    while i < len(self.listNumImgs):
        # Checked inside the loop: if some class is lost, one more split
        # is still allowed.
        if len(self.listNumImgs) >= self.numClasses.get():
            break

        if auxList[i] < (1.75 * self.threshold.get()):
            i += 1
            continue

        # Mark the class to be split with -1 and split its images.
        maxValue = auxList[i]
        maxPos = self.listNumImgs.index(maxValue)
        self.listNumImgs[maxPos] = -1
        bestRef = self.listRefImgs[maxPos]
        self.splitStep(self._getExtraPath('dataClass%06d.xmd' % bestRef))

        # Reload the classes produced by the split and start over.
        outSet = self._getExtraPath('split_last_classes.xmd')
        self._readClassLists(outSet)
        copy(outSet, self._getExtraPath('last_classes.xmd'))
        auxList = sorted(self.listNumImgs, reverse=True)
        i = 0


def _readClassLists(self, classesFn):
    """Fill listNameImgs, listNumImgs and listRefImgs from ``classesFn``."""
    self.listNameImgs = []
    self.listNumImgs = []
    self.listRefImgs = []
    metadataItem = md.MetaData(classesFn)
    for item in metadataItem:
        self.listNameImgs.append(
            metadataItem.getValue(md.MDL_IMAGE, item))
        self.listNumImgs.append(
            metadataItem.getValue(md.MDL_CLASS_COUNT, item))
        self.listRefImgs.append(
            metadataItem.getValue(md.MDL_REF, item))
def _loadInputList(self):
    """Reload the list of input particles that are still pending.

    Items created after the stored creation time and not registered in
    ``htAlreadyProcessed`` are cloned into ``self.listOfParticles``; the
    stream state of the set is kept in ``self.isStreamClosed``.
    """
    inputSet = self._loadInputParticleSet()
    sinceDate = self._readCreationTimeFile()
    self.isStreamClosed = inputSet.getStreamState()
    self.listOfParticles = []

    newItems = inputSet.iterItems(orderBy='creation',
                                  where="creation>'%s'" % sinceDate)
    for particle in newItems:
        if self.htAlreadyProcessed.isItemPresent(particle.getObjId()):
            continue
        pending = particle.clone()
        pending.setObjCreation(particle.getObjCreation())
        self.listOfParticles.append(pending)

    inputSet.close()
    self.debug("Closed db.")
def _loadInputParticleSet(self):
    """Return a new ``SetOfParticles`` copied from the input set file.

    The set opened on the input file is closed before returning.
    """
    inputFn = self.inputParticles.get().getFileName()
    onDisk = SetOfParticles(filename=inputFn)
    onDisk.loadAllProperties()
    snapshot = SetOfParticles()
    snapshot.copy(onDisk)
    onDisk.close()
    return snapshot
def _getUniqueFn(self, basename, list):
if list == []:
fn = basename + "_1.xmd"
else:
number = int(list[-1].split("_")[-1].split(".")[0]) + 1
fn = basename + "_%s.xmd" % number
list.append(fn)
return fn
def _getInputFn(self):
basename = self._getExtraPath('imagesExp')
return self._getUniqueFn(basename, self.listInFn)
def _getOutputSplitFn(self):
nameImages = self._getExtraPath('split_general_images')
imagesFn = self._getUniqueFn(nameImages, self.listOutSplitFn)
classesFn = imagesFn.replace('images', 'classes')
return imagesFn, classesFn
def _getOutputClassFn(self):
nameImages = self._getExtraPath('class_general_images')
imagesFn = self._getUniqueFn(nameImages, self.listOutFn)
classesFn = imagesFn.replace('images', 'classes')
return imagesFn, classesFn
def _readDoneList(self):
return [fn for fn in self.listOutFn if fn not in self.doneListFn]
def _getFirstJoinStep(self):
for s in self._steps:
if s.funcName == 'createOutputStep':
return s
return None
def _createFinalClasses(self):
# Here the defineOutputs function will call the write() method
outputSet = self._createSetOfClasses2D(self.inputParticles)
self._fillClasses(outputSet)
self._defineOutputs(**{'outputClasses': outputSet})
self._defineSourceRelation(self.inputParticles, outputSet)
self._store(outputSet)
# Close set databaset to avoid locking it
outputSet.close()
def _updateOutputSet(self, state=Set.STREAM_OPEN):
    """Rebuild the 'outputAverages' set from the current classes.

    An existing 'averages.sqlite' file is removed first. ``state`` is
    accepted but not used by this method.
    """
    averagesDb = self._getPath('averages.sqlite')
    if exists(averagesDb):
        cleanPath(averagesDb)

    inputSet = self.inputParticles.get()
    averages = self._createSetOfAverages()
    averages.copyInfo(inputSet)
    self._fillAverages(averages)

    # Registering the output makes the set be written.
    self._defineOutputs(outputAverages=averages)
    self._defineSourceRelation(self.inputParticles, averages)
    self._store(averages)
    # Close the set database so that it does not stay locked.
    averages.close()
def _updateParticle(self, item, row):
    """Set on ``item`` the class id (MDL_REF) and the 2D alignment
    transform read from ``row``."""
    classRef = row.getValue(md.MDL_REF)
    item.setClassId(classRef)
    transform = rowToAlignment(row, ALIGN_2D)
    item.setTransform(transform)
def _updateClass(self, item):
classId = item.getObjId()
if classId in self._classesInfo:
index, fn, _ = self._classesInfo[classId]
item.setAlignment2D()
rep = item.getRepresentative()
rep.setLocation(index, fn)
rep.setSamplingRate(self.newSamplingRate)
def _loadClassesInfo(self, filename):
    """Read the 2D classes of the metadata file ``filename``.

    ``self._classesInfo`` maps the 1-based position of every row to the
    tuple (index, file name, cloned row) of its image.
    """
    self._classesInfo = {}
    mdClasses = md.MetaData(filename)
    for classId, row in enumerate(md.iterRows(mdClasses), start=1):
        index, fn = xmippToLocation(row.getValue(md.MDL_IMAGE))
        self._classesInfo[classId] = (index, fn, row.clone())
def _fillClasses(self, clsSet):
    """Classify the items of ``clsSet`` according to 'last_images.xmd',
    taking the class information from 'last_classes.xmd'."""
    particlesFn = self._getExtraPath('last_images.xmd')
    classesFn = self._getExtraPath('last_classes.xmd')
    self._loadClassesInfo(classesFn)

    mdIterator = md.SetMdIterator(particlesFn,
                                  sortByLabel=md.MDL_ITEM_ID,
                                  updateItemCallback=self._updateParticle,
                                  skipDisabled=True)
    clsSet.classifyItems(updateItemCallback=mdIterator.updateItem,
                         updateClassCallback=self._updateClass)
def _fillAverages(self, avgSet):
    """Append to ``avgSet`` one particle per row of the 'classes' block
    of 'last_classes.xmd', using the row reference as object id."""
    classesBlock = "classes@" + self._getExtraPath('last_classes.xmd')
    classesMd = md.MetaData(classesBlock)
    for classRow in md.iterRows(classesMd):
        average = rowToParticle(classRow)
        average.setObjId(classRow.getValue(md.MDL_REF))
        avgSet.append(average)
def _saveFileDataClasses(self, fn, fnSave):
line = ""
numLine = 0
while not line.startswith('data_class0'):
numLine += 1
line = popen('sed -n %ip %s' % (numLine, fn)).read()
system('head -%i %s >> %s' % (numLine - 1, fn, fnSave))
def _createFinalMetadata(self):
# For every class of 'last_classes.xmd' that has a 'dataClass%06d.xmd'
# file, builds 'dataImages%06d.xmd' (header renamed to the block
# 'class%06d_images' followed by the data rows), appends data to
# 'last_images.xmd' and appends the new block to 'last_classes.xmd'.
# All the copying is done with shell commands (sed, head, tail, cat).
# NOTE(review): the indentation of this listing is not visible, so the
# nesting of the statements cannot be confirmed from here.
fnClasses = self._getExtraPath('last_classes.xmd')
numClasses = md.getSize(fnClasses)
for i in range(numClasses):
num=i+1
fn = self._getExtraPath('dataClass%06d.xmd' % num)
if exists(fn):
# Find the number of the first line of fn that starts with a blank,
# reading one line per sed call.
# NOTE(review): past the end of the file sed prints nothing, so this loop
# would not terminate if no line starts with a blank.
line = ""
numLine = 0
while not line.startswith(' '):
numLine += 1
line = popen('sed -n %ip %s' % (numLine, fn)).read()
# The lines before that one are stored in the temporary 'aux.xmd'.
aux = self._getExtraPath('aux.xmd')
system('head -%i %s >> %s' % (numLine - 1, fn, aux))
dataHeader = self._getExtraPath('dataHeader.xmd')
fp = open(aux, 'r')
fout = open(dataHeader, 'w')
# Copy the header without its first two lines, renaming the 'noname'
# block. This loop reuses the name 'i' of the outer loop; 'num' keeps the
# class number and 'i' is read again in the tail command below.
for i, line in enumerate(fp):
if i<2:
continue
if not line.startswith('data_noname'):
fout.write(line)
else:
line = line.replace('noname', 'class%06d_images'%num)
fout.write(line)
fp.close()
fout.close()
cleanPath(aux)
fnOut = self._getExtraPath('dataImages%06d.xmd' % num)
# 'aux.xmd' now receives the lines of fn from line i+2 onwards.
system('tail -n +%i %s >> %s' % (i+2, fn, aux))
system('cat %s %s >> %s'%(dataHeader, aux, fnOut))
# 'last_images.xmd' gets the whole file for class 1 and only the content
# of 'aux.xmd' for the other classes.
if num==1:
system('cat %s >> %s' % (fn,
self._getExtraPath('last_images.xmd')))
else:
system('cat %s >> %s' %(aux,
self._getExtraPath('last_images.xmd')))
cleanPath(aux)
cleanPath(dataHeader)
system('cat %s >> %s' % (fnOut,
self._getExtraPath('last_classes.xmd')))
def _unionDataClass(self, fn, refOld, refNew):
# Merges the images of block 'class<refOld>_images' of the file fn into the
# 'dataClass' file of the class, which is stored under the number refNew;
# MDL_REF is set to refNew whenever refOld and refNew differ.
# NOTE(review): the indentation of this listing is not visible, so the
# nesting of the branches below cannot be confirmed from here.
fnNew = self._getExtraPath('dataClass%06d.xmd' % refNew)
fnOld = self._getExtraPath('dataClass%06d.xmd' % refOld)
# Renumber the stored images of the old class in place.
if refOld != refNew and exists(fnOld):
mdOld = md.MetaData(fnOld)
mdOld.fillConstant(md.MDL_REF, refNew)
mdOld.write(fnOld)
if exists(fnOld):
fnAux = self._getExtraPath('aux.xmd')
mdImgsInClass = md.MetaData('class%06d_images@%s' % (refOld, fn))
if not mdImgsInClass.isEmpty():
if refOld != refNew:
mdImgsInClass.fillConstant(md.MDL_REF, refNew)
mdImgsInClass.write(fnAux)
# Find the number of the first line of fnAux that starts with a blank,
# reading one line per sed call.
# NOTE(review): past the end of the file sed prints nothing, so this loop
# would not terminate if no line starts with a blank.
line = ""
numLine = 0
while not line.startswith(' '):
numLine += 1
line = popen('sed -n %ip %s' % (numLine, fnAux)).read()
# The lines of fnAux from that line onwards go to a temporary file.
fnSave = self._getExtraPath('newDataClass%06d.xmd'%refNew)
system('tail -n +%i %s >> %s' % (numLine, fnAux, fnSave))
cleanPath(fnAux)
if refOld==refNew:
system('cat %s >> %s' % (fnSave, fnNew))
else:
if exists(fnNew):
cleanPath(fnNew)
system('cat %s %s >> %s' %(fnOld, fnSave, fnNew))
cleanPath(fnSave)
else:
if refOld!=refNew:
if exists(fnNew):
cleanPath(fnNew)
copy(fnOld, fnNew)
else:
mdImgsInClass = md.MetaData('class%06d_images@%s' % (refOld, fn))
if refOld != refNew:
mdImgsInClass.fillConstant(md.MDL_REF, refNew)
mdImgsInClass.write(fnNew)
def _unionReclassification(self, ref, inputImgs):
    """Move the data rows of a class file to the end of ``inputImgs``.

    Every line of 'dataClass<ref>.xmd' from its first line starting with
    a blank onwards is appended to ``inputImgs``; the class file is then
    removed. Nothing is appended when no line starts with a blank (the
    previous line-by-line ``sed`` scan never terminated in that case).

    :param ref: number of the class whose file is consumed.
    :param inputImgs: path of the file to append to.
    """
    fn1 = self._getExtraPath('dataClass%06d.xmd' % ref)
    # Bytes are used so that the content is copied verbatim.
    with open(fn1, 'rb') as source:
        lines = source.readlines()
    firstRow = next((pos for pos, line in enumerate(lines)
                     if line.startswith(b' ')), len(lines))
    with open(inputImgs, 'ab') as target:
        target.writelines(lines[firstRow:])
    cleanPath(fn1)
def _saveCreationTimeFile(self, cTime):
fn = open(self._getExtraPath('creation.txt'),'w')
fn.write(cTime)
fn.close()
def _readCreationTimeFile(self):
if exists(self._getExtraPath('creation.txt')):
fn = open(self._getExtraPath('creation.txt'), 'r')
cTime = fn.readline()
fn.close()
else:
cTime = 0
return cTime
def _savingCheckPoint(self):
if not exists(join(self._getExtraPath(), 'checkpoint')):
mkdir(join(self._getExtraPath(), 'checkpoint'))
listFolder = listdir(self._getExtraPath())
for fn in listFolder:
if fn.startswith('dataClass'):
copy(join(self._getExtraPath(),fn),
self._getExtraPath(join('checkpoint',fn)))
copy(self._getExtraPath('last_classes.xmd'),
self._getExtraPath(join('checkpoint', 'last_classes.xmd')))
if exists(self._getExtraPath('last_classes.stk')):
copy(self._getExtraPath('last_classes.stk'),
self._getExtraPath(join('checkpoint', 'last_classes.stk')))
def _readingCheckPoint(self):
if exists(join(self._getExtraPath(), 'checkpoint')):
listFolder = listdir(join(self._getExtraPath(), 'checkpoint'))
for fn in listFolder:
copy(self._getExtraPath(join('checkpoint',fn)),
self._getExtraPath(fn))
# --------------------------- INFO functions -------------------------------
def _validate(self):
errors = []
newSize = self.imageSize.get()
x, y, _ = self.inputParticles.get().getDim()
if newSize>x or newSize>y:
errors.append('The image size must be smaller than the size of '
'the input images')
return errors
def _summary(self):
summary = []
if not hasattr(self, 'outputClasses'):
summary.append("Output alignment not ready yet.")
else:
summary.append("Input Particles: %s"
% self.inputParticles.get().getSize())
summary.append("Aligned in streaming.")
return summary
def _citations(self):
return ['Sorzano2010a']
def _methods(self):
methods = []
if not hasattr(self, 'outputClasses'):
methods.append("Output alignment not ready yet.")
else:
methods.append(
"We aligned images %s in streaming using CL2D "
"[Sorzano2010a] and GPU correlation methods " %
self.getObjectTag('inputParticles'))
methods.append(" and produced %s images."
% self.getObjectTag('outputClasses'))
return methods