Source code for xmipp3.protocols.protocol_eliminate_empty_images

# *****************************************************************************
# *
# * Authors:     Tomas Majtner         tmajtner@cnb.csic.es (2017)
# *              David Maluenda        dmaluenda@cnb.csic.es
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# *****************************************************************************

import os, sys
from datetime import datetime

import pwem.emlib.metadata as md
import pyworkflow.protocol.constants as cons
import pyworkflow.protocol.params as param
from pyworkflow import VERSION_2_0
from pyworkflow.object import Set
from pyworkflow.utils import cleanPath
from pyworkflow.utils.properties import Message

from pwem import ALIGN_NONE
from pwem.protocols import ProtClassify2D
from pwem.objects import SetOfParticles, SetOfAverages, SetOfClasses2D, Class2D, SetOfClasses, SetOfImages

from xmipp3.convert import (writeSetOfParticles, readSetOfParticles,
                            setXmippAttributes)


[docs]class XmippProtEliminateEmptyBase(ProtClassify2D): """ Base to eliminate images using statistical methods (variance of variances of sub-parts of input image) eliminates those samples, where there is no object/particle (only noise is presented there). Threshold parameter can be used for fine-tuning the algorithm for type of data. """ _lastUpdateVersion = VERSION_2_0 def __init__(self, **args): ProtClassify2D.__init__(self, **args) self.stepsExecutionMode = cons.STEPS_PARALLEL
[docs] def addAdvancedParams(self, form): form.addParam('addFeatures', param.BooleanParam, default=False, label='Add features', expertLevel=param.LEVEL_ADVANCED, help='Add features used for the ranking to each ' 'one of the input particles.') form.addParam('useDenoising', param.BooleanParam, default=True, label='Turning on denoising', expertLevel=param.LEVEL_ADVANCED, help='Option for turning on denoising method ' 'while computing emptiness feature.') form.addParam('denoising', param.IntParam, default=5, expertLevel=param.LEVEL_ADVANCED, condition='useDenoising', label='Denoising factor:', help='Factor to be used during Gaussian blurring. ' 'Higher value applies stronger denoising, ' 'could be more precise but also slower.')
# --------------------------- INSERT steps functions ---------------------- def _insertAllSteps(self): self.lenPartsSet = 0 # inputSize after checkNewInput self.outputSize = 0 # outputSize after checkNewOutput self.check = None # last creationTime of the processed images self.stepCount = 0 # to label the input.xmd self.fnInputMd = self._getExtraPath("input%d.xmd") # to feed the bin self.fnOutMdTmp = self._getExtraPath("outTemp.xmd") # partial out self.fnElimMdTmp = self._getExtraPath("elimTemp.xmd") # partial out2 self.fnOutputMd = self._getExtraPath("output.xmd") # final out self.fnElimMd = self._getExtraPath("eliminated.xmd") # final out2 checkStep = self._insertNewPartsSteps() self._insertFunctionStep('createOutputStep', prerequisites=checkStep, wait=True) def _insertNewPartsSteps(self): deps = [] self.stepCount += 1 stepId = self._insertFunctionStep('eliminationStep', self.stepCount, prerequisites=[]) deps.append(stepId) return deps
[docs] def eliminationStep(self, stepId): """ Common code for particles and classes/averages """ fnInputMd = self.fnInputMd % stepId partsSet = self.prepareImages() if self.check == None: # if no previous, get all writeSetOfParticles(partsSet, fnInputMd, alignType=ALIGN_NONE, orderBy='creation') else: # if previous, take the last ones writeSetOfParticles(partsSet, fnInputMd, alignType=ALIGN_NONE, orderBy='creation', where='creation>"' + str(self.check) + '"') # special use of partSet before closing it self.specialBehavoir(partsSet) args = "-i %s -o %s -e %s -t %f" % (fnInputMd, self.fnOutputMd, self.fnElimMd, self.threshold.get()) if self.addFeatures: args += " --addFeatures" if self.useDenoising: args += " --useDenoising -d %f" % self.denoising.get() self.runJob("xmipp_image_eliminate_empty_particles", args)
[docs] def specialBehavoir(self, inSet): """ To be implemented by child. Must set self.check and inSet.close() """ pass
def _getFirstJoinStep(self): for s in self._steps: if s.funcName == self._getFirstJoinStepName(): return s return None def _getFirstJoinStepName(self): # This function will be used for streaming, to check which is # the first function that need to wait for all particles # to have completed, this can be overriden in subclasses # (e.g., in Xmipp 'sortPSDStep') return 'createOutputStep'
[docs] def createOutputStep(self): pass
def _stepsCheck(self): # Input particles set can be loaded or None when checked for new inputs # If None, we load it self._checkNewInput() self._checkNewOutput() def _checkNewInput(self): # Check if there are new particles to process from the input set partsFile = self.getInput().getFileName() self.lastmTime = getattr(self, 'lastmTime', None) mTime = datetime.fromtimestamp(os.path.getmtime(partsFile)) # If the input movies.sqlite have not changed since our last check, # it does not make sense to check for new input data if self.lastmTime == mTime: return self.lastmTime = mTime outputStep = self._getFirstJoinStep() self.prepareImages() fDeps = self._insertNewPartsSteps() if outputStep is not None: outputStep.addPrerequisites(*fDeps) self.updateSteps()
[docs] def prepareImages(self): """ Must set: - self.inputImages: Images to process in a SetOfImages. - self.streamClosed: Streaming state of the input. - self.lenPartsSet: Size of the input set. """ pass
def _checkNewOutput(self): if getattr(self, 'finished', False): return self.finished = self.inputImages.isStreamClosed() and self.outputSize == self.lenPartsSet self.createOutputs() if self.finished: # Unlock createOutputStep if finished all jobs cleanPath(self._getPath('particlesAUX.sqlite')) cleanPath(self._getPath('averagesAUX.sqlite')) outputStep = self._getFirstJoinStep() if outputStep and outputStep.isWaiting(): outputStep.setStatus(cons.STATUS_NEW)
[docs] def createOutputs(self): """ To be implemented by child. (create, fill and close the outputSet) """ pass
def _loadOutputSet(self, SetClass, baseName): setFile = self._getPath(baseName) if os.path.exists(setFile): outputSet = SetClass(filename=setFile) outputSet.loadAllProperties() outputSet.enableAppend() else: outputSet = SetClass(filename=setFile) outputSet.setStreamState(outputSet.STREAM_OPEN) inputs = self.inputImages outputSet.copyInfo(inputs) return outputSet def _updateOutputSet(self, outputName, outputSet, state=Set.STREAM_OPEN): outputSet.setStreamState(state) if self.hasAttribute(outputName): outputSet.write() # Write to commit changes outputAttr = getattr(self, outputName) # Copy the properties to the object contained in the protocol outputAttr.copy(outputSet, copyId=False) # Persist changes self._store(outputAttr) else: self._defineOutputs(**{outputName: outputSet}) self._defineTransformRelation(self.getInput(), outputSet) self._store(outputSet) # Close set databaset to avoid locking it outputSet.close() # --------------------------- UTILS functions ----------------------------- def _updateParticle(self, item, row): setXmippAttributes(item, row, md.MDL_SCORE_BY_EMPTINESS) if row.getValue(md.MDL_ENABLED) <= 0: item._appendItem = False else: item._appendItem = True
[docs] def getInput(self): """ Get the input as it is in the form """ pass
[docs]class XmippProtEliminateEmptyParticles(XmippProtEliminateEmptyBase): """ Takes a set of particles and using statistical methods (variance of variances of sub-parts of input image) eliminates those samples, where there is no object/particle (only noise is presented there). Threshold parameter can be used for fine-tuning the algorithm for type of data. """ _label = 'eliminate empty particles' def __init__(self, **args): XmippProtEliminateEmptyBase.__init__(self, **args) # --------------------------- DEFINE param functions ---------------------- def _defineParams(self, form): form.addSection(label=Message.LABEL_INPUT) # - - - F O R P A R T I C L E S - - - form.addParam('inputParticles', param.PointerParam, important=True, label="Input particles", pointerClass='SetOfParticles', help='Select the input particles to be classified.') form.addParam('threshold', param.FloatParam, default=1.1, label='Threshold used in elimination:', help='Higher threshold => more particles will be ' 'eliminated. Set to -1 for no elimination, even so ' 'the "xmipp_scoreEmptiness" value will be attached to ' 'every paricle for a posterior inspection.') self.addAdvancedParams(form) # --------------------------- INSERT steps functions ----------------------
[docs] def specialBehavoir(self, partsSet): """ Just setting the self.check """ for p in partsSet.iterItems(orderBy='creation', direction='DESC'): self.check = p.getObjCreation() break partsSet.close()
[docs] def createOutputs(self): streamMode = (Set.STREAM_CLOSED if getattr(self, 'finished', False) else Set.STREAM_OPEN) def updateOutputs(mdFn, suffix): """ Common use for accepted and discarded output. """ newData = os.path.exists(mdFn) # new data if partial out exists lastToClose = (getattr(self, 'finished', False) and # last if fisished hasattr(self, '%sParticles' % suffix)) # and exists if newData or lastToClose: outSet = self._loadOutputSet(SetOfParticles, '%sParticles.sqlite' % suffix) if newData: partsSet = self._createSetOfParticles("AUX") readSetOfParticles(mdFn, partsSet) outSet.copyItems(partsSet, updateItemCallback=self._updateParticle, itemDataIterator=md.iterRows(mdFn, sortByLabel=md.MDL_ITEM_ID)) self.outputSize = self.outputSize + len(partsSet) self._updateOutputSet('%sParticles'%suffix, outSet, streamMode) cleanPath(mdFn) updateOutputs(self.fnOutMdTmp, 'output') updateOutputs(self.fnElimMdTmp, 'eliminated')
[docs] def getInput(self): return self.inputParticles.get()
[docs] def prepareImages(self): self.inputImages = self.getInput() partsSet = self.inputImages partsSet.loadAllProperties() self.streamClosed = partsSet.isStreamClosed() self.lenPartsSet = len(partsSet) return partsSet
DISCARDED = 0 ACCEPTED = 1 ATTACHED = 2
[docs]class XmippProtEliminateEmptyClasses(XmippProtEliminateEmptyBase): """ Takes a set of classes (or averages) and using statistical methods (variances of sub-parts of input image) eliminates those samples, where there is no object/particle (only noise is presented there). Threshold parameter can be used for fine-tuning the algorithm for type of data. Also discards classes with less population than a given percentage. """ _label = 'eliminate empty classes' def __init__(self, **args): XmippProtEliminateEmptyBase.__init__(self, **args) self.enableCls = {} # --------------------------- DEFINE param functions ---------------------- def _defineParams(self, form): form.addSection(label=Message.LABEL_INPUT) # - - - F O R C L A S S E S - - - form.addParam('inputClasses', param.PointerParam, important=True, pointerClass='SetOfClasses, SetOfAverages', label="Input classes", help='Select the input averages to be classified.') form.addParam('threshold', param.FloatParam, default=8.0, label='Threshold used in elimination', help='Higher threshold => more particles will be ' 'eliminated. Set to -1 for no elimination, even so ' 'the "xmipp_scoreEmptiness" value will be attached to ' 'every paricle for a posterior inspection.') form.addParam('usePopulation', param.BooleanParam, default=True, label='Use class population', help="Consider class population to reject a class.") form.addParam('minPopulation', param.FloatParam, default=20, label='Min. population (%)', condition="usePopulation", expertLevel=param.LEVEL_ADVANCED, help="Minimum population to accept a class.\n" "Classes with less population than the mean population " "times this value will be rejected.") self.addAdvancedParams(form) # --------------------------- INSERT steps functions ---------------------- def _validate(self): errors = [] if (not isinstance(self.getInput(), SetOfClasses) and self.usePopulation.get()): errors.append("Using population to reject classes is not possible " "with Averages as input.\nPlease, introduce a " "setOfClasses to analyse or disable the _use class " "population_ option.") return errors
[docs] def specialBehavoir(self, partSet): idsToCheck = [] for p in partSet.iterItems(orderBy='creation', direction='ASC'): self.check = p.getObjCreation() idsToCheck.append(p.getObjId()) partSet.close() self.rejectByPopulation(idsToCheck)
[docs] def createOutputs(self): streamMode = Set.STREAM_CLOSED if getattr(self, 'finished', False) \ else (Set.STREAM_CLOSED if self.streamClosed else Set.STREAM_OPEN) def updateOutputs(mdFn, suffix): lastToClose = getattr(self, 'finished', False) and \ hasattr(self, '%sClasses' % suffix) newData = os.path.exists(mdFn) enableOut = {} if newData or lastToClose: outSet = self._loadOutputSet(SetOfAverages, '%sAverages.sqlite' % suffix) if newData: # if new data, we read it partsSet = self._createSetOfParticles("AUX") readSetOfParticles(mdFn, partsSet) # updating the enableCls dictionary # print(" - %s Averages:" % ("ACCEPTED" if suffix == 'output' else "DISCARTDED")) for part in partsSet: partId = part.getObjId() if partId not in self.enableCls: # this happends when a classifier give an empty class continue # - accept if we are in accepted and the current is accepted # - discard if we are in the discarted scope and any currentStatus = self.enableCls[partId] decision = suffix == 'output' and currentStatus == ACCEPTED enableOut[partId] = ACCEPTED if decision else DISCARDED # print("%d: %s -> %s" % (partId, currentStatus, decision)) # updating the Averages set outSet.copyItems(partsSet, updateItemCallback=self._updateParticle, itemDataIterator=md.iterRows(mdFn, sortByLabel=md.MDL_ITEM_ID)) self.outputSize = self.outputSize + len(partsSet) self._updateOutputSet('%sAverages' % suffix, outSet, streamMode) cleanPath(mdFn) return enableOut accOut = updateOutputs(self.fnOutMdTmp, 'output') discOut = updateOutputs(self.fnElimMdTmp, 'eliminated') self.createOutputClasses('output', streamMode, accOut) self.createOutputClasses('eliminated', streamMode, discOut)
# ------------- UTILS Fuctions ------------------------------------
[docs] def prepareImages(self): inSet = self.getInput() if isinstance(inSet, SetOfImages): firstRep = inSet.getFirstItem() getImage = lambda item: item.clone() self.classesDict = None else: # SetOfClasses firstRep = inSet.getFirstItem().getFirstItem() getImage = lambda item: item.getRepresentative().clone() self.classesDict = {cls.getObjId(): cls.getSize() for cls in inSet} self.inputImages = self._createSetOfAverages("AUX") self.inputImages.enableAppend() self.inputImages.copyAttributes(firstRep, '_samplingRate') self.inputImages.copyAttributes(inSet, '_streamState') self.streamClosed = self.inputImages.isStreamClosed() for item in inSet: self.inputImages.append(getImage(item)) self.lenPartsSet = len(self.inputImages) self.inputImages.write() self._store(self.inputImages) return self.inputImages
[docs] def rejectByPopulation(self, ids): if self.usePopulation.get() and self.classesDict is not None: sizeDict = {clsId: size for clsId, size in self.classesDict.items() if clsId in ids} meanPop = sum(sizeDict.values())/len(sizeDict) for clsId, size in sizeDict.items(): # minPopulation is normalized to 100% not to 1 decision = int(size*100 > meanPop * self.minPopulation.get()) self.enableCls[clsId] = ACCEPTED if decision else DISCARDED else: self.enableCls = {clsId: ACCEPTED for clsId in ids}
[docs] def createOutputClasses(self, suffix, streamingState, enableDict): if not self.classesDict or not enableDict: # If there are no classes, nothing to do return baseName = '%sClasses.sqlite' % suffix setFile = self._getPath(baseName) if os.path.exists(setFile): outputSet = SetOfClasses2D(filename=setFile) outputSet.loadAllProperties() outputSet.enableAppend() else: outputSet = SetOfClasses2D(filename=setFile) outputSet.setStreamState(streamingState) outputSet.copyInfo(self.getInput()) # if fails, delete decision = ACCEPTED if suffix == 'output' else DISCARDED desiredIds = [ids for ids, enable in enableDict.items() if enable == decision] enableFunc = lambda cls: cls.getObjId() in desiredIds outputSet.appendFromClasses(self.getInput(), enableFunc) outputSet.setStreamState(streamingState) outputName = '%sClasses' % suffix if self.hasAttribute(outputName): outputSet.write() # Write to commit changes outputAttr = getattr(self, outputName) # Copy the properties to the object contained in the protocol outputAttr.copy(outputSet, copyId=False) # Persist changes self._store(outputAttr) else: self._defineOutputs(**{outputName: outputSet}) self._defineSourceRelation(self.inputClasses, outputSet) self._store(outputSet) # Close set databaset to avoid locking it outputSet.close()
[docs] def getInput(self): return self.inputClasses.get()