# Source code for xmipp3.protocols.protocol_deep_micrograph_screen

# **************************************************************************
# *
# * Authors:     Ruben Sanchez Garcia
# *              David Maluenda
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address ''
# *
# **************************************************************************

import os

import pyworkflow.utils as pwutils
from pyworkflow.protocol.constants import (STEPS_PARALLEL, STATUS_NEW)
import pyworkflow.protocol.params as params
from pwem.protocols import ProtExtractParticles
from pyworkflow.object import Set, Pointer

from xmipp3.base import XmippProtocol
from xmipp_base import createMetaDataFromPattern
from xmipp3.convert import (writeMicCoordinates, readSetOfCoordinates)
from xmipp3.constants import SAME_AS_PICKING, OTHER

class XmippProtDeepMicrographScreen(ProtExtractParticles, XmippProtocol):
    """Protocol to remove coordinates in carbon zones or large impurities"""
    _label = 'deep micrograph cleaner'
    _conda_env = "xmipp_MicCleaner"

    def __init__(self, **kwargs):
        ProtExtractParticles.__init__(self, **kwargs)
        self.stepsExecutionMode = STEPS_PARALLEL

    # --------------------------- DEFINE param functions ---------------------
    def _defineParams(self, form):
        """ Declare the form parameters: input coordinates, the micrographs
        source, the score threshold, the batch size and the GPU options.
        """
        form.addSection(label='Input')

        form.addParam('inputCoordinates', params.PointerParam,
                      pointerClass='SetOfCoordinates',
                      important=True,
                      label="Input coordinates",
                      help='Select the SetOfCoordinates ')

        form.addParam('micsSource', params.EnumParam,
                      choices=['same as coordinates', 'other'],
                      default=0, important=True,
                      display=params.EnumParam.DISPLAY_HLIST,
                      label='Micrographs source',
                      help='By default, the micrographs from which the computation '
                           'will be performed will be the ones used in the picking '
                           'step ( _same as coordinates_ option ). \n'
                           'If you select other option, you must provide '
                           'a different set of micrographs to evaluate its regions. \n'
                           '*Note*: In the _other_ case, ensure that provided '
                           'micrographs and coordinates are related '
                           'by micName or by micId. Difference in pixel size '
                           'will be handled automatically.\n'
                           '*Note2*: *Particles must be dark* over a bright '
                           'background. If not, use the _other_ option to provide '
                           'an inverted setOfMicrograph.')

        form.addParam('inputMicrographs', params.PointerParam,
                      pointerClass='SetOfMicrographs',
                      condition='micsSource != %s' % SAME_AS_PICKING,
                      important=True, label='Input micrographs',
                      help='Select the SetOfMicrographs from which to extract.')

        form.addParam('useOtherScale', params.EnumParam,
                      choices=['same as coordinates', 'scale to micrographs'],
                      default=0,
                      condition='micsSource != %s' % SAME_AS_PICKING,
                      display=params.EnumParam.DISPLAY_HLIST,
                      label='Coordinates scale',
                      help='If you select _same as coordinates_ option output coordinates '
                           'will be mapped to the original micrographs and thus, they will preserve '
                           'the scale.\nIf you select _scale to micrographs_ option, output coordinates '
                           'will be mapped to the new micrographs and rescaled accordingly.')

        form.addParam("threshold", params.FloatParam, default=-1,
                      label="Threshold",
                      help="Deep learning goodness score to select/discard coordinates. The bigger the threshold " +
                           "the more coordiantes will be ruled out. Ranges from 0 to 1. Use -1 to skip thresholding. " +
                           "Manual thresholding can be performed after execution through analyze results button. " +
                           "\n0.75 <= Recommended threshold <= 0.9")

        form.addParam("streamingBatchSize", params.IntParam, default=-1,
                      label="Batch size",
                      expertLevel=params.LEVEL_ADVANCED,
                      help="This value allows to group several items to be "
                           "processed inside the same protocol step. You can "
                           "use the following values: \n"
                           "*0* Put in the same step all the items available.\n "
                           "*>1* The number of items that will be grouped into "
                           "a step. -1, automatic decission")

        form.addParam("saveMasks", params.BooleanParam, default=False,
                      expertLevel=params.LEVEL_ADVANCED,
                      label="saveMasks", help="Save predicted masks?")

        form.addHidden(params.USE_GPU, params.BooleanParam, default=True,
                       label="Use GPU for execution",
                       help="This protocol has both CPU and GPU implementation. "
                            "Select the one you want to use. CPU may become "
                            "quite slow.")

        form.addHidden(params.GPU_LIST, params.StringParam, default='0',
                       label="Choose GPU IDs",
                       help="Add a list of GPU devices that can be used.")

        # form.addParallelSection(threads=4, mpi=1)

    # --------------------------- INSERT steps functions ---------------------
    def _insertInitialSteps(self):
        """ Create the working folders and cache the scaling properties
        before any processing step is inserted. Returns an empty list.
        """
        # Just overwrite this function to load some info
        # before the actual processing
        pwutils.makePath(self._getExtraPath('inputCoords'))
        pwutils.makePath(self._getExtraPath('outputCoords'))
        if self.saveMasks.get():
            pwutils.makePath(self._getExtraPath("predictedMasks"))
        self._setupBasicProperties()
        return []

    def _insertNewMicsSteps(self, inputMics):
        """ Insert steps to process new mics (from streaming)
        Params:
            inputMics: input mics set to be check
        """
        return self._insertNewMics(inputMics,
                                   lambda mic: mic.getMicName(),
                                   self._insertExtractMicrographStepOwn,
                                   self._insertExtractMicrographListStepOwn,
                                   *self._getExtractArgs())

    def _insertExtractMicrographStepOwn(self, mic, prerequisites, *args):
        """ Single-micrograph steps are not supported by this protocol. """
        raise ValueError("Batch size must be >1")

    def _insertExtractMicrographListStepOwn(self, micList, prerequisites,
                                            *args):
        """ Basic method to insert a picking step for a given micrograph. """
        return self._insertFunctionStep('extractMicrographListStepOwn',
                                        [mic.getMicName() for mic in micList],
                                        *args, prerequisites=prerequisites)

    def extractMicrographListStepOwn(self, micKeyList, *args):
        """ Write the input coordinates of every micrograph in micKeyList,
        run the cleaner on those not already done and mark them as finished.
        """
        micList = []

        for micName in micKeyList:
            mic = self.micDict[micName]
            micDoneFn = self._getMicDone(mic)
            micFn = mic.getFileName()
            coordList = self.coordDict[mic.getObjId()]
            self._convertCoordinates(mic, coordList)

            if self.isContinued() and os.path.exists(micDoneFn):
                self.info("Skipping micrograph: %s, seems to be done" % micFn)
            else:
                # Clean old finished files
                pwutils.cleanPath(micDoneFn)
                self.info("Extracting micrograph: %s " % micFn)
                micList.append(mic)

        self._computeMaskForMicrographList(micList, *args)

        for mic in micList:
            # Mark this mic as finished
            open(self._getMicDone(mic), 'w').close()

    def _computeMaskForMicrographList(self, micList):
        """ Functional Step. Overrided in general protExtracParticle """
        micsFnDone = self.getDoneMics()
        # Only micrographs without a file in 'outputCoords' are processed
        micLisfFn = [mic.getFileName() for mic in micList
                     if pwutils.removeBaseExt(mic.getFileName())
                     not in micsFnDone]

        if micLisfFn:
            inputMicsPathMetadataFname = self._getTmpPath(
                "inputMics" + str(hash(micLisfFn[0])) + ".xmd")
            mics_md = createMetaDataFromPattern(micLisfFn)
            mics_md.write(inputMicsPathMetadataFname)

            args = '-i %s' % inputMicsPathMetadataFname
            args += ' -c %s' % self._getExtraPath('inputCoords')
            args += ' -o %s' % self._getExtraPath('outputCoords')
            args += ' -b %d' % self.getBoxSize()
            args += ' -s 1'  # Downsampling is automatically managed by scipion
            args += ' -d %s' % self.getModel('deepMicrographCleaner',
                                             'defaultModel.keras')

            if self.threshold.get() > 0:
                args += ' --deepThr %f ' % (1 - self.threshold.get())

            if self.saveMasks.get():
                args += ' --predictedMaskDir %s ' % (
                    self._getExtraPath("predictedMasks"))

            if self.useGpu.get():
                if self.useQueueForSteps() or self.useQueue():
                    args += ' -g all '
                else:
                    args += ' -g %s' % (
                        ",".join([str(elem) for elem in self.getGpuList()]))
            else:
                args += ' -g -1'

            self.runJob('xmipp_deep_micrograph_cleaner', args)

    def _checkNewOutput(self):
        """ Register the coordinates of newly finished micrographs and
        release the first join step once everything has been processed.
        """
        if getattr(self, 'finished', False):
            return

        # Load previously done items (from text file)
        doneList = self._readDoneList()
        # Check for newly done items
        newDone = [m for m in self.micDict.values()
                   if m.getObjId() not in doneList and self._isMicDone(m)]

        # Update the file with the newly done mics
        # or exit from the function if no new done mics
        inputLen = len(self.micDict)
        self.debug('_checkNewOutput: ')
        self.debug(' input: %s, doneList: %s, newDone: %s'
                   % (inputLen, len(doneList), len(newDone)))

        allDone = len(doneList) + len(newDone)

        # We have finished when there is not more input mics (stream closed)
        # and the number of processed mics is equal to the number of inputs
        streamClosed = self._isStreamClosed()
        self.finished = streamClosed and allDone == inputLen
        self.debug(' is finished? %s ' % self.finished)
        self.debug(' is stream closed? %s ' % streamClosed)
        streamMode = Set.STREAM_CLOSED if self.finished else Set.STREAM_OPEN

        if newDone:
            self._updateOutputCoordSet(newDone, streamMode)
            self._writeDoneList(newDone)
        elif not self.finished:
            # If we are not finished and no new output have been produced
            # it does not make sense to proceed and updated the outputs
            # so we exit from the function here

            # Maybe it would be good idea to take a snap to avoid
            # so much IO if this protocol does not have much to do now
            if allDone == len(self.micDict):
                self._streamingSleepOnWait()

            return

        self.debug(' finished: %s ' % self.finished)
        self.debug(' self.streamClosed (%s) AND' % streamClosed)
        self.debug(' allDone (%s) == len(self.listOfMics (%s)'
                   % (allDone, inputLen))
        self.debug(' streamMode: %s' % streamMode)

        if self.finished:  # Unlock createOutputStep if finished all jobs
            # Close the output set
            self._updateOutputCoordSet([], Set.STREAM_CLOSED)
            outputStep = self._getFirstJoinStep()
            if outputStep and outputStep.isWaiting():
                outputStep.setStatus(STATUS_NEW)

    def _getScale(self):
        """ Return the factor passed to readSetOfCoordinates when the
        output coordinates are read back.
        """
        if self.micsSource == SAME_AS_PICKING or self.useOtherScale.get() == 1:
            return 1
        return 1. / self.getBoxScale()

    def _updateOutputCoordSet(self, micList, streamMode):
        """ Append the coordinates of micList to the output set, creating
        the set on the first call. Returns micList ([] when it is empty).
        """
        # Do no proceed if there is not micrograph ready
        if not micList:
            return []

        outputDir = self._getExtraPath('outputCoords')
        outputCoords = self.getOutput()

        # If there are not outputCoordinates yet, it means that is the first
        # time we are updating output coordinates, so we need to first create
        # the output set
        firstTime = outputCoords is None

        if firstTime:
            if (self.micsSource == SAME_AS_PICKING
                    or self.useOtherScale.get() == 1):
                boxSize = self.getBoxSize()
                micSetPtr = self.getInputMicrographs()
            else:
                boxSize = self.inputCoordinates.get().getBoxSize()
                micSetPtr = self.inputCoordinates.get().getMicrographs()

            outputCoords = self._createSetOfCoordinates(
                micSetPtr, suffix=self.getAutoSuffix())
            outputCoords.copyInfo(self.inputCoordinates.get())
            outputCoords.setBoxSize(boxSize)
        else:
            outputCoords.enableAppend()

        self.info("Reading coordinates from mics: %s"
                  % ','.join([mic.strId() for mic in micList]))
        readSetOfCoordinates(outputDir, micList, outputCoords,
                             scale=self._getScale())
        self.debug(" _updateOutputCoordSet Stream Mode: %s " % streamMode)
        self._updateOutputSet(self.getOutputName(), outputCoords, streamMode)

        if firstTime:
            self._defineSourceRelation(micSetPtr, outputCoords)

        return micList

    # --------------------------- INFO functions -----------------------------
    def _getStreamingBatchSize(self):
        """ Return how many micrographs are grouped into one step.

        A 'streamingBatchSize' other than -1 is returned as given. With -1
        (automatic) the value is 16 when running in streaming and 4
        otherwise. Every path returns a value.
        """
        # NOTE(review): firstBatch is reset on every call, so the
        # min(50, nPickMics) branch below is never reached. It is kept as is
        # so the batch sizes do not change; confirm the intended behaviour.
        self.firstBatch = True

        batchSize = self.streamingBatchSize.get()
        if batchSize != -1:
            return batchSize

        if hasattr(self, "actualBatchSize"):
            return self.actualBatchSize

        if self.isInStreaming():
            self.actualBatchSize = 16
            return self.actualBatchSize

        if self.firstBatch:
            self.firstBatch = False
            return 4

        nPickMics = self._getNumPickedMics()
        self.actualBatchSize = min(50, nPickMics)
        return self.actualBatchSize

    def _getNumPickedMics(self):
        """ Return the number of distinct micrographs referenced by the
        input coordinates.
        """
        # A set gives the right count even when the coordinates are not
        # grouped by micrograph.
        return len({coord.getMicId() for coord in self.inputCoordinates.get()})

    def _validate(self):
        """ Check the deep learning toolkit/model and the batch size. """
        errors = self.validateDLtoolkit(assertModel=True,
                                        model=('deepMicrographCleaner',
                                               'defaultModel.keras'))

        batchSize = self.streamingBatchSize.get()
        if batchSize == 1:
            errors.append('Batch size must be 0 (all at once) or larger than 1.')
        elif (not self.isInStreaming() and
              batchSize > len(self.inputCoordinates.get().getMicrographs())):
            errors.append('Batch size (%d) must be <= that the number of micrographs '
                          '(%d) in static mode. Set it to 0 to use only one batch'
                          % (batchSize, self._getNumPickedMics()))

        return errors

    def _citations(self):
        return ['***']

    def _summary(self):
        """ Report the micrographs source and the coordinates scale. """
        summary = []
        summary.append("Micrographs source: %s"
                       % self.getEnumText("micsSource"))
        # %g keeps whole scales as '1' or '2' and no longer truncates
        # fractional ones to 0, as %d did.
        summary.append("Coordinates scale: %g" % (1. / self.getBoxScale()))
        return summary

    def _methods(self):
        methodsMsgs = []
        return methodsMsgs

    # --------------------------- UTILS functions ----------------------------
    def _convertCoordinates(self, mic, coordList):
        """ Write the coordinates of mic to its .pos file, using the
        position function stored by _setupBasicProperties.
        """
        writeMicCoordinates(mic, coordList, self._getMicPos(mic),
                            getPosFunc=self._getPos)

    def _micsOther(self):
        """ Return True if other micrographs are used for extract. """
        return self.micsSource == OTHER

    def notOne(self, value):
        """ Return True if value differs from 1 by more than 0.0001. """
        return abs(value - 1) > 0.0001

    def _setupBasicProperties(self):
        """ Store the sampling rates and the function used to scale the
        coordinates.
        """
        # Set sampling rate (before and after doDownsample) and inputMics
        # according to micsSource type
        inputCoords = self.getCoords()
        mics = inputCoords.getMicrographs()
        self.samplingInput = mics.getSamplingRate()
        self.samplingMics = self.getInputMicrographs().getSamplingRate()
        self.samplingFactor = float(self.samplingMics /
                                    float(self.samplingInput))

        scale = self.getBoxScale()
        self.debug("Scale: %f" % scale)
        if self.notOne(scale):
            # If we need to scale the box, then we need to scale the coordinates
            getPos = lambda coord: (int(coord.getX() * scale),
                                    int(coord.getY() * scale))
        else:
            getPos = lambda coord: coord.getPosition()
        # Store the function to be used for scaling coordinates
        self._getPos = getPos

    def getInputMicrographs(self):
        """ Return the micrographs associated to the SetOfCoordinates or
        Other micrographs. """
        if not self._micsOther():
            return self.inputCoordinates.get().getMicrographs()
        else:
            return self.inputMicrographs.get()

    def getCoords(self):
        """ Return the input SetOfCoordinates. """
        return self.inputCoordinates.get()

    def getAutoSuffix(self):
        """ Return the output suffix: '_Full' for a negative threshold,
        otherwise '_Auto_' plus the threshold as a 3-digit percentage.
        """
        threshold = self.threshold.get()
        if threshold < 0:
            return '_Full'
        return '_Auto_%03d' % int(threshold * 100)

    def getOutputName(self):
        """ Return the attribute name of the output coordinates. """
        return 'outputCoordinates' + self.getAutoSuffix()

    def getOutput(self):
        """ Return the output attribute if it exists and has a value,
        otherwise None.
        """
        outputName = self.getOutputName()
        if (self.hasAttribute(outputName)
                and getattr(self, outputName).hasValue()):
            return getattr(self, outputName)
        return None

    def getCoordSampling(self):
        """ Return the sampling rate of the micrographs used for picking. """
        return self.getCoords().getMicrographs().getSamplingRate()

    def getMicSampling(self):
        """ Return the sampling rate of the micrographs to be processed. """
        return self.getInputMicrographs().getSamplingRate()

    def getBoxScale(self):
        """ Computing the sampling factor between input and output.
        We should take into account the differences in sampling rate between
        micrographs used for picking and the ones used for extraction.
        The downsampling factor could also affect the resulting scale.
        """
        samplingPicking = self.getCoordSampling()
        samplingExtract = self.getMicSampling()
        return float(samplingPicking) / samplingExtract

    def getBoxSize(self):
        # This function is needed by the wizard
        return int(self.getCoords().getBoxSize() * self.getBoxScale())

    def _getOutputImgMd(self):
        return self._getPath('images.xmd')

    def _getMicPos(self, mic):
        """ Return the corresponding .pos file for a given micrograph. """
        micBase = pwutils.removeBaseExt(mic.getFileName())
        return self._getExtraPath('inputCoords', micBase + ".pos")

    def _getMicXmd(self, mic):
        """ Return the corresponding .xmd with extracted particles
        for this micrograph. """
        micBase = pwutils.removeBaseExt(mic.getFileName())
        return self._getExtraPath(micBase + ".xmd")

    def getDoneMics(self):
        """ Return the set of base names (no folder, no extension) of the
        files found in the 'outputCoords' folder.
        """
        outputDir = self._getExtraPath('outputCoords')
        return {pwutils.removeBaseExt(fName) for fName in os.listdir(outputDir)}

    def registerCoords(self, coordsDir):
        """ This method is usually inherited by all Pickers
        and it is used from the Java picking GUI to register
        a new SetOfCoordinates when the user click on +Particles button.
        """
        inputset = self.getInputMicrographs()
        mySuffix = '_Manual_%s' % coordsDir.split('manualThresholding_')[1]
        outputName = 'outputCoordinates' + mySuffix
        outputset = self._createSetOfCoordinates(inputset, suffix=mySuffix)
        readSetOfCoordinates(coordsDir, outputset.getMicrographs(), outputset)
        # summary = self.getSummary(outputset)
        # outputset.setObjComment(summary)
        outputs = {outputName: outputset}
        self._defineOutputs(**outputs)

        # Using a pointer to define the relations is more robust to scheduling
        # and id changes between the protocol run.db and the main project
        # database. The pointer defined below points to the outputset object
        self._defineSourceRelation(self.getInputMicrographs(),
                                   Pointer(value=self, extended=outputName))
        self._store()