Source code for xmipp3.protocols.protocol_preprocess_micrographs

# **************************************************************************
# *
# * Authors:     J.M. De la Rosa Trevin (jmdelarosa@cnb.csic.es)
# *              Laura del Cano (ldelcano@cnb.csic.es)
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************

import os
from os.path import basename

from pyworkflow.utils import getExt, replaceExt
from pyworkflow.protocol.constants import STEPS_PARALLEL, LEVEL_ADVANCED
import pyworkflow.protocol.constants as cons
from pyworkflow.protocol.params import (PointerParam, BooleanParam, IntParam,
                                        FloatParam, LabelParam)
from pyworkflow.object import Set

from pwem.protocols import ProtPreprocessMicrographs
from pwem.objects import SetOfMicrographs, Micrograph

OUTPUT_MICROGRAPHS = 'outputMicrographs'


[docs]class XmippProtPreprocessMicrographs(ProtPreprocessMicrographs): """This protocol preprocesses micrographs by performing several operations: cropping borders, take logarithm in order to have a linear relationship, removing bad pixels, invert contrast, downsampling micrograph, denoising, normalize the micrograph, gaussian filter and low or high filter. These steps help improve data quality before particle picking and reconstruction.""" _label = 'preprocess micrographs' _possibleOutputs = {OUTPUT_MICROGRAPHS: SetOfMicrographs} def __init__(self, **args): ProtPreprocessMicrographs.__init__(self, **args) self.stepsExecutionMode = STEPS_PARALLEL #--------------------------- DEFINE params functions ----------------------- def _defineParams(self, form): form.addSection(label='Preprocess') form.addParam('inputMicrographs', PointerParam, pointerClass='SetOfMicrographs', label="Input micrographs", important=True, help='Select the SetOfMicrograph to be preprocessed.') form.addParam('orderComment', LabelParam, label="Operations are performed in the order shown below", important=True) form.addParam('doCrop', BooleanParam, default=False, label='Crop borders?', help='Crop a given amount of pixels from each border.') form.addParam('cropPixels', IntParam, default=10, condition='doCrop', label='Pixels to crop', help='Amount of pixels you want to crop from borders.') form.addParam('doLog', BooleanParam, default=False, label='Take logarithm?', help='Depending on your acquisition system you may need ' 'to take the logarithm of the pixel values in ' 'order to have a linear relationship betweenthe ' 'gray values in the image and those in the volume. ' 'a - b ln(x+c) by default 4.431-0.4018*' 'LN((P1+336.6)) is applied (right one for nikon ' 'coolscan 9000)') line = form.addLine('Log', condition='doLog', help='Parameters in a - b ln(x+c).') line.addParam('logA', FloatParam, default=4.431, label='a') line.addParam('logB', FloatParam, default=0.402, label='b') line.addParam('logC', FloatParam, default=336.6, label='c') form.addParam('doRemoveBadPix', BooleanParam, default=False, label='Remove bad pixels?', help='Values will be thresholded to this multiple of ' 'standard deviations. Typical values are about 5, ' 'i.e., pixel values beyond 5 times the standard ' 'deviation will be substituted by the local median. ' 'Set this option to -1 for not applying it.') form.addParam('mulStddev', IntParam, default=5, condition='doRemoveBadPix', label='Multiple of Stddev', help='Multiple of standard deviation.') form.addParam('doInvert', BooleanParam, default=False, label='Invert contrast?', help='Multiply by -1') form.addParam('doDownsample', BooleanParam, default=False, label='Downsample micrographs?', help='Downsample micrographs by a given factor.') form.addParam('downFactor', FloatParam, default=2., condition='doDownsample', label='Downsampling factor', help='Non-integer downsample factors are possible. ' 'Must be larger than 1.') form.addParam('doDenoise', BooleanParam, default=False, label='Denoising', help="Apply a denoising method") form.addParam('maxIteration', IntParam, default=50, condition='doDenoise', label='Max. number of iterations', help='Max. number of iterations. Higher number = better ' 'output but slower calculation. Must be larger ' 'than 1.') form.addParam('doSmooth', BooleanParam, default=False, label='Gaussian filter', help="Apply a Gaussian filter in real space") form.addParam('sigmaConvolution', FloatParam, default=2, condition="doSmooth", label='Gaussian sigma (px)', help="The larger this value, the more the effect will " "be noticed") form.addParam('doHighPass', BooleanParam, default=False, label='Highpass filter', help="Apply a highpass filter in real space") form.addParam('highCutoff', FloatParam, default=0.002, condition="doHighPass", label='Cutoff frequency', help="In normalized frequencies (<0.5). For example, " "if you want to remove patterns larger than " "500 pixels, use 1/500=0.002") form.addParam('highRaised', FloatParam, default=0.001, condition="doHighPass", expertLevel=LEVEL_ADVANCED, label='Transition bandwidth', help="In normalized frequencies (<0.5). For example, " "if you want to remove patterns larger than " "1000 pixels, use 1/1000=0.001") form.addParam('doLowPass', BooleanParam, default=False, label='Lowpass filter', help="Apply a lowpass filter in real space") form.addParam('lowCutoff', FloatParam, default=0.4, condition="doLowPass", label='Cutoff frequency', help="In normalized frequencies (<0.5). For example, " "if you want to remove the crystalline ice at a frequency of 4A " "and the pixel size is 0.5A, then the cutoff should be 0.5/4=0.125") form.addParam('lowRaised', FloatParam, default=0.001, condition="doLowPass", expertLevel=LEVEL_ADVANCED, label='Transition bandwidth', help="In normalized frequencies (<0.5). The number of pixels in Fourier " "will be approximately lowRaised*Xdim") form.addParam('doNormalize', BooleanParam, default=False, label='Normalize micrograph?', help='Normalize micrographs to be zero mean and ' 'standard deviation one') form.addParallelSection(threads=2, mpi=1) def _defineInputs(self): """ Store some of the input parameter in a dictionary for an easy replacement in the programs command line. """ # Get pointer to input micrographs self.inputMics = self.inputMicrographs.get() # Parameters needed to preprocess the micrographs self.params = {'downFactor': self.downFactor.get(), 'cropPixels': 2 * self.cropPixels.get(), 'logA': self.logA.get(), 'logB': self.logB.get(), 'logC': self.logC.get(), 'stddev': self.mulStddev.get(), 'sigmaConvolution': self.sigmaConvolution.get(), 'highCutoff': self.highCutoff.get(), 'highRaised': self.highRaised.get(), 'lowCutoff': self.lowCutoff.get(), 'lowRaised': self.lowRaised.get(), 'maxIterTV': self.maxIteration.get()} #--------------------------- INSERT steps functions ------------------------ def _insertAllSteps(self): self._defineInputs() self.insertedDict = {} preprocessSteps = self._insertNewMicsSteps(self.insertedDict, self.inputMicrographs.get()) self._insertFunctionStep('createOutputStep', prerequisites=preprocessSteps, wait=True)
[docs] def createOutputStep(self): pass
def _getFirstJoinStepName(self): # This function will be used for streaming, to check which is # the first function that need to wait for all micrographs # to have completed, this can be overriden in subclasses # (e.g., in Xmipp 'sortPSDStep') return 'createOutputStep' def _getFirstJoinStep(self): for s in self._steps: if s.funcName == self._getFirstJoinStepName(): return s return None def _insertNewMicsSteps(self, insertedDict, inputMics): deps = [] for mic in inputMics: if mic.getObjId() not in insertedDict: fnOut = self._getOutputMicrograph(mic) stepId = self._insertStepsForMicrograph(mic.getFileName(), fnOut) deps.append(stepId) insertedDict[mic.getObjId()] = stepId return deps def _stepsCheck(self): # Input micrograph set can be loaded or None when checked for new inputs # If None, we load it self._checkNewInput() self._checkNewOutput() def _checkNewInput(self): # Check if there are new micrographs to process from the input set micsFile = self.inputMicrographs.get().getFileName() micsSet = SetOfMicrographs(filename=micsFile) micsSet.loadAllProperties() self.SetOfMicrographs = [m.clone() for m in micsSet] self.streamClosed = micsSet.isStreamClosed() micsSet.close() newMics = any(m.getObjId() not in self.insertedDict for m in self.inputMics) outputStep = self._getFirstJoinStep() if newMics: fDeps = self._insertNewMicsSteps(self.insertedDict, self.inputMics) if outputStep is not None: outputStep.addPrerequisites(*fDeps) self.updateSteps() def _checkNewOutput(self): if getattr(self, 'finished', False): return # Load previously done items (from text file) doneList = self._readDoneList() # Check for newly done items newDone = [m.clone() for m in self.SetOfMicrographs if int(m.getObjId()) not in doneList and self._isMicDone(m)] # We have finished when there is not more input micrographs (stream closed) # and the number of processed micrographs is equal to the number of inputs self.finished = self.streamClosed and (len(doneList) + len(newDone)) == len(self.SetOfMicrographs) streamMode = Set.STREAM_CLOSED if self.finished else Set.STREAM_OPEN if newDone: self._writeDoneList(newDone) elif not self.finished: # If we are not finished and no new output have been produced # it does not make sense to proceed and updated the outputs # so we exit from the function here return outSet = self.getOutputMics() def tryToAppend(outSet, micOut, tries=1): """ When micrograph is very big, sometimes it's not ready to be read Then we will wait for it up to a minute in 6 time-growing tries. """ try: if outSet.isEmpty(): outSet.setDim(micOut.getDim()) outSet.append(micOut) except Exception as ex: micFn = micOut.getFileName() # Runs/..../extra/filename.mrc errorStr = ('Image Extension: File %s has wrong size.' % micFn) print("Output micrographs not ready, yet. Try: %d/6 (next in %fs)" % (tries, tries*3)) if errorStr in str(ex) and tries < 7: from time import sleep sleep(tries*3) tryToAppend(outSet, micOut, tries+1) else: raise ex for mic in newDone: micOut = Micrograph() if self.doDownsample: micOut.setSamplingRate(self.inputMicrographs.get().getSamplingRate() * self.downFactor.get()) micOut.setObjId(mic.getObjId()) micOut.setFileName(self._getOutputMicrograph(mic)) micOut.setMicName(mic.getMicName()) tryToAppend(outSet, micOut) self._updateOutputSet(OUTPUT_MICROGRAPHS, outSet, streamMode) if len(doneList)==0: #firstTime self._defineTransformRelation(self.inputMicrographs, outSet) if self.finished: # Unlock createOutputStep if finished all jobs outputStep = self._getFirstJoinStep() if outputStep and outputStep.isWaiting(): outputStep.setStatus(cons.STATUS_NEW)
[docs] def getOutputMics(self): if not hasattr(self, OUTPUT_MICROGRAPHS): outputSet = SetOfMicrographs(filename=self.getPath('micrographs.sqlite')) outputSet.setStreamState(outputSet.STREAM_OPEN) inputs = self.inputMicrographs.get() outputSet.copyInfo(inputs) if self.doDownsample: outputSet.setSamplingRate(self.inputMicrographs.get().getSamplingRate() * self.downFactor.get()) self._defineOutputs(**{OUTPUT_MICROGRAPHS: outputSet}) self._defineTransformRelation(inputs, outputSet) self.info("Storing set 1rst time: %s" % outputSet) # self._store(outputSet) else: outputSet = getattr(self, OUTPUT_MICROGRAPHS) return outputSet
def _updateOutputSet(self, outputName, outputSet, state=Set.STREAM_OPEN): outputSet.setStreamState(state) outputSet.write() # Write to commit changes self._store(outputSet) # Close set dataset to avoid locking it # outputSet.close() def _insertStepsForMicrograph(self, inputMic, outputMic): self.params['inputMic'] = inputMic self.params['outputMic'] = outputMic self.lastStepId = None self.prerequisites = [] # First operation should not depend on nothing before # Crop self.__insertOneStep(self.doCrop, "xmipp_transform_window", " -i %(inputMic)s --crop %(cropPixels)d -v 0") # Take logarithm self.__insertOneStep(self.doLog, "xmipp_transform_filter", " -i %(inputMic)s --log --fa %(logA)f --fb %(logB)f --fc %(logC)f") # Remove bad pixels self.__insertOneStep(self.doRemoveBadPix, "xmipp_transform_filter", " -i %(inputMic)s --bad_pixels outliers %(stddev)f -v 0") # Invert self.__insertOneStep(self.doInvert, "xmipp_image_operate", "-i %(inputMic)s --mult -1") # Downsample self.__insertOneStep(self.doDownsample, "xmipp_transform_downsample", "-i %(inputMic)s --step %(downFactor)f --method fourier") # Denoise self.__insertOneStep(self.doDenoise, "xmipp_transform_filter", "-i %(inputMic)s --denoiseTV --maxIterTV %(maxIterTV)d") # Smooth self.__insertOneStep(self.doSmooth, "xmipp_transform_filter", "-i %(inputMic)s --fourier real_gaussian %(sigmaConvolution)f") # Highpass self.__insertOneStep(self.doHighPass, "xmipp_transform_filter", "-i %(inputMic)s --fourier high_pass %(highCutoff)f %(highRaised)f") # Lowpass self.__insertOneStep(self.doLowPass, "xmipp_transform_filter", "-i %(inputMic)s --fourier low_pass %(lowCutoff)f %(lowRaised)f") # Normalize self.__insertOneStep(self.doNormalize, "xmipp_transform_normalize", "-i %(inputMic)s --method OldXmipp") return self.lastStepId def __insertOneStep(self, condition, program, arguments): """Insert operation if the condition is met. Possible conditions are: doDownsample, doCrop...etc""" if condition.get(): # If the input micrograph and output micrograph differss, # add the -o option if self.params['inputMic'] != self.params['outputMic']: arguments += " -o %(outputMic)s" # Insert the command with the formatted parameters self.lastStepId = self._insertRunJobStep(program, arguments % self.params, prerequisites=self.prerequisites) self.prerequisites = [self.lastStepId] # next should depend on this step # Update inputMic for next step as outputMic self.params['inputMic'] = self.params['outputMic'] #--------------------------- INFO functions -------------------------------- def _validate(self): validateMsgs = [] # Some prepocessing option need to be marked if not(self.doCrop or self.doDownsample or self.doLog or self.doRemoveBadPix or self.doInvert or self.doNormalize or self.doDenoise or self.doSmooth or self.doHighPass or self.doLowPass): validateMsgs.append('Some preprocessing option need to be selected.') return validateMsgs def _citations(self): return ["Sorzano2009d"] def _hasOutput(self): return (getattr(self, 'outputMicrographs', False) and self.outputMicrographs.hasValue()) def _summary(self): if not self._hasOutput(): return ["*Output Micrographs* not ready yet."] summary = [] summary.append("Micrographs preprocessed: *%d*" % self.inputMicrographs.get().getSize()) if self.doCrop: summary.append("Cropped *%d* pixels per each border." % self.cropPixels) if self.doLog: summary.append("Formula applied: %f - %f ln(x + %f)" % (self.logA, self.logB, self.logC,)) if self.doRemoveBadPix: summary.append("Multiple of standard deviation to remove pixels: %d" % self.mulStddev) if self.doInvert: summary.append("Contrast inverted") if self.doDownsample: summary.append("Downsampling factor: %0.2f" % self.downFactor) if self.doDenoise: summary.append("Denoising applied with %d iterations" % self.maxIteration.get()) if self.doSmooth: summary.append("Gaussian filtered with sigma=%f (px)"%self.sigmaConvolution.get()) if self.doHighPass: summary.append("Highpass filtered with cutoff=%f (1/px)"%self.highCutoff.get()) if self.doLowPass: summary.append("Lowpass filtered with cutoff=%f (1/px)"%self.lowCutoff.get()) if self.doNormalize: summary.append("Normalized to mean 0 and variance 1") return summary def _methods(self): if not self._hasOutput(): return ['*Output micrographs* not ready yet.'] txt = "The micrographs in set %s have " % self.getObjectTag('inputMicrographs') if self.doCrop: txt += "been cropped by %d pixels " % self.cropPixels if self.doLog: txt += ("changed from transmisivity to density with the formula: " "%f - %f * ln(x + %f) " % (self.logA, self.logB, self.logC)) if self.doRemoveBadPix: txt += "had pixels removed, the ones with standard deviation beyond %d " % self.mulStddev.get() if self.doRemoveBadPix: txt += "contrast inverted " if self.doDownsample: txt += "been downsampled with a factor of %0.2f " % self.downFactor.get() if self.doDenoise: txt += "been Denoised with %d iterations " % self.maxIteration.get() if self.doSmooth: txt += "been Gaussian filtered with a sigma of %0.2f pixels "%self.sigmaConvolution.get() if self.doHighPass: txt += "been highpass filtered with a cutoff of %f (1/px) "%self.highCutoff.get() if self.doLowPass: txt += "been lowpass filtered with a cutoff of %f (1/px) "%self.lowCutoff.get() if self.doNormalize: txt += "been normalized to mean 0 and variance 1" return [txt, "The resulting set of micrographs is %s" % self.getObjectTag('outputMicrographs')] #--------------------------- UTILS functions ------------------------------- def _getOutputMicrograph(self, mic): """ Return the name of the output micrograph, given the input Micrograph object. """ fn = mic.getFileName() extFn = getExt(fn) if extFn != ".mrc": fn = replaceExt(fn, "mrc") fnOut = self._getExtraPath(basename(fn)) return fnOut def _readDoneList(self): """ Read from a text file the id's of the items that have been done. """ doneFile = self._getAllDone() doneList = [] # Check what items have been previously done if os.path.exists(doneFile): with open(doneFile) as f: doneList += [int(line.strip()) for line in f] return doneList def _getAllDone(self): return self._getExtraPath('DONE_all.TXT') def _writeDoneList(self, micList): """ Write to a text file the items that have been done. """ with open(self._getAllDone(), 'a') as f: for mic in micList: f.write('%d\n' % mic.getObjId()) def _isMicDone(self, mic): """ A movie is done if the marker file exists. """ return os.path.exists(self._getMicDone(mic)) def _getMicDone(self, mic): fn = mic.getFileName() extFn = getExt(fn) if extFn != ".mrc": fn = replaceExt(fn, "mrc") return self._getExtraPath('%s' % basename(fn))