# **************************************************************************
# *
# * Authors: J.M. De la Rosa Trevin (jmdelarosa@cnb.csic.es)
# * Laura del Cano (ldelcano@cnb.csic.es)
# *
# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307 USA
# *
# * All comments concerning this program package may be sent to the
# * e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
import os
from os.path import basename
from pyworkflow.utils import getExt, replaceExt
from pyworkflow.protocol.constants import STEPS_PARALLEL, LEVEL_ADVANCED
import pyworkflow.protocol.constants as cons
from pyworkflow.protocol.params import (PointerParam, BooleanParam, IntParam,
FloatParam, LabelParam)
from pyworkflow.object import Set
from pwem.protocols import ProtPreprocessMicrographs
from pwem.objects import SetOfMicrographs, Micrograph
OUTPUT_MICROGRAPHS = 'outputMicrographs'
[docs]class XmippProtPreprocessMicrographs(ProtPreprocessMicrographs):
"""Protocol to preprocess a set of micrographs in the project.
You can crop borders, remove bad pixels, etc. """
_label = 'preprocess micrographs'
def __init__(self, **args):
ProtPreprocessMicrographs.__init__(self, **args)
self.stepsExecutionMode = STEPS_PARALLEL
#--------------------------- DEFINE params functions -----------------------
def _defineParams(self, form):
form.addSection(label='Preprocess')
form.addParam('inputMicrographs', PointerParam,
pointerClass='SetOfMicrographs',
label="Input micrographs", important=True,
help='Select the SetOfMicrograph to be preprocessed.')
form.addParam('orderComment', LabelParam,
label="Operations are performed in the order shown below",
important=True)
form.addParam('doCrop', BooleanParam, default=False,
label='Crop borders?',
help='Crop a given amount of pixels from each border.')
form.addParam('cropPixels', IntParam, default=10, condition='doCrop',
label='Pixels to crop',
help='Amount of pixels you want to crop from borders.')
form.addParam('doLog', BooleanParam, default=False,
label='Take logarithm?',
help='Depending on your acquisition system you may need '
'to take the logarithm of the pixel values in '
'order to have a linear relationship betweenthe '
'gray values in the image and those in the volume. '
'a - b ln(x+c) by default 4.431-0.4018*'
'LN((P1+336.6)) is applied (right one for nikon '
'coolscan 9000)')
line = form.addLine('Log', condition='doLog',
help='Parameters in a - b ln(x+c).')
line.addParam('logA', FloatParam, default=4.431, label='a')
line.addParam('logB', FloatParam, default=0.402, label='b')
line.addParam('logC', FloatParam, default=336.6, label='c')
form.addParam('doRemoveBadPix', BooleanParam, default=False,
label='Remove bad pixels?',
help='Values will be thresholded to this multiple of '
'standard deviations. Typical values are about 5, '
'i.e., pixel values beyond 5 times the standard '
'deviation will be substituted by the local median. '
'Set this option to -1 for not applying it.')
form.addParam('mulStddev', IntParam, default=5,
condition='doRemoveBadPix',
label='Multiple of Stddev',
help='Multiple of standard deviation.')
form.addParam('doInvert', BooleanParam, default=False,
label='Invert contrast?',
help='Multiply by -1')
form.addParam('doDownsample', BooleanParam, default=False,
label='Downsample micrographs?',
help='Downsample micrographs by a given factor.')
form.addParam('downFactor', FloatParam, default=2.,
condition='doDownsample',
label='Downsampling factor',
help='Non-integer downsample factors are possible. '
'Must be larger than 1.')
form.addParam('doDenoise', BooleanParam, default=False,
label='Denoising',
help="Apply a denoising method")
form.addParam('maxIteration', IntParam, default=50, condition='doDenoise',
label='Max. number of iterations',
help='Max. number of iterations. Higher number = better '
'output but slower calculation. Must be larger '
'than 1.')
form.addParam('doSmooth', BooleanParam, default=False,
label='Gaussian filter',
help="Apply a Gaussian filter in real space")
form.addParam('sigmaConvolution', FloatParam, default=2,
condition="doSmooth",
label='Gaussian sigma (px)',
help="The larger this value, the more the effect will "
"be noticed")
form.addParam('doHighPass', BooleanParam, default=False,
label='Highpass filter',
help="Apply a highpass filter in real space")
form.addParam('highCutoff', FloatParam, default=0.002,
condition="doHighPass",
label='Cutoff frequency',
help="In normalized frequencies (<0.5). For example, "
"if you want to remove patterns larger than "
"500 pixels, use 1/500=0.002")
form.addParam('highRaised', FloatParam, default=0.001,
condition="doHighPass", expertLevel=LEVEL_ADVANCED,
label='Transition bandwidth',
help="In normalized frequencies (<0.5). For example, "
"if you want to remove patterns larger than "
"1000 pixels, use 1/1000=0.001")
form.addParam('doLowPass', BooleanParam, default=False,
label='Lowpass filter',
help="Apply a lowpass filter in real space")
form.addParam('lowCutoff', FloatParam, default=0.4,
condition="doLowPass",
label='Cutoff frequency',
help="In normalized frequencies (<0.5). For example, "
"if you want to remove the crystalline ice at a frequency of 4A "
"and the pixel size is 0.5A, then the cutoff should be 0.5/4=0.125")
form.addParam('lowRaised', FloatParam, default=0.001,
condition="doLowPass", expertLevel=LEVEL_ADVANCED,
label='Transition bandwidth',
help="In normalized frequencies (<0.5). The number of pixels in Fourier "
"will be approximately lowRaised*Xdim")
form.addParam('doNormalize', BooleanParam, default=False,
label='Normalize micrograph?',
help='Normalize micrographs to be zero mean and '
'standard deviation one')
form.addParallelSection(threads=2, mpi=1)
def _defineInputs(self):
""" Store some of the input parameter in a dictionary for
an easy replacement in the programs command line.
"""
# Get pointer to input micrographs
self.inputMics = self.inputMicrographs.get()
# Parameters needed to preprocess the micrographs
self.params = {'downFactor': self.downFactor.get(),
'cropPixels': 2 * self.cropPixels.get(),
'logA': self.logA.get(),
'logB': self.logB.get(),
'logC': self.logC.get(),
'stddev': self.mulStddev.get(),
'sigmaConvolution': self.sigmaConvolution.get(),
'highCutoff': self.highCutoff.get(),
'highRaised': self.highRaised.get(),
'lowCutoff': self.lowCutoff.get(),
'lowRaised': self.lowRaised.get(),
'maxIterTV': self.maxIteration.get()}
#--------------------------- INSERT steps functions ------------------------
def _insertAllSteps(self):
self._defineInputs()
self.insertedDict = {}
preprocessSteps = self._insertNewMicsSteps(self.insertedDict,
self.inputMicrographs.get())
self._insertFunctionStep('createOutputStep',
prerequisites=preprocessSteps, wait=True)
[docs] def createOutputStep(self):
pass
def _getFirstJoinStepName(self):
# This function will be used for streaming, to check which is
# the first function that need to wait for all micrographs
# to have completed, this can be overriden in subclasses
# (e.g., in Xmipp 'sortPSDStep')
return 'createOutputStep'
def _getFirstJoinStep(self):
for s in self._steps:
if s.funcName == self._getFirstJoinStepName():
return s
return None
def _insertNewMicsSteps(self, insertedDict, inputMics):
deps = []
for mic in inputMics:
if mic.getObjId() not in insertedDict:
fnOut = self._getOutputMicrograph(mic)
stepId = self._insertStepsForMicrograph(mic.getFileName(), fnOut)
deps.append(stepId)
insertedDict[mic.getObjId()] = stepId
return deps
def _stepsCheck(self):
# Input micrograph set can be loaded or None when checked for new inputs
# If None, we load it
self._checkNewInput()
self._checkNewOutput()
def _checkNewInput(self):
# Check if there are new micrographs to process from the input set
micsFile = self.inputMicrographs.get().getFileName()
micsSet = SetOfMicrographs(filename=micsFile)
micsSet.loadAllProperties()
self.SetOfMicrographs = [m.clone() for m in micsSet]
self.streamClosed = micsSet.isStreamClosed()
micsSet.close()
newMics = any(m.getObjId() not in self.insertedDict
for m in self.inputMics)
outputStep = self._getFirstJoinStep()
if newMics:
fDeps = self._insertNewMicsSteps(self.insertedDict,
self.inputMics)
if outputStep is not None:
outputStep.addPrerequisites(*fDeps)
self.updateSteps()
def _checkNewOutput(self):
if getattr(self, 'finished', False):
return
# Load previously done items (from text file)
doneList = self._readDoneList()
# Check for newly done items
newDone = [m.clone() for m in self.SetOfMicrographs
if int(m.getObjId()) not in doneList and self._isMicDone(m)]
# We have finished when there is not more input micrographs (stream closed)
# and the number of processed micrographs is equal to the number of inputs
self.finished = self.streamClosed and (len(doneList) + len(newDone)) == len(self.SetOfMicrographs)
streamMode = Set.STREAM_CLOSED if self.finished else Set.STREAM_OPEN
if newDone:
self._writeDoneList(newDone)
elif not self.finished:
# If we are not finished and no new output have been produced
# it does not make sense to proceed and updated the outputs
# so we exit from the function here
return
outSet = self.getOutputMics()
def tryToAppend(outSet, micOut, tries=1):
""" When micrograph is very big, sometimes it's not ready to be read
Then we will wait for it up to a minute in 6 time-growing tries. """
try:
if outSet.isEmpty():
outSet.setDim(micOut.getDim())
outSet.append(micOut)
except Exception as ex:
micFn = micOut.getFileName() # Runs/..../extra/filename.mrc
errorStr = ('Image Extension: File %s has wrong size.' % micFn)
print("Output micrographs not ready, yet. Try: %d/6 (next in %fs)"
% (tries, tries*3))
if errorStr in str(ex) and tries < 7:
from time import sleep
sleep(tries*3)
tryToAppend(outSet, micOut, tries+1)
else:
raise ex
for mic in newDone:
micOut = Micrograph()
if self.doDownsample:
micOut.setSamplingRate(self.inputMicrographs.get().getSamplingRate()
* self.downFactor.get())
micOut.setObjId(mic.getObjId())
micOut.setFileName(self._getOutputMicrograph(mic))
micOut.setMicName(mic.getMicName())
tryToAppend(outSet, micOut)
self._updateOutputSet(OUTPUT_MICROGRAPHS, outSet, streamMode)
if len(doneList)==0: #firstTime
self._defineTransformRelation(self.inputMicrographs, outSet)
if self.finished: # Unlock createOutputStep if finished all jobs
outputStep = self._getFirstJoinStep()
if outputStep and outputStep.isWaiting():
outputStep.setStatus(cons.STATUS_NEW)
[docs] def getOutputMics(self):
if not hasattr(self, OUTPUT_MICROGRAPHS):
outputSet = SetOfMicrographs(filename=self.getPath('micrographs.sqlite'))
outputSet.setStreamState(outputSet.STREAM_OPEN)
inputs = self.inputMicrographs.get()
outputSet.copyInfo(inputs)
if self.doDownsample:
outputSet.setSamplingRate(self.inputMicrographs.get().getSamplingRate()
* self.downFactor.get())
self._defineOutputs(**{OUTPUT_MICROGRAPHS: outputSet})
self._defineTransformRelation(inputs, outputSet)
self.info("Storing set 1rst time: %s" % outputSet)
# self._store(outputSet)
else:
outputSet = getattr(self, OUTPUT_MICROGRAPHS)
return outputSet
def _updateOutputSet(self, outputName, outputSet, state=Set.STREAM_OPEN):
outputSet.setStreamState(state)
outputSet.write() # Write to commit changes
self._store(outputSet)
# Close set dataset to avoid locking it
# outputSet.close()
def _insertStepsForMicrograph(self, inputMic, outputMic):
self.params['inputMic'] = inputMic
self.params['outputMic'] = outputMic
self.lastStepId = None
self.prerequisites = [] # First operation should not depend on nothing before
# Crop
self.__insertOneStep(self.doCrop, "xmipp_transform_window",
" -i %(inputMic)s --crop %(cropPixels)d -v 0")
# Take logarithm
self.__insertOneStep(self.doLog, "xmipp_transform_filter",
" -i %(inputMic)s --log --fa %(logA)f --fb %(logB)f --fc %(logC)f")
# Remove bad pixels
self.__insertOneStep(self.doRemoveBadPix, "xmipp_transform_filter",
" -i %(inputMic)s --bad_pixels outliers %(stddev)f -v 0")
# Invert
self.__insertOneStep(self.doInvert, "xmipp_image_operate",
"-i %(inputMic)s --mult -1")
# Downsample
self.__insertOneStep(self.doDownsample, "xmipp_transform_downsample",
"-i %(inputMic)s --step %(downFactor)f --method fourier")
# Denoise
self.__insertOneStep(self.doDenoise, "xmipp_transform_filter",
"-i %(inputMic)s --denoiseTV --maxIterTV %(maxIterTV)d")
# Smooth
self.__insertOneStep(self.doSmooth, "xmipp_transform_filter",
"-i %(inputMic)s --fourier real_gaussian %(sigmaConvolution)f")
# Highpass
self.__insertOneStep(self.doHighPass, "xmipp_transform_filter",
"-i %(inputMic)s --fourier high_pass %(highCutoff)f %(highRaised)f")
# Lowpass
self.__insertOneStep(self.doLowPass, "xmipp_transform_filter",
"-i %(inputMic)s --fourier low_pass %(lowCutoff)f %(lowRaised)f")
# Normalize
self.__insertOneStep(self.doNormalize, "xmipp_transform_normalize",
"-i %(inputMic)s --method OldXmipp")
return self.lastStepId
def __insertOneStep(self, condition, program, arguments):
"""Insert operation if the condition is met.
Possible conditions are: doDownsample, doCrop...etc"""
if condition.get():
# If the input micrograph and output micrograph differss,
# add the -o option
if self.params['inputMic'] != self.params['outputMic']:
arguments += " -o %(outputMic)s"
# Insert the command with the formatted parameters
self.lastStepId = self._insertRunJobStep(program, arguments % self.params,
prerequisites=self.prerequisites)
self.prerequisites = [self.lastStepId] # next should depend on this step
# Update inputMic for next step as outputMic
self.params['inputMic'] = self.params['outputMic']
#--------------------------- INFO functions --------------------------------
def _validate(self):
validateMsgs = []
# Some prepocessing option need to be marked
if not(self.doCrop or self.doDownsample or self.doLog or self.doRemoveBadPix or self.doInvert
or self.doNormalize or self.doDenoise or self.doSmooth or self.doHighPass or self.doLowPass):
validateMsgs.append('Some preprocessing option need to be selected.')
return validateMsgs
def _citations(self):
return ["Sorzano2009d"]
def _hasOutput(self):
return (getattr(self, 'outputMicrographs', False) and
self.outputMicrographs.hasValue())
def _summary(self):
if not self._hasOutput():
return ["*Output Micrographs* not ready yet."]
summary = []
summary.append("Micrographs preprocessed: *%d*" % self.inputMicrographs.get().getSize())
if self.doCrop:
summary.append("Cropped *%d* pixels per each border." % self.cropPixels)
if self.doLog:
summary.append("Formula applied: %f - %f ln(x + %f)" % (self.logA, self.logB, self.logC,))
if self.doRemoveBadPix:
summary.append("Multiple of standard deviation to remove pixels: %d" % self.mulStddev)
if self.doInvert:
summary.append("Contrast inverted")
if self.doDownsample:
summary.append("Downsampling factor: %0.2f" % self.downFactor)
if self.doDenoise:
summary.append("Denoising applied with %d iterations" % self.maxIteration.get())
if self.doSmooth:
summary.append("Gaussian filtered with sigma=%f (px)"%self.sigmaConvolution.get())
if self.doHighPass:
summary.append("Highpass filtered with cutoff=%f (1/px)"%self.highCutoff.get())
if self.doLowPass:
summary.append("Lowpass filtered with cutoff=%f (1/px)"%self.lowCutoff.get())
if self.doNormalize:
summary.append("Normalized to mean 0 and variance 1")
return summary
def _methods(self):
if not self._hasOutput():
return ['*Output micrographs* not ready yet.']
txt = "The micrographs in set %s have " % self.getObjectTag('inputMicrographs')
if self.doCrop:
txt += "been cropped by %d pixels " % self.cropPixels
if self.doLog:
txt += ("changed from transmisivity to density with the formula: "
"%f - %f * ln(x + %f) " % (self.logA, self.logB, self.logC))
if self.doRemoveBadPix:
txt += "had pixels removed, the ones with standard deviation beyond %d " % self.mulStddev.get()
if self.doRemoveBadPix:
txt += "contrast inverted "
if self.doDownsample:
txt += "been downsampled with a factor of %0.2f " % self.downFactor.get()
if self.doDenoise:
txt += "been Denoised with %d iterations " % self.maxIteration.get()
if self.doSmooth:
txt += "been Gaussian filtered with a sigma of %0.2f pixels "%self.sigmaConvolution.get()
if self.doHighPass:
txt += "been highpass filtered with a cutoff of %f (1/px) "%self.highCutoff.get()
if self.doLowPass:
txt += "been lowpass filtered with a cutoff of %f (1/px) "%self.lowCutoff.get()
if self.doNormalize:
txt += "been normalized to mean 0 and variance 1"
return [txt, "The resulting set of micrographs is %s" %
self.getObjectTag('outputMicrographs')]
#--------------------------- UTILS functions -------------------------------
def _getOutputMicrograph(self, mic):
""" Return the name of the output micrograph, given
the input Micrograph object.
"""
fn = mic.getFileName()
extFn = getExt(fn)
if extFn != ".mrc":
fn = replaceExt(fn, "mrc")
fnOut = self._getExtraPath(basename(fn))
return fnOut
def _readDoneList(self):
""" Read from a text file the id's of the items that have been done. """
doneFile = self._getAllDone()
doneList = []
# Check what items have been previously done
if os.path.exists(doneFile):
with open(doneFile) as f:
doneList += [int(line.strip()) for line in f]
return doneList
def _getAllDone(self):
return self._getExtraPath('DONE_all.TXT')
def _writeDoneList(self, micList):
""" Write to a text file the items that have been done. """
with open(self._getAllDone(), 'a') as f:
for mic in micList:
f.write('%d\n' % mic.getObjId())
def _isMicDone(self, mic):
""" A movie is done if the marker file exists. """
return os.path.exists(self._getMicDone(mic))
def _getMicDone(self, mic):
fn = mic.getFileName()
extFn = getExt(fn)
if extFn != ".mrc":
fn = replaceExt(fn, "mrc")
return self._getExtraPath('%s' % basename(fn))