# **************************************************************************
# *
# * Authors: Grigory Sharov (gsharov@mrc-lmb.cam.ac.uk)
# *
# * MRC Laboratory of Molecular Biology, MRC-LMB
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 3 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307 USA
# *
# * All comments concerning this program package may be sent to the
# * e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
from pyworkflow.object import Float
from pyworkflow.protocol.params import (PointerParam, FloatParam, StringParam,
BooleanParam, IntParam, LEVEL_ADVANCED)
from pyworkflow.utils import removeExt
from pwem.protocols import ProtParticles
from pwem.objects import SetOfParticles, SetOfClasses
import pwem.emlib.metadata as md
from pwem.emlib.image import ImageHandler
import relion.convert as convert
[docs]class ProtRelionSortParticles(ProtParticles):
""" Relion particle sorting protocol.
It calculates difference images between particles and their aligned
(and CTF-convoluted) references, and produces Z-score on the characteristics
of these difference images (such as mean, standard deviation, skewness,
excess kurtosis and rotational symmetry).
"""
_label = 'sort particles'
# -------------------------- DEFINE param functions -----------------------
def _defineParams(self, form):
form.addSection(label='Input')
form.addParam('inputSet', PointerParam,
pointerClass='SetOfParticles,SetOfClasses2D,SetOfClasses3D',
label='Input particles', important=True,
help='Select a set of particles after Relion auto-picking'
' or 3D refinement. You can also select Relion 2D/3D'
' classes. Particles should have at least in-plane '
'alignment parameters and class assignment.')
form.addParam('referenceAverages', PointerParam,
pointerClass="SetOfAverages",
condition='inputSet and isInputAutoPicking',
label='Reference 2D averages',
help='Select references 2D averages used for auto-picking')
form.addParam('referenceVolume', PointerParam,
pointerClass="Volume",
condition='inputSet and isInputAutoRefine',
label='Reference volume',
help='Select reference volume 2D after 3D auto-refine')
form.addParam('maskDiameterA', IntParam, default=-1,
label='Particle mask diameter (A)',
help='The experimental images will be masked with a '
'soft circular mask with this <diameter>. Make '
'sure this diameter is not set too small because '
'that may mask away part of the signal! If set to '
'a value larger than the image size no masking '
'will be performed.\n\n'
'The same diameter will also be used for a '
'spherical mask of the reference structures if no '
'user-provided mask is specified.')
form.addParam('doLowPass', IntParam, default=-1,
expertLevel=LEVEL_ADVANCED,
label='Low pass filter references to (A):',
help='Lowpass filter in Angstroms for the references '
'(prevent Einstein-from-noise!)')
form.addParam('doInvert', BooleanParam, default=False,
expertLevel=LEVEL_ADVANCED,
label='Invert contrast of references?',
help='Density in particles is inverted compared to the '
'density in references')
form.addParam('doCTF', BooleanParam, default=False,
expertLevel=LEVEL_ADVANCED,
label='Do CTF-correction?',
help='If set to Yes, CTFs will be corrected inside the '
'MAP refinement. The resulting algorithm '
'intrinsically implements the optimal linear, '
'or Wiener filter. Note that input particles '
'should contains CTF parameters.')
form.addParam('ignoreCTFUntilFirstPeak', BooleanParam, default=False,
expertLevel=LEVEL_ADVANCED, condition='doCTF',
label='Ignore CTFs until their first peak?',
help='If set to Yes, then CTF-amplitude correction will '
'only be performed from the first peak of each CTF '
'onward. This can be useful if the CTF model is '
'inadequate at the lowest resolution. Still, in '
'general using higher amplitude contrast on the '
'CTFs (e.g. 10-20%) often yields better results. '
'Therefore, this option is not generally '
'recommended.')
form.addParam('minZ', FloatParam, default=0,
expertLevel=LEVEL_ADVANCED,
label='Min Z-value?',
help='Minimum Z-value to count in the sorting of '
'outliers')
form.addParam('extraParams', StringParam, default='',
expertLevel=LEVEL_ADVANCED, label='Additional parameters',
help='In this box command-line arguments may be '
'provided that are not generated by the GUI. This '
'may be useful for testing developmental options '
'and/or expert use of the program, e.g: \n'
'--verb 1\n')
form.addParallelSection(threads=0, mpi=1)
# -------------------------- INSERT steps functions -----------------------
[docs] def isInputAutoPicking(self):
inputSet = self.inputSet.get()
return (isinstance(inputSet, SetOfParticles) and
not inputSet.hasAlignmentProj())
[docs] def isInputAutoRefine(self):
inputSet = self.inputSet.get()
return (isinstance(inputSet, SetOfParticles) and
inputSet.hasAlignmentProj())
[docs] def isInputClasses(self):
return isinstance(self.inputSet.get(), SetOfClasses)
def _insertAllSteps(self):
self._createFilenameTemplates()
refAvg = self.referenceAverages.get()
avgId = refAvg.getObjId() if refAvg is not None else None
refVol = self.referenceVolume.get()
volId = refVol.getObjId() if refVol is not None else None
self._insertFunctionStep('convertInputStep',
self.inputSet.get().getObjId(), avgId, volId)
self._insertRelionStep()
self._insertFunctionStep('createOutputStep')
def _insertRelionStep(self):
""" Prepare the command line arguments before calling Relion. """
# Join in a single line all key, value pairs of the args dict
args = {}
self._setArgs(args)
params = ' '.join(['%s %s' % (k, str(v)) for k, v in args.items()])
if self.extraParams.hasValue():
params += ' ' + self.extraParams.get()
self._insertFunctionStep('runRelionStep', params)
# -------------------------- STEPS functions ------------------------------
def _createFilenameTemplates(self):
""" Centralize how files are called. """
myDict = {
'input_particles': self._getExtraPath('input_particles.star'),
'input_refs': self._getExtraPath('input_references.star'),
'output_star': self._getExtraPath('input_particles_sorted.star'),
'input_refvol': self._getTmpPath('input_vol.mrc')
}
self._updateFilenamesDict(myDict)
[docs] def convertInputStep(self, inputId, avgId, volId):
""" Create the input file in STAR format as expected by Relion.
If the input particles comes from Relion, just link the file.
Params:
particlesId: use this parameters just to force redo of convert if
the input particles are changed.
"""
inputSet = self.inputSet.get()
imgStar = self._getFileName('input_particles')
refStar = self._getFileName('input_refs')
# Pass stack file as None to avoid write the images files
self.info("Converting set from '%s' into '%s'"
% (inputSet.getFileName(), imgStar))
refSet = None
# case refine3D
if self.isInputClasses():
refSet = self.inputSet.get() # 2D or 3D classes
else:
if self.isInputAutoRefine():
ImageHandler().convert(self.referenceVolume.get(),
self._getFileName('input_refvol'))
else: # Autopicking case
refSet = self.referenceAverages.get()
self.classDict = {}
if refSet:
self.info("Converting reference from '%s' into %s"
% (refSet.getFileName(), refStar))
# Compute class mapping
classList = [cls.getObjId() for cls in refSet]
classList.sort()
for i, c in enumerate(classList):
self.classDict[c] = i + 1
convert.writeReferences(
refSet, removeExt(refStar),
postprocessImageRow=self._updateClasses)
# Write particles star file
allParticles = self._allParticles(iterate=False)
convert.writeSetOfParticles(
allParticles, imgStar,
outputDir=self._getPath(),
postprocessImageRow=self._postProcessImageRow)
[docs] def runRelionStep(self, params):
""" Execute relion steps with given params. """
self.runJob(self._getProgram(), params)
[docs] def createOutputStep(self):
sortedImgSet = self._createSetOfParticles()
particles = self._sampleParticles()
sortedImgSet.copyInfo(particles)
particlesIter = self._allParticles(iterate=False)
sortedStar = self._getFileName('output_star')
sortedImgSet.copyItems(particlesIter,
updateItemCallback=self._updateZScore,
itemDataIterator=md.iterRows(sortedStar))
self._defineOutputs(outputParticles=sortedImgSet)
self._defineSourceRelation(self.inputSet, sortedImgSet)
# -------------------------- INFO functions -------------------------------
def _validate(self):
errors = []
return errors
def _summary(self):
summary = []
if not hasattr(self, 'outputParticles'):
summary.append("Output particles not ready yet.")
else:
summary.append("Input %s particles were sorted by Z-score: %s" %
(self.inputSet.get().getSize(),
self.getObjectTag('outputParticles')))
return summary
# -------------------------- UTILS functions ------------------------------
def _allParticles(self, iterate=False):
# A handler function to iterate over the particles
inputSet = self.inputSet.get()
if self.isInputClasses():
iterParticles = inputSet.iterClassItems()
if iterate:
return iterParticles
else:
particles = SetOfParticles(filename=":memory:")
particles.copyInfo(inputSet.getFirstItem())
particles.copyItems(iterParticles)
return particles
else:
if iterate:
return inputSet.iterItems()
else:
return inputSet
def _sampleParticles(self):
""" Return the input particles set, or the first class
in the case the input are classes.
"""
if self.isInputClasses():
return self.inputSet.get().getFirstItem()
else:
return self.inputSet.get()
def _setArgs(self, args):
particles = self._sampleParticles()
if self.maskDiameterA <= 0:
maskDiameter = particles.getSamplingRate() * particles.getXDim()
else:
maskDiameter = self.maskDiameterA.get()
args.update({'--i': self._getFileName('input_particles'),
'--particle_diameter': maskDiameter,
'--angpix': particles.getSamplingRate(),
'--min_z': self.minZ.get()
})
args['--o'] = self._getFileName('output_star')
angpixRef = None
if self.isInputAutoRefine():
args['--ref'] = self._getFileName('input_refvol')
angpixRef = self.referenceVolume.get().getSamplingRate()
else:
args['--ref'] = self._getFileName('input_refs')
if self.referenceAverages.hasValue():
angpixRef = self.referenceAverages.get().getSamplingRate()
if angpixRef is not None:
args['--angpix_ref'] = '%0.5f' % angpixRef
if self.doInvert:
args['--invert'] = ''
if self.ignoreCTFUntilFirstPeak:
args['--ctf_intact_first_peak'] = ''
if self.doCTF:
args['--ctf'] = ''
def _getProgram(self, program='relion_particle_sort'):
""" Get the program name depending on the MPI use or not. """
if self.numberOfMpi > 1:
program += '_mpi'
return program
def _postProcessImageRow(self, img, imgRow):
if self.classDict:
classId = img.getClassId()
if classId is not None:
if classId not in self.classDict:
raise Exception("Class Id %s from particle %s is not found"
% (classId, img.getObjId()))
newClassId = self.classDict[classId]
else:
newClassId = imgRow.getValue(md.RLN_PARTICLE_CLASS)
else:
newClassId = 1
imgRow.setValue(md.RLN_PARTICLE_CLASS, newClassId)
def _updateClasses(self, img, imgRow):
index, _ = img.getLocation()
# renumber class numbers
imgRow.setValue(md.RLN_PARTICLE_CLASS, int(index))
def _updateZScore(self, img, imgRow):
zscore = imgRow.getValue(md.RLN_SELECT_PARTICLES_ZSCORE)
img._rlnSelectParticlesZscore = Float(zscore)