Source code for relion.protocols.protocol_initialmodel

# **************************************************************************
# *
# * Authors:     Grigory Sharov (gsharov@mrc-lmb.cam.ac.uk) [1]
# *              J.M. De la Rosa Trevin (delarosatrevin@scilifelab.se) [2]
# *
# * [1] MRC Laboratory of Molecular Biology, MRC-LMB
# * [2] SciLifeLab, Stockholm University
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 3 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
from emtable import Table

from pwem.constants import ALIGN_PROJ
from pwem.protocols import ProtInitialVolume
from pwem.objects import Volume
from pyworkflow.constants import PROD
from pyworkflow.protocol.params import (PointerParam, FloatParam,
                                        LabelParam, IntParam,
                                        EnumParam, StringParam,
                                        BooleanParam,
                                        LEVEL_ADVANCED)

import relion
import relion.convert as convert
from .protocol_base import ProtRelionBase


[docs]class ProtRelionInitialModel(ProtInitialVolume, ProtRelionBase): """ This protocols creates a 3D initial model using Relion. Generate a 3D initial model _de novo_ from 2D particles using Relion Stochastic Gradient Descent (SGD) algorithm. """ _label = '3D initial model' _devStatus = PROD IS_CLASSIFY = False IS_3D_INIT = True IS_2D = False CHANGE_LABELS = ['rlnChangesOptimalOrientations', 'rlnChangesOptimalOffsets'] def __init__(self, **args): ProtRelionBase.__init__(self, **args) def _initialize(self): """ This function is mean to be called after the working dir for the protocol have been set. (maybe after recovery from mapper) """ self._createFilenameTemplates() self._createIterTemplates() if not self.doContinue: self.continueRun.set(None) self.maskZero = False self.copyAlignment = False self.hasReferenceCTFCorrected = False # -------------------------- DEFINE param functions ----------------------- def _defineParams(self, form): self._defineConstants() self.IS_3D = not self.IS_2D form.addSection(label='Input') form.addParam('doContinue', BooleanParam, default=False, label='Continue from a previous run?', help='If you set to *Yes*, you should select a previous' 'run of type *%s* class and most of the input ' 'parameters will be taken from it.' % self.getClassName()) form.addParam('inputParticles', PointerParam, pointerClass='SetOfParticles', condition='not doContinue', important=True, label="Input particles", help='Select the input images from the project.') form.addParam('maskDiameterA', IntParam, default=-1, label='Particle mask diameter (A)', help='The experimental images will be masked with a ' 'soft circular mask with this <diameter>. ' 'Make sure this diameter is not set too small ' 'because that may mask away part of the signal! If ' 'set to a value larger than the image size no ' 'masking will be performed.\n\n' 'The same diameter will also be used for a ' 'spherical mask of the reference structures if no ' 'user-provided mask is specified.') form.addParam('continueRun', PointerParam, pointerClass=self.getClassName(), condition='doContinue', allowsNull=True, label='Select previous run', help='Select a previous run to continue from.') form.addParam('continueIter', StringParam, default='last', condition='doContinue', label='Continue from iteration', help='Select from which iteration do you want to ' 'continue. If you use *last*, then the last ' 'iteration will be used. Otherwise, a valid ' 'iteration number should be provided.') self.addSymmetry(form) form.addSection(label='CTF') form.addParam('continueMsg', LabelParam, default=True, condition='doContinue', label='CTF parameters are not available in continue mode') form.addParam('doCTF', BooleanParam, default=True, label='Do CTF-correction?', condition='not doContinue', help='If set to Yes, CTFs will be corrected inside the ' 'MAP refinement. The resulting algorithm ' 'intrinsically implements the optimal linear, or ' 'Wiener filter. Note that input particles should ' 'contains CTF parameters.') form.addParam('haveDataBeenPhaseFlipped', LabelParam, condition='not doContinue', label='Have data been phase-flipped? ' '(Don\'t answer, see help)', help='The phase-flip status is recorded and managed by ' 'Scipion. \n In other words, when you import or ' 'extract particles, \nScipion will record whether ' 'or not phase flipping has been done.\n\n' 'Note that CTF-phase flipping is NOT a necessary ' 'pre-processing step \nfor MAP-refinement in ' 'RELION, as this can be done inside the internal\n' 'CTF-correction. However, if the phases have been ' 'flipped, the program will handle it.') form.addParam('ignoreCTFUntilFirstPeak', BooleanParam, default=False, label='Ignore CTFs until first peak?', condition='not doContinue', help='If set to Yes, then CTF-amplitude correction will ' 'only be performed from the first peak ' 'of each CTF onward. This can be useful if the CTF ' 'model is inadequate at the lowest resolution. ' 'Still, in general using higher amplitude contrast ' 'on the CTFs (e.g. 10-20%) often yields better ' 'results. Therefore, this option is not generally ' 'recommended.') form.addParam('doCtfManualGroups', BooleanParam, default=False, label='Do manual grouping ctfs?', condition='not doContinue', help='Set this to Yes the CTFs will grouping manually.') form.addParam('defocusRange', FloatParam, default=1000, label='Defocus range for group creation (in Angstroms)', condition='doCtfManualGroups and not doContinue', help='Particles will be grouped by defocus.' 'This parameter is the bin for a histogram.' 'All particles assigned to a bin form a group') form.addParam('numParticles', FloatParam, default=10, label='minimum size for defocus group', condition='doCtfManualGroups and not doContinue', help='If defocus group is smaller than this value, ' 'it will be expanded until number of particles ' 'per defocus group is reached') form.addSection('Optimisation') form.addParam('numberOfClasses', IntParam, default=1, condition='not doContinue', label='Number of classes', help='The number of classes (K) for a multi-reference ' 'ab initio SGD refinement. These classes will be ' 'made in an unsupervised manner, starting from a ' 'single reference in the initial iterations of ' 'the SGD, and the references will become ' 'increasingly dissimilar during the in between ' 'iterations.') form.addParam('doFlattenSolvent', BooleanParam, default=True, condition='not doContinue', label='Flatten and enforce non-negative solvent?', help='If set to Yes, the job will apply a spherical ' 'mask and enforce all values in the reference ' 'to be non-negative.') form.addParam('symmetryGroup', StringParam, default='c1', condition='not doContinue', label="Symmetry", help='SGD sometimes works better in C1. If you make an ' 'initial model in C1 but want to run Class3D/Refine3D ' 'with a higher point group symmetry, the reference model ' 'must be rotated to conform the symmetry convention. ' 'You can do this by the relion_align_symmetry command.') group = form.addGroup('Sampling') group.addParam('angularSamplingDeg', EnumParam, default=1, choices=relion.ANGULAR_SAMPLING_LIST, label='Initial angular sampling (deg)', help='There are only a few discrete angular samplings' ' possible because we use the HealPix library to' ' generate the sampling of the first two Euler ' 'angles on the sphere. The samplings are ' 'approximate numbers and vary slightly over ' 'the sphere.') group.addParam('offsetSearchRangePix', FloatParam, default=6, label='Offset search range (pix)', help='Probabilities will be calculated only for ' 'translations in a circle with this radius (in ' 'pixels). The center of this circle changes at ' 'every iteration and is placed at the optimal ' 'translation for each image in the previous ' 'iteration.') group.addParam('offsetSearchStepPix', FloatParam, default=2, label='Offset search step (pix)', help='Translations will be sampled with this step-size ' '(in pixels). Translational sampling is also done ' 'using the adaptive approach. Therefore, if ' 'adaptive=1, the translations will first be ' 'evaluated on a 2x coarser grid.') form.addSection(label='SGD') self._defineSGD3(form) form.addParam('sgdNoiseVar', IntParam, default=-1, condition='not doContinue', expertLevel=LEVEL_ADVANCED, label='Increased noise variance half-life', help='When set to a positive value, the initial ' 'estimates of the noise variance will internally ' 'be multiplied by 8, and then be gradually ' 'reduced, having 50% after this many particles ' 'have been processed. By default, this option ' 'is switched off by setting this value to a ' 'negative number. In some difficult cases, ' 'switching this option on helps. In such cases, ' 'values around 1000 have found to be useful. ' 'Change the factor of eight with the additional ' 'argument *--sgd_sigma2fudge_ini*') form.addSection('Compute') self._defineComputeParams(form) form.addParam('extraParams', StringParam, default='', label='Additional arguments', help="In this box command-line arguments may be " "provided that are not generated by the GUI. This " "may be useful for testing developmental options " "and/or expert use of the program, e.g: \n" "--dont_combine_weights_via_disc\n" "--verb 1\n" "--pad 2\n") form.addParallelSection(threads=1, mpi=3) def _defineSGD3(self, form): """ Define SGD parameters for Relion version 3. """ group = form.addGroup('Iterations') group.addParam('numberOfIterInitial', IntParam, default=50, label='Number of initial iterations', help='Number of initial SGD iterations, at which the ' 'initial resolution cutoff and the initial subset ' 'size will be used, and multiple references are ' 'kept the same. 50 seems to work well in many ' 'cases. Increase if the correct solution is not ' 'found.') group.addParam('numberOfIterInBetween', IntParam, default=200, label='Number of in-between iterations', help='Number of SGD iterations between the initial and ' 'final ones. During these in-between iterations, ' 'the resolution is linearly increased, together ' 'with the mini-batch or subset size. In case of a ' 'multi-class refinement, the different references ' 'are also increasingly left to become dissimilar. ' '200 seems to work well in many cases. Increase ' 'if multiple references have trouble separating, ' 'or the correct solution is not found.') group.addParam('numberOfIterFinal', IntParam, default=50, label='Number of final iterations', help='Number of final SGD iterations, at which the ' 'final resolution cutoff and the final subset ' 'size will be used, and multiple references are ' 'left dissimilar. 50 seems to work well in many ' 'cases. Perhaps increase when multiple reference ' 'have trouble separating.') group.addParam('writeIter', IntParam, default=10, expertLevel=LEVEL_ADVANCED, label='Write-out frequency (iter)', help='Every how many iterations do you want to write the ' 'model to disk. Negative value means only write ' 'out model after entire iteration.') line = form.addLine('Resolution (A)', help='This is the resolution cutoff (in A) that ' 'will be applied during the initial and final ' 'SGD iterations. 35A and 15A respectively ' 'seems to work well in many cases.') line.addParam('initialRes', IntParam, default=35, label='Initial') line.addParam('finalRes', IntParam, default=15, label='Final') line = form.addLine('Mini-batch size', help='The number of particles that will be processed ' 'during the initial and final iterations. \n\n' 'For initial, 100 seems to work well in many ' 'cases. Lower values may result in wider ' 'searches of the energy landscape, but possibly ' 'at reduced resolutions. \n\n' 'For final, 300-500 seems to work well in many ' 'cases. Higher values may result in increased ' 'resolutions, but at increased computational ' 'costs.') line.addParam('initialBatch', IntParam, default=100, label='Initial') line.addParam('finalBatch', IntParam, default=500, label='Final')
[docs] def addSymmetry(self, container): pass
# -------------------------- INSERT steps functions ----------------------- # -------------------------- STEPS functions ------------------------------ def _getVolumes(self): """ Return the list of volumes generated. The number of volumes in the list will be equal to the number of classes requested by the user in the protocol. """ # Provide 1 as default value for making it backward compatible k = self.getAttributeValue('numberOfClasses', 1) pixelSize = self._getInputParticles().getSamplingRate() lastIter = self._lastIter() volumes = [] for i in range(1, k + 1): vol = Volume(self._getExtraPath('relion_it%03d_class%03d.mrc') % (lastIter, i)) vol.setSamplingRate(pixelSize) volumes.append(vol) return volumes
[docs] def createOutputStep(self): imgSet = self._getInputParticles() volumes = self._getVolumes() outImgSet = self._createSetOfParticles() outImgSet.copyInfo(imgSet) self._fillDataFromIter(outImgSet, self._lastIter()) if len(volumes) > 1: output = self._createSetOfVolumes() output.setSamplingRate(imgSet.getSamplingRate()) for vol in volumes: output.append(vol) self._defineOutputs(outputVolumes=output) else: output = volumes[0] self._defineOutputs(outputVolume=output) self._defineSourceRelation(self.inputParticles, output) self._defineOutputs(outputParticles=outImgSet) self._defineTransformRelation(self.inputParticles, outImgSet)
# -------------------------- INFO functions ------------------------------- def _validateNormal(self): errors = [] return errors def _validateContinue(self): errors = [] continueRun = self.continueRun.get() continueRun._initialize() lastIter = continueRun._lastIter() if self.continueIter.get() == 'last': continueIter = lastIter else: continueIter = int(self.continueIter.get()) if continueIter > lastIter: errors += ["The iteration from you want to continue must be " "%01d or less" % lastIter] return errors def _summaryNormal(self): summary = [] it = self._lastIter() or -1 if it >= 1: table = Table(fileName=self._getFileName('model', iter=it), tableName='model_general') row = table[0] resol = float(row.rlnCurrentResolution) summary.append("Current resolution: *%0.2f*" % resol) return summary def _summaryContinue(self): summary = ["Continue from iteration %01d" % self._getContinueIter()] return summary # -------------------------- UTILS functions ------------------------------ def _setBasicArgs(self, args): """ Return a dictionary with basic arguments. """ args.update({'--o': self._getExtraPath('relion'), '--oversampling': '1' }) if self.doFlattenSolvent: args['--flatten_solvent'] = '' if not self.doContinue: args.update({'--sym': self.symmetryGroup.get()}) args['--pad'] = 1 if self.skipPadding else 2 if self.skipGridding: args['--skip_gridding'] = '' self._setSGDArgs(args) self._setSamplingArgs(args) def _setSGDArgs(self, args): args['--sgd'] = '' args['--sgd_ini_iter'] = self.numberOfIterInitial.get() args['--sgd_inbetween_iter'] = self.numberOfIterInBetween.get() args['--sgd_fin_iter'] = self.numberOfIterFinal.get() args['--sgd_write_iter'] = self.writeIter.get() args['--sgd_ini_resol'] = self.initialRes.get() args['--sgd_fin_resol'] = self.finalRes.get() args['--sgd_ini_subset'] = self.initialBatch.get() args['--sgd_fin_subset'] = self.finalBatch.get() args['--K'] = self.numberOfClasses.get() if not self.doContinue: args['--denovo_3dref'] = '' args['--sgd_sigma2fudge_halflife'] = self.sgdNoiseVar.get() def _setSamplingArgs(self, args): """ Set sampling related params""" if not self.doContinue: args['--healpix_order'] = self.angularSamplingDeg.get() args['--offset_range'] = self.offsetSearchRangePix.get() args['--offset_step'] = self.offsetSearchStepPix.get() * 2 def _fillDataFromIter(self, imgSet, iteration): outImgsFn = self._getFileName('data', iter=iteration) imgSet.setAlignmentProj() px = imgSet.getSamplingRate() self.reader = convert.createReader(alignType=ALIGN_PROJ, pixelSize=px) mdIter = convert.Table.iterRows('particles@' + outImgsFn, key='rlnImageId') imgSet.copyItems(self._getInputParticles(), doClone=False, updateItemCallback=self._createItemMatrix, itemDataIterator=mdIter) def _createItemMatrix(self, item, row): self.reader.setParticleTransform(item, row)