Source code for reliontomo.protocols.protocol_base_refine

# *
# * Authors:     Scipion Team
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion-users@lists.sourceforge.net'
# *
# **************************************************************************
from os import remove
from os.path import abspath, exists
from pwem.protocols import EMProtocol
from pyworkflow import BETA
from pyworkflow.protocol import PointerParam, LEVEL_ADVANCED, IntParam, StringParam, BooleanParam, \
    EnumParam, PathParam, FloatParam, LEVEL_NORMAL, GE, LE
from pyworkflow.utils import Message
from reliontomo.constants import ANGULAR_SAMPLING_LIST, SYMMETRY_HELP_MSG, IN_PARTICLES_STAR
from reliontomo.protocols import ProtRelionMakePseudoSubtomograms


[docs]class ProtRelionRefineBase(EMProtocol): """Base protocol used for the getting the initial model and performing the auto-refinment""" _devStatus = BETA # -------------------------- DEFINE param functions ----------------------- def _defineParams(self, form): pass # I/O PARAMS ------------------------------------------------------------------------------------------------------- @staticmethod def _defineIOParams(form): form.addSection(label=Message.LABEL_INPUT) form.addParam('inOptSet', PointerParam, pointerClass='relionTomoMetadata', label='Input Relion Tomo Metadata', important=True, allowsNull=False) # CTF PARAMS ------------------------------------------------------------------------------------------------------- @staticmethod def _defineCTFParams(form): form.addSection(label='CTF') form.addParam('doCTF', BooleanParam, default=True, label='Do CTF-correction?', help='If set to Yes, CTFs will be corrected inside the MAP refinement. ' 'The resulting algorithm intrinsically implements the optimal linear, ' 'or Wiener filter. Note that input particles should contains CTF parameters.') form.addParam('ignoreCTFUntilFirstPeak', BooleanParam, default=False, label='Ignore CTFs until first peak?', help='If set to Yes, then CTF-amplitude correction will only be performed from the first peak ' 'of each CTF onward. This can be useful if the CTF model is inadequate at the lowest ' 'resolution. Still, in general using higher amplitude contrast on the CTFs (e.g. 10-20%) ' 'often yields better results. Therefore, this option is not generally recommended.') # OPTIMIZATION PARAMS ---------------------------------------------------------------------------------------------- @staticmethod def _insertOptimisationSection(form): form.addSection(label='Optimisation') @staticmethod def _insertVdamMiniBatchesParam(form): form.addParam('nVdamMiniBatches', IntParam, allowsNull=False, default=200, label='Number of VDAM mini-batches', help="How many iterations (i.e. mini-batches) to perform with the VDAM ((variable metric " "gradient descent with adaptive moments) algorithm. Using 200 (default) has given good " "results for many data sets. Using 100 will run faster, at the expense of some quality in " "the results.") @staticmethod def _insertRegularisationParam(form): form.addParam('regularisation', FloatParam, default=4, validators=[GE(0)], label='Regularisation parameter T', help="Bayes law strictly determines the relative weight between the contribution of the " "experimental data and the prior. However, in practice one may need to adjust this weight " "to put slightly more weight on the experimental data to allow optimal results. If it's set " "to 0, no regularisation will be applied. Values greater than 1 for this regularisation " "parameter (T in the JMB2011 paper) put more weight on the experimental data. Values " "around 2-4 have been observed to be useful for 3D initial model calculations.") @staticmethod def _insertNumOfClassesParam(form): form.addParam('numberOfClasses', IntParam, default=1, validators=[GE(1)], label='Number of classes to be defined') @staticmethod def _insertMaskDiameterParam(form): form.addParam('maskDiameter', IntParam, allowsNull=False, validators=[GE(0)], label='Circular mask diameter (Å)', help='Diameter of the circular mask that will be applied to the experimental images ' '(in Angstroms)') @staticmethod def _insertZeroMaskParam(form): form.addParam('zeroMask', BooleanParam, label='Mask individual particles with zeros?', default=True, help="If set to Yes, then in the individual particles, the area outside a circle with the " "radius of the particle will be set to zeros prior to taking the Fourier transform.\n\nThis " "will remove noise and therefore increase sensitivity in the alignment and classification.\n" "\nHowever, it will also introduce correlations between the Fourier components that are not " "modelled. When set to No, then the solvent area is filled with random noise, which " "prevents introducing correlations.\n\nHigh-resolution refinements (e.g. ribosomes or other " "large complexes in 3D auto-refine) tend to work better when filling the solvent area with " "random noise (i.e. setting this option to No), refinements of smaller complexes and most " "classifications go better when using zeros (i.e. setting this option to Yes).") @staticmethod def _insertFlattenSolventParam(form): form.addParam('flattenSolvent', BooleanParam, default=True, label='Flatten and enforce non-negative solvent?', help="If set to Yes, the job will apply a spherical mask and enforce all values in the " "reference to be non-negative.") @staticmethod def _insertSymmetryParam(form): form.addParam('symmetry', StringParam, label='Symmetry group', default='C1', help=SYMMETRY_HELP_MSG) @staticmethod def _insertDoInC1AndApplySymLaterParam(form): form.addParam('doInC1AndApplySymLater', BooleanParam, default=True, label='Run in C1 and apply symmetry later?', help="If set to Yes, the gradient-driven optimisation is run in C1 and the symmetry orientation " "is searched and applied later. If set to No, the entire optimisation is run in the " "symmetry point group indicated above.") # COMPUTE PARAMS --------------------------------------------------------------------------------------------------- @staticmethod def _defineComputeParams(form): form.addSection(label='Compute') form.addParam('parallelDiscIO', BooleanParam, default=True, label='Use parallel disc I/O?', help="If set to Yes, all MPI followers will read their own images from disc. Otherwise, only " "the leader will read images and send them through the network to the followers. Parallel " "file systems like gluster of fhgfs are good at parallel disc I/O. NFS may break with " "many followers reading in parallel. If your datasets contain particles with different " "box sizes, you have to say Yes.") form.addParam('pooledSubtomos', IntParam, default=1, validators=[GE(1)], label='Number of pooled particles', help="Particles are processed in individual batches by MPI followers. During each batch, a " "stack of particle images is only opened and closed once to improve disk access times.\n\n" "All particle images of a single batch are read into memory together. The size of these " "batches is at least one particle per thread used. This parameter controls how many " "particles are read together for each thread. If it is set to 3 and one uses 8 threads, " "batches of 3x8=24 particles will be read together.\n\nThis may improve performance on " "systems where disk access, and particularly metadata handling of disk access, is a " "problem. It has a modest cost of increased RAM usage.") form.addParam('skipGridding', BooleanParam, default=True, label='Skip griding?', help='Skip gridding in the Maximization step in the Expectation-Maximization algorithm. ' 'If this option is set to Yes, more memory will be consumed during the protocol execution, ' 'but it will be faster.') form.addParam('allParticlesRam', BooleanParam, default=False, label='Pre-read all particles into RAM?', help='If set to Yes, the leader process read all particles into memory. Be careful you have ' 'enough RAM for large data sets!') form.addParam('combineItersDisc', BooleanParam, default=False, label='Combine iterations through disc?', help='If set to Yes, the large arrays of summed weights will be sent through the MPI network ' 'instead of writing large files to disc.') form.addParam('scratchDir', PathParam, label='Copy particles to scratch directory', help='If provided, particle stacks will be copied to this local scratch disk prior for ' 'refinement.') form.addParam('doGpu', BooleanParam, default=False, label='Use GPU acceleration?', help='If set to Yes, it will use available gpu resources for some calculations.') form.addParam('gpusToUse', StringParam, condition='doGpu', default='0', label='GPUs to use:', help='It can be used to provide a list of which GPUs (e. g. "0:1:2:3") to use. MPI-processes are ' 'separated by ":", threads by ",". For example: "0,0:1,1:0,0:1,1"') # ADDITIONAL PARAMS ------------------------------------------------------------------------------------------------ @staticmethod def _defineAdditionalParams(form): form.addSection(label='Additional') form.addParam('keepOnlyLastIterFiles', BooleanParam, default=False, expertLevel=LEVEL_ADVANCED, label="Keep only files from last iteration?", help="If Yes is chosen, only the files which correspond to the last iteration will be saved " "in the protocol's extra directory. Otherwise, files corresponding to each iteration " "will be kept.") form.addParam('oversampling', IntParam, default=1, expertLevel=LEVEL_ADVANCED, label="Over-sampling", help="Adaptive oversampling order to speed-up calculations (0=no oversampling, 1=2x, 2=4x, etc)") form.addParam('extraParams', StringParam, label='Additional arguments', help="In this box command-line arguments may be provided that are not generated by the GUI. This " "may be useful for testing developmental options and/or expert use of the program, e.g: \n" "--verb 1\n" "--pad 2\n") form.addParallelSection(threads=1, mpi=1) # ANGULAR SAMPLING PARAMS ------------------------------------------------------------------------------------------ @staticmethod def _insertAngularCommonParams(form, expertLevel=LEVEL_NORMAL, angSampling=2, offsetRange=5, offsetStep=1, condition=True): form.addParam('angularSamplingDeg', EnumParam, default=angSampling, condition=condition, choices=ANGULAR_SAMPLING_LIST, label='Initial angular sampling interval (deg)', expertLevel=expertLevel, help='There are only a few discrete angular samplings possible because ' 'we use the HealPix library to generate the sampling of the first ' 'two Euler angles on the sphere. The samplings are approximate numbers ' 'and vary slightly over the sphere.') form.addParam('offsetSearchRangePix', IntParam, default=offsetRange, condition=condition, label='Initial offset search range (pix.)', expertLevel=expertLevel, validators=[GE(0.1), LE(30)], help='Probabilities will be calculated only for translations in a circle ' 'with this radius (in pixels). The center of this circle changes at ' 'every iteration and is placed at the optimal translation for each ' 'image in the previous iteration.') form.addParam('offsetSearchStepPix', FloatParam, default=offsetStep, condition=condition, label='Initial offset search step (pix.)', expertLevel=expertLevel, validators=[GE(0.1), LE(5)], help='Translations will be sampled with this step-size (in pixels). ' 'Translational sampling is also done using the adaptive approach. ' 'Therefore, if adaptive=1, the translations will first be evaluated' 'on a 2x coarser grid.') # -------------------------- INSERT steps functions ----------------------- def _insertAllSteps(self): pass # -------------------------- INFO functions ------------------------------- def _validate(self): msg = [] optSet = self.inOptSet.get() if ProtRelionMakePseudoSubtomograms().getClassName() not in optSet.getParticles(): msg.append('Input metadata is expected to come from making a set of pseudosubtomograms.') return msg # --------------------------- UTILS functions ----------------------------- def _genBaseCommand(self): cmd = '' cmd += self._genIOBaseCmd() # I/O args cmd += self._genCTFBaseCmd() # CTF args cmd += self._genOptimisationBaseCmd() # Optimisation args cmd += self._genComputeBaseCmd() # Compute args cmd += self._genAddiotionalBaseCmd() # Additional args return cmd def _genIOBaseCmd(self): cmd = '--i %s ' % self.inOptSet.get().getParticles() cmd += '--o %s ' % (self._getExtraPath() + '/') # If not, Relion will concatenate it directly as a prefix cmd += '--j %i ' % self.numberOfThreads return cmd def _genCTFBaseCmd(self): cmd = '' if self.doCTF.get(): cmd += '--ctf ' if self.ignoreCTFUntilFirstPeak.get(): cmd += '--ctf_intact_first_peak ' return cmd def _genOptimisationBaseCmd(self): cmd = '' cmd += '--particle_diameter %i ' % self.maskDiameter.get() return cmd def _genComputeBaseCmd(self): cmd = '' if not self.parallelDiscIO.get(): cmd += '--no_parallel_disc_io ' cmd += '--pool %i ' % self.pooledSubtomos.get() if self.allParticlesRam.get(): cmd += '--preread_images ' if self.skipGridding.get(): cmd += '--skip_gridding ' if not self.combineItersDisc.get(): cmd += '--dont_combine_weights_via_disc ' if self.scratchDir.get(): cmd += '--scratch_dir %s ' % self.scratchDir.get() if self.doGpu.get(): cmd += '--gpu "%s" ' % self.gpusToUse.get() return cmd def _genAddiotionalBaseCmd(self): cmd = '--oversampling %i ' % self.oversampling.get() if self.extraParams.get(): cmd += ' ' + self.extraParams.get() return cmd def _applyKeepIterFilesUserSelection(self): if self.keepOnlyLastIterFiles.get(): self._cleanUndesiredFiles() def _cleanUndesiredFiles(self): """Remove all files generated by relion_classify 3d excepting the ones which correspond to the last iteration. Example for iteration 25: relion_it025_class002.mrc relion_it025_class001.mrc relion_it025_model.star relion_it025_sampling.star relion_it025_optimiser.star relion_it025_data.star """ itPref = 'relion_it' clPref = 'class' starExt = '.star' mrcExt = '.mrc' # Classify calculations related files calcFiles = ['data', 'model', 'optimiser', 'sampling'] for i in range(self._lastIter()): for calcFile in calcFiles: fn = abspath(self._getExtraPath('{}{:03d}_{}{}'.format( itPref, i, calcFile, starExt))) if exists(fn): remove(fn) # Classes related files for itr in range(1, self.nClasses + 1): fn = abspath(self._getExtraPath('{}{:03d}_{}{:03d}{}'.format( itPref, i, clPref, itr, mrcExt))) if exists(fn): remove(fn)