Source code for reliontomo.protocols.protocol_3d_classify_subtomograms

# *
# * Authors:     Scipion Team
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion-users@lists.sourceforge.net'
# *
# **************************************************************************
from os import remove, listdir
from os.path import abspath, exists, isfile, join

from emtable import Table

from pyworkflow.object import Float
from pyworkflow.utils import moveFile
from reliontomo.protocols import ProtRelionRefineSubtomograms
from reliontomo.protocols.protocol_base_refine import ProtRelionRefineBase
from reliontomo import Plugin
from pyworkflow.protocol import FloatParam, BooleanParam, GE, LE
from reliontomo.utils import getProgram
from tomo.objects import SetOfClassesSubTomograms, SetOfAverageSubTomograms


[docs]class ProtRelion3DClassifySubtomograms(ProtRelionRefineSubtomograms): """3D Classification of subtomograms.""" _label = '3D Classification of subtomograms' modelTable = Table() classesTable = Table() opticsTable = Table() particlesTable = Table() def __init__(self, **args): ProtRelionRefineSubtomograms.__init__(self, **args) # -------------------------- DEFINE param functions ----------------------- def _defineParams(self, form): ProtRelionRefineSubtomograms._defineInputParams(form) ProtRelionRefineSubtomograms._defineReferenceParams(form) ProtRelionRefineSubtomograms._defineCTFParams(form) self._defineOptimisationParams(form) self._defineSamplingParams(form) ProtRelionRefineSubtomograms._defineComputeParams(form) ProtRelionRefineSubtomograms._defineAdditionalParams(form) @staticmethod def _defineOptimisationParams(form): ProtRelionRefineSubtomograms._insertOptimisationSection(form) ProtRelionRefineSubtomograms._insertNumOfClassesParam(form) ProtRelionRefineSubtomograms._insertRegularisationParam(form) ProtRelionRefineSubtomograms._insertNItersParam(form) form.addParam('useFastSubsets', BooleanParam, label='Use fast subsets (for large data sets)?', default=False, help='If set to Yes, the first 5 iterations will be done with random subsets of only K*1500 ' 'particles (K being the number of classes); the next 5 with K*4500 particles, the next ' '5 with 30% of the data set; and the final ones with all data. This was inspired by a ' 'cisTEM implementation by Niko Grigorieff et al.') ProtRelionRefineSubtomograms._insertMaskDiameterParam(form) ProtRelionRefineSubtomograms._insertZeroMaskParam(form) form.addParam('limitResolutionEStepTo', FloatParam, label='Limit resolution E-step to (Å)', default=-1, help='If set to a positive number, then the expectation step (i.e. the alignment) will be done ' 'only including the Fourier components up to this resolution (in Angstroms).\n\nThis is ' 'useful to prevent overfitting, as the classification runs in RELION are not guaranteed to ' 'be 100% overfitting-free (unlike the 3D auto-refine with its gold-standard FSC). ' 'In particular for very difficult data sets, e.g. of very small or featureless particles, ' 'this has been shown to give much better class averages. \n\nIn such cases, values in the ' 'range of 7-12 Angstroms have proven useful.') @staticmethod def _defineSamplingParams(form): form.addSection(label='Sampling') form.addParam('doImageAlignment', BooleanParam, label='Perform image alignment?', default=False, help='If set to No, then rather than performing both alignment and classification, only ' 'classification will be performed. This allows the use of very focused masks. It requires ' 'that the optimal orientations of all particles are already stored in the input STAR file.') ProtRelionRefineBase._insertAngularCommonParams(form, condition='doImageAlignment') form.addParam('doLocalAngleSearch', BooleanParam, label='Perform local angular searches?', default=False, condition='doImageAlignment', help="If set to Yes, then rather than performing exhaustive angular searches, local searches " "within the range given below will be performed.\n\nA prior Gaussian distribution centered " "at the optimal orientation in the previous iteration and with a stddev of 1/3 of the range " "given below will be enforced.") form.addParam('localAngularSearchRange', FloatParam, label='Local angular search range', condition='doImageAlignment and doLocalAngleSearch', default=5, validators=[GE(0), LE(15)], help="Local angular searches will be performed within +/- the given amount (in degrees) from " "the optimal orientation in the previous iteration.\n\nA Gaussian prior (also see previous " "option) will be applied, so that orientations closer to the optimal orientation in the " "previous iteration will get higher weights than those further away.") ProtRelionRefineSubtomograms._insertRelaxSymmetry(form, condition='doImageAlignment and doLocalAngleSearch') form.addParam('allowCoarser', BooleanParam, label='Allow coarser sampling?', default=False, condition='doImageAlignment', help="If set to Yes, the program will use coarser angular and translational samplings if the " "estimated accuracies of the assignments are still low in the earlier iterations. This may " "speed up the calculations.") # -------------------------- INSERT steps functions ----------------------- def _insertAllSteps(self): self._insertFunctionStep(self._classify3d) self._insertFunctionStep(self.createOutputStep) # -------------------------- STEPS functions ------------------------------ def _classify3d(self): nMpi = self.numberOfMpi.get() Plugin.runRelionTomo(self, getProgram('relion_refine', nMpi), self._genCl3dCommand(), numberOfMpi=nMpi)
[docs] def createOutputStep(self): # self._manageGeneratedFiles() subtomoSet = self.inputPseudoSubtomos.get() classes3D = self._createSetOfClassesSubTomograms(subtomoSet) self._fillClassesFromIter(classes3D, self.nIterations.get()) self._defineOutputs(outputClasses=classes3D) self._defineSourceRelation(subtomoSet, classes3D) # Create a SetOfVolumes and define its relations volumes = SetOfAverageSubTomograms.create(self._getPath(), template='avgSubtomograms%s.sqlite', suffix='') volumes.setSamplingRate(subtomoSet.getSamplingRate()) for class3D in classes3D: vol = class3D.getRepresentative() vol.setObjId(class3D.getObjId()) volumes.append(vol) self._defineOutputs(outputVolumes=volumes) self._defineSourceRelation(subtomoSet, volumes) # if not self.doContinue: # self._defineSourceRelation(self.referenceVolume, classes3D) # self._defineSourceRelation(self.referenceVolume, volumes) if self.keepOnlyLastIterFiles.get(): self._cleanUndesiredFiles()
# -------------------------- INFO functions ------------------------------- def _validate(self): pass # --------------------------- UTILS functions ----------------------------- def _genCl3dCommand(self): cmd = '--norm --scale ' # I/O args cmd += self._genIOBaseCmd() cmd += '--ref %s ' % self.referenceVolume.get().getFileName() if self.solventMask.get(): cmd += '--solvent_mask %s ' % self.solventMask.get().getFileName() if self.solventMask2.get(): cmd += '--solvent_mask2 %s ' % self.solventMask2.get().getFileName() # Reference args if self.isMapAbsoluteGreyScale.get(): cmd += 'firstiter_cc ' if self.initialLowPassFilterA.get(): cmd += '--ini_high %.2f ' % self.initialLowPassFilterA.get() cmd += '--sym %s ' % self.symmetry.get() # CTF args self._genCTFBaseCmd() # Optimisation args cmd += self._genOptimisationBaseCmd() if self.zeroMask.get(): cmd += '--zero_mask ' cmd += '--K %i ' % self.numberOfClasses.get() cmd += '--tau2_fudge %d ' % self.regularisation.get() cmd += '--iter %i ' % self.nIterations.get() if self.useFastSubsets.get(): cmd += '--fast_subsets ' if self.zeroMask.get(): cmd += '--zero_mask ' if self.limitResolutionEStepTo.get() > 0: cmd += '--strict_highres_exp %d ' % self.limitResolutionEStepTo.get() # Sampling args if self.doImageAlignment.get(): cmd += '--healpix_order %i ' % self.angularSamplingDeg.get() cmd += 'offset_range %i ' % self.offsetSearchRangePix.get() cmd += '--offset_step %d ' % (2 * self.offsetSearchStepPix.get()) if self.doLocalAngleSearch.get(): cmd += '--sigma_ang %d ' % self.localAngularSearchRange.get() if self.relaxSym.get(): cmd += '--relax_sym %s ' % self.relaxSym.get() if self.allowCoarser.get(): cmd += '--allow_coarser_sampling ' # Compute args cmd += self._genComputeBaseCmd() cmd += '--pad %i ' % (1 if self.skipPadding.get() else 2) # Additional args cmd += self._genAddiotionalBaseCmd() return cmd def _createSetOfClassesSubTomograms(self, subTomograms, suffix=''): classes = SetOfClassesSubTomograms.create(self._getPath(), template='subtomogramClasses%s.sqlite', suffix=suffix) classes.setImages(subTomograms) return classes def _fillClassesFromIter(self, clsSet, iteration): """ Create the SetOfClasses3D from a given iteration. """ self._loadClassifyInfo(iteration) clsSet.classifyItems(updateItemCallback=self._updateParticle, updateClassCallback=self._updateClass, itemDataIterator=self.particlesTable.__iter__()) def _loadClassifyInfo(self, iteration): """ Read some information about the produced Relion 3D classes from the *model.star file. """ self._classesInfo = {} # store classes info, indexed by class id modelStar = self._getIterGenFileName('model', iteration) with open(modelStar) as fid: self.modelTable.readStar(fid, 'model_general') self.classesTable.readStar(fid, 'model_classes') dataStar = self._getIterGenFileName('data', iteration) with open(dataStar) as fid: self.opticsTable.readStar(fid, 'optics') self.particlesTable.readStar(fid, 'particles') # Model table has only one row, while classes table has the same number of rows as classes found self.nClasses = int(self.modelTable._rows[0].rlnNrClasses) # Adapt data to the format expected by classifyItems and its callbacks for i, row in zip(range(self.nClasses), self.classesTable.__iter__()): self._classesInfo[i + 1] = (i + 1, row) @staticmethod def _updateParticle(item, row): item.setClassId(row.rlnClassNumber)#rlnGroupNumber)) item._rlnLogLikeliContribution = Float(row.rlnLogLikeliContribution) item._rlnMaxValueProbDistribution = Float(row.rlnMaxValueProbDistribution) def _updateClass(self, item): classId = item.getObjId() if classId in self._classesInfo: _, row = self._classesInfo[classId] fn = row.rlnReferenceImage + ":mrc" item.setAlignment3D() item.getRepresentative().setLocation(fn) item._rlnclassDistribution = Float(row.rlnClassDistribution) item._rlnAccuracyRotations = Float(row.rlnAccuracyRotations) item._rlnAccuracyTranslations = Float(row.rlnAccuracyTranslationsAngst) def _cleanUndesiredFiles(self): """Remove all files generated by relion_classify 3d excepting the ones which correspond to the last iteration. Example for iteration 25: relion_it025_class002.mrc relion_it025_class001.mrc relion_it025_model.star relion_it025_sampling.star relion_it025_optimiser.star relion_it025_data.star """ itPref = 'relion_it' clPref = 'class' starExt = '.star' mrcExt = '.mrc' # Classify calculations related files calcFiles = ['data', 'model', 'optimiser', 'sampling'] for i in range(self._lastIter()): for calcFile in calcFiles: fn = abspath(self._getExtraPath('{}{:03d}_{}{}'.format( itPref, i, calcFile, starExt))) if exists(fn): remove(fn) # Classes related files for itr in range(1, self.nClasses + 1): fn = abspath(self._getExtraPath('{}{:03d}_{}{:03d}{}'.format( itPref, i, clPref, itr, mrcExt))) if exists(fn): remove(fn) def _manageGeneratedFiles(self): """There's some kind of bug in relion4 which makes it generate the file in the protocol base directory instead of the extra directory. It uses extra as a prefix of each generated file instead. Hence, until it's solved, the files will be moved to the extra directory and the prefix extra_ will be removed""" prefix = 'extra_' genFiles = [f for f in listdir(self._getPath()) if isfile(join(self._getPath(), f))] for f in genFiles: if f.startswith(prefix): moveFile(self._getPath(f), self._getExtraPath(f.replace(prefix, ''))) def _getIterGenFileName(self, fileType, iteration): # Pattern to reproduce is it[3 digit number]_[file type].star return self._getExtraPath('it%03d_%s.star' % (iteration, fileType))