# Source code for eman2.protocols.protocol_tomo_initialmodel

# **************************************************************************
# *
# * Authors:     Adrian Quintana (adrian@eyeseetea.com) [1]
# *              Arnau Sanchez  (arnau@eyeseetea.com) [1]
# *
# * [1] EyeSeeTea Ltd, London, UK
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 3 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
from pwem.protocols import EMProtocol
from pyworkflow.protocol import params
from pyworkflow.utils.path import makePath

import eman2
from eman2.convert import writeSetOfSubTomograms, getLastParticlesParams, updateSetOfSubTomograms

from tomo.protocols import ProtTomoBase
from tomo.objects import AverageSubTomogram, SetOfSubTomograms, SetOfAverageSubTomograms


class EmanProtTomoInitialModel(EMProtocol, ProtTomoBase):
    """
    This protocol wraps *e2spt_sgd.py* EMAN2 program.

    It will take a set of subtomograms (particles) and, optionally, a
    subtomogram (reference) and build a subtomogram suitable for use as
    initial model in tomography.
    It also builds a set of subtomograms that contains the original particles
    plus the score, coverage and align matrix per subtomogram.
    """
    _label = 'tomo initial model'
    # Sub-directory (inside the protocol "extra" path) handed to e2spt_sgd.py
    # through --path and read back when the outputs are registered.
    OUTPUT_DIR = 'sptsgd_00'

    def __init__(self, **kwargs):
        EMProtocol.__init__(self, **kwargs)

    # --------------------------- DEFINE param functions ----------------------
    def _defineParams(self, form):
        """Declare the protocol form: input pointers and optimization options."""
        form.addSection(label='Input')
        form.addParam('particles', params.PointerParam,
                      pointerClass='SetOfSubTomograms',
                      label="Particles", important=True,
                      help='Select the set of subtomograms to build an initial model')

        form.addParam('reference', params.PointerParam,
                      pointerClass='Volume', allowsNull=True,
                      label="Reference volume",
                      help='Specify a 3D volume')

        form.addParam('mask', params.PointerParam,
                      label='Mask',
                      allowsNull=True,
                      pointerClass='VolumeMask',
                      help='Select a 3D Mask to be applied to the initial model')

        form.addSection(label='Optimization')
        form.addParam('symmetry', params.TextParam, default='c1',
                      expertLevel=params.LEVEL_ADVANCED,
                      label='Symmetry',
                      help='Specify the symmetry.\nChoices are: c(n), d(n), '
                           'h(n), tet, oct, icos.\n'
                           'See http://blake.bcm.edu/emanwiki/EMAN2/Symmetry\n'
                           'for a detailed description of symmetry in Eman.')
        form.addParam('filterto', params.FloatParam, default=0.02,
                      expertLevel=params.LEVEL_ADVANCED,
                      label='Filterto',
                      help='Filter map to frequency after each iteration. Default is 0.02')
        form.addParam('fourier', params.BooleanParam, default=True,
                      expertLevel=params.LEVEL_ADVANCED,
                      label='Fourier',
                      help='Gradient descent in fourier space')
        form.addParam('batchSize', params.IntParam, default=12,
                      label='Batch Size',
                      help='SGD batch size')
        form.addParam('learningRate', params.FloatParam, default=0.1,
                      expertLevel=params.LEVEL_ADVANCED,
                      label='Learn Rate',
                      help='Learning Rate. Default is 0.1')
        form.addParam('numberOfIterations', params.IntParam, default=5,
                      label='Number of iterations to perform',
                      help='The total number of refinement to perform.')
        form.addParam('numberOfBatches', params.IntParam, default=10,
                      label='Number of batches',
                      help='Number of batches per iteration')
        form.addParam('shrink', params.IntParam, default=1,
                      expertLevel=params.LEVEL_ADVANCED,
                      label='Shrink factor',
                      help='Using a box-size >64 is not optimal for making '
                           'initial models. Suggest using this option to '
                           'shrink the input particles by an integer amount '
                           'prior to reconstruction. Default = 1, no shrinking')
        form.addParam('applySim', params.BooleanParam, default=False,
                      expertLevel=params.LEVEL_ADVANCED,
                      label='Apply Symmetry',
                      help='Apply Symmetry')

    # --------------------------- INSERT steps functions ----------------------
    def _insertAllSteps(self):
        """Register the three steps: convert, run e2spt_sgd.py, create outputs."""
        self._insertFunctionStep('convertImagesStep')
        self._insertFunctionStep('createInitialModelStep')
        self._insertFunctionStep('createOutputStep')

    # --------------------------- STEPS functions -----------------------------
    def convertImagesStep(self):
        """Get Scipion references to subtomograms and write hdf files for
        eman2 to process into the ``particles`` folder of the extra path.
        """
        partSet = self.particles.get()
        partAlign = partSet.getAlignment()
        storePath = self._getExtraPath("particles")
        makePath(storePath)
        writeSetOfSubTomograms(partSet, storePath, alignType=partAlign)

    def createInitialModelStep(self):
        """Build the e2spt_sgd.py command line from the form values and run it."""
        reference = self.reference.get()
        mask = self.mask.get()

        command_params = {
            'symmetry': self.symmetry.get(),
            'filterto': self.filterto.get(),
            'batchSize': self.batchSize.get(),
            'learningRate': self.learningRate.get(),
            'numberOfIterations': self.numberOfIterations.get(),
            'numberOfBatches': self.numberOfBatches.get(),
            # Pass the mask *file name* (as done for the reference); formatting
            # the mask object itself with %s would not yield a usable path.
            'mask': mask.getFileName() if mask else None,
            'shrink': self.shrink.get(),
            'reference': reference.getFileName() if reference else None,
            'outputPath': self.getOutputPath(),
        }

        # Positional argument: every hdf file written by convertImagesStep.
        args = '%s/*.hdf' % self._getExtraPath("particles")
        if command_params['reference']:
            args += ' --reference=%(reference)s'

        args += (' --sym=%(symmetry)s --filterto=%(filterto)f'
                 ' --batchsize=%(batchSize)d --learnrate=%(learningRate)f --niter=%(numberOfIterations)d'
                 ' --nbatch=%(numberOfBatches)d')

        # Optional flags are only appended when they differ from "off".
        if command_params['shrink'] > 1:
            args += ' --shrink=%(shrink)d'
        if self.fourier.get():
            args += ' --fourier'
        if self.applySim.get():
            args += ' --applysim'
        if command_params['mask']:
            args += ' --mask=%(mask)s'

        args += ' --path=%(outputPath)s'

        program = eman2.Plugin.getProgram("e2spt_sgd.py")
        # Expand the template once so the logged and executed commands match.
        commandArgs = args % command_params
        self._log.info('Launching: ' + program + ' ' + commandArgs)
        self.runJob(program, commandArgs)

    def createOutputStep(self):
        """Register the average subtomogram and the updated particle set."""
        particles = self.particles.get()

        # --shrink is only sent to e2spt_sgd.py when the factor is > 1, so any
        # smaller value means "no shrinking" and must not scale the sampling.
        shrinkFactor = max(1, self.shrink.get())
        outputSamplingRate = particles.getSamplingRate() * shrinkFactor

        # Output 1: Subtomogram
        averageSubTomogram = AverageSubTomogram()
        averageSubTomogram.setFileName(self.getOutputPath('output.hdf'))
        averageSubTomogram.setSamplingRate(outputSamplingRate)
        setOfAverageSubTomograms = self._createSet(SetOfAverageSubTomograms, 'subtomograms%s.sqlite', "")
        setOfAverageSubTomograms.copyInfo(particles)
        setOfAverageSubTomograms.setSamplingRate(outputSamplingRate)
        setOfAverageSubTomograms.append(averageSubTomogram)

        # Output 2: setOfSubTomograms
        particleParams = getLastParticlesParams(self.getOutputPath())
        outputSetOfSubTomograms = self._createSet(SetOfSubTomograms, 'subtomograms%s.sqlite', "particles")
        outputSetOfSubTomograms.setCoordinates3D(particles.getCoordinates3D())
        outputSetOfSubTomograms.copyInfo(particles)
        # copyInfo is called first on purpose: the sampling rate set below
        # must override whatever was copied from the input set.
        outputSetOfSubTomograms.setSamplingRate(outputSamplingRate)
        updateSetOfSubTomograms(particles, outputSetOfSubTomograms, particleParams)

        self._defineOutputs(averageSubTomogram=setOfAverageSubTomograms, outputParticles=outputSetOfSubTomograms)
        self._defineSourceRelation(self.particles, setOfAverageSubTomograms)
        self._defineSourceRelation(self.particles, outputSetOfSubTomograms)

    def getOutputPath(self, *args):
        """Return a path inside the e2spt_sgd.py output folder.

        :param args: optional path components appended after ``OUTPUT_DIR``.
        """
        return self._getExtraPath(self.OUTPUT_DIR, *args)

    def _methods(self):
        """Return the method description lines for this protocol."""
        methods = [
            "Created an initial model using e2spt_sgd.py (stochastic gradient descent)",
        ]
        particles = self.particles.get()
        # The input pointer may still be unset; skip the particle details then.
        if particles is not None:
            methods.append(
                "A total of %d particles of dimensions %s were used (shrink %d)"
                % (particles.getSize(), particles.getDimensions(), self.shrink.get()))
        return methods

    def _summary(self):
        """Return the summary lines: particle count and, if any, the reference."""
        lines = []
        particles = self.particles.get()
        # The input pointer may still be unset; only report what is available.
        if particles is not None:
            lines.append("Particles: %d" % particles.getSize())
        reference = self.reference.get()
        if reference:
            lines.append("Reference file used: %s" % reference.getFileName())
        return lines