# Source code for emantomo.protocols.protocol_tomo_boxing_convnet

# **************************************************************************
# *
# * Authors:     David Herreros Calero (dherreros@cnb.csic.es) [1]
# *
# * [1] Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************

import os
import glob


from pwem.emlib.image import ImageHandler

from pyworkflow import BETA
import pyworkflow.utils as pwutils
from pyworkflow.utils.properties import Message
from pyworkflow.protocol.params import IntParam, BooleanParam, StringParam, USE_GPU, GPU_LIST, LEVEL_ADVANCED

from tomo.protocols import ProtTomoPicking
from tomo.objects import SetOfCoordinates3D
import tomo.constants as const

from emantomo.convert import loadJson, readSetOfCoordinates3D
import emantomo


class EmanProtTomoConvNet(ProtTomoPicking):
    """Eman Deep Learning based picking for Tomography
    """
    # Protocol label and development status exposed as class attributes.
    _label = 'tomo boxer convnet'
    _devStatus = BETA
    # nn_boxSize = 96

    def __init__(self, **kwargs):
        ProtTomoPicking.__init__(self, **kwargs)

    # --------------------------- DEFINE param functions ----------------------
    def _defineParams(self, form):
        """Add the GPU, box size and group id parameters to the form.

        The parameters declared by ProtTomoPicking are defined first; the
        ones below are appended to them.
        """
        ProtTomoPicking._defineParams(self, form)
        form.addHidden(USE_GPU, BooleanParam, default=True,
                       label="Use GPU for execution",
                       help="This protocol has both CPU and GPU implementation.\
                           Select the one you want to use.")
        form.addHidden(GPU_LIST, StringParam, default='0',
                       expertLevel=LEVEL_ADVANCED,
                       label="Choose GPU IDs",
                       help="Add a list of GPU devices that can be used")
        form.addParam('boxSize', IntParam, label="Box Size",
                      default=96,
                      help='Final box size for the coordinates')
        form.addParam('groupId', IntParam, label="GroupId", default=1,
                      help="Select a group ID that will be given to the particles. This value is useful to indentify "
                           "different structures in a SetOfCoordinates3D when different sets are joint.")

    # --------------------------- STEPS functions -----------------------------
    def _insertAllSteps(self):
        """Register the conversion step followed by the interactive boxing step."""
        self._insertFunctionStep('convertInputStep')
        self._insertFunctionStep('launchBoxingGUIStep', interactive=True)

    def convertInputStep(self):
        """Prepare the working folders read by the boxer.

        Creates the ``tomograms`` and ``info`` folders under the protocol
        extra path. For every input tomogram it creates a link named after
        the tomogram base name (without extension) inside ``tomograms`` and
        writes the matching ``*_info.json`` file inside ``info``.
        """
        out_path = self._getExtraPath('tomograms')
        info_path = self._getExtraPath('info')
        pwutils.makePath(out_path)
        pwutils.makePath(info_path)
        for tomogram in self.inputTomograms.get().iterItems():
            tomo_file = tomogram.getFileName()
            # Tomograms are linked rather than converted or rescaled, which
            # avoids duplicating the volumes on disk.
            out_file = os.path.join(out_path, pwutils.removeBaseExt(tomo_file))
            pwutils.createLink(tomo_file, out_file)
            self.writeInfoJson(tomo_file, info_path)

    def launchBoxingGUIStep(self):
        """Run ``e2spt_boxer_convnet.py`` and register the picked coordinates.

        The program is launched with the extra path as working directory.
        When GPU usage is enabled, the first id of the GPU list is passed
        through ``--gpuid``. The output is created once the program returns.
        """
        program = emantomo.Plugin.getProgram("e2spt_boxer_convnet.py")
        args = "--label particles_00"
        if self.useGpu.get():
            args += " --gpuid %s" % self.getGpuList()[0]
        pwutils.runJob(None, program, args, env=emantomo.Plugin.getEnviron(), cwd=self._getExtraPath())
        self._createOutput()

    def _createOutput(self):
        """Build the output SetOfCoordinates3D from the ``*_info.json`` files.

        One set is created for all the input tomograms. Tomograms without a
        matching info file are skipped. The set is defined as a protocol
        output, related to the input tomograms and finally closed.
        """
        setTomograms = self.inputTomograms.get()
        outPath = self._getExtraPath("info")
        coord3DSetDict = {}
        suffix = self._getOutputSuffix(SetOfCoordinates3D)
        coord3DSet = self._createSetOfCoordinates3D(setTomograms, suffix)
        coord3DSet.setName("tomoCoord")
        coord3DSet.setPrecedents(setTomograms)
        coord3DSet.setSamplingRate(setTomograms.getSamplingRate())
        coord3DSet.setBoxSize(self.boxSize.get())
        for tomogram in setTomograms.iterItems():
            # Only the part of the file name before the first "__" is used
            # to locate the info file of this tomogram.
            tomoBase = pwutils.removeBaseExt(tomogram.getFileName().split("__")[0])
            files = glob.glob(os.path.join(outPath, '*%s_info.json' % tomoBase))

            if not files or not os.path.isfile(files[0]):
                continue

            jsonBoxDict = loadJson(files[0])

            # Every class index found maps to the same output set.
            index = int((list(jsonBoxDict["class_list"].keys()))[0])
            coord3DSetDict[index] = coord3DSet

            # Populate Set of 3D Coordinates with 3D Coordinates
            # FIXME: Correct the scaling factor when there is a mismatch between the sr in the header and in Scipion
            readSetOfCoordinates3D(jsonBoxDict, coord3DSetDict, tomogram.clone(),
                                   origin=const.CENTER_GRAVITY, groupId=self.groupId.get())

        name = self.OUTPUT_PREFIX + suffix
        self._defineOutputs(**{name: coord3DSet})
        self._defineSourceRelation(setTomograms, coord3DSet)

        # Update Outputs
        for outputSet in coord3DSetDict.values():
            self._updateOutputSet(name, outputSet, state=outputSet.STREAM_CLOSED)

    # --------------------------- UTILS functions -----------------------------
    def writeInfoJson(self, tomo_file, info_path):
        """Write the initial ``<tomogram base name>_info.json`` file.

        The file holds an empty list of boxes, the sampling rate of the
        input set and a single class "0" named ``particles_00``.

        :param tomo_file: tomogram file name; only its base name is used.
        :param info_path: folder where the JSON file is written.
        """
        # The box size of the class is fixed to 96 here; it does not follow
        # the 'boxSize' parameter of the form.
        contents = '{ "boxes_3d": [], "apix_unbin": %.2f, ' \
                   '"class_list": { "0": { "boxsize": 96, "name": ' \
                   '"particles_00"} } }' % (self.inputTomograms.get().getSamplingRate())
        info_file = os.path.join(info_path, pwutils.removeBaseExt(tomo_file) + "_info.json")
        with open(info_file, 'w') as fid:
            fid.write(contents)

    # --------------------------- INFO functions -----------------------------
    def getInfo(self, output):
        """Return a text with the size and box size of the given output set."""
        msg = '\tNumber of particles picked: *%d* \n' % output.getSize()
        msg += '\tParticle box size: *%d*' % output.getBoxSize()
        return msg

    def _getOutputInfoMsgs(self):
        """Return one message per output, or the "no output" message."""
        if self.getOutputsSize() < 1:
            return [Message.TEXT_NO_OUTPUT_CO]

        msgs = []
        for _, output in self.iterOutputAttributes():
            msg = self.getInfo(output)
            msgs.append("%s: \n %s" % (self.getObjectTag(output), msg))
        return msgs

    def _methods(self):
        """Return the methods messages (same content as the summary)."""
        return self._getOutputInfoMsgs()

    def _summary(self):
        """Return the summary messages describing each output."""
        return self._getOutputInfoMsgs()

    def validate(self):
        """Check that the input tomograms can be handled by the boxer.

        Unless the installed plugin version is ``V_CB``, the X and Y
        dimensions of the first tomogram must match.

        :return: list of error messages; empty when the input is valid.
        """
        # NOTE(review): this overrides the public ``validate`` instead of a
        # ``_validate`` hook -- confirm this is intended.
        errors = []
        inputSet = self.inputTomograms.get()
        firstTomo = inputSet.getFirstItem() if inputSet is not None else None
        if firstTomo is None:
            # Report the problem instead of failing with an AttributeError.
            errors.append("Error: input tomograms are missing or empty.")
            return errors

        dim = firstTomo.getDimensions()
        if dim[0] != dim[1] and not emantomo.Plugin.isVersion(emantomo.constants.V_CB):
            errors.append("Error: input tomograms must be square. Please, use a resizing protocol or reconstruct "
                          "your tomograms so X and Y dimensions match.")
        return errors

    def _warnings(self):
        """Return the list of warnings; none are issued currently."""
        warnings = []
        return warnings