Source code for xmipp3.protocols.protocol_postProcessing_deepPostProcessing

# -*- coding: utf-8 -*-
# **************************************************************************
# *
# * Authors:     Ruben Sanchez Garcia (rsanchez@cnb.csic.es)
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************

import os
from pyworkflow import VERSION_3_0
from pyworkflow.protocol.params import (PointerParam, FloatParam, EnumParam, LEVEL_ADVANCED,
                                        StringParam, GPU_LIST, BooleanParam, IntParam)
from pwem.protocols import ProtAnalysis3D
from pwem.objects import Volume
import xmipp3
from pyworkflow.utils import createLink

INPUT_VOL_BASENAME="inputVol.mrc"
INPUT_HALF1_BASENAME="inputHalf1.mrc"
INPUT_HALF2_BASENAME="inputHalf2.mrc"

INPUT_MASK_BASENAME="inputMask.mrc"
POSTPROCESS_VOL_BASENAME= "deepPostProcess.mrc"

[docs]class XmippProtDeepVolPostProc(ProtAnalysis3D, xmipp3.XmippProtocol):
    """    
    Given a map the protocol performs automatic deep post-processing to enhance visualization. Usage guide at
    https://github.com/rsanchezgarc/deepEMhancer
    """
    _label = 'deepEMhancer'
    _conda_env = 'xmipp_deepEMhancer'
    _lastUpdateVersion = VERSION_3_0

    NORMALIZATION_AUTO=0
    NORMALIZATION_STATS=1
    NORMALIZATION_MASK=2
    NORMALIZATION_OPTIONS=["Automatic normalization", "Normalization from statistics", "Normalization from binary mask"]

    TIGHT_MODEL=0
    WIDE_MODEL=1
    HI_RES=2
    MODEL_TARGET_OPTIONS=["tight target", "wide target", "highRes"]

    def __init__(self, **args):
        ProtAnalysis3D.__init__(self, **args)

    
    # --------------------------- DEFINE param functions ----------------------
    def _defineParams(self, form):
        form.addSection(label='Input')
        
        form.addHidden(GPU_LIST, StringParam, default='0',
                       label="Choose GPU ID",
                       help="GPU may have several cores. Set it to zero"
                            " if you do not know what we are talking about."
                            " First core index is 0, second 1 and so on. Select "
                            "the GPU ID in which the protocol will run (select only 1 GPU)")


        form.addParam('useHalfMapsInsteadVol', BooleanParam, default=False,
                      label="Would you like to use half maps?",
                      help='DeepEMhancer uses either half maps or non-sharpened non-masked input volumes. Please, select the type of input map(s) you will provide')

        form.addParam('halfMapsAttached', BooleanParam, default=True,
                      condition='useHalfMapsInsteadVol',
                      label="Are the half maps included in the volume?",
                      help='When you import a map, you can associate half maps to it. Select *yes* if the half maps are associated'
                           'to the input volume. If half maps are not associated, select *No* and'
                           'you will be able to provide then as regular maps')


        form.addParam('inputHalf1', PointerParam, pointerClass='Volume',
                      label="Volume Half 1", important=True,
                      condition='useHalfMapsInsteadVol and not halfMapsAttached',
                      help='Select half map 1 to apply deep postprocessing. ')

        form.addParam('inputHalf2', PointerParam, pointerClass='Volume',
                      label="Volume Half 2", important=True,
                      condition='useHalfMapsInsteadVol and not halfMapsAttached',
                      help='Select half map 2 to apply deep postprocessing. ')
        
        form.addParam('inputVolume', PointerParam, pointerClass='Volume',
                      label="Input Volume", important=True,
                      condition='not useHalfMapsInsteadVol or halfMapsAttached',
                      help='Select a volume to apply deep postprocessing. Unmasked, non-sharpened input required')


        form.addParam('normalization', EnumParam,
                      choices=self.NORMALIZATION_OPTIONS,
                      default=self.NORMALIZATION_AUTO,
                      label='Input normalization',
                      help='Input normalization is critical for the algorithm to work.\nIf you select *%s* input will be'
                           'automatically normalized (generally works but may fail).\nIf you select *%s* input will be'
                           'normalized according the statistics of the noise of the volume and thus, you will need to provide'
                           'the mean and standard deviation of the noise. Additionally, a binary mask (1 protein, 0 not protein) '
                           'for the protein can be used for normalization if you select *%s* . The mask should be as tight '
                           'as possible.\nnBad results may be obtained if normalization does not work, so you may want to try '
                           'different options if not good enough results are observerd'%tuple(self.NORMALIZATION_OPTIONS))

        form.addParam('inputMask', PointerParam, pointerClass='VolumeMask',
                      allowsNull=True,
                      condition=" normalization==%s"%self.NORMALIZATION_MASK,
                      label="binary mask",
                      help='The mask determines which voxels are protein (1) and which are not (0)')

        form.addParam('noiseMean', FloatParam,
                      allowsNull=True,
                      condition=" normalization==%s"%self.NORMALIZATION_STATS,
                      label="noise mean",
                      help='The mean of the noise used to normalize the input')

        form.addParam('noiseStd', FloatParam,
                      allowsNull=True,
                      condition=" normalization==%s"%self.NORMALIZATION_STATS,
                      label="noise standard deviation",
                      help='The standard deviation of the noise used to normalize the input')


        form.addParam('modelType', EnumParam,
                      condition=" normalization in [%s, %s]"%(self.NORMALIZATION_STATS,self.NORMALIZATION_AUTO),
                      choices=self.MODEL_TARGET_OPTIONS,
                      default=self.TIGHT_MODEL,
                      label='Model power',
                      help='Select the deep learning model to use.\nIf you select *%s* the postprocessing will be more sharpen,'
                           ' but some regions of the protein could be masked out.\nIf you select *%s* input will be less sharpen'
                           ' but most of the regions of the protein will be preserved\nOption *%s*,  is recommended for high'
                           ' resolution volumes'%tuple(self.MODEL_TARGET_OPTIONS))


        form.addParam('performCleaningStep', BooleanParam,
                      default=False, expertLevel=LEVEL_ADVANCED,
                      label='Remove small CC after processing',
                      help='If you set to *Yes*, a post-processing step will be launched to remove small connected components'
                           'that are likely noise. This step may remove protein in some unlikely situations, but generally, it'
                           'slighly improves results')

        form.addParam('sizeFraction_CC', FloatParam, default=0.05,
                      allowsNull=False,  expertLevel=LEVEL_ADVANCED,
                      condition=" performCleaningStep",
                      label="Relative size (0. to 1.) CC to remove",
                      help='The relative size of a small connected component to be removed, as the fraction of total voxels>0 ')

        form.addParam('batch_size', IntParam, default=8,
                      allowsNull=False,  expertLevel=LEVEL_ADVANCED,
                      label="Batch size",
                      help='Number of cubes to process simultaneously. Make it lower if CUDA Out Of Memory error happens and increase it if low GPU performance observed')

    # --------------------------- INSERT steps functions --------------------------------------------

    def _insertAllSteps(self):
            # Convert input into xmipp Metadata format

        self._insertFunctionStep('convertInputStep')
        self._insertFunctionStep('deepVolPostProStep')
        self._insertFunctionStep('createOutputStep')

    def _inputVol2Mrc(self, inputFname, outputFname):
        if inputFname.endswith(".mrc") or inputFname.endswith(".map"):
          if not os.path.exists(outputFname):
            createLink(inputFname, outputFname)
        else:
          self.runJob('xmipp_image_convert', " -i %s -o %s:mrc -t vol" % (inputFname, outputFname))

[docs]    def convertInputStep(self):
        """ Read the input volume.
        """

        if self.useHalfMapsInsteadVol.get():
          if self.halfMapsAttached.get():
            half1Fname, half2Fname = self.inputVolume.get().getHalfMaps().split(',')
          else:
            half1Fname, half2Fname =self.inputHalf1.get().getFileName(), self.inputHalf2.get().getFileName()

          self._inputVol2Mrc(half1Fname, self._getTmpPath(INPUT_HALF1_BASENAME))
          self._inputVol2Mrc(half2Fname, self._getTmpPath(INPUT_HALF2_BASENAME))

        else:
          self._inputVol2Mrc(self.inputVolume.get().getFileName(), self._getTmpPath(INPUT_VOL_BASENAME))

        if  self.inputMask.get() is not None:
          self._inputVol2Mrc(self.inputMask.get().getFileName(), self._getTmpPath(INPUT_MASK_BASENAME))


[docs]    def deepVolPostProStep(self):
        outputFname= self._getExtraPath(POSTPROCESS_VOL_BASENAME)
        if os.path.isfile(outputFname):
          return

        if self.useHalfMapsInsteadVol.get():
          half1= self._getTmpPath(INPUT_HALF1_BASENAME)
          half2= self._getTmpPath(INPUT_HALF2_BASENAME)
          params=" -i %s -i2 %s"%(half1, half2)
        else:
          inputFname = self._getTmpPath(INPUT_VOL_BASENAME)
          params=" -i %s "%inputFname

        params+=" -o %s "%outputFname
        params+= " --sampling_rate %f "%(self.inputVolume.get().getSamplingRate() if self.inputVolume.get() is not None
                                                                          else self.inputHalf1.get().getSamplingRate())
        params+= " -b %s " %(self.batch_size)

        if self.useQueueForSteps() or self.useQueue():
          params += ' -g all '
        else:
          params += ' -g %s' % (",".join([str(elem) for elem in self.getGpuList()]))

        if self.normalization==self.NORMALIZATION_MASK:
          params+= " --binaryMask %s "%(self._getTmpPath(INPUT_MASK_BASENAME))
        elif self.normalization==self.NORMALIZATION_STATS:
          params+= " --noise_stats_mean %f --noise_stats_std %f "%(self.noiseMean, self.noiseStd)


        if self.performCleaningStep:
          params+= " --cleaningStrengh %f" %self.sizeFraction_CC.get()
        else:
          params+= " --cleaningStrengh -1 "

        if  self.normalization in [self.NORMALIZATION_AUTO, self.NORMALIZATION_STATS]:
          if self.modelType == self.TIGHT_MODEL:
            params+= " --checkpoint %s "%self.getModel("deepEMhancer", "production_checkpoints/deepEMhancer_tightTarget.hd5")
          elif self.modelType == self.HI_RES:
            params+= " --checkpoint  %s "%self.getModel("deepEMhancer", "production_checkpoints/deepEMhancer_highRes.hd5")
          else:
            params+= " --checkpoint  %s "%self.getModel("deepEMhancer", "production_checkpoints/deepEMhancer_wideTarget.hd5")
        else: #self.NORMALIZATION_MASK
          params+= " --checkpoint  %s "%self.getModel("deepEMhancer", "production_checkpoints/deepEMhancer_masked.hd5")

        os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
        self.runJob("xmipp_deep_volume_postprocessing", params, numberOfMpi=1)

                                                        
[docs]    def createOutputStep(self):

        volume=Volume()
        volume.setFileName(self._getExtraPath(POSTPROCESS_VOL_BASENAME))

        if self.useHalfMapsInsteadVol.get():
            if self.halfMapsAttached.get():
                inVol = self.inputVolume.get()
            else:
                inVol = self.inputHalf1.get()

            volume.setSamplingRate(inVol.getSamplingRate())
            volume.setOrigin(inVol.getOrigin(force=True))

            self._defineOutputs(Volume=volume)
            self._defineTransformRelation(inVol, volume)
            if not self.halfMapsAttached.get():
              self._defineTransformRelation(self.inputHalf2, volume)
        else:
            inVol = self.inputVolume.get()
            volume.setSamplingRate(inVol.getSamplingRate())
            volume.setOrigin(inVol.getOrigin(force=True))

            self._defineOutputs(Volume=volume)
            self._defineTransformRelation(self.inputVolume, volume)


                
    # --------------------------- INFO functions ------------------------------

    def _methods(self):
        messages = []
        messages.append(
                "Information about the method in " + "Sanchez-Garcia et al., 2020 ( https://doi.org/10.1101/2020.06.12.148296 )")
        return messages
    
    def _summary(self):
        summary = []
        if self.useHalfMapsInsteadVol.get():
          summary.append("Input: half maps")
        else:
          summary.append("Input: raw data map")

        if self.normalization == self.NORMALIZATION_AUTO:
          summary.append("Normalization: auto")
        elif self.normalization == self.NORMALIZATION_STATS:
          summary.append("Normalization: manual statistics")
        elif self.normalization == self.NORMALIZATION_MASK:
          summary.append("Normalization: from mask")

        return summary

    def _validate(self):
        """ Check if the installation of this protocol is correct.
        Can't rely on package function since this is a "multi package" package
        Returning an empty list means that the installation is correct
        and there are not errors. If some errors are found, a list with
        the error messages will be returned.
        """
        error=self.validateDLtoolkit(model="deepEMhancer")

        return error
    
    def _citations(self):
        return ['Sanchez-Garcia, 2020, https://doi.org/10.1101/2020.06.12.148296']