# -*- coding: utf-8 -*-
# **************************************************************************
# *
# * Authors: Ruben Sanchez Garcia (rsanchez@cnb.csic.es)
# *
# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307 USA
# *
# * All comments concerning this program package may be sent to the
# * e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
import os
from pyworkflow import VERSION_3_0
from pyworkflow.protocol.params import (PointerParam, FloatParam, EnumParam, LEVEL_ADVANCED,
StringParam, GPU_LIST, BooleanParam, IntParam)
from pwem.protocols import ProtAnalysis3D
from pwem.objects import Volume
from xmipp3.base import XmippProtocol
from pyworkflow.utils import createLink
# Base names of the files the protocol stages in its working folders.
# The input files are placed in the tmp folder, the result in the extra folder.
INPUT_VOL_BASENAME = "inputVol.mrc"
INPUT_HALF1_BASENAME = "inputHalf1.mrc"
INPUT_HALF2_BASENAME = "inputHalf2.mrc"
INPUT_MASK_BASENAME = "inputMask.mrc"
POSTPROCESS_VOL_BASENAME = "deepPostProcess.mrc"
[docs]class XmippProtDeepVolPostProc(ProtAnalysis3D, XmippProtocol):
"""
Given a map, the protocol performs automatic deep post-processing to enhance
visualization. Usage guide at https://github.com/rsanchezgarc/deepEMhancer
AI Generated
## Overview
The deepEMhancer protocol performs automatic deep-learning-based
post-processing of cryo-EM maps.
Its purpose is to improve the visual interpretability of a map by enhancing
structural features, sharpening density, and suppressing noise-like regions.
The protocol uses a trained neural network model from deepEMhancer and
produces a post-processed volume that can be inspected, interpreted, or used
for visualization.
deepEMhancer is especially useful when the user wants to obtain a visually
clearer map from an unsharpened, unmasked input volume or from a pair of half
maps. However, the output should be interpreted carefully. The protocol
enhances the map using a learned model; it does not replace standard
validation, FSC analysis, local resolution estimation, or careful inspection of
the original data.
## Inputs and General Workflow
The protocol can work with either:
- a single input volume;
- two half maps.
The input data are converted or linked as MRC files. If half maps are used,
they may be read from the half-map information attached to an imported volume,
or they may be provided explicitly as two separate volumes.
The protocol then runs the deepEMhancer post-processing program using the
selected normalization strategy, neural-network model, GPU, and batch size. The
resulting post-processed map is registered in Scipion as an output volume.
The output volume preserves the sampling rate and origin information from the
input volume or half maps.
## Input Volume
When **Would you like to use half maps?** is set to **No**, the protocol uses a
single **Input Volume**.
This input should be an unmasked and non-sharpened map. This is important
because deepEMhancer expects input maps that have not already been strongly
post-processed. If a map has already been sharpened, masked, or aggressively
filtered, the neural-network output may be less reliable or may overemphasize
features introduced by previous processing.
The input volume should correspond to the structure that the user wants to
enhance for visualization and interpretation.
## Use of Half Maps
When **Would you like to use half maps?** is set to **Yes**, the protocol uses
two half maps instead of a single full map.
Half maps are independently reconstructed maps from two halves of the particle
data. They contain useful information about signal and noise consistency and
are often the preferred input for post-processing and validation workflows.
The protocol supports two ways of providing half maps:
- using half maps already attached to an imported volume;
- providing half map 1 and half map 2 explicitly as separate volumes.
Using half maps can give the post-processing method additional information
about reproducible signal and noise behavior.
## Half Maps Attached to the Volume
If **Are the half maps included in the volume?** is set to **Yes**, the
protocol obtains the half-map file names from the selected input volume.
This is the usual option when the volume was imported or generated in Scipion
with associated half maps.
If this option is set to **No**, the user must provide **Volume Half 1** and
**Volume Half 2** manually.
The two half maps should correspond to the same reconstruction, have the same
box size, sampling rate, origin, and orientation, and represent independent
halves of the same dataset.
## Input Normalization
The **Input normalization** parameter is one of the most important settings of
the protocol.
Normalization is critical because the neural network expects map intensities
to be on a scale compatible with the data used during training. Poor
normalization can lead to poor enhancement, excessive sharpening, loss of
density, or artificial-looking results.
The protocol provides three normalization modes:
- automatic normalization;
- normalization from noise statistics;
- normalization from a binary mask.
If the result is not satisfactory, trying a different normalization strategy is
often one of the first things to do.
## Automatic Normalization
With **Automatic normalization**, the protocol estimates the required
normalization automatically.
This is the simplest option and is often a good starting point. It avoids the
need to provide a mask or explicit noise statistics.
However, automatic normalization may fail in some cases, especially if the map
has unusual intensity distribution, strong artifacts, large empty regions,
strong masking effects, or non-standard preprocessing.
If the output looks too aggressive, too weak, or biologically implausible, the
user should consider trying one of the other normalization modes.
## Normalization from Statistics
With **Normalization from statistics**, the user provides the mean and standard
deviation of the noise.
The parameters are:
- **noise mean**;
- **noise standard deviation**.
This mode gives the user more control over the input intensity scaling. It can
be useful when the noise statistics are known or can be estimated reliably from
background regions.
Incorrect noise statistics can strongly affect the result. A wrong standard
deviation may cause the model to under-enhance or over-enhance the map.
## Normalization from Binary Mask
With **Normalization from binary mask**, the user provides a binary mask
indicating which voxels correspond to protein and which correspond to
background.
The mask should contain:
- value 1 for protein or molecular density;
- value 0 for non-protein or background.
The mask should be as tight as possible while still including the relevant
protein density. A mask that is too loose may include too much background in
the normalization. A mask that is too tight may exclude real density and affect
the enhancement.
When this normalization mode is selected, the protocol uses a model checkpoint
specifically intended for masked normalization.
## Model Power
The **Model power** parameter selects which deepEMhancer model target is used.
The available options are:
**Tight target** produces a more sharpened result. It may enhance structural
features strongly, but in some cases it may also remove or suppress weak
regions of the protein.
**Wide target** is less aggressive. It usually preserves more regions of the
protein, although the output may appear less sharply enhanced.
**HighRes** is recommended for high-resolution volumes.
The choice depends on the quality of the map and the purpose of the
post-processing. For visualization, the tight model may be attractive, but the
wide model can be safer when weak or flexible regions should be preserved.
## Cleaning Small Connected Components
The option **Remove small CC after processing** enables an additional cleaning
step that removes small connected components after post-processing.
These small components are often noise-like isolated regions. Removing them can
make the output map cleaner.
The cleaning strength is controlled by **Relative size CC to remove**, which
defines the relative size of connected components to remove as a fraction of
the total number of positive voxels.
This option can slightly improve visual results, but it should be used with
care. In unusual cases, small real protein regions could be removed, especially
for fragmented, flexible, or low-occupancy density.
## Batch Size
The **Batch size** parameter controls how many cubes of the volume are processed
simultaneously by the neural network.
A larger batch size may improve GPU utilization and speed. A smaller batch size
uses less GPU memory.
If a CUDA out-of-memory error occurs, reduce the batch size. If GPU memory is
underused and processing is slow, increasing the batch size may improve
performance.
This parameter affects computational performance, not the biological meaning of
the output.
## GPU Execution
deepEMhancer uses GPU execution.
The protocol allows selecting the GPU ID through the hidden GPU parameter. In a
queue environment, the protocol can use the GPU resources assigned by the
queue. Otherwise, it uses the selected GPU list.
The protocol also enables TensorFlow GPU memory growth to reduce the chance
that the process reserves all GPU memory at once.
If the required deep-learning toolkit or trained model is not available, the
protocol validation reports an installation error.
## Output Volume
The main output is **Volume**, the deepEMhancer post-processed map.
The output volume is written as an MRC file and registered in Scipion with the
same sampling rate and origin as the input map or half maps.
This volume is intended primarily for enhanced visualization and
interpretation. It may help reveal secondary-structure elements, connectivity,
and local features more clearly than the raw or unsharpened input map.
The output should be compared with the original map and, when available, with
the half maps, FSC curves, and local-resolution estimates.
## Interpretation and Cautions
deepEMhancer produces an enhanced map using a learned model. This can be very
useful, but it also means that the output should not be interpreted as a purely
experimental density map in the same way as the original reconstruction.
Enhanced features should be checked against the input map, half maps, and
independent validation evidence. This is especially important for weak density,
flexible regions, ligands, peripheral domains, or regions near the noise level.
The protocol is a post-processing and visualization tool. It does not replace
map validation.
## Practical Recommendations
Use unmasked, non-sharpened maps as input when using a single volume.
Use half maps when they are available, because they provide information about
reproducible signal.
Start with automatic normalization. If the result is unsatisfactory, try
normalization from a binary mask or from noise statistics.
Use the tight model when stronger sharpening is desired and the density is
robust. Use the wide model when preserving weak or extended regions is more
important. Use the high-resolution model for high-resolution maps.
Inspect the output together with the original input map. Do not rely only on
the enhanced map for biological conclusions.
Reduce the batch size if GPU memory errors occur.
Use the cleaning option cautiously, especially for maps with small real
features, flexible regions, or fragmented density.
## Final Perspective
deepEMhancer is a deep-learning-based post-processing protocol for improving
the visual quality of cryo-EM maps.
For biological users, its main value is that it can produce clearer and more
interpretable density maps, especially for visualization, figure preparation,
and model-building guidance.
The enhanced map should be treated as an aid to interpretation, not as a
replacement for the original reconstruction or for standard validation
procedures. Used carefully, deepEMhancer can be a powerful tool for revealing
structural features while keeping the user aware of the need for independent
validation.
"""
# Protocol-level metadata attributes.
_label = 'deepEMhancer'
_conda_env = 'xmipp_deepEMhancer'
_lastUpdateVersion = VERSION_3_0

# Values of the 'normalization' enum parameter; each one is the index of the
# matching label in NORMALIZATION_OPTIONS.
NORMALIZATION_AUTO = 0
NORMALIZATION_STATS = 1
NORMALIZATION_MASK = 2
NORMALIZATION_OPTIONS = [
    "Automatic normalization",
    "Normalization from statistics",
    "Normalization from binary mask",
]

# Values of the 'modelType' enum parameter; each one is the index of the
# matching label in MODEL_TARGET_OPTIONS.
TIGHT_MODEL = 0
WIDE_MODEL = 1
HI_RES = 2
MODEL_TARGET_OPTIONS = [
    "tight target",
    "wide target",
    "highRes",
]
def __init__(self, **args):
    # Forward every keyword argument unchanged to the ProtAnalysis3D
    # initializer; this class adds no construction logic of its own.
    ProtAnalysis3D.__init__(self, **args)
# --------------------------- DEFINE param functions ----------------------
def _defineParams(self, form):
    """ Declare the parameters shown in the protocol form.

    The form contains the (hidden) GPU selector, the choice between a single
    input volume and half maps, the input normalization mode together with
    its mode-specific inputs (binary mask or noise statistics), the model
    selector, and the advanced options (connected-component cleaning and
    batch size).

    :param form: form object on which the section and params are registered.
    """
    form.addSection(label='Input')

    form.addHidden(GPU_LIST, StringParam, default='0',
                   label="Choose GPU ID",
                   help="GPU may have several cores. Set it to zero"
                        " if you do not know what we are talking about."
                        " First core index is 0, second 1 and so on. Select "
                        "the GPU ID in which the protocol will run (select only 1 GPU)")

    # --- Input map(s) -----------------------------------------------------
    form.addParam('useHalfMapsInsteadVol', BooleanParam, default=False,
                  label="Would you like to use half maps?",
                  help='DeepEMhancer uses either half maps or non-sharpened non-masked input volumes. Please, select the type of input map(s) you will provide')

    # NOTE: adjacent string literals are concatenated verbatim, so every
    # fragment that continues on the next line must end with a space.
    form.addParam('halfMapsAttached', BooleanParam, default=True,
                  condition='useHalfMapsInsteadVol',
                  label="Are the half maps included in the volume?",
                  help='When you import a map, you can associate half maps to it. Select *yes* if the half maps are associated '
                       'to the input volume. If half maps are not associated, select *No* and '
                       'you will be able to provide them as regular maps')

    form.addParam('inputHalf1', PointerParam, pointerClass='Volume',
                  label="Volume Half 1", important=True,
                  condition='useHalfMapsInsteadVol and not halfMapsAttached',
                  help='Select half map 1 to apply deep postprocessing. ')

    form.addParam('inputHalf2', PointerParam, pointerClass='Volume',
                  label="Volume Half 2", important=True,
                  condition='useHalfMapsInsteadVol and not halfMapsAttached',
                  help='Select half map 2 to apply deep postprocessing. ')

    form.addParam('inputVolume', PointerParam, pointerClass='Volume',
                  label="Input Volume", important=True,
                  condition='not useHalfMapsInsteadVol or halfMapsAttached',
                  help='Select a volume to apply deep postprocessing. Unmasked, non-sharpened input required')

    # --- Normalization ----------------------------------------------------
    form.addParam('normalization', EnumParam,
                  choices=self.NORMALIZATION_OPTIONS,
                  default=self.NORMALIZATION_AUTO,
                  label='Input normalization',
                  help='Input normalization is critical for the algorithm to work.\nIf you select *%s* input will be '
                       'automatically normalized (generally works but may fail).\nIf you select *%s* input will be '
                       'normalized according to the statistics of the noise of the volume and thus, you will need to provide '
                       'the mean and standard deviation of the noise. Additionally, a binary mask (1 protein, 0 not protein) '
                       'for the protein can be used for normalization if you select *%s* . The mask should be as tight '
                       'as possible.\nBad results may be obtained if normalization does not work, so you may want to try '
                       'different options if not good enough results are observed' % tuple(self.NORMALIZATION_OPTIONS))

    form.addParam('inputMask', PointerParam, pointerClass='VolumeMask',
                  allowsNull=True,
                  condition=" normalization==%s" % self.NORMALIZATION_MASK,
                  label="binary mask",
                  help='The mask determines which voxels are protein (1) and which are not (0)')

    form.addParam('noiseMean', FloatParam,
                  allowsNull=True,
                  condition=" normalization==%s" % self.NORMALIZATION_STATS,
                  label="noise mean",
                  help='The mean of the noise used to normalize the input')

    form.addParam('noiseStd', FloatParam,
                  allowsNull=True,
                  condition=" normalization==%s" % self.NORMALIZATION_STATS,
                  label="noise standard deviation",
                  help='The standard deviation of the noise used to normalize the input')

    # --- Model ------------------------------------------------------------
    # Only offered for automatic/statistics normalization; the processing
    # step uses a dedicated checkpoint when mask normalization is selected.
    form.addParam('modelType', EnumParam,
                  condition=" normalization in [%s, %s]" % (self.NORMALIZATION_STATS, self.NORMALIZATION_AUTO),
                  choices=self.MODEL_TARGET_OPTIONS,
                  default=self.TIGHT_MODEL,
                  label='Model power',
                  help='Select the deep learning model to use.\nIf you select *%s* the postprocessing will be more sharpen,'
                       ' but some regions of the protein could be masked out.\nIf you select *%s* input will be less sharpen'
                       ' but most of the regions of the protein will be preserved\nOption *%s*, is recommended for high'
                       ' resolution volumes' % tuple(self.MODEL_TARGET_OPTIONS))

    # --- Advanced options -------------------------------------------------
    form.addParam('performCleaningStep', BooleanParam,
                  default=False, expertLevel=LEVEL_ADVANCED,
                  label='Remove small CC after processing',
                  help='If you set to *Yes*, a post-processing step will be launched to remove small connected components '
                       'that are likely noise. This step may remove protein in some unlikely situations, but generally, it '
                       'slightly improves results')

    form.addParam('sizeFraction_CC', FloatParam, default=0.05,
                  allowsNull=False, expertLevel=LEVEL_ADVANCED,
                  condition=" performCleaningStep",
                  label="Relative size (0. to 1.) CC to remove",
                  help='The relative size of a small connected component to be removed, as the fraction of total voxels>0 ')

    form.addParam('batch_size', IntParam, default=8,
                  allowsNull=False, expertLevel=LEVEL_ADVANCED,
                  label="Batch size",
                  help='Number of cubes to process simultaneously. Make it lower if CUDA Out Of Memory error happens and increase it if low GPU performance observed')
# --------------------------- INSERT steps functions --------------------------------------------
def _insertAllSteps(self):
# Convert input into xmipp Metadata format
self._insertFunctionStep('convertInputStep')
self._insertFunctionStep('deepVolPostProStep')
self._insertFunctionStep('createOutputStep')
def _inputVol2Mrc(self, inputFname, outputFname):
if inputFname.endswith(".mrc") or inputFname.endswith(".map"):
if not os.path.exists(outputFname):
createLink(inputFname, outputFname)
else:
self.runJob('xmipp_image_convert', " -i %s -o %s:mrc -t vol" % (inputFname, outputFname))
def convertInputStep(self):
    """ Stage the selected inputs as MRC files in the protocol tmp folder.

    Depending on the form choices this stages either the two half maps
    (taken from the input volume or given explicitly) or the single input
    volume. The binary mask is staged only when mask normalization is
    selected, because that is the only mode in which the processing step
    reads it.

    :raises ValueError: if the half maps are expected to be attached to the
        input volume but exactly two file names cannot be obtained from it,
        or if mask normalization is selected and no mask was provided.
    """
    if self.useHalfMapsInsteadVol.get():
        if self.halfMapsAttached.get():
            # getHalfMaps() is split on ',' below, so it is treated as a
            # comma-separated string; an empty/None value means no half maps.
            attached = self.inputVolume.get().getHalfMaps() or ""
            halfNames = [name.strip() for name in attached.split(',') if name.strip()]
            if len(halfNames) != 2:
                raise ValueError("The input volume does not have exactly two associated "
                                 "half maps (found %d). Provide the half maps explicitly "
                                 "or select a volume with half maps." % len(halfNames))
            half1Fname, half2Fname = halfNames
        else:
            half1Fname = self.inputHalf1.get().getFileName()
            half2Fname = self.inputHalf2.get().getFileName()
        self._inputVol2Mrc(half1Fname, self._getTmpPath(INPUT_HALF1_BASENAME))
        self._inputVol2Mrc(half2Fname, self._getTmpPath(INPUT_HALF2_BASENAME))
    else:
        self._inputVol2Mrc(self.inputVolume.get().getFileName(),
                           self._getTmpPath(INPUT_VOL_BASENAME))

    if self.normalization.get() == self.NORMALIZATION_MASK:
        mask = self.inputMask.get()
        if mask is None:
            # Fail here with a clear message instead of letting the
            # processing program fail later on a missing mask file.
            raise ValueError("Normalization from binary mask was selected but no "
                             "binary mask was provided.")
        self._inputVol2Mrc(mask.getFileName(), self._getTmpPath(INPUT_MASK_BASENAME))
def deepVolPostProStep(self):
    """ Build the command line and run ``xmipp_deep_volume_postprocessing``.

    The step returns immediately when the output file already exists.
    Otherwise the arguments are assembled from the form values: input
    file(s), output file, sampling rate, batch size, GPU selection,
    normalization inputs, cleaning strength and the model checkpoint.
    """
    outputFname = self._getExtraPath(POSTPROCESS_VOL_BASENAME)
    if os.path.isfile(outputFname):
        # The result is already on disk: nothing to recompute.
        return

    useHalves = self.useHalfMapsInsteadVol.get()
    if useHalves:
        half1 = self._getTmpPath(INPUT_HALF1_BASENAME)
        half2 = self._getTmpPath(INPUT_HALF2_BASENAME)
        params = " -i %s -i2 %s" % (half1, half2)
    else:
        params = " -i %s " % self._getTmpPath(INPUT_VOL_BASENAME)
    params += " -o %s " % outputFname

    # Read the sampling rate from the object that is actually selected as
    # input (same choice as createOutputStep). Testing only whether
    # inputVolume is set could pick a volume that is not the selected input
    # when explicit half maps are used.
    if useHalves and not self.halfMapsAttached.get():
        samplingSource = self.inputHalf1.get()
    else:
        samplingSource = self.inputVolume.get()
    params += " --sampling_rate %f " % samplingSource.getSamplingRate()

    params += " -b %s " % self.batch_size.get()

    if self.useQueueForSteps() or self.useQueue():
        params += ' -g all '
    else:
        params += ' -g %s' % (",".join([str(elem) for elem in self.getGpuList()]))

    normalization = self.normalization.get()
    if normalization == self.NORMALIZATION_MASK:
        params += " --binaryMask %s " % (self._getTmpPath(INPUT_MASK_BASENAME))
    elif normalization == self.NORMALIZATION_STATS:
        params += " --noise_stats_mean %f --noise_stats_std %f " % (self.noiseMean.get(),
                                                                    self.noiseStd.get())

    # The option name below is passed exactly as the original code spells it.
    if self.performCleaningStep.get():
        params += " --cleaningStrengh %f" % self.sizeFraction_CC.get()
    else:
        params += " --cleaningStrengh -1 "

    if normalization in (self.NORMALIZATION_AUTO, self.NORMALIZATION_STATS):
        checkpointByModel = {
            self.TIGHT_MODEL: "production_checkpoints/deepEMhancer_tightTarget.hd5",
            self.HI_RES: "production_checkpoints/deepEMhancer_highRes.hd5",
        }
        # Any other model type falls back to the wide-target checkpoint.
        checkpoint = checkpointByModel.get(self.modelType.get(),
                                           "production_checkpoints/deepEMhancer_wideTarget.hd5")
    else:  # self.NORMALIZATION_MASK
        checkpoint = "production_checkpoints/deepEMhancer_masked.hd5"
    params += " --checkpoint %s " % self.getModel("deepEMhancer_v016", checkpoint)

    # Set in this process' environment before launching the job.
    os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
    self.runJob("xmipp_deep_volume_postprocessing", params, numberOfMpi=1)
def createOutputStep(self):
    """ Register the post-processed map as the protocol output ``Volume``.

    The output takes its sampling rate and origin from the reference input
    (half map 1 when the half maps are given explicitly, the input volume
    otherwise) and is related to the input(s) it was computed from.
    """
    fromHalves = self.useHalfMapsInsteadVol.get()
    explicitHalves = fromHalves and not self.halfMapsAttached.get()

    reference = self.inputHalf1.get() if explicitHalves else self.inputVolume.get()

    outVol = Volume()
    outVol.setFileName(self._getExtraPath(POSTPROCESS_VOL_BASENAME))
    outVol.setSamplingRate(reference.getSamplingRate())
    outVol.setOrigin(reference.getOrigin(force=True))
    self._defineOutputs(Volume=outVol)

    if not fromHalves:
        self._defineTransformRelation(self.inputVolume, outVol)
        return

    self._defineTransformRelation(reference, outVol)
    if explicitHalves:
        self._defineTransformRelation(self.inputHalf2, outVol)
# --------------------------- INFO functions ------------------------------
def _methods(self):
messages = []
messages.append(
"Information about the method in " + "Sanchez-Garcia et al., 2020 ( https://doi.org/10.1101/2020.06.12.148296 )")
return messages
def _summary(self):
summary = []
if self.useHalfMapsInsteadVol.get():
summary.append("Input: half maps")
else:
summary.append("Input: raw data map")
if self.normalization == self.NORMALIZATION_AUTO:
summary.append("Normalization: auto")
elif self.normalization == self.NORMALIZATION_STATS:
summary.append("Normalization: manual statistics")
elif self.normalization == self.NORMALIZATION_MASK:
summary.append("Normalization: from mask")
return summary
def _validate(self):
""" Check if the installation of this protocol is correct.
Can't rely on package function since this is a "multi package" package
Returning an empty list means that the installation is correct
and there are not errors. If some errors are found, a list with
the error messages will be returned.
"""
error=self.validateDLtoolkit(model="deepEMhancer_v016")
return error
def _citations(self):
return ['Sanchez-Garcia, 2020, https://doi.org/10.1101/2020.06.12.148296']