# -*- coding: utf-8 -*-
# **************************************************************************
# *
# * Authors: Jose Luis Vilas (jlvilas@cnb.csic.es)
# *
# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307 USA
# *
# * All comments concerning this program package may be sent to the
# * e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
import numpy as np
from pyworkflow import VERSION_1_1
from pyworkflow.protocol.params import (PointerParam, BooleanParam, FloatParam,
LEVEL_ADVANCED, EnumParam, DeprecatedParam)
from pwem.protocols import ProtAnalysis3D
from pyworkflow.object import Float
from pwem.emlib.image import ImageHandler
from pyworkflow.utils import getExt
from pwem.objects import Volume
import pwem.emlib.metadata as md
from pwem.convert.headers import Ccp4Header
from pyworkflow import BETA, UPDATED, NEW, PROD
MONORES_METHOD_URL = 'https://github.com/I2PC/scipion/wiki/XmippProtMonoRes'
OUTPUT_RESOLUTION_FILE = 'monoresResolutionMap.mrc'
OUTPUT_RESOLUTION_FILE_CHIMERA = 'monoresResolutionChimera.mrc'
OUTPUT_MASK_FILE = 'refinedMask.mrc'
FN_MEAN_VOL = 'meanvol'
METADATA_MASK_FILE = 'metadataresolutions'
FN_METADATA_HISTOGRAM = 'hist.xmd'
BINARY_MASK = 'binarymask'
FN_GAUSSIAN_MAP = 'gaussianfilter'
[docs]class XmippProtMonoRes(ProtAnalysis3D):
"""
Assigns local resolution values to each voxel within a given 3D map,
providing detailed insight into regional map quality. This aids in
interpreting structural data by highlighting areas of varying resolution.
AI Generated
## Overview
The Local MonoRes protocol estimates the local resolution of a 3D cryo-EM map
using the monogenic-signal approach.
Global resolution values summarize the reconstruction with a single number, but
cryo-EM maps often have regions with different levels of detail. A rigid core
may be well resolved, while flexible domains, peripheral regions, or poorly
ordered parts may be resolved at lower resolution. Local MonoRes addresses this
by assigning a local-resolution value to each voxel inside a mask.
The result is a 3D local-resolution map. Each voxel value represents the
estimated resolution, in angstroms, at that position. Lower numerical values
indicate better local resolution, while higher numerical values indicate poorer
local resolution.
The protocol can estimate local resolution from a single full map or from two
half maps. When half maps are used, the noise estimation can be performed from
the difference between the two halves.
## Inputs and General Workflow
The protocol can work in two main modes:
- using a single input volume;
- using two half volumes.
The user may provide a binary mask. If no mask is provided, the protocol
generates an approximate mask automatically by smoothing and thresholding the
input volume.
The protocol then runs the Xmipp monogenic-signal local-resolution program over
the selected resolution range. The output local-resolution map is registered in
Scipion as a volume. A Chimera-oriented resolution map and a histogram of
resolution values are also generated.
## Use Half Volumes
The **Would you like to use half volumes?** option controls whether the
protocol uses one full map or two half maps.
If this option is disabled, the protocol estimates local resolution from the
selected full map.
If this option is enabled, the protocol uses two half maps. This allows the
noise estimation to be based on the difference between independently
reconstructed halves, which is often preferable for validation-oriented local
resolution analysis.
Use half maps when they are available and correctly associated with the
reconstruction.
## Half Maps Stored with the Input Volume
When half-volume mode is enabled, the **Are the half volumes stored with the
input volume?** option controls how the half maps are provided.
If this option is enabled, the user selects **Input Half Maps**, which is a
volume object containing associated half-map file names. The protocol reads the
two half maps from that volume.
If this option is disabled, the user must explicitly provide **Volume Half 1**
and **Volume Half 2**.
In both cases, the two half maps should have the same box size, sampling rate,
origin, orientation, and processing state.
## Input Volume
When half-volume mode is disabled, the **Input Volume** parameter defines the
map to be analyzed.
This map should be a reconstruction with a correct sampling rate and origin.
The sampling rate is required to express local-resolution values in angstroms.
The map should be reasonably preprocessed for local-resolution analysis.
Extremely noisy maps, maps with strong artifacts, or maps with inappropriate
masking can produce unreliable local-resolution estimates.
## Binary Mask
The **Binary Mask** parameter defines the region in which local resolution is
estimated.
The mask separates specimen from background. If the user provides a mask, the
protocol binarizes it using the selected mask threshold. If no mask is
provided, the protocol creates an approximate mask automatically by applying a
Gaussian filter to the input volume and thresholding the result.
A good mask should include the molecular density while excluding solvent and
irrelevant background. If the mask is too tight, it may remove real density. If
it is too loose, background noise may affect the local-resolution estimate.
## Mask Threshold
The **Mask threshold** parameter is used when the provided mask is not already
binary.
Mask values below the threshold are converted to 0, and values above the
threshold are converted to 1. This creates the binary mask used by MonoRes.
The default threshold is 0.5, which is appropriate for many masks whose values
range between 0 and 1.
If the input mask has a different intensity convention, the threshold may need
to be adjusted.
## Automatic Mask Generation
If no mask is provided, the protocol generates one automatically.
It applies a real-space Gaussian-like low-pass filter to the input volume, then
uses a threshold based on a fraction of the maximum filtered value. The result
is binarized and used as the analysis mask.
This automatic mask is useful for quick analysis, but a carefully prepared user
mask is usually preferable for final reporting.
Users should inspect the generated mask when possible, especially for maps with
weak density, flexible regions, or strong background artifacts.
## Exclude Area
The **Exclude Area** parameter allows the user to provide an additional mask
for a region that should be excluded from local-resolution estimation.
This is an advanced option. It may be useful when a particular part of the map,
such as an artifact, contaminant, or region outside the biological molecule,
should not influence the analysis.
The exclusion mask should be used carefully, because excluding biologically
relevant density may bias the local-resolution interpretation.
## Resolution Range
The **Resolution Range** parameters define the range of resolutions tested by
MonoRes.
The range is given in angstroms:
- **High** defines the high-resolution end of the search;
- **Low** defines the low-resolution limit;
- **Step** defines the spacing between tested resolution values.
The protocol validates that a low-resolution limit is provided.
The step size controls how finely the resolution range is sampled. A smaller
step gives finer resolution estimates but increases computation time. If no
step is provided, the protocol uses a default value of 0.5 Å.
## Significance
The **Significance** parameter controls the statistical hypothesis tests used
by MonoRes.
The default value is 0.95. This value determines how strict the method is when
deciding whether signal at a given resolution is significant.
A higher significance level is more stringent. A lower value is more
permissive, but may be more sensitive to noise.
Most users should start with the default value.
## Noise Estimation from Half Maps
When half maps are used, the **Use noise inside protein?** option controls how
the noise distribution is estimated.
When enabled, the noise distribution is estimated inside the protein region by
using the difference between the two half maps. This is recommended because it
uses the independent half-map information to characterize noise in the region
of interest.
If disabled, noise estimation follows the alternative behavior of the
underlying MonoRes program.
For most half-map workflows, the recommended setting is to keep this option
enabled.
## Gaussian Noise Assumption
The **Consider noise gaussian?** option tells the protocol to assume that the
noise in the map follows a Gaussian distribution.
This assumption is often approximately reasonable, but it may not always hold.
Noise in cryo-EM maps can be affected by reconstruction, masking, filtering,
preferred orientation, and other processing effects.
This is an advanced option. Users should enable it only when they want the
Gaussian-noise assumption used explicitly by the MonoRes calculation.
## Output Resolution Volume
The main output is **resolution_Volume**.
This output is a 3D volume where voxel values represent local resolution in
angstroms. It is assigned the sampling rate and origin of the input map or half
maps.
The volume can be visualized directly or used to color a cryo-EM map according
to local resolution.
Lower values indicate regions estimated to have better local resolution.
Higher values indicate regions estimated to have poorer local resolution.
## Chimera-Oriented Resolution Map
The protocol also produces a Chimera-oriented local-resolution map internally.
This version is used to compute the minimum and maximum resolution values
reported in the protocol summary. It may also be useful for visualization
depending on the workflow.
The summary reports the highest and lowest estimated local resolution values.
Here, “highest resolution” corresponds to the smallest angstrom value.
## Histogram
The protocol creates a histogram of local-resolution values inside the output
mask.
The histogram summarizes the distribution of resolution values across the map.
This is useful because two maps may have the same global resolution but very
different local-resolution distributions.
For example, one map may have a small well-resolved core and large poorly
resolved regions, while another may have more uniform resolution throughout
the structure.
## Interpreting the Local-Resolution Map
The local-resolution map should be interpreted as a spatial map-quality
estimate.
Regions with better local resolution usually correspond to more rigid,
well-ordered, and better-supported parts of the structure. Regions with worse
local resolution may correspond to flexible domains, low occupancy, weaker
alignment signal, preferred-orientation effects, or lower local particle
support.
Local resolution should not be interpreted in isolation. It should be compared
with the original density, half-map agreement, global FSC, map-model fit, and
biological knowledge.
## Practical Recommendations
Use half maps when available, especially for validation-oriented analysis.
Provide a good binary mask for final analyses. Automatic masks are convenient,
but user-defined masks are usually more reliable.
Set the low-resolution limit to cover the expected poorest local resolution in
the map.
Use a step size of 0.5 Å as a practical starting point. Use smaller steps only
when finer sampling is needed.
Keep the default significance value at first.
Inspect the output resolution map together with the original density. Check
whether flexible, peripheral, or poorly ordered regions show worse local
resolution, as expected.
Be cautious near mask boundaries, weak density, and regions affected by
post-processing artifacts.
## Final Perspective
Local MonoRes is a local-resolution estimation protocol based on the monogenic
signal.
For biological users, its main value is that it shows how map quality changes
across the reconstruction rather than reducing the entire map to a single
global resolution number.
The output local-resolution map is useful for interpreting structural
features, identifying flexible or poorly resolved regions, guiding model
building, and communicating regional confidence in a cryo-EM map.
"""
_label = 'local MonoRes'
_lastUpdateVersion = VERSION_1_1
_devStatus = PROD
def __init__(self, **args):
ProtAnalysis3D.__init__(self, **args)
self.min_res_init = Float()
self.max_res_init = Float()
# --------------------------- DEFINE param functions ----------------------
def _defineParams(self, form):
form.addSection(label='Input')
self.halfVolumes = DeprecatedParam('useHalfVolumes', self)
form.addParam('useHalfVolumes', BooleanParam, default=False,
label="Would you like to use half volumes?",
help='The noise estimation for determining the '
'local resolution is performed via half volumes.')
self.halfVolumesFile = DeprecatedParam('hasHalfVolumesFile', self)
form.addParam('hasHalfVolumesFile', BooleanParam, default=True,
condition='useHalfVolumes', allowsNull=True,
label="Are the half volumes stored with the input volume?",
help='Usually, the half volumes are stored as properties of '
'the input volume. If this is not the case, set this to '
'False and specify the two halves you want to use.')
self.inputVolumes = DeprecatedParam('fullMap', self)
form.addParam('fullMap', PointerParam, pointerClass='Volume',
label="Input Volume", important=True,
condition='not useHalfVolumes',
help='Select a volume for determining its '
'local resolution.')
form.addParam('associatedHalves', PointerParam, pointerClass='Volume',
label="Input Half Maps", important=True,
condition='useHalfVolumes and hasHalfVolumesFile',
help='Select a volume for determining its '
'local resolution.')
self.inputVolume = DeprecatedParam('halfMap1', self)
form.addParam('halfMap1', PointerParam, pointerClass='Volume',
label="Volume Half 1", important=True,
condition='useHalfVolumes and not hasHalfVolumesFile',
help='Select the first half of a volume for determining its '
'local resolution.')
self.inputVolume2 = DeprecatedParam('halfMap2', self)
form.addParam('halfMap2', PointerParam, pointerClass='Volume',
label="Volume Half 2", important=True,
condition='useHalfVolumes and not hasHalfVolumesFile',
help='Select the second half of a volume for determining a '
'local resolution.')
self.Mask = DeprecatedParam('mask', self)
form.addParam('mask', PointerParam, pointerClass='VolumeMask',
allowsNull=True,
label="Binary Mask",
help='The mask determines which points are specimen'
' and which are not')
form.addParam('maskExcl', PointerParam, pointerClass='VolumeMask',
expertLevel=LEVEL_ADVANCED,
allowsNull=True,
label="Exclude Area",
help='The mask determines the area of the protein to be'
'excluded in the estimation of the local resolution')
group = form.addGroup('Extra parameters')
line = group.addLine('Resolution Range (Å)',
help="The range of resolutions to be analysed."
"The local resolution will be estimated from low to high in "
"steps given by the step box (advanced parameter)")
group.addParam('significance', FloatParam, default=0.95,
expertLevel=LEVEL_ADVANCED,
label="Significance",
help='Resolution is computed using hypothesis tests, '
'this value determines the significance of that test')
self.maskthreshold = DeprecatedParam('maskThreshold', self)
group.addParam('maskThreshold', FloatParam, default=0.5,
expertLevel=LEVEL_ADVANCED,
label="Mask threshold",
help='If the provided mask is not binary. Then, MonoRes'
'will try to binarize it. Mask values below the threshold'
'will be change to 0 and above the thresthol will be 1')
self.noiseonlyinhalves = DeprecatedParam('noiseOnlyInHalves', self)
form.addParam('noiseOnlyInHalves', BooleanParam, expertLevel=LEVEL_ADVANCED,
default=True,
label="Use noise inside protein?",
condition='useHalfVolumes',
help='(Yes recommended) the noise distribution will be estimated'
' in the protein region (inside the mask) by means of the '
'difference of both half maps.')
form.addParam('gaussianNoise', BooleanParam, expertLevel=LEVEL_ADVANCED,
default=False,
label="Consider noise gaussian?",
help='It assumes the noise in the map as gaussian. '
'Note that this assumption might not be true, despite ,it is '
' in general.')
line.addParam('minRes', FloatParam, default=0, label='High')
line.addParam('maxRes', FloatParam, allowsNull=True, label='Low')
line.addParam('stepSize', FloatParam, allowsNull=True,
expertLevel=LEVEL_ADVANCED, label='Step')
form.addParallelSection(threads=4, mpi=0)
# --------------------------- INSERT steps functions --------------------------------------------
def _createFilenameTemplates(self):
""" Centralize how files are called """
myDict = {
FN_MEAN_VOL: self._getExtraPath('mean_volume.mrc'),
METADATA_MASK_FILE: self._getExtraPath('mask_data.xmd'),
BINARY_MASK: self._getExtraPath('binarized_mask.mrc'),
FN_GAUSSIAN_MAP: self._getExtraPath('gaussianfilted.mrc')
}
self._updateFilenamesDict(myDict)
def _insertAllSteps(self):
# Convert input into xmipp Metadata format
self._createFilenameTemplates()
self._insertFunctionStep('convertInputStep')
self._insertFunctionStep('resolutionMonogenicSignalStep')
self._insertFunctionStep('createOutputStep')
self._insertFunctionStep("createHistogram")
[docs] def ifNomask(self, fnVol):
if self.useHalfVolumes:
if not self.hasHalfVolumesFile:
xdim, _ydim, _zdim = self.halfMap1.get().getDim()
params = ' -i %s' % fnVol
else:
xdim, _ydim, _zdim = self.associatedHalves.get().getDim()
params = ' -i %s' % fnVol
else:
xdim, _ydim, _zdim = self.fullMap.get().getDim()
params = ' -i %s' % fnVol
params += ' -o %s' % self._getFileName(FN_GAUSSIAN_MAP)
setsize = 0.02 * xdim
params += ' --fourier real_gaussian %f' % setsize
self.runJob('xmipp_transform_filter', params)
img = ImageHandler().read(self._getFileName(FN_GAUSSIAN_MAP))
imgData = img.getData()
max_val = np.amax(imgData) * 0.05
params = ' -i %s' % self._getFileName(FN_GAUSSIAN_MAP)
params += ' --select below %f' % max_val
params += ' --substitute binarize'
params += ' -o %s' % self._getFileName(BINARY_MASK)
self.runJob('xmipp_transform_threshold', params)
self.maskFn = self._getFileName(BINARY_MASK)
[docs] def resolutionMonogenicSignalStep(self):
if self.stepSize.hasValue():
freq_step = self.stepSize.get()
else:
freq_step = 0.5
samplingRate = -1
if self.useHalfVolumes:
params = ' --vol %s' % self.vol1Fn
params += ' --vol2 %s' % self.vol2Fn
if self.hasHalfVolumesFile:
samplingRate = self.associatedHalves.get().getSamplingRate()
else:
samplingRate = self.halfMap1.get().getSamplingRate()
if self.noiseOnlyInHalves.get() is True:
params += ' --noiseonlyinhalves'
else:
params = ' --vol %s' % self.vol0Fn
samplingRate = self.fullMap.get().getSamplingRate()
params += ' --sampling_rate %f' % samplingRate
params += ' --mask %s' % self._getFileName(BINARY_MASK)
if self.maskExcl.hasValue():
params += ' --maskExcl %s' % self.maskExcl.get().getFileName()
params += ' --minRes %f' % self.minRes.get()
params += ' --maxRes %f' % self.maxRes.get()
params += ' --step %f' % freq_step
params += ' -o %s' % self._getExtraPath()
if self.gaussianNoise.get() is True:
params += ' --gaussian'
params += ' --significance %f' % self.significance.get()
params += ' --threads %i' % self.numberOfThreads.get()
self.runJob('xmipp_resolution_monogenic_signal', params)
ccp4header = Ccp4Header(self._getExtraPath(OUTPUT_RESOLUTION_FILE), readHeader=True)
ccp4header.setSampling(samplingRate)
ccp4header.writeHeader()
ccp4header = Ccp4Header(self._getExtraPath(OUTPUT_RESOLUTION_FILE_CHIMERA), readHeader=True)
ccp4header.setSampling(samplingRate)
ccp4header.writeHeader()
[docs] def createHistogram(self):
M = float(self.max_res_init)
m = float(self.min_res_init)
range_res = round((M - m) * 4.0)
params = ' -i %s' % self._getExtraPath(OUTPUT_RESOLUTION_FILE)
params += ' --mask binary_file %s' % self._getExtraPath(OUTPUT_MASK_FILE)
params += ' --steps %f' % (range_res)
params += ' --range %f %f' % (self.min_res_init, self.max_res_init)
params += ' -o %s' % self._getExtraPath(FN_METADATA_HISTOGRAM)
self.runJob('xmipp_image_histogram', params)
[docs] def getMinMax(self, imageFile):
img = ImageHandler().read(imageFile)
imgData = img.getData()
min_res = round(np.amin(imgData) * 100) / 100
max_res = round(np.amax(imgData) * 100) / 100
return min_res, max_res
[docs] def createOutputStep(self):
volume = Volume()
volume.setFileName(self._getExtraPath(OUTPUT_RESOLUTION_FILE))
if self.useHalfVolumes:
if not self.hasHalfVolumesFile:
volume.setSamplingRate(self.halfMap1.get().getSamplingRate())
volume.setOrigin(self.halfMap1.get().getOrigin(True))
self._defineOutputs(resolution_Volume=volume)
self._defineSourceRelation(self.halfMap1, volume)
else:
volume.setSamplingRate(self.associatedHalves.get().getSamplingRate())
volume.setOrigin(self.associatedHalves.get().getOrigin(True))
self._defineOutputs(resolution_Volume=volume)
self._defineSourceRelation(self.associatedHalves, volume)
else:
volume.setSamplingRate(self.fullMap.get().getSamplingRate())
volume.setOrigin(self.fullMap.get().getOrigin(True))
self._defineOutputs(resolution_Volume=volume)
self._defineSourceRelation(self.fullMap, volume)
# Setting the min max for the summary
imageFile = self._getExtraPath(OUTPUT_RESOLUTION_FILE_CHIMERA)
min_, max_ = self.getMinMax(imageFile)
self.min_res_init.set(round(min_ * 100) / 100)
self.max_res_init.set(round(max_ * 100) / 100)
self._store(self.min_res_init)
self._store(self.max_res_init)
# --------------------------- INFO functions ------------------------------
def _methods(self):
messages = []
if hasattr(self, 'resolution_Volume'):
messages.append(
'Information about the method/article in ' + MONORES_METHOD_URL)
return messages
def _summary(self):
summary = []
summary.append("Highest resolution %.2f Å, "
"Lowest resolution %.2f Å. \n" % (self.min_res_init,
self.max_res_init))
return summary
def _validate(self):
errors = []
if not self.maxRes.get():
errors.append("You must provide a low resolution limit")
if self.useHalfVolumes.get():
if self.hasHalfVolumesFile.get():
if not self.associatedHalves.get():
errors.append("You need to select the Associated halves")
else:
if not self.halfMap1.get() or not self.halfMap2.get():
errors.append("You need to select the volumes half")
else:
if not self.fullMap.get():
errors.append("You need to select an input volume")
return errors
def _citations(self):
return ['Vilas2018']