# **************************************************************************
# *
# * Authors: Roberto Marabini (roberto@cnb.csic.es)
# *
# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307 USA
# *
# * All comments concerning this program package may be sent to the
# * e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
from pwem.objects import Volume
from pwem.protocols import ProtReconstruct3D
from pwem import emlib
from pwem.emlib.metadata import getFirstRow
import pyworkflow.protocol.params as params
import pyworkflow.protocol.constants as cons
from pyworkflow.utils.path import createLink
from xmipp3.convert import writeSetOfParticles
from xmipp3.base import isXmippCudaPresent
import os
from pyworkflow import UPDATED, PROD
[docs]class XmippProtReconstructFourier(ProtReconstruct3D):
"""
Reconstruct a volume using Xmipp_reconstruct_fourier from a given set of
particles.
The alignment parameters will be converted to a Xmipp xmd file
and used as direction projections to reconstruct.
AI Generated
## Overview
The Reconstruct Fourier protocol reconstructs a 3D volume from a set of
particles with known projection-alignment parameters.
In single-particle cryo-EM, each particle image is interpreted as a 2D
projection of the 3D structure from a particular direction. Once particles
have projection angles and shifts, those images can be inserted into Fourier
space and combined to produce a 3D map. This protocol performs that
reconstruction using Xmipp Fourier reconstruction programs.
The protocol can optionally correct the CTF before reconstruction, impose
symmetry, limit the maximum resolution used in Fourier space, and generate
independent half-map reconstructions for resolution assessment.
The main output is a reconstructed volume. If half maps are requested, the
output volume also keeps the two half-map file names associated with it.
## Inputs and General Workflow
The main input is a set of particles with projection-alignment information.
The protocol first converts the input particle set into Xmipp metadata format.
If CTF information is available and CTF correction is requested, the particles
are corrected by Wiener filtering before reconstruction. Otherwise, the
original particle metadata are used directly.
If the **Use halves** option is enabled, the corrected particle metadata are
split into two random subsets. Each subset is reconstructed independently, and
the two resulting half maps are averaged to produce the final output volume.
If halves are not requested, all particles are reconstructed together into a
single output volume.
Finally, the reconstructed volume is registered in Scipion with the sampling
rate of the input particles.
## Input Particles
The **Input particles** parameter should point to a SetOfParticles with 3D
projection alignment.
This is essential. The protocol does not determine particle orientations from
scratch. It assumes that each particle already has projection angles and shifts
from a previous refinement, angular assignment, classification, or imported
alignment.
If the angular assignments are poor, the reconstruction will also be poor. The
output volume should therefore be interpreted in relation to the quality of the
input particle alignment.
The input particles should also have a consistent box size, sampling rate, and
contrast convention.
## Symmetry Group
The **Symmetry group** parameter defines the symmetry imposed during
reconstruction.
For asymmetric particles, use **c1**. If the particle has known point-group
symmetry, the corresponding Xmipp symmetry group can be specified.
Correct symmetry can improve the reconstruction by averaging equivalent views
and increasing signal. However, incorrect symmetry can introduce artificial
density, blur asymmetric features, or obscure real biological differences.
Users should impose symmetry only when it is justified by prior structural or
biological knowledge.
## Maximum Resolution
The **Maximum resolution** parameter limits the highest-resolution information
used during Fourier reconstruction.
The value is given in angstroms. If it is set to -1, the protocol uses the
Nyquist limit.
Limiting the maximum resolution can be useful when the input particles or
angular assignments are not reliable at high frequency. It can reduce the
influence of high-frequency noise during reconstruction.
Internally, the angstrom value is converted to a digital frequency using the
particle sampling rate. The reconstruction program then uses this value as the
maximum Fourier-space resolution.
## CTF Correction
The **Correct CTF** option applies Wiener-filter CTF correction to the
particles before reconstruction, when CTF metadata are available.
CTF correction compensates for the contrast transfer effects introduced by the
microscope. This can make the reconstructed map more physically meaningful,
especially when particles come from different defocus values.
If the particles already have CTF information, the protocol writes a corrected
stack and metadata file before reconstruction. If the particles are marked as
phase-flipped, this information is passed to the CTF-correction step.
If CTF information is not present, the protocol simply reconstructs from the
input particle metadata without CTF correction.
## Correct CTF Envelope
The **Correct CTF envelope** option is available when CTF correction is
enabled.
The CTF envelope models additional attenuation of signal at higher spatial
frequencies. Correcting it may be useful when the envelope has been estimated
reliably.
This option should be used with caution. If the envelope estimate is poor,
correcting it may amplify noise or introduce artifacts.
Most users should enable it only when they understand how the envelope was
estimated and why it is appropriate for the dataset.
## Use Halves
The **Use halves** option creates two independent reconstructions from two
random subsets of the input particles.
This is useful for resolution estimation and validation. Half maps are commonly
used to compute FSC curves and to assess reproducibility of structural signal.
When this option is enabled, the protocol:
1. splits the particle metadata into two subsets;
2. reconstructs half map 1;
3. reconstructs half map 2;
4. averages the two half maps to create the final output volume;
5. stores the half-map file names in the output volume metadata.
The final averaged map can be used for visualization or downstream processing,
while the half maps can be used for validation.
## Padding Factor
The **Padding factor** parameters control padding of the input projections and
the reconstructed volume during Fourier reconstruction.
There are two values:
- **Projection padding**;
- **Volume padding**.
Padding can improve interpolation accuracy in Fourier space, but it increases
memory use and computation time. Larger padding values may therefore be more
accurate but slower and more demanding.
The default values are a practical compromise for many datasets. Advanced users
may adjust them when reconstruction accuracy or performance needs to be tuned.
## Approximative Version
The **Approximative version** option enables a faster approximation of the
Fourier reconstruction algorithm.
When enabled, reconstruction is faster but may be slightly less precise than
the full version.
This option is useful for routine reconstruction or exploratory workflows where
speed is important. If maximum numerical precision is required, advanced users
may disable the approximation.
The approximative version is not compatible with the legacy CPU implementation.
## Legacy Version
The **Legacy version** option uses the original CPU implementation of the
Fourier reconstruction algorithm.
This option is provided mainly for backward compatibility. In routine use, it
should usually not be necessary.
The legacy version cannot be used with GPU execution and is not compatible with
the approximative version.
## Extra Parameters
The **Extra parameters** field allows advanced users to pass additional options
to the underlying Xmipp Fourier reconstruction program.
This can be useful for specialized workflows that require options not exposed
directly in the graphical form.
Most users should leave this field empty. Incorrect extra parameters may cause
the reconstruction to fail or produce unexpected results.
## GPU and CPU Execution
The protocol supports GPU and CPU execution.
GPU execution is enabled by default and uses the Xmipp CUDA Fourier
reconstruction program when available. GPU execution is usually faster and is
recommended for large datasets.
If GPU execution is requested but the required Xmipp CUDA programs are not
available, the protocol reports a validation error.
The CPU version has limitations. In particular, the non-GPU version can use
only a single thread; MPI should be used instead for CPU parallelism. The
legacy version is CPU-only.
## Output Volume
The main output is **outputVolume**.
This volume is reconstructed from the input particles and registered with the
same sampling rate as the input particle set.
If **Use halves** is disabled, the output volume is reconstructed directly from
all particles.
If **Use halves** is enabled, the output volume is the average of the two
independent half-map reconstructions, and the half-map file names are stored
with the output volume.
The output volume can be used for visualization, post-processing, FSC
calculation, refinement assessment, or as input to other 3D analysis
protocols.
## Interpreting the Reconstruction
The reconstructed volume reflects the input particles and their assigned
orientations.
Good input alignments, appropriate CTF handling, and correct symmetry usually
lead to a more interpretable map. Poor alignments, incorrect CTF metadata,
wrong symmetry, or inconsistent particle populations can produce blurred or
distorted density.
If half maps are generated, they should be used for validation. Agreement
between half maps provides evidence that the reconstructed features are
supported reproducibly by independent subsets of particles.
## Practical Recommendations
Use this protocol after particles have reliable 3D projection alignment.
Enable CTF correction when reliable CTF metadata are available and the
downstream reconstruction strategy requires corrected particles.
Use half maps when you plan to estimate resolution or validate the map with
FSC.
Use symmetry only when it is biologically justified.
Keep the default padding values unless memory use, speed, or reconstruction
accuracy requires adjustment.
Use GPU execution when available. It is usually the practical choice for large
particle sets.
Inspect the output volume and, when generated, compare the two half maps. Poor
agreement between half maps may indicate overfitting, bad alignments,
heterogeneity, or insufficient particle quality.
## Final Perspective
Reconstruct Fourier is a core 3D reconstruction protocol. It converts a set of
projection-aligned particles into a 3D map using Fourier-space reconstruction.
For biological users, the key point is that the protocol does not solve the
orientation problem; it uses orientations that already exist in the input
particles. Therefore, the quality of the reconstructed volume depends strongly
on the quality of the previous alignment or refinement step.
Used with reliable particle alignments, appropriate CTF correction, and
well-chosen symmetry, the protocol produces maps and half maps that can support
subsequent validation, post-processing, interpretation, and refinement.
"""
_label = 'reconstruct fourier'
_devStatus = PROD
#--------------------------- DEFINE param functions --------------------------------------------
def _defineParams(self, form):
form.addHidden(params.USE_GPU, params.BooleanParam, default=True,
label="Use GPU for execution",
help="This protocol has both CPU and GPU implementation.\
Select the one you want to use.")
form.addHidden(params.GPU_LIST, params.StringParam, default='0',
expertLevel=cons.LEVEL_ADVANCED,
label="Choose GPU IDs",
help="Add a list of GPU devices that can be used")
form.addSection(label='Input')
form.addParam('inputParticles', params.PointerParam, pointerClass='SetOfParticles', pointerCondition='hasAlignmentProj',
label="Input particles",
help='Select the input images from the project.')
form.addParam('symmetryGroup', params.StringParam, default='c1',
label="Symmetry group",
help='See [[Xmipp Symmetry][http://www2.mrc-lmb.cam.ac.uk/Xmipp/index.php/Conventions_%26_File_formats#Symmetry]] page '
'for a description of the symmetry format accepted by Xmipp')
form.addParam('maxRes', params.FloatParam, default=-1,
label="Maximum resolution (A)",
help='Maximum resolution (in Angstrom) to consider \n'
'in Fourier space (default Nyquist).\n'
'Param *--maxres* in Xmipp.')
form.addParam('useHalves', params.BooleanParam, label='Use halves', default=False,
help='Create separate reconstructions from two random subsets. Useful for resolution measurements')
form.addParam('correctCTF', params.BooleanParam, label='Correct CTF', default=True,
help='Correct the CTF with a Wiener filter before reconstructing, if the CTF is available')
form.addParam('correctEnvelope', params.BooleanParam, label='Correct CTF envelope', default=False, condition="correctCTF",
help='Correct the CTF envelope')
line = form.addLine('Padding factor',
expertLevel=cons.LEVEL_ADVANCED,
help='Padding of the input images. Higher number will result in more precise interpolation in Fourier '
'domain, but slower processing time and higher memory requirements.')
line.addParam('pad_proj', params.IntParam, default=2, label='Projection')
line.addParam('pad_vol', params.IntParam, default=2, label='Volume')
form.addParam('legacy', params.BooleanParam, default=False,
label="Legacy version",
expertLevel=cons.LEVEL_ADVANCED,
help="Use original CPU version of the algorithm. This should not be necessary, but it's present"
" to ensure backward compatibility")
form.addParam('approx', params.BooleanParam, default=True,
label="Approximative version",
expertLevel=cons.LEVEL_ADVANCED,
help="If on, an approximation of the original algorithm will be used. This will result in"
" faster processing times, but (slightly) less precise result")
form.addParam('extraParams', params.StringParam, default='', expertLevel=cons.LEVEL_ADVANCED,
label='Extra parameters: ',
help='Extra parameters to *xmipp_(cuda_)reconstruct_fourier* program:\n'
"""
--iter () : Subtract projections of this map from the images used for reconstruction
""")
form.addParallelSection(threads=4, mpi=1)
#--------------------------- INSERT steps functions --------------------------------------------
def _createFilenameTemplates(self):
""" Centralize how files are called for iterations and references. """
myDict = {
'input_xmd': self._getTmpPath('input_particles.xmd'),
'input_tmp_root': self._getTmpPath('input_particles_corrected'),
'half1_xmd': self._getTmpPath('input_particles_corrected000001.xmd'),
'half2_xmd': self._getTmpPath('input_particles_corrected000002.xmd'),
'output_volume': self._getPath('output_volume.mrc'),
'half1_volume': self._getPath('half1.mrc'),
'half2_volume': self._getPath('half2.mrc')
}
self._updateFilenamesDict(myDict)
def _insertAllSteps(self):
self._createFilenameTemplates()
self._insertFunctionStep('convertInputStep')
if self.useHalves.get():
self._insertFunctionStep('splitInputStep')
self._insertReconstructStep('half1')
self._insertReconstructStep('half2')
self._insertFunctionStep('averageStep')
else:
self._insertReconstructStep()
self._insertFunctionStep('createOutputStep')
def _insertReconstructStep(self, half=None):
if half is None:
params = ' -i %s.xmd' % self._getFileName('input_tmp_root')
params += ' -o %s' % self._getFileName('output_volume')
else:
params = ' -i %s' % self._getFileName(half + '_xmd')
params += ' -o %s' % self._getFileName(half + '_volume')
params += ' --sym %s' % self.symmetryGroup.get()
maxRes = self.maxRes.get()
if maxRes == -1:
digRes = 0.5
else:
digRes = self.inputParticles.get().getSamplingRate() / self.maxRes.get()
params += ' --max_resolution %0.3f' %digRes
params += ' --padding %0.3f %0.3f' % (self.pad_proj.get(), self.pad_vol.get())
params += ' --sampling %f' % self.inputParticles.get().getSamplingRate()
params += ' %s' % self.extraParams.get()
params += ' --fast' if self.approx.get() else ''
if self.useGpu.get():
#AJ to make it work with and without queue system
params += ' --thr %d' % self.numberOfThreads.get()
if self.numberOfMpi.get()>1:
N_GPUs = len((self.gpuList.get()).split(','))
params += ' -gpusPerNode %d' % N_GPUs
params += ' -threadsPerGPU %d' % max(self.numberOfThreads.get(),4)
count=0
GpuListCuda=''
if self.useQueueForSteps() or self.useQueue():
GpuList = os.environ["CUDA_VISIBLE_DEVICES"]
GpuList = GpuList.split(",")
for elem in GpuList:
GpuListCuda = GpuListCuda+str(count)+' '
count+=1
else:
GpuListAux = ''
for elem in self.getGpuList():
GpuListCuda = GpuListCuda+str(count)+' '
GpuListAux = GpuListAux+str(elem)+','
count+=1
os.environ["CUDA_VISIBLE_DEVICES"] = GpuListAux
if self.numberOfMpi.get()==1:
params += ' --device %s'%(GpuListCuda) if self.useGpu.get() else ''
self._insertFunctionStep('reconstructStep', params)
#--------------------------- STEPS functions --------------------------------------------
[docs] def reconstructStep(self, params):
""" Create the input file in STAR format as expected by Xmipp.
If the input particles comes from Xmipp, just link the file.
"""
if self.useGpu.get():
if self.numberOfMpi.get()>1:
self.runJob('xmipp_cuda_reconstruct_fourier', params, numberOfMpi=len((self.gpuList.get()).split(','))+1)
else:
self.runJob('xmipp_cuda_reconstruct_fourier', params)
else:
if self.legacy.get():
self.runJob('xmipp_reconstruct_fourier', params)
else:
self.runJob('xmipp_reconstruct_fourier_accel', params)
[docs] def averageStep(self):
# Read
half1 = emlib.Image(self._getFileName('half1_volume'))
half2 = emlib.Image(self._getFileName('half2_volume'))
# Average
half1.inplaceAdd(half2)
half1.inplaceMultiply(0.5)
# Write
half1.write(self._getFileName('output_volume'))
[docs] def createOutputStep(self):
imgSet = self.inputParticles.get()
self.runJob("xmipp_image_header", "-i %s --sampling_rate %f"%\
(self._getFileName('output_volume'), imgSet.getSamplingRate()),
numberOfMpi=1)
volume = Volume()
volume.setFileName(self._getFileName('output_volume'))
volume.setSamplingRate(imgSet.getSamplingRate())
if self.useHalves.get():
volume.setHalfMaps([
self._getFileName('half1_volume'),
self._getFileName('half2_volume')
])
self._defineOutputs(outputVolume=volume)
self._defineSourceRelation(self.inputParticles, volume)
#--------------------------- INFO functions --------------------------------------------
def _validate(self):
""" Should be overriden in subclasses to
return summary message for NORMAL EXECUTION.
"""
errors = ProtReconstruct3D._validate(self)
if self.useGpu.get() and self.legacy.get():
errors.append("Legacy version is not implemented for GPU")
if self.approx.get() and self.legacy.get():
errors.append("Approximative version is not implemented for Legacy code")
if not self.useGpu.get() and self.numberOfThreads.get() > 1:
errors.append("CPU version can use only a single thread. Use MPI instead")
if self.useGpu and not isXmippCudaPresent():
errors.append("You have asked to use GPU, but I cannot find Xmipp GPU programs in the path")
return errors
def _summary(self):
""" Should be overriden in subclasses to
return summary message for NORMAL EXECUTION.
"""
return []
#--------------------------- UTILS functions --------------------------------------------