Source code for xmipp3.protocols.protocol_ransac

# **************************************************************************
# *
# * Authors:     Javier Vargas and Adrian Quintana (jvargas@cnb.csic.es aquintana@cnb.csic.es)
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************

import os
from math import floor

from pyworkflow.object import Float
from pyworkflow.utils.path import cleanPath, copyFile
from pyworkflow.protocol.params import (PointerParam, FloatParam, BooleanParam,
                                        IntParam, StringParam,
                                        STEPS_PARALLEL, LEVEL_ADVANCED, USE_GPU, GPU_LIST)

from pwem.emlib.image import ImageHandler
from pwem.protocols import ProtInitialVolume
from pwem.objects import SetOfClasses2D

from pwem import emlib
from xmipp3.convert import (writeSetOfClasses2D, readSetOfVolumes,
                            writeSetOfParticles)
from xmipp3.base import isMdEmpty, isXmippCudaPresent
from pyworkflow import BETA, UPDATED, NEW, PROD


[docs]class XmippProtRansac(ProtInitialVolume): """ Computes an initial 3d model from a set of projections/classes using RANSAC algorithm. This method is based on an initial non-lineal dimensionality reduction approach which allows to select representative small sets of class average images capturing the most of the structural information of the particle under study. These reduced sets are then used to generate volumes from random orientation assignments. The best volume is determined from these guesses using a random sample consensus (RANSAC) approach. AI Generated ## Overview The RANSAC protocol generates initial 3D volumes from a set of 2D class averages or projection averages. Initial model generation is a critical step in single-particle cryo-EM. Before a high-quality 3D refinement can be performed, the workflow usually needs a starting volume that has approximately the correct global shape and orientation distribution. The RANSAC protocol addresses this problem by generating many candidate initial volumes from small random subsets of the input averages and then selecting the best candidates according to how well they explain the full set of input images. The method follows a random sample consensus strategy. Each RANSAC iteration uses a small subset of 2D averages to propose a tentative 3D volume. The candidate volume is then projected, and the projections are compared with the input averages. Volumes supported by many well-correlated images are considered better candidates. The best volumes are then refined by projection matching. The output is a set of proposed initial volumes, each annotated with scoring information. ## Inputs and General Workflow The main input is a set of 2D class averages or projection averages. The protocol first converts the input images into Xmipp metadata format. It then low-pass filters and resizes the input averages to a working size suitable for initial-model search. This makes the procedure faster and focuses the search on low- to medium-resolution structural information. The protocol then runs many independent RANSAC iterations. In each iteration, it selects a small subset of input averages, assigns or estimates orientations, reconstructs a tentative volume, projects that volume, and compares the projections with the input averages. After all iterations, the protocol evaluates the candidate volumes using an inlier criterion based on projection correlation. The best volumes are selected and refined through several projection-matching iterations. Finally, the refined volumes are resized back to the original box size and written as output volumes. ## Input Averages The **Input averages** parameter should point to a set of 2D classes or averages. If the input is a SetOfClasses2D, the classes should have representative images. These representative images are the class averages used by the protocol. If the input is a SetOfAverages or similar particle-average set, the images are used directly. The quality of the input averages is very important. RANSAC initial-model generation works best when the averages show clear structural features and represent a broad range of particle views. If the input averages are noisy, dominated by contaminants, affected by strong preferred orientation, or contain many inconsistent particle populations, the candidate volumes may be poor or ambiguous. ## Symmetry Group The **Symmetry group** parameter defines the symmetry assumed during projection generation, reconstruction, and volume evaluation. For asymmetric particles, use **c1**. If the particle has known symmetry, the appropriate Xmipp symmetry group should be provided. Correct symmetry can help generate better initial volumes by enforcing equivalent views and reducing ambiguity. However, using an incorrect symmetry can produce misleading models by averaging non-equivalent features. Users should only impose symmetry when it is biologically justified. ## Angular Sampling Rate The **Angular sampling rate** parameter defines how finely projection directions are explored, in degrees. A smaller angular sampling value means a denser angular search. This can improve orientation assignment and candidate-volume evaluation, but increases computation time. A larger value is faster but may miss important orientation differences and reduce the quality of the initial model. The default value is a practical compromise for many datasets. Advanced users may adjust it depending on particle size, symmetry, and expected angular complexity. ## Number of RANSAC Iterations The **Number of RANSAC iterations** parameter controls how many candidate volumes are generated and tested. Each iteration proposes a volume from a different random subset or sampling of the input averages. Increasing the number of iterations increases the chance of finding a good initial model, especially when the input set contains outliers or multiple particle populations. However, more iterations require more computation. The default is designed to provide broad exploration while remaining practical for typical workflows. If the output volumes are unstable or poor, increasing the number of RANSAC iterations may help. ## Dimensionality Reduction The **Perform dimensionality reduction** option changes how representative input averages are selected during RANSAC. When enabled, the protocol uses Local Tangent Space Alignment, or LTSA, to organize the input averages in a reduced-dimensional space. It then samples representative images from this space using a grid. This can help select a small set of averages that captures the diversity of the dataset, rather than choosing purely random images. Dimensionality reduction is an advanced option. It requires that the number of input classes be large enough relative to the grid size. If there are too few classes, the protocol reports a validation error. ## Number of Grids per Dimension When dimensionality reduction is enabled, the **Number of grids per dimension** parameter controls how the reduced space is sampled. For example, a value of 3 creates a 3 by 3 grid, giving up to 9 regions from which representative classes can be sampled. A larger grid gives more detailed sampling of the reduced space, but requires more input classes. A smaller grid is more conservative. This parameter should be chosen according to the number of available classes and the diversity of the input averages. ## Number of Random Samples When dimensionality reduction is disabled, the **Number of random samples** parameter defines how many input averages are selected in each RANSAC iteration. These randomly selected averages are assigned random orientations, or orientations constrained by an optional initial volume, and are used to reconstruct a candidate volume. If too few samples are used, the candidate volumes may be unstable or insufficiently constrained. If too many are used, each iteration becomes more expensive and less exploratory. The default is intended to generate many quick candidate volumes from small subsets. ## Initial Volume The **Initial volume** parameter allows the user to provide a very rough volume to constrain the angular search. This is optional. The protocol can run without an initial volume. A rough initial volume may be useful when the specimen has a known overall shape, such as a cylinder for a filament-like object. In that case, the initial volume can help assign more plausible tilt angles, while still allowing the rotational angle to be uncertain. The initial volume should be used carefully. If it is too specific or incorrect, it may bias the search toward a wrong structure. It is best used as a broad geometrical constraint rather than as a detailed reference. ## Maximum Frequency of the Initial Volume The **Max frequency of the initial volume** parameter controls the resolution used during the initial-model search, expressed in angstroms. The protocol low-pass filters and resizes the input averages according to this parameter. The goal is to focus the RANSAC search on robust low-resolution features rather than noisy high-resolution details. A lower-resolution search is usually appropriate for initial model generation. The purpose is to obtain the correct global shape, not to recover fine features. ## Inliers Threshold The **Inliers threshold** defines the correlation value used to decide whether an input average supports a candidate volume. After a candidate volume is generated, the protocol projects it and compares those projections with the input averages. Averages with correlation above the threshold are considered inliers. Candidate volumes with more and better inliers receive higher scores. If the threshold is too high, few or no candidate volumes may be considered valid. If this happens, the user should lower the threshold. If the threshold is too low, poor candidate volumes may appear to have many inliers, reducing the selectivity of the method. ## Number of Best Volumes to Refine The **Number of best volumes to refine** parameter controls how many of the best RANSAC candidates are selected for refinement. After all RANSAC iterations are evaluated, the protocol ranks the candidates by their inlier support. The best candidates are copied into refinement branches. Keeping several candidates is useful because initial-model generation can be ambiguous. Different candidates may correspond to alternative orientations, different conformations, or different local optima. The output contains the refined versions of these selected candidates. ## Number of Iterations to Refine the Volumes The **Number of iterations to refine the volumes** parameter defines how many projection-matching refinement cycles are applied to each selected candidate. In each cycle, the current volume is projected, input averages are assigned to projections, and a new volume is reconstructed from the updated assignments. More iterations can improve the consistency of the candidate volume, but also increase computation time. Too many iterations are not always beneficial if the initial candidate is poor or if the input averages are inconsistent. ## Use All Images to Refine The **Use all images to refine** option controls which averages are used during the refinement of selected RANSAC volumes. If disabled, only inlier images are used. This keeps refinement focused on the images that strongly support the candidate volume. If enabled, all input averages are used for refinement. This may be useful when the input set is clean and the user wants each candidate to be refined using all available information. For heterogeneous or noisy input sets, using only inliers may be safer. ## GPU Execution The protocol can use GPU acceleration for reconstruction steps when available. GPU execution is enabled through the hidden GPU parameters. If GPU use is requested but the required Xmipp CUDA programs are not available, the protocol reports a validation error. GPU acceleration can substantially reduce computation time, especially when many RANSAC iterations and refinement steps are requested. ## Output Volumes The main output is **outputVolumes**, a set of refined candidate initial volumes. Each output volume is written in MRC format, resized to the original input box size, and assigned the sampling rate of the input averages. The output volumes are annotated with Xmipp scoring values, including: - total volume score; - thresholded volume score; - mean correlation score; - minimum correlation score. These scores help the user compare candidate volumes, but they should not be used blindly. Visual inspection and downstream refinement behavior are also important. ## Interpreting the Candidate Volumes The RANSAC output volumes should be interpreted as possible initial models. They are not final reconstructions. Their role is to provide plausible starting points for later 3D refinement. A good candidate should have a reasonable global shape, be compatible with the 2D class averages, and produce stable behavior in subsequent refinement. A poor candidate may show distorted shape, missing regions, strong artifacts, or features inconsistent with the class averages. When several output volumes are produced, users should inspect them and choose the most plausible candidates for downstream refinement. ## Practical Recommendations Use clean and representative 2D class averages as input. Remove obvious junk classes before running RANSAC. Use the correct symmetry group only when symmetry is known. Do not impose symmetry merely to make the output look cleaner. Start with the default number of RANSAC iterations and best volumes. Increase the number of iterations if the results are unstable or if the dataset is difficult. If no valid maps are found, lower the inliers threshold. Use dimensionality reduction only when there are enough input classes and when you want representative sampling of class variability. Consider providing a very rough initial volume only when there is a strong reason to constrain the angular search. Avoid using a detailed reference that could bias the initial model. Inspect all output volumes visually before selecting one for refinement. ## Final Perspective RANSAC is an initial-volume generation protocol based on repeated random hypotheses and consensus scoring. For biological users, its value is that it can propose several possible 3D starting models from 2D averages without requiring tilted-pair data. By testing many candidate volumes and retaining those best supported by the input averages, the protocol reduces dependence on any single random initialization. The resulting volumes should be treated as starting hypotheses. Their quality must be assessed by visual inspection, agreement with the 2D averages, scoring information, and performance in subsequent 3D refinement. """ _label = 'ransac' _devStatus = PROD def __init__(self, **args): ProtInitialVolume.__init__(self, **args) self.stepsExecutionMode = STEPS_PARALLEL #--------------------------- DEFINE param functions -------------------------------------------- def _defineParams(self, form): form.addHidden(USE_GPU, BooleanParam, default=True, label="Use GPU for execution", help="This protocol has both CPU and GPU implementation.\ Select the one you want to use.") form.addHidden(GPU_LIST, StringParam, default='0', expertLevel=LEVEL_ADVANCED, label="Choose GPU IDs", help="Add a list of GPU devices that can be used") form.addSection(label='Input') form.addParam('inputSet', PointerParam, label="Input averages", important=True, pointerClass='SetOfClasses2D, SetOfAverages',# pointerCondition='hasRepresentatives', help='Select the input images from the project.' 'It should be a SetOfClasses2D object') form.addParam('symmetryGroup', StringParam, default="c1", label='Symmetry group', help="See http://xmipp.cnb.csic.es/twiki/bin/view/Xmipp/Symmetry" " for a description of the symmetry groups format in Xmipp.\n" "If no symmetry is present, use _c1_.") form.addParam('angularSampling', FloatParam, default=5, expertLevel=LEVEL_ADVANCED, label='Angular sampling rate', help='In degrees.' ' This sampling defines how fine the projection gallery from the volume is explored.') form.addParam('nRansac', IntParam, default="400", expertLevel=LEVEL_ADVANCED, label="Number of RANSAC iterations", help='Number of initial volumes to test by RANSAC') form.addParam('dimRed', BooleanParam, default=False, expertLevel=LEVEL_ADVANCED, label='Perform dimensionality reduction', help='The dimensionality reduction is performed using the Local Tangent Space' 'Alignment. See http://www.stat.missouri.edu/~ys873/research/LTSA11.pdf') form.addParam('numGrids', IntParam, default=3, condition='dimRed', expertLevel=LEVEL_ADVANCED, label='Number of grids per dimension', help='Number of squares to sample the classes') form.addParam('numSamples', IntParam, default=8, condition='not dimRed', expertLevel=LEVEL_ADVANCED, label='Number of random samples', help='Number of squares to sample the classes') form.addParam('corrThresh', FloatParam, default=0.77, expertLevel=LEVEL_ADVANCED, label='Inliers threshold', help='Correlation value threshold to determine if an experimental projection is an inlier or outlier.') form.addParam('numVolumes', IntParam, default=10, expertLevel=LEVEL_ADVANCED, label='Number of best volumes to refine', help='Number of best volumes to refine using projection matching approach and the input classes') form.addParam('numIter', IntParam, default=10, expertLevel=LEVEL_ADVANCED, label='Number of iterations to refine the volumes', help='Number of iterations to refine the best volumes using projection matching approach and the input classes') form.addParam('initialVolume', PointerParam, label="Initial volume", expertLevel=LEVEL_ADVANCED, pointerClass='Volume', allowsNull=True, help='You may provide a very rough initial volume as a way to constraint the angular search.' 'For instance, when reconstructing a fiber, you may provide a cylinder so that side views' 'are assigned to the correct tilt angle, although the rotational angle may be completely wrong') form.addParam('maxFreq', IntParam, default=12, expertLevel=LEVEL_ADVANCED, label='Max frequency of the initial volume', help=' Max frequency of the initial volume in Angstroms') form.addParam('useAll', BooleanParam, default=False, expertLevel=LEVEL_ADVANCED, label='Use all images to refine', help=' When refining a RANSAC volume, use all images to refine it instead of only inliers') form.addParallelSection(threads=8, mpi=1) #--------------------------- INSERT steps functions -------------------------------------------- def _insertAllSteps(self): # Insert some initialization steps initialStepId = self._insertInitialSteps() deps = [] # Store all steps ids, final step createOutput depends on all of them for n in range(self.nRansac.get()): # CTF estimation with Xmipp stepId = self._insertFunctionStep(self.ransacIterationStep, n, prerequisites=[initialStepId], needsGPU=self.usesGpu()) # Make estimation steps indepent between them deps.append(stepId) # Look for threshold, evaluate volumes and get the best self._insertFunctionStep(self.getCorrThreshStep, prerequisites=deps, needsGPU=False) # Make estimation steps indepent between them) self._insertFunctionStep(self.evaluateVolumesStep, needsGPU=False) bestVolumesStepId = self._insertFunctionStep(self.getBestVolumesStep, needsGPU=False) deps = [] # Store all steps ids, final step createOutput depends on all of them # Refine the best volumes for n in range(self.numVolumes.get()): fnBase='proposedVolume%05d' % n fnRoot=self._getPath(fnBase) for it in range(self.numIter.get()): if it==0: self._insertFunctionStep(self.reconstructStep,fnRoot, needsGPU=self.usesGpu(),prerequisites=[bestVolumesStepId]) else: self._insertFunctionStep(self.reconstructStep,fnRoot, needsGPU=self.usesGpu()) self._insertFunctionStep(self.projMatchStep,fnBase, needsGPU=False) stepId = self._insertFunctionStep(self.resizeStep,fnRoot,self.Xdim, needsGPU=False) deps.append(stepId) # Score each of the final volumes self._insertFunctionStep(self.scoreFinalVolumes, prerequisites=deps, needsGPU=False) # Make estimation steps indepent between them self._insertFunctionStep(self.createOutputStep,needsGPU=False) def _insertInitialSteps(self): # Convert the input classes to a metadata ready for xmipp self.imgsFn = self._getExtraPath('input_classes.xmd') self._insertFunctionStep(self.convertInputStep, self.imgsFn, needsGPU=False) inputSet = self.inputSet.get() self.Xdim = inputSet.getDimensions()[0] fnOutputReducedClass = self._getExtraPath("reducedClasses.xmd") fnOutputReducedClassNoExt = os.path.splitext(fnOutputReducedClass)[0] # Low pass filter and resize maxFreq = self.maxFreq.get() ts = inputSet.getSamplingRate() K = 0.25 * (maxFreq / ts) if K < 1: K = 1 self.Xdim2 = self.Xdim / K if self.Xdim2 < 32: self.Xdim2 = 32 K = self.Xdim / self.Xdim2 freq = ts / maxFreq ts = K * ts self._insertRunJobStep("xmipp_transform_filter", "-i %s -o %s --fourier low_pass %f --oroot %s" %(self.imgsFn,fnOutputReducedClass,freq,fnOutputReducedClassNoExt), needsGPU=False) lastId = self._insertRunJobStep("xmipp_image_resize", "-i %s --fourier %d -o %s" %(fnOutputReducedClass,self.Xdim2,fnOutputReducedClassNoExt), needsGPU=False) # Generate projection gallery from the initial volume if self.initialVolume.hasValue(): lastId = self._insertFunctionStep(self.projectInitialVolume, needsGPU=False) return lastId #--------------------------- STEPS functions --------------------------------------------
[docs] def convertInputStep(self, classesFn): inputSet = self.inputSet.get() if isinstance(inputSet, SetOfClasses2D): writeSetOfClasses2D(inputSet, classesFn) else: writeSetOfParticles(inputSet, classesFn)
[docs] def ransacIterationStep(self, n): fnOutputReducedClass = self._getExtraPath("reducedClasses.xmd") fnBase = "ransac%05d"%n fnRoot = self._getTmpPath(fnBase) if self.dimRed: # Get a random sample of images self.runJob("xmipp_transform_dimred","-i %s --randomSample %s.xmd %d -m LTSA "%(fnOutputReducedClass,fnRoot,self.numGrids.get())) else: self.runJob("xmipp_metadata_utilities","-i %s -o %s.xmd --operate random_subset %d --mode overwrite "%(fnOutputReducedClass,fnRoot,self.numSamples.get())) self.runJob("xmipp_metadata_utilities","-i %s.xmd --fill angleRot rand_uniform -180 180 "%(fnRoot)) self.runJob("xmipp_metadata_utilities","-i %s.xmd --fill angleTilt rand_uniform 0 180 "%(fnRoot)) self.runJob("xmipp_metadata_utilities","-i %s.xmd --fill anglePsi rand_uniform 0 360 "%(fnRoot)) # If there is an initial volume, assign angles if self.initialVolume.hasValue(): fnGallery=self._getTmpPath('gallery_InitialVolume.stk') self.runJob("xmipp_angular_projection_matching", "-i %s.xmd -o %s.xmd --ref %s --Ri 0 --Ro %s --max_shift %s --append"\ %(fnRoot,fnRoot,fnGallery,str(self.Xdim/2),str(self.Xdim/20))) # Reconstruct with the small sample self.reconstructStep(fnRoot) fnVol = fnRoot+'.vol' # Generate projections from this reconstruction fnGallery=self._getTmpPath('gallery_'+fnBase+'.stk') self.runJob("xmipp_angular_project_library", "-i %s -o %s --sampling_rate %f --sym %s --method fourier 1 0.25 bspline --compute_neighbors --angular_distance -1 --experimental_images %s --max_tilt_angle 90"\ %(fnVol,fnGallery,self.angularSampling.get(),self.symmetryGroup.get(),fnOutputReducedClass)) # Assign angles to the rest of images fnAngles=self._getTmpPath('angles_'+fnBase+'.xmd') self.runJob("xmipp_angular_projection_matching", "-i %s -o %s --ref %s --Ri 0 --Ro %s --max_shift %s --append"\ %(fnOutputReducedClass,fnAngles,fnGallery,str(self.Xdim/2),str(self.Xdim/20))) # Delete intermediate files cleanPath(fnGallery) cleanPath(self._getTmpPath('gallery_'+fnBase+'_sampling.xmd')) cleanPath(self._getTmpPath('gallery_'+fnBase+'.doc')) cleanPath(fnVol) cleanPath(self._getTmpPath(fnBase+'.xmd'))
[docs] def reconstructStep(self, fnRoot): from pwem.emlib.metadata import getSize if os.path.exists(fnRoot+".xmd"): Nimages=getSize(fnRoot+".xmd") if Nimages>0: if self.useGpu.get(): # protocol will run several reconstructions at once, so execute each reconstruction separately args = "-i %s.xmd -o %s.vol --sym %s --thr %s --fast"\ % (fnRoot, fnRoot, self.symmetryGroup.get(), self.numberOfThreads.get()) #AJ to make it work with and without queue system if self.numberOfMpi.get()>1: N_GPUs = len((self.gpuList.get()).split(',')) args += ' -gpusPerNode %d' % N_GPUs args += ' -threadsPerGPU %d' % max(self.numberOfThreads.get(),4) gpuList = list(map(str, self._stepsExecutor.getGpuList())) gpuListArg = " ".join(gpuList) os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(gpuList) self.info("GPUs used in CUDA_VISIBLE_DEVICES: %s" % gpuListArg) if self.numberOfMpi.get() == 1: args += " --device " + gpuListArg if self.numberOfMpi.get() > 1: self.runJob('xmipp_cuda_reconstruct_fourier', args, numberOfMpi=len(gpuList)+1) else: self.runJob('xmipp_cuda_reconstruct_fourier', args) else: self.runJob("xmipp_reconstruct_fourier_accel","-i %s.xmd -o %s.vol --sym %s " %(fnRoot,fnRoot,self.symmetryGroup.get())) self.runJob("xmipp_transform_mask","-i %s.vol --mask circular -%d "%(fnRoot,self.Xdim2/2)) else: print(fnRoot+".xmd is empty. The corresponding volume is not generated.")
[docs] def resizeStep(self,fnRoot,Xdim): if os.path.exists(fnRoot+".vol"): self.runJob("xmipp_image_resize","-i %s.vol -o %s.vol --dim %d %d" %(fnRoot,fnRoot,Xdim,Xdim)) self.runJob("xmipp_image_convert","-i %s.vol -o %s.mrc -t vol" %(fnRoot,fnRoot)) cleanPath("%s.vol"%fnRoot) self.runJob("xmipp_image_header", "-i %s.mrc --sampling_rate %f" %\ (fnRoot, self.inputSet.get().getSamplingRate()))
[docs] def getCorrThreshStep(self): corrVector = [] fnCorr=self._getExtraPath("correlations.xmd") mdCorr= emlib.MetaData() for n in range(self.nRansac.get()): fnRoot="ransac%05d"%n fnAngles=self._getTmpPath("angles_"+fnRoot+".xmd") md = emlib.MetaData(fnAngles) for objId in md: corr = md.getValue(emlib.MDL_MAXCC, objId) corrVector.append(corr) objIdCorr = mdCorr.addObject() mdCorr.setValue(emlib.MDL_MAXCC,float(corr),objIdCorr) mdCorr.write("correlations@"+fnCorr,emlib.MD_APPEND) mdCorr= emlib.MetaData() sortedCorrVector = sorted(corrVector) indx = int(floor(self.corrThresh.get()*(len(sortedCorrVector)-1))) #With the line below commented the percentil is not used for the threshold and is used the value introduced in the form #CorrThresh = sortedCorrVector[indx]# objId = mdCorr.addObject() mdCorr.setValue(emlib.MDL_WEIGHT,self.corrThresh.get(),objId) mdCorr.write("corrThreshold@"+fnCorr,emlib.MD_APPEND) print("Correlation threshold: "+str(self.corrThresh.get()))
[docs] def evaluateVolumesStep(self): fnCorr=self._getExtraPath("correlations.xmd") fnCorr = 'corrThreshold@'+fnCorr mdCorr= emlib.MetaData(fnCorr) objId = mdCorr.firstObject() CorrThresh = mdCorr.getValue(emlib.MDL_WEIGHT,objId) numValid=0 for n in range(self.nRansac.get()): fnRoot="ransac%05d"%n fnAngles=self._getTmpPath("angles_"+fnRoot+".xmd") md = emlib.MetaData(fnAngles) numInliers=0 for objId in md: corr = md.getValue(emlib.MDL_MAXCC, objId) if (corr >= CorrThresh) : numInliers = numInliers+corr md= emlib.MetaData() objId = md.addObject() md.setValue(emlib.MDL_WEIGHT,float(numInliers),objId) if numInliers>0: numValid+=1 md.write("inliers@"+fnAngles,emlib.MD_APPEND) if numValid==0: raise ValueError("There are no valid map. Consider lowering the threshold to have more inliers. "\ "Current threshold %f"%CorrThresh)
[docs] def getBestVolumesStep(self): volumes = [] inliers = [] for n in range(self.nRansac.get()): fnAngles = self._getTmpPath("angles_ransac%05d"%n+".xmd") md=emlib.MetaData("inliers@"+fnAngles) numInliers=md.getValue(emlib.MDL_WEIGHT,md.firstObject()) volumes.append(fnAngles) inliers.append(numInliers) index = sorted(range(inliers.__len__()), key=lambda k: inliers[k]) fnBestAngles = '' threshold=self.getCCThreshold() i = self.nRansac.get()-1 indx = 0 while i >= 0 and indx < self.numVolumes: fnBestAngles = volumes[index[i]] fnBestAnglesOut = self._getPath("proposedVolume%05d"%indx+".xmd") copyFile(fnBestAngles, fnBestAnglesOut) self._log.info("Best volume %d = %s" % (indx, fnBestAngles)) if not self.useAll: self.runJob("xmipp_metadata_utilities","-i %s -o %s --query select \"maxCC>%f \" --mode append" %(fnBestAnglesOut,fnBestAnglesOut,threshold)) if not isMdEmpty(fnBestAnglesOut): indx += 1 else: indx += 1 i -= 1 # Remove unnecessary files for n in range(self.nRansac.get()): fnAngles = self._getTmpPath("angles_ransac%05d"%n+".xmd") cleanPath(fnAngles)
[docs] def projMatchStep(self,fnBase): fnRoot=self._getPath(fnBase) if os.path.exists(fnRoot+".vol"): fnGallery=self._getTmpPath('gallery_'+fnBase+'.stk') fnOutputReducedClass = self._getExtraPath("reducedClasses.xmd") AngularSampling=max(self.angularSampling.get()/2.0,7.5); self.runJob("xmipp_angular_project_library", "-i %s.vol -o %s --sampling_rate %f --sym %s --method fourier 1 0.25 bspline --compute_neighbors --angular_distance -1 --experimental_images %s"\ %(fnRoot,fnGallery,float(AngularSampling),self.symmetryGroup.get(),fnOutputReducedClass)) self.runJob("xmipp_angular_projection_matching", "-i %s.xmd -o %s.xmd --ref %s --Ri 0 --Ro %s --max_shift %s --append"\ %(fnRoot,fnRoot,fnGallery,str(self.Xdim/2),str(self.Xdim/20))) cleanPath(self._getTmpPath('gallery_'+fnBase+'_sampling.xmd')) cleanPath(self._getTmpPath('gallery_'+fnBase+'.doc')) cleanPath(self._getTmpPath('gallery_'+fnBase+'.stk'))
[docs] def scoreFinalVolumes(self): threshold=self.getCCThreshold() mdOut=emlib.MetaData() for n in range(self.numVolumes.get()): fnRoot=self._getPath('proposedVolume%05d'%n) fnAssignment=fnRoot+".xmd" if os.path.exists(fnAssignment): self.runJob("xmipp_metadata_utilities","-i %s --fill weight constant 1"%fnAssignment) MDassignment=emlib.MetaData(fnAssignment) sum=0 thresholdedSum=0 N=0 minCC=2 for id in MDassignment: cc=MDassignment.getValue(emlib.MDL_MAXCC,id) sum+=cc thresholdedSum+=cc-threshold if cc<minCC: minCC=cc N+=1 if N>0: avg=sum/N else: avg=0.0 id=mdOut.addObject() mdOut.setValue(emlib.MDL_IMAGE,fnRoot+".mrc",id) mdOut.setValue(emlib.MDL_VOLUME_SCORE_SUM,float(sum),id) mdOut.setValue(emlib.MDL_VOLUME_SCORE_SUM_TH,float(thresholdedSum),id) mdOut.setValue(emlib.MDL_VOLUME_SCORE_MEAN,float(avg),id) mdOut.setValue(emlib.MDL_VOLUME_SCORE_MIN,float(minCC),id) mdOut.write(self._getPath("proposedVolumes.xmd"))
[docs] def projectInitialVolume(self): fnOutputInitVolume=self._getTmpPath("initialVolume.vol") img = ImageHandler() img.convert(self.initialVolume.get(), fnOutputInitVolume) self.runJob("xmipp_image_resize","-i %s --dim %d %d"%(fnOutputInitVolume,self.Xdim2,self.Xdim2)) fnGallery=self._getTmpPath('gallery_InitialVolume.stk') fnOutputReducedClass = self._getExtraPath("reducedClasses.xmd") self.runJob("xmipp_angular_project_library", "-i %s -o %s --sampling_rate %f --sym %s --method fourier 1 0.25 bspline --compute_neighbors --angular_distance -1 --experimental_images %s"\ %(fnOutputInitVolume,fnGallery,self.angularSampling.get(),self.symmetryGroup.get(),fnOutputReducedClass))
def _postprocessVolume(self, vol, row): self._counter += 1 vol.setObjComment('ransac volume %02d' % self._counter) vol._xmipp_volScoreSum = Float(row.getValue(emlib.MDL_VOLUME_SCORE_SUM)) vol._xmipp_volScoreSumTh = Float(row.getValue(emlib.MDL_VOLUME_SCORE_SUM_TH)) vol._xmipp_volScoreMean = Float(row.getValue(emlib.MDL_VOLUME_SCORE_MEAN)) vol._xmipp_volScoreMin = Float(row.getValue(emlib.MDL_VOLUME_SCORE_MIN))
[docs] def createOutputStep(self): inputSet = self.inputSet.get() fn = self._getPath('proposedVolumes.xmd') # md = emlib.MetaData(fn) # md.addItemId() # md.write(fn) volumesSet = self._createSetOfVolumes() volumesSet.setSamplingRate(inputSet.getSamplingRate()) self._counter = 0 readSetOfVolumes(fn, volumesSet, postprocessImageRow=self._postprocessVolume) # Set a meaningful comment # for vol in volumesSet: # vol.setObjComment('ransac volume %02d' % vol.getObjId()) # volumesSet.update(vol) self._defineOutputs(outputVolumes=volumesSet) self._defineSourceRelation(self.inputSet, volumesSet) self._storeSummaryInfo(self.numVolumes.get())
#--------------------------- INFO functions -------------------------------------------- def _validate(self): errors = [] inputSet = self.inputSet.get() if isinstance(inputSet, SetOfClasses2D): if not self.inputSet.get().hasRepresentatives(): errors.append("The input classes should have representatives.") if self.dimRed: nGrids = self.numGrids.get() if (nGrids * nGrids) > inputSet.getSize(): errors.append('Dimensionaly reduction could not be applied') errors.append('if the number of classes is less than the number') errors.append('of grids squared. \n') errors.append('Consider either provide more classes or') errors.append('disable dimensionality reduction') if self.useGpu and not isXmippCudaPresent(): errors.append("You have asked to use GPU, but I cannot find Xmipp GPU programs in the path") return errors def _summary(self): summary = [] if hasattr(self, 'outputVolumes'): summary.append("RANSAC iterations: %d" % self.nRansac) summary.append("Number of volumes to refine: %d" % self.numVolumes.get()) summary.append("Number of refinement interations: %d" % self.numIter.get()) if self.summaryVar.hasValue(): summary.append(self.summaryVar.get()) else: summary.append("Output volumes not ready yet.") return summary def _methods(self): strline='' if hasattr(self, 'outputVolumes'): strline+='We obtained %d initial volume(s) from %s set of images and symmetry %s.'%\ (self.numVolumes.get(),self.getObjectTag('inputSet'),self.symmetryGroup.get()) strline+='The number of RANSAC volumes generated are %d.From this set, we selected the best %d volume(s) that was refined by %d projection matching iteration(s)'%\ (self.nRansac.get(),self.numVolumes.get(),self.numIter.get()) strline+= ' [Vargas2014]' if self.dimRed: strline+='We performed dimensionality reduction using a %d x %d grid'%\ (self.numGrids.get(),self.numGrids.get()) return [strline] #--------------------------- UTILS functions --------------------------------------------
[docs] def getCCThreshold(self): fnCorr = self._getExtraPath("correlations.xmd") mdCorr = emlib.MetaData("corrThreshold@"+fnCorr) return mdCorr.getValue(emlib.MDL_WEIGHT, mdCorr.firstObject())
def _storeSummaryInfo(self, numVolumes): """ Store some information when the protocol finishes. """ msg1 = '' msg2 = '' for n in range(numVolumes): fnBase = 'proposedVolume%05d' % n fnRoot = self._getPath(fnBase + ".xmd") if os.path.isfile(fnRoot): md = emlib.MetaData(fnRoot) size = md.size() if (size < 5): msg1 = "Num of inliers for model %d too small and equal to %d \n" % (n, size) msg1 += "Decrease the value of Inlier Threshold parameter and run again \n" fnRoot = self._getTmpPath("ransac00000.xmd") if os.path.isfile(fnRoot): md = emlib.MetaData(fnRoot) size = md.size() if (size < 5): msg2 = "Num of random samples too small and equal to %d.\n" % size msg2 += "If the option Dimensionality reduction is on, increase the number of grids per dimension (In this case we recommend to put Dimensionality reduction off).\n" msg2 += "If the option Dimensionality reduction is off, increase the number of random samples.\n" msg = msg1 + msg2 self.summaryVar.set(msg) def _citations(self): return ['Vargas2014']