Source code for xmipp3.protocols.protocol_validate_overfitting

# **************************************************************************
# *
# * Authors:     Mohsen Kazemi  (mkazemi@cnb.csic.es)
# *              C.O.S. Sorzano (coss@cnb.csic.es)
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************

from math import sqrt
import glob
import os

from pyworkflow import VERSION_1_1
from pyworkflow.utils import getFloatListFromValues
from pyworkflow.utils.path import cleanPattern
from pwem.protocols import ProtReconstruct3D
from pyworkflow.protocol.params import (PointerParam, FloatParam,
                                        NumericListParam, IntParam,
                                        StringParam, BooleanParam,
                                        LEVEL_ADVANCED, GPU_LIST, USE_GPU)
from pwem import emlib
from xmipp3.convert import writeSetOfParticles
from xmipp3.base import isXmippCudaPresent


[docs]class XmippProtValidateOverfitting(ProtReconstruct3D): """ Check how the resolution changes with the number of projections used for 3D reconstruction. NOTE: Using the output plot, with the reconstruction of aligned gaussian noise, you can assess the validity of the reconstruction from your micrograph images. Practically, if the resolution of reconstruction based on your images is not considerably different from aligned gaussian noise one (for less number of particles),your images may not produce a valid reconstruction. This method has been proposed by: B. Heymann "Validation of 3D EM Reconstructions", 2015. (see References) AI Generated ## Overview The Validate Overfitting protocol evaluates how the estimated reconstruction resolution changes as a function of the number of particles used. In cryo-EM validation, one concern is that apparent high-resolution signal may come from overfitting, especially when few particles are used. This protocol addresses that concern by repeatedly reconstructing independent random subsets of particles of different sizes. For each subset size, it computes the resolution between two independently reconstructed half volumes. Optionally, the protocol also performs the same experiment using aligned Gaussian noise. This gives a noise-bound reference curve. If reconstructions from real particles behave similarly to reconstructions from aligned noise, especially at low particle numbers, the apparent resolution should be interpreted cautiously. The protocol writes text files containing the mean and standard deviation of the estimated resolution for each tested particle subset size. ## Inputs and General Workflow The main input is a set of particles with projection-alignment information. The protocol writes the input particles to Xmipp metadata format. If resizing is enabled, it creates resized versions of the particles and, when noise validation is requested, of the reference volume. For each requested subset size, and for each randomization iteration, the protocol creates two independent random particle subsets of that size. It reconstructs one volume from each subset, computes FSC between the two volumes, and records the frequency at which the FSC drops below 0.5. After all repetitions are complete, the protocol summarizes the results across iterations for each subset size. If noise validation is enabled, the same summary is produced for the Gaussian-noise reconstructions. ## Input Particles The **Input particles** parameter defines the particle set used for the validation experiment. The particles must have projection alignment, because the protocol reconstructs 3D volumes using the orientations already assigned to the particles. The protocol does not solve particle orientations from scratch. The reliability of the validation depends on the quality of these alignments. Poor angular assignments, strong heterogeneity, bad particles, or incorrect CTF handling in previous steps can affect the resulting resolution curves. ## Resize Input Particles and Volume The **Resize input particles and volume?** option allows the protocol to reduce the image and volume size before running the validation. This is useful when the goal is not to obtain the best possible reconstruction but to perform a faster validation experiment. Resizing reduces computational cost, especially because the protocol performs many reconstructions. If resizing is enabled, the input particles are resized using Fourier resizing. If the noise-bound calculation is also enabled, the reference volume is resized as well. The protocol also rescales particle shifts so that the alignment metadata remain consistent with the new image size. ## New Size The **New size** parameter is used when resizing is enabled. It defines the new particle and volume size in pixels. The new size must be smaller than the current particle size. The protocol validates that Fourier resizing is not used to increase dimensions and that the new size is not identical to the current size. A smaller size makes the protocol faster but limits the resolution range that can be meaningfully analyzed. ## Calculate the Noise Bound for Resolution The **Calculate the noise bound for resolution?** option enables the Gaussian noise control experiment. When this option is enabled, the protocol creates Gaussian-noise images, assigns orientations to them by aligning them to projections from a reference volume, reconstructs noise volumes, and computes FSC between independent noise reconstructions. This produces a reference curve describing the apparent resolution that can be obtained from aligned noise under the same reconstruction procedure. This option increases computational time but provides an important control for detecting overfitting. ## Initial 3D Reference Volume The **Initial 3D reference volume** parameter is required when the noise-bound calculation is enabled. The reference volume is used to generate a projection library. Gaussian-noise images are then aligned against this projection library, producing orientation assignments for the noise images. The resulting noise reconstructions show how much apparent structure can be introduced by aligning pure noise to a reference. The input can be a volume or a set of volumes, although the protocol uses the selected reference volume file for the projection library. ## Symmetry Group The **Symmetry group** parameter defines the symmetry imposed during reconstruction and, when the noise-bound calculation is enabled, during projection-library generation. For asymmetric particles, use **c1**. If the particle has known symmetry, the appropriate Xmipp symmetry group can be used. Using incorrect symmetry may artificially improve apparent signal or distort the validation. Symmetry should be used only when biologically justified. ## Number of Particles The **Number of particles** parameter defines the subset sizes tested by the protocol. For each listed value, the protocol reconstructs pairs of random subsets of that size. The default list covers a broad range from small subsets to several thousand particles. The protocol automatically ignores subset sizes larger than half of the input particle set. This is because each validation repetition needs two independent subsets of the selected size. The resulting curve shows how apparent resolution improves as more particles are used. ## Number of Iterations The **Number of times the randomization is performed** parameter defines how many repeated experiments are performed for each subset size. Each repetition uses new random subsets. The protocol then computes the mean and standard deviation of the estimated resolution across repetitions. More iterations provide a more stable estimate and better uncertainty assessment, but increase runtime. The default value is 10. ## Maximum Resolution The **Maximum resolution** parameter defines the highest digital frequency used during Fourier reconstruction. The value is expressed as a digital frequency. Nyquist corresponds to 0.5. This parameter limits the frequency range used during reconstruction. The default value of 0.5 allows reconstruction up to Nyquist. ## Angular Sampling Rate The **Angular sampling rate** parameter is used when the noise-bound calculation is enabled. It defines the angular sampling of the projection library generated from the reference volume. Gaussian-noise images are aligned against this library. A smaller angular sampling value generates a denser projection library and may make noise alignment more effective, but increases computation time. ## Reconstruction Procedure For each subset size and repetition, the protocol creates two random subsets of particles. Each subset is reconstructed independently using Xmipp Fourier reconstruction. The protocol uses the input particle orientations, selected symmetry, maximum resolution, padding, and sampling rate. If GPU execution is enabled, the CUDA Fourier reconstruction program is used when available. Otherwise, the CPU Fourier reconstruction program is used. The two reconstructed volumes are then compared by FSC. ## FSC Criterion The protocol computes FSC between the two independent reconstructions obtained from the two random subsets. It records the frequency at which the FSC first drops below 0.5. This frequency is used as the resolution-related value for that repetition. After all repetitions, the protocol computes the mean and standard deviation for each subset size. The output files report both the mean frequency and an inverse-square transformation used for plotting or analysis. ## Gaussian-Noise Reconstructions When noise-bound calculation is enabled, the protocol performs a parallel experiment with Gaussian-noise images. For each particle subset, it creates noise images, aligns them against reference projections, reconstructs volumes from the aligned noise, and computes FSC between independent noise reconstructions. This tests how much apparent resolution can be produced by the alignment and reconstruction process itself, even when the input images contain no real particle signal. ## Output Values The main numerical output is written to: `outputValues.txt` This file contains one row per accepted subset size. Each row includes: - number of particles; - mean recorded FSC frequency; - standard deviation of that frequency; - mean inverse-square-transformed value; - standard deviation of that transformed value. These values can be plotted to show how reconstruction resolution changes with particle number. ## Noise Output Values If the noise-bound calculation is enabled, the protocol also writes: `outputNoiseValues.txt` This file has the same structure as `outputValues.txt`, but for the aligned Gaussian-noise reconstructions. Comparison between `outputValues.txt` and `outputNoiseValues.txt` is central to the overfitting interpretation. ## Interpreting the Results A reliable dataset should show a clear difference between real-particle reconstructions and aligned-noise reconstructions. As the number of particles increases, the real-particle reconstruction should improve in a way that reflects genuine signal accumulation. The noise-bound curve indicates how much apparent resolution can arise from overfitting or alignment of noise. If the real-particle curve is close to the noise curve, especially for small particle numbers, the apparent resolution may not be reliable. If the real-particle curve is clearly better than the noise curve, this supports the validity of the reconstruction. The results should be interpreted together with half-map FSC, map appearance, particle quality, angular distribution, and biological plausibility. ## GPU Execution The protocol supports GPU reconstruction. If GPU execution is enabled, the protocol uses Xmipp CUDA Fourier reconstruction when available. If multiple MPI processes are used, GPU-related parameters are adjusted to distribute work over available GPUs. If GPU execution is requested but the required Xmipp CUDA programs are not available, the protocol reports a validation error. GPU execution is recommended because the protocol performs many independent reconstructions. ## Practical Recommendations Use this protocol after particles have projection-alignment parameters. Enable the noise-bound calculation when you want a stronger overfitting assessment, especially for small or difficult datasets. Use resizing when you want a faster diagnostic run and do not need to test the full-resolution behavior. Choose subset sizes that cover the range from small particle numbers to a substantial fraction of the dataset. The protocol will automatically ignore sizes larger than half the input set. Increase the number of iterations if the curves are noisy or unstable. Use the same symmetry and reconstruction conventions that are appropriate for the real dataset. Plot the real-particle and noise curves together when interpreting the result. ## Final Perspective Validate Overfitting is a reconstruction-validation protocol based on particle subsampling. For biological users, its main value is that it tests whether apparent resolution improves with particle number in a way that is clearly better than what can be obtained from aligned Gaussian noise. The protocol does not produce a final reconstruction. Instead, it produces diagnostic curves that help assess whether a reconstruction is supported by real signal or may be affected by overfitting. """ _label = 'validate overfitting' _lastUpdateVersion = VERSION_1_1 # --------------------------- DEFINE param functions -------------------------------------------- def _defineParams(self, form): form.addHidden(USE_GPU, BooleanParam, default=True, label="Use GPU for execution", help="This protocol has both CPU and GPU implementation.\ Select the one you want to use.") form.addHidden(GPU_LIST, StringParam, default='0', expertLevel=LEVEL_ADVANCED, label="Choose GPU IDs", help="Add a list of GPU devices that can be used") form.addSection(label='Input') form.addParam('inputParticles', PointerParam, pointerClass='SetOfParticles', pointerCondition='hasAlignmentProj', label="Input particles", help='Select the input images from the project.') form.addParam('doResize', BooleanParam, default=False, label='Resize input particles and volume?', help="If obtaining the best possible reconstruction " "is not your goal, you can resize your input " "particales and volume to reduce running time " "of the protocol") form.addParam('newSize', FloatParam, default=64, condition="doResize", label='New size (px)', help="Resizing input particles and volume" "using fourier method") form.addParam('doNoise', BooleanParam, default=False, label='Calculate the noise bound for resolution?', help="Select if you want to obtain the noise bound for " "resolution. This calculation will increase the " "computational time of this protocol.") form.addParam('input3DReference', PointerParam, pointerClass='Volume,SetOfVolumes', label='Initial 3D reference volume', condition='doNoise == True', help="Input 3D reference reconstruction to " "align gaussian noise.") form.addParam('symmetryGroup', StringParam, default='c1', label="Symmetry group", help="See [[Xmipp Symmetry][http://www2.mrc-lmb.cam.ac.uk/Xmipp/index.php/Conventions_%26_File_formats#Symmetry]] page" "for a description of the symmetry format" "accepted by Xmipp") form.addParam('numberOfParticles', NumericListParam, default="10 20 50 100 200 500 1000 1500 2000 3000 5000", expertLevel=LEVEL_ADVANCED, label="Number of particles", help="Number of particles in each subset and consequently " "number of subsets (for instance, in default values," "a number of 6 subsets with given values are chosen)\n" "Note:\n" "The number of particles in each subset should not " "be larger than 1/2 of the input set of particles. " "The protocol consider this issue automatically. It " "means that if the input set of particles are lower " "than 10,000, you could leave default values unchanged.") form.addParam('numberOfIterations', IntParam, default=10, expertLevel=LEVEL_ADVANCED, label="Number of times the randomization is performed") form.addParam('maxRes', FloatParam, default=0.5, expertLevel=LEVEL_ADVANCED, label="Maximum resolution (dig.freq)", help='Nyquist is 0.5') form.addParam('angSampRate', FloatParam, default=5, expertLevel=LEVEL_ADVANCED, label="Angular sampling rate") form.addParallelSection(threads=1, mpi=4) # --------------------------- INSERT steps functions -------------------------------------------- def _createFilenameTemplates(self): """ Centralize how files are called for iterations and references. """ myDict = { 'input_xmd': self._getExtraPath('input_particles.xmd') } self._updateFilenamesDict(myDict) def _insertAllSteps(self): self._createFilenameTemplates() # convertInputStep particlesMd = self._getFileName('input_xmd') imgSet = self.inputParticles.get() writeSetOfParticles(imgSet, particlesMd) # for debugging purpose debugging = False if self.doNoise.get(): inputNameRefVol = self.input3DReference.get().getFileName() fnNewVol = self._getExtraPath('newVolume.vol') fnNewImgStk = self._getExtraPath('newImages.stk') fnNewImgMd = self._getExtraPath('newImages.xmd') # do resizing if self.doResize.get(): if self.doNoise.get(): args = "-i %s ""-o %s --fourier %f " % (inputNameRefVol, fnNewVol, self.newSize) self.runJob("xmipp_image_resize", args) args = "-i %s -o %s --fourier %f" % (particlesMd, fnNewImgStk, self.newSize) args += " --save_metadata_stack %s" % fnNewImgMd args += " --keep_input_columns" self.runJob("xmipp_image_resize", args) oldSize = self.inputParticles.get().getDim()[0] scaleFactor = float(self.newSize.get()) / float(oldSize) args = "-i %s" % fnNewImgMd args += " --operate modify_values 'shiftX=shiftX*%f'" % scaleFactor self.runJob('xmipp_metadata_utilities', args, numberOfMpi=1) args = "-i %s" % fnNewImgMd args += " --operate modify_values 'shiftY=shiftY*%f'" % scaleFactor self.runJob('xmipp_metadata_utilities', args, numberOfMpi=1) # projections from reference volume if self.doNoise.get(): if self.doResize.get(): args = "-i %s -o %s" % (fnNewVol, self._getExtraPath( 'Ref_Projections.stk')) args += " --experimental_images %s" % fnNewImgMd else: args = "-i %s -o %s" % ( self.input3DReference.get().getFileName(), self._getExtraPath('Ref_Projections.stk')) args += " --experimental_images %s" % particlesMd args += " --sampling_rate %f --sym %s" % (self.angSampRate, self.symmetryGroup.get()) args += " --min_tilt_angle 0 --max_tilt_angle 90" args += " --compute_neighbors --angular_distance -1" self.runJob("xmipp_angular_project_library", args, numberOfMpi=self.numberOfMpi.get() * self.numberOfThreads.get()) numberOfParticles = getFloatListFromValues(self.numberOfParticles.get()) fractionCounter = 0 maxNumberOfParticles = 0.5 * self.inputParticles.get().getSize() for number in numberOfParticles: if number <= maxNumberOfParticles: #AJ before maxNumberOfParticles for iteration in range(0, self.numberOfIterations.get()): self._insertFunctionStep('reconstructionStep', number, fractionCounter, iteration, debugging, fnNewImgMd, particlesMd) fractionCounter += 1 self._insertFunctionStep('gatherResultsStep', debugging) # --------------------------- STEPS functions --------------------------------------------
[docs] def reconstructionStep(self, numberOfImages, fractionCounter, iteration, debugging, fnNewImgMd, particlesMd): fnRoot = self._getExtraPath("fraction%02d" % fractionCounter) Ts = self.inputParticles.get().getSamplingRate() # for noise fnRootN = self._getExtraPath("Nfraction%02d" % fractionCounter) for i in range(0, 2): fnImgs = fnRoot + "_images_%02d_%02d.xmd" % (i, iteration) if self.doResize.get(): self.runJob("xmipp_metadata_utilities", "-i %s -o %s --operate random_subset %d" % ( fnNewImgMd, fnImgs, numberOfImages), numberOfMpi=1) else: self.runJob("xmipp_metadata_utilities", "-i %s -o %s --operate random_subset %d" % ( particlesMd, fnImgs, numberOfImages), numberOfMpi=1) params = ' -i %s' % fnImgs params += ' -o %s' % fnRoot + "_%02d_%02d.vol" % (i, iteration) params += ' --sym %s' % self.symmetryGroup.get() params += ' --max_resolution %0.3f' % self.maxRes params += ' --padding 2' params += ' --fast' params += ' --sampling %f' % Ts if self.useGpu.get(): #AJ to make it work with and without queue system if self.numberOfMpi.get()>1: N_GPUs = len((self.gpuList.get()).split(',')) params += ' -gpusPerNode %d' % N_GPUs params += ' -threadsPerGPU %d' % max(self.numberOfThreads.get(),4) count=0 GpuListCuda='' if self.useQueueForSteps() or self.useQueue(): GpuList = os.environ["CUDA_VISIBLE_DEVICES"] GpuList = GpuList.split(",") for elem in GpuList: GpuListCuda = GpuListCuda+str(count)+' ' count+=1 else: GpuListAux = '' for elem in self.getGpuList(): GpuListCuda = GpuListCuda+str(count)+' ' GpuListAux = GpuListAux+str(elem)+',' count+=1 os.environ["CUDA_VISIBLE_DEVICES"] = GpuListAux if self.numberOfMpi.get()==1: params += " --device %s" %(GpuListCuda) params += ' --thr %d' % self.numberOfThreads.get() if self.numberOfMpi.get()>1: self.runJob('xmipp_cuda_reconstruct_fourier', params, numberOfMpi=len((self.gpuList.get()).split(','))+1) else: self.runJob('xmipp_cuda_reconstruct_fourier', params) else: self.runJob('xmipp_reconstruct_fourier_accel', params) # for noise if self.doNoise.get(): noiseStk = fnRoot + "_noises_%02d.stk" % i self.runJob("xmipp_image_convert", "-i %s -o %s" % (fnImgs, noiseStk), numberOfMpi=1) self.runJob("xmipp_image_operate", "-i %s --mult 0" % noiseStk) self.runJob("xmipp_transform_add_noise", "-i %s --type gaussian 3" % noiseStk, numberOfMpi=1) fnImgsNL = fnRoot + "_noisesL_%02d.xmd" % i noiseStk2 = fnRoot + "_noises2_%02d.stk" % i self.runJob("xmipp_image_convert", "-i %s -o %s --save_metadata_stack %s" % ( noiseStk, noiseStk2, fnImgsNL), numberOfMpi=1) fnImgsNoiseOld = fnRoot + "_noisesOld_%02d.xmd" % i fnImgsN = fnRoot + "_noises_%02d_%02d.xmd" % (i, iteration) self.runJob("xmipp_metadata_utilities", '-i %s -o %s --operate drop_column "image"' % ( fnImgs, fnImgsNoiseOld), numberOfMpi=1) self.runJob("xmipp_metadata_utilities", "-i %s --set merge %s -o %s" % ( fnImgsNL, fnImgsNoiseOld, fnImgsN), numberOfMpi=1) # alignment gaussian noise fnImgsAlign = self._getExtraPath( "Nfraction_alignment%02d" % fractionCounter) fnImgsAlignN = fnImgsAlign + "_%02d_%02d.xmd" % (i, iteration) args = "-i %s -o %s -r %s --Ri 0 --Ro -1 --mem 2 --append " % ( fnImgsN, fnImgsAlignN, self._getExtraPath("Ref_Projections.stk")) self.runJob('xmipp_angular_projection_matching', args, numberOfMpi=self.numberOfMpi.get()) # numberOfMpi = self.numberOfMpi.get() * self.numberOfThreads.get()) params = ' -i %s' % fnImgsAlignN params += ' -o %s' % fnRootN + "_%02d_%02d.vol" % ( i, iteration) params += ' --sym %s' % self.symmetryGroup.get() params += ' --max_resolution %0.3f' % self.maxRes params += ' --padding 2' params += ' --sampling %f' % Ts if self.useGpu.get(): #AJ to make it work with and without queue system if self.numberOfMpi.get()>1: N_GPUs = len((self.gpuList.get()).split(',')) params += ' -gpusPerNode %d' % N_GPUs params += ' -threadsPerGPU %d' % max(self.numberOfThreads.get(),4) count=0 GpuListCuda='' if self.useQueueForSteps() or self.useQueue(): GpuList = os.environ["CUDA_VISIBLE_DEVICES"] GpuList = GpuList.split(",") for elem in GpuList: GpuListCuda = GpuListCuda+str(count)+' ' count+=1 else: GpuListAux = '' for elem in self.getGpuList(): GpuListCuda = GpuListCuda+str(count)+' ' GpuListAux = GpuListAux+str(elem)+',' count+=1 os.environ["CUDA_VISIBLE_DEVICES"] = GpuListAux if self.numberOfMpi.get()==1: params += " --device %s" %(GpuListCuda) params += ' --thr %d' % self.numberOfThreads.get() if self.numberOfMpi.get()>1: self.runJob('xmipp_cuda_reconstruct_fourier', params, numberOfMpi=len((self.gpuList.get()).split(','))+1) else: self.runJob('xmipp_cuda_reconstruct_fourier', params) else: self.runJob('xmipp_reconstruct_fourier_accel', params) self.runJob('xmipp_resolution_fsc', "--ref %s -i %s -o %s --sampling_rate %f" % \ (fnRoot + "_00_%02d.vol" % iteration, fnRoot + "_01_%02d.vol" % iteration, fnRoot + "_fsc_%02d.xmd" % iteration, Ts), numberOfMpi=1) mdFSC = emlib.MetaData(fnRoot + "_fsc_%02d.xmd" % iteration) for id in mdFSC: fscValue = mdFSC.getValue(emlib.MDL_RESOLUTION_FRC, id) maxFreq = mdFSC.getValue(emlib.MDL_RESOLUTION_FREQREAL, id) if fscValue < 0.5: break fh = open(fnRoot + "_freq.txt", "a") fh.write("%f\n" % maxFreq) fh.close() # for noise if self.doNoise.get(): self.runJob('xmipp_resolution_fsc', "--ref %s -i %s -o %s --sampling_rate %f" % ( fnRootN + "_00_%02d.vol" % iteration, fnRootN + "_01_%02d.vol" % iteration, fnRootN + "_fsc_%02d.xmd" % iteration, Ts), numberOfMpi=1) cleanPattern(fnRoot + "_noises_0?_0?.xmd") cleanPattern(fnRoot + "_noisesOld_0?.xmd") cleanPattern(fnRoot + "_noisesL_0?.xmd") cleanPattern(fnRoot + "_noises2_0?.stk") mdFSCN = emlib.MetaData(fnRootN + "_fsc_%02d.xmd" % iteration) for id in mdFSCN: fscValueN = mdFSCN.getValue(emlib.MDL_RESOLUTION_FRC, id) maxFreqN = mdFSCN.getValue(emlib.MDL_RESOLUTION_FREQREAL, id) if fscValueN < 0.5: break fhN = open(fnRootN + "_freq.txt", "a") fhN.write("%f\n" % maxFreqN) fhN.close() if not debugging: cleanPattern(fnRoot + "_0?_0?.vol") cleanPattern(fnRoot + "_images_0?_0?.xmd") cleanPattern(fnRoot + "_fsc_0?.xmd") cleanPattern(fnRoot + "_noises_0?.stk") if self.doNoise.get(): cleanPattern(fnImgsAlign + "_0?_0?.xmd") cleanPattern(fnRootN + "_0?_0?.vol") cleanPattern(fnRootN + "_fsc_0?.xmd")
[docs] def gatherResultsStep(self, debugging): self._writeFreqsMetaData("fraction*_freq.txt", self._defineResultsTxt()) if self.doNoise.get(): self._writeFreqsMetaData("Nfraction*_freq.txt", self._defineResultsNoiseTxt()) if not debugging: #cleanPattern(self._getExtraPath("fraction*_freq.txt")) AJ volver cleanPattern(self._getExtraPath('newImages.stk')) cleanPattern(self._getExtraPath('newImages.xmd')) if self.doNoise.get(): cleanPattern(self._getExtraPath("Nfraction*_freq.txt")) cleanPattern(self._getExtraPath('Ref_Projections*')) cleanPattern(self._getExtraPath('newVolume.vol'))
# --------------------------- INFO functions -------------------------------------------- def _summary(self): """ Should be overriden in subclasses to return summary message for NORMAL EXECUTION. """ numberOfParticles = getFloatListFromValues(self.numberOfParticles.get()) maxNumberOfParticles = 0.5 * self.inputParticles.get().getSize() intNumberOfParticles = [int(float(x)) for x in numberOfParticles] particlesNumber = '' for number in intNumberOfParticles: if number <= maxNumberOfParticles: particlesNumber += str(number) + ' ' msg = [] msg.append("Number of particles: " + particlesNumber) msg.append("Number of times that reconstruction is performed per each " "particles subset: %d" % self.numberOfIterations) return msg def _methods(self): messages = [] messages.append('B. Heymann "Validation of 3D EM Reconstructions"') return messages def _citations(self): return ['B.Heymann2015'] def _validate(self): errors = [] if (self.doResize.get() and self.newSize.get() > self.inputParticles.get().getDim()[0]): errors.append("Fourier resize method cannot be used " "to increase the dimensions") if (self.doResize.get() and self.newSize.get() == self.inputParticles.get().getDim()[0]): errors.append("The new chosen size is equal to the " "recent particles size") if self.useGpu and not isXmippCudaPresent(): errors.append("You have asked to use GPU, but I cannot find the Xmipp GPU programs") return errors # --------------------------- UTILS functions -------------------------------------------- def _writeFreqsMetaData(self, pattern, outputFn): """ Glob and read frequencies either from reconstruction or noise and write the proper metadata file. """ fnFreqs = sorted(glob.glob(self._getExtraPath(pattern))) subset = 0 numberOfParticles = getFloatListFromValues(self.numberOfParticles.get()) validationMd = emlib.MetaData() fnOut = open(outputFn, 'w') for fnFreq in fnFreqs: data = [] dataInv = [] fnFreqOpen = open(fnFreq, "r") for line in fnFreqOpen: fields = line.split() rowdata = list(map(float, fields)) val = 1.0/(float(rowdata[0])*float(rowdata[0])) dataInv.append(val) data.extend(rowdata) meanRes = (sum(data) / len(data)) data[:] = [(x - meanRes) ** 2 for x in data] varRes = (sum(data) / (len(data) - 1)) stdRes = sqrt(varRes) meanResInv = (sum(dataInv) / len(dataInv)) dataInv[:] = [(x - meanResInv) ** 2 for x in dataInv] varResInv = (sum(dataInv) / (len(dataInv) - 1)) stdResInv = sqrt(varResInv) fnOut.write(str(numberOfParticles[subset]) + ' ' + str(meanRes) + ' ' + str(stdRes) + ' ' + str(meanResInv) + ' ' + str(stdResInv) + '\n') # # objId = validationMd.addObject() # validationMd.setValue(emlib.MDL_COUNT, # long(numberOfParticles[subset]), # objId) # validationMd.setValue(emlib.MDL_AVG, meanRes, objId) # validationMd.setValue(emlib.MDL_STDDEV, stdRes, objId) subset += 1 # validationMd.write(outputFn) fnOut.close() # def _defineResultsName(self): # return self._getExtraPath('results.xmd') # # def _defineResultsNoiseName(self): # return self._getExtraPath('resultsNoise.xmd') def _defineResultsTxt(self): return self._getExtraPath('outputValues.txt') def _defineResultsNoiseTxt(self): return self._getExtraPath('outputNoiseValues.txt')