# **************************************************************************
# *
# * Authors: Jeison Mendez (jmendez@utp.edu.co) (2020)
# *
# * Universidad Nacional Autónoma de México, UNAM
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307 USA
# *
# * All comments concerning this program package may be sent to the
# * e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
from glob import glob
import math
from os.path import join, isfile, exists
from shutil import copyfile, copy
import os
from pyworkflow.protocol.params import (PointerParam, FloatParam,
STEPS_PARALLEL,
StringParam, BooleanParam, IntParam,
LEVEL_ADVANCED, USE_GPU, GPU_LIST)
from pyworkflow.protocol import String, Float
from pyworkflow.protocol import params
from pyworkflow.utils.path import (moveFile, makePath, cleanPattern, copyFile, cleanPath)
from pyworkflow.gui.plotter import Plotter
from pwem.objects import Volume, SetOfVolumes
from pwem import emlib
from pwem.emlib.image import ImageHandler
from pwem.emlib.metadata import getFirstRow, getSize, iterRows
import pwem.emlib.metadata as md
from pwem.protocols import ProtAnalysis3D
from pwem.constants import ALIGN_PROJ
from xmipp3.convert import writeSetOfParticles, writeSetOfVolumes, \
getImageLocation, setXmippAttributes, createItemMatrix, readSetOfParticles, readSetOfImages
from _ast import Try
[docs]class XmippProtAngularGraphConsistency(ProtAnalysis3D):
"""
Performs soft alignment validation of a set of particles previously aligned
confronting them using Graph filtered correlations representation. This
protocol produces an histogram with two groups of particles.
AI Generated:
What this protocol is for
Angular graph consistency is a soft validation tool for single-particle
datasets in which particles have already been assigned orientations
(projection directions) with respect to a 3D reference volume. Instead
of redoing a full refinement, the protocol asks a different question:
“Are the assigned orientations internally consistent and stable when
confronted with nearby directions in projection space?” The underlying
idea is that reliable alignments tend to live in a “good neighborhood”:
if a particle truly matches a given projection direction, it should
also correlate strongly with very close-by directions in the projection
manifold, and the behavior of those correlations should be smooth and
coherent. When an assignment is wrong (or weakly determined),
correlations tend to be less stable and can drift toward a different
direction when you examine the neighborhood.
For a biological user, this protocol is primarily a quality-control
step. It helps you identify particles that are likely to be within a
reliable assignment zone versus particles that behave as if their
orientation is uncertain, inconsistent, or potentially incorrect. The
outcome is especially useful when you want to clean a dataset before
high-resolution refinement, or when you suspect that a portion of the
data may have been aligned incorrectly (for instance due to low SNR,
strong heterogeneity, preferred orientation, contamination, or an
imperfect reference).
Inputs and what they must contain
You provide an input volume (the 3D reference used to generate
projections) and a set of input particles that must already contain
alignment information (projection alignment). In practical terms, the
particles must have their assigned angles and shifts available; this
protocol is not intended for raw, unaligned particles. You also specify
the symmetry group of the particle (e.g., c1, c2, d7, etc.), because
symmetry affects what “nearby directions” mean and how angular
distances are interpreted.
What the protocol does, in plain terms
The protocol builds a projection library from the input volume using
the chosen symmetry and an angular sampling step (the spacing between
projection directions). It then compares each experimental particle
against projections in a way that emphasizes neighborhood structure:
it uses graph-filtered correlations to assess whether the particle’s
correlation landscape is compatible with a stable assignment.
Conceptually, you can think of it as checking whether the assigned
direction sits inside a smooth, high-correlation region of the
projection space rather than being a fragile, isolated maximum.
To make this validation more robust, the protocol also performs a set
of preprocessing steps that most users will appreciate even if they
never look at them explicitly: particles and references are brought to
a suitable sampling regime, images are masked in a way that reduces
boundary artifacts, and a low-pass target is applied so that the
validation focuses on signal that is actually reliable at the current
stage.
The end result is that each particle gets additional validation-related
measures, such as correlation with an optimal direction inferred from
the graph neighborhood, correlation with the originally assigned
direction, and an angular distance that describes how far the
“graph-preferred” direction is from the assigned one. These measures
are then used to separate particles into a “more reliable” region and
a “more questionable” region.
Key parameters and how to choose them biologically
The angular sampling (degrees) sets how dense the projection library
is. Smaller values give a finer angular grid, which can improve
sensitivity in detecting small angular instabilities, but it increases
computational cost. For many biological datasets, a few degrees is a
reasonable compromise, especially for validation rather than full
refinement. If you choose a very coarse sampling, the method may miss
subtle inconsistencies; if you choose a very fine sampling, it can
become heavier without necessarily improving practical decisions.
The target resolution (Å) is one of the most important knobs for robust
behavior. The protocol low-pass filters the particles to this
resolution before validation. Biologically, this is a good idea
because at this stage you usually want to validate orientations using
signal that is genuinely present in the data. Too aggressive (too
high-resolution) targets tend to make validation unstable because
high-frequency content is noisy and alignment at those frequencies can
be misleading. That is why the protocol recommends staying in the range
of roughly 8–10 Å unless you have a strong reason to deviate. If your
dataset is still early-stage or very noisy, staying closer to 10 Å can
make the validation more conservative and stable; if you have a strong
dataset with good SNR, you might lean toward 8 Å.
The correlation level for validation defines how strict the validation
should be in terms of correlation thresholds. Biologically, think of
it as how demanding you want to be: a higher threshold will flag more
particles as questionable (more conservative filtering), while a lower
threshold will accept more particles as reliable (more permissive). The
protocol guidance to keep it roughly between 0.90 and 0.97 is consistent
with typical practice: below that you may accept too much junk; above
that you may start discarding borderline-but-useful particles,
especially in heterogeneous or flexible specimens.
The minimum and maximum allowed tilt angles control which tilt angles
are considered in the projection space exploration. For many
single-particle datasets you will leave the defaults unless you have
a specific reason. These limits become relevant when you know that
certain tilt regions are physically implausible or when you want to
restrict validation to a region of orientation space. The “maximum tilt
without mirror check” parameter is related to how mirror ambiguity is
treated at high tilts; as a biological user, you normally only touch
this if you are explicitly dealing with mirror-related issues or
unusual acquisition geometry.
Finally, the symmetry group must match the specimen. If the symmetry is
wrong, the notion of angular distance and neighborhood consistency
becomes distorted, and the validation will be less meaningful. In
symmetric complexes, symmetry also implies that multiple directions
are equivalent; the protocol’s checks are symmetry-aware, which is
exactly what you want when deciding whether an assignment is stable.
Outputs and how to use them in practice
The main output is a new particle set (outputParticles) where each
particle carries updated projection-alignment metadata that includes
the validation descriptors. In addition, the protocol typically
produces an auxiliary particle set (often interpreted as a “suggested
disabling” subset) where particles judged as unreliable by the combined
criteria are marked as disabled. Biologically, this gives you a very
direct workflow option: you can take the auxiliary output and continue
refinement using only the particles that remain enabled, or you can
use it as a diagnostic to understand how much of your dataset appears
unstable.
The protocol also reports a concise global summary in terms of the
percentage of particles likely to be within the reliable assignment
zone. This is useful as a quick health indicator. If the percentage
is close to 100%, it suggests that most assignments are stable under
neighborhood-based validation. If it is substantially lower, it
indicates that a nontrivial fraction of particles behave as if their
orientation is unreliable, which can happen for many biological
reasons: strong compositional heterogeneity, conformational variability,
poor SNR, contamination, strong preferred orientation (causing
ambiguous views), or even a mismatch between the reference and the true
particle population.
How to interpret the result biologically (and what to do next)
When you obtain a high reliable percentage, you can usually proceed with
more confidence into high-resolution refinement or into analyses that
assume stable orientations (for example, certain forms of heterogeneity
analysis). It does not prove correctness, but it is reassuring evidence
that the alignment landscape is not fragile.
When you obtain a moderate or low reliable percentage, you should resist
the temptation to treat the protocol as a simple “reject button” without
thinking biologically. A large questionable fraction may indeed
indicate many misassigned particles, but it may also reflect real
biological heterogeneity (multiple conformations or compositions) where
a single reference forces unstable assignments. In those cases, the
right response may be to split the dataset (classification), improve
the reference, apply better masks, or adjust upstream alignment strategy
rather than simply discarding data.
A very practical use is to run this protocol after an initial angular
assignment/refinement, remove the most unreliable tail (using the
auxiliary output), and then rerun refinement. Often, this improves
convergence and reduces overfitting driven by poorly aligned particles.
In facility workflows, it can also serve as an objective QA step to
compare different processing branches: the branch that yields more
consistent angular assignments under this validation is often the
more robust one.
Recommended “default mindset”
Most biological users can treat this protocol as a conservative
validation step: keep the default target resolution unless you have a
clear reason, use an angular sampling that is not excessively fine, and
interpret the flagged particles as “needs scrutiny” rather than
“definitely wrong.” It is particularly valuable when your next steps
are sensitive to angular correctness, such as high-resolution
refinement, subtle difference mapping, or downstream interpretation
where misalignment can masquerade as biology.
"""
_label = 'angular graph consistency'
def __init__(self, *args, **kwargs):
ProtAnalysis3D.__init__(self, *args, **kwargs)
# --------------------------- DEFINE param functions --------------------------------------------
def _defineParams(self, form):
form.addSection(label='Input')
form.addParam('inputVolume', PointerParam, pointerClass='Volume',
label="Input volume", important=True,
help='Select the input volume(s).')
form.addParam('inputParticles', PointerParam,
pointerClass='SetOfParticles',
pointerCondition='hasAlignment',
label="Input particles", important=True,
help='Select the input projection images.')
form.addParam('symmetryGroup', StringParam, default='c1',
label="Symmetry group",
help='See [[Xmipp Symmetry][http://www2.mrc-lmb.cam.ac.uk/Xmipp/index.php/Conventions_%26_File_formats#Symmetry]] page '
'for a description of the symmetry format accepted by Xmipp')
form.addParam('angularSampling', FloatParam, default=3,
expertLevel=LEVEL_ADVANCED,
label="Angular Sampling (degrees)",
help='Angular distance (in degrees) between neighboring projection points ')
form.addParam('ccLevel', FloatParam, default=0.95,
expertLevel=LEVEL_ADVANCED,
label="correlation level for validation",
help='threshold correlation to be used in validation. Keep this value in the range [0.90 -- 0.97]')
form.addParam('minTilt', FloatParam, default=0,
expertLevel=LEVEL_ADVANCED,
label="Minimum allowed tilt angle",
help='Tilts below this value will not be considered for the alignment')
form.addParam('maxTilt', FloatParam, default=180,
expertLevel=LEVEL_ADVANCED,
label="Maximum allowed tilt angle without mirror check",
help='Tilts above this value will not be considered for the alignment without mirror check')
form.addParam('maximumTargetResolution', FloatParam, default=8,
label='Target resolution (A)',
help='Low pass filter the particles to this resolution. This usually helps a lot obtaining good alignment. You should have a good'
' reason to modify this value outside the range [8-10] A')
form.addParallelSection(threads=1, mpi=1)
# --------------------------- INSERT steps functions --------------------------------------------
def _insertAllSteps(self):
self.imgsFn = self._getExtraPath('images.xmd')
self._insertFunctionStep('convertInputStep',
self.inputParticles.get().getObjId())
self.TsOrig = self.inputParticles.get().getSamplingRate()
self._insertFunctionStep('doIteration000')
self._maximumTargetResolution = self.maximumTargetResolution.get()
self._insertFunctionStep('globalAssignment')
self._insertFunctionStep('cleanDirectory')
self._insertFunctionStep("createOutput")
[docs] def createOutput(self):
fnLastDir = self._getExtraPath("Iter1")
fnDirGlobal = fnLastDir
Ts = self.readInfoField(fnDirGlobal, "sampling", emlib.MDL_SAMPLINGRATE)
fnAnglesDisc=join(fnDirGlobal,"anglesDisc.xmd")
if exists(fnAnglesDisc):
fnAngles = self._getPath("angles.xmd")
self.runJob('xmipp_metadata_utilities','-i %s -o %s --operate modify_values "image=image1"'%(fnAnglesDisc,fnAngles), numberOfMpi=1)
self.runJob('xmipp_metadata_utilities','-i %s --operate sort particleId'%fnAngles,numberOfMpi=1)
self.runJob('xmipp_metadata_utilities','-i %s --operate drop_column image1'%fnAngles,numberOfMpi=1)
self.runJob('xmipp_metadata_utilities','-i %s --operate modify_values "itemId=particleId"'%fnAngles,numberOfMpi=1)
imgSet=self.inputParticles.get()
self.scaleFactor = Ts/imgSet.getSamplingRate()
outImgSet= self._createSetOfParticles()
outImgSet.copyInfo(imgSet)
outImgSet.setAlignmentProj()
outImgSet.setIsPhaseFlipped(imgSet.isPhaseFlipped())
self.iterMd = md.iterRows(fnAngles, md.MDL_PARTICLE_ID)
self.lastRow = next(self.iterMd)
outImgSet.copyItems(imgSet,
updateItemCallback=self._updateItem)
self._defineOutputs(outputParticles=outImgSet)
self._defineSourceRelation(self.inputParticles, outImgSet)
mdParticles = emlib.MetaData(fnAngles)
ccGraphPrevList = mdParticles.getColumnValues(emlib.MDL_GRAPH_CC_PREVIOUS)
ccAsigDirRefList = mdParticles.getColumnValues(emlib.MDL_ASSIGNED_DIR_REF_CC)
angDistPrevList = mdParticles.getColumnValues(emlib.MDL_GRAPH_DISTANCE2MAX_PREVIOUS)
# threshold
th_ccGraph = self.otsu(ccGraphPrevList)
th_ccAsigDir = self.otsu(ccAsigDirRefList)
th_angDist = self.otsu(angDistPrevList)
if (th_ccGraph < 0.50) and (th_ccAsigDir < 0.50):
print("applying default thresholds")
th_ccGraph = 0.95
th_ccAsigDir = 0.95
th_angDist = 3.0 * self.angularSampling.get()
print(th_ccGraph, th_ccAsigDir, th_angDist)
print("\033[1;33Thresholds:\033[0;33 \n")
print('correlation with projection in Graph max direction:',th_ccGraph,
'\ncorrelation with assigned projection:',th_ccAsigDir,
'\nangular distance to maxGraph:',th_angDist)
# a second output set
fnOutParticles = self._getPath('anglesAux.xmd')
copy(fnAngles, fnOutParticles)
mdParticles = emlib.MetaData(fnOutParticles)
n_false = 0;
nParticles = 0
for row in iterRows(mdParticles):
nParticles += 1
objId = row.getObjId()
assigDirRefCC = row.getValue(emlib.MDL_ASSIGNED_DIR_REF_CC)
graphCCPrev = row.getValue(emlib.MDL_GRAPH_CC_PREVIOUS)
graphDistMaxGraphPrev = row.getValue(emlib.MDL_GRAPH_DISTANCE2MAX_PREVIOUS)
if (assigDirRefCC < th_ccAsigDir) and ( graphCCPrev < th_ccGraph ) and (graphDistMaxGraphPrev > th_angDist):
n_false += 1
mdParticles.setValue(emlib.MDL_ENABLED, -1, objId)
mdParticles.write(fnOutParticles)
print('to be disabled:',n_false)
self.subsets = []
i=0
self.subsets.append(self._createSetOfParticles(str(i)))
self.subsets[i].copyInfo(self.inputParticles.get())
readSetOfParticles(fnOutParticles, self.subsets[i])
result = {'outputParticlesAux' : self.subsets[i]}
self._defineOutputs(**result)
self._store(self.subsets[i])
# extra output info
p_gsp = (1 - n_false/nParticles)* 100
self.percentage = Float(p_gsp)
self._store()
def _updateItem(self, particle, row):
count = 0
while self.lastRow and particle.getObjId() == self.lastRow.getValue(md.MDL_PARTICLE_ID):
count += 1
if count:
self._createItemMatrix(particle, self.lastRow)
try:
self.lastRow = next(self.iterMd)
except StopIteration:
self.lastRow = None
particle._appendItem = count > 0
def _createItemMatrix(self, particle, row):
row.setValue(emlib.MDL_SHIFT_X, row.getValue(emlib.MDL_SHIFT_X)*self.scaleFactor)
row.setValue(emlib.MDL_SHIFT_Y, row.getValue(emlib.MDL_SHIFT_Y)*self.scaleFactor)
setXmippAttributes(particle, row, md.MDL_ANGLE_ROT, md.MDL_ANGLE_TILT,
md.MDL_ANGLE_PSI, md.MDL_SHIFT_X, md.MDL_SHIFT_Y,
md.MDL_MAXCC, md.MDL_WEIGHT,
md.MDL_MAXCC_PREVIOUS, md.MDL_GRAPH_CC_PREVIOUS,
md.MDL_ASSIGNED_DIR_REF_CC, md.MDL_GRAPH_DISTANCE2MAX_PREVIOUS,
md.MDL_GRAPH_CC, md.MDL_GRAPH_DISTANCE2MAX)
createItemMatrix(particle, row, align=ALIGN_PROJ)
[docs] def doIteration000(self):
fnDirCurrent = self._getExtraPath('Iter0')
makePath(fnDirCurrent)
# Get volume sampling rate
if self.inputVolume.get() is None:
TsCurrent = self.inputParticles.get().getSamplingRate()
else:
TsCurrent = self.inputVolume.get().getSamplingRate()
self.writeInfoField(fnDirCurrent, "sampling", emlib.MDL_SAMPLINGRATE, TsCurrent)
# Copy reference volume and window if necessary
XDim = self.inputParticles.get().getDimensions()[0]
newXDim = int(round(XDim * self.TsOrig / TsCurrent))
self.writeInfoField(fnDirCurrent, "size", emlib.MDL_XSIZE, newXDim)
img = ImageHandler()
fnVol1 = join(fnDirCurrent, "volume.vol")
vol = self.inputVolume.get()
img.convert(vol, fnVol1)
volXdim = vol.getDim()[0]
if newXDim != volXdim:
self.runJob('xmipp_transform_window', "-i %s --size %d" % (fnVol1, newXDim), numberOfMpi=1)
[docs] def globalAssignment(self):
fnDirIter0 = self._getExtraPath("Iter0")
fnGlobal = self._getExtraPath("Iter1")
makePath(fnGlobal)
previousResolution = self._maximumTargetResolution
targetResolution = self._maximumTargetResolution
TsCurrent = max(self.TsOrig,targetResolution/3)
getShiftsFrom = ''
self.prepareImages(fnDirIter0, fnGlobal, TsCurrent, getShiftsFrom)
TsCurrent = self.readInfoField(fnGlobal, "sampling", emlib.MDL_SAMPLINGRATE) # Prepare images may have changed it
self.prepareReferences(fnDirIter0, fnGlobal, TsCurrent, targetResolution)
# Calculate angular step at this resolution
ResolutionAlignment = previousResolution
self.nextLowPass = True
self.nextResolutionOffset = 2
if self.nextLowPass:
ResolutionAlignment += self.nextResolutionOffset
newXDim = self.readInfoField(fnGlobal, "size", emlib.MDL_XSIZE)
angleStep = self.calculateAngStep(newXDim, TsCurrent, ResolutionAlignment)
angleStep = max(angleStep, self.angularSampling.get())
self.writeInfoField(fnGlobal, "angleStep", emlib.MDL_ANGLE_DIFF, float(angleStep))
# Global alignment
self.numberOfPerturbations = 1
perturbationList = [chr(x) for x in range(ord('a'), ord('a') + self.numberOfPerturbations)]
fnDirAssignment = join(fnGlobal, "assignment")
fnImgs = join(fnGlobal, "images.xmd")
makePath(fnDirAssignment)
# Create defocus groups
row = getFirstRow(fnImgs)
if row.containsLabel(emlib.MDL_CTF_MODEL) or row.containsLabel(emlib.MDL_CTF_DEFOCUSU):
self.runJob("xmipp_ctf_group", "--ctfdat %s -o %s/ctf:stk --pad 1.0 --sampling_rate %f --phase_flipped --error 0.1 --resol %f" % \
(fnImgs, fnDirAssignment, TsCurrent, targetResolution), numberOfMpi=1)
moveFile("%s/ctf_images.sel" % fnDirAssignment, "%s/ctf_groups.xmd" % fnDirAssignment)
cleanPath("%s/ctf_split.doc" % fnDirAssignment)
mdInfo = emlib.MetaData("numberGroups@%s" % join(fnDirAssignment, "ctfInfo.xmd"))
numberGroups = mdInfo.getValue(emlib.MDL_COUNT, mdInfo.firstObject())
ctfPresent = True
else:
numberGroups = 1
ctfPresent = False
# Generate projections
fnReferenceVol = join(fnGlobal, "volumeRef.vol")
for subset in perturbationList:
fnGallery = join(fnDirAssignment, "gallery%s.stk" % subset)
fnGalleryMd = join(fnDirAssignment, "gallery%s.xmd" % subset)
args = "-i %s -o %s --sampling_rate %f --sym %s --min_tilt_angle %f --max_tilt_angle %f --perturb %f " % \
(fnReferenceVol, fnGallery, angleStep, self.symmetryGroup, self.minTilt.get(), self.maxTilt.get(), math.sin(angleStep * math.pi / 180.0) / 4)
args += " --compute_neighbors --angular_distance -1 --experimental_images %s" % self._getExtraPath("images.xmd")
self.runJob("xmipp_angular_project_library", args, numberOfMpi = self.numberOfMpi.get() * self.numberOfThreads.get())
cleanPath(join(fnDirAssignment, "gallery_angles%s.doc" % subset))
moveFile(join(fnDirAssignment, "gallery%s.doc" % subset), fnGalleryMd)
fnAngles = join(fnGlobal, "anglesDisc%s.xmd" % subset)
for j in range(1, numberGroups + 1):
fnAnglesGroup = join(fnDirAssignment, "angles_group%03d%s.xmd" % (j, subset))
if not exists(fnAnglesGroup):
if ctfPresent:
fnGroup = "ctfGroup%06d@%s/ctf_groups.xmd" % (j, fnDirAssignment)
fnCTF = "%d@%s/ctf_ctf.stk" % (j, fnDirAssignment)
fnGalleryGroup = join(fnDirAssignment, "gallery%s_%06d.stk" % (subset, j))
fnGalleryGroupMd = join(fnDirAssignment, "gallery%s_%06d.xmd" % (subset, j))
self.runJob("xmipp_transform_filter", "-i %s -o %s --fourier binary_file %s --save_metadata_stack %s --keep_input_columns" % \
(fnGalleryMd, fnGalleryGroup, fnCTF, fnGalleryGroupMd),
numberOfMpi=min(self.numberOfMpi.get(), 24))
else:
fnGroup = fnImgs
fnGalleryGroupMd = fnGalleryMd
if getSize(fnGroup) == 0: # If the group is empty
continue
self.angularMaxShift = 10
maxShift = round(self.angularMaxShift * newXDim / 100)
R = self.inputParticles.get().getDimensions()[0] / 2
R = R * self.TsOrig / TsCurrent
args = '-i %s -o %s -ref %s -sampling %f -odir %s --Nsimultaneous %d -angleStep %f --maxShift %f --sym %s --useForValidation --refVol %s' % \
(fnGroup, fnAnglesGroup, fnGalleryGroupMd, TsCurrent, fnDirAssignment, self.numberOfMpi.get() * self.numberOfThreads.get(), angleStep, maxShift, self.symmetryGroup, fnReferenceVol)
self.runJob('xmipp_angular_assignment_mag', args, numberOfMpi=self.numberOfMpi.get() * self.numberOfThreads.get())
if exists(fnAnglesGroup):
if not exists(fnAngles) and exists(fnAnglesGroup):
copyFile(fnAnglesGroup, fnAngles)
else:
if exists(fnAngles) and exists(fnAnglesGroup):
self.runJob("xmipp_metadata_utilities", "-i %s --set union_all %s" % (fnAngles, fnAnglesGroup), numberOfMpi=1)
if exists(fnAngles) and exists(fnImgs):
self.runJob("xmipp_metadata_utilities", "-i %s --set join %s image" % (fnAngles, fnImgs), numberOfMpi=1)
self.saveSpace = True
if self.saveSpace and ctfPresent:
self.runJob("rm -f", fnDirAssignment + "/gallery*", numberOfMpi=1)
# Evaluate the stability of the alignment
fnOut = join(fnGlobal, "anglesDisc")
for set1 in perturbationList:
fnOut1 = join(fnGlobal, "anglesDisc%s" % set1)
fnAngles1 = fnOut1 + ".xmd"
counter2 = 0
for set2 in perturbationList:
if set1 == set2:
continue
fnAngles2 = join(fnGlobal, "anglesDisc%s.xmd" % set2)
fnOut12 = join(fnGlobal, "anglesDisc%s%s" % (set1, set2))
self.runJob("xmipp_angular_distance", "--ang1 %s --ang2 %s --oroot %s --sym %s --compute_weights 1 particleId 0.5 --check_mirrors --set 0" % (fnAngles2, fnAngles1, fnOut12, self.symmetryGroup), numberOfMpi=1)
self.runJob("xmipp_metadata_utilities", '-i %s --operate keep_column "angleDiff0 shiftDiff0 weightJumper0"' % (fnOut12 + "_weights.xmd"), numberOfMpi=1)
if counter2 == 0:
mdAllWeights = emlib.MetaData(fnOut12 + "_weights.xmd")
counter2 = 1
else:
mdWeight = emlib.MetaData(fnOut12 + "_weights.xmd")
if mdWeight.size() == mdAllWeights.size():
counter2 += 1
for id1, id2 in izip(mdWeight, mdAllWeights):
angleDiff0 = mdWeight.getValue(emlib.MDL_ANGLE_DIFF0, id1)
shiftDiff0 = mdWeight.getValue(emlib.MDL_SHIFT_DIFF0, id1)
weightJumper0 = mdWeight.getValue(emlib.MDL_WEIGHT_JUMPER0, id1)
angleDiff0All = mdAllWeights.getValue(emlib.MDL_ANGLE_DIFF0, id2)
shiftDiff0All = mdAllWeights.getValue(emlib.MDL_SHIFT_DIFF0, id2)
weightJumper0All = mdAllWeights.getValue(emlib.MDL_WEIGHT_JUMPER0, id2)
mdAllWeights.setValue(emlib.MDL_ANGLE_DIFF0, angleDiff0 + angleDiff0All, id2)
mdAllWeights.setValue(emlib.MDL_SHIFT_DIFF0, shiftDiff0 + shiftDiff0All, id2)
mdAllWeights.setValue(emlib.MDL_WEIGHT_JUMPER0, weightJumper0 + weightJumper0All, id2)
if counter2 > 1:
iCounter2 = 1.0 / counter2
for id in mdAllWeights:
angleDiff0All = mdAllWeights.getValue(emlib.MDL_ANGLE_DIFF0, id)
shiftDiff0All = mdAllWeights.getValue(emlib.MDL_SHIFT_DIFF0, id)
weightJumper0All = mdAllWeights.getValue(emlib.MDL_WEIGHT_JUMPER0, id)
mdAllWeights.setValue(emlib.MDL_ANGLE_DIFF0, angleDiff0All * iCounter2, id)
mdAllWeights.setValue(emlib.MDL_SHIFT_DIFF0, shiftDiff0All * iCounter2, id)
mdAllWeights.setValue(emlib.MDL_WEIGHT_JUMPER0, weightJumper0All * iCounter2, id)
if counter2 > 0:
mdAllWeights.write(fnOut1 + "_weights.xmd")
self.runJob("xmipp_metadata_utilities", '-i %s --set merge %s' % (fnAngles1, fnOut1 + "_weights.xmd"), numberOfMpi=1)
if not exists(fnOut + ".xmd") and exists(fnAngles1):
copyFile(fnAngles1, fnOut + ".xmd")
else:
if exists(fnAngles1) and exists(fnOut + ".xmd"):
self.runJob("xmipp_metadata_utilities", '-i %s --set union_all %s' % (fnOut + ".xmd", fnAngles1), numberOfMpi=1)
cleanPath(join(fnGlobal, "anglesDisc*_weights.xmd"))
[docs] def calculateAngStep(self, newXDim, TsCurrent, ResolutionAlignment):
k = newXDim * TsCurrent / ResolutionAlignment # Freq. index
return math.atan2(1, k) * 180.0 / math.pi # Corresponding angular step
[docs] def checkInfoField(self, fnDir, block):
fnInfo = join(fnDir, "iterInfo.xmd")
if not exists(fnInfo):
return False
blocks = emlib.getBlocksInMetaDataFile(fnInfo)
return block in blocks
[docs] def writeInfoField(self, fnDir, block, label, value):
mdInfo = emlib.MetaData()
objId = mdInfo.addObject()
mdInfo.setValue(label, value, objId)
mdInfo.write("%s@%s" % (block, join(fnDir, "iterInfo.xmd")), emlib.MD_APPEND)
[docs] def readInfoField(self, fnDir, block, label):
mdInfo = emlib.MetaData("%s@%s" % (block, join(fnDir, "iterInfo.xmd")))
return mdInfo.getValue(label, mdInfo.firstObject())
[docs] def prepareImages(self, fnDirPrevious, fnDir, TsCurrent, getShiftsFrom=''):
if self.checkInfoField(fnDir, "count"):
state = self.readInfoField(fnDir, "count", emlib.MDL_COUNT)
if state >= 1:
return
print("Preparing images to sampling rate=", TsCurrent)
XDim = self.inputParticles.get().getDimensions()[0]
newXDim = int(round(XDim * self.TsOrig / TsCurrent))
if newXDim < 40:
newXDim = int(40)
TsCurrent = XDim * (self.TsOrig / newXDim)
elif newXDim % 2 == 1:
newXDim += 1
TsCurrent = XDim * (self.TsOrig / newXDim)
self.writeInfoField(fnDir, "sampling", emlib.MDL_SAMPLINGRATE, TsCurrent)
self.writeInfoField(fnDir, "size", emlib.MDL_XSIZE, newXDim)
# Prepare particles
fnNewParticles = join(fnDir, "images.stk")
if newXDim != XDim:
self.runJob("xmipp_image_resize", "-i %s -o %s --fourier %d" % (self.imgsFn, fnNewParticles, newXDim), numberOfMpi=min(self.numberOfMpi.get(), 24))
else:
self.runJob("xmipp_image_convert", "-i %s -o %s --save_metadata_stack %s" % (self.imgsFn, fnNewParticles, join(fnDir, "images.xmd")),
numberOfMpi=1)
R = self.inputParticles.get().getDimensions()[0] / 2
self.angularMaxShift = 10
R = min(round(R * self.TsOrig / TsCurrent * (1 + self.angularMaxShift * 0.01)), newXDim / 2)
self.runJob("xmipp_transform_mask", "-i %s --mask circular -%d" % (fnNewParticles, R), numberOfMpi=min(self.numberOfMpi.get(), 24))
fnSource = join(fnDir, "images.xmd")
if not self.inputParticles.get().isPhaseFlipped():
self.runJob("xmipp_ctf_correct_phase", "-i %s --sampling_rate %f" % (fnSource, TsCurrent),
numberOfMpi=min(self.numberOfMpi.get(), 24))
self.writeInfoField(fnDir, "count", emlib.MDL_COUNT, int(1))
[docs] def prepareReferences(self, fnDirPrevious, fnDir, TsCurrent, targetResolution):
if self.checkInfoField(fnDir, "count"):
state = self.readInfoField(fnDir, "count", emlib.MDL_COUNT)
if state >= 2:
return
print("Preparing references to sampling rate=", TsCurrent)
newXDim = self.readInfoField(fnDir, "size", emlib.MDL_XSIZE)
oldXdim = self.readInfoField(fnDirPrevious, "size", emlib.MDL_XSIZE)
fnPreviousVol = join(fnDirPrevious, "volume.vol")
fnReferenceVol = join(fnDir, "volumeRef.vol")
if oldXdim != newXDim:
self.runJob("xmipp_image_resize", "-i %s -o %s --dim %d" % (fnPreviousVol, fnReferenceVol, newXDim), numberOfMpi=1)
else:
copyFile(fnPreviousVol, fnReferenceVol)
self.nextLowPass = True
if self.nextLowPass:
self.nextResolutionOffset = 2 # Resolution offset (A)
self.runJob('xmipp_transform_filter', '-i %s --fourier low_pass %f --sampling %f' % \
(fnReferenceVol, targetResolution + self.nextResolutionOffset, TsCurrent), numberOfMpi=1)
self.nextSpherical = True
if self.nextSpherical:
R = self.inputParticles.get().getDimensions()[0] / 2
self.runJob('xmipp_transform_mask', '-i %s --mask circular -%d' % \
(fnReferenceVol, round(R * self.TsOrig / TsCurrent)), numberOfMpi=1)
self.nextPositivity = True
if self.nextPositivity:
self.runJob('xmipp_transform_threshold', '-i %s --select below 0 --substitute value 0' % fnReferenceVol, numberOfMpi=1)
self.writeInfoField(fnDir, "count", emlib.MDL_COUNT, int(2))
[docs] def cleanDirectory(self):
fnDirCurrent = self._getExtraPath("Iter1")
if self.saveSpace:
fnGlobal = fnDirCurrent
if exists(fnGlobal):
cleanPath(join(fnGlobal, "images.stk"))
if exists(fnGlobal):
cleanPath(join(fnGlobal, "images.xmd"))
cleanPath(join(fnGlobal, "volumeRef.vol"))
# --------------------------- INFO functions --------------------------------------------
def _validate(self):
validateMsgs = []
# if there are Volume references, it cannot be empty.
if self.inputVolume.get() and not self.inputVolume.hasValue():
validateMsgs.append('Please provide an input reference volume.')
if self.inputParticles.get() and not self.inputParticles.hasValue():
validateMsgs.append('Please provide input particles.')
return validateMsgs
def _summary(self):
summary = [
"Input particles: %s" % self.inputParticles.get().getNameId()]
summary.append("-----------------")
if (not hasattr(self, 'outputParticles')):
summary.append("Output not ready yet.")
else:
if self.percentage == 100:
text = 'After validation, all of the images are likely to be within the realiable assignment zone'
else:
text = 'After validation, a %.2f' % self.percentage
text += r'% of the images are likely to be within the reliable assignment zone'
summary.append(text)
return summary
def _methods(self):
messages = []
if (hasattr(self, 'outputParticles')):
messages.append('correlation values of previous alignment process'
' are modified according to the spherical distance from'
' the assigned direction to a soft and high-valued correlation zone'
' of neighboring projection directions')
return messages
[docs] def otsu(self, ccList):
# from golden-highres
import numpy as np
ccNumber = len(ccList)
meanWeight = 1.0 / ccNumber
his, bins = np.histogram(ccList, int((max(ccList)-min(ccList))/0.01))
finalThres = -1
finalVal = -1
ccArange = np.arange(min(ccList),max(ccList),0.01)
if len(ccArange)==(len(his)+1):
ccArange=ccArange[:-1]
for t, j in enumerate(bins[1:-1]):
idx = t+1
pcb = np.sum(his[:idx])
pcf = np.sum(his[idx:])
Wb = pcb * meanWeight
Wf = pcf * meanWeight
mub = np.sum(ccArange[:idx] * his[:idx]) / float(pcb)
muf = np.sum(ccArange[idx:] * his[idx:]) / float(pcf)
value = Wb * Wf * (mub - muf) ** 2
if value > finalVal:
finalThres = bins[idx]
finalVal = value
return finalThres