Source code for xmipp3.protocols.protocol_center_particles

# ******************************************************************************
# *
# * Authors:     Amaya Jimenez Moreno (ajimenez@cnb.csic.es)
# *              Alberto García Mena (alberto.garcia@cnb.csic.es)
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# ******************************************************************************

import numpy as np
import os
from pyworkflow import VERSION_2_0
from pwem.constants import ALIGN_2D
from pwem.objects import Class2D, Particle, Coordinate, Transform, SetOfClasses2D, SetOfParticles
from pwem.protocols import ProtClassify2D
import pwem.emlib.metadata as md
import pyworkflow.protocol.params as params

from pwem.emlib import MD_APPEND
from xmipp3.convert import (rowToAlignment, alignmentToRow,
                            rowToParticle, writeSetOfClasses2D, xmippToLocation)

OUTPUT_CLASSES = 'outputClasses'
OUTPUT_PARTICLES = 'outputParticles'

[docs]class XmippProtCenterParticles(ProtClassify2D): """ Recenters particles that are initially un-centered by computing alignment shifts relative to a reference point. Proper centering enhances the accuracy of subsequent classification and refinement steps. AI Generated: What this protocol is for Center particles is a corrective protocol aimed at a very common practical problem in cryo-EM workflows: particles that are systematically off-center after picking/extraction or after an early alignment/classification step. Even small centering errors can reduce the quality of 2D class averages, bias alignment, and ultimately harm 3D refinement—especially when particles are small, have weak signal, or when you are trying to push resolution. This protocol takes an existing SetOfClasses2D (i.e., 2D classes with their member particles) and recenters the particles by computing and applying a consistent centering transform derived from the class information. The outcome is a new set of 2D classes and a new particle set in which the particles have been repositioned so that their centers are better aligned, improving downstream classification and refinement stability. A biological way to think about it is: you already have a meaningful set of class averages, but the particles contributing to them are not properly centered; this protocol uses the class centering information to “pull” particles toward the correct center, producing cleaner averages and better-behaved alignment in later steps. What you need to provide You provide two inputs: First, Input Classes: a SetOfClasses2D. This should be a set of classes that already contains the particle members and their current 2D alignment parameters. In practice, it is usually the output of a 2D classification step (or any step that produces classes with assigned shifts/angles for their particles). Second, a Set of micrographs associated with those classes. This is required because the protocol also updates particle coordinates consistently with the new centering, and those coordinates are defined relative to the micrographs. Conceptually, this input tells Scipion how the particles relate back to the original images once their centering offsets are adjusted. What the protocol does, in practical terms The protocol first computes a centering transform for each class representative (the class average). Using those class-level centering transforms, it then propagates the correction to all particles in each class by updating their 2D alignment parameters and adjusting their coordinates accordingly. The protocol is careful to keep the particle identity and class membership consistent, but it modifies the metadata so that the particle is effectively re-expressed as “more centered”. Importantly, this is not “another 2D classification”; it is a centering and bookkeeping step that uses the class-derived shifts to correct particle placement. You typically run it when you have evidence that the dataset is systematically shifted—e.g., class averages look sharp but consistently off-center, or you see that the highest-density region is not around the box center. Outputs: what you get and how to use them The protocol produces two outputs: outputClasses: a new SetOfClasses2D whose class representatives correspond to the centered results, and whose class members carry the updated 2D alignment metadata. This is the natural output to inspect if you want to see whether class averages look better centered and whether the classes are more interpretable. outputParticles: a new SetOfParticles containing the recentered particles. This is the output you would typically feed into subsequent steps such as another round of 2D classification, 3D initial model workflows, or refinement pipelines, because many downstream algorithms behave better when particles are well centered. A key point for biological processing is that the protocol gives you both a class-level and a particle-level product. If your next step is another 2D classification, you would normally use outputParticles. If you want to visually validate the effect quickly, you look at outputClasses and confirm that the averages are now centered. How to interpret the summary information After running, the protocol writes a short summary that includes how many particles required meaningful displacement and an estimate of the average displacement magnitude (in pixels). Biologically, this helps you understand whether you are correcting a subtle nuisance (small average shifts) or a major systematic problem (large shifts affecting a significant fraction of particles). If a large percentage of particles needed strong displacement, it may also indicate upstream issues such as mis-centered picking coordinates, extraction offsets, or a mismatch between picking and extraction box definitions. When this protocol is most useful (typical scenarios) This protocol is especially useful when: Your particles were extracted with a box center that does not coincide with the true particle center (common when using coarse picking or when the coordinate reference is inconsistent between programs). You have decent 2D classes but the averages are consistently displaced from the box center, suggesting systematic centering bias. You want to “clean up” centering before starting 3D, because poor centering can make initial model generation unstable and can worsen angular assignment. It is also often helpful as a stabilizing step between early 2D classification rounds: run 2D classification, center particles using this protocol, then run a second 2D classification that benefits from better-centered inputs. What this protocol does not replace Centering improves the geometry of the dataset, but it does not solve problems such as wrong picks, severe heterogeneity, strong contamination, or poor CTF correction. If class averages are bad because the particles are not the same object or are too noisy, centering will not “fix” that. Its role is more specific: it improves consistency by making sure that the object of interest sits near the box center across particles. Practical advice for biological users A simple way to decide whether to use this protocol is to visually inspect 2D class averages: if many good-looking classes have their main density displaced from the center, centering is likely worth doing. After running, inspect the new class averages and, if they look more consistently centered, continue downstream with outputParticles to benefit from improved alignment behavior. """ _label = 'center particles' _lastUpdateVersion = VERSION_2_0 _possibleOutputs = {OUTPUT_CLASSES:SetOfClasses2D, OUTPUT_PARTICLES:SetOfParticles} # --------------------------- DEFINE param functions ----------------------- def _defineParams(self, form): form.addSection(label='Input') form.addParam('inputClasses', params.PointerParam, pointerClass='SetOfClasses2D', important=True, label="Input Classes", help='Set of classes to be read') form.addParam('inputMics', params.PointerParam, pointerClass='SetOfMicrographs', important=True, label="Set of micrographs", help='Set of micrographs related to the selected input ' 'classes') form.addParallelSection(threads=0, mpi=0) # --------------------------- INSERT steps functions ----------------------- def _insertAllSteps(self): self._insertFunctionStep('realignStep') self._insertFunctionStep('createOutputStep') # --------------------------- STEPS functions --------------------------
[docs] def realignStep(self): inputMdName = self._getExtraPath('inputClasses.xmd') writeSetOfClasses2D(self.inputClasses.get(), inputMdName, writeParticles=True) centeredStackName = self._getExtraPath('centeredStack.stk') self._params = {'input': inputMdName, 'output': centeredStackName} args = ('-i %(input)s -o %(output)s --save_metadata_transform') self.runJob("xmipp_transform_center_image", args % self._params, numberOfMpi=1) centeredMdName = centeredStackName.replace('stk', 'xmd') centeredMd = md.MetaData(centeredMdName) centeredStack = md.MetaData(centeredStackName) listName = [] listTransform = [] for rowStk in md.iterRows(centeredStack): listName.append(rowStk.getValue(md.MDL_IMAGE)) for rowMd in md.iterRows(centeredMd): listTransform.append(rowToAlignment(rowMd, ALIGN_2D)) mdNewClasses = md.MetaData() for i, row in enumerate(md.iterRows(inputMdName)): newRow = md.Row() newRow.setValue(md.MDL_IMAGE, listName[i]) refNum = row.getValue(md.MDL_REF) newRow.setValue(md.MDL_REF, refNum) classCount = row.getValue(md.MDL_CLASS_COUNT) newRow.setValue(md.MDL_CLASS_COUNT, classCount) newRow.addToMd(mdNewClasses) mdNewClasses.write('classes@' + self._getExtraPath('final_classes.xmd'), MD_APPEND) mdImages = md.MetaData() i = 0 mdBlocks = md.getBlocksInMetaDataFile(inputMdName) resultMat = Transform() listDisplacament = [] centerSummary = self._getPath("summary.txt") centerSummary = open(centerSummary, "w") particlesCentered = 0 totalParticles = 0 for block in mdBlocks: if block.startswith('class00'): newMat = listTransform[i] newMatrix = newMat.getMatrix() mdClass = md.MetaData(block + "@" + inputMdName) mdNewClass = md.MetaData() i += 1 flag_psi = True for rowIn in md.iterRows(mdClass): #To create the transformation matrix (and its parameters) # for the realigned particles totalParticles += 1 if rowIn.getValue(md.MDL_ANGLE_PSI)!=0: flag_psi=True if rowIn.getValue(md.MDL_ANGLE_ROT)!=0: flag_psi=False inMat = rowToAlignment(rowIn, ALIGN_2D) inMatrix = inMat.getMatrix() resultMatrix = np.dot(newMatrix,inMatrix) resultMat.setMatrix(resultMatrix) rowOut=md.Row() rowOut.copyFromRow(rowIn) alignmentToRow(resultMat, rowOut, ALIGN_2D) if flag_psi==False: newAngle = rowOut.getValue(md.MDL_ANGLE_PSI) rowOut.setValue(md.MDL_ANGLE_PSI, 0.) rowOut.setValue(md.MDL_ANGLE_ROT, newAngle) #To create the new coordinates for the realigned particles inPoint = np.array([[0.],[0.],[0.],[1.]]) invResultMat = np.linalg.inv(resultMatrix) centerPoint = np.dot(invResultMat,inPoint) if int(centerPoint[0]) > 1 or int(centerPoint[1]) > 1: particlesCentered += 1 listDisplacament.append([int(centerPoint[0]), int(centerPoint[1])]) rowOut.setValue(md.MDL_XCOOR, rowOut.getValue( md.MDL_XCOOR)+int(centerPoint[0])) rowOut.setValue(md.MDL_YCOOR, rowOut.getValue( md.MDL_YCOOR)+int(centerPoint[1])) rowOut.addToMd(mdNewClass) mdNewClass.write(block + "@" + self._getExtraPath( 'final_classes.xmd'), MD_APPEND) mdImages.unionAll(mdNewClass) listModule = [(np.sqrt((x[0] * x[0]) + (x[1] * x[1]))) for x in listDisplacament] moduleDisp = round(sum(listModule) / len(listModule), 1) centerSummary.write("Particles centered: {} \t({}%) " "\nAverage module displacement: {}px\n". format(particlesCentered, round((100 * particlesCentered/totalParticles ), 1), moduleDisp)) centerSummary.close() mdImages.write(self._getExtraPath('final_images.xmd'))
[docs] def createOutputStep(self): inputParticles = self.inputClasses.get().getImages() outputClasses = self._createSetOfClasses2D(self.inputClasses.get().getImagesPointer()) self._fillClasses(outputClasses) outputParticles = self._createSetOfParticles() outputParticles.copyInfo(inputParticles) self._fillParticles(outputParticles) result = {OUTPUT_CLASSES: outputClasses, OUTPUT_PARTICLES: outputParticles} self._defineOutputs(**result) self._defineSourceRelation(self.inputClasses, outputClasses) self._defineSourceRelation(self.inputClasses, outputParticles)
# --------------------------- UTILS functions ------------------------------ def _fillClasses(self, outputClasses): """ Create the SetOfClasses2D """ inputSet = self.inputClasses.get().getImages() myRep = md.MetaData('classes@' + self._getExtraPath( 'final_classes.xmd')) for row in md.iterRows(myRep): fn = row.getValue(md.MDL_IMAGE) rep = Particle() rep.setLocation(xmippToLocation(fn)) repId = row.getObjId() newClass = Class2D(objId=repId) newClass.setAlignment2D() newClass.copyInfo(inputSet) newClass.setAcquisition(inputSet.getAcquisition()) newClass.setRepresentative(rep) outputClasses.append(newClass) i = 1 mdBlocks = md.getBlocksInMetaDataFile(self._getExtraPath( 'final_classes.xmd')) for block in mdBlocks: if block.startswith('class00'): mdClass = md.MetaData(block + "@" + self._getExtraPath( 'final_classes.xmd')) imgClassId = i newClass = outputClasses[imgClassId] newClass.enableAppend() for row in md.iterRows(mdClass): part = rowToParticle(row) newClass.append(part) i += 1 newClass.setAlignment2D() outputClasses.update(newClass) def _fillParticles(self, outputParticles): """ Create the SetOfParticles""" myParticles = md.MetaData(self._getExtraPath('final_images.xmd')) outputParticles.enableAppend() for row in md.iterRows(myParticles): #To create the new particle p = rowToParticle(row) outputParticles.append(p) # --------------------------- INFO functions ------------------------------- def _summary(self): summary = [] summary.append("Realignment of %s classes." % self.inputClasses.get().getSize()) centerSummary = self._getPath("summary.txt") if not os.path.exists(centerSummary): summary.append("No summary file yet.") else: centerSummary = open(centerSummary, "r") for line in centerSummary.readlines(): summary.append(line.rstrip()) centerSummary.close() return summary def _validate(self): errors = [] try: self.inputClasses.get().getImages() except AttributeError: errors.append('Try and catch InputClasses has no particles.') return errors