Source code for xmipp3.protocols.protocol_particle_pick_automatic

# **************************************************************************
# *
# * Authors:     Jose Gutierrez Tabuenca (jose.gutierrez@cnb.csic.es)
# *              Laura del Cano (laura.cano@cnb.csic.es)
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
from os.path import exists, basename, join

from pyworkflow.protocol.params import STEPS_PARALLEL, PointerParam, EnumParam, FileParam
from pyworkflow.utils.path import *

from pwem.protocols import ProtParticlePickingAuto

from pwem import emlib
from xmipp3.base import XmippProtocol
from xmipp3.convert import readSetOfCoordinates
from pyworkflow import BETA, UPDATED, NEW, PROD


# Values compared against the 'micsToPick' enum parameter, whose choices are
# declared as ['Same as supervised', 'Other'].
MICS_SAMEASPICKING = 0
MICS_OTHER = 1
# Values compared against the 'modelSource' enum parameter, whose choices are
# declared as ["Manual picking in this project", "External directory"].
SRC_MANUAL_PICKING = 0
SRC_DIR = 1


class XmippParticlePickingAutomatic(ProtParticlePickingAuto, XmippProtocol):
    """Automatically picks particles from a set of micrographs using a
    previously trained model.

    This protocol speeds up particle selection by identifying particles
    consistently without manual intervention, improving throughput.

    AI Generated

    ## Overview

    The Auto-picking protocol automatically picks particles from micrographs
    using a model previously trained during an Xmipp supervised
    particle-picking session.

    This protocol is the automatic continuation of the manual or supervised
    picking workflow. In the first step, the user provides examples of
    particles and possibly non-particles. Xmipp uses that information to train
    a picking model. In this second step, the trained model is applied
    automatically to micrographs to produce particle coordinates without
    further manual selection.

    Auto-picking is useful when many micrographs need to be processed
    consistently. It can greatly reduce the amount of manual work, while still
    using the particle appearance learned from expert-guided picking.

    The main output is a set of coordinates that can be used by particle
    extraction protocols.

    ## Inputs and General Workflow

    The protocol requires a trained Xmipp picking model. This model can come
    from a previous manual-picking run in the same Scipion project, or from an
    external directory containing the required model files.

    The protocol copies or links the model files into the current run and
    reads the particle size from the picking configuration.

    The user then chooses which micrographs should be picked. The protocol can
    apply the model to the same micrographs used during supervised picking, or
    to a different but compatible set of micrographs.

    For each micrograph, the protocol runs the Xmipp automatic picking program
    and writes coordinate files. These files are then read into a Scipion
    SetOfCoordinates.

    ## Model Source

    The **Model source** parameter defines where the trained picker model
    comes from. There are two options:

    **Manual picking in this project** uses the model generated by a previous
    Xmipp manual or supervised picking protocol in the same Scipion project.

    **External directory** uses a trained model stored outside the current
    protocol run. This is useful when the same type of molecule is processed
    repeatedly and a previously trained picker should be reused.

    In both cases, the model directory must contain the required Xmipp picking
    files, including the model files, configuration file, and templates.

    ## Previous Xmipp Particle Picking Run

    When the model source is **Manual picking in this project**, the user must
    select the previous Xmipp particle-picking run.

    This previous run provides the trained model and configuration. It must
    have generated coordinates and must have been trained by using
    auto-picking for at least one micrograph.

    The protocol checks that the necessary model files exist. If they are
    missing, it reports that the input picking run has not been trained.

    This option is the standard workflow when the user has just completed
    supervised picking in the same project.

    ## External Model Directory

    When the model source is **External directory**, the user must provide a
    directory containing the trained picking model.

    This directory should contain the files needed by Xmipp automatic picking,
    including the model files, configuration file, and templates.

    This option is useful for repeated processing of the same specimen, the
    same type of particle, or very similar acquisition conditions. It allows
    users to reuse a trained picker without repeating the manual training step
    inside the current project.

    When using an external model directory, the protocol cannot use the option
    “Same as supervised” for micrographs, because there is no supervised
    picking run in the current project defining that micrograph set. In that
    case, the user should select another set of micrographs explicitly.

    ## Micrographs to Pick

    The **Micrographs to pick** parameter controls which micrographs will be
    processed automatically.

    If **Same as supervised** is selected, the protocol uses the same
    micrographs that were used in the previous supervised picking run. This
    can be useful to complete or reproduce automatic picking on the original
    training dataset.

    If **Other** is selected, the user provides a different set of
    micrographs. This is the usual option when the trained picker is applied
    to the rest of a dataset after training on a representative subset.

    The new micrographs should come from the same specimen and should have
    compatible imaging conditions. If they differ strongly in pixel size,
    contrast, defocus, acquisition settings, or specimen appearance, the
    trained model may not generalize well.

    ## Reusing Manual Coordinates

    When picking the same micrographs used during supervised picking, the
    protocol can reuse existing manual coordinate files when appropriate.

    If a micrograph already has a coordinate file from the previous picking
    run, the protocol may copy that file instead of running automatic picking
    again. This preserves manual work already performed by the user.

    If no suitable coordinate file is available, or if the micrograph still
    needs automatic selection, the protocol runs the automatic picker.

    This behavior helps combine supervised manual input with automatic
    completion.

    ## Particle Size

    The particle size is read from the configuration file of the trained
    picking model.

    This value comes from the supervised picking session and is passed to the
    automatic picking program. It defines the expected particle size used when
    searching for particles in the micrographs.

    It is important that the micrographs to be picked have the same sampling
    rate as the micrographs used for training. Otherwise, the same particle
    size in pixels would correspond to a different physical size, and the
    picker may fail or select incorrect features.

    ## Compatibility of New Micrographs

    When the trained picker is applied to another micrograph set, the new
    micrographs should be compatible with the training micrographs.

    The protocol checks, when possible, that the sampling rate and acquisition
    parameters match those of the supervised picking run.

    This is important because the learned model depends on the appearance,
    scale, and imaging conditions of the particles. A picker trained on one
    dataset may not work well on another dataset if the particle size,
    contrast, defocus range, detector settings, or specimen preparation differ
    substantially.

    For best results, train the picker on micrographs representative of the
    dataset to be automatically picked.

    ## Streaming Behavior

    The protocol includes streaming support through the standard
    automatic-picking framework.

    This means that automatic picking can be applied as new micrographs become
    available, which is useful in online or facility workflows.

    In a streaming workflow, the trained model can be used to pick incoming
    micrographs automatically after the supervised training step has been
    completed.

    ## Output Coordinates

    The main output is a **SetOfCoordinates**.

    Each coordinate corresponds to a particle position automatically selected
    by the trained Xmipp picker. The coordinate set is linked to the
    micrographs that were picked and stores the particle box size inherited
    from the model configuration.

    These coordinates can be passed directly to particle extraction protocols.

    As with any automatic picking result, users should inspect a subset of the
    coordinates before extraction. Automatic pickers can select contaminants,
    carbon, ice features, aggregates, or false positives if the model is not
    well trained or if the micrographs differ from the training examples.

    ## Practical Recommendations

    Use this protocol after completing a supervised Xmipp picking session with
    a representative set of micrographs.

    Train the picker on micrographs that cover the variability of the dataset:
    different defocus values, ice thicknesses, particle densities, and image
    qualities.

    Use **Other** micrographs when applying the trained picker to the rest of
    the dataset.

    Reuse an external model only when the new data are very similar to the
    data used to train the model.

    Check that the sampling rate is the same between training and target
    micrographs. Differences in pixel size can make the learned particle size
    incorrect.

    Inspect the automatic picks visually before extracting particles. Even a
    good trained picker may produce false positives in contaminated or unusual
    micrographs.

    If automatic picking performs poorly, return to the supervised picking
    step, add better examples, and retrain the model.

    ## Final Perspective

    Auto-picking is the scalable step of the Xmipp supervised particle-picking
    workflow. It applies a trained model to select particles automatically and
    consistently across many micrographs.

    For biological users, the protocol is valuable because it combines expert
    supervision with high-throughput processing. The user teaches the picker
    what the particles look like, and the protocol applies that knowledge to
    the rest of the dataset.

    The quality of the output coordinates depends directly on the quality and
    representativeness of the training examples and on the similarity between
    the training and target micrographs.
    """
    _label = 'auto-picking (step 2)'
    _devStatus = PROD

    # Trained-model files that must be present in the model source directory.
    # They are linked into this run's extra folder by copyInputFilesStep and
    # their presence is checked in _validate.
    filesToCopy = ['model_svm.txt', 'model_pca_model.stk',
                   'model_rotpca_model.stk', 'model_particle_avg.xmp',
                   'templates.stk']

    def __init__(self, **kwargs):
        """Initialise the protocol and request parallel step execution."""
        ProtParticlePickingAuto.__init__(self, **kwargs)
        self.stepsExecutionMode = STEPS_PARALLEL

    # --------------------------- DEFINE param functions -----------------------
    def _defineParams(self, form):
        """Declare the input form: model source, picking run or model
        directory, and the micrographs to pick."""
        form.addSection(label='Input')
        # NOTE: adjacent string literals are concatenated verbatim, so every
        # fragment must carry its own trailing space.
        form.addParam('modelSource', EnumParam, label="Model source",
                      choices=["Manual picking in this project",
                               "External directory"],
                      default=SRC_MANUAL_PICKING,
                      help="The files model_* can be copied from a previous "
                           "protocol execution within this project or copied "
                           "from an external directory. This latter option is "
                           "useful in cases in which the same kind of "
                           "molecule is processed many times.")
        form.addParam('xmippParticlePicking', PointerParam,
                      label="Xmipp particle picking run",
                      pointerClass='XmippProtParticlePicking',
                      condition="modelSource==%d" % SRC_MANUAL_PICKING,
                      help='Select the previous xmipp particle picking run.')
        form.addParam('xmippParticlePickingDir', FileParam,
                      label="Xmipp particle picking model directory",
                      allowsNull=True,
                      condition="modelSource==%d" % SRC_DIR,
                      help='The directory must contain the files model_*, '
                           'config.xmd and templates.stk')
        form.addParam('micsToPick', EnumParam,
                      choices=['Same as supervised', 'Other'],
                      default=MICS_SAMEASPICKING,
                      label='Micrographs to pick',
                      display=EnumParam.DISPLAY_LIST,
                      help="Select from which set of micrographs to pick using "
                           "the training from supervised run. "
                           "If you use Same as supervised, the same set of "
                           "micrographs used for training the picker will be "
                           "used at this point. If you select Other, you can "
                           "select another set of micrograph (normally from "
                           "the same specimen) and pick them completely "
                           "automatic using the trained picker.")
        form.addParam('inputMicrographs', PointerParam,
                      pointerClass='SetOfMicrographs',
                      condition='micsToPick==%d' % MICS_OTHER,
                      label="Micrographs",
                      help="Select other set of micrographs to pick using the "
                           "trained picker.")

        self._defineStreamingParams(form)

        form.addParallelSection(threads=1, mpi=1)

    # --------------------------- INSERT steps functions -----------------------
    def _insertInitialSteps(self):
        """Insert the step that brings the model files into this run.

        Returns a list with the id of the inserted step.
        """
        # Keep the supervised picking run at hand: _pickMicrograph reads its
        # .pos files when picking the same micrographs. It is None when the
        # model comes from an external directory.
        self.particlePickingRun = self.xmippParticlePicking.get()
        copyId = self._insertFunctionStep('copyInputFilesStep')
        return [copyId]

    # --------------------------- STEPS functions ------------------------------
    def getSrcDir(self):
        """Return the directory holding the trained model files: the extra
        path of the selected picking run, or the external directory."""
        if self.modelSource == SRC_MANUAL_PICKING:
            return self.xmippParticlePicking.get()._getExtraPath()
        else:
            return self.xmippParticlePickingDir.get()

    def copyInputFilesStep(self):
        """Link the model files and copy config.xmd into this run, then read
        the particle size from the configuration."""
        srcDir = self.getSrcDir()
        for f in self.filesToCopy:
            createLink(join(srcDir, f), self._getExtraPath(f))

        # config.xmd is copied (not linked) because it is rewritten below.
        configFn = self._getExtraPath("config.xmd")
        copyFile(join(srcDir, "config.xmd"), configFn)

        # Get the box size
        propertiesBlock = "properties@" + configFn
        mdInfo = emlib.MetaData(propertiesBlock)
        objId = mdInfo.firstObject()
        self.boxSize = mdInfo.getValue(emlib.MDL_PICKING_PARTICLE_SIZE, objId)
        # Zero the MDL_PICKING_MANUALPARTICLES_SIZE entry in the local copy
        # (presumably the count of manually picked particles -- TODO confirm).
        mdInfo.setValue(emlib.MDL_PICKING_MANUALPARTICLES_SIZE, 0, objId)
        mdInfo.write(propertiesBlock, emlib.MD_APPEND)

    def _pickMicrograph(self, mic, *args):
        """Produce the coordinates of one micrograph.

        When picking the same micrographs as the supervised run, an existing
        .pos file of that run is copied instead of re-picking, unless its
        header marks the micrograph state as "Available". Otherwise
        xmipp_micrograph_automatic_picking is run in autoselect mode.
        """
        micPath = mic.getFileName()
        modelRoot = self._getExtraPath('model')
        micName = removeBaseExt(micPath)
        proceed = True

        if self.micsToPick == MICS_SAMEASPICKING:
            basePos = replaceBaseExt(micPath, "pos")
            fnPos = self.particlePickingRun._getExtraPath(basePos)
            if exists(fnPos):
                blocks = emlib.getBlocksInMetaDataFile(fnPos)
                copyManualPos = True
                if 'header' in blocks:
                    mdheader = emlib.MetaData("header@" + fnPos)
                    state = mdheader.getValue(
                        emlib.MDL_PICKING_MICROGRAPH_STATE,
                        mdheader.firstObject())
                    if state == "Available":
                        copyManualPos = False
                if copyManualPos:
                    # Copy manual .pos file of this micrograph
                    copyFile(fnPos, self._getExtraPath(basename(fnPos)))
                    proceed = False

        if proceed:
            # Built in a separate local so the *args parameter is not rebound.
            cmdArgs = "-i %s " % micPath
            cmdArgs += "--particleSize %d " % self.boxSize
            cmdArgs += "--model %s " % modelRoot
            cmdArgs += "--outputRoot %s " % self._getExtraPath(micName)
            cmdArgs += "--mode autoselect --thr %d" % self.numberOfThreads
            self.runJob("xmipp_micrograph_automatic_picking", cmdArgs)

    def readSetOfCoordinates(self, workingDir, coordSet):
        """Fill coordSet from workingDir for all the input micrographs."""
        # Inside a method this name resolves to the module-level function
        # imported from xmipp3.convert, not to this method.
        readSetOfCoordinates(workingDir, self.getInputMicrographs(), coordSet)

    def readCoordsFromMics(self, workingDir, micList, coordSet):
        """Fill coordSet from workingDir for the micrographs in micList."""
        readSetOfCoordinates(workingDir, micList, coordSet)

    # --------------------------- INFO functions -------------------------------
    def _validate(self):
        """Return a list of error messages about the current inputs.

        Missing inputs (no picking run selected, no model directory, no
        micrographs) are reported or skipped instead of raising.
        """
        validateMsgs = []
        modelSource = self.modelSource.get()
        micsToPick = self.micsToPick.get()
        pickingRun = self.xmippParticlePicking.get()

        if modelSource == SRC_MANUAL_PICKING:
            # hasattr(None, ...) is False, so an unselected run is reported
            # here as well.
            if not hasattr(pickingRun, "outputCoordinates"):
                validateMsgs.append("You need to generate coordinates for the "
                                    "supervised picking")
            srcDir = (pickingRun._getExtraPath()
                      if pickingRun is not None else None)
        else:
            srcDir = self.xmippParticlePickingDir.get()
            if not srcDir:
                # The parameter is declared with allowsNull=True, so an empty
                # value has to be reported here.
                validateMsgs.append("You need to provide the directory that "
                                    "contains the trained picking model.")

        # Check that all needed files exist
        if srcDir:
            srcPaths = [join(srcDir, k) for k in self.filesToCopy]
            if missingPaths(*srcPaths):
                validateMsgs.append('Input picking run has not been trained, '
                                    'use *Autopick* for at least one micrograph')

        # If other set of micrographs is provided they should have same
        # sampling rate and acquisition
        if micsToPick == MICS_OTHER and modelSource != SRC_DIR:
            inputMics = self.inputMicrographs.get()
            manualMics = (pickingRun.inputMicrographs.get()
                          if pickingRun is not None else None)
            # FIXME: manualMics is always None when scheduled...
            # it should be fixed in the update step at Scipion scheduler app
            if inputMics is not None and manualMics is not None:
                pixsizeInput = inputMics.getSamplingRate()
                pixsizeMics = manualMics.getSamplingRate()
                acq = manualMics.getAcquisition()

                if pixsizeInput != pixsizeMics:
                    validateMsgs.append('New micrographs should have same '
                                        'sampling rate as the ones already '
                                        'picked.')

                if not inputMics.getAcquisition().equalAttributes(acq):
                    validateMsgs.append('New micrographs should have same '
                                        'acquisition parameters as the ones '
                                        'already picked.')

        if modelSource == SRC_DIR and micsToPick == MICS_SAMEASPICKING:
            validateMsgs.append("You cannot take the model from a directory "
                                "and indicate that the set of micrographs "
                                "is the same as picking. If you take the "
                                "model from a directory, probably you want "
                                "to pick from a different set.")

        return validateMsgs

    def getSummary(self, coordSet):
        """Return a one-line summary naming where the model came from.

        coordSet is accepted for interface compatibility and is not used.
        """
        summary = []
        if self.modelSource == SRC_MANUAL_PICKING:
            summary.append("Previous run: %s"
                           % self.xmippParticlePicking.get().getNameId())
        else:
            summary.append("Model from: %s"
                           % self.xmippParticlePickingDir.get())
        return "\n".join(summary)

    def getMethods(self, output):
        """Return the methods text for the given output coordinate set.

        The training source is the picking run's name id, or the model
        directory when the model was taken from an external directory (in
        which case there is no picking run to name).
        """
        if self.modelSource == SRC_MANUAL_PICKING:
            trainingSource = self.xmippParticlePicking.get().getNameId()
        else:
            trainingSource = self.xmippParticlePickingDir.get()
        msg = 'Program picked %d particles ' % output.getSize()
        msg += 'of size %d ' % output.getBoxSize()
        msg += 'using training from %s. ' % trainingSource
        msg += 'For more detail see [Abrishami2013]'
        return msg

    def _citations(self):
        """Return the citation keys associated with this protocol."""
        return ['Abrishami2013']

    # --------------------------- UTILS functions ------------------------------
    def getCoordsDir(self):
        """Return the directory where coordinate files are written (the
        extra path of this run)."""
        return self._getExtraPath()

    def getInputMicrographsPointer(self):
        """Return the pointer to the micrographs to pick, or None when the
        supervised run's micrographs are requested but no run is selected."""
        # Get micrographs to pick
        if self.micsToPick == MICS_SAMEASPICKING:
            inputPicking = self.xmippParticlePicking.get()
            return inputPicking.inputMicrographs if inputPicking else None
        else:
            return self.inputMicrographs

    def getInputMicrographs(self):
        """ Return the input micrographs that can be the same of the
        supervised picking or other ones selected by the user. (This can be
        used to pick a new set of micrographs with the same properties than
        a previous trained ones. )
        """
        micsPointer = self.getInputMicrographsPointer()
        return micsPointer.get() if micsPointer else None