# **************************************************************************
# *
# * Authors: Jose Gutierrez Tabuenca (jose.gutierrez@cnb.csic.es)
# * Laura del Cano (laura.cano@cnb.csic.es)
# *
# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307 USA
# *
# * All comments concerning this program package may be sent to the
# * e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
from os.path import exists, basename, join
from pyworkflow.protocol.params import STEPS_PARALLEL, PointerParam, EnumParam, FileParam
from pyworkflow.utils.path import *
from pwem.protocols import ProtParticlePickingAuto
from pwem import emlib
from xmipp3.base import XmippProtocol
from xmipp3.convert import readSetOfCoordinates
from pyworkflow import BETA, UPDATED, NEW, PROD
# Choice indices of the 'micsToPick' form parameter: pick the micrographs of
# the supervised run, or a different set selected by the user.
MICS_SAMEASPICKING, MICS_OTHER = 0, 1

# Choice indices of the 'modelSource' form parameter: take the trained model
# from a picking run of this project, or from an external directory.
SRC_MANUAL_PICKING, SRC_DIR = 0, 1
class XmippParticlePickingAutomatic(ProtParticlePickingAuto, XmippProtocol):
    """Automatically picks particles from a set of micrographs using a
    previously trained model. This protocol speeds up particle selection by
    identifying particles consistently without manual intervention, improving
    throughput.

    AI Generated

    ## Overview

    The Auto-picking protocol automatically picks particles from micrographs using
    a model previously trained during an Xmipp supervised particle-picking session.

    This protocol is the automatic continuation of the manual or supervised picking
    workflow. In the first step, the user provides examples of particles and
    possibly non-particles. Xmipp uses that information to train a picking model.
    In this second step, the trained model is applied automatically to micrographs
    to produce particle coordinates without further manual selection.

    Auto-picking is useful when many micrographs need to be processed consistently.
    It can greatly reduce the amount of manual work, while still using the
    particle appearance learned from expert-guided picking.

    The main output is a set of coordinates that can be used by particle extraction
    protocols.

    ## Inputs and General Workflow

    The protocol requires a trained Xmipp picking model.

    This model can come from a previous manual-picking run in the same Scipion
    project, or from an external directory containing the required model files. The
    protocol copies or links the model files into the current run and reads the
    particle size from the picking configuration.

    The user then chooses which micrographs should be picked. The protocol can
    apply the model to the same micrographs used during supervised picking, or to a
    different but compatible set of micrographs.

    For each micrograph, the protocol runs the Xmipp automatic picking program and
    writes coordinate files. These files are then read into a Scipion
    SetOfCoordinates.

    ## Model Source

    The **Model source** parameter defines where the trained picker model comes
    from.

    There are two options:

    **Manual picking in this project** uses the model generated by a previous Xmipp
    manual or supervised picking protocol in the same Scipion project.

    **External directory** uses a trained model stored outside the current protocol
    run. This is useful when the same type of molecule is processed repeatedly and
    a previously trained picker should be reused.

    In both cases, the model directory must contain the required Xmipp picking
    files, including the model files, configuration file, and templates.

    ## Previous Xmipp Particle Picking Run

    When the model source is **Manual picking in this project**, the user must
    select the previous Xmipp particle-picking run.

    This previous run provides the trained model and configuration. It must have
    generated coordinates and must have been trained by using auto-picking for at
    least one micrograph.

    The protocol checks that the necessary model files exist. If they are missing,
    it reports that the input picking run has not been trained.

    This option is the standard workflow when the user has just completed
    supervised picking in the same project.

    ## External Model Directory

    When the model source is **External directory**, the user must provide a
    directory containing the trained picking model.

    This directory should contain the files needed by Xmipp automatic picking,
    including the model files, configuration file, and templates.

    This option is useful for repeated processing of the same specimen, the same
    type of particle, or very similar acquisition conditions. It allows users to
    reuse a trained picker without repeating the manual training step inside the
    current project.

    When using an external model directory, the protocol cannot use the option
    “Same as supervised” for micrographs, because there is no supervised picking
    run in the current project defining that micrograph set. In that case, the user
    should select another set of micrographs explicitly.

    ## Micrographs to Pick

    The **Micrographs to pick** parameter controls which micrographs will be
    processed automatically.

    If **Same as supervised** is selected, the protocol uses the same micrographs
    that were used in the previous supervised picking run. This can be useful to
    complete or reproduce automatic picking on the original training dataset.

    If **Other** is selected, the user provides a different set of micrographs.
    This is the usual option when the trained picker is applied to the rest of a
    dataset after training on a representative subset.

    The new micrographs should come from the same specimen and should have
    compatible imaging conditions. If they differ strongly in pixel size, contrast,
    defocus, acquisition settings, or specimen appearance, the trained model may
    not generalize well.

    ## Reusing Manual Coordinates

    When picking the same micrographs used during supervised picking, the protocol
    can reuse existing manual coordinate files when appropriate.

    If a micrograph already has a coordinate file from the previous picking run,
    the protocol may copy that file instead of running automatic picking again.
    This preserves manual work already performed by the user.

    If no suitable coordinate file is available, or if the micrograph still needs
    automatic selection, the protocol runs the automatic picker.

    This behavior helps combine supervised manual input with automatic completion.

    ## Particle Size

    The particle size is read from the configuration file of the trained picking
    model.

    This value comes from the supervised picking session and is passed to the
    automatic picking program. It defines the expected particle size used when
    searching for particles in the micrographs.

    It is important that the micrographs to be picked have the same sampling rate
    as the micrographs used for training. Otherwise, the same particle size in
    pixels would correspond to a different physical size, and the picker may fail
    or select incorrect features.

    ## Compatibility of New Micrographs

    When the trained picker is applied to another micrograph set, the new
    micrographs should be compatible with the training micrographs.

    The protocol checks, when possible, that the sampling rate and acquisition
    parameters match those of the supervised picking run. This is important because
    the learned model depends on the appearance, scale, and imaging conditions of
    the particles.

    A picker trained on one dataset may not work well on another dataset if the
    particle size, contrast, defocus range, detector settings, or specimen
    preparation differ substantially.

    For best results, train the picker on micrographs representative of the dataset
    to be automatically picked.

    ## Streaming Behavior

    The protocol includes streaming support through the standard automatic-picking
    framework.

    This means that automatic picking can be applied as new micrographs become
    available, which is useful in online or facility workflows.

    In a streaming workflow, the trained model can be used to pick incoming
    micrographs automatically after the supervised training step has been
    completed.

    ## Output Coordinates

    The main output is a **SetOfCoordinates**.

    Each coordinate corresponds to a particle position automatically selected by
    the trained Xmipp picker. The coordinate set is linked to the micrographs that
    were picked and stores the particle box size inherited from the model
    configuration.

    These coordinates can be passed directly to particle extraction protocols.

    As with any automatic picking result, users should inspect a subset of the
    coordinates before extraction. Automatic pickers can select contaminants,
    carbon, ice features, aggregates, or false positives if the model is not well
    trained or if the micrographs differ from the training examples.

    ## Practical Recommendations

    Use this protocol after completing a supervised Xmipp picking session with a
    representative set of micrographs.

    Train the picker on micrographs that cover the variability of the dataset:
    different defocus values, ice thicknesses, particle densities, and image
    qualities.

    Use **Other** micrographs when applying the trained picker to the rest of the
    dataset.

    Reuse an external model only when the new data are very similar to the data
    used to train the model.

    Check that the sampling rate is the same between training and target
    micrographs. Differences in pixel size can make the learned particle size
    incorrect.

    Inspect the automatic picks visually before extracting particles. Even a good
    trained picker may produce false positives in contaminated or unusual
    micrographs.

    If automatic picking performs poorly, return to the supervised picking step,
    add better examples, and retrain the model.

    ## Final Perspective

    Auto-picking is the scalable step of the Xmipp supervised particle-picking
    workflow. It applies a trained model to select particles automatically and
    consistently across many micrographs.

    For biological users, the protocol is valuable because it combines expert
    supervision with high-throughput processing. The user teaches the picker what
    the particles look like, and the protocol applies that knowledge to the rest of
    the dataset.

    The quality of the output coordinates depends directly on the quality and
    representativeness of the training examples and on the similarity between the
    training and target micrographs.
    """
    _label = 'auto-picking (step 2)'
    _devStatus = PROD

    # Trained-model files that are linked from the model source directory into
    # the extra folder of this run. 'config.xmd' is handled separately because
    # it is copied (not linked) and then rewritten.
    filesToCopy = ['model_svm.txt', 'model_pca_model.stk',
                   'model_rotpca_model.stk', 'model_particle_avg.xmp',
                   'templates.stk']

    def __init__(self, **kwargs):
        ProtParticlePickingAuto.__init__(self, **kwargs)
        self.stepsExecutionMode = STEPS_PARALLEL

    # --------------------------- DEFINE param functions -----------------------
    def _defineParams(self, form):
        """Declare the form: model source, micrographs to pick, streaming
        and parallelization parameters.
        """
        form.addSection(label='Input')
        form.addParam('modelSource', EnumParam, label="Model source",
                      choices=["Manual picking in this project",
                               "External directory"],
                      default=0,
                      help="The files model_* can be copied from a previous "
                           "protocol execution within this project or copied "
                           "from an external directory. This latter option is "
                           "useful in cases in which the same kind of molecule "
                           "is processed many times.")
        form.addParam('xmippParticlePicking', PointerParam,
                      label="Xmipp particle picking run",
                      pointerClass='XmippProtParticlePicking',
                      condition="modelSource==%d" % SRC_MANUAL_PICKING,
                      # pointerCondition='isFinished',
                      help='Select the previous xmipp particle picking run.')
        form.addParam('xmippParticlePickingDir', FileParam,
                      label="Xmipp particle picking model directory",
                      allowsNull=True,
                      condition="modelSource==%d" % SRC_DIR,
                      help='The directory must contain the files model_*, '
                           'config.xmd and templates.stk')
        form.addParam('micsToPick', EnumParam,
                      choices=['Same as supervised', 'Other'],
                      default=0, label='Micrographs to pick',
                      display=EnumParam.DISPLAY_LIST,
                      help="Select from which set of micrographs to pick using "
                           "the training from supervised run. "
                           "If you use Same as supervised, the same set of "
                           "micrographs used for training the picker will be "
                           "used at this point. If you select Other, you can "
                           "select another set of micrograph (normally from "
                           "the same specimen) and pick them completely "
                           "automatic using the trained picker.")
        form.addParam('inputMicrographs', PointerParam,
                      pointerClass='SetOfMicrographs',
                      condition='micsToPick==%d' % MICS_OTHER,
                      label="Micrographs",
                      help="Select other set of micrographs to pick using the "
                           "trained picker.")

        self._defineStreamingParams(form)

        form.addParallelSection(threads=1, mpi=1)

    # --------------------------- INSERT steps functions -----------------------
    def _insertInitialSteps(self):
        """Insert the step that brings the trained model into this run and
        return its id in a list.
        """
        # Keep a handle to the supervised picking run (None when the model
        # comes from an external directory); _pickMicrograph uses it to look
        # for manual .pos files.
        self.particlePickingRun = self.xmippParticlePicking.get()
        copyId = self._insertFunctionStep('copyInputFilesStep')
        return [copyId]

    # --------------------------- STEPS functions ------------------------------
    def getSrcDir(self):
        """Return the directory holding the trained model files.

        This is the extra folder of the selected picking run, or the external
        directory chosen by the user. None is returned when the corresponding
        parameter is empty, so that callers (e.g. _validate) can report the
        problem instead of crashing.
        """
        if self.modelSource.get() == SRC_MANUAL_PICKING:
            pickingRun = self.xmippParticlePicking.get()
            return None if pickingRun is None else pickingRun._getExtraPath()
        return self.xmippParticlePickingDir.get()

    def copyInputFilesStep(self):
        """Link the trained model files and copy config.xmd into this run,
        then read the particle size stored in config.xmd.
        """
        srcDir = self.getSrcDir()
        for f in self.filesToCopy:
            createLink(join(srcDir, f), self._getExtraPath(f))
        # config.xmd is copied rather than linked because it is rewritten
        # below; a link would modify the file of the source directory.
        copyFile(join(srcDir, "config.xmd"), self._getExtraPath("config.xmd"))

        # Get the box size
        fnProperties = "properties@" + self._getExtraPath("config.xmd")
        mdInfo = emlib.MetaData(fnProperties)
        self.boxSize = mdInfo.getValue(emlib.MDL_PICKING_PARTICLE_SIZE,
                                       mdInfo.firstObject())
        # Set the manual-particles size field to 0 in the local copy
        mdInfo.setValue(emlib.MDL_PICKING_MANUALPARTICLES_SIZE, 0,
                        mdInfo.firstObject())
        mdInfo.write(fnProperties, emlib.MD_APPEND)

    def _getPickingBoxSize(self):
        """Return the particle size of the trained model.

        The value is normally cached in self.boxSize by copyInputFilesStep.
        If that attribute is missing (presumably when that step is not
        re-executed in the current process, e.g. a resumed run), it is read
        again from the config.xmd already copied into the extra folder.
        """
        if getattr(self, 'boxSize', None) is None:
            mdInfo = emlib.MetaData("properties@"
                                    + self._getExtraPath("config.xmd"))
            self.boxSize = mdInfo.getValue(emlib.MDL_PICKING_PARTICLE_SIZE,
                                           mdInfo.firstObject())
        return self.boxSize

    def _reuseManualCoordinates(self, micPath):
        """Copy the .pos file of the supervised run for this micrograph.

        Returns True when a file was copied, which means that automatic
        picking must not be run for the micrograph. Nothing is copied when
        the file does not exist or when its header block marks the
        micrograph state as "Available".
        """
        basePos = replaceBaseExt(micPath, "pos")
        fnPos = self.particlePickingRun._getExtraPath(basePos)
        if not exists(fnPos):
            return False

        if 'header' in emlib.getBlocksInMetaDataFile(fnPos):
            mdheader = emlib.MetaData("header@" + fnPos)
            state = mdheader.getValue(emlib.MDL_PICKING_MICROGRAPH_STATE,
                                      mdheader.firstObject())
            if state == "Available":
                return False

        # Copy manual .pos file of this micrograph
        copyFile(fnPos, self._getExtraPath(basename(fnPos)))
        return True

    def _pickMicrograph(self, mic, *args):
        """Produce the coordinates of one micrograph, either by reusing the
        .pos file of the supervised run or by running the automatic picker.
        """
        micPath = mic.getFileName()

        if (self.micsToPick == MICS_SAMEASPICKING
                and self._reuseManualCoordinates(micPath)):
            return

        modelRoot = self._getExtraPath('model')
        micName = removeBaseExt(micPath)

        params = "-i %s " % micPath
        params += "--particleSize %d " % self._getPickingBoxSize()
        params += "--model %s " % modelRoot
        params += "--outputRoot %s " % self._getExtraPath(micName)
        params += "--mode autoselect --thr %d" % self.numberOfThreads

        self.runJob("xmipp_micrograph_automatic_picking", params)

    def readSetOfCoordinates(self, workingDir, coordSet):
        """Fill coordSet with the coordinates found in workingDir for all the
        input micrographs.
        """
        # The call below resolves to the module-level function imported from
        # xmipp3.convert, not to this method.
        readSetOfCoordinates(workingDir, self.getInputMicrographs(), coordSet)

    def readCoordsFromMics(self, workingDir, micList, coordSet):
        """Fill coordSet with the coordinates found in workingDir for the
        micrographs in micList.
        """
        readSetOfCoordinates(workingDir, micList, coordSet)

    # --------------------------- INFO functions -------------------------------
    def _validate(self):
        """Return the list of error messages that prevent the run.

        Empty parameters are reported through messages (or left to the
        generic parameter validation) instead of raising exceptions.
        """
        validateMsgs = []
        fromManualPicking = self.modelSource.get() == SRC_MANUAL_PICKING
        pickingRun = (self.xmippParticlePicking.get()
                      if fromManualPicking else None)

        if fromManualPicking and not hasattr(pickingRun, "outputCoordinates"):
            validateMsgs.append("You need to generate coordinates for the "
                                "supervised picking")

        srcDir = self.getSrcDir()
        if srcDir:
            # config.xmd is required by copyInputFilesStep as well
            requiredFiles = self.filesToCopy + ["config.xmd"]
            srcPaths = [join(srcDir, k) for k in requiredFiles]
            # Check that all needed files exist
            if missingPaths(*srcPaths):
                validateMsgs.append('Input picking run has not been trained, '
                                    'use *Autopick* for at least one micrograph')
        elif not fromManualPicking:
            validateMsgs.append("Select the directory that contains the "
                                "trained picking model.")

        micsToPick = self.micsToPick.get()

        # If other set of micrographs is provided they should have same
        # sampling rate and acquisition
        if (micsToPick == MICS_OTHER and fromManualPicking
                and pickingRun is not None):
            inputMics = self.inputMicrographs.get()
            manualMics = pickingRun.inputMicrographs.get()
            # FIXME: manualMics is always None when scheduled...
            #   it should be fixed in the update step at Scipion scheduler app
            if inputMics is not None and manualMics is not None:
                if inputMics.getSamplingRate() != manualMics.getSamplingRate():
                    validateMsgs.append('New micrographs should have same '
                                        'sampling rate as the ones already '
                                        'picked.')

                acq = manualMics.getAcquisition()
                if not inputMics.getAcquisition().equalAttributes(acq):
                    validateMsgs.append('New micrographs should have same '
                                        'acquisition parameters as the ones '
                                        'already picked.')

        if not fromManualPicking and micsToPick == MICS_SAMEASPICKING:
            validateMsgs.append("You cannot take the model from a directory "
                                "and indicate that the set of micrographs "
                                "is the same as picking. If you take the "
                                "model from a directory, probably you want "
                                "to pick from a different set.")

        return validateMsgs

    def getSummary(self, coordSet):
        """Return a one-line summary telling where the model came from."""
        summary = []
        if self.modelSource == SRC_MANUAL_PICKING:
            summary.append("Previous run: %s" %
                           self.xmippParticlePicking.get().getNameId())
        else:
            summary.append("Model from: %s" %
                           self.xmippParticlePickingDir.get())
        return "\n".join(summary)

    def getMethods(self, output):
        """Return the methods sentence for the given output coordinates.

        The training source is the name of the supervised picking run or,
        when the model was taken from an external directory, that directory.
        """
        if self.modelSource.get() == SRC_MANUAL_PICKING:
            trainingSource = self.xmippParticlePicking.get().getNameId()
        else:
            # No picking run is selected in this mode
            trainingSource = self.xmippParticlePickingDir.get()

        msg = 'Program picked %d particles ' % output.getSize()
        msg += 'of size %d ' % output.getBoxSize()
        msg += 'using training from %s. ' % trainingSource
        msg += 'For more detail see [Abrishami2013]'
        return msg

    def _citations(self):
        return ['Abrishami2013']

    # --------------------------- UTILS functions ------------------------------
    def getCoordsDir(self):
        """Return the folder where the coordinate files are written."""
        return self._getExtraPath()

    def getInputMicrographsPointer(self):
        """Return the pointer to the micrographs to pick: the ones of the
        supervised run or the set selected by the user. None is returned when
        the supervised run is needed but not selected.
        """
        if self.micsToPick == MICS_SAMEASPICKING:
            inputPicking = self.xmippParticlePicking.get()
            return inputPicking.inputMicrographs if inputPicking else None
        else:
            return self.inputMicrographs

    def getInputMicrographs(self):
        """ Return the input micrographs that can be the same of the supervised
        picking or other ones selected by the user. (This can be used to pick
        a new set of micrographs with the same properties than a previous
        trained ones. )
        """
        micsPointer = self.getInputMicrographsPointer()
        return micsPointer.get() if micsPointer else None