Source code for pwem.protocols.protocol_extract_coordinates

# -*- coding: utf-8 -*-
# **************************************************************************
# *
# * Authors:     J.M. De la Rosa Trevin (jmdelarosa@cnb.csic.es)
# *              David Maluenda (dmaluenda@cnb.csic.es)  -streaming version-
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 3 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************

import os
import numpy as np
import time
from datetime import datetime

import pyworkflow.protocol.params as params
from pyworkflow.protocol.constants import STATUS_NEW
import pyworkflow.utils as pwutils
from pyworkflow.object import Float

from pwem.protocols import ProtParticlePickingAuto
import pwem.constants as emcts
import pwem.objects as emobj


[docs]class ProtExtractCoords(ProtParticlePickingAuto): """ Extract the coordinates information from a set of particles. This protocol is useful when we want to re-extract the particles (maybe resulting from classification and cleaning) with the original dimensions. It can be also handy to visualize the resulting particles in their location on micrographs. """ _label = 'extract coordinates' # --------------------------- DEFINE param functions ---------------------- def _defineParams(self, form): form.addSection(label='Input') form.addParam('inputParticles', params.PointerParam, pointerClass='SetOfParticles', label='Input particles', important=True, help='Select the particles from which you want\n' 'to extract the coordinates and micrographs.') form.addParam('inputMicrographs', params.PointerParam, pointerClass='SetOfMicrographs', label='Input micrographs', important=True, help='Select the micrographs to which you want to\n' 'associate the coordinates from the particles.') form.addParam('applyShifts', params.BooleanParam, default=False, label='Apply particle shifts?', help='Apply particle shifts (ONLY INTEGER PART) from 2D alignment to ' 'recalculate new coordinates. This can be useful ' 'for re-centering particle coordinates.\n' 'IMPORTANT: Only the integer part of the shifts will be applied in ' 'order to avoid interpolation. If you are re-extracting particles and ' 'want to apply the remaining decimal part of the shifts, set to "yes" the ' 'option "Were particle shifts applied?" in alignment assign protocol.' ) form.addParallelSection(threads=0, mpi=0) # --------------------------- INSERT steps functions ---------------------- def _insertAllSteps(self): self.streamingModeOn = self.getInputParticles().isStreamOpen() if self.streamingModeOn: self.inputSize = 0 self.outputSize = 0 self.micsDone = [] t0 = time.time() newParts, self.streamClosed = self.loadInputs() print("loadInputs() time: %fs" % (time.time() - t0)) stepsIds = self._insertNewSteps(newParts) self._insertFunctionStep('createOutputStep', prerequisites=stepsIds, wait=True) else: self._insertFunctionStep('createOutputStep') def _insertNewSteps(self, partIds): deps = [] stepId = self._insertFunctionStep('extractCoordsStep', partIds, prerequisites=[]) deps.append(stepId) return deps def _stepsCheck(self): # To allow streaming picking we need to detect: # 1) new micrographs ready to be picked # 2) new output coordinates that have been produced and add then # to the output set. if self.streamingModeOn: self._checkNewInput() self._checkNewOutput() else: pass def _checkNewInput(self): # Check if there are new particles to process from the input set partsFile = self.getInputParticles().getFileName() micsFile = self.getInputMicrographs().getFileName() now = datetime.now() self.lastCheck = getattr(self, 'lastCheck', now) mTimeParts = datetime.fromtimestamp(os.path.getmtime(partsFile)) mTimeMics = datetime.fromtimestamp(os.path.getmtime(micsFile)) # If the input movies.sqlite have not changed since our last check, # it does not make sense to check for new input data if self.lastCheck > mTimeParts and self.lastCheck > mTimeMics: return None self.lastCheck = now newParts, self.streamClosed = self.loadInputs() if len(newParts) > 0: fDeps = self._insertNewSteps(newParts) outputStep = self._getFirstJoinStep() if outputStep is not None: outputStep.addPrerequisites(*fDeps) self.updateSteps()
[docs] def extractCoordsStep(self, partsIds): outputCoords = self.extractCoordinates(partsIds) self.outputSize += len(outputCoords) t0 = time.time() outputCoords.write() outputCoords.close() print("write time: %fs" % (time.time() - t0))
[docs] def extractCoordinates(self, partsIds=None): inPart = self.getInputParticles() inMics = self.getInputMicrographs() scale = inPart.getSamplingRate() / inMics.getSamplingRate() print("Scaling coordinates by a factor *%0.2f*" % scale) alignType = inPart.getAlignment() suffix = self.getSuffix(partsIds[0]) if partsIds is not None else '' outputCoords = self._createSetOfCoordinates(inMics, suffix=suffix) # Prepare a double key dictionary to do the match: micname and micId micDict = dict() for mic in inMics.iterItems(): # Clone the mics! otherwise we will get pointers and # will end up with the same mic in the dictionary. clonedMic = mic.clone() micDict[clonedMic.getObjId()] = clonedMic micDict[clonedMic.getMicName()] = clonedMic def appendCoordFromParticle(part): coord = part.getCoordinate() # Try micname micName = coord.getMicName() mic = micDict.get(micName, None) # Try micid if mic is None: micKey = coord.getMicId() mic = micDict.get(micKey, None) if mic is None: print("Skipping particle, %s or id %s not found" % (micName, micKey)) else: newCoord.copyObjId(part) x, y = coord.getPosition() if self.applyShifts: # Get the shifts, they are returned with the sign reverted shifts = self.getShifts(part.getTransform(), alignType) # Add the shifts (values are inverted so subtract) x -= shifts[0] y -= shifts[1] # Apply the scale x *= scale y *= scale # Round coordinates to closer integer 39.9 --> 40 and not 39 finalX = round(x) finalY = round(y) # Annotate fractions if shifts applied if self.applyShifts: newCoord.xFrac = Float(finalX - x) newCoord.yFrac = Float(finalY-y) newCoord.setPosition(finalX, finalY) newCoord.setMicrograph(mic) outputCoords.append(newCoord) newCoord = emobj.Coordinate() if self.streamingModeOn: for partId in partsIds: particle = inPart[partId] appendCoordFromParticle(particle) else: for particle in inPart: appendCoordFromParticle(particle) boxSize = inPart.getXDim() * scale outputCoords.setBoxSize(boxSize) return outputCoords
def _checkNewOutput(self): if getattr(self, 'finished', False): return self.finished = self.streamClosed and \ self.inputSize == self.outputSize streamMode = emobj.Set.STREAM_CLOSED if self.finished else emobj.Set.STREAM_OPEN # we will read all ready files files = pwutils.glob(self.getTmpOutputPath('*')) newData = len(files) > 0 lastToClose = self.finished and hasattr(self, 'outputCoordinates') if newData or lastToClose: outSet = self._loadOutputSet() if newData: for tmpFile in files: tmpSet = emobj.SetOfCoordinates(filename=tmpFile) tmpSet.loadAllProperties() outSet.copyItems(tmpSet) outSet.setBoxSize(tmpSet.getBoxSize()) tmpSet.close() pwutils.cleanPath(tmpFile) self._updateOutputSet('outputCoordinates', outSet, state=streamMode) if self.finished: # Unlock createOutputStep if finished all jobs outputStep = self._getFirstJoinStep() if outputStep and outputStep.isWaiting(): outputStep.setStatus(STATUS_NEW) def _loadOutputSet(self): setFile = self._getPath("coordinates.sqlite") if os.path.exists(setFile): outputSet = emobj.SetOfCoordinates(filename=setFile) outputSet.loadAllProperties() outputSet.enableAppend() else: outputSet = emobj.SetOfCoordinates(filename=setFile) outputSet.setStreamState(outputSet.STREAM_OPEN) self._store(outputSet) self._defineTransformRelation(self.getInputParticles(), outputSet) self._defineSourceRelation(self.getInputMicrographs(), outputSet) # copyInfo raise an error since particles has no _boxsize # copyInfo does not do anything else # outputSet.copyInfo(self.getInputParticles()) outputSet.setMicrographs(self.getInputMicrographs()) return outputSet def _getFirstJoinStep(self): for s in self._steps: if s.funcName == self._getFirstJoinStepName(): return s return None def _getFirstJoinStepName(self): # This function will be used for streaming, to check which is # the first function that need to wait for all micrographs # to have completed, this can be overwritten in subclasses # (e.g., in Xmipp 'sortPSDStep') return 'createOutputStep'
[docs] def createOutputStep(self): if not self.streamingModeOn: outputCoords = self.extractCoordinates() self._defineOutputs(outputCoordinates=outputCoords) self._defineSourceRelation(self.inputParticles, outputCoords) self._defineSourceRelation(self.inputMicrographs, outputCoords)
# ------------- UTILS functions ----------------
[docs] def getSuffix(self, suffix): return "_tmp%s" % suffix
[docs] def getTmpOutputPath(self, suffix): return self._getPath("coordinates%s.sqlite" % self.getSuffix(suffix))
[docs] def loadInputs(self): micsFn = self.getInputMicrographs().getFileName() micsSet = emobj.SetOfMicrographs(filename=micsFn) micsSet.loadAllProperties() availableMics = [] for mic in micsSet: availableMics.append(mic.getObjId()) micsSetClosed = micsSet.isStreamClosed() micsSet.close() partsFn = self.getInputParticles().getFileName() partsSet = emobj.SetOfParticles(filename=partsFn) partsSet.loadAllProperties() newParts = [] newMics = [] for item in partsSet: micKey = item.getCoordinate().getMicId() if micKey not in self.micsDone and micKey in availableMics: newParts.append(item.getObjId()) if micKey not in self.micsDone: newMics.append(micKey) self.micsDone += newMics self.inputSize = partsSet.getSize() partSetClosed = partsSet.isStreamClosed() partsSet.close() return newParts, micsSetClosed and partSetClosed
[docs] def getShifts(self, transform, alignType): """ is2D == True-> matrix is 2D (2D images alignment) otherwise matrix is 3D (3D volume alignment or projection) invTransform == True -> for xmipp implies projection -> for xmipp implies alignment """ if alignType == emcts.ALIGN_NONE: return None inverseTransform = alignType == emcts.ALIGN_PROJ # only flip is meaningful if 2D case # in that case the 2x2 determinant is negative flip = False matrix = transform.getMatrix() if alignType == emcts.ALIGN_2D: # get 2x2 matrix and check if negative flip = bool(np.linalg.det(matrix[0:2, 0:2]) < 0) if flip: matrix[0, :2] *= -1. # invert only the first two columns keep x matrix[2, 2] = 1. # set 3D rot elif alignType == emcts.ALIGN_3D: flip = bool(np.linalg.det(matrix[0:3, 0:3]) < 0) if flip: matrix[0, :4] *= -1. # now, invert first line including x matrix[3, 3] = 1. # set 3D rot # flip = bool(numpy.linalg.det(matrix[0:3,0:3]) < 0) # if flip: # matrix[0,:4] *= -1.#now, invert first line including x shifts = self.geometryFromMatrix(matrix, inverseTransform) return shifts
[docs] def geometryFromMatrix(self, matrix, inverseTransform): from pwem.convert.transformations import translation_from_matrix if inverseTransform: matrix = np.linalg.inv(matrix) shifts = -translation_from_matrix(matrix) else: shifts = translation_from_matrix(matrix) return shifts
[docs] def getInputParticles(self): return self.inputParticles.get()
# --------------------------- INFO functions ------------------------------ def _summary(self): summary = [] ps1 = self.getInputParticles().getSamplingRate() ps2 = self.getInputMicrographs().getSamplingRate() summary.append(u'Input particles pixel size: *%0.3f* (Å/px)' % ps1) summary.append(u'Input micrographs pixel size: *%0.3f* (Å/px)' % ps2) summary.append('Scaling coordinates by a factor of *%0.3f*' % (ps1 / ps2)) if self.applyShifts: summary.append('Applied 2D shifts from particles') if hasattr(self, 'outputCoordinates'): summary.append('Output coordinates: *%d*' % self.outputCoordinates.getSize()) return summary def _methods(self): # No much to add to summary information return self._summary() def _validate(self): """ The function of this hook is to add some validation before the protocol is launched to be executed. It should return a list of errors. If the list is empty the protocol can be executed. """ errors = [] inputParticles = self.getInputParticles() first = inputParticles.getFirstItem() if first.getCoordinate() is None: errors.append('The input particles do not have coordinates!!!') if self.applyShifts and not inputParticles.hasAlignment(): errors.append('Input particles do not have alignment information!') return errors