# **************************************************************************
# *
# * Authors: J.M. De la Rosa Trevin (jmdelarosa@cnb.csic.es) [1]
# * Kevin Savage (kevin.savage@diamond.ac.uk) [2]
# *
# * [1] Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# * [2] Diamond Light Source, Ltd
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 3 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307 USA
# *
# * All comments concerning this program package may be sent to the
# * e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
import enum
import os
from os.path import join, basename
import re
from datetime import timedelta, datetime
import logging
logger = logging.getLogger(__name__)
import pyworkflow.utils as pwutils
import pyworkflow.protocol.params as params
from pwem import Domain, getMatchingFiles
from pwem.emlib.image import ImageHandler
import pwem.constants as emcts
from .images import ProtImportImages
from ...objects import SetOfMicrographs, SetOfMovies
class ProtImportMicBase(ProtImportImages):
    """ Common base class for ProtImportMicrographs and ProtImportMovies.

    It adds on top of ProtImportImages:
      - the sampling rate related acquisition parameters,
      - the "Rejection" (blacklist) form section and its validation,
      - the lookup of acquisition values in FEI XML files located
        next to the imported files.
    """
    _checkStacks = False

    def _defineAcquisitionParams(self, form):
        """ Extend the acquisition group defined by ProtImportImages with
        the sampling rate parameters and return that group.
        """
        group = ProtImportImages._defineAcquisitionParams(self, form)
        group.addParam('samplingRateMode', params.EnumParam,
                       choices=[pwutils.Message.LABEL_SAMP_MODE_1,
                                pwutils.Message.LABEL_SAMP_MODE_2],
                       default=emcts.SAMPLING_FROM_IMAGE,
                       label=pwutils.Message.LABEL_SAMP_MODE,
                       help=pwutils.Message.TEXT_SAMP_MODE)
        group.addParam('samplingRate', params.FloatParam, default=1.0,
                       condition='samplingRateMode==%d' % emcts.SAMPLING_FROM_IMAGE,
                       label=pwutils.Message.LABEL_SAMP_RATE,
                       help=pwutils.Message.TEXT_SAMP_RATE)
        group.addParam('scannedPixelSize', params.FloatParam, default=7.0,
                       condition='samplingRateMode==%d' % emcts.SAMPLING_FROM_SCANNER,
                       label=pwutils.Message.LABEL_SCANNED,
                       help='')
        return group

    def _defineBlacklistParams(self, form):
        """ Options to blacklist certain items when launching the
        import protocol.
        """
        form.addSection(label="Rejection")
        form.addParam("blacklistSet", params.PointerParam,
                      pointerClass=self._getBlacklistSetClass(),
                      allowsNull=True,
                      label="Reject from Set",
                      help="Files on this set will not be imported")
        form.addParam('blacklistDateFrom', params.StringParam,
                      label="Reject from",
                      allowsNull=True,
                      help="Files acquired after this date will not be imported. "
                           "Must follow format: YYYY-mm-dd HH:MM:SS \n"
                           "e.g: 2019-01-14 14:18:05")
        form.addParam('blacklistDateTo', params.StringParam,
                      label="Reject before",
                      allowsNull=True,
                      help="Files acquired before this date will not be imported. "
                           "Must follow format: YYYY-mm-dd HH:MM:SS \n"
                           "e.g: 2019-01-14 14:18:05")
        form.addParam('useRegexps', params.BooleanParam,
                      default=True,
                      label='Rejection file has RegExps',
                      help="Choose Yes if the rejection file contains regular expressions. Set to No if "
                           "the rejection file contains file names. Ignore if not using a rejection file")
        form.addParam('blacklistFile', params.FileParam,
                      label="Blacklist File",
                      allowsNull=True,
                      help="Reject everything included in this file. If Use RegExps is True,"
                           "lines will be interpreted as regular expressions. E.g: \n"
                           "(.*)GRID_0[1-5](.*)\n"
                           "(.*)/GRID_10/Falcon_2019_01_14-16_(.*)\n"
                           "If Use RegExps is False, lines will be interpreted as file names. E.g.\n"
                           "/path/to/GRID_10/Falcon_2019_01_14-16_51_20_0_movie.mrcs\n"
                           "/path/to/GRID_10/Falcon_2019_01_14-16_55_40_0_movie.mrcs"
                      )

    def _getBlacklistSetClass(self):
        """ Returns the class to be blacklisted by this protocol.
        """
        return "SetOfImages"

    def _validateBlacklist(self):
        """ Return a list of error messages for the rejection parameters:
        a blacklist set that is still open for streaming, dates that do
        not follow the expected format, or a 'from' date later than the
        'to' date.
        """
        errors = []
        blacklistBySet = self.blacklistSet.get()
        if blacklistBySet and blacklistBySet.isStreamOpen():
            errors.append("Can't blacklist an open set. "
                          "Please stop streaming or wait until streaming is done to blacklist this set.")

        dates = [self.blacklistDateFrom.get(), self.blacklistDateTo.get()]
        parsedDates = []
        for d in dates:
            if d:
                try:
                    parsedDates.append(datetime.strptime(d, "%Y-%m-%d %H:%M:%S"))
                except ValueError as e:
                    errors.append("Bad date formatting in blacklist date %s: %s" % (d, e))

        # Only comparable when both dates were given and parsed correctly
        if len(parsedDates) == 2 and parsedDates[0] > parsedDates[1]:
            errors.append("Wrong blacklist dates: date from must be earlier than date to")
        return errors

    def _validate(self):
        """ Errors from ProtImportImages plus the blacklist ones. """
        errors = ProtImportImages._validate(self)
        errors += self._validateBlacklist()
        return errors

    def setSamplingRate(self, micSet):
        """ Set the sampling rate to the given set. """
        if self.samplingRateMode == emcts.SAMPLING_FROM_IMAGE:
            micSet.setSamplingRate(self.samplingRate.get())
        else:
            micSet.setScannedPixelSize(self.scannedPixelSize.get())

    def _acquisitionWizardCondition(self):
        """ By default this wizard will appears only when we import from
        a format that is not from files.
        But movie-import also can have a wizard to read from FEI xml files. """
        return 'True'

    def loadAcquisitionInfo(self):
        """ Return a proper acquisitionInfo (dict)
        or an error message (str).

        When importing from files, an XML file is searched next to every
        input file: first replacing '_frames' by '.xml' in the name
        without extension, then appending '.xml' to it. The values
        parsed from the last XML found are returned.
        """
        if self.importFrom != self.IMPORT_FROM_FILES:
            return ProtImportImages.loadAcquisitionInfo(self)

        result = "Could not find acquisition information"

        for fileName, fileId in self.iterFiles():
            baseName = pwutils.removeExt(fileName)
            xml1 = baseName.replace('_frames', '.xml')
            xml2 = baseName + '.xml'
            if os.path.exists(xml1):
                result = self._parseXML(xml1)
            elif os.path.exists(xml2):
                # Checked before parsing: a missing XML must lead to the
                # documented error message, not to an exception.
                result = self._parseXML(xml2)

        return result

    def _parseXML(self, fileName):
        """ Parse micrograph XML files from FEI.

        Returns a dict with the text of the AccelerationVoltage,
        InstrumentModel and NominalMagnification elements (stored under
        'voltage', 'InstrumentModel' and 'magnification') and, when a
        numericValue is found after a pixelSize element was opened, the
        'samplingRate'. The voltage is divided by 1000 before returning.
        """
        import xml.etree.ElementTree as ET
        # get context
        context = iter(ET.iterparse(fileName,
                                    events=('start', 'end')))
        labels = {'AccelerationVoltage': 'voltage',
                  'InstrumentModel': 'InstrumentModel',
                  'NominalMagnification': 'magnification'}
        acq = {}

        pixelSize = False  # True once a pixelSize element has been opened
        for event, elem in context:
            if event == 'start':
                if 'pixelSize' in elem.tag:
                    logger.debug("started: pixelSize")
                    pixelSize = True
            elif event == 'end':
                # Tags are matched by their '}name' ending
                for label, acqKey in labels.items():
                    if '}%s' % label in elem.tag:
                        acq[acqKey] = elem.text
                if '}numericValue' in elem.tag and pixelSize:
                    # Convert to A. NOTE(review): the 1e10 factor implies the
                    # XML value is in meters -- confirm against the FEI schema
                    acq['samplingRate'] = float(elem.text) * 1e10
                    pixelSize = False
            else:
                raise Exception("Unknown event type %s" % event)

        # Correct for units conversion
        acq['voltage'] = float(acq['voltage']) / 1000.
        return acq

    def getItemsToBlacklistFromFile(self):
        """ Return the set of non-empty, stripped lines of the blacklist
        file (an empty set if no file was given). The file is read only
        once and the result is cached in this object.
        """
        if not hasattr(self, '_fileItemsToBlacklist'):
            blacklistfile = self.blacklistFile.get()
            blacklistItems = set()
            if blacklistfile:
                with open(blacklistfile, 'r') as f:
                    for blacklistedItem in f:
                        blacklistedItem = blacklistedItem.strip()
                        # Skip blank lines: as a regular expression an
                        # empty pattern matches every file name.
                        if blacklistedItem:
                            blacklistItems.add(blacklistedItem)
            self._fileItemsToBlacklist = blacklistItems
        return self._fileItemsToBlacklist

    def getBlacklistedItems(self):
        """ Return the (lazily created) set of file names that have
        already been found to be blacklisted.
        """
        if not hasattr(self, '_blacklistedItems'):
            self._blacklistedItems = set()
        return self._blacklistedItems

    def isBlacklisted(self, fileName):
        """ Return True if fileName must be rejected by any of the
        criteria: input set, modification date or blacklist file.
        Rejected names are remembered to answer faster next time.
        """
        # check if already blacklisted
        blacklistedItems = self.getBlacklistedItems()
        if fileName in blacklistedItems:
            return True

        # Blacklisted by set
        blacklistSet = self.blacklistSet.get()
        if blacklistSet is not None:
            for img in blacklistSet:
                blacklistFileName = img.getFileName()
                isLinkToFile = (os.path.islink(blacklistFileName)
                                and fileName == os.readlink(blacklistFileName))
                sameUniqueName = (self._getUniqueFileName(fileName)
                                  == os.path.basename(blacklistFileName))
                if isLinkToFile or sameUniqueName:
                    self.info("Blacklist warning: %s is blacklisted by the input set" % fileName)
                    blacklistedItems.add(fileName)
                    return True

        # Blacklisted by date: the file modification time is compared
        # against whichever limits were provided.
        blacklistDateFrom = self.blacklistDateFrom.get()
        blacklistDateTo = self.blacklistDateTo.get()
        if blacklistDateFrom or blacklistDateTo:
            fileDate = datetime.fromtimestamp(os.path.getmtime(fileName))
            afterFrom = (not blacklistDateFrom
                         or datetime.strptime(blacklistDateFrom,
                                              "%Y-%m-%d %H:%M:%S") <= fileDate)
            beforeTo = (not blacklistDateTo
                        or fileDate <= datetime.strptime(blacklistDateTo,
                                                         "%Y-%m-%d %H:%M:%S"))
            if afterFrom and beforeTo:
                self.info("Blacklist warning: %s is blacklisted by date" % fileName)
                blacklistedItems.add(fileName)
                return True

        # Blacklisted by file
        useRegexps = self.useRegexps.get()
        for item2blacklist in self.getItemsToBlacklistFromFile():
            if useRegexps:
                if re.match(item2blacklist, fileName):
                    self.info("Blacklist warning: %s matched blacklist regexp %s"
                              % (fileName, item2blacklist))
                    blacklistedItems.add(fileName)
                    return True
            elif fileName in item2blacklist:
                # Substring test: fileName is contained in the listed line
                self.info("Blacklist warning: %s is blacklisted " % fileName)
                blacklistedItems.add(fileName)
                return True

        return False
class ImportMicsOutput(enum.Enum):
    """ Output names of the micrographs import, mapped to their class. """
    outputMicrographs = SetOfMicrographs
class ProtImportMicrographs(ProtImportMicBase):
    """Protocol to import a set of micrographs to the project"""
    _label = 'import micrographs'
    _possibleOutputs = ImportMicsOutput
    _outputClassName = ImportMicsOutput.outputMicrographs.value.__name__

    IMPORT_FROM_EMX = 1
    IMPORT_FROM_XMIPP3 = 2
    IMPORT_FROM_SCIPION = 3

    def _getImportChoices(self):
        """ Return a list of possible choices
        from which the import can be done.
        (usually packages formats such as: xmipp3, eman2, relion...etc.
        """
        choices = ProtImportImages._getImportChoices(self)
        return choices + ['emx', 'xmipp3', 'scipion']

    def _getBlacklistSetClass(self):
        """ Returns the class to be blacklisted by this protocol.
        """
        return "SetOfMicrographs"

    def _defineImportParams(self, form):
        """ Just redefine to put some import parameters
        before the acquisition related parameters.
        """
        form.addParam('emxFile', params.FileParam,
                      condition='(importFrom == %d)' % self.IMPORT_FROM_EMX,
                      label='Input EMX file',
                      help="Select the EMX file containing micrographs information.\n"
                           "See more about [[http://i2pc.cnb.csic.es/emx][EMX format]]")
        form.addParam('mdFile', params.FileParam,
                      condition='(importFrom == %d)' % self.IMPORT_FROM_XMIPP3,
                      label='Micrographs metadata file',
                      help="Select the micrographs Xmipp metadata file.\n"
                           "It is usually a _micrograph.xmd_ file result\n"
                           "from import, preprocess or downsample protocols.")
        form.addParam('sqliteFile', params.FileParam,
                      condition='(importFrom == %d)' % self.IMPORT_FROM_SCIPION,
                      label='Micrographs sqlite file',
                      help="Select the micrographs sqlite file.\n")

    # --------------------------- INSERT functions ----------------------------
    def _insertAllSteps(self):
        """ Insert the steps of the base class when importing from files,
        or a single importMicrographsStep when an import class is used.
        """
        importFrom = self.importFrom.get()
        ci = self.getImportClass()
        if ci is None:
            ProtImportMicBase._insertAllSteps(self)
        else:
            self._insertFunctionStep('importMicrographsStep', importFrom,
                                     self.importFilePath)

    # --------------------------- STEPS functions -----------------------------
    def importMicrographsStep(self, importFrom, *args):
        """ Run the import through the import class and store a summary
        of what was produced. The arguments are not used in the body;
        they are the values passed when the step was inserted.
        """
        ci = self.getImportClass()
        ci.importMicrographs()

        summary = "Import from *%s* file:\n" % self.getEnumText('importFrom')
        summary += self.importFilePath + '\n'

        if self.hasAttribute('outputParticles'):
            particles = self.outputParticles
            summary += ' Particles: *%d* (ctf=%s, alignment=%s)\n' % (particles.getSize(),
                                                                      particles.hasCTF(),
                                                                      particles.getAlignment())
        if self.hasAttribute('outputCoordinates'):  # EMX files can contain only Coordinates information
            summary += ' Coordinates: *%d* \n' % (self.outputCoordinates.getSize())
        if self.hasAttribute('outputMicrographs'):
            summary += ' Micrographs: *%d* \n' % (self.outputMicrographs.getSize())
        if self.copyFiles:
            summary += ('\n_WARNING_: Binary files copied into project '
                        '(extra disk space)')
        self.summaryVar.set(summary)

    # --------------------------- INFO functions ------------------------------
    def _validate(self):
        """ When importing from files, add to the base class errors a
        check that the first input file is not a stack. Otherwise the
        validation is delegated to the import class.
        """
        ci = self.getImportClass()
        if ci is not None:
            return ci.validateMicrographs()

        errors = ProtImportMicBase._validate(self)
        for micFn, _ in self.iterFiles():
            imgh = ImageHandler()
            if imgh.isImageFile(micFn):
                _, _, z, n = imgh.getDimensions(micFn)
                if n > 1 or z > 1:
                    errors.append("The protocol not support micrographs "
                                  "stored in stacks. If you want to "
                                  "obtain your micrographs individually, "
                                  "you can run the following command:\n"
                                  "scipion run scipion_directory/scripts/"
                                  "split_stacks.py --files *your files* "
                                  "--ext *extension*")
            # JMRT: only check the first image, for large dataset
            # even reading the header can take a while
            break
        return errors

    def _summary(self):
        """ Base class summary when importing from files, otherwise the
        text stored by importMicrographsStep.
        """
        if self.importFrom == self.IMPORT_FROM_FILES:
            return ProtImportMicBase._summary(self)
        else:
            return [self.summaryVar.get('No summary information.')]

    # --------------------------- UTILS functions -----------------------------
    def getImportClass(self):
        """ Return the class in charge of importing the files.

        As a side effect self.importFilePath is set to the stripped path
        of the selected file ('' when importing from files, in which
        case None is returned).
        """
        if self.importFrom == self.IMPORT_FROM_EMX:
            EmxImport = Domain.importFromPlugin('emxlib.convert', 'EmxImport')
            self.importFilePath = self.emxFile.get('').strip()
            return EmxImport(self, self.importFilePath)
        elif self.importFrom == self.IMPORT_FROM_XMIPP3:
            XmippImport = Domain.importFromPlugin('xmipp3.convert', 'XmippImport')
            self.importFilePath = self.mdFile.get('').strip()
            # Pass the stripped path, as the other import branches do
            return XmippImport(self, self.importFilePath)
        elif self.importFrom == self.IMPORT_FROM_SCIPION:
            from .dataimport import ScipionImport
            self.importFilePath = self.sqliteFile.get('').strip()
            return ScipionImport(self, self.importFilePath)
        else:
            self.importFilePath = ''
            return None
class ImportMoviesOutput(enum.Enum):
    """ Output names of the movies import, mapped to their class. """
    outputMovies = SetOfMovies
class ProtImportMovies(ProtImportMicBase):
    """ Protocol to import a set of movies (from direct detector cameras)
    to the project.
    """
    _label = 'import movies'
    _possibleOutputs = ImportMoviesOutput
    _outputClassName = ImportMoviesOutput.outputMovies.value.__name__

    def __init__(self, **kwargs):
        ProtImportMicBase.__init__(self, **kwargs)
        self.serverSocket = None
        self.connectionList = None
        # -1 means "not resolved yet", see getGain() and getDark()
        self.gain = -1
        self.dark = -1

    def _getBlacklistSetClass(self):
        """ Returns the class to be blacklisted by this protocol.
        """
        return "SetOfMovies"

    def _defineAcquisitionParams(self, form):
        """ Add the dose, gain and dark parameters to the acquisition
        ones and return the acquisition group, as the base class does.
        """
        group = ProtImportMicBase._defineAcquisitionParams(self, form)
        line = group.addLine('Dose (e/A^2)',
                             help="Initial accumulated dose (usually 0) and "
                                  "dose per frame. ")
        line.addParam('doseInitial', params.FloatParam, default=0,
                      label='Initial')
        line.addParam('dosePerFrame', params.FloatParam, default=None,
                      allowsNull=True,
                      label='Per frame')
        form.addParam('gainFile', params.FileParam,
                      label='Gain image',
                      help='A gain reference related to a set of movies'
                           ' for gain correction')
        form.addParam('darkFile', params.FileParam,
                      label='Dark image',
                      help='A dark image related to a set of movies')
        return group

    def _defineParams(self, form):
        """ Add a 'Frames' section to the base class parameters and one
        extra parameter to the existing 'Streaming' section.
        """
        ProtImportMicBase._defineParams(self, form)
        form.addSection('Frames')
        streamingConditioned = "dataStreaming"
        framesCondition = "inputIndividualFrames"
        form.addParam('inputIndividualFrames', params.BooleanParam,
                      default=False,
                      label="Input individual frames?",
                      help="Select Yes if movies are acquired in individual "
                           "frame files. ")
        form.addParam('numberOfIndividualFrames', params.IntParam,
                      condition=framesCondition,
                      label='Number of frames',
                      help='Provide how many frames are per movie. ')
        form.addParam('stackFrames', params.BooleanParam,
                      default=False, condition=framesCondition,
                      label="Create movie stacks?",
                      help="Select Yes if you want to create a new stack for "
                           "each movies with its frames. ")
        # This is not working so for now its hidden
        form.addParam('writeMoviesInProject', params.BooleanParam,
                      default=False,
                      condition=framesCondition + " and stackFrames",
                      label="Write stacks in the project folder?",
                      help="If Yes, the created stack files will be written "
                           "in the project folder. By default the movies will "
                           "be written in the same place where input frames "
                           "are.")
        form.addParam('movieSuffix', params.StringParam,
                      default='_frames.mrcs',
                      condition=framesCondition + " and stackFrames",
                      label="Movie suffix",
                      help="Suffix added to the output movie filename."
                           "Use the extension to select the format ("
                           "e.g., .mrcs, .stk)")
        form.addParam('deleteFrames', params.BooleanParam,
                      default=False,
                      condition=framesCondition + " and stackFrames",
                      label="Delete frame files?",
                      help="Select Yes if you want to remove the individual "
                           "frame files after creating the movie stack. ")
        streamingSection = form.getSection('Streaming')
        streamingSection.addParam('moviesToExclude', params.PointerParam,
                                  pointerClass='SetOfMovies',
                                  condition=streamingConditioned,
                                  allowsNull=True,
                                  expertLevel=params.LEVEL_ADVANCED,
                                  label="Previous movies to exclude",
                                  help="Select a setOfMovies that are already "
                                       "imported that you want to exclude for "
                                       "this import.")

    # --------------------------- INSERT functions ----------------------------
    def _insertAllSteps(self):
        """ Insert the streaming import step when streaming or when the
        input are individual frames, the regular import step otherwise.
        """
        # Only the import movies has property 'inputIndividualFrames'
        # so let's query in a non-intrusive manner
        inputIndividualFrames = getattr(self, 'inputIndividualFrames', False)
        if self.dataStreaming or inputIndividualFrames:
            funcName = 'importImagesStreamStep'
        else:
            funcName = 'importImagesStep'
        self._insertFunctionStep(funcName, self.getPattern(),
                                 self.voltage.get(),
                                 self.sphericalAberration.get(),
                                 self.amplitudeContrast.get(),
                                 self.magnification.get())

    # --------------------------- INFO functions -------------------------------
    def _validate(self):
        """ Add to the base class errors: individual frames requested
        without stacking them, and gain/dark patterns that match no file.
        """
        errors = ProtImportMicBase._validate(self)
        if self.inputIndividualFrames and not self.stackFrames:
            errors.append("Scipion does not support individual frames. "
                          "You must set to Yes *Create movie stacks?* "
                          "parameter.")

        # Using getMatchFile to find a file that matches with the given pattern.In case it is empty, the
        # validation fails, otherwise the first file that matches is returned.
        if not self.gainFile.empty():
            if not self.getGain():
                errors.append("There is no file that corresponds to the gain file " + str(self.gainFile.get()))
            else:
                logger.info("It has been used as a gain file: %s" % self.getGain())
        if not self.darkFile.empty():
            if not self.getDark():
                errors.append("There is no file that corresponds to the dark file " + str(self.darkFile.get()))
            else:
                logger.info("It has been used as a dark file: %s" % self.getDark())
        return errors

    def getGain(self):
        """ Return the gain file (or None), resolved on first call. """
        if self.gain == -1:
            self.gain = self.getGainOrDark(self.gainFile)
        return self.gain

    def getDark(self):
        """ Return the dark file (or None), resolved on first call. """
        if self.dark == -1:
            self.dark = self.getGainOrDark(self.darkFile)
        return self.dark

    def getGainOrDark(self, path):
        """ Return the first file matching the pattern held by the given
        parameter, or None if the parameter is empty or nothing matches.
        """
        if path.empty():
            return None
        matchinFiles = getMatchingFiles(path.get())
        if not matchinFiles:
            return None
        return matchinFiles[0]

    # --------------------------- UTILS functions ------------------------------
    def setSamplingRate(self, movieSet):
        """ Besides the sampling rate, set the gain, the dark and the
        dose values to the given set.
        """
        ProtImportMicBase.setSamplingRate(self, movieSet)
        movieSet.setGain(self.getGain())
        movieSet.setDark(self.getDark())
        acq = movieSet.getAcquisition()
        acq.setDoseInitial(self.doseInitial.get())
        acq.setDosePerFrame(self.dosePerFrame.get())

    def _setupFirstImage(self, movie, imgSet):
        """ Read the dimensions of the given movie and set them, together
        with the frames range, to the movie and to the set. Files ending
        in 'bz2' or 'tbz' are first decompressed in the tmp folder.
        """
        # Create a movie object to read dimensions
        dimMovie = movie.clone()
        movieFn = movie.getFileName()

        def decompress(program, args, ext, nExt):
            # Work on a link in the tmp folder and point dimMovie to the
            # file name expected after running the program
            movieFolder = self._getTmpPath()
            movieName = basename(movie.getFileName())
            movieTmpLink = join(movieFolder, movieName)
            pwutils.cleanPath(movieTmpLink)
            pwutils.createAbsLink(os.path.abspath(movieFn), movieTmpLink)
            self.runJob(program, args % movieName, cwd=movieFolder)
            dimMovie.setFileName(movieTmpLink.replace(ext, nExt))

        if movieFn.endswith('bz2'):
            decompress('bzip2', '-d -f %s', '.bz2', '')
        elif movieFn.endswith('tbz'):
            decompress('tar', 'jxf %s', '.tbz', '.mrc')

        dim = dimMovie.getDim()
        self.info("Dim: (%s)" % ", ".join(map(str, dim)))
        # NOTE(review): presumably [first, last, first in file] -- confirm
        # against setFramesRange
        framesRange = [1, dim[2], 1]
        movie.setFramesRange(framesRange)
        imgSet.setDim(dim)
        imgSet.setFramesRange(framesRange)

    def ignoreCopy(self, source, dest):
        """ Function with the copy/link signature that does nothing. """
        pass

    def getCopyOrLink(self):
        """ Return ignoreCopy when the stacks created from individual
        frames are written in the project, otherwise the function
        returned by the base class.
        """
        if (self.inputIndividualFrames and self.stackFrames and
                self.writeMoviesInProject):
            return self.ignoreCopy
        else:
            return ProtImportMicBase.getCopyOrLink(self)