Source code for xmipp3.protocols.protocol_movie_dose_analysis

# **************************************************************************
# *
# * Authors:     Carlos Oscar S. Sorzano (coss@cnb.csic.es)
# *              Daniel Marchán Torres (da.marchan@cnb.csic.es)
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
import matplotlib.pyplot as plt
import numpy as np
import os
from pyworkflow import VERSION_3_0
from pyworkflow.object import Set
from pyworkflow.protocol import Protocol
from pyworkflow.protocol.params import (PointerParam, IntParam, FloatParam, LEVEL_ADVANCED)
from pyworkflow.utils.properties import Message
import pyworkflow.protocol.constants as cons
from pwem.emlib.image import ImageHandler
from pwem.objects import SetOfMovies
from pwem.protocols import ProtProcessMovies
from xmipp3.convert import getScipionObj
from pyworkflow import NEW, PROD

THRESHOLD = 2
OUTPUT_ACCEPTED = 'outputMovies'
OUTPUT_DISCARDED = 'outputMoviesDiscarded'

[docs]class XmippProtMovieDoseAnalysis(ProtProcessMovies, Protocol): """ Protocol for the dose analysis """ # FIXME: WITH .mrcs IT DOES NOT FILL THE LABELS _devStatus = PROD _label = 'movie dose analysis' _lastUpdateVersion = VERSION_3_0 _possibleOutputs = { OUTPUT_ACCEPTED: SetOfMovies, OUTPUT_DISCARDED: SetOfMovies } finished = False stats = {} estimatedIds = [] meanDoseList = [] medianDoseTemporal = [] medianDifferences = [] meanGlobal = 0 usingExperimental = False def __init__(self, **args): ProtProcessMovies.__init__(self, **args) # -------------------------- DEFINE param functions ---------------------- def _defineParams(self, form): form.addSection(label=Message.LABEL_INPUT) form.addParam('inputMovies', PointerParam, pointerClass='SetOfMovies', label=Message.LABEL_INPUT_MOVS, help='Select one or several movies. A dose analysis ' 'be calculated for each one of them.') form.addParam('percentage_threshold', FloatParam, default=5, label="Maximum percentage difference (%)", help='By default, a difference of 5% against the median dose is used to ' 'assume that the dose has an incorrect value.') form.addParam('n_samples', IntParam, default=20, label="Samples to estimate the median dose", expertLevel=LEVEL_ADVANCED, help='By default, 20 movies are used to ' 'compute the global median.') form.addParam('window', IntParam, default=50, label="Window step (movies)", expertLevel=LEVEL_ADVANCED, help='By default, every 50 movies (window=50) we ' 'compute the percentage of incorrect dose analysis to check if there ' 'is any anomally in the dose.') form.addParam('percentage_window', FloatParam, default=30, label="Windows maximum faulty percentage (%)", expertLevel=LEVEL_ADVANCED, help='By default, if 30% of the movies are discarded' 'it assume that the dose has an incorrect value that endures in time.') # -------------------------- STEPS functions ------------------------------
[docs] def createOutputStep(self): self._closeOutputSet()
def _insertAllSteps(self): # Build the list of all processMovieStep ids by # inserting each of the steps for each movie self.insertedDict = {} self.samplingRate = self.inputMovies.get().getSamplingRate() # Initial steps self.initializeParams() # Gain and Dark conversion step self.convertCIStep = [] convertStepId = self._insertFunctionStep('_convertInputStep', prerequisites=[]) self.convertCIStep.append(convertStepId) self._insertFunctionStep('createOutputStep', prerequisites=[], wait=True)
[docs] def initializeParams(self): self.framesRange = self.inputMovies.get().getFramesRange() self.dims = self.inputMovies.get().getFirstItem().getDim() self.pixelSize = self.inputMovies.get().getFirstItem().getSamplingRate() dosePerFrame = self.inputMovies.get().getFirstItem().getAcquisition().getDosePerFrame() if dosePerFrame != 0 and dosePerFrame != None: self.dosePerFrame = dosePerFrame else: self.usingExperimental = True
def _processMovie(self, movie): movieId = movie.getObjId() self.estimatedIds.append(movieId) stats = self.estimatePoissonCount(movie) self.stats[movieId] = stats fnMonitorSummary = self._getPath("summaryForMonitor.txt") if not os.path.exists(fnMonitorSummary): fhMonitorSummary = open(fnMonitorSummary, "w") else: fhMonitorSummary = open(fnMonitorSummary, "a") fhMonitorSummary.write("movie_%06d_poisson_count: mean=%f stdev=%f [min=%f,max=%f]\n" % (movieId, stats['mean'], stats['std'], stats['min'], stats['max'])) fhMonitorSummary.close()
[docs] def estimatePoissonCount(self, movie): mean_frames = [] n = movie.getNumberOfFrames() frames = [1, n/2, n] for frame in frames: frame_image = ImageHandler().read("%d@%s" % (frame, movie.getFileName())).getData() mean_dose_per_pixel = np.mean(frame_image) mean_dose_per_angstrom2 = mean_dose_per_pixel/ self.samplingRate**2 mean_frames.append(mean_dose_per_angstrom2) stats = computeStats(np.asarray(mean_frames)) self.meanDoseList.append(stats['mean']) return stats
def _loadOutputSet(self, SetClass, baseName): """ Load the output set if it exists or create a new one. fixSampling: correct the output sampling rate if binning was used, except for the case when the original movies are kept and shifts refers to that one. """ setFile = self._getPath(baseName) if os.path.exists(setFile) and os.path.getsize(setFile) > 0: outputSet = SetClass(filename=setFile) outputSet.loadAllProperties() outputSet.enableAppend() else: outputSet = SetClass(filename=setFile) outputSet.setStreamState(outputSet.STREAM_OPEN) inputMovies = self.inputMovies.get() outputSet.copyInfo(inputMovies) return outputSet def _checkNewOutput(self): if getattr(self, 'finished', False): return # If continue then we need to define again some parameters if self.isContinued(): self.initializeParams() if len(self.meanDoseList) >= self.n_samples.get() and not hasattr(self, 'mu'): medianDoseExperimental = np.median(self.meanDoseList) if hasattr(self, 'dosePerFrame'): refDose = self.dosePerFrame diff = abs((medianDoseExperimental/refDose)-1) * 100 if diff < THRESHOLD: self.mu = refDose else: self.mu = medianDoseExperimental self.usingExperimental = True self.info("The given dose per frame %f does not match with the experimental one %f" " having a difference of %f percent. " "Therefore, using the experimental as global median." %(refDose, medianDoseExperimental, diff)) else: self.mu = medianDoseExperimental if hasattr(self, 'mu'): # Load previously done items (from text file) doneList = self._readDoneList() # Check for newly done items newDone = [m for m in self.listOfMovies if m.getObjId() not in doneList and self._isMovieDone(m)] # Update the file with the newly done movies # or exit from the function if no new done movies self.debug('_checkNewOutput: ') self.debug(' listOfMovies: %s, doneList: %s, newDone: %s' % (len(self.listOfMovies), len(doneList), len(newDone))) allDone = len(doneList) + len(newDone) # We have finished when there is no more input movies # (stream closed) and the number of processed movies is # equal to the number of inputs self.finished = self.streamClosed and allDone == len(self.listOfMovies) streamMode = Set.STREAM_CLOSED if self.finished \ else Set.STREAM_OPEN if newDone: self._writeDoneList(newDone) elif not self.finished: # If we are not finished and no new output have been produced # it does not make sense to proceed and updated the outputs # so we exit from the function here return self.debug(' finished: %s ' % self.finished) self.debug(' self.streamClosed (%s) AND' % self.streamClosed) self.debug(' allDone (%s) == len(self.listOfMovies (%s)' % (allDone, len(self.listOfMovies))) self.debug(' streamMode: %s' % streamMode) # Find the acceptance intervals lower, upper = self.getLimitIntervals() self.info(f"Acceptance interval: ({lower:.4f}, {upper:.4f})") self.info(f"Global median: ({self.mu:.4f})") acceptedMovies = [] discardedMovies = [] for movie in newDone: newMovie = movie.clone() newMovie.setFramesRange(self.framesRange) movieId = newMovie.getObjId() stats = self.stats[movieId] mean = stats['mean'] std = stats['std'] minDose = stats['min'] maxDose = stats['max'] diff_median = ((mean/self.mu)-1)*100 setAttribute(newMovie, '_DIFF_TO_DOSE_PER_ANGSTROM2', abs(diff_median)) setAttribute(newMovie, '_MEAN_DOSE_PER_ANGSTROM2', mean) setAttribute(newMovie, '_STD_DOSE_PER_ANGSTROM2', std) setAttribute(newMovie, '_MIN_DOSE_PER_FRAME', minDose) setAttribute(newMovie, '_MAX_DOSE_PER_FRAME', maxDose) self.medianDifferences.append(diff_median) self.medianDoseTemporal.append(mean) self.info('Movie with id %d has a mean dose per frame of %f and a diff of %f percent' %(movie.getObjId(), mean, diff_median)) if lower <= mean <= upper: self.info('accepted') acceptedMovies.append(newMovie) else: self.info('discarded') discardedMovies.append(newMovie) if len(self.medianDifferences) % self.window.get() == 0: if self.usingExperimental: # Update the median global self.mu = np.median(self.meanDoseList) self.info('Updating median global to %f' %self.mu) windowList = self.medianDoseTemporal[-self.window.get():] percentage = (1 - (len([dose for dose in windowList if lower < dose < upper]) / len(windowList)))*100 self.info('The faulty percentage of this window is %f' %percentage) if percentage > self.percentage_window.get(): with open(self._getExtraPath('WARNING.TXT'), 'a') as f: self.info('Percentage of wrong dose in a window surpass the threshold: {}% > {}%' .format(percentage, self.percentage_window.get())) f.write('Percentage of wrong dose in a window surpass the threshold: {}% > {}% \n' .format(percentage, self.percentage_window.get())) f.close() if len(acceptedMovies)>0: moviesSet = self._loadOutputSet(SetOfMovies, 'movies.sqlite') for movie in acceptedMovies: moviesSet.append(movie) self._updateOutputSet(OUTPUT_ACCEPTED, moviesSet, streamMode) if len(discardedMovies)>0: moviesSetDiscarded = self._loadOutputSet(SetOfMovies, 'movies_discarded.sqlite') for movie in discardedMovies: moviesSetDiscarded.append(movie) self._updateOutputSet(OUTPUT_DISCARDED, moviesSetDiscarded, streamMode) plotDoseAnalysis(self.getDosePlot(), self.meanDoseList, self.mu, lower, upper) plotDoseAnalysisDiff(self.getDoseDiffPlot(), self.medianDifferences) if self.finished: # Unlock createOutputStep if finished all jobs outputStep = self._getFirstJoinStep() if outputStep and outputStep.isWaiting(): outputStep.setStatus(cons.STATUS_NEW) # ------------------------- UTILS functions --------------------------------
[docs] def getLimitIntervals(self): """ Funtion to obtain the acceptance interval limits.""" lower = self.mu - self.mu * (self.percentage_threshold.get()/100) upper = self.mu + self.mu * (self.percentage_threshold.get()/100) return lower, upper
[docs] def getDosePlot(self): return self._getExtraPath('dose_analysis_plot.png')
[docs] def getDoseDiffPlot(self): return self._getExtraPath('dose_analysis_diff_plot.png')
# --------------------------- INFO functions ------------------------------- def _validate(self): errors = [] if errors: errors.append("") return errors def _summary(self): fnSummary = self._getPath("summary.txt") if not os.path.exists(fnSummary): summary = ["No summary information yet."] else: fhSummary = open(fnSummary, "r") summary = [] for line in fhSummary.readlines(): summary.append(line.rstrip()) fhSummary.close() return summary
# --------------------- WORKERS --------------------------------------
[docs]def setAttribute(obj, label, value): if value is None: return setattr(obj, label, getScipionObj(value))
[docs]def computeStats(mean_frames): mean = np.mean(mean_frames) std = np.std(mean_frames) max_dose = np.max(mean_frames) min_dose = np.min(mean_frames) stats = {'mean': mean, 'std': std, 'max': max_dose, 'min': min_dose, } return stats
[docs]def plotDoseAnalysis(filename, doseValues, medianGlobal, lower, upper): x = np.arange(start=1, stop=len(doseValues)+1, step=1) plt.figure() plt.scatter(x, doseValues,s=10) plt.axhline(y=medianGlobal, color='r', linestyle='-', label='Median dose') plt.axhline(y=upper, color='b', linestyle='-.', label='Upper limit dose') plt.axhline(y=lower, color='g', linestyle='-.', label='Lower limit dose') plt.xlabel("Movies ID") plt.ylabel("Dose (electrons impacts per angstrom**2 )") plt.title('Dose vs time') plt.legend() plt.grid() plt.savefig(filename)
[docs]def plotDoseAnalysisDiff(filename, medianDifferences): medianDiff = np.median(medianDifferences) x = np.arange(start=1+1, stop=len(medianDifferences)+2, step=1) plt.figure() plt.scatter(x, medianDifferences, s=10) plt.axhline(y=medianDiff, color='r', linestyle='-', label='Median dose difference') plt.xlabel("Movies ID") plt.ylabel("Dose differences (%)") plt.title('Dose differences with respect to the global median vs time') plt.legend() plt.grid() plt.savefig(filename)