Source code for emfacilities.protocols.report_html

# -*- coding: utf-8 -*-
# **************************************************************************
# *
# * Authors:     J.M. De la Rosa Trevin (jmdelarosa@cnb.csic.es)
# *              Pablo Conesa (pconesa@cnb.csic.es)
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************

import json
import os
from os.path import join, exists, abspath, basename
import numpy as np
import multiprocessing
from datetime import datetime

from pyworkflow.protocol import getUpdatedProtocol
import pyworkflow.utils as pwutils

from pwem.emlib.image import ImageHandler

from emfacilities import Plugin
from .summary_provider import SummaryProvider

# --------------------- CONSTANTS -----------------------------------
# These constants are the keys used in the ctfMonitor function
# getData() to store paths to micrographs, psd files and shift plots
MIC_PATH = 'imgMicPath'
PSD_PATH = 'imgPsdPath'
SHIFT_PATH = 'imgShiftPath'
# These constants are the name of the folders where thumbnails
# for the html report will be stored. They are also the keys to
# used in the execution.summary.template.html to read data (where
# they will need to be changed if they're changed here)
MIC_THUMBS = 'imgMicThumbs'
PSD_THUMBS = 'imgPsdThumbs'
SHIFT_THUMBS = 'imgShiftThumbs'
MIC_ID = 'micId'
DEFOCUS_HIST_BIN_WIDTH = 0.5
RESOLUTION_HIST_BIN_WIDTH = 0.5


[docs]class ReportHtml: """ Create an html report with a summary of the processing. The report will be updated with a given frequency. """ def __init__(self, protocol, ctfMonitor, sysMonitor, movieGainMonitor, publishCmd=None, **kwargs): # The CTF protocol to monitor self.protocol = protocol self.ctfProtocol = protocol._getCtfProtocol() self.alignProtocol = protocol._getAlignProtocol() self.micThumbSymlinks = False self.reportPath = protocol.reportPath self.reportDir = protocol.reportDir self.provider = SummaryProvider(protocol) self.ctfMonitor = ctfMonitor self.sysMonitor = sysMonitor self.movieGainMonitor = movieGainMonitor self.lastThumbIndex = 0 self.thumbsReady = 0 self.thumbPaths = {MIC_THUMBS: [], PSD_THUMBS: [], SHIFT_THUMBS: [], MIC_PATH: [], SHIFT_PATH: [], PSD_PATH: [], MIC_ID: []} # Get the html template to be used, by default use the one # in scipion/config/templates self.template = self._getHTMLTemplatePath() self.publishCmd = publishCmd self.refreshSecs = kwargs.get('refreshSecs', 60) def _getHTMLTemplatePath(self): """ Returns the path of the customized template at config/execution.summary.html or the standard scipion HTML template""" # Try if there is a customized template template = os.path.join(basename(pwutils.Config.SCIPION_CONFIG), 'execution.summary.html') if not os.path.exists(template): template = os.path.join(Plugin.getPluginTemplateDir(), 'execution.summary.template.html') else: print("Customized HTML template found at %s." % template) return template
[docs] def getHTMLReportText(self): if exists(self.template): return open(self.template, encoding="utf-8").read() else: return ""
[docs] def info(self, msg): if self.protocol._log is not None: self.protocol.info(msg) else: print(msg)
[docs] def checkNewThumbsReady(self): thumbKeys = [MIC_THUMBS] if PSD_THUMBS in self.thumbPaths.keys(): lastThumb = min(len(self.thumbPaths[PSD_THUMBS]), len(self.thumbPaths[MIC_THUMBS])) thumbKeys.append(PSD_THUMBS) else: lastThumb = len(self.thumbPaths[MIC_THUMBS]) if SHIFT_THUMBS in self.thumbPaths.keys(): thumbKeys.append(SHIFT_THUMBS) for i in range(self.thumbsReady, lastThumb): pathsReady = [exists(join(self.reportDir, self.thumbPaths[k][i])) for k in thumbKeys] if all(pathsReady): self.thumbsReady += 1 return self.thumbsReady
[docs] def setUp(self): """Setup actions for the html report: create directories to store thumbnails, check if thumbnails are already generated in the alignment protocol. """ # make report folders pwutils.makePath(join(self.reportDir, MIC_THUMBS), join(self.reportDir, PSD_THUMBS), join(self.reportDir, SHIFT_THUMBS)) # check if align protocol already has thumbnails if (hasattr(self.alignProtocol, 'doComputeMicThumbnail') and self.alignProtocol._doComputeMicThumbnail()): self.micThumbSymlinks = True
[docs] def getThumbPaths(self, thumbsDone=0, ctfData=None, ext='png', micIdSet=None): """Adds to self.thumbPaths the paths to the report thumbnails that come from the alignment and/or ctf protocol. ===== Params ===== - thumbsDone: how many thumbnails have already been generated. we will get paths starting from this index - ctfData: dict resulting from ctfMonitor.getData() - ext: extension of the thumbnail images. Defaults to png. - micIdSet: mic indexes to use """ # get psd thumbs from ctfData if ctfData is not None: for i in range(thumbsDone, len(ctfData[PSD_PATH])): psdPath = ctfData[PSD_PATH][i] movie = basename(os.path.dirname(psdPath)) psdThumb = join(PSD_THUMBS, "%s_%s" % (movie, pwutils.replaceExt(basename(psdPath), ext))) self.thumbPaths[PSD_THUMBS].append(psdThumb) self.thumbPaths[PSD_PATH].append(psdPath) # get alignment and mic thumbs if self.alignProtocol is not None: getMicFromCTF = False updatedProt = getUpdatedProtocol(self.alignProtocol) if hasattr(updatedProt, 'outputMicrographs'): outputSet = updatedProt.outputMicrographs if micIdSet is None: micIdSet = list(outputSet.getIdSet()) else: return elif self.ctfProtocol is not None: getMicFromCTF = True updatedProt = getUpdatedProtocol(self.ctfProtocol) if hasattr(updatedProt, 'outputCTF'): outputSet = updatedProt.outputCTF if micIdSet is None: micIdSet = list(outputSet.getIdSet()) else: return else: return for micId in micIdSet[thumbsDone:]: mic = outputSet[micId] if getMicFromCTF: mic = mic.getMicrograph() if hasattr(mic, 'thumbnail'): srcMicFn = abspath(mic.thumbnail.getFileName()) else: srcMicFn = abspath(mic.getFileName()) micThumbFn = join(MIC_THUMBS, pwutils.replaceExt(basename(srcMicFn), ext)) self.thumbPaths[MIC_PATH].append(srcMicFn) self.thumbPaths[MIC_THUMBS].append(micThumbFn) shiftPlot = (getattr(mic, 'plotCart', None) or getattr(mic, 'plotGlobal', None)) if shiftPlot is not None: shiftPath = "" if shiftPlot is None else abspath(shiftPlot.getFileName()) shiftCopy = "" if shiftPlot is None else join(SHIFT_THUMBS, pwutils.replaceExt(basename(shiftPath), ext)) self.thumbPaths[SHIFT_PATH].append(shiftPath) self.thumbPaths[SHIFT_THUMBS].append(shiftCopy) else: if SHIFT_PATH in self.thumbPaths: self.thumbPaths.pop(SHIFT_PATH, None) if SHIFT_THUMBS in self.thumbPaths: self.thumbPaths.pop(SHIFT_THUMBS, None) self.thumbPaths[MIC_ID].append(micId) if self.ctfProtocol is None: def getMicPSDPath(mic): if hasattr(mic, 'psdJpeg'): return mic.psdJpeg.getFileName() elif hasattr(mic, 'psdCorr'): return mic.psdCorr.getFileName() else: return None psdPath = getMicPSDPath(mic) psdThumb = None if psdPath is None: psdThumb = join(PSD_THUMBS, pwutils.replaceExt(basename(str(psdPath)), ext)) self.thumbPaths[PSD_THUMBS].append(psdThumb) self.thumbPaths[PSD_PATH].append(psdPath) else: if PSD_THUMBS in self.thumbPaths: self.thumbPaths.pop(PSD_THUMBS, None) if PSD_PATH in self.thumbPaths: self.thumbPaths.pop(PSD_PATH, None)
[docs] def generateReportImages(self, firstThumbIndex=0, micScaleFactor=6): """ Function to generate thumbnails for the report. Uses data from self.thumbPaths. ===== Params ===== - firstThumbIndex: index from which we start generating thumbnails - micScaleFactor: how much to reduce in size the micrographs. """ ih = ImageHandler() numMics = len(self.thumbPaths[MIC_PATH]) for i in range(firstThumbIndex, numMics): print('Generating images for mic %d' % (i+1)) # mic thumbnails dstImgPath = join(self.reportDir, self.thumbPaths[MIC_THUMBS][i]) if not exists(dstImgPath): if self.micThumbSymlinks: pwutils.copyFile(self.thumbPaths[MIC_PATH][i], dstImgPath) else: ih.computeThumbnail(self.thumbPaths[MIC_PATH][i], dstImgPath, scaleFactor=micScaleFactor, flipOnY=True) # shift plots if SHIFT_THUMBS in self.thumbPaths: dstImgPath = join(self.reportDir, self.thumbPaths[SHIFT_THUMBS][i]) if not exists(dstImgPath): pwutils.copyFile(self.thumbPaths[SHIFT_PATH][i], dstImgPath) # Psd thumbnails # If there ARE thumbnail for the PSD (no ctf protocol and # moviealignment hasn't computed it if PSD_THUMBS in self.thumbPaths: if self.ctfProtocol is None: srcImgPath = self.thumbPaths[PSD_PATH][i] dstImgPath = join(self.reportDir, self.thumbPaths[PSD_THUMBS][i]) if not exists(dstImgPath) and srcImgPath is not None: if srcImgPath.endswith('psd'): psdImg1 = ih.read(srcImgPath) psdImg1.convertPSD() psdImg1.write(dstImgPath) ih.computeThumbnail(dstImgPath, dstImgPath, scaleFactor=1, flipOnY=True) else: pwutils.copyFile(srcImgPath, dstImgPath) else: dstImgPath = join(self.reportDir, self.thumbPaths[PSD_THUMBS][i]) if not exists(dstImgPath): ih.computeThumbnail(self.thumbPaths[PSD_PATH][i], dstImgPath, scaleFactor=1, flipOnY=True) return
[docs] def processDefocusValues(self, defocusList): maxDefocus = self.protocol.maxDefocus.get()*1e-4 minDefocus = self.protocol.minDefocus.get()*1e-4 # Convert defocus values to microns defocusList = [i*1e-4 for i in defocusList] edges = np.arange(0, maxDefocus+DEFOCUS_HIST_BIN_WIDTH, DEFOCUS_HIST_BIN_WIDTH) edges = np.insert(edges[edges > minDefocus], 0, minDefocus) values, binEdges = np.histogram(defocusList, bins=edges, range=(minDefocus, maxDefocus)) belowThresh = 0 aboveThresh = 0 labels = ["%0.1f-%0.1f" % (x[0], x[1]) for x in zip(binEdges, binEdges[1:])] for v in defocusList: if v < minDefocus: belowThresh += 1 elif v > maxDefocus: aboveThresh += 1 zipped = list(zip(values, labels)) zipped[:0] = [(belowThresh, "0-%0.1f" % minDefocus)] # TODO unresolved method for class Iterator in python3 # zipped.append((aboveThresh, "> %0.1f" % (maxDefocus))) return zipped
[docs] def getTimeSeries(self, data): from .protocol_monitor_ctf import (PHASE_SHIFT, TIME_STAMP, DEFOCUS_U, RESOLUTION) # Get timeStamp ts = data[TIME_STAMP] timeSeries = dict() # Get phaseShift phaseShiftSerie = data[PHASE_SHIFT] phaseShiftSerie = list(zip(ts, phaseShiftSerie)) # Add it to the series timeSeries[PHASE_SHIFT] = phaseShiftSerie # Get defocusU is coming in Å, reduce it to μm defocusSerie = data[DEFOCUS_U] defocusSerie = [i * 1e-4 for i in defocusSerie] defocusSerie = list(zip(ts, defocusSerie)) # Add it to the series timeSeries[DEFOCUS_U] = defocusSerie # Get Resolution resSerie = data[RESOLUTION] resSerie = list(zip(ts, resSerie)) # Add it to the series timeSeries[RESOLUTION] = resSerie return timeSeries
[docs] def getResolutionHistogram(self, resolutionValues): if len(resolutionValues) == 0: return [] maxValue = int(np.ceil(max(resolutionValues))) edges = np.append(np.arange(0, maxValue, RESOLUTION_HIST_BIN_WIDTH), maxValue) values, binEdges = np.histogram(resolutionValues, bins=edges, range=(0, maxValue)) return list(zip(values, binEdges))
[docs] def generate(self, finished): reportTemplate = self.getHTMLReportText() if not reportTemplate: raise Exception("HTML template file '%s' not found. " % self.template) project = self.protocol.getProject() projName = project.getShortName() acquisitionLines = '' self.provider.refreshObjects() for item in self.provider.acquisition: if not acquisitionLines == '': acquisitionLines += ',' acquisitionLines += '{propertyName:"%s", propertyValue:"%s"}' % item runLines = '' wasProtocol = None for obj in self.provider.getObjects(): # If it's a protocol isProtocol = True if obj.name else False if isProtocol: if runLines != '': runLines += ']},' runLines += '{protocolName: "%s", output:[' % obj.name else: if not wasProtocol: runLines += ',' runLines += '{name: "%s", size:"%s"}' % (obj.output, obj.outSize) wasProtocol = isProtocol # End the runLines JSON object runLines += ']}' # Ctf monitor chart data data = {} if self.ctfMonitor is None else self.ctfMonitor.getData() if data: numMicsDone = len(self.thumbPaths[PSD_THUMBS]) numMics = len(data[PSD_PATH]) numMicsToDo = numMics - numMicsDone self.getThumbPaths(ctfData=data, thumbsDone=numMicsDone, micIdSet=data['idValues']) if len(data['defocusU']) < 100: data['defocusCoverage'] = self.processDefocusValues(data['defocusU']) else: data['defocusCoverage'] = self.processDefocusValues(data['defocusU'][:-50]) data['defocusCoverageLast50'] = self.processDefocusValues(data['defocusU'][-50:]) data['resolutionHistogram'] = self.getResolutionHistogram(data['resolution']) data['timeSeries'] = self.getTimeSeries(data) else: # Thumbnails for Micrograph Table numMicsDone = len(self.thumbPaths[MIC_THUMBS]) self.getThumbPaths(thumbsDone=numMicsDone) numMics = len(self.thumbPaths[MIC_PATH]) numMicsToDo = numMics - numMicsDone if numMicsToDo <= 10: # we have few new images, eg streaming mode, generate thumbnails now self.generateReportImages(firstThumbIndex=numMicsDone) else: # we have many images, generate thumbs in a separate process process = multiprocessing.Process(target=self.generateReportImages, args=(numMicsDone,)) process.start() # send over only thumbnails of the mics that have been fully processed self.thumbsReady = self.checkNewThumbsReady() thumbsLoading = numMics - self.thumbsReady for k in [MIC_THUMBS, SHIFT_THUMBS, PSD_THUMBS]: if k in self.thumbPaths: data[k] = self.thumbPaths[k][:self.thumbsReady] + ['']*thumbsLoading data[MIC_ID] = self.thumbPaths[MIC_ID] reportFinished = self.thumbsReady == numMics def convert(o): if isinstance(o, np.int64): return int(o) raise TypeError ctfData = json.dumps(data, default=convert) # Movie gain monitor chart data data = [] if self.movieGainMonitor is None else self.movieGainMonitor.getData() movieGainData = json.dumps(data) # system monitor chart data data = self.sysMonitor.getData() systemData = json.dumps(data, default=convert) tnow = datetime.now() args = {'projectName': projName, 'startTime': pwutils.dateStr(project.getCreationTime(), secs=True), 'dateStr': pwutils.prettyTime(dt=tnow, secs=True), 'projectDuration': pwutils.prettyDelta(tnow-project.getCreationTime()), 'projectStatus': "FINISHED" if finished else "RUNNING", 'scipionVersion': os.environ['SCIPION_VERSION'], 'acquisitionLines': acquisitionLines, 'runLines': runLines, 'ctfData': ctfData, 'movieGainData': movieGainData, 'systemData': systemData, 'refresh': self.refreshSecs } self.info("Writing report html to: %s" % abspath(self.reportPath)) pwutils.cleanPath(self.reportPath) reportFile = open(self.reportPath, 'w', encoding="utf-8") reportTemplate = reportTemplate % args reportFile.write(reportTemplate) reportFile.close() if self.publishCmd: self.info("Publishing the report:") cmd = self.publishCmd % {'REPORT_FOLDER': self.reportDir} self.info(cmd) os.system(cmd) return reportFinished