
# **************************************************************************
# *
# * Authors:  Ruben Sanchez (rsanchez@cnb.csic.es), April 2017
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************

import os
import re

from pyworkflow import VERSION_2_0
from pyworkflow.utils.path import copyTree
import pyworkflow.protocol.params as params
from pwem.protocols import ProtProcessParticles
import pwem.emlib.metadata as md

from ..convert import writeSetOfParticles, setXmippAttributes
from ..base import XmippProtocol

N_MAX_NEG_SETS = 5


class XmippProtScreenDeepLearning(ProtProcessParticles, XmippProtocol):
    """ Protocol for screening particles using deep learning. """
    _label = 'screen deep learning'
    _lastUpdateVersion = VERSION_2_0
    _conda_env = 'xmipp_DLTK_v0.3'

    # --------------------------- DEFINE param functions --------------------------------------------
    def _defineParams(self, form):
        # GPU settings
        form.addHidden(params.USE_GPU, params.BooleanParam, default=True,
                       expertLevel=params.LEVEL_ADVANCED,
                       label="Use GPU (vs CPU)",
                       help="Set to true if you want to use GPU implementation")
        form.addHidden(params.GPU_LIST, params.StringParam, default='0',
                       expertLevel=params.LEVEL_ADVANCED,
                       label="Choose GPU IDs",
                       help="GPU may have several cores. Set it to zero"
                            " if you do not know what we are talking about."
                            " First core index is 0, second 1 and so on.")
        form.addParallelSection(threads=2, mpi=0)

        form.addSection(label='Input')
        form.addParam('doContinue', params.BooleanParam, default=False,
                      label='Use previously trained model?',
                      help='If you set to *Yes*, you should select a previous '
                           'run of type *%s* class and some of the input parameters '
                           'will be taken from it.' % self.getClassName())
        form.addParam('continueRun', params.PointerParam,
                      label='Select previous run', allowsNull=True,
                      condition='doContinue', pointerClass=self.getClassName(),
                      help='Select a previous run to continue from.')
        form.addParam('keepTraining', params.BooleanParam,
                      label='Continue training on previously trained model?',
                      default=True, condition='doContinue',
                      help='If you set to *Yes*, you should provide a training set')
        form.addParam('inTrueSetOfParticles', params.PointerParam,
                      label="True particles", pointerClass='SetOfParticles',
                      allowsNull=True,
                      condition="not doContinue or keepTraining",
                      help='Select a set of particles that contains mostly true particles')
        form.addParam('numberOfNegativeSets', params.IntParam,
                      label='Number of different negative datasets', default='1',
                      condition="not doContinue or keepTraining",
                      help='Data from all negative datasets will be used for training. '
                           'Maximum number is 4.\n')
        for num in range(1, N_MAX_NEG_SETS):
            form.addParam('negativeSet_%d' % num, params.PointerParam,
                          label="Set of negative train particles %d" % num,
                          condition='(numberOfNegativeSets<=0 or numberOfNegativeSets >=%d) '
                                    'and (not doContinue or keepTraining)' % num,
                          pointerClass='SetOfParticles', allowsNull=True,
                          help='Select the set of negative particles for training.')
            form.addParam('inNegWeight_%d' % num, params.IntParam,
                          label="Weight of negative train particles %d" % num,
                          expertLevel=params.LEVEL_ADVANCED,
                          default='1', allowsNull=True,
                          condition='(numberOfNegativeSets<=0 or numberOfNegativeSets >=%d) and '
                                    '(not doContinue or keepTraining)' % num,
                          help='Select the weight for the negative set of particles. '
                               'The weight value indicates the number of times '
                               'each image may be included at most per epoch. '
                               'Positive particles are weighted with 1. '
                               'If weight is -1, weight will be calculated such '
                               'that the contribution of additional data is '
                               'equal to the contribution of positive particles')

        form.addParam('predictSetOfParticles', params.PointerParam,
                      label="Set of putative particles to score",
                      pointerClass='SetOfParticles',
                      help='Select the set of putative particles to classify as good (score close '
                           'to 1.0) or bad (score close to 0.0).')

        form.addSection(label='Training')
        form.addParam('nEpochs', params.FloatParam,
                      label="Number of epochs", default=5.0,
                      condition="not doContinue or keepTraining",
                      help='Number of epochs for neural network training.')
        form.addParam('learningRate', params.FloatParam,
                      label="Learning rate", default=1e-4,
                      condition="not doContinue or keepTraining",
                      help='Learning rate for neural network training')
        form.addParam('auto_stopping', params.BooleanParam,
                      label='Auto stop training when convergence is detected?',
                      default=True, condition="not doContinue or keepTraining",
                      help='If you set to *Yes*, the program will automatically '
                           'stop training if there is no improvement for '
                           '2 consecutive epochs; the learning rate will then be '
                           'decreased by a factor of 10. '
                           'If learningRate_t < 0.01*learningRate_0, training will stop. '
                           'Warning: sometimes convergence seems to be reached, '
                           'but after some time improvement can still happen. '
                           'Not recommended for very small data sets (<100 true particles)')
        form.addParam('l2RegStrength', params.FloatParam,
                      label="Regularization strength", default=1e-5,
                      expertLevel=params.LEVEL_ADVANCED,
                      condition="not doContinue or keepTraining",
                      help='L2 regularization for neural network weights. '
                           'Make it bigger if suffering from overfitting. '
                           'Typical values range from 1e-1 to 1e-6')
        form.addParam('nModels', params.IntParam,
                      label="Number of models for ensemble", default=2,
                      expertLevel=params.LEVEL_ADVANCED,
                      condition="not doContinue",
                      help='Number of models to fit in order to build an ensemble. '
                           'Typical values are 1 to 5. The more the better, '
                           'until a point where no gain is obtained. '
                           'Each model increases running time linearly')
        form.addParam('doTesting', params.BooleanParam, default=False,
                      label='Perform testing after training?',
                      expertLevel=params.LEVEL_ADVANCED,
                      help='If you set to *Yes*, you should select a testing '
                           'positive set and a testing negative set')
        form.addParam('testPosSetOfParticles', params.PointerParam,
                      label="Set of positive test particles",
                      expertLevel=params.LEVEL_ADVANCED,
                      pointerClass='SetOfParticles', condition='doTesting',
                      help='Select the set of ground-truth positive particles.')
        form.addParam('testNegSetOfParticles', params.PointerParam,
                      label="Set of negative test particles",
                      expertLevel=params.LEVEL_ADVANCED,
                      pointerClass='SetOfParticles', condition='doTesting',
                      help='Select the set of ground-truth negative particles.')

    def _validate(self):
        return self.validateDLtoolkit()

    # --------------------------- INSERT steps functions --------------------------------------------
    def _insertAllSteps(self):
        """ """
        def _getFname2WeightDict(fnameToSetAndWeight):
            """ arg: fnameToSetAndWeight = { fname: [(SetOfParticles, weight:int)] }
                return: { fname: weight:int }
            """
            if fnameToSetAndWeight is None:
                return None
            dictONameToWeight = {fname: fnameToSetAndWeight[fname][-1]
                                 for fname in fnameToSetAndWeight
                                 if fnameToSetAndWeight[fname][0] is not None}
            if len(dictONameToWeight) == 0:
                return None
            else:
                return dictONameToWeight

        posSetTrainDict = {self._getExtraPath("inputTrueParticlesSet.xmd"): 1}
        negSetTrainDict = {}
        for num in range(1, N_MAX_NEG_SETS):
            if self.numberOfNegativeSets <= 0 or self.numberOfNegativeSets >= num:
                negSetTrainDict[self._getExtraPath("negativeSet_%d.xmd" % num)] = \
                    self.__dict__["inNegWeight_%d" % num].get()

        setPredictDict = {self._getExtraPath("predictSetOfParticles.xmd"): 1}

        if self.doTesting.get() and self.testPosSetOfParticles.get() and self.testNegSetOfParticles.get():
            setTestPosDict = {self._getExtraPath("testTrueParticlesSet.xmd"): 1}
            setTestNegDict = {self._getExtraPath("testFalseParticlesSet.xmd"): 1}
        else:
            setTestPosDict = None
            setTestNegDict = None

        self._insertFunctionStep('convertInputStep', posSetTrainDict,
                                 negSetTrainDict, setPredictDict,
                                 setTestPosDict, setTestNegDict)
        if not self.doContinue.get() or self.keepTraining.get():
            self._insertFunctionStep('train', posSetTrainDict, negSetTrainDict)
        self._insertFunctionStep('predict', setTestPosDict, setTestNegDict, setPredictDict)
        self._insertFunctionStep('createOutputStep')

    # --------------------------- STEPS functions -------------------------------
    def convertInputStep(self, *dataDicts):

        def __getSetOfParticlesFromFname(fname):
            if fname == self._getExtraPath("inputTrueParticlesSet.xmd"):
                return self.inTrueSetOfParticles.get()
            elif fname == self._getExtraPath("predictSetOfParticles.xmd"):
                return self.predictSetOfParticles.get()
            elif fname == self._getExtraPath("testTrueParticlesSet.xmd"):
                return self.testPosSetOfParticles.get()
            elif fname == self._getExtraPath("testFalseParticlesSet.xmd"):
                return self.testNegSetOfParticles.get()
            else:
                matchObj = re.match(self._getExtraPath(r"negativeSet_(\d+).xmd"), fname)
                if matchObj:
                    num = matchObj.group(1)
                    return self.__dict__["negativeSet_%s" % num].get()
                else:
                    raise ValueError("Error, unexpected fname")

        if ((not self.doContinue.get() or self.keepTraining.get())
                and self.nEpochs.get() > 0):
            assert self.inTrueSetOfParticles.get() is not None, \
                "Positive particles must be provided for training if nEpochs != 0"

        for dataDict in dataDicts:
            if dataDict is not None:
                for fnameParticles in sorted(dataDict):
                    setOfParticles = __getSetOfParticlesFromFname(fnameParticles)
                    writeSetOfParticles(setOfParticles, fnameParticles)
    def __dataDict_toStrs(self, dataDict):
        fnamesStr = []
        weightsStr = []
        for fname in dataDict:
            fnamesStr.append(fname)
            weightsStr.append(str(dataDict[fname]))
        return ":".join(fnamesStr), ":".join(weightsStr)
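    # Illustrative sketch (not part of the original protocol): __dataDict_toStrs
    # serializes a {metadataFile: weight} dict into two colon-separated strings,
    # which is the format passed to the xmipp_deep_consensus arguments below.
    # Paths and weights here are hypothetical examples:
    #   dataDict = {"extra/negativeSet_1.xmd": 1, "extra/negativeSet_2.xmd": -1}
    #   fnames, weights = self.__dataDict_toStrs(dataDict)
    #   # fnames  == "extra/negativeSet_1.xmd:extra/negativeSet_2.xmd"
    #   # weights == "1:-1"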
    def train(self, posTrainDict, negTrainDict):
        """ posTrainDict, negTrainDict: { fnameToMetadata: weight (int) }
        """
        nEpochs = self.nEpochs.get()
        netDataPath = self._getExtraPath('nnetData')
        if self.doContinue.get():
            prevRunPath = self.continueRun.get()._getExtraPath('nnetData')
            copyTree(prevRunPath, netDataPath)
            if not self.keepTraining.get():
                nEpochs = 0

        if self.usesGpu():
            numberOfThreads = None
            gpuToUse = self.getGpuList()[0]
        else:
            numberOfThreads = self.numberOfThreads.get()
            gpuToUse = None

        fnamesPos, weightsPos = self.__dataDict_toStrs(posTrainDict)
        fnamesNeg, weightsNeg = self.__dataDict_toStrs(negTrainDict)
        args = " -n %s --mode train -p %s -f %s --trueW %s --falseW %s" % (
            netDataPath, fnamesPos, fnamesNeg, weightsPos, weightsNeg)
        args += " -e %s -l %s -r %s -m %s " % (nEpochs, self.learningRate.get(),
                                               self.l2RegStrength.get(),
                                               self.nModels.get())
        if not self.auto_stopping.get():
            args += " -s"

        if gpuToUse is not None:
            args += " -g %s" % gpuToUse
        if numberOfThreads is not None:
            args += " -t %s" % numberOfThreads
        self.runJob('xmipp_deep_consensus', args, numberOfMpi=1,
                    env=self.getCondaEnv())
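    # Illustrative sketch (hypothetical paths and default parameter values): with
    # one positive and one negative training metadata file, train() assembles a
    # command roughly like
    #   xmipp_deep_consensus -n Runs/000X/extra/nnetData --mode train \
    #       -p Runs/000X/extra/inputTrueParticlesSet.xmd \
    #       -f Runs/000X/extra/negativeSet_1.xmd \
    #       --trueW 1 --falseW 1 -e 5.0 -l 0.0001 -r 1e-05 -m 2 -g 0
    # and runs it through self.runJob() inside the 'xmipp_DLTK_v0.3' conda environment.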
    def predict(self, posTestDict, negTestDict, predictDict):
        """ posTestDict, negTestDict, predictDict: { fnameToMetadata: weight (int) }
        """
        netDataPath = self._getExtraPath('nnetData')
        if not os.path.isdir(netDataPath) and self.doContinue.get():
            prevRunPath = self.continueRun.get()._getExtraPath('nnetData')
            copyTree(prevRunPath, netDataPath)

        if self.usesGpu():
            numberOfThreads = None
            gpuToUse = self.getGpuList()[0]
        else:
            numberOfThreads = self.numberOfThreads.get()
            gpuToUse = None

        outParticlesPath = self._getPath("particles.xmd")
        fnamesPred, weightsPred = self.__dataDict_toStrs(predictDict)
        args = " -n %s --mode score -i %s -o %s " % (netDataPath, fnamesPred,
                                                     outParticlesPath)

        if posTestDict and negTestDict:
            fnamesPosTest, weightsPosTest = self.__dataDict_toStrs(posTestDict)
            fnamesNegTest, weightsNegTest = self.__dataDict_toStrs(negTestDict)
            args += " --testingTrue %s --testingFalse %s " % (fnamesPosTest,
                                                              fnamesNegTest)

        if gpuToUse is not None:
            args += " -g %s" % gpuToUse
        if numberOfThreads is not None:
            args += " -t %s" % numberOfThreads
        self.runJob('xmipp_deep_consensus', args, numberOfMpi=1,
                    env=self.getCondaEnv())
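    # Illustrative sketch (hypothetical paths): predict() calls the same binary in
    # scoring mode, e.g.
    #   xmipp_deep_consensus -n Runs/000X/extra/nnetData --mode score \
    #       -i Runs/000X/extra/predictSetOfParticles.xmd \
    #       -o Runs/000X/particles.xmd -g 0
    # with --testingTrue/--testingFalse appended only when both test sets were provided.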
    def createOutputStep(self):
        imgSet = self.predictSetOfParticles.get()
        partSet = self._createSetOfParticles()
        partSet.copyInfo(imgSet)
        partSet.copyItems(imgSet,
                          updateItemCallback=self._updateParticle,
                          itemDataIterator=md.iterRows(self._getPath("particles.xmd"),
                                                       sortByLabel=md.MDL_ITEM_ID))
        self._defineOutputs(outputParticles=partSet)
        self._defineSourceRelation(imgSet, partSet)
    # --------------------------- INFO functions --------------------------------------------
    def _summary(self):
        summary = []
        return summary

    def _methods(self):
        pass

    # --------------------------- UTILS functions --------------------------------------------
    def _updateParticle(self, item, row):
        setXmippAttributes(item, row, md.MDL_ZSCORE_DEEPLEARNING1)
        if row.getValue(md.MDL_ENABLED) <= 0:
            item._appendItem = False
        else:
            item._appendItem = True