Source code for xmipp3.protocols.protocol_split_volume_hierarchical_cluster

# **************************************************************************
# *
# * Authors:     C.O.S. Sorzano (coss@cnb.csic.es)
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************

from os.path import join, exists
import math
import os
from shutil import copy

import pyworkflow.protocol.params as params
from pyworkflow import BETA, UPDATED, NEW, PROD

from pyworkflow import VERSION_2_0
from pyworkflow.utils.path import makePath, cleanPattern, moveFile, cleanPath
from pwem.emlib.image import ImageHandler
from pwem.constants import ALIGN_PROJ
from pwem.objects import Image, Volume
from pwem.protocols import ProtAnalysis3D
import pwem.emlib.metadata as md

from xmipp3.convert import (createItemMatrix, writeSetOfParticles,
                            rowToAlignment, setXmippAttributes, xmippToLocation)

from pwem import emlib
from xmipp3.base import findRow, writeInfoField, readInfoField, isXmippCudaPresent
from xmipp3.constants import SYM_URL, CUDA_ALIGN_SIGNIFICANT
import numpy as np


[docs]class XmippProtSplitVolumeHierarchical(ProtAnalysis3D):
    """
    Construct image groups based on the angular assignment. All images assigned within a solid angle
    are assigned to a class. Classes are not exclusive and an image may be assigned to multiple classes
    """

    _label = 'split volume hierarchical'
    _lastUpdateVersion = VERSION_2_0
    _devStatus = BETA

    def __init__(self, *args, **kwargs):
        ProtAnalysis3D.__init__(self, *args, **kwargs)

    # --------------------------- DEFINE param functions ------------------------
    def _defineParams(self, form):
        form.addHidden(params.USE_GPU, params.BooleanParam, default=True,
                       label="Use GPU for execution",
                       help="This protocol has both CPU and GPU implementation.\
                       Select the one you want to use.")

        form.addHidden(params.GPU_LIST, params.StringParam, default='0',
                       expertLevel=params.LEVEL_ADVANCED,
                       label="Choose GPU IDs",
                       help="Add a list of GPU devices that can be used")

        form.addSection(label='Input')

        form.addParam('inputVolume', params.PointerParam, pointerClass='Volume',
                      label="Input volume",
                      help='Select the input volume.')

        form.addParam('inputParticles', params.PointerParam,
                      pointerClass='SetOfParticles',
                      pointerCondition='hasAlignmentProj',
                      label="Input particles",
                      help='Select the input experimental images with an '
                           'angular assignment.')

        form.addParam('symmetryGroup', params.StringParam, default='c1',
                      label="Symmetry group",
                      help='See %s page for a description of the symmetries '
                           'accepted by Xmipp' % SYM_URL)

        form.addParam('angularSampling', params.FloatParam, default=5,
                      label='Angular sampling',
                      expertLevel=params.LEVEL_ADVANCED, help="In degrees")

        form.addParam('angularDistance', params.FloatParam, default=10,
                      expertLevel=params.LEVEL_ADVANCED,
                      label='Angular distance',
                      help="In degrees. An image belongs to a group if its "
                           "distance is smaller than this value")

        form.addParam('maxShift', params.FloatParam, default=15,
                      expertLevel=params.LEVEL_ADVANCED,
                      label='Maximum shift',
                      help="In pixels")


        form.addSection("Directional Classes")

        form.addParam('directionalClasses', params.IntParam, default=1,
                      label='Number of directional classes',
                      help="By default only one class will be computed for "
                           "each projection direction. More classes could be"
                           "computed and this is needed for protocols "
                           "split-volume. ")

        form.addParam('homogeneize', params.IntParam, default=-1,
                      label='Homogeneize groups',
                      condition="directionalClasses==1",
                      help="Set to -1 for no homogeneization. Set to 0 for homogeneizing "
                           "to the minimum of class size. Set to any other number to "
                           "homogeneize to that particular number")

        form.addParam('targetResolution', params.FloatParam, default=10,
                      condition="directionalClasses > 1",
                      label='Target resolution (A)')

        form.addParam('class2dIterations', params.IntParam, default=5,
                      expertLevel=params.LEVEL_ADVANCED,
                      condition="directionalClasses > 1",
                      label='Number of 2D classification iterations')

        form.addParam('maxCLimgs', params.IntParam, default=5000,
                      condition="directionalClasses > 1",
                      expertLevel=params.LEVEL_ADVANCED,
                      label='Max. Number of images per cone',
                      help='If there are more than this number of images in a cone, '
                           'then a random subset of this size is taken. Set to -1'
                           'to disable this option.')

        form.addSection("Split volume")
        form.addParam('splitVolume', params.BooleanParam, label="Split volume",
                      condition="directionalClasses > 1", default=False,
                      help='If desired, the protocol can use the directional classes calculated in this protocol to divide the input volume '
                           'into 2 distinct 3D classes as measured by PCA. If the PCA component is just noise, it means that the algorithm '
                           'does not find a difference between the 2D classes')
        form.addParam('Niter', params.IntParam,
                      label="Number of iterations", default=5000,
                      condition="splitVolume",
                      expertLevel=params.LEVEL_ADVANCED,
                      help="Number of iterations to perform the volume splitting.")
        form.addParam('Nrec', params.IntParam,
                      label="Number of reconstructions", default=5,
                      condition="splitVolume",
                      expertLevel=params.LEVEL_ADVANCED,
                      help="Number of reconstructions to perform the hierarchical clustering.")
        form.addParam('fr_approx', params.BooleanParam,
                      label="Approximative reconstruction", default=True,
                      condition="splitVolume",
                      expertLevel=params.LEVEL_ADVANCED,
                      help="If on, an approximation of the Fourier reconstruction algorithm will be used. " \
                           "This will result in faster processing times, but (slightly) less precise result")
        form.addParam('fr_gpu_mpi', params.IntParam,
                      label="Reconstruction GPU MPI", default=1, validators=[params.GE(1,'Error must be greater than 1')],
                      condition="splitVolume",
                      expertLevel=params.LEVEL_ADVANCED,
                      help="Number of MPI processes used for the GPU version of the Fourier Reconstruction")
        form.addParam('fr_gpu_threads', params.IntParam,
                      label="Reconstruction threads", default=1, validators=[params.GE(1,'Error must be greater than 1')],
                      condition="splitVolume",
                      expertLevel=params.LEVEL_ADVANCED,
                      help="Number of threads used for the GPU version of the Fourier Reconstruction")

        form.addParallelSection(threads=0, mpi=8)

    # --------------------------- INSERT steps functions ------------------------

    def _insertAllSteps(self):
        self._insertFunctionStep('convertInputStep',
                                 self.inputParticles.get().getObjId(),
                                 self.inputVolume.get().getObjId())

        self._insertFunctionStep('constructGroupsStep',
                                 self.inputParticles.get().getObjId(),
                                 self.angularSampling.get(),
                                 self.angularDistance.get(),
                                 self.symmetryGroup.get())

        self._insertFunctionStep('classifyGroupsStep')
        if self.directionalClasses.get() == 1 and self.homogeneize.get() >= 0:
            self._insertFunctionStep('homogeneizeStep')

        self._insertFunctionStep('refineAnglesStep')

        if self.splitVolume and self.directionalClasses.get() > 1:
            self._insertFunctionStep("splitVolumeStep")

        self._insertFunctionStep('cleaningStep')
        self._insertFunctionStep('createOutputStep')

    # --------------------------- STEPS functions -------------------------------
[docs]    def convertInputStep(self, particlesId, volId):
        """ Write the input images as a Xmipp metadata file.
        particlesId: is only need to detect changes in
        input particles and cause restart from here.
        """
        inputParticles = self.inputParticles.get()
        inputVolume = self.inputVolume.get()

        writeSetOfParticles(inputParticles, self._getExpParticlesFn())

        img = ImageHandler()
        img.convert(inputVolume, self._getInputVolFn())

        if self._useSeveralClasses():
            # Scale particles
            Xdim = inputParticles.getXDim()
            Ts = inputParticles.getSamplingRate()
            newTs = self.targetResolution.get() * 0.4
            newTs = max(Ts, newTs)
            newXdim = int(Xdim * Ts / newTs)
            writeInfoField(self._getExtraPath(), "sampling",
                           emlib.MDL_SAMPLINGRATE, newTs)
            writeInfoField(self._getExtraPath(), "size", emlib.MDL_XSIZE,
                           newXdim)
            self.runJob("xmipp_image_resize",
                        "-i %s -o %s --save_metadata_stack %s --fourier %d" %
                        (self._getExpParticlesFn(),
                         self._getTmpPath('scaled_particles.stk'),
                         self._getTmpPath('scaled_particles.xmd'),
                         newXdim))
            # Scale volume
            Xdim = inputVolume.getXDim()
            if Xdim != newXdim:
                self.runJob("xmipp_image_resize", "-i %s --dim %d"
                            % (self._getInputVolFn(), newXdim), numberOfMpi=1)

[docs]    def constructGroupsStep(self, particlesId, angularSampling,
                            angularDistance, symmetryGroup):

        args = '-i %s ' % self._getInputVolFn()
        args += '-o %s ' % self._getExtraPath("gallery.stk")
        args += '--sampling_rate %f ' % self.angularSampling
        args += '--sym %s ' % self.symmetryGroup
        args += '--method fourier 1 0.25 bspline --compute_neighbors '
        args += '--angular_distance %f ' % self.angularDistance
        args += '--experimental_images %s ' % self._getInputParticlesFn()
        args += '--max_tilt_angle 90 '

        # Create a gallery of projections of the input volume
        # with the given angular sampling
        self.runJob("xmipp_angular_project_library", args)

        args = '--i1 %s ' % self._getInputParticlesFn()
        args += '--i2 %s ' % self._getExtraPath("gallery.doc")
        args += '-o %s ' % self._getExtraPath("neighbours.xmd")
        args += '--dist %f ' % self.angularDistance
        args += '--sym %s ' % self.symmetryGroup
        args += '--check_mirrors '

        # Compute several groups of the experimental images into
        # different angular neighbourhoods
        self.runJob("xmipp_angular_neighbourhood", args, numberOfMpi=1)

[docs]    def classifyOneGroup(self, projNumber, projMdBlock, projRef,
                         mdClasses, mdImages):
        """ Classify one of the neighbourhood groups if not empty.
         Class information will be stored in output metadata: mdOut
        """
        blockSize = md.getSize(projMdBlock)
        fnToUse = projMdBlock
        if self.maxCLimgs>0 and blockSize>self.maxCLimgs:
            fnToUSe = self._getTmpPath("coneImages.xmd")
            self.runJob("xmipp_metadata_utilities","-i %s -o %s --operate random_subset %d"\
                        %(projMdBlock,fnToUSe,self.maxCLimgs),numberOfMpi=1)
        Nclasses = self.directionalClasses.get()
        Nlevels = int(math.ceil(math.log(Nclasses) / math.log(2)))

        # Skip projection directions with not enough images to
        # create a given number of classes
        if blockSize / Nclasses < 10:
            return

        if self.useGpu.get():
            count=0
            GpuListCuda=''
            if self.useQueueForSteps() or self.useQueue():
                GpuList = os.environ["CUDA_VISIBLE_DEVICES"]
                GpuList = GpuList.split(",")
                for elem in GpuList:
                    GpuListCuda = GpuListCuda+str(count)+' '
                    count+=1
            else:
                GpuList = ' '.join([str(elem) for elem in self.getGpuList()])
                GpuListAux = ''
                for elem in self.getGpuList():
                    GpuListCuda = GpuListCuda+str(count)+' '
                    GpuListAux = GpuListAux+str(elem)+','
                    count+=1
                os.environ["CUDA_VISIBLE_DEVICES"] = GpuListAux

            fnDir = self._getExtraPath("direction_%s" % projNumber)
            if not exists(fnDir):
                makePath(fnDir)

            for i in range(self.class2dIterations.get()):
                mdRefName = join(fnDir, 'reference.xmd')
                if i==0:
                    mdRef = md.MetaData()
                    for j in range(2):
                        row = md.Row()
                        row.setValue(md.MDL_REF, j+1)
                        row.setValue(md.MDL_IMAGE, projRef)
                        row.addToMd(mdRef)
                    mdRef.write(mdRefName, emlib.MD_APPEND)
                else:
                    mdRefName = join(fnDir,"level_%02d"%(i-1), "class_classes.xmd")
                if not exists(join(fnDir,"level_%02d"%i)):
                    makePath(join(fnDir,"level_%02d"%i))
                args = '-i %s -r %s -o images.xmd --odir %s' \
                       ' --keepBestN 1 --oUpdatedRefs %s ' % (fnToUse, mdRefName, join(fnDir,"level_%02d"%i), 'class_classes')
                args += ' --dev %s ' %GpuListCuda
                self.runJob(CUDA_ALIGN_SIGNIFICANT, args, numberOfMpi=1)
            copy(join(fnDir,"level_%02d"%(self.class2dIterations.get()-1), "images.xmd"), join(fnDir,"images.xmd"))

            # After classification the stk and xmd files should be produced
            classesXmd = join(fnDir, "level_%02d/class_classes.xmd" % (self.class2dIterations.get()-1))
            classesStk = join(fnDir, "level_%02d/class_classes.stk" % (self.class2dIterations.get()-1))

        else:
            fnDir = self._getExtraPath("direction_%s" % projNumber)
            if not exists(join(fnDir,"level_00")):
                makePath(fnDir)
                # Run CL2D classification for the images assigned to one direction
                args = "-i %s " % fnToUse
                args += "--odir %s " % fnDir
                args += "--ref0 %s --iter %d --nref %d " % \
                        (projRef, self.class2dIterations, Nclasses)
                args += "--distance correlation --classicalMultiref "
                args += "--maxShift %f " % self.maxShift
                try:
                    self.runJob("xmipp_classify_CL2D", args, numberOfMpi=self.numberOfMpi.get() * self.numberOfThreads.get())
                except:
                    return

            # After CL2D the stk and xmd files should be produced
            classesXmd = join(fnDir, "level_%02d/class_classes.xmd" % Nlevels)
            classesStk = join(fnDir, "level_%02d/class_classes.stk" % Nlevels)

        # Let's check that the output was produced
        if not exists(classesStk):
            return

        # Run align of the class average and the projection representative
        fnAlignRoot = join(fnDir, "classes")
        args = "-i %s " % classesStk
        args += "--ref %s " % projRef
        args += " --oroot %s --iter 1" % fnAlignRoot
        self.runJob("xmipp_image_align", args, numberOfMpi=1)

        # Apply alignment
        args = "-i %s_alignment.xmd --apply_transform" % fnAlignRoot
        self.runJob("xmipp_transform_geometry", args, numberOfMpi=1)

        for classNo in range(1, Nclasses + 1):
            localImagesMd = emlib.MetaData("class%06d_images@%s"
                                           % (classNo, classesXmd))

            # New class detected
            self.classCount += 1
            # Check which images have not been assigned yet to any class
            # and assign them to this new class
            for objId in localImagesMd:
                imgId = localImagesMd.getValue(emlib.MDL_ITEM_ID, objId)
                # Add images not classify yet and store their class number
                if imgId not in self.classImages:
                    self.classImages.add(imgId)
                    newObjId = mdImages.addObject()
                    mdImages.setValue(emlib.MDL_ITEM_ID, imgId, newObjId)
                    mdImages.setValue(emlib.MDL_REF2, self.classCount, newObjId)

            newClassId = mdClasses.addObject()
            mdClasses.setValue(emlib.MDL_REF, projNumber, newClassId)
            mdClasses.setValue(emlib.MDL_REF2, self.classCount, newClassId)
            mdClasses.setValue(emlib.MDL_IMAGE, "%d@%s" %
                               (classNo, classesStk), newClassId)
            mdClasses.setValue(emlib.MDL_IMAGE1, projRef, newClassId)
            mdClasses.setValue(emlib.MDL_CLASS_COUNT, localImagesMd.size(),
                               newClassId)

[docs]    def classifyGroupsStep(self):
        # Create two metadatas, one for classes and another one for images
        mdClasses = emlib.MetaData()
        mdImages = emlib.MetaData()

        fnNeighbours = self._getExtraPath("neighbours.xmd")
        fnGallery = self._getExtraPath("gallery.stk")

        self.classCount = 0
        self.classImages = set()

        for block in emlib.getBlocksInMetaDataFile(fnNeighbours):
            # Figure out the projection number from the block name
            projNumber = int(block.split("_")[1])

            self.classifyOneGroup(projNumber,
                                  projMdBlock="%s@%s" % (block, fnNeighbours),
                                  projRef="%06d@%s" % (projNumber, fnGallery),
                                  mdClasses=mdClasses,
                                  mdImages=mdImages)

        galleryMd = emlib.MetaData(self._getExtraPath("gallery.doc"))
        # Increment the reference number to starts from 1
        galleryMd.operate("ref=ref+1")
        mdJoined = emlib.MetaData()
        # Add extra information from the gallery metadata
        mdJoined.join1(mdClasses, galleryMd, emlib.MDL_REF)
        # Remove unnecessary columns
        md.keepColumns(mdJoined, "ref", "ref2", "image", "image1",
                       "classCount", "angleRot", "angleTilt")

        # Write both classes and images
        fnDirectional = self._getDirectionalClassesFn()
        self.info("Writting classes info to: %s" % fnDirectional)
        mdJoined.write(fnDirectional)

        fnDirectionalImages = self._getDirectionalImagesFn()
        self.info("Writing images info to: %s" % fnDirectionalImages)
        mdImages.write(fnDirectionalImages)

[docs]    def homogeneizeStep(self):
        minClass = self.homogeneize.get()
        fnNeighbours = self._getExtraPath("neighbours.xmd")

        # Look for the block with the minimum number of images
        if minClass == 0:
            minClass = 1e38
            for block in emlib.getBlocksInMetaDataFile(fnNeighbours):
                projNumber = int(block.split("_")[1])
                fnDir = self._getExtraPath("direction_%d" % projNumber,
                                           "level_00", "class_classes.xmd")
                if exists(fnDir):
                    blockSize = md.getSize("class000001_images@" + fnDir)
                    if blockSize < minClass:
                        minClass = blockSize

        # Construct the homogeneized metadata
        mdAll = emlib.MetaData()
        mdSubset = emlib.MetaData()
        mdRandom = emlib.MetaData()
        for block in emlib.getBlocksInMetaDataFile(fnNeighbours):
            projNumber = int(block.split("_")[1])
            fnDir = self._getExtraPath("direction_%d" % projNumber, "level_00",
                                       "class_classes.xmd")
            if exists(fnDir):
                mdDirection = emlib.MetaData("class000001_images@" + fnDir)
                mdRandom.randomize(mdDirection)
                mdSubset.selectPart(mdRandom, 0,
                                    min(mdRandom.size(), minClass))
                mdAll.unionAll(mdSubset)
        mdAll.removeDuplicates(md.MDL_ITEM_ID)
        mdAll.sort(md.MDL_ITEM_ID)
        mdAll.fillConstant(md.MDL_PARTICLE_ID, 1)
        fnHomogeneous = self._getExtraPath("images_homogeneous.xmd")
        mdAll.write(fnHomogeneous)
        self.runJob("xmipp_metadata_utilities",
                    '-i %s --operate modify_values "particleId=itemId"' % fnHomogeneous,
                    numberOfMpi=1)

[docs]    def refineAnglesStep(self):
        fnTmpDir = self._getTmpPath()
        fnDirectional = self._getDirectionalClassesFn()
        inputParticles = self.inputParticles.get()
        newTs = readInfoField(self._getExtraPath(), "sampling",
                              emlib.MDL_SAMPLINGRATE)
        newXdim = readInfoField(self._getExtraPath(), "size",
                                emlib.MDL_XSIZE)

        # Generate projections
        fnGallery = join(fnTmpDir, "gallery.stk")
        fnGalleryMd = join(fnTmpDir, "gallery.doc")
        fnVol = self._getInputVolFn()
        args = "-i %s -o %s --sampling_rate %f --sym %s" % \
               (fnVol, fnGallery, 5.0, self.symmetryGroup)
        args += " --compute_neighbors --angular_distance -1 --experimental_images %s" % fnDirectional
        self.runJob("xmipp_angular_project_library", args,
                    numberOfMpi=self.numberOfMpi.get() * self.numberOfThreads.get())

        # Global angular assignment
        maxShift = 0.15 * newXdim
        fnAngles = join(fnTmpDir, "angles_iter001_00.xmd")
        if not self.useGpu.get():
            args = '-i %s --initgallery %s --maxShift %d --odir %s --dontReconstruct --useForValidation 0' % \
               (fnDirectional, fnGalleryMd, maxShift, fnTmpDir)
            self.runJob('xmipp_reconstruct_significant', args,
                    numberOfMpi=self.numberOfMpi.get() * self.numberOfThreads.get())
        else:
            count=0
            GpuListCuda=''
            if self.useQueueForSteps() or self.useQueue():
                GpuList = os.environ["CUDA_VISIBLE_DEVICES"]
                GpuList = GpuList.split(",")
                for elem in GpuList:
                    GpuListCuda = GpuListCuda+str(count)+' '
                    count+=1
            else:
                GpuList = ' '.join([str(elem) for elem in self.getGpuList()])
                GpuListAux = ''
                for elem in self.getGpuList():
                    GpuListCuda = GpuListCuda+str(count)+' '
                    GpuListAux = GpuListAux+str(elem)+','
                    count+=1
                os.environ["CUDA_VISIBLE_DEVICES"] = GpuListAux

            args = '-i %s -r %s -o %s --dev %s ' % (fnDirectional, fnGalleryMd, fnAngles, GpuListCuda)
            self.runJob(CUDA_ALIGN_SIGNIFICANT, args, numberOfMpi=1)

        self.runJob("xmipp_metadata_utilities",
                    "-i %s --operate drop_column ref" % fnAngles, numberOfMpi=1)
        self.runJob("xmipp_metadata_utilities",
                    "-i %s --set join %s ref2" % (fnAngles, fnDirectional),
                    numberOfMpi=1)

        # Local angular assignment
        fnAnglesLocalStk = self._getPath("directional_local_classes.stk")
        args = "-i %s -o %s --sampling %f --Rmax %d --padding %d --ref %s --max_resolution %f --applyTo image1 " % \
               (fnAngles, fnAnglesLocalStk, newTs, newXdim / 2, 2, fnVol,
                self.targetResolution)
        args += " --optimizeShift --max_shift %f" % maxShift
        args += " --optimizeAngles --max_angular_change %f" % self.angularDistance
        self.runJob("xmipp_angular_continuous_assign2", args,
                    numberOfMpi=self.numberOfMpi.get() * self.numberOfThreads.get())
        moveFile(self._getPath("directional_local_classes.xmd"),
                 self._getDirectionalClassesFn())

        cleanPattern(self._getExtraPath("direction_*"))

[docs]    def runReconstruction(self, fnXmd, fnVol):
        args = "-i %s -o %s --max_resolution 0.25 --sym %s -v 0" % \
               (fnXmd, fnVol, self.symmetryGroup.get())
        if self.fr_approx.get():
            args += " --fast"
        if self.useGpu.get():
            #AJ to make it work with and without queue system
            if self.numberOfMpi.get()>1:
                N_GPUs = len((self.gpuList.get()).split(','))
                args += ' -gpusPerNode %d' % N_GPUs
                args += ' -threadsPerGPU %d' % max(self.numberOfThreads.get(),4)
            count=0
            GpuListCuda=''
            if self.useQueueForSteps() or self.useQueue():
                GpuList = os.environ["CUDA_VISIBLE_DEVICES"]
                GpuList = GpuList.split(",")
                for elem in GpuList:
                    GpuListCuda = GpuListCuda+str(count)+' '
                    count+=1
            else:
                GpuListAux = ''
                for elem in self.getGpuList():
                    GpuListCuda = GpuListCuda+str(count)+' '
                    GpuListAux = GpuListAux+str(elem)+','
                    count+=1
                os.environ["CUDA_VISIBLE_DEVICES"] = GpuListAux
            if self.numberOfMpi.get()==1:
                args += ' --device %s' % GpuListCuda
            args += ' --thr %d' % self.fr_gpu_threads.get()
            if self.numberOfMpi.get()>1:
                self.runJob('xmipp_cuda_reconstruct_fourier', args, numberOfMpi=len((self.gpuList.get()).split(','))+1)
            else:
                self.runJob('xmipp_cuda_reconstruct_fourier', args)
        else:
            self.runJob('xmipp_reconstruct_fourier_accel', args)

[docs]    def splitVolumeStep(self):
        # TODO: This should be in a dedicated job: self.runJob(myScript.py, args)
        mdDirectional = md.MetaData(self._getDirectionalClassesFn())
        ref2vals = mdDirectional.getColumnValues(emlib.MDL_REF2)
        ref2Max = max(ref2vals)

        matrixCoOc=np.zeros((ref2Max, ref2Max))

        mpiCommand = self.getHostConfig().mpiCommand.get()
        mpiCommand2 = mpiCommand % {'JOB_NODES': self.numberOfMpi.get() * self.numberOfThreads.get(),
                                   'COMMAND': ''}
        # print(mpiCommand2)
        # print(self.numberOfMpi.get())
        # print(self.numberOfThreads.get())

        fails = 0
        for i in range(self.Nrec.get()):
            fnRoot = self._getExtraPath("split%06d"%i)
            args = "-i %s --oroot %s --Niter %d --sym %s --mpiCommand '%s'" % \
                   (self._getDirectionalClassesFn(), fnRoot,
                    self.Niter.get(), self.symmetryGroup.get(), mpiCommand2)
            try:
                self.runJob("xmipp_classify_first_split3", args, numberOfMpi=1)
            except:
                fails+=1
                continue

            ########### build co-ocurrence matrix ##############
            outMd = md.MetaData(fnRoot+'_avg1.xmd')
            listObjRef1=[]
            for id1 in outMd:
                refObj = outMd.getValue(emlib.MDL_REF2, id1)
                listObjRef1.append(refObj)
            for val1 in listObjRef1:
                for val2 in listObjRef1:
                    #print("MD1: val1= ", val1, " val2= ", val2)
                    matrixCoOc[val1-1][val2-1]+=1
            #print("matrixCoOc ", matrixCoOc)

            outMd = md.MetaData(fnRoot + '_avg2.xmd')
            listObjRef2 = []
            for id1 in outMd:
                refObj = outMd.getValue(emlib.MDL_REF2, id1)
                listObjRef2.append(refObj)
            for val1 in listObjRef2:
                for val2 in listObjRef2:
                    #print("MD1: val1= ", val1, " val2= ", val2)
                    matrixCoOc[val1 - 1][val2 - 1] += 1
            #print("matrixCoOc ", matrixCoOc)

        #Changing from co-ocurrence matrix to distance matrix (avoiding divide by zero)
        if fails==self.Nrec: #AJ revisar esto
            raise Exception('xmipp_classify_first_split3 has failed')
        for i in range(ref2Max):
            for j in range(ref2Max):
                if matrixCoOc[i][j]==0:
                    matrixCoOc[i][j]+=0.001
                matrixCoOc[i][j] = 1.0/matrixCoOc[i][j]
        np.savetxt(self._getExtraPath('coocurrenceMatrix.txt'), matrixCoOc, fmt='%.4e')

        ##### hierarchical clustering algorithm #########
        from sklearn.cluster import AgglomerativeClustering
        model = AgglomerativeClustering(n_clusters=2, linkage="complete", affinity="euclidean")
        model.fit(matrixCoOc)

        listLabels = model.labels_
        #print(model.labels_)

        ####### build the final volumes with the selected images by clustering #########
        defMd1 = md.MetaData()
        defMd2 = md.MetaData()
        origMd = md.MetaData(self._getDirectionalClassesFn())
        for row in md.iterRows(origMd):
            refIdx = origMd.getValue(emlib.MDL_REF2, row.getObjId())
            if(listLabels[refIdx-1]==0):
                row.addToMd(defMd1)
            else:
                row.addToMd(defMd2)

        defMd1.write(self._getExtraPath("split1.xmd"))
        defMd2.write(self._getExtraPath("split2.xmd"))

        self.runReconstruction(self._getExtraPath("split1.xmd"), self._getExtraPath("split1.vol"))
        self.runReconstruction(self._getExtraPath("split2.xmd"), self._getExtraPath("split2.vol"))

[docs]    def cleaningStep(self):
        cleanPattern(self._getExtraPath("gallery*"))
        cleanPattern(self._getExtraPath("mask.vol"))
        cleanPattern(self._getExtraPath("neighbours.xmd"))
        cleanPattern(self._getExtraPath("split00*"))

[docs]    def createOutputStep(self):

        inputParticles = self.inputParticles.get()
        # if not self._useSeveralClasses():
        #     newTs = inputParticles.getSamplingRate()
        # else:
        #     newTs = readInfoField(self._getExtraPath(), "sampling",
        #                           xmipp.MDL_SAMPLINGRATE)

        self.mdClasses = emlib.MetaData(self._getDirectionalClassesFn())
        self.mdImages = emlib.MetaData(self._getDirectionalImagesFn())

        origTs = inputParticles.getSamplingRate()
        lastTs = readInfoField(self._getExtraPath(), "sampling",
                               emlib.MDL_SAMPLINGRATE)

        if origTs!=lastTs:
            newXdim=inputParticles.getXDim()
            self.runJob("xmipp_image_resize", "-i %s -o %s --save_metadata_stack %s --fourier %d"
                        % (self._getDirectionalClassesFn(),
                           self._getPath("aux_directional_local_classes.stk"),
                           self._getPath("aux_directional_classes.xmd"),
                           newXdim), numberOfMpi=1)
            from shutil import copy
            copy(self._getPath("aux_directional_local_classes.stk"),
                 self._getPath("directional_local_classes.stk"))
            copy(self._getPath("aux_directional_classes.xmd"),
                 self._getPath("directional_classes.xmd"))
            cleanPattern(self._getPath("aux_directional*"))


        classes2D = self._createSetOfClasses2D(inputParticles)
        #classes2D.getImages().setSamplingRate(newTs)
        classes2D.getImages().setSamplingRate(origTs)

        self.averageSet = self._createSetOfAverages()
        self.averageSet.copyInfo(inputParticles)
        self.averageSet.setAlignmentProj()
        #self.averageSet.setSamplingRate(newTs)
        self.averageSet.setSamplingRate(origTs)

        # Let's use a SetMdIterator because it should be less particles
        # in the metadata produced than in the input set
        iterator = md.SetMdIterator(self.mdImages, sortByLabel=md.MDL_ITEM_ID,
                                    updateItemCallback=self._updateParticle,
                                    skipDisabled=True)

        fnHomogeneous = self._getExtraPath("images_homogeneous.xmd")
        if exists(fnHomogeneous):
            if origTs != lastTs:
                newXdim = inputParticles.getXDim()
                self.runJob("xmipp_image_resize", "-i %s --dim %d"
                            % (fnHomogeneous, newXdim), numberOfMpi=1)
            homogeneousSet = self._createSetOfParticles()
            homogeneousSet.copyInfo(inputParticles)
            #homogeneousSet.getImages().setSamplingRate(newTs)
            homogeneousSet.getImages().setSamplingRate(origTs)
            homogeneousSet.setAlignmentProj()
            self.iterMd = md.iterRows(fnHomogeneous, md.MDL_PARTICLE_ID)
            self.lastRow = next(self.iterMd)
            homogeneousSet.copyItems(inputParticles,
                                     updateItemCallback=self._updateHomogeneousItem)
            self._defineOutputs(outputHomogeneous=homogeneousSet)
            self._defineSourceRelation(self.inputParticles, homogeneousSet)

        #AJ testing
        #AJ por que desaparece una clase que tiene imagenes asignadas
        listRefId=[]
        for row in md.iterRows(self.mdClasses, emlib.MDL_REF2):

            refId = row.getValue(emlib.MDL_REF2, row.getObjId())
            if len(listRefId)>0 and refId != listRefId[-1]+1:
                whereEnd = listRefId[-1]+1
                for i in range(refId-whereEnd):
                    rowNew = row
                    rowNew.setValue(emlib.MDL_REF2, listRefId[-1]+i+1)
                    rowNew.setValue(emlib.MDL_IMAGE, 'None')
                    rowNew.setValue(emlib.MDL_IMAGE1, 'None')
                    rowNew.addToMd(self.mdClasses)
                    listRefId.append(listRefId[-1]+i+1)

                listRefId.append(refId)
            else:
                listRefId.append(refId)

        self.mdClasses.write(self._getDirectionalClassesFn())
        self.mdClasses = emlib.MetaData(self._getDirectionalClassesFn())
        #END AJ


        classes2D.classifyItems(updateItemCallback=iterator.updateItem,
                                updateClassCallback=self._updateClass)

        self._defineOutputs(outputClasses=classes2D)
        self._defineOutputs(outputAverages=self.averageSet)
        self._defineSourceRelation(self.inputParticles, classes2D)
        self._defineSourceRelation(self.inputParticles, self.averageSet)

        if self.splitVolume and self.directionalClasses.get() > 1:
            volumesSet = self._createSetOfVolumes()
            #volumesSet.setSamplingRate(newTs)
            volumesSet.setSamplingRate(origTs)
            for i in range(2):
                vol = Volume()
                fnVol = self._getExtraPath("split%d.vol" % (i + 1))
                if origTs != lastTs:
                    newXdim = inputParticles.getXDim()
                    self.runJob("xmipp_image_resize", "-i %s --dim %d" % (fnVol, newXdim), numberOfMpi=1)
                fnMrc = self._getExtraPath("split%d.mrc" % (i + 1))
                self.runJob("xmipp_image_convert", "-i %s -o %s -t vol" % (fnVol, fnMrc), numberOfMpi=1)
                self.runJob("xmipp_image_header", "-i %s --sampling_rate %f" % (fnMrc, origTs), numberOfMpi=1)
                cleanPath(fnVol)
                vol.setLocation(1, fnMrc)
                volumesSet.append(vol)

            self._defineOutputs(outputVolumes=volumesSet)
            self._defineSourceRelation(inputParticles, volumesSet)

    def _updateHomogeneousItem(self, particle, row):
        count = 0
        while self.lastRow and particle.getObjId() == self.lastRow.getValue(
                md.MDL_PARTICLE_ID):
            count += 1
            if count:
                createItemMatrix(particle, self.lastRow, align=ALIGN_PROJ)
            try:
                self.lastRow = next(self.iterMd)
            except StopIteration:
                self.lastRow = None

        particle._appendItem = count > 0


    def _updateParticle(self, item, row):
        item.setClassId(row.getValue(emlib.MDL_REF2))

    def _updateClass(self, item):
        classId = item.getObjId()
        classRow = findRow(self.mdClasses, emlib.MDL_REF2, classId)

        if classRow is not None:
            representative = item.getRepresentative()
            representative.setTransform(rowToAlignment(classRow, ALIGN_PROJ))
            representative.setLocation(
                xmippToLocation(classRow.getValue(emlib.MDL_IMAGE)))
            setXmippAttributes(representative, classRow, emlib.MDL_ANGLE_ROT)
            setXmippAttributes(representative, classRow, emlib.MDL_ANGLE_TILT)
            setXmippAttributes(representative, classRow, emlib.MDL_CLASS_COUNT)

            self.averageSet.append(representative)

            reprojection = Image()
            reprojection.setLocation(
                xmippToLocation(classRow.getValue(emlib.MDL_IMAGE1)))
            item.reprojection = reprojection


    # --------------------------- INFO functions -------------------------------

    def _validate(self):
        validateMsgs = ProtAnalysis3D._validate(self)
        # if there are Volume references, it cannot be empty.
        if self.inputVolume.get() and not self.inputVolume.hasValue():
            validateMsgs.append('Please provide an input reference volume.')
        if self.inputParticles.get() and not self.inputParticles.hasValue():
            validateMsgs.append('Please provide input particles.')
        if self.angularSampling.get()>40:
            validateMsgs.append("The angular sampling must be <= 40")
        if self.useGpu and not isXmippCudaPresent():
            validateMsgs.append("You have asked to use GPU, but I cannot find the Xmipp GPU programs")
        return validateMsgs

    def _summary(self):
        summary = []
        return summary

    # ----------------------- UTILITY FUNCTIONS ---------------------------------

    def _useSeveralClasses(self):
        return self.directionalClasses > 1

    def _getExpParticlesFn(self):
        return self._getPath('input_particles.xmd')

    def _getInputParticlesFn(self):
        if self._useSeveralClasses():
            return self._getTmpPath('scaled_particles.xmd')
        else:
            return self._getExpParticlesFn()

    def _getInputVolFn(self):
        return self._getTmpPath('volume.vol')

    def _getDirectionalClassesFn(self):
        return self._getPath("directional_classes.xmd")

    def _getDirectionalImagesFn(self):
        return self._getPath("directional_images.xmd")