Source code for continuousflex.protocols.protocol_heteroflow

# **************************************************************************
# * Authors:    Mohamad Harastani            (mohamad.harastani@upmc.fr)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************

from pwem.protocols import ProtAnalysis3D
import xmipp3.convert
import pwem.emlib.metadata as md
import pyworkflow.protocol.params as params
from pyworkflow.utils.path import makePath, createLink
from continuousflex.protocols.utilities.spider_files3 import save_volume
import sys
from pyworkflow.utils import getListFromRangeString
from os.path import isfile
from joblib import Parallel, delayed
import continuousflex
from subprocess import check_call
from pwem.utils import runProgram
from pwem.emlib.image import ImageHandler
import numpy as np

REFERENCE_EXT = 0
REFERENCE_STA = 1

IMPORT_FLOWS = 0
FIND_FLOWS = 1

[docs]class FlexProtHeteroFlow(ProtAnalysis3D): """ Protocol for HeteroFlow. """ _label = 'tomoflow protocol' # --------------------------- DEFINE param functions -------------------------------------------- def _defineParams(self, form): form.addSection(label='Settings') group = form.addGroup('Choose what processes you want to perform:') group.addParam('copy_opflows', params.EnumParam, choices=['Import optical flows from the last refinement iteration and analyze the heterogeneity', 'Find optical flows for a set of ALIGNED volumes and analyze the heterogeneity'], default=IMPORT_FLOWS, label='Optical flows to analyze', display=params.EnumParam.DISPLAY_COMBO, help='You can choose to find the optical flows for a set of volumes or to import' ' precalculated optical flows from a refinement protocol previous run in the project' ' workspace') group = form.addGroup('Protocol of: Missing wedge correction and combined rigid-body and elastic alignment', condition='copy_opflows==%d'% IMPORT_FLOWS) group.addParam('refinementProt', params.PointerParam, pointerClass='FlexProtRefineSubtomoAlign', label='Point to the refinement protocol', allowsNull=True) group = form.addGroup('Input', condition='copy_opflows==%d' % FIND_FLOWS) group.addParam('inputVolumes', params.PointerParam, pointerClass='SetOfVolumes', allowsNull=True, label="Input volume(s)", important=True, help='Select volumes') group.addParam('StartingReference', params.EnumParam, choices=['From an external volume file', 'Select a volume'], default=REFERENCE_STA, label='Reference volume', display=params.EnumParam.DISPLAY_COMBO, help='Either an external volume file or a subtomogram average') group.addParam('ReferenceVolume', params.FileParam, pointerClass='params.FileParam', allowsNull=True, condition='StartingReference==%d' % REFERENCE_EXT, label="Reference volume", help='Choose a reference, typically from a STA previous run') group.addParam('STAVolume', params.PointerParam, pointerClass='Volume', allowsNull=True, condition='StartingReference==%d' % REFERENCE_STA, label="Selected volume", help='Choose a reference, typically from a STA previous run') group.addParam('WarpAndEstimate', params.BooleanParam, expertLevel=params.LEVEL_ADVANCED, default=True, label='Save a warped version of the reference for each input volume?', help='This will create a set of volumes, representing the fitted version of the input volumes, ' 'using the calculated optical flows, and calculate the cross correlation, mean square ' 'distance and the mean absolute distance between the input volumes and estimated volumes') form.addSection(label='3D OpticalFLow parameters') group = form.addGroup('Optical flows', condition='copy_opflows==%d' % FIND_FLOWS) group.addParam('N_GPU', params.IntParam, default=1, important=True, allowsNull=True, label = 'Parallel processes on GPU', help='This parameter indicates the number of volumes that will be processed in parallel' ' (independently). The more powerful your GPU, the higher the number you can choose.') group.addParam('GPU_list', params.NumericRangeParam, label="GPU id(s)", help='Select the GPU id(s) that will be used for optical flow calculation.' 'Examples: ' 'You can select a list like 0-4, and it will take the GPUs 0 1 2 3 4' 'You can also combine different selections like 1, 3-5 and it will take 1, 3, 4, 5', default='0') group.addParam('pyr_scale', params.FloatParam, default=0.5, label='pyr_scale', allowsNull=True, help='parameter specifying the image scale to build pyramids for each image (pyr_scale < 1). ' 'A classic pyramid is of generally 0.5 scale, every new layer added, it is halved to the previous one.') group.addParam('levels', params.IntParam, default=4, allowsNull=True, label='levels', help='levels=1 says, there are no extra layers (only the initial image).' ' It is the number of pyramid layers including the first image.' ' The coarsest possible pyramid level is 32x32x32 voxels') group.addParam('winsize', params.IntParam, default=10, allowsNull=True, label='winsize', help='It is the averaging window size, larger the size, the more robust the algorithm is to noise, ' 'and provide fast motion detection, though gives blurred motion fields.') group.addParam('iterations', params.IntParam, default=10, allowsNull=True, label='iterations', help='Number of iterations to be performed at each pyramid level. It refines the optical flow' ' accuracy at each scale and allows accounting for larger displacements.') group.addParam('poly_n', params.IntParam, default=5, allowsNull=True, label='poly_n', help='Size of the pixel neighborhood used to find polynomial expansion in each pixel;' ' larger values mean that the image will be approximated with smoother surfaces,' ' yielding more robust algorithm and more blurred motion field, typically poly_n = 5 or 7.') group.addParam('poly_sigma', params.FloatParam, default=1.2, label='poly_sigma', help='Standard deviation of the Gaussian that is used to smooth derivatives used as ' 'a basis for the polynomial expansion; for poly_n = 5, you can set poly_sigma = 1.2,' ' for poly_n = 7, a good value would be poly_sigma = 1.5.') group.addHidden('flags', params.IntParam, default=0, expertLevel=params.LEVEL_ADVANCED, label='flags', help='flag to pass for the optical flow') group.addHidden('factor1', params.IntParam, default=100, expertLevel=params.LEVEL_ADVANCED, label='gray scale factor1', help='this factor will be multiplied by the gray levels of each subtomogram') group.addHidden('factor2', params.IntParam, default=100, expertLevel=params.LEVEL_ADVANCED, label='gray scale factor2', help='this factor will be multiplied by the gray levels of the reference') # --------------------------- INSERT steps functions -------------------------------------------- def _insertAllSteps(self): # Define some outputs filenames self.imgsFn = self._getExtraPath('volumes.xmd') if(self.copy_opflows.get()==FIND_FLOWS): makePath(self._getExtraPath() + '/optical_flows') self._insertFunctionStep('convertInputStep') self._insertFunctionStep('doAlignmentStep') else: self._insertFunctionStep('copyOpticalFlows') self._insertFunctionStep('findCorrelationMatrix') if (self.WarpAndEstimate.get()): self._insertFunctionStep('warpByFlow') self._insertFunctionStep('createOutputStep') # --------------------------- STEPS functions --------------------------------------------
[docs] def convertInputStep(self): # Write a metadata with the volumes xmipp3.convert.writeSetOfVolumes(self.inputVolumes.get(), self.imgsFn)
[docs] def doAlignmentStep(self): imgFn = self.imgsFn StartingReference = self.StartingReference.get() ReferenceVolume = self.ReferenceVolume.get() if StartingReference == REFERENCE_STA: STAVolume = self.STAVolume.get().getFileName() else: STAVolume = ReferenceVolume # just in case the reference is in MRC format: path_vol0 = self._getExtraPath('reference.spi') params = '-i ' + STAVolume + ' -o ' + path_vol0 + ' --type vol' self.runJob('xmipp_image_convert', params) pyr_scale = self.pyr_scale.get() levels = self.levels.get() iterations = self.iterations.get() winsize = self.winsize.get() poly_n = self.poly_n.get() poly_sigma = self.poly_sigma.get() # TODO: the factor1 and 2 can be any value as long as we are using the subtomogram average (gray level values # are similar. It is not sure if we use an external reference what this should be! This could be normalized in # future flags = self.flags.get() factor1 = self.factor1.get() factor2 = self.factor2.get() mdImgs = md.MetaData(imgFn) of_root = self._getExtraPath() + '/optical_flows/' # This is a spherical mask with maximum radius mask_size = int(self.getVolumeDimesion()//2) # Parallel processing (finding multiple optical flows at the same time) GPUids = np.array(getListFromRangeString(self.GPU_list.get())) gpu_ps = np.tile(GPUids, mdImgs.size()) global segment def segment(objId): gpu_p = gpu_ps[objId-1] imgPath = mdImgs.getValue(md.MDL_IMAGE, objId) # getting a copy converted to spider format to solve the problem with stacks or mrc files tmp = self._getTmpPath('tmp_' + str(objId) + '.spi') runProgram('xmipp_image_convert', '-i ' + imgPath + ' -o ' + tmp + ' --type vol') print('processing optical flow for volume ', objId) path_flowx = of_root + str(objId).zfill(6) + '_opflowx.spi' path_flowy = of_root + str(objId).zfill(6) + '_opflowy.spi' path_flowz = of_root + str(objId).zfill(6) + '_opflowz.spi' path_vol_i = tmp if (isfile(path_flowx)): return else: args = " %s %s %f %d %d %d %d %f %d %d %s %s %s %d" % (path_vol0, path_vol_i, pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, factor1, factor2, path_flowx, path_flowy, path_flowz, gpu_p) script_path = continuousflex.__path__[0] + '/protocols/utilities/optflow_run.py' command = "python " + script_path + args check_call(command, shell=True, stdout=sys.stdout, stderr=sys.stderr, env=None, cwd=None) arg_x = "-i %s --mask circular -%d --substitute 0 -o %s" % (path_flowx, mask_size, path_flowx) arg_y = "-i %s --mask circular -%d --substitute 0 -o %s" % (path_flowy, mask_size, path_flowy) arg_z = "-i %s --mask circular -%d --substitute 0 -o %s" % (path_flowz, mask_size, path_flowz) runProgram('xmipp_transform_mask', arg_x) runProgram('xmipp_transform_mask', arg_y) runProgram('xmipp_transform_mask', arg_z) # Running the multiple processing: ps = [objId for objId in mdImgs] Parallel(n_jobs=self.N_GPU.get(), backend="multiprocessing")(delayed(segment)(p) for p in ps)
[docs] def findCorrelationMatrix(self): imgFn = self.imgsFn mdImgs = md.MetaData(imgFn) N = 0 for objId in mdImgs: N += 1 metric_mat = np.zeros([N, N]) for i in range(1, N + 1): print('finding the correlation matrix row ', i) flowi = self.read_optical_flow_by_number(i) for j in range(i, N + 1): print(' column', j) flowj = self.read_optical_flow_by_number(j) metric_mat[i - 1, j - 1] = metric_mat[j - 1, i - 1] = self.metric_opflow_vols(flowi, flowj) correlation_matrix = self._getExtraPath('data.csv') np.savetxt(correlation_matrix, metric_mat, delimiter=',')
[docs] def copyOpticalFlows(self): # In this case we get from the refinment protocol the optical flows and the reference N = self.refinementProt.get().NumOfIters.get() self.imgsFn = self.refinementProt.get()._getExtraPath('volumes_aligned_'+str(N+1)+'.xmd') createLink(self.refinementProt.get()._getExtraPath() + '/optical_flows_' + str(N), self._getExtraPath() + '/optical_flows') createLink(self.refinementProt.get()._getExtraPath('reference'+str(N+1)+'.spi'), self._getExtraPath('reference.spi'))
[docs] def warpByFlow(self): import farneback3d makePath(self._getExtraPath() + '/estimated_volumes') estVol_root = self._getExtraPath() + '/estimated_volumes/' reference_fn = self._getExtraPath('reference.spi') # reference = open_volume(reference_fn) reference = ImageHandler().read(reference_fn).getData() stat_mat_fn = self._getExtraPath('cc_msd_mad.txt') # recount the number of volumes: imgFn = self.imgsFn mdImgs = md.MetaData(imgFn) N = 0 for objId in mdImgs: N += 1 for i in range(1, N + 1): print('Warping a copy of the reference volume by the optical flow ', i) flow_i = self.read_optical_flow_by_number(i) warped_i = farneback3d.warp_by_flow(reference, np.float32(flow_i)) warped_path_i = estVol_root + str(i).zfill(6) + '.spi' save_volume(warped_i, warped_path_i) # Find a matrix of metrics (normalized cross correlation, mean square distance, mean absolute distance) stat_mat = np.zeros([N, 3]) mdImgs = md.MetaData(imgFn) i = 0 for objId in mdImgs: print('Finding the cross corrlation, mean square distance and mean absolute distance for volume ', i + 1) imgPath = mdImgs.getValue(md.MDL_IMAGE, objId) # getting a copy converted to spider format to solve the problem with stacks or mrc files tmp = self._getTmpPath('tmp.spi') self.runJob('xmipp_image_convert', '-i ' + imgPath + ' -o ' + tmp + ' --type vol') # vol_i = open_volume(tmp) vol_i = ImageHandler().read(tmp).getData() warped_path_i = estVol_root + str(i + 1).zfill(6) + '.spi' warped_i = ImageHandler().read(warped_path_i).getData() # TODO: replace all the open_volume and save_volume by the ImageHandler() read and write # warped_i = open_volume(warped_path_i) stat_mat[i, 0] = self.ncc(warped_i,vol_i) stat_mat[i, 1] = self.vmsq(warped_i,vol_i) stat_mat[i, 2] = self.vmab(warped_i,vol_i) i += 1 np.savetxt(stat_mat_fn, stat_mat) pass
[docs] def createOutputStep(self): if (self.WarpAndEstimate.get()): # first making a metadata for the wrapped volumes: out_mdfn = self._getExtraPath('volumes_out.xmd') pattern = '"' + self._getExtraPath() + '/estimated_volumes/*.spi"' command = '-p ' + pattern + ' -o ' + out_mdfn self.runJob('xmipp_metadata_selfile_create',command) # now creating the output set of volumes: partSet = self._createSetOfVolumes('Warped') xmipp3.convert.readSetOfVolumes(out_mdfn, partSet) if (self.copy_opflows.get() == FIND_FLOWS): partSet.setSamplingRate(self.inputVolumes.get().getSamplingRate()) else: partSet.setSamplingRate(self.refinementProt.get().RefinedAverage.getSamplingRate()) self._defineOutputs(WarpedRefByFlows=partSet) pass pass
# --------------------------- INFO functions -------------------------------------------- def _summary(self): summary = [] return summary def _citations(self): return [] def _methods(self): pass # --------------------------- UTILS functions --------------------------------------------
[docs] def read_optical_flow(self, path_flowx, path_flowy, path_flowz): x = ImageHandler().read(path_flowx).getData() y = ImageHandler().read(path_flowy).getData() z = ImageHandler().read(path_flowz).getData() l = np.shape(x) # print(l) flow = np.zeros([3, l[0], l[1], l[2]]) flow[0, :, :, :] = x flow[1, :, :, :] = y flow[2, :, :, :] = z return flow
[docs] def read_optical_flow_by_number(self, num): op_path = self._getExtraPath() + '/optical_flows/' path_flowx = op_path + str(num).zfill(6) + '_opflowx.spi' path_flowy = op_path + str(num).zfill(6) + '_opflowy.spi' path_flowz = op_path + str(num).zfill(6) + '_opflowz.spi' flow = self.read_optical_flow(path_flowx, path_flowy, path_flowz) return flow
[docs] def metric_opflow_vols(self, flow1, flow2): # print(np.shape(flow1)) reshaped_flow1 = np.reshape(flow1, [3, np.shape(flow1)[1] * np.shape(flow1)[2] * np.shape(flow1)[3]]) reshaped_flow2 = np.reshape(flow2, [3, np.shape(flow2)[1] * np.shape(flow2)[2] * np.shape(flow2)[3]]) metric = np.sum(np.multiply(reshaped_flow1, reshaped_flow2)) return metric
def _printWarnings(self, *lines): """ Print some warning lines to 'warnings.xmd', the function should be called inside the working dir.""" fWarn = open("warnings.xmd", 'w') for l in lines: print >> fWarn, l fWarn.close()
[docs] def normalize(self, v): """Normalize the data. @param v: input volume. @return: Normalized volume. """ m = np.mean(v) v = v - m s = np.std(v) v = v / s return v
[docs] def ncc(self, v1, v2): """Compute the Normalized Cross Correlation between the two volumes. @param v1: volume 1. @param v2: volume 2. @return: NCC. """ vv1 = self.normalize(v1) vv2 = self.normalize(v2) score = np.sum(vv1 * vv2) / vv1.size return score
[docs] def vmsq(self, v1, v2): """Compute the normalized mean square distance between the two volumes :param v1: volume1 :param v2: volume2 :return: score for mean square diff """ from sklearn.metrics import mean_squared_error score = mean_squared_error(np.ndarray.flatten(v1), np.ndarray.flatten(v2)) / mean_squared_error( np.ndarray.flatten(v1), np.zeros([np.size(np.ndarray.flatten(v1))])) return score
[docs] def vmab(self, v1, v2): """Compute the normalized mean absolute distance between the two volumes :param v1: volume1 :param v2: volume2 :return: score for mean square diff """ from sklearn.metrics import mean_absolute_error score = mean_absolute_error(np.ndarray.flatten(v1), np.ndarray.flatten(v2)) / mean_absolute_error( np.ndarray.flatten(v1), np.zeros([np.size(np.ndarray.flatten(v1))])) return score
[docs] def getVolumeDimesion(self): return self.inputVolumes.get().getDimensions()[0]