Source code for atomstructutils.protocols.protocol_average_subunits

# **************************************************************************
# *
# * Authors:     Marta Martinez (mmmtnez@cnb.csic.es)
# *              Roberto Marabini (roberto@cnb.csic.es)
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
import os
from pwem.convert import Ccp4Header

from pwem.emlib.image import ImageHandler
from pwem.protocols import EMProtocol
from pwem.viewers import Chimera

from pwem.objects import Float, String
from pyworkflow.protocol.params import (IntParam,
                                        MultiPointerParam,
                                        PointerParam, FloatParam)
import numpy as np
from pwem.objects import Volume

from pwem.convert.atom_struct import AtomicStructHandler, fromCIFTommCIF

[docs]class ProtAverageSubunits(EMProtocol):
    """Average densities related with the given atomic structures.
    For example hexons in a unit cell. Alpha carbon are used
    to compute the transformation matrix"""
    _label = 'average_sub_unit'
    _program = ""
    DEBUG = True
    DEBUGX = not DEBUG

    def __init__(self, **kwargs):
        EMProtocol.__init__(self, **kwargs)
        self.listTransformationMatrices = []
        self.listRMS = []
        self.listObjID = []

[docs]    def outputFileName(self):
        return self._getExtraPath("outPut.mrc")

    def _defineParams(self, form):
        form.addSection(label='Input')
        form.addParam('inputVolume', PointerParam, pointerClass="Volume",
                      label='Input Volume', allowsNull=True,
                      important=True,
                      help="Volume to process")
        form.addParam('atomStructReference', PointerParam, pointerClass="AtomStruct",
                      label='Atomic Structure 1:', allowsNull=True,
                      important=True,
                      help="Input the reference atomic structure.")
        form.addParam('otherAtomStructs', MultiPointerParam,
                      pointerClass="AtomStruct", allowsNull=True,
                      label='Atomic structures',
                      help="Other atom Structs")
        form.addParam('rangeStart', IntParam,
                      label="First Residue", allowsNull=True,
                      default=-1,
                      help="First residue to be taken into account"
                           " -1 -> first residue")
        form.addParam('rangeEnd', IntParam,
                      label="Last Residue", allowsNull=True,
                      default=-1,
                      help="Last residue to be taken into account"
                           " -1 -> last residue")
        form.addParam('expand', FloatParam,
                      label="expand BB", allowsNull=True,
                      default=3,
                      help="Expand bounding box by this"
                           " number of A.")

    def _insertAllSteps(self):
        self._insertFunctionStep('createOutput')

[docs]    def computeTransformationMatrices(self, origin, sampling):
        """ compute transformation matrix between reference
        atom structure (self.atomStructReference) and the other
        atom structures (self.otherAtomStructs)
         read atomic structures
         identify common AA with first structure
         compute transformation matrix in PDB system
         function fills lists
            * listTransformationMatrices
            * listRMS (errors)
            * listObjID (atom structure names)
         transform matrix to unitcell system of equations
         shifts in px
        """
        # reference atom struct
        atomStructReferenceFn = \
            self.atomStructReference.get().getFileName()
        self.aStructReferenceHa = AtomicStructHandler(
            atomStructReferenceFn)

        # each one of the atom structs different from the reference
        for aStruct in self.otherAtomStructs:
            atomStructFn = aStruct.get().getFileName()
            matrix, rms = self.aStructReferenceHa.getTransformMatrix(
                atomStructFn, self.rangeStart.get(), self.rangeEnd.get())
            # getTransformMatrix
            # [[ 9.95590478e-01 -3.44402543e-03 -9.37429380e-02 -5.47417e+01]
            #  [-2.80233170e-03  9.97787836e-01 -6.64197386e-02  2.35492e+01]
            #  [ 9.37643145e-02  6.63895581e-02  9.93378417e-01  7.36214e+00]
            #  [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.00000e+00]]

            # transform matrix in atom struct (p) reference, shifts A
            # to unit cell (p') reference, shifts px
            convertAtomicModel2Unit = \
                self.convertAtomicModel2UnitCellMatrix(sampling, origin)
            # origin [-318.23999023  -53.04000092  212.16000366    1.]

            convertUnitCell2Atomic = \
                self.convertUnitCell2AtomicModelMatrix(sampling, origin)

            # @ -> matrix_ multiplication
            matrixUnitCell = \
                convertAtomicModel2Unit @ matrix @ convertUnitCell2Atomic
            #  matrixUnitCell
            #  [[ 9.95590478e-01 -3.44402543e-03 -9.37429380e-02  5.3709e+01]
            #   [-2.80233170e-03  9.97787836e-01 -6.64197386e-02 -7.6961e+00]
            #   [ 9.37643145e-02  6.63895581e-02  9.93378417e-01  2.0149e+01]
            #   [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.0000e+00]]

            # scipion uses the inverse matrix
            matrixUnitCellInv = np.linalg.inv(matrixUnitCell)
            # matrixUnitCellInv
            # [[9.95590478e-01 -2.80233170e-03  9.37643145e-02 -5.53830e+01]
            # [-3.44402543e-03  9.97787836e-01  6.63895581e-02  6.52637e+00]
            # [-9.37429380e-02 -6.64197386e-02  9.93378417e-01 -1.54925e+01]
            # [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.00000e+00]]

            self.listTransformationMatrices.append(matrixUnitCellInv)
            self.listRMS.append(Float(rms))  # root mean square error
            self.listObjID.append(String(aStruct.getNameId())) # filenames

[docs]    def convertUnitCell2AtomicModelMatrix(self, sampling, origin):
        """ Compute matrix that transform points in unit cell (px)
            to atom struct (A) system"""
        return np.linalg.inv(
            self.convertAtomicModel2UnitCellMatrix(sampling, origin))

[docs]    def convertAtomicModel2UnitCellMatrix(self, sampling, origin):
        """ Compute matrix that transform points in atomic structs (A)
            to unit cell (px) system. p' = (p - origin ) /sampling"""
        shiftScale = np.eye(4, dtype=float)
        shiftScale[:, 3] = - origin
        shiftScale = shiftScale / sampling
        shiftScale[3, 3] = 1.
        return shiftScale

[docs]    def computeAveragedSubUnit(self):
        """Average the pixel values of the pixels related by symmetry"""
        # 1) get sampling and origin
        volume = self.inputVolume.get()
        origin = np.ones(4)
        origin[:3] = volume.getShiftsFromOrigin()
        sampling = volume.getSamplingRate()

        # 2) get transformation matrices that relate otherAtomStructs
        # with reference atomStructReference, matrix unit cell (p')
        # system of coordinates
        self.computeTransformationMatrices(origin, sampling)

        # 3) get matrix with data (voxel values)
        # careful here, index (0,0,) is top left corner
        # relate to PDB coordinates (p) as:
        # are p' = (p - origin)/sampling
        ih = ImageHandler()
        img = ih.createImage()
        img.read(volume.getLocation())
        matrix = img.getData()  # get matrix with voxels
        # matrix.shape (220, 274, 455) (z,y,x)
        # scipion reports  455 274 220 (x,y,z)

        # 4) compute atomic struct bounding box in reference
        # atom structure in A (p)
        boundingBoxA = np.ones((2,4))
        boundingBoxA[:,:3] = self.aStructReferenceHa.\
            getBoundingBox(expand=self.expand.get())
        self.boundingBoxA = boundingBoxA  # self.bounding needed in viewer

        # compute bounding box in unit cell reference and pixels (p')
        boundingBoxPxOrigin = np.ones((2,4))
        convert = self.convertAtomicModel2UnitCellMatrix(sampling, origin)
        for i in range(2):
            boundingBoxPxOrigin[i,:] = convert @ boundingBoxA[i,:]
            # convert bounding box to closest integers
            boundingBoxPxOrigin = boundingBoxPxOrigin.astype(int)
        # bounding box A [[-176.99899292  -48.58000183  322.22198486    1.]
        #  [ -84.15100098   47.80899811  436.43399048    1.        ]]
        # boundingBoxPxOrigin [[103   3  80   1]
        #  [172  74 164   1]]

        # 5) loop through points and interpolated
        # grid points to average
        l = boundingBoxPxOrigin  # bounding box in pixels (int) around origin
        # values for tests (px - float)
        # 00032:   l [[103   3  80   1]
        # 00033:    [172  74 164   1]]

        # Coords in p' (unit cell system of reference)
        # this x_, y_, z_ are auxiliary ranges that will be
        # used to calculate a meshgrid. The purpose of meshgrid is to
        # create a rectangular grid out of an array of x_, y_ and z_
        # values.
        #
        # So, for example, if we want to create a grid where we have
        # a point at each integer value between 0 and 4 in x, y and z
        # directions. To create a rectangular grid, we need every
        # combination of the x, y and z points.
        x_ = np.arange(l[0,0], l[1,0]+1, 1); xdim = np.shape(x_)[0]
        y_ = np.arange(l[0,1], l[1,1]+1, 1); ydim = np.shape(y_)[0]
        z_ = np.arange(l[0,2], l[1,2]+1, 1); zdim = np.shape(z_)[0]
        # x_ x_.shape [103 104 105 106] [169 170 171 172] (70,)
        # y_ y_.shape [3 4 5 6] [71 72 73 74] (72,)
        # z_ z_.shape [80 81 82 83] [161 162 163 164] (85,)
        # mesh grid with all the points that need to be evaluated

        coords = np.meshgrid(x_, y_, z_, indexing='ij')
        # coords coords.shape [array([[[103, 103, 103, ..., 103, 103, 103],
        #         [103, 103, 103, ..., 103, 103, 103],
        #         [103, 103, 103, ..., 103, 103, 103],
        #         ...,
        #         ...,
        #         [ 80,  81,  82, ..., 162, 163, 164],
        #         [ 80,  81,  82, ..., 162, 163, 164],
        #         [ 80,  81,  82, ..., 162, 163, 164]]])]
        #  (3, 70, 72, 85)

        # stack the meshgrid to position vectors
        # meshgrid are several lists with x, y and z coordinates. We want
        # this values as vectors so we can multiply them by the transformation
        # matrix
        # Since I use generalized coordinates, vectors should have
        # 4 dimensions and the
        # last dimension should be always 1 (integer 1!!).
        xyz=np.vstack([coords[0].reshape(-1),
                       coords[1].reshape(-1),
                       coords[2].reshape(-1),
                       np.ones((xdim, ydim, zdim), dtype=int).reshape(-1)])
        #xyz xyz.shape [[103. 103. 103. ... 172. 172. 172.]
        # [  3.   3.   3. ...  74.  74.  74.]
        # [ 80.  81.  82. ... 162. 163. 164.]
        # [  1.   1.   1. ...   1.   1.   1.]] (4, 428400=  70 *  72 * 85)
        # let us create a volume with the same size than the input volume
        # to store the output
        outmatrix = np.zeros_like(matrix)

        # I can not use xyz as indexes for numpy since xys is in
        # generalized coordiantes
        x = xyz[0, :]
        y = xyz[1, :]
        z = xyz[2, :]
        # copy bounding box region to output volume
        # note that
        outmatrix[z,y,x] = matrix[z,y,x]

        # now copy the region related by the transformation matrices
        # interpolating the points if needed
        for counter, transformationMatrix in enumerate(self.listTransformationMatrices):
            transformed_xyz = transformationMatrix @ xyz
            # transformationMatrix
            # [[ 9.95590478e-01 -2.80233170e-03  9.37643145e-02  3.96147e+01]
            #  [-3.44402543e-03  9.97787836e-01  6.63895581e-02 -1.77753e+01]
            #  [-9.37429380e-02 -6.64197386e-02  9.93378417e-01 -8.00067e+00]
            #  [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  7.35294e-01]]
            # transformed_xyz transformed_xyz.shape
            # [[165.42162908 165.51539339 165.60915771 ... 241.60708031 241.70084462
            # 00183:     241.79460894]
            # 00184:    [  1.42342186   1.48981142   1.55620098 ...  77.47266421  77.53905377
            # 00185:      77.60544333]
            # 00186:    [ 85.10806994  86.10144836  87.09482677 ... 155.38103599 156.37441441
            # 00187:     157.36779283]
            # 00188:    [  1.           1.           1.         ...   1.           1.
            # 00189:       1.        ]] (4, 428400)

            # matrix(xp, yp and zp) should be averaged with matrix(x,y,z)
            xp = transformed_xyz[0, :]
            yp = transformed_xyz[1, :]
            zp = transformed_xyz[2, :]
            outmatrix[z,y,x] += self.trilinearInterpolation(xp, yp, zp, matrix)

        # divide sum by the number averaged regions
        factor = len(self.listTransformationMatrices) + 1.
        outmatrix /= factor
        img.setData(outmatrix)
        kkFn = self.outputFileName()
        img.write(kkFn)
        origin = volume.getOrigin(force=True).getShifts()
        Ccp4Header.fixFile(kkFn, kkFn, origin, sampling,
                           Ccp4Header.START)

[docs]    def createOutput(self):
        self.computeAveragedSubUnit()
        # save results
        inVol = self.inputVolume.get()
        outVol = Volume()
        origin = inVol.getOrigin(force=True)
        sampling = inVol.getSamplingRate()
        outVol.setSamplingRate(sampling)
        outVol.setOrigin(origin)
        outVol.setFileName(self.outputFileName())
        self._defineOutputs(outVol=outVol)
        with open("chimera_output.cxc", 'w') as f:
            dim = 150.
            bildFileName = os.path.abspath(self._getExtraPath(
                "axis_output.bild"))
            Chimera.createCoordinateAxisFile(dim,
                                             bildFileName=bildFileName,
                                             sampling=sampling)
            fnCmd = self._getExtraPath("chimera_output.cxc")
            f = open(fnCmd, 'w')
            # axis
            f.write("open %s\n" % bildFileName)
            f.write("cofr 0,0,0\n")  # set center of coordinates
            # 3D map
            f.write("open %s\n" % os.path.abspath(inVol.getFileName()))
            x_input, y_input, z_input = inVol.getShiftsFromOrigin()
            f.write("volume #2  voxelSize %f origin %0.2f,%0.2f,%0.2f\n" %
                    (sampling, x_input, y_input, z_input))
            f.write("open %s\n" % os.path.abspath(
                self.outputFileName()))
            x_input, y_input, z_input = outVol.getShiftsFromOrigin()
            f.write("volume #3  voxelSize %f origin %0.2f,%0.2f,%0.2f\n" %
                    (sampling, x_input, y_input, z_input))

            # reference pdb
            f.write("open %s\n" % os.path.abspath(
                self.atomStructReference.get().getFileName()))
            # other pdb
            for aStruct in self.otherAtomStructs:
                f.write("open %s\n" % os.path.abspath(
                    aStruct.get().getFileName()))

            bildFileFn = os.path.abspath(
                self._getExtraPath("chimera_output.bild"))
            with open(bildFileFn, 'w') as ff:
                ff.write(".transparency 0.8\n")
                # bounding box reference
                l = self.boundingBoxA
                ff.write(".color red\n")
                ff.write(".box %f %f %f %f %f %f\n" %
                         (l[0][0], l[0][1], l[0][2],
                          l[1][0], l[1][1], l[1][2]))
                ff.write(".transparency 0.\n")
            f.write("open %s\n" % bildFileFn)
            f.write("view\n")
            f.close()

    def _validate(self):
        errors = []
        # check number of chains in each entry
        atomStructReferenceFn = self.atomStructReference.get().getFileName()
        aStructReferenceHa = AtomicStructHandler(atomStructReferenceFn)
        listOfChains, listOfResidues = \
            aStructReferenceHa.getModelsChains()

        for aStruct in self.otherAtomStructs:
            atomStructFn = aStruct.get().getFileName()
            aStructReferenceHa = AtomicStructHandler(atomStructFn)
            listOfChains2, listOfResidues2 = \
                aStructReferenceHa.getModelsChains()
            if len(listOfChains[0]) != len(listOfChains2[0]):
                errors.append("Number of chains in reference atomic struct"
                              " and atomic struct called %s is differente"
                              " %d != %d" % (atomStructFn, len(listOfChains[0]),
                                             len(listOfChains2[0])))
        return errors

[docs]    def trilinearInterpolation(self, xf, yf, zf, input_array):
        """ Interpolates the value of point "indices" in matrix
            input_array.
            return the interpolated value
        """
        #output = np.empty(indices[0].shape)
        #x_indices = indices[0]
        #y_indices = indices[1]
        #z_indices = indices[2]

        x0 = xf.astype(np.integer)
        y0 = yf.astype(np.integer)
        z0 = zf.astype(np.integer)
        x1 = x0 + 1
        y1 = y0 + 1
        z1 = z0 + 1

        # Check if xyz1 is beyond array boundary:
        x1[np.where(x1 == input_array.shape[0])] = x0.max()
        y1[np.where(y1 == input_array.shape[1])] = y0.max()
        z1[np.where(z1 == input_array.shape[2])] = z0.max()

        x = xf - x0
        y = yf - y0
        z = zf - z0
        output = (input_array[z0, y0, x0] * (1 - x) * (1 - y) * (1 - z) +
                  input_array[z1, y0, x0] * x * (1 - y) * (1 - z) +
                  input_array[z0, y1, x0] * (1 - x) * y * (1 - z) +
                  input_array[z0, y0, x1] * (1 - x) * (1 - y) * z +
                  input_array[z1, y0, x1] * x * (1 - y) * z +
                  input_array[z0, y1, x1] * (1 - x) * y * z +
                  input_array[z1, y1, x0] * x * y * (1 - z) +
                  input_array[z1, y1, x1] * x * y * z)

        return output

    def _summary(self):
        summary = []
        for rms, objId in zip(self.listRMS, self.listObjID):
            summary.append("Obj: %d has rms %d", objId, rms)
        return summary

    def _citations(self):
        return []

    def _methods(self):
        return []