Source code for relion.convert.convert31

# **************************************************************************
# *
# * Authors:     J.M. de la Rosa Trevin (delarosatrevin@scilifelab.se) [1]
# *              Grigory Sharov (gsharov@mrc-lmb.cam.ac.uk) [2]
# *
# * [1] SciLifeLab, Stockholm University
# * [2] MRC Laboratory of Molecular Biology, MRC-LMB
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 3 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
"""
New conversion functions dealing with Relion3.1 new star files format.
"""
import os
import io
import numpy as np
from collections import OrderedDict
from emtable import Table


from pwem.constants import ALIGN_NONE, ALIGN_PROJ, ALIGN_2D, ALIGN_3D
from pwem.objects import (Micrograph, SetOfMicrographsBase, SetOfMovies,
                          Particle, CTFModel, Acquisition, Transform, Coordinate)
import pwem.convert.transformations as tfs

from .convert_base import WriterBase, ReaderBase
from .convert_utils import (convertBinaryFiles, locationToRelion,
                            relionToLocation)
from relion.constants import PARTICLE_EXTRA_LABELS


[docs]def getPixelSizeLabel(imageSet):
    """ Return the proper label for pixel size. """
    if (isinstance(imageSet, SetOfMicrographsBase)
            or isinstance(imageSet, Micrograph)):
        return 'rlnMicrographPixelSize'
    else:
        return 'rlnImagePixelSize'


[docs]class OpticsGroups:
    """ Store information about optics groups in an indexable way.
    Existing groups can be accessed by number of name.
    """

    def __init__(self, opticsTable):
        self.__fromTable(opticsTable)

    def __fromTable(self, opticsTable):
        self._dict = OrderedDict()
        # Also allow indexing by name
        self._dictName = OrderedDict()
        # Map optics rows both by name and by number
        for og in opticsTable:
            self.__store(og)

    def __store(self, og):
        self._dict[og.rlnOpticsGroup] = og
        self._dictName[og.rlnOpticsGroupName] = og

    def __getitem__(self, item):
        if isinstance(item, int):
            return self._dict[item]
        elif isinstance(item, str):
            return self._dictName[item]
        raise TypeError("Unsupported type '%s' of item '%s'"
                        % (type(item), item))

    def __contains__(self, item):
        return item in self._dict or item in self._dictName

    def __iter__(self):
        """ Iterate over all optics groups. """
        return iter(self._dict.values())

    def __len__(self):
        return len(self._dict)

    def __str__(self):
        return self.toString()

[docs]    def first(self):
        """ Return first optics group. """
        return next(iter(self._dict.values()))

[docs]    def update(self, ogId, **kwargs):
        og = self.__getitem__(ogId)
        newOg = og._replace(**kwargs)
        self.__store(newOg)
        return newOg

[docs]    def updateAll(self, **kwargs):
        """ Update all Optics Groups with these values. """
        missing = {k: v for k, v in kwargs.items() if not self.hasColumn(k)}
        existing = {k: v for k, v in kwargs.items() if self.hasColumn(k)}

        self.addColumns(**missing)

        for og in self:
            self.update(og.rlnOpticsGroup, **existing)

[docs]    def add(self, newOg):
        self.__store(newOg)

[docs]    def hasColumn(self, colName):
        return hasattr(self.first(), colName)

[docs]    def addColumns(self, **kwargs):
        """ Add new columns with default values (type inferred from it). """
        items = self.first()._asdict().items()
        cols = [Table.Column(k, type(v)) for k, v in items]

        for k, v in kwargs.items():
            cols.append(Table.Column(k, type(v)))

        t = Table(columns=cols)

        for og in self._dict.values():
            values = og._asdict()
            values.update(kwargs)
            t.addRow(**values)

        self.__fromTable(t)

[docs]    @staticmethod
    def fromStar(starFilePath):
        """ Create an OpticsGroups from a given STAR file.
        """
        return OpticsGroups(Table(fileName=starFilePath, tableName='optics'))

[docs]    @staticmethod
    def fromString(stringValue):
        """ Create an OpticsGroups from string content (STAR format)
        """
        f = io.StringIO(stringValue)
        t = Table()
        t.readStar(f, tableName='optics')
        return OpticsGroups(t)

[docs]    @staticmethod
    def fromImages(imageSet):
        acq = imageSet.getAcquisition()
        params = {'rlnImageSize': imageSet.getXDim(),
                  getPixelSizeLabel(imageSet): imageSet.getSamplingRate()}
        if isinstance(imageSet, SetOfMovies):
            params['rlnMicrographOriginalPixelSize'] = imageSet.getSamplingRate()
        try:
            og = OpticsGroups.fromString(acq.opticsGroupInfo.get())
            # always update sampling and image size from the set
            og.updateAll(**params)
            return og
        except:
            params.update({
                'rlnVoltage': acq.getVoltage(),
                'rlnSphericalAberration': acq.getSphericalAberration(),
                'rlnAmplitudeContrast': acq.getAmplitudeContrast(),
            })
            return OpticsGroups.create(**params)

[docs]    @staticmethod
    def create(**kwargs):
        opticsString1 = """

# version 30001

data_optics

loop_ 
_rlnOpticsGroupName #1
_rlnOpticsGroup #2
_rlnMicrographOriginalPixelSize #3
_rlnVoltage #4
_rlnSphericalAberration #5
_rlnAmplitudeContrast #6
_rlnImageSize #7
_rlnImageDimensionality #8
opticsGroup1            1      1.000000   300.000000     2.700000     0.100000     256            2
        """

        og = OpticsGroups.fromString(opticsString1)
        fog = og.first()
        newColumns = {k: v for k, v in kwargs.items() if not hasattr(fog, k)}
        og.addColumns(**newColumns)
        og.update(1, **kwargs)
        return og

    def _write(self, f):
        # Create columns from the first row
        items = self.first()._asdict().items()
        cols = [Table.Column(k, type(v)) for k, v in items]
        t = Table(columns=cols)
        for og in self._dict.values():
            t.addRow(*og)
        t.writeStar(f, tableName='optics')

[docs]    def toString(self):
        """ Return a string (STAR format) with the current optics groups.
        """
        f = io.StringIO()
        self._write(f)
        result = f.getvalue()
        f.close()

        return result

[docs]    def toStar(self, starFile):
        """ Write current optics groups to a given file.
        """
        self._write(starFile)

[docs]    def toImages(self, imageSet):
        """ Store the optics groups information in the image acquisition.
        """
        imageSet.getAcquisition().opticsGroupInfo.set(self.toString())


[docs]class Writer(WriterBase):
    """ Helper class to convert from Scipion SetOfImages subclasses
    into Relion>3.1 star files (and binaries if conversion needed).
    """

[docs]    def writeSetOfMovies(self, moviesIterable, starFile, **kwargs):
        self._writeSetOfMoviesOrMics(moviesIterable, starFile,
                                     'movies', 'rlnMicrographMovieName',
                                     **kwargs)

[docs]    def writeSetOfMicrographs(self, micsIterable, starFile, **kwargs):
        self._writeSetOfMoviesOrMics(micsIterable, starFile,
                                     'micrographs', 'rlnMicrographName',
                                     **kwargs)

    def _writeSetOfMoviesOrMics(self, imgIterable,
                                starFile, tableName, imgLabelName, **kwargs):
        """ This function can be used to write either movies or micrographs
        star files. Input can be any iterable of these type of images (e.g
        set, list, etc).
        """
        # Process the first item and create the table based
        # on the generated columns
        self._imgLabelName = imgLabelName
        self._postprocessImageRow = kwargs.get('postprocessImageRow', None)
        self._prefix = tableName[:3]

        micRow = OrderedDict()
        micRow[imgLabelName] = ''  # Just to add label, proper value later
        iterMics = iter(imgIterable)
        mic = next(iterMics)
        if self._optics is None:
            self._optics = OpticsGroups.fromImages(mic)
        self._imageSize = mic.getXDim()
        self._setCtf = mic.hasCTF()

        extraLabels = kwargs.get('extraLabels', [])
        self._extraLabels = [l for l in extraLabels
                             if mic.hasAttribute('_%s' % l)]

        self._micToRow(mic, micRow)
        if self._postprocessImageRow:
            self._postprocessImageRow(mic, micRow)

        micsTable = self._createTableFromDict(micRow)

        while mic is not None:
            micRow[imgLabelName] = self._convert(mic)
            self._micToRow(mic, micRow)

            if self._postprocessImageRow:
                self._postprocessImageRow(mic, micRow)

            micsTable.addRow(**micRow)
            mic = next(iterMics, None)

        with open(starFile, 'w') as f:
            f.write("# Star file generated with Scipion\n")
            f.write("# version 30001\n")
            self._optics.toStar(f)
            f.write("# version 30001\n")
            micsTable.writeStar(f, tableName=tableName)

    def _objToRow(self, obj, row, attributes):
        """ Set some attributes from the object to the row.
        For performance reasons, it is not validated that each attribute
        is already in the object, so it should be validated before.
        """
        for attr in attributes:
            row[attr] = obj.getAttributeValue('_%s' % attr)

    def _micToRow(self, mic, row):
        WriterBase._micToRow(self, mic, row)

        # Set CTF values
        if self._setCtf:
            self._ctfToRow(mic.getCTF(), row)

        # Set additional labels if present
        self._objToRow(mic, row, self._extraLabels)
        row['rlnOpticsGroup'] = mic.getAttributeValue('_rlnOpticsGroup', 1)

    def _align2DToRow(self, alignment, row):
        matrix = alignment.getMatrix()
        shifts = tfs.translation_from_matrix(matrix)
        shifts *= self._pixelSize
        angles = -np.rad2deg(tfs.euler_from_matrix(matrix, axes='szyz'))
        row['rlnOriginXAngst'], row['rlnOriginYAngst'] = shifts[:2]
        row['rlnAnglePsi'] = -(angles[0] + angles[2])

    def _alignProjToRow(self, alignment, row):
        matrix = np.linalg.inv(alignment.getMatrix())
        shifts = -tfs.translation_from_matrix(matrix)
        shifts *= self._pixelSize
        angles = -np.rad2deg(tfs.euler_from_matrix(matrix, axes='szyz'))
        row['rlnOriginXAngst'], row['rlnOriginYAngst'], row['rlnOriginZAngst'] = shifts
        row['rlnAngleRot'], row['rlnAngleTilt'], row['rlnAnglePsi'] = angles

    def _partToRow(self, part, row):
        row['rlnImageId'] = part.getObjId()

        # Add coordinate information
        coord = part.getCoordinate()
        if coord is not None:
            x, y = coord.getPosition()
            row['rlnCoordinateX'] = x
            row['rlnCoordinateY'] = y
            # Add some specific coordinate attributes
            self._objToRow(coord, row, self._coordLabels)
            micName = coord.getMicName()
            if micName:
                row['rlnMicrographName'] = str(micName.replace(" ", ""))
            else:
                if coord.getMicId():
                    row['rlnMicrographName'] = str(coord.getMicId())

        index, fn = part.getLocation()
        if self.outputStack:
            row['rlnOriginalParticleName'] = locationToRelion(index, fn)
            index, fn = self._counter, self._relOutputStack
            if self._counter > 0:
                self._ih.convert(part, (index, self.outputStack))
        else:
            if self.outputDir is not None:
                fn = self._filesDict.get(fn, fn)

        row['rlnImageName'] = locationToRelion(index, fn)

        # Set CTF values
        if self._setCtf:
            self._ctfToRow(part.getCTF(), row)

        # Set alignment if necessary
        if self._setAlign:
            self._setAlign(part.getTransform(), row)

        # Set additional labels if present
        self._objToRow(part, row, self._extraLabels)

        # Add now the new Optics Group stuff
        row['rlnOpticsGroup'] = part.getAttributeValue('_rlnOpticsGroup', 1)

        self._counter += 1

[docs]    def writeSetOfParticles(self, partsSet, starFile, **kwargs):
        # Process the first item and create the table based
        # on the generated columns
        self.update(['rootDir', 'outputDir', 'outputStack'], **kwargs)

        self._optics = OpticsGroups.fromImages(partsSet)
        partRow = OrderedDict()
        firstPart = partsSet.getFirstItem()

        # Convert binaries if required
        if self.outputStack:
            self._relOutputStack = os.path.relpath(self.outputStack,
                                                   os.path.dirname(starFile))
        if self.outputDir is not None:
            forceConvert = kwargs.get('forceConvert', False)
            self._filesDict = convertBinaryFiles(partsSet, self.outputDir,
                                                 forceConvert=forceConvert)

        # Compute some flags from the first particle...
        # when flags are True, some operations will be applied to all particles
        self._preprocessImageRow = kwargs.get('preprocessImageRow', None)

        self._setCtf = kwargs.get('writeCtf', True) and firstPart.hasCTF()

        alignType = kwargs.get('alignType', partsSet.getAlignment())

        if alignType == ALIGN_2D:
            self._setAlign = self._align2DToRow
        elif alignType == ALIGN_PROJ:
            self._setAlign = self._alignProjToRow
        elif alignType == ALIGN_3D:
            raise NotImplementedError(
                "3D alignment conversion for Relion not implemented. "
                "It seems the particles were generated with an incorrect "
                "alignment type. You may either re-launch the protocol that "
                "generates the particles with angles or set 'Consider previous"
                " alignment?' to No")
        elif alignType == ALIGN_NONE:
            self._setAlign = None
        else:
            raise TypeError("Invalid value for alignType: %s" % alignType)

        extraLabels = kwargs.get('extraLabels', [])
        extraLabels.extend(PARTICLE_EXTRA_LABELS)
        self._extraLabels = [l for l in extraLabels
                             if firstPart.hasAttribute('_%s' % l)]

        coord = firstPart.getCoordinate()
        self._coordLabels = []
        if coord is not None:
            self._coordLabels = [l for l in ['rlnClassNumber',
                                             'rlnAutopickFigureOfMerit',
                                             'rlnAnglePsi']
                                 if coord.hasAttribute('_%s' % l)]

        self._postprocessImageRow = kwargs.get('postprocessImageRow', None)

        self._imageSize = firstPart.getXDim()
        self._pixelSize = firstPart.getSamplingRate() or 1.0

        self._counter = 0  # Mark first conversion as special one
        firstPart.setAcquisition(partsSet.getAcquisition())
        self._partToRow(firstPart, partRow)

        if self._postprocessImageRow:
            self._postprocessImageRow(firstPart, partRow)

        partsTable = self._createTableFromDict(partRow)
        partsTable.addRow(**partRow)

        with open(starFile, 'w') as f:
            # Write particles table
            f.write("# Star file generated with Scipion\n")
            f.write("\n# version 30001\n")
            self._optics.toStar(f)
            f.write("# version 30001\n")
            # Write header first
            partsWriter = Table.Writer(f)
            partsWriter.writeTableName('particles')
            partsWriter.writeHeader(partsTable.getColumns())
            # Write all rows
            for part in partsSet:
                self._partToRow(part, partRow)
                if self._postprocessImageRow:
                    self._postprocessImageRow(part, partRow)
                partsWriter.writeRowValues(partRow.values())
                # partsTable.writeStarLine(f, partRow.values())


[docs]class Reader(ReaderBase):
    ALIGNMENT_LABELS = [
        "rlnOriginXAngst",
        "rlnOriginYAngst",
        "rlnOriginZAngst",
        "rlnAngleRot",
        "rlnAngleTilt",
        "rlnAnglePsi",
    ]

    CTF_LABELS = [
        "rlnDefocusU",
        "rlnDefocusV",
        "rlnDefocusAngle",
        "rlnCtfAstigmatism",
        "rlnCtfFigureOfMerit",
        "rlnCtfMaxResolution"
    ]

    COORD_LABELS = [
        "rlnCoordinateX",
        "rlnCoordinateY",
        "rlnMicrographName",
        # extra labels below
        "rlnAutopickFigureOfMerit",
        "rlnClassNumber",
        "rlnAnglePsi"
    ]

    def __init__(self, **kwargs):
        """
        """
        ReaderBase.__init__(self, **kwargs)

[docs]    def readSetOfParticles(self, starFile, partSet, **kwargs):
        """ Convert a star file into a set of particles.

        Params:
            starFile: the filename of the star file
            partsSet: output particles set

        Keyword Arguments:
            blockName: The name of the data block (default particles)
            alignType: alignment type
            removeDisabled: Remove disabled items

        """
        self._preprocessImageRow = kwargs.get('preprocessImageRow', None)
        self._alignType = kwargs.get('alignType', ALIGN_NONE)

        self._postprocessImageRow = kwargs.get('postprocessImageRow', None)

        self._optics = OpticsGroups.fromStar(starFile)

        self._pixelSize = getattr(self._optics.first(),
                                  'rlnImagePixelSize', 1.0)
        self._invPixelSize = 1. / self._pixelSize

        partsReader = Table.Reader(starFile, tableName='particles')

        firstRow = partsReader.getRow()
        self._setClassId = hasattr(firstRow, 'rlnClassNumber')
        self._setCtf = partsReader.hasAllColumns(self.CTF_LABELS[:3])
        self._setCoord = partsReader.hasAllColumns(self.COORD_LABELS[:3])
        particle = Particle()

        if self._setCtf:
            particle.setCTF(CTFModel())

        self._setAcq = kwargs.get("readAcquisition", True)
        if self._setAcq:
            acq = Acquisition()
            self.rowToAcquisition(self._optics.first(), acq)
            acq.setMagnification(kwargs.get('magnification', 10000))
            partSet.setAcquisition(acq)
        else:
            # readAcquisition=False ONLY during import particles
            # overwrite pixel size and optics
            self._pixelSize = kwargs.get('pixelSize', self._pixelSize)
            acq = partSet.getAcquisition()
            self._optics.updateAll(
                rlnVoltage=acq.getVoltage(),
                rlnSphericalAberration=acq.getSphericalAberration(),
                rlnAmplitudeContrast=acq.getAmplitudeContrast(),
                rlnImagePixelSize=self._pixelSize
            )

        extraLabels = kwargs.get('extraLabels', []) + PARTICLE_EXTRA_LABELS
        self.createExtraLabels(particle, firstRow, extraLabels)

        self._rowToPart(firstRow, particle)
        partSet.setSamplingRate(self._pixelSize)
        self._optics.toImages(partSet)
        partSet.append(particle)

        for row in partsReader:
            self._rowToPart(row, particle)
            partSet.append(particle)

        partSet.setHasCTF(self._setCtf)
        partSet.setAlignment(self._alignType)

    def _rowToPart(self, row, particle):
        particle.setObjId(getattr(row, 'rlnImageId', None))

        if self._preprocessImageRow:
            self._preprocessImageRow(particle, row)

        # Decompose Relion filename
        index, filename = relionToLocation(row.rlnImageName)
        particle.setLocation(index, filename)

        if self._setClassId:
            particle.setClassId(row.rlnClassNumber)

        if self._setCtf:
            self.rowToCtf(row, particle.getCTF())

        self.setParticleTransform(particle, row)
        self.setExtraLabels(particle, row)

        # TODO: coord extra labels, partId, micId,
        if self._setCoord:
            coord = self.rowToCoord(row)
            particle.setCoordinate(coord)

        if self._postprocessImageRow:
            self._postprocessImageRow(particle, row)

[docs]    @staticmethod
    def rowToCoord(row):
        """ Create a Coordinate from the row. """
        coord = Coordinate()
        coord.setPosition(row.rlnCoordinateX,
                          row.rlnCoordinateY)
        coord.setMicName(row.rlnMicrographName)

        return coord

[docs]    @staticmethod
    def rowToCtf(row, ctf):
        """ Create a CTFModel from the row. """
        ctf.setDefocusU(row.rlnDefocusU)
        ctf.setDefocusV(row.rlnDefocusV)
        ctf.setDefocusAngle(row.rlnDefocusAngle)
        ctf.setResolution(row.get('rlnCtfMaxResolution', 0))
        ctf.setFitQuality(row.get('rlnCtfFigureOfMerit', 0))

        if hasattr(row, 'rlnPhaseShift'):
            ctf.setPhaseShift(row.rlnPhaseShift)
        ctf.standardize()

        if hasattr(row, 'rlnCtfImage'):
            ctf.setPsdFile(row.rlnCtfImage)

[docs]    @staticmethod
    def rowToAcquisition(optics, acq):
        acq.setAmplitudeContrast(optics.rlnAmplitudeContrast)
        acq.setSphericalAberration(optics.rlnSphericalAberration)
        acq.setVoltage(optics.rlnVoltage)

[docs]    def setParticleTransform(self, particle, row):
        """ Set the transform values from the row. """

        if ((self._alignType == ALIGN_NONE) or
                not row.hasAnyColumn(self.ALIGNMENT_LABELS)):
            self.setParticleTransform = self.__setParticleTransformNone
        else:
            # Ensure the Transform object exists
            self._angles = np.zeros(3)
            self._shifts = np.zeros(3)

            particle.setTransform(Transform())

            if self._alignType == ALIGN_2D:
                self.setParticleTransform = self.__setParticleTransform2D
            elif self._alignType == ALIGN_PROJ:
                self.setParticleTransform = self.__setParticleTransformProj
            else:
                raise TypeError("Unexpected alignment type: %s"
                                % self._alignType)

        # Call again the modified function
        self.setParticleTransform(particle, row)

    def __setParticleTransformNone(self, particle, row):
        particle.setTransform(None)

    def __setParticleTransform2D(self, particle, row):
        angles = self._angles
        shifts = self._shifts
        ips = self._invPixelSize

        def _get(label):
            return float(getattr(row, label, 0.))

        shifts[0] = _get('rlnOriginXAngst') * ips
        shifts[1] = _get('rlnOriginYAngst') * ips
        angles[2] = -_get('rlnAnglePsi')
        radAngles = -np.deg2rad(angles)
        M = tfs.euler_matrix(radAngles[0], radAngles[1], radAngles[2], 'szyz')
        M[:3, 3] = shifts[:3]
        particle.getTransform().setMatrix(M)

    def __setParticleTransformProj(self, particle, row):
        angles = self._angles
        shifts = self._shifts
        ips = self._invPixelSize

        def _get(label):
            return float(getattr(row, label, 0.))

        shifts[0] = _get('rlnOriginXAngst') * ips
        shifts[1] = _get('rlnOriginYAngst') * ips
        shifts[2] = _get('rlnOriginZAngst') * ips

        angles[0] = _get('rlnAngleRot')
        angles[1] = _get('rlnAngleTilt')
        angles[2] = _get('rlnAnglePsi')

        radAngles = -np.deg2rad(angles)

        # TODO: jmrt: Maybe we should test performance and consider if keeping
        # TODO: the matrix and not creating one everytime will make things faster
        M = tfs.euler_matrix(radAngles[0], radAngles[1], radAngles[2], 'szyz')
        M[:3, 3] = -shifts[:3]
        M = np.linalg.inv(M)
        particle.getTransform().setMatrix(M)