Source code for pwem.convert.headers

# **************************************************************************
# *
# * Authors:     Roberto Marabini (roberto@cnb.csic.es)
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 3 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
"""
This module contains converter functions that will serve to:
1. define ccp4 environ
TODO:
2. Read/Write CCP4 specific files
"""

import collections
import struct
from math import isnan

from pwem import ALL_MRC_EXTENSIONS
from pyworkflow.utils import getExt
from ..emlib.image import ImageHandler

# File formats
MRC = 0
SPIDER = 1
UNKNOWNFORMAT = 2


[docs]class Ccp4Header: ORIGIN = 0 # save coordinate origin in the mrc header field=Origin (Angstrom) START = 1 # save coordinate origin in the mrc header field=start (pixel) """ In spite of the name this is the MRC2014 format no the CCP4. In an MRC EM file the origin is typically in header fields called xorigin, yorigin, zorigin which are specific to EM data (MRC 2000 format) and are not part of the nearly identical CCP4 format used for x-ray maps that Coot is expecting. The header is organised as 56 words followed by space for ten 80 character text labels as follows:: 1 NC # of Columns (fastest changing in map) 2 NR # of Rows 3 NS # of Sections (slowest changing in map) 4 MODE Data type 0 = envelope stored as signed bytes (from -128 lowest to 127 highest) 1 = Image stored as Integer*2 2 = Image stored as Reals 3 = Transform stored as Complex Integer*2 4 = Transform stored as Complex Reals 5 == 0 Note: Mode 2 is the normal mode used in the CCP4 programs. Other modes than 2 and 0 may NOT WORK 5 NCSTART Number of first COLUMN in map 6 NRSTART Number of first ROW in map 7 NSSTART Number of first SECTION in map 8 NX Number of intervals along X 9 NY Number of intervals along Y 10 NZ Number of intervals along Z 11 X length Cell Dimensions (Angstroms) 12 Y length " 13 Z length " 14 Alpha Cell Angles (Degrees) 15 Beta " 16 Gamma " 17 MAPC Which axis corresponds to Cols. (1,2,3 for X,Y,Z) 18 MAPR Which axis corresponds to Rows. (1,2,3 for X,Y,Z) 19 MAPS Which axis corresponds to Sects. (1,2,3 for X,Y,Z) 20 AMIN Minimum density value 21 AMAX Maximum density value 22 AMEAN Mean density value (Average) 23 ISPG Space group number 24 NSYMBT Number of bytes used for storing symmetry operators 25 LSKFLG Flag for skew transformation, =0 none, =1 if foll 26-34 SKWMAT Skew matrix S (in order S11, S12, S13, S21 etc) if LSKFLG .ne. 0. 35-37 SKWTRN Skew translation t if LSKFLG .ne. 0. Skew transformation is from standard orthogonal coordinate frame (as used for atoms) to orthogonal map frame, as Xo(map) = S * (Xo(atoms) - t) 38 future use (some of these are used by the MSUBSX routines . " in MAPBRICK, MAPCONT and FRODO) . " (all set to zero by default) . " 50-52 ORIGIN origin in X,Y,Z (pixel units) used for Fourier transforms (modes 3 and 4) 53 MAP Character string 'MAP ' to identify file type 54 MACHST Machine stamp indicating the machine type which wrote file 55 ARMS Rms deviation of map from mean density 56 NLABL Number of labels being used 57-256 LABEL(20,10) 10 80 character text labels (ie. A4 format) Symmetry records follow - if any - stored as text as in International Tables, operators separated by ``*`` and grouped into 'lines' of 80 characters (i.e. symmetry operators do not cross the ends of the 80-character 'lines' and the 'lines' do not terminate in a ``*``). """ def __init__(self, fileName, readHeader=False): self.isMovie = False self._header = collections.OrderedDict() self.chain = "< 3i i 3i 3i 3f 36s i 104s 3f" self._name = self.cleanFileNameAnnotation(fileName) if readHeader: self.loaded = True self.readHeader() else: self.loaded = False
[docs] def setOrigin(self, originTransformShift): # TODO: should we use originX,Y,Z and set this to 0 self._header['originX'] = originTransformShift[0] self._header['originY'] = originTransformShift[1] self._header['originZ'] = originTransformShift[2] self._header['NCSTART'] = 0 self._header['NRSTART'] = 0 self._header['NSSTART'] = 0
# def getOrigin(self): # return self._header['originX'], \ # self._header['originY'], \ # self._header['originZ']
[docs] def getOrigin(self, changeSign=False): """ Return in Angstroms""" x = self._header['originX'] y = self._header['originY'] z = self._header['originZ'] if (x == 0. and y == 0. and z == 0.) or isnan(x) or isnan(y) or isnan(z): sampling = self.computeSampling() x = self._header['NCSTART'] * sampling y = self._header['NRSTART'] * sampling z = self._header['NSSTART'] * sampling if changeSign: x *= -1. y *= -1. z *= -1. return x, y, z
[docs] def setSampling(self, sampling): self._header['Xlength'] = self._header['NX'] * sampling self._header['Ylength'] = self._header['NY'] * sampling self._header['Zlength'] = self._header['NZ'] * sampling
[docs] def getSampling(self): return self._header['Xlength'] / self._header['NX'], \ self._header['Ylength'] / self._header['NY'], \ self._header['Zlength'] / self._header['NZ']
[docs] def setMode(self, mode): self._header['Mode'] = mode
[docs] def setStartPixel(self, originTransformShift): # PIXEL """input pixels""" self._header['originX'] = 0. # originTransformShift[0] self._header['originY'] = 0. # originTransformShift[1] self._header['originZ'] = 0. # originTransformShift[2] self._header['NCSTART'] = originTransformShift[0] self._header['NRSTART'] = originTransformShift[1] self._header['NSSTART'] = originTransformShift[2]
[docs] def setStartAngstrom(self, originTransformShift, sampling): # Angstrom """input Angstroms""" self.setStartPixel(tuple([int(round(x / sampling)) for x in originTransformShift]))
[docs] def getStartPixel(self): # PIXEL """Returns pixels""" return self._header['NCSTART'], \ self._header['NRSTART'], \ self._header['NSSTART']
[docs] def getStartAngstrom(self, sampling): # Angstrom """Returns Angstroms""" return tuple([x * sampling for x in self.getStartPixel()])
[docs] def getDims(self): return self._header['NC'], \ self._header['NR'], \ self._header['NS']
[docs] def setDims(self, col, row, sec): self._header['NC'] = col self._header['NR'] = row self._header['NS'] = sec
[docs] def setGridSampling(self, x, y, z): self._header['NX'] = x self._header['NY'] = y self._header['NZ'] = z
[docs] def getGridSampling(self): return self._header['NX'], \ self._header['NY'], \ self._header['NZ']
[docs] def setCellDimensions(self, x, y, z): self._header['Xlength'] = x self._header['Ylength'] = y self._header['Zlength'] = z
[docs] def getCellDimensions(self): """ Returns dimensions in Angstroms""" return self._header['Xlength'], \ self._header['Ylength'], \ self._header['Zlength']
[docs] def setISPG(self, value): self._header['ISPG'] = value
[docs] def getISPG(self): return self._header['ISPG']
[docs] def getNumberOfObjects(self): # Special case for volume stacks... return max(int(self._header['NS'] / self._header['NZ']),1)
[docs] def getXYZN(self): if self.getISPG() and not self.isMovie: x, y, z = self.getGridSampling() # Stack of volumes n = self.getNumberOfObjects() else: x, y, z = self.getDims() n = 1 if self.isMovie: n = z z = 1 return x, y, z, n
[docs] def readHeader(self): # read header f = open(self._name, 'rb') s = f.read(52 * 4) # read header from word 0 to 51 f.close() a = struct.unpack(self.chain, s) # fill dicionary self._header['NC'] = a[0] self._header['NR'] = a[1] self._header['NS'] = a[2] self._header['Mode'] = a[3] self._header['NCSTART'] = a[4] self._header['NRSTART'] = a[5] self._header['NSSTART'] = a[6] self._header['NX'] = a[7] self._header['NY'] = a[8] self._header['NZ'] = a[9] self._header['Xlength'] = a[10] self._header['Ylength'] = a[11] self._header['Zlength'] = a[12] self._header['dummy1'] = a[13] + b"\n" # "< 3i i 3i 3i 3f 36s" self._header['ISPG'] = a[14] self._header['dummy2'] = a[15] + b"\n" # "< 3i i 3i 3i 3f 36s i 104s" self._header['originX'] = a[16] self._header['originY'] = a[17] self._header['originZ'] = a[18]
[docs] def getHeader(self): return self._header
[docs] def setHeader(self, newHeader): self._header = newHeader
[docs] def writeHeader(self): ss = struct.Struct(self.chain) t = tuple(self._header.values()) packed_data = ss.pack(*t) # Python 3 will fail writing bytes in a text file unless it's open # as rb or wb for reading and writing binaries, respectively. f = open(self._name, 'rb+') f.write(packed_data) f.close()
def __str__(self): s = "" for k, v in self._header.items(): s += "%s: %s\n" % (str(k), str(v)) return s
[docs] def computeSampling(self): return self._header['Zlength'] / self._header['NZ']
[docs] def copyCCP4Header(self, scipionOriginShifts, sampling, originField=START): """This function updates the values of sampling and origin in the header and save the header to disk. It has been designed for Volumes, it will NOT work for sets of volumes or images.""" if not self.loaded: self.readHeader() x, y, z = self.getDims() self.setCellDimensions(x * sampling, y * sampling, z * sampling) if originField == self.ORIGIN: self.setOrigin(scipionOriginShifts) else: self.setStartAngstrom(scipionOriginShifts, sampling) self.writeHeader()
[docs] def cleanFileNameAnnotation(self, fileName): ext = getExt(fileName) self.isMovie = False if ext == '.mrcs': self.isMovie = True elif ':mrcs' in ext : # Movie --> dims = [X, Y, Z = 1, N] self.isMovie = True fileName = fileName.replace(':mrcs', '') elif ':mrc' in ext: # Volume --> dims = [X, Y, Z, N = 1] fileName = fileName.replace(':mrc', '') return fileName
[docs] @classmethod def fixFile(cls, inFileName, outFileName, scipionOriginShifts, sampling=1.0, originField=START): """ Create new CCP4 binary file and fix its header. """ ImageHandler().convert(inFileName, outFileName) x, y, z, ndim = ImageHandler().getDimensions(inFileName) ccp4header = Ccp4Header(outFileName, readHeader=True) ccp4header.setGridSampling(x, y, z) ccp4header.setCellDimensions(x * sampling, y * sampling, z * sampling) if originField == cls.ORIGIN: ccp4header.setOrigin(scipionOriginShifts) else: ccp4header.setStartAngstrom(scipionOriginShifts, sampling) ccp4header.writeHeader()
[docs] @classmethod def isCompatible(cls, file): return getFileFormat(file) == MRC
[docs]def getFileFormat(fileName): ext = getExt(fileName).replace(".","") if ext in ALL_MRC_EXTENSIONS: return MRC elif ext == 'spi' or ext == 'vol': return SPIDER else: return UNKNOWNFORMAT
[docs]def fixVolume(paths): """ Fixes mrc (any extension) files that are defined as stacks but are meant to be volumes as defined in the mrc 2014 specs. Setting ISPG to 1. :param paths: accept a string or a list of strings :return: nothing, but mrc files header end up being updated. """ if isinstance(paths, str): paths = [paths] for path in paths: ccp4header = Ccp4Header(path, readHeader=True) ccp4header.setISPG(1) ccp4header.writeHeader()
[docs]def setMRCSamplingRate(paths, samplingRate): """ Sets the mrc file sampling rate value :param paths: accept a string or a list of strings :return: nothing, but mrc files header end up being updated with the right sampling rate """ if isinstance(paths, str): paths = [paths] for path in paths: ccp4header = Ccp4Header(path, readHeader=True) ccp4header.setSampling(samplingRate) ccp4header.writeHeader()