Source code for pwem.convert.headers
# **************************************************************************
# *
# * Authors: Roberto Marabini (roberto@cnb.csic.es)
# *
# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 3 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307 USA
# *
# * All comments concerning this program package may be sent to the
# * e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
"""
This module contains converter functions that will serve to:
1. define ccp4 environ
TODO:
2. Read/Write CCP4 specific files
"""
import collections
import struct
from math import isnan
from pwem import ALL_MRC_EXTENSIONS
from pyworkflow.utils import getExt
from ..emlib.image import ImageHandler
# File formats
MRC = 0
SPIDER = 1
UNKNOWNFORMAT = 2
[docs]class Ccp4Header:
ORIGIN = 0 # save coordinate origin in the mrc header field=Origin (Angstrom)
START = 1 # save coordinate origin in the mrc header field=start (pixel)
"""
In spite of the name this is the MRC2014 format no the CCP4.
In an MRC EM file the origin is typically in header fields called
xorigin, yorigin, zorigin which are specific to EM data (MRC 2000 format)
and are not part of the nearly identical CCP4 format used for x-ray maps
that Coot is expecting.
The header is organised as 56 words followed by space for ten 80
character text labels as follows::
1 NC # of Columns (fastest changing in map)
2 NR # of Rows
3 NS # of Sections (slowest changing in map)
4 MODE Data type
0 = envelope stored as signed bytes (from
-128 lowest to 127 highest)
1 = Image stored as Integer*2
2 = Image stored as Reals
3 = Transform stored as Complex Integer*2
4 = Transform stored as Complex Reals
5 == 0
Note: Mode 2 is the normal mode used in
the CCP4 programs. Other modes than 2 and 0
may NOT WORK
5 NCSTART Number of first COLUMN in map
6 NRSTART Number of first ROW in map
7 NSSTART Number of first SECTION in map
8 NX Number of intervals along X
9 NY Number of intervals along Y
10 NZ Number of intervals along Z
11 X length Cell Dimensions (Angstroms)
12 Y length "
13 Z length "
14 Alpha Cell Angles (Degrees)
15 Beta "
16 Gamma "
17 MAPC Which axis corresponds to Cols. (1,2,3 for X,Y,Z)
18 MAPR Which axis corresponds to Rows. (1,2,3 for X,Y,Z)
19 MAPS Which axis corresponds to Sects.
(1,2,3 for X,Y,Z)
20 AMIN Minimum density value
21 AMAX Maximum density value
22 AMEAN Mean density value (Average)
23 ISPG Space group number
24 NSYMBT Number of bytes used for storing symmetry
operators
25 LSKFLG Flag for skew transformation, =0 none, =1 if foll
26-34 SKWMAT Skew matrix S (in order S11, S12, S13, S21 etc)
if LSKFLG .ne. 0.
35-37 SKWTRN Skew translation t if LSKFLG .ne. 0.
Skew transformation is from standard orthogonal
coordinate frame (as used for atoms)
to orthogonal map frame, as
Xo(map) = S * (Xo(atoms) - t)
38 future use (some of these are used by the MSUBSX routines
. " in MAPBRICK, MAPCONT and FRODO)
. " (all set to zero by default)
. "
50-52 ORIGIN origin in X,Y,Z (pixel units) used for Fourier transforms (modes 3 and 4)
53 MAP Character string 'MAP ' to identify file type
54 MACHST Machine stamp indicating the machine type
which wrote file
55 ARMS Rms deviation of map from mean density
56 NLABL Number of labels being used
57-256 LABEL(20,10) 10 80 character text labels (ie. A4 format)
Symmetry records follow - if any - stored as text as in International
Tables, operators separated by ``*`` and grouped into 'lines' of 80
characters (i.e. symmetry operators do not cross the ends of the
80-character 'lines' and the 'lines' do not terminate in a ``*``).
"""
def __init__(self, fileName, readHeader=False):
self.isMovie = False
self._header = collections.OrderedDict()
self.chain = "< 3i i 3i 3i 3f 36s i 104s 3f"
self._name = self.cleanFileNameAnnotation(fileName)
if readHeader:
self.loaded = True
self.readHeader()
else:
self.loaded = False
[docs] def setOrigin(self, originTransformShift):
# TODO: should we use originX,Y,Z and set this to 0
self._header['originX'] = originTransformShift[0]
self._header['originY'] = originTransformShift[1]
self._header['originZ'] = originTransformShift[2]
self._header['NCSTART'] = 0
self._header['NRSTART'] = 0
self._header['NSSTART'] = 0
# def getOrigin(self):
# return self._header['originX'], \
# self._header['originY'], \
# self._header['originZ']
[docs] def getOrigin(self, changeSign=False):
""" Return in Angstroms"""
x = self._header['originX']
y = self._header['originY']
z = self._header['originZ']
if (x == 0. and y == 0. and z == 0.) or isnan(x) or isnan(y) or isnan(z):
sampling = self.computeSampling()
x = self._header['NCSTART'] * sampling
y = self._header['NRSTART'] * sampling
z = self._header['NSSTART'] * sampling
if changeSign:
x *= -1.
y *= -1.
z *= -1.
return x, y, z
[docs] def setSampling(self, sampling):
self._header['Xlength'] = self._header['NX'] * sampling
self._header['Ylength'] = self._header['NY'] * sampling
self._header['Zlength'] = self._header['NZ'] * sampling
[docs] def getSampling(self):
return self._header['Xlength'] / self._header['NX'], \
self._header['Ylength'] / self._header['NY'], \
self._header['Zlength'] / self._header['NZ']
[docs] def setStartPixel(self, originTransformShift): # PIXEL
"""input pixels"""
self._header['originX'] = 0. # originTransformShift[0]
self._header['originY'] = 0. # originTransformShift[1]
self._header['originZ'] = 0. # originTransformShift[2]
self._header['NCSTART'] = originTransformShift[0]
self._header['NRSTART'] = originTransformShift[1]
self._header['NSSTART'] = originTransformShift[2]
[docs] def setStartAngstrom(self, originTransformShift, sampling): # Angstrom
"""input Angstroms"""
self.setStartPixel(tuple([int(round(x / sampling)) for x in
originTransformShift]))
[docs] def getStartPixel(self): # PIXEL
"""Returns pixels"""
return self._header['NCSTART'], \
self._header['NRSTART'], \
self._header['NSSTART']
[docs] def getStartAngstrom(self, sampling): # Angstrom
"""Returns Angstroms"""
return tuple([x * sampling for x in self.getStartPixel()])
[docs] def setDims(self, col, row, sec):
self._header['NC'] = col
self._header['NR'] = row
self._header['NS'] = sec
[docs] def setGridSampling(self, x, y, z):
self._header['NX'] = x
self._header['NY'] = y
self._header['NZ'] = z
[docs] def getGridSampling(self):
return self._header['NX'], \
self._header['NY'], \
self._header['NZ']
[docs] def setCellDimensions(self, x, y, z):
self._header['Xlength'] = x
self._header['Ylength'] = y
self._header['Zlength'] = z
[docs] def getCellDimensions(self):
""" Returns dimensions in Angstroms"""
return self._header['Xlength'], \
self._header['Ylength'], \
self._header['Zlength']
[docs] def getNumberOfObjects(self):
# Special case for volume stacks...
return max(int(self._header['NS'] / self._header['NZ']),1)
[docs] def getXYZN(self):
if self.getISPG() and not self.isMovie:
x, y, z = self.getGridSampling() # Stack of volumes
n = self.getNumberOfObjects()
else:
x, y, z = self.getDims()
n = 1
if self.isMovie:
n = z
z = 1
return x, y, z, n
[docs] def readHeader(self):
# read header
f = open(self._name, 'rb')
s = f.read(52 * 4) # read header from word 0 to 51
f.close()
a = struct.unpack(self.chain, s)
# fill dicionary
self._header['NC'] = a[0]
self._header['NR'] = a[1]
self._header['NS'] = a[2]
self._header['Mode'] = a[3]
self._header['NCSTART'] = a[4]
self._header['NRSTART'] = a[5]
self._header['NSSTART'] = a[6]
self._header['NX'] = a[7]
self._header['NY'] = a[8]
self._header['NZ'] = a[9]
self._header['Xlength'] = a[10]
self._header['Ylength'] = a[11]
self._header['Zlength'] = a[12]
self._header['dummy1'] = a[13] + b"\n" # "< 3i i 3i 3i 3f 36s"
self._header['ISPG'] = a[14]
self._header['dummy2'] = a[15] + b"\n" # "< 3i i 3i 3i 3f 36s i 104s"
self._header['originX'] = a[16]
self._header['originY'] = a[17]
self._header['originZ'] = a[18]
[docs] def writeHeader(self):
ss = struct.Struct(self.chain)
t = tuple(self._header.values())
packed_data = ss.pack(*t)
# Python 3 will fail writing bytes in a text file unless it's open
# as rb or wb for reading and writing binaries, respectively.
f = open(self._name, 'rb+')
f.write(packed_data)
f.close()
def __str__(self):
s = ""
for k, v in self._header.items():
s += "%s: %s\n" % (str(k), str(v))
return s
[docs] def copyCCP4Header(self, scipionOriginShifts, sampling, originField=START):
"""This function updates the values of sampling and
origin in the header and save the header to disk.
It has been designed for Volumes, it will NOT work for sets
of volumes or images."""
if not self.loaded:
self.readHeader()
x, y, z = self.getDims()
self.setCellDimensions(x * sampling, y * sampling, z * sampling)
if originField == self.ORIGIN:
self.setOrigin(scipionOriginShifts)
else:
self.setStartAngstrom(scipionOriginShifts, sampling)
self.writeHeader()
[docs] def cleanFileNameAnnotation(self, fileName):
ext = getExt(fileName)
self.isMovie = False
if ext == '.mrcs':
self.isMovie = True
elif ':mrcs' in ext : # Movie --> dims = [X, Y, Z = 1, N]
self.isMovie = True
fileName = fileName.replace(':mrcs', '')
elif ':mrc' in ext: # Volume --> dims = [X, Y, Z, N = 1]
fileName = fileName.replace(':mrc', '')
return fileName
[docs] @classmethod
def fixFile(cls, inFileName, outFileName, scipionOriginShifts,
sampling=1.0, originField=START):
""" Create new CCP4 binary file and fix its header.
"""
ImageHandler().convert(inFileName, outFileName)
x, y, z, ndim = ImageHandler().getDimensions(inFileName)
ccp4header = Ccp4Header(outFileName, readHeader=True)
ccp4header.setGridSampling(x, y, z)
ccp4header.setCellDimensions(x * sampling, y * sampling, z * sampling)
if originField == cls.ORIGIN:
ccp4header.setOrigin(scipionOriginShifts)
else:
ccp4header.setStartAngstrom(scipionOriginShifts, sampling)
ccp4header.writeHeader()
[docs]def getFileFormat(fileName):
ext = getExt(fileName).replace(".","")
if ext in ALL_MRC_EXTENSIONS:
return MRC
elif ext == 'spi' or ext == 'vol':
return SPIDER
else:
return UNKNOWNFORMAT
[docs]def fixVolume(paths):
"""
Fixes mrc (any extension) files that are defined as stacks but are meant to be volumes as defined in the mrc 2014
specs. Setting ISPG to 1.
:param paths: accept a string or a list of strings
:return: nothing, but mrc files header end up being updated.
"""
if isinstance(paths, str):
paths = [paths]
for path in paths:
ccp4header = Ccp4Header(path, readHeader=True)
ccp4header.setISPG(1)
ccp4header.writeHeader()
[docs]def setMRCSamplingRate(paths, samplingRate):
"""
Sets the mrc file sampling rate value
:param paths: accept a string or a list of strings
:return: nothing, but mrc files header end up being updated with the right sampling rate
"""
if isinstance(paths, str):
paths = [paths]
for path in paths:
ccp4header = Ccp4Header(path, readHeader=True)
ccp4header.setSampling(samplingRate)
ccp4header.writeHeader()