Source code for eman2.convert.convert

# **************************************************************************
# *
# * Authors:     J.M. De la Rosa Trevin (delarosatrevin@scilifelab.se) [1]
# *              Laura del Cano (ldelcano@cnb.csic.es) [1]
# *              Josue Gomez Blanco (josue.gomez-blanco@mcgill.ca) [1]
# *              Grigory Sharov (gsharov@mrc-lmb.cam.ac.uk) [2]
# *
# * [1] Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# * [2] MRC Laboratory of Molecular Biology (MRC-LMB)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 3 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************

import glob
import json
import numpy
import os


import pyworkflow.utils as pwutils
from pwem.objects.data import Coordinate, Particle, Transform
import pwem.constants as emcts
from pwem.emlib.image import ImageHandler
import pwem.emlib.metadata as md

from .. import Plugin


[docs]def loadJson(jsonFn): """ This function loads the Json dictionary into memory """ with open(jsonFn) as jsonFile: jsonDict = json.load(jsonFile) return jsonDict
[docs]def writeJson(jsonDict, jsonFn): """ This function write a Json dictionary """ with open(jsonFn, 'w') as outfile: json.dump(jsonDict, outfile)
[docs]def readCTFModel(ctfModel, filename): """ Set values for the ctfModel. :param ctfModel: output CTF model :param filename: input file to parse """ jsonDict = loadJson(filename) keyPos = None ctfPhaseShift = 0.0 if 'ctf_frame' in jsonDict: keyPos = jsonDict['ctf_frame'][1] elif 'ctf' in jsonDict: keyPos = jsonDict['ctf'][0] else: setWrongDefocus(ctfModel) if keyPos: defocus = float(keyPos['defocus']) defocusAngle = float(keyPos['dfang']) dfdiff = float(keyPos['dfdiff']) ampcont = float(keyPos['ampcont']) defocusU = 10000.0 * defocus + 5000.0 * dfdiff defocusV = 20000.0 * defocus - defocusU ctfPhaseShift = calculatePhaseShift(ampcont) ctfModel.setStandardDefocus(defocusU, defocusV, defocusAngle) if 'ctf_im2d' in jsonDict: # psdFile = jsonDict['ctf_im2d']['__image__'][0] fnBase = pwutils.removeExt(filename) + '_jsonimg' psdFile = "1@%s.hdf" % fnBase if os.path.exists(psdFile): ctfModel.setPsdFile(psdFile) ctfModel.setPhaseShift(float(ctfPhaseShift))
[docs]def setWrongDefocus(ctfModel): """ Set parameters if results parsing has failed. :param ctfModel: the model to be updated """ ctfModel.setDefocusU(-999) ctfModel.setDefocusV(-1) ctfModel.setDefocusAngle(-999)
[docs]def writeCTFModel(ctfObj, filename): """ Write a CTFModel object as Xmipp .ctfparam""" pass
[docs]def jsonToCtfModel(ctfJsonFn, ctfModel): """ Create a CTFModel from a json file """ mdFn = str(ctfJsonFn).replace('particles', 'info') mdFn = mdFn.split('__ctf_flip')[0] + '_info.json' if os.path.exists(mdFn): readCTFModel(ctfModel, mdFn)
[docs]def readSetOfCoordinates(workDir, micSet, coordSet, invertY=False): """ Read from Eman .json files. :param workDir: where the Eman boxer output files are located. :param micSet: the SetOfMicrographs to associate the .json, which name should be the same of the micrographs. :param coordSet: the SetOfCoordinates that will be populated. """ # read boxSize from info/project.json jsonFnbase = os.path.join(workDir, 'info', 'project.json') jsonBoxDict = loadJson(jsonFnbase) size = int(jsonBoxDict["global.boxsize"]) jsonFninfo = os.path.join(workDir, 'info/') for mic in micSet: micBase = pwutils.removeBaseExt(mic.getFileName()) micPosFn = ''.join(glob.glob(jsonFninfo + '*' + micBase + '_info.json')) readCoordinates(mic, micPosFn, coordSet, invertY) coordSet.setBoxSize(size)
[docs]def readCoordinates(mic, fileName, coordsSet, invertY=False): """ Parse coords file and populate coordsSet. :param mic: input micrograph object :param fileName: input file to parse :param coordsSet: output set of coords :param invertY: flip Y axis """ if os.path.exists(fileName): jsonPosDict = loadJson(fileName) if "boxes" in jsonPosDict: boxes = jsonPosDict["boxes"] for box in boxes: x, y = box[:2] if invertY: y = mic.getYDim() - y coord = Coordinate() coord.setPosition(x, y) coord.setMicrograph(mic) coordsSet.append(coord)
[docs]def writeSetOfMicrographs(micSet, filename): """ Simplified function borrowed from xmipp. """ mdata = md.MetaData() for img in micSet: objId = mdata.addObject() imgRow = md.Row() imgRow.setValue(md.MDL_ITEM_ID, objId) index, fname = img.getLocation() fn = ImageHandler.locationToXmipp((index, fname)) imgRow.setValue(md.MDL_MICROGRAPH, fn) if img.isEnabled(): enabled = 1 else: enabled = -1 imgRow.setValue(md.MDL_ENABLED, enabled) imgRow.writeToMd(mdata, objId) mdata.write('Micrographs@%s' % filename)
[docs]def readSetOfParticles(lstFile, partSet, copyOrLink, direc): for index, fn in iterLstFile(lstFile): item = Particle() # set full path to particles stack file abspath = os.path.abspath(lstFile) fn = abspath.replace('sets/%s' % os.path.basename(lstFile), '') + fn newFn = os.path.join(direc, os.path.basename(fn)) if not os.path.exists(newFn): copyOrLink(fn, newFn) item.setLocation(index, newFn) partSet.append(item)
[docs]def writeSetOfParticles(partSet, path, **kwargs): """ Convert the imgSet particles to .hdf files as expected by Eman. This function should be called from a current dir where the images in the set are available. """ ext = pwutils.getExt(partSet.getFirstItem().getFileName())[1:] if ext == 'hdf': # create links if input has hdf format for fn in partSet.getFiles(): newFn = pwutils.removeBaseExt(fn).split('__ctf')[0] + '.hdf' newFn = os.path.join(path, newFn) pwutils.createLink(fn, newFn) print(" %s -> %s" % (fn, newFn)) else: firstCoord = partSet.getFirstItem().getCoordinate() or None hasMicName = False if firstCoord: hasMicName = firstCoord.getMicName() or False fileName = "" a = 0 proc = Plugin.createEmanProcess(args='write') for i, part in iterParticlesByMic(partSet): micName = micId = part.getMicId() if hasMicName: micName = pwutils.removeBaseExt(part.getCoordinate().getMicName()) objDict = part.getObjDict() if not micId: micId = 0 suffix = kwargs.get('suffix', '') if hasMicName and (micName != str(micId)): objDict['hdfFn'] = os.path.join(path, "%s%s.hdf" % (micName, suffix)) else: objDict['hdfFn'] = os.path.join(path, "mic_%06d%s.hdf" % (micId, suffix)) alignType = kwargs.get('alignType') if alignType != emcts.ALIGN_NONE: shift, angles = alignmentToRow(part.getTransform(), alignType) # json cannot encode arrays so I convert them to lists # json fail if has -0 as value objDict['_shifts'] = shift.tolist() objDict['_angles'] = angles.tolist() objDict['_itemId'] = part.getObjId() # the index in EMAN begins with 0 if fileName != objDict['_filename']: fileName = objDict['_filename'] if objDict['_index'] == 0: # TODO: Index appears to be the problem (when not given it works ok) a = 0 else: a = 1 objDict['_index'] = int(objDict['_index'] - a) # Write the e2converter.py process from where to read the image print(json.dumps(objDict), file=proc.stdin, flush=True) proc.stdout.readline() proc.kill()
[docs]def getImageDimensions(imageFile): """ This function will allow us to use EMAN2 to read some formats not currently supported by the native image library (Xmipp). Underneath, it will call a script to do the job. """ proc = Plugin.createEmanProcess('e2ih.py', args=imageFile) return tuple(map(int, proc.stdout.readline().split()))
[docs]def convertImage(inputLoc, outputLoc): """ This function will allow us to use EMAN2 to write some formats not currently supported by the native image library (Xmipp). Underneath, it will call an script to do the job. """ def _getFn(loc): """ Use similar naming convention as in Xmipp. This does not works for EMAN out of here. """ if isinstance(loc, tuple): if loc[0] != emcts.NO_INDEX: return "%06d@%s" % loc return loc[1] else: return loc proc = Plugin.createEmanProcess('e2ih.py', args='%s %s' % (_getFn(inputLoc), _getFn(outputLoc))) proc.wait()
[docs]def iterLstFile(filename): with open(filename) as f: for line in f: if '#' not in line: # Decompose Eman filename index, filename = int(line.split()[0]) + 1, line.split()[1] yield index, filename
[docs]def geometryFromMatrix(matrix, inverseTransform): """ Convert the transformation matrix to shifts and angles. :param matrix: input matrix :return: two lists, shifts and angles """ from pwem.convert.transformations import translation_from_matrix, euler_from_matrix if inverseTransform: from numpy.linalg import inv matrix = inv(matrix) shifts = -translation_from_matrix(matrix) else: shifts = translation_from_matrix(matrix) angles = -numpy.rad2deg(euler_from_matrix(matrix, axes='szyz')) return shifts, angles
[docs]def matrixFromGeometry(shifts, angles, inverseTransform): """ Create the transformation matrix from given 2D shifts in X and Y and the 3 euler angles. :param shifts: input list of shifts :param angles: input list of angles :return matrix """ from pwem.convert.transformations import euler_matrix from numpy import deg2rad radAngles = -deg2rad(angles) M = euler_matrix(radAngles[0], radAngles[1], radAngles[2], 'szyz') if inverseTransform: from numpy.linalg import inv M[:3, 3] = -shifts[:3] M = inv(M) else: M[:3, 3] = shifts[:3] return M
[docs]def alignmentToRow(alignment, alignType): """ is2D == True-> matrix is 2D (2D images alignment) otherwise matrix is 3D (3D volume alignment or projection) invTransform == True -> for xmipp implies projection -> for xmipp implies alignment """ # is2D = alignType == em.ALIGN_2D # inverseTransform = alignType == em.ALIGN_PROJ # transformation matrix is processed here because # it uses routines available through scipion python matrix = alignment.getMatrix() return geometryFromMatrix(matrix, True)
[docs]def rowToAlignment(alignmentList, alignType): """ is2D == True-> matrix is 2D (2D images alignment) otherwise matrix is 3D (3D volume alignment or projection) invTransform == True -> for xmipp implies projection """ # use all angles in 2D since we might have mirrors # is2D = alignType == em.ALIGN_2D inverseTransform = alignType == emcts.ALIGN_PROJ alignment = Transform() angles = numpy.zeros(3) shifts = numpy.zeros(3) shifts[0] = alignmentList[3] shifts[1] = alignmentList[4] shifts[2] = 0 angles[0] = alignmentList[0] angles[1] = alignmentList[1] angles[2] = alignmentList[2] matrix = matrixFromGeometry(shifts, angles, inverseTransform) alignment.setMatrix(matrix) return alignment
[docs]def iterParticlesByMic(partSet): """ Iterate the particles ordered by micrograph """ for i, part in enumerate(partSet.iterItems(orderBy=['_micId', 'id'], direction='ASC')): yield i, part
[docs]def convertReferences(refSet, outputFn): """ Simplified version of writeSetOfParticles function. Writes out an hdf stack. """ fileName = "" a = 0 proc = Plugin.createEmanProcess(args='write') for part in refSet: objDict = part.getObjDict() objDict['hdfFn'] = outputFn objDict['_itemId'] = part.getObjId() # the index in EMAN begins with 0 if fileName != objDict['_filename']: fileName = objDict['_filename'] if objDict['_index'] == 0: a = 0 else: a = 1 objDict['_index'] = int(objDict['_index'] - a) # Write the e2converter.py process from where to read the image print(json.dumps(objDict), file=proc.stdin, flush=True) proc.stdout.readline() proc.kill()
[docs]def calculatePhaseShift(ampcont): # calculate phase shift as in EMAN2 ctf.cpp if -100.0 < ampcont <= 100.0: PhaseShift = numpy.arcsin(ampcont / 100.0) elif ampcont > 100.0: PhaseShift = numpy.pi - numpy.arcsin(2.0 - ampcont / 100.0) else: PhaseShift = -numpy.pi - numpy.arcsin(-2.0 - ampcont / 100.0) ctfPhaseShift = numpy.rad2deg(PhaseShift) return ctfPhaseShift
[docs]def getLastParticlesParams(directory): """ Return a dictionary containing the params values of the last iteration. Key: Particle index (int) Value: Dict[{coverage: float, score: float, alignMatrix: list[float]}] """ # JSON files with particles params: path/to/particle_parms_NN.json particleParamsPaths = glob.glob(os.path.join(directory, 'particle_parms_*.json')) if not particleParamsPaths: raise FileNotFoundError("Particle params files not found") lastParticleParamsPath = sorted(particleParamsPaths)[-1] particlesParams = json.load(open(lastParticleParamsPath)) output = {} for key, values in particlesParams.items(): # key: '(path/to/particles/basename.hdf', nParticle)' # values: '{"coverage": 1.0, "score": 2.0, "xform.align3d": {"matrix": [...]}}' import re match = re.search(r'(\d+)\)$', key) if not match: continue particleIndex = int(match.group(1)) coverage = values.get("coverage") score = values.get("score") alignMatrix = values.get("xform.align3d", {}).get("matrix") if coverage and score and alignMatrix: customParticleParams = dict( coverage=coverage, score=score, alignMatrix=alignMatrix ) output[particleIndex] = customParticleParams return output