# **************************************************************************
# *
# * Authors: Yunior C. Fonseca Reyna (cfonseca@cnb.csic.es)
# *
# *
# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307 USA
# *
# * All comments concerning this program package may be sent to the
# * e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
import os
import ast
import requests
from pkg_resources import parse_version
import pwem.protocols as pw
import pyworkflow.object as pwobj
import pyworkflow.utils as pwutils
from pwem.objects import FSC, SetOfFSCs
from ..constants import V3_3_1, excludedFSCValues, fscValues
from ..convert import convertBinaryVol, writeSetOfParticles, ImageHandler
from ..utils import (getProjectPath, createEmptyProject,
createEmptyWorkSpace, getProjectName,
getCryosparcProjectsDir, createProjectDir,
doImportParticlesStar, doImportVolumes, killJob, clearJob,
get_job_streamlog, getSystemInfo, getJobStatus,
STOP_STATUSES, getCryosparcVersion)
[docs]class ProtCryosparcBase(pw.EMProtocol):
"""
This class contains the common functions for all Cryosparc protocols.
"""
_protCompatibility = []
_className = ""
_fscColumns = 6
def _initializeCryosparcProject(self):
"""
Initialize the cryoSPARC project and workspace
"""
self._initializeUtilsVariables()
# create empty project or load an exists one
folderPaths = getProjectPath(self.projectPath)
if not folderPaths:
self.emptyProject = createEmptyProject(self.projectPath, self.projectDirName)
self.projectName = self.emptyProject[-1].split()[-1]
else:
self.projectName = str(folderPaths[0])
self.projectName = pwobj.String(self.projectName)
self._store(self)
# create empty workspace
self.emptyWorkSpace = createEmptyWorkSpace(self.projectName, self.getRunName(),
self.getObjComment())
self.workSpaceName = pwobj.String(self.emptyWorkSpace[-1].split()[-1])
self._store(self)
self.currenJob = pwobj.String()
self._store(self)
def _initializeUtilsVariables(self):
"""
Initialize all utils cryoSPARC variables
"""
# Create a cryoSPARC project dir
self.projectDirName = getProjectName(self.getProject().getShortName())
self.projectPath = pw.pwutils.join(getCryosparcProjectsDir(),
self.projectDirName)
self.projectDir = createProjectDir(self.projectPath)
def _getScaledAveragesFile(self, csAveragesFile, force=False):
# For the moment this is the best possible result, scaling from 128 to
# 300 does not render nice results apart that the factor turns to
# 299x299. But without this the representative subset is wrong.
# return csAveragesFile
scaledFile = self._getScaledAveragesFileName(csAveragesFile, force)
if not os.path.exists(scaledFile):
inputSize = self._getInputParticles().getDim()[0]
csSize = ImageHandler().getDimensions(csAveragesFile)[0]
if csSize == inputSize:
print("No binning detected: linking averages cs file.",
flush=True)
pwutils.createLink(csAveragesFile, scaledFile)
else:
print("Scaling CS averages file to match particle "
"size (%s -> %s)." % (csSize, inputSize), flush=True)
try:
if force:
scaleFactor = inputSize/csSize
ImageHandler.scaleSplines(csAveragesFile, scaledFile,
scaleFactor,
finalDimension=inputSize,
forceVolume=force)
else:
ImageHandler.scale2DStack(csAveragesFile, scaledFile,
finalDimension=inputSize)
except Exception as ex:
print("The CS averages could not be scaled. %s " % ex)
return csAveragesFile
return scaledFile
def _getScaledAveragesFileName(self, csAveragesFile, isVolume=False):
extension = ".mrc" if isVolume else ".mrcs"
return pwutils.removeExt(csAveragesFile) + "_scaled" + extension
[docs] def setFilePattern(self, path):
baseName = os.path.basename(path).split('.')[0]
self.inputFileNamePattern = path.replace(baseName, '%s')
[docs] def updateParticlePath(self, part, row):
fn = part.getFileName()
baseName = os.path.basename(fn).split('.')[0]
newFileName = self.inputFileNamePattern % baseName
part.setFileName(newFileName)
def _getInputParticles(self):
if self.hasAttribute('inputParticles'):
return self.inputParticles.get()
return None
def _getInputVolume(self):
if self.hasAttribute('refVolume'):
return self.refVolume.get()
return None
def _getInputMask(self):
if self.hasAttribute('refMask'):
return self.refMask.get()
return None
def _initializeVolumeSuffix(self):
"""
Create a output volume suffix depend of the CS version
"""
cryosparcVersion = parse_version(getCryosparcVersion())
self.outputVolumeSuffix = '.imported_volume.map'
self.outputMaskSuffix = '.imported_mask.map'
self.outputVolumeHalf_A = '.imported_volume.map_half_A'
self.outputVolumeHalf_B = '.imported_volume.map_half_B'
if cryosparcVersion >= parse_version(V3_3_1):
self.outputVolumeSuffix = '.imported_volume_1.map'
self.outputMaskSuffix = '.imported_mask_1.map'
self.outputVolumeHalf_A = '.imported_volume_1.map_half_A'
self.outputVolumeHalf_B = '.imported_volume_1.map_half_B'
def _initializeMaskSuffix(self):
"""
Create a output mask suffix depend of the CS version
"""
cryosparcVersion = parse_version(getCryosparcVersion())
self.outputMaskSuffix = '.imported_mask.map'
if cryosparcVersion >= parse_version(V3_3_1):
self.outputMaskSuffix = '.imported_mask_1.map'
def _importVolume(self):
vol = self._getInputVolume()
self._initializeVolumeSuffix()
vol_fn = os.path.join(os.getcwd(), convertBinaryVol(vol, self._getTmpPath()))
importVolumeJob = doImportVolumes(self, vol_fn, vol, 'map', 'Importing volume...')
self.volume = pwobj.String(str(importVolumeJob.get()) + self.outputVolumeSuffix)
if vol.hasHalfMaps():
halfMaps = vol.getHalfMaps().split(",")
map_half_A_fn = os.path.abspath(halfMaps[0])
importVolumeHalfAJob = doImportVolumes(self, map_half_A_fn, vol,
'map_half_A', 'Importing half volume A...')
self.importVolumeHalfA = pwobj.String(str(importVolumeHalfAJob.get()) + self.outputVolumeHalf_A)
map_half_B_fn = os.path.abspath(halfMaps[1])
importVolumeHalfBJob = doImportVolumes(self, map_half_B_fn, vol,
'map_half_B', 'Importing half volume B...')
self.importVolumeHalfB = pwobj.String(str(importVolumeHalfBJob.get()) + self.outputVolumeHalf_B)
self.currenJob.set(importVolumeJob.get())
def _importMask(self):
self._initializeMaskSuffix()
maskFn = os.path.join(os.getcwd(), convertBinaryVol(self._getInputMask(),
self._getTmpPath()))
importMaskJob = doImportVolumes(self, maskFn, self._getInputMask(),
'mask', 'Importing mask... ')
self.currenJob.set(importMaskJob.get())
self.mask = pwobj.String(str(importMaskJob.get()) + self.outputMaskSuffix)
def _importParticles(self):
# import_particles_star
importedParticlesJob = doImportParticlesStar(self)
self.currenJob = pwobj.String(str(importedParticlesJob.get()))
self.particles = pwobj.String(str(importedParticlesJob.get()) +
'.imported_particles')
[docs] def setAborted(self):
""" Set the status to aborted and updated the endTime. """
pw.EMProtocol.setAborted(self)
if hasattr(self, 'projectName') and hasattr(self, 'currenJob') and self.currenJob.get() is not None:
status = getJobStatus(self.projectName.get(), self.currenJob.get())
if status not in STOP_STATUSES:
killJob(str(self.projectName.get()), str(self.currenJob.get()))
clearJob(str(self.projectName.get()), str(self.currenJob.get()))
[docs] def createFSC(self, idd, imgSet, vol):
# Need to get the cryosparc master address
system_info = getSystemInfo()
status_errors = system_info[0]
if not status_errors:
system_info = eval(system_info[1])
master_hostname = system_info.get('master_hostname')
port_webapp = system_info.get('port_webapp')
url = "http://%s:%s/file/%s" % (master_hostname, port_webapp, idd)
fscRequest = requests.get(url, allow_redirects=True)
fscFile = "fsc.txt"
fscFilePath = os.path.join(self._getExtraPath(), fscFile)
factor = self._getInputParticles().getDim()[0] * imgSet.getSamplingRate()
# Convert into scipion fsc format
open(fscFilePath, 'wb').write(fscRequest.content)
fscSet = self.getSetOfFCSsFromFile(fscFilePath, factor)
self._defineOutputs(outputFSC=fscSet)
self._defineSourceRelation(vol, fscSet)
[docs] def getSetOfFCSsFromFile(self, file, factor):
f = open(file, 'r')
lines = f.readlines()
fscSet = self._createSetOfFSCs()
columns = lines[0].strip().split('\t')[1:]
col = 1
fsc_t = None
fsc_nt = None
for column in columns:
if column == 'fsc_tightmask':
fsc_t = \
self.getFSCFromRawData(lines, column, col, factor).getData()[1]
if column == 'fsc_noisesub_true':
fsc_nt = \
self.getFSCFromRawData(lines, column, col, factor).getData()[1]
if column not in excludedFSCValues:
fsc = self.getFSCFromRawData(lines, column, col, factor)
fscSet.append(fsc)
col += 1
f.close()
corr = []
if fsc_t is not None and fsc_nt is not None: # Phase Randomized Masket Map can be calculated
for i in range(len(fsc_t)):
corr.append((fsc_t[i] - fsc_nt[i]) / (1.0 - fsc_nt[i]))
fsc = FSC(objLabel=fscValues['fsc_prmm'])
fsc_wv = fscSet.getFirstItem().getData()[0]
fsc.setData(fsc_wv, corr)
fscSet.append(fsc)
fscSet.write()
return fscSet
[docs] def getFSCFromRawData(self, lines, label, col, factor):
wv = []
corr = []
for x in lines[1:]:
wv_value = float(x.strip().split('\t')[0])
coor_value = x.strip().split('\t')[col]
wv.append(str(wv_value / factor))
corr.append(coor_value)
fsc = FSC(objLabel=fscValues[label])
fsc.setData(wv, corr)
return fsc
[docs] def findLastIteration(self, jobName):
get_job_streamlog(self.projectName.get(),
jobName,
self._getFileName('stream_log'))
# Get the metadata information from stream.log
with open(self._getFileName('stream_log')) as f:
data = f.readlines()
x = ast.literal_eval(data[0])
# Find the ID of last iteration and the map resolution
for y in x:
if 'text' in y:
z = str(y['text'])
if z.startswith('FSC Iteration') or z.startswith('FSC iIteration'):
idd = y['imgfiles'][2]['fileid']
itera = z.split(',')[0][-3:]
self._store(self)
elif 'Using Filter Radius' in z:
nomRes = str(y['text']).split('(')[1].split(')')[
0].replace(
'A', 'Å')
self.mapResolution = pwobj.String(nomRes)
self._store(self)
elif 'Estimated Bfactor' in z:
estBFactor = str(y['text']).split(':')[1].replace('\n',
'')
self.estBFactor = pwobj.String(estBFactor)
self._store(self)
return idd, itera
def _createModelFile(self):
pass