Source code for cryosparc2.protocols.protocol_base

# **************************************************************************
# *
# * Authors: Yunior C. Fonseca Reyna    (cfonseca@cnb.csic.es)
# *
# *
# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia, CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
import os
import ast
import requests
from pkg_resources import parse_version

import pwem.protocols as pw
import pyworkflow.object as pwobj
import pyworkflow.utils as pwutils
from pwem.objects import FSC, SetOfFSCs

from ..constants import V3_3_1, excludedFSCValues, fscValues
from ..convert import convertBinaryVol, writeSetOfParticles, ImageHandler
from ..utils import (getProjectPath, createEmptyProject,
                     createEmptyWorkSpace, getProjectName,
                     getCryosparcProjectsDir, createProjectDir,
                     doImportParticlesStar, doImportVolumes, killJob, clearJob,
                     get_job_streamlog, getSystemInfo, getJobStatus,
                     STOP_STATUSES, getCryosparcVersion)


class ProtCryosparcBase(pw.EMProtocol):
    """
    This class contains the common functions for all cryoSPARC protocols.
    """
    _protCompatibility = []
    _className = ""
    _fscColumns = 6

    def _initializeCryosparcProject(self):
        """ Initialize the cryoSPARC project and workspace. """
        self._initializeUtilsVariables()
        # Create an empty project or load an existing one
        folderPaths = getProjectPath(self.projectPath)
        if not folderPaths:
            self.emptyProject = createEmptyProject(self.projectPath,
                                                   self.projectDirName)
            self.projectName = self.emptyProject[-1].split()[-1]
        else:
            self.projectName = str(folderPaths[0])

        self.projectName = pwobj.String(self.projectName)
        self._store(self)

        # Create an empty workspace
        self.emptyWorkSpace = createEmptyWorkSpace(self.projectName,
                                                   self.getRunName(),
                                                   self.getObjComment())
        self.workSpaceName = pwobj.String(self.emptyWorkSpace[-1].split()[-1])
        self._store(self)

        self.currenJob = pwobj.String()
        self._store(self)

    def _initializeUtilsVariables(self):
        """ Initialize all cryoSPARC utility variables. """
        # Create the cryoSPARC project directory
        self.projectDirName = getProjectName(self.getProject().getShortName())
        self.projectPath = pwutils.join(getCryosparcProjectsDir(),
                                        self.projectDirName)
        self.projectDir = createProjectDir(self.projectPath)
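    # Note on the parsing above (an assumption inferred from the code, not a
    # documented cryoSPARC contract): createEmptyProject/createEmptyWorkSpace
    # are expected to return command output whose last line ends with the new
    # entity UID, e.g. a final line such as
    #     "... created project P3"
    # so that `output[-1].split()[-1]` yields 'P3' (and similarly a workspace
    # UID like 'W1').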
    def convertInputStep(self):
        """ Create the input file in STAR format as expected by Relion.
        If the input particles come from Relion, just link the file.
        """
        imgSet = self._getInputParticles()
        if imgSet is not None:
            # Create links to binary files and write the Relion .star file
            writeSetOfParticles(imgSet, self._getFileName('input_particles'),
                                self._getTmpPath())
            self._importParticles()

        volume = self._getInputVolume()
        if volume is not None:
            self._importVolume()

        mask = self._getInputMask()
        if mask is not None:
            self._importMask()
        else:
            self.mask = pwobj.String()
            self._store(self)
    def _getScaledAveragesFile(self, csAveragesFile, force=False):
        # For the moment this is the best possible result: scaling from 128
        # to 300 does not render nice results, apart from the factor turning
        # into 299x299. But without this, the representative subset is wrong.
        # return csAveragesFile
        scaledFile = self._getScaledAveragesFileName(csAveragesFile, force)

        if not os.path.exists(scaledFile):
            inputSize = self._getInputParticles().getDim()[0]
            csSize = ImageHandler().getDimensions(csAveragesFile)[0]

            if csSize == inputSize:
                print("No binning detected: linking averages cs file.",
                      flush=True)
                pwutils.createLink(csAveragesFile, scaledFile)
            else:
                print("Scaling CS averages file to match particle "
                      "size (%s -> %s)." % (csSize, inputSize), flush=True)
                try:
                    if force:
                        scaleFactor = inputSize / csSize
                        ImageHandler.scaleSplines(csAveragesFile, scaledFile,
                                                  scaleFactor,
                                                  finalDimension=inputSize,
                                                  forceVolume=force)
                    else:
                        ImageHandler.scale2DStack(csAveragesFile, scaledFile,
                                                  finalDimension=inputSize)
                except Exception as ex:
                    print("The CS averages could not be scaled. %s" % ex)
                    return csAveragesFile

        return scaledFile

    def _getScaledAveragesFileName(self, csAveragesFile, isVolume=False):
        extension = ".mrc" if isVolume else ".mrcs"
        return pwutils.removeExt(csAveragesFile) + "_scaled" + extension
    def setFilePattern(self, path):
        baseName = os.path.basename(path).split('.')[0]
        self.inputFileNamePattern = path.replace(baseName, '%s')
    def updateParticlePath(self, part, row):
        fn = part.getFileName()
        baseName = os.path.basename(fn).split('.')[0]
        newFileName = self.inputFileNamePattern % baseName
        part.setFileName(newFileName)
    def _getInputParticles(self):
        if self.hasAttribute('inputParticles'):
            return self.inputParticles.get()
        return None

    def _getInputVolume(self):
        if self.hasAttribute('refVolume'):
            return self.refVolume.get()
        return None

    def _getInputMask(self):
        if self.hasAttribute('refMask'):
            return self.refMask.get()
        return None

    def _initializeVolumeSuffix(self):
        """ Create an output volume suffix depending on the CS version. """
        cryosparcVersion = parse_version(getCryosparcVersion())
        self.outputVolumeSuffix = '.imported_volume.map'
        self.outputMaskSuffix = '.imported_mask.map'
        self.outputVolumeHalf_A = '.imported_volume.map_half_A'
        self.outputVolumeHalf_B = '.imported_volume.map_half_B'
        if cryosparcVersion >= parse_version(V3_3_1):
            self.outputVolumeSuffix = '.imported_volume_1.map'
            self.outputMaskSuffix = '.imported_mask_1.map'
            self.outputVolumeHalf_A = '.imported_volume_1.map_half_A'
            self.outputVolumeHalf_B = '.imported_volume_1.map_half_B'

    def _initializeMaskSuffix(self):
        """ Create an output mask suffix depending on the CS version. """
        cryosparcVersion = parse_version(getCryosparcVersion())
        self.outputMaskSuffix = '.imported_mask.map'
        if cryosparcVersion >= parse_version(V3_3_1):
            self.outputMaskSuffix = '.imported_mask_1.map'

    def _importVolume(self):
        vol = self._getInputVolume()
        self._initializeVolumeSuffix()
        vol_fn = os.path.join(os.getcwd(),
                              convertBinaryVol(vol, self._getTmpPath()))
        importVolumeJob = doImportVolumes(self, vol_fn, vol, 'map',
                                          'Importing volume...')
        self.volume = pwobj.String(str(importVolumeJob.get()) +
                                   self.outputVolumeSuffix)
        if vol.hasHalfMaps():
            halfMaps = vol.getHalfMaps().split(",")
            map_half_A_fn = os.path.abspath(halfMaps[0])
            importVolumeHalfAJob = doImportVolumes(self, map_half_A_fn, vol,
                                                   'map_half_A',
                                                   'Importing half volume A...')
            self.importVolumeHalfA = pwobj.String(str(importVolumeHalfAJob.get()) +
                                                  self.outputVolumeHalf_A)

            map_half_B_fn = os.path.abspath(halfMaps[1])
            importVolumeHalfBJob = doImportVolumes(self, map_half_B_fn, vol,
                                                   'map_half_B',
                                                   'Importing half volume B...')
            self.importVolumeHalfB = pwobj.String(str(importVolumeHalfBJob.get()) +
                                                  self.outputVolumeHalf_B)
        self.currenJob.set(importVolumeJob.get())

    def _importMask(self):
        self._initializeMaskSuffix()
        maskFn = os.path.join(os.getcwd(),
                              convertBinaryVol(self._getInputMask(),
                                               self._getTmpPath()))
        importMaskJob = doImportVolumes(self, maskFn, self._getInputMask(),
                                        'mask', 'Importing mask... ')
        self.currenJob.set(importMaskJob.get())
        self.mask = pwobj.String(str(importMaskJob.get()) +
                                 self.outputMaskSuffix)

    def _importParticles(self):
        # import_particles_star
        importedParticlesJob = doImportParticlesStar(self)
        self.currenJob = pwobj.String(str(importedParticlesJob.get()))
        self.particles = pwobj.String(str(importedParticlesJob.get()) +
                                      '.imported_particles')
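    # Illustration of the job-output references built by the _import* helpers
    # above: each helper stores '<job_uid><suffix>', e.g. an import-volume
    # job 'J42' yields 'J42.imported_volume.map' on CS < v3.3.1 and
    # 'J42.imported_volume_1.map' from v3.3.1 on (the output groups became
    # numbered). The job UID 'J42' here is a hypothetical example;
    # presumably these strings are what later jobs receive as inputs.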
    def setAborted(self):
        """ Set the status to aborted and update the endTime. """
        pw.EMProtocol.setAborted(self)
        if (hasattr(self, 'projectName') and hasattr(self, 'currenJob')
                and self.currenJob.get() is not None):
            status = getJobStatus(self.projectName.get(), self.currenJob.get())
            if status not in STOP_STATUSES:
                killJob(str(self.projectName.get()), str(self.currenJob.get()))
                clearJob(str(self.projectName.get()), str(self.currenJob.get()))
    def createFSC(self, idd, imgSet, vol):
        # Need to get the cryoSPARC master address
        system_info = getSystemInfo()
        status_errors = system_info[0]
        if not status_errors:
            system_info = eval(system_info[1])
            master_hostname = system_info.get('master_hostname')
            port_webapp = system_info.get('port_webapp')

            url = "http://%s:%s/file/%s" % (master_hostname, port_webapp, idd)
            fscRequest = requests.get(url, allow_redirects=True)
            fscFile = "fsc.txt"
            fscFilePath = os.path.join(self._getExtraPath(), fscFile)

            factor = (self._getInputParticles().getDim()[0] *
                      imgSet.getSamplingRate())

            # Convert into the Scipion FSC format
            with open(fscFilePath, 'wb') as f:
                f.write(fscRequest.content)
            fscSet = self.getSetOfFCSsFromFile(fscFilePath, factor)
            self._defineOutputs(outputFSC=fscSet)
            self._defineSourceRelation(vol, fscSet)
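    # Sketch of the request above, with hypothetical values: for
    # master_hostname='localhost', port_webapp='39000' and a file id
    # 'abc123', the FSC table would be fetched from
    #     http://localhost:39000/file/abc123
    # i.e. the tab-separated FSC text file is served by the cryoSPARC web
    # app's /file endpoint and dumped verbatim into extra/fsc.txt before
    # being converted to a Scipion SetOfFSCs.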
    def getSetOfFCSsFromFile(self, file, factor):
        f = open(file, 'r')
        lines = f.readlines()
        fscSet = self._createSetOfFSCs()
        columns = lines[0].strip().split('\t')[1:]
        col = 1
        fsc_t = None
        fsc_nt = None
        for column in columns:
            if column == 'fsc_tightmask':
                fsc_t = self.getFSCFromRawData(lines, column, col,
                                               factor).getData()[1]
            if column == 'fsc_noisesub_true':
                fsc_nt = self.getFSCFromRawData(lines, column, col,
                                                factor).getData()[1]
            if column not in excludedFSCValues:
                fsc = self.getFSCFromRawData(lines, column, col, factor)
                fscSet.append(fsc)
            col += 1
        f.close()

        corr = []
        if fsc_t is not None and fsc_nt is not None:
            # The Phase Randomized Masked Map FSC can be calculated
            for i in range(len(fsc_t)):
                corr.append((fsc_t[i] - fsc_nt[i]) / (1.0 - fsc_nt[i]))
            fsc = FSC(objLabel=fscValues['fsc_prmm'])
            fsc_wv = fscSet.getFirstItem().getData()[0]
            fsc.setData(fsc_wv, corr)
            fscSet.append(fsc)

        fscSet.write()
        return fscSet
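    # The correction above is the standard high-resolution noise-substitution
    # estimate of the "true" FSC (Chen et al., Ultramicroscopy 135, 2013):
    #     FSC_true = (FSC_t - FSC_n) / (1 - FSC_n)
    # where FSC_t is measured between the tightly masked half-maps and FSC_n
    # between half-maps whose high-resolution phases were randomized. This is
    # why the extra curve is only computed when both 'fsc_tightmask' and
    # 'fsc_noisesub_true' columns are present in the cryoSPARC table.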
    def getFSCFromRawData(self, lines, label, col, factor):
        wv = []
        corr = []
        for x in lines[1:]:
            wv_value = float(x.strip().split('\t')[0])
            coor_value = x.strip().split('\t')[col]
            wv.append(str(wv_value / factor))
            corr.append(coor_value)
        fsc = FSC(objLabel=fscValues[label])
        fsc.setData(wv, corr)
        return fsc
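    # Interpretation of `factor` (an assumption, based on how it is built in
    # createFSC as boxSize * samplingRate): cryoSPARC tabulates FSC against
    # the Fourier shell index k, and k / (boxSize * samplingRate) converts it
    # to spatial frequency in 1/Å. For example, k=150 in a 300 px box at
    # 1.0 Å/px gives 150 / (300 * 1.0) = 0.5 1/Å, i.e. Nyquist.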
    def findLastIteration(self, jobName):
        get_job_streamlog(self.projectName.get(), jobName,
                          self._getFileName('stream_log'))

        # Get the metadata information from stream.log
        with open(self._getFileName('stream_log')) as f:
            data = f.readlines()

        x = ast.literal_eval(data[0])

        # Find the ID of the last iteration and the map resolution
        idd = None
        itera = None
        for y in x:
            if 'text' in y:
                z = str(y['text'])
                if z.startswith('FSC Iteration') or z.startswith('FSC iIteration'):
                    idd = y['imgfiles'][2]['fileid']
                    itera = z.split(',')[0][-3:]
                    self._store(self)
                elif 'Using Filter Radius' in z:
                    nomRes = str(y['text']).split('(')[1].split(')')[0].replace('A', 'Å')
                    self.mapResolution = pwobj.String(nomRes)
                    self._store(self)
                elif 'Estimated Bfactor' in z:
                    estBFactor = str(y['text']).split(':')[1].replace('\n', '')
                    self.estBFactor = pwobj.String(estBFactor)
                    self._store(self)
        return idd, itera
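    # Assumed shape of one stream.log record parsed above (illustrative; the
    # field values are placeholders, not real output):
    #     {'text': 'FSC Iteration 020, ...',
    #      'imgfiles': [..., ..., {'fileid': '...'}]}
    # i.e. the FSC plot's file id is taken from the third image attached to
    # the message, and the iteration number from the last three characters
    # before the first comma of the message text.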
    def _createModelFile(self):
        pass