# **************************************************************************
# *
# * Authors: J.M. De la Rosa Trevin (jmdelarosa@cnb.csic.es)
# *
# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 3 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307 USA
# *
# * All comments concerning this program package may be sent to the
# * e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
"""
# JMRT (2018-12-11) This module is hardly used at all. It may be
removed in the future. For the moment it is kept here inside
pw.utils and is not imported by default.
"""
import os
from collections import OrderedDict, namedtuple
from pyworkflow.mapper import SqliteFlatDb, SqliteDb
class DataSet(object):
    """ Holds several Tables.

    All tables should have a unique tableName. This base class only keeps
    the list of table names and some bookkeeping attributes; the base
    implementations of ``_loadTable`` and ``getTypeOfColumn`` do nothing
    and return None, so subclasses are expected to override them.
    """

    def __init__(self, tables, tableName=None, volumeName=None, numberSlices=0):
        """ Create the DataSet.

        :param tables: iterable with the names of the tables (copied to a list).
        :param tableName: name of the initially selected table, if any.
        :param volumeName: volume name stored in the dataset.
        :param numberSlices: number of slices stored in the dataset.
        """
        self._tables = list(tables)
        self._tableName = tableName
        # FIXME: You have to see if the volumeName is used anywhere
        self._volumeName = volumeName
        self._numberSlices = numberSlices
        self.projectPath = None

    def currentTable(self):
        """ Returns the name of the last selected table. """
        return self._tableName

    def setVolumeName(self, volumeName):
        """ Store the volume name. """
        self._volumeName = volumeName

    def getVolumeName(self):
        """ Return the stored volume name. """
        return self._volumeName

    def setNumberSlices(self, numberSlices):
        """ Store the number of slices. """
        self._numberSlices = numberSlices

    def getNumberSlices(self):
        """ Return the stored number of slices. """
        return self._numberSlices

    def getNumberSlicesForTemplate(self):
        """ Return range(numberSlices), i.e. the slice indexes from 0. """
        return range(self._numberSlices)

    def listTables(self):
        """ List the actual table names on the DataSet. """
        return self._tables

    def getTable(self, tableName=None):
        """ Load a table and remember it as the currently selected one.

        :param tableName: name of the table to load; if None, the first
            table returned by listTables() is used.
        :return: whatever ``_loadTable`` returns for that table name.
        :raises IndexError: if tableName is None and there are no tables.
        :raises Exception: if tableName is not one of the dataset tables.
        """
        if tableName is None:
            available = self.listTables()
            if not available:
                # Same exception type that indexing the empty list would
                # raise, but with a message that explains the problem.
                raise IndexError("DataSet: no tables available.")
            tableName = available[0]

        if tableName not in self._tables:
            raise Exception("DataSet: table '%s' not found.\n Current tables: %s" %
                            (tableName, self._tables))

        table = self._loadTable(tableName)
        # Only updated after a successful load
        self._tableName = tableName

        return table

    def getTypeOfColumn(self, label):
        """ this method should be implemented by subclasses. """
        pass

    def _loadTable(self, tableName):
        """ this method should be implemented by subclasses. """
        pass
class Table(object):
    """ Table to hold rows of data.

    A table contains a list of columns; an integer 'id' column is always
    added first. Rows are kept in insertion order, indexed by row id, and
    are stored as namedtuples whose fields are the column names.
    """

    def __init__(self, *columns):
        """ Create the table with the 'id' column plus the given ones.

        :param columns: Column objects to add, in order.
        """
        self._columns = OrderedDict()
        self._rowDict = OrderedDict()
        # No column is selected for rendering until setLabelToRender is
        # called. Without this default, getDataToRender raised
        # AttributeError when no label had been set.
        self._labelToRender = None
        self._addColumn(Column('id', int))

        for col in columns:
            self._addColumn(col)

        colNames = [col.getName() for col in self.iterColumns()]
        # This implies that columns can only be added in __init__
        self.Row = namedtuple('Row', colNames)

    def setLabelToRender(self, labelToRender):
        """ Set the name of the column used by getDataToRender. """
        self._labelToRender = labelToRender

    def _addColumn(self, col):
        """ Register a column under its name. """
        self._columns[col.getName()] = col

    def getColumnValues(self, columnName):
        """ Return the list of values of the given column, one per row.

        If the column does not exist, a list of None with as many
        elements as rows is returned.
        """
        if not self.hasColumn(columnName):
            return [None] * self.getSize()
        return [getattr(row, columnName) for row in self.iterRows()]

    def iterColumns(self):
        """ Iterate over the columns. """
        return self._columns.values()

    def hasColumn(self, columnName):
        """ Return true if column exists """
        return columnName in self._columns

    def getColumn(self, columnName):
        """ Return the column with the given name.

        :raises Exception: if there is no column with that name.
        """
        if columnName not in self._columns:
            raise Exception('Table: column "%s" not found.\nCurrent columns: %s' % (
                columnName, '\n'.join(self._columns.keys())))
        return self._columns[columnName]

    def hasEnabledColumn(self):
        """ Return true if enabled column exists """
        return self.hasColumn('enabled')

    def getColumns(self):
        """ Return all columns. """
        return self._columns.values()

    def getNumberOfColumns(self):
        """ Return the number of columns, including 'id'. """
        return len(self._columns)

    def getSize(self):
        """ Return the number of rows. """
        return len(self._rowDict)

    def getRows(self):
        """ Return all rows. """
        return list(self.iterRows())

    def getRow(self, rowId):
        """ Return the row stored under rowId (KeyError if missing). """
        return self._rowDict[rowId]

    def _setRow(self, rowId, row):
        """ Store the row under rowId, replacing any previous one. """
        self._rowDict[rowId] = row

    def getDataToRenderAndExtra(self):
        """ Zip together, per row: id, 'enabled' value, the value to
        render and the transformation matrix.
        """
        # NOTE(review): getTransformationMatrix is not defined in this
        # class, so this call fails with AttributeError unless a subclass
        # (or some other code) provides it. Confirm before relying on it.
        return zip(self.getIdColumn(),
                   self.getColumnValues("enabled"),
                   self.getDataToRender(),
                   self.getTransformationMatrix())

    def getDataToRender(self):
        """ Return the values of the column selected with setLabelToRender.

        If no label was set, or the column does not exist, the result is a
        list of None (see getColumnValues).
        """
        return self.getColumnValues(self._labelToRender)

    def getIdColumn(self):
        """ Return the values of the 'id' column. """
        return self.getColumnValues("id")

    def _convertValues(self, values):
        """ Convert the input values to the actual
        expected type of each column.
        """
        # getColumn raises if a key does not name an existing column
        return {name: self.getColumn(name).convert(value)
                for name, value in values.items()}

    def addRow(self, rowId, **values):
        """ With this implementation the rowId should be provided.
        We need to work around to also allow automatic generation of id's

        Columns missing in values take the column default.

        :raises Exception: if a column has neither a value nor a default.
        """
        values['id'] = rowId

        for col in self.iterColumns():
            name = col.getName()
            if name in values:
                continue
            if not col.hasDefault():
                raise Exception('Table: value for column "%s" not provided.' % name)
            values[name] = col.getDefault()

        row = self.Row(**self._convertValues(values))
        self._setRow(rowId, row)

    def updateRow(self, rowId, **values):
        """ Update a row given its rowId and some values to update. """
        row = self.getRow(rowId)
        # Rows are namedtuples (immutable): build a replaced copy
        self._setRow(rowId, row._replace(**self._convertValues(values)))

    def iterRows(self):
        """ Iterate over the rows. """
        return self._rowDict.values()

    def getValueFromIndex(self, index, label):
        """ Return the value of the property 'label'
        in the element that has this 'index'.
        """
        row = list(self._rowDict.values())[index]
        return row._asdict()[label]

    def getIndexFromValue(self, value, label):
        """ Search the element that has property 'label'
        equals to value and returns its index.
        Return -1 if no element matches.
        """
        for index, row in enumerate(self.iterRows()):
            if value == row._asdict()[label]:
                return index
        return -1

    def __str__(self):
        return '\n'.join([str(row) for row in self.iterRows()])
# JMRT (2018-12-11) These constants are duplicated in showj. Since this
# module is not widely used, I don't find such a dependency convenient.
COL_RENDER_NONE = 0
COL_RENDER_ID = 1
COL_RENDER_TEXT = 2
COL_RENDER_IMAGE = 3
COL_RENDER_CHECKBOX = 4
COL_RENDER_VOLUME = 5


class Column(object):
    """ Description of a table column: name, type, default value,
    label and render type (one of the COL_RENDER_* constants).
    """

    def __init__(self, colName, colType=None, default=None,
                 label=None, renderType=COL_RENDER_NONE):
        """ Create the column.

        :param colName: name of the column.
        :param colType: callable used by convert() to cast values;
            None means that values are not converted.
        :param default: default value; None means "no default".
        :param label: label of the column; falls back to colName.
        :param renderType: one of the COL_RENDER_* constants.
        """
        self._name = colName
        self._type = colType
        self._default = default
        self._label = label or colName
        self._renderType = renderType

    def getName(self):
        """ Return the column name. """
        return self._name

    def getLabel(self):
        """ Return the column label. """
        return self._label

    def getType(self):
        """ Return the column type (may be None). """
        return self._type

    def convert(self, value):
        """ Try to convert the value to the column type.

        If the column has no type (colType=None) the value is returned
        unchanged, instead of failing when trying to call None.
        """
        if self._type is None:
            return value
        return self._type(value)

    def hasDefault(self):
        """ Return True if a default value (not None) was provided. """
        return self._default is not None

    def getDefault(self):
        """ Return the default value. """
        return self._default

    def getRenderType(self):
        """ Return the render type. """
        return self._renderType

    def setRenderType(self, renderType):
        """ Set the render type. """
        self._renderType = renderType
class SqliteDataSet(DataSet):
    """ Provide a DataSet implementation based on sqlite file.
    The tables of the dataset will be the object tables in database.
    Each block is a table on the dataset.
    """

    def __init__(self, filename):
        """ Inspect the sqlite file and register one dataset table
        for each PREFIX_Classes / PREFIX_Objects pair found.

        :param filename: path of the sqlite file.
        :raises Exception: if a Classes table has no matching Objects table.
        """
        self._dbName = filename
        # Tables should be at pairs:
        # PREFIX_Classes
        # PREFIX_Objects
        # where PREFIX can be empty
        self.tablePrefixes = OrderedDict()
        classesSuffix = 'Classes'

        db = SqliteDb()
        db._createConnection(filename, 1000)
        try:
            tables = db.getTables()
            for t in tables:
                if not t.endswith(classesSuffix):
                    continue
                # Strip only the trailing suffix: str.replace would also
                # remove any 'Classes' appearing inside the prefix itself.
                prefix = t[:-len(classesSuffix)]
                to = prefix + 'Objects'
                if to not in tables:
                    raise Exception('SqliteDataSet: table "%s" found, but not "%s"' % (t, to))
                flatDb = SqliteFlatDb(filename, tablePrefix=prefix)
                tableName = prefix + self._getPlural(flatDb.getSelfClassName())
                self.tablePrefixes[tableName] = prefix
        finally:
            # Release the connection also when something above fails
            db.close()

        DataSet.__init__(self, self.tablePrefixes.keys())

    def _getPlural(self, className):
        """ Get the plural of word for tables labels. """
        if className.startswith('Class'):
            return className.replace('Class', 'Classes')
        return className + 's'

    def _loadTable(self, tableName):
        """ Load information from tables PREFIX_Classes, PREFIX_Objects.

        :param tableName: dataset table name, a key of self.tablePrefixes.
        :return: a Table with one Column per row of PREFIX_Classes (other
            than the 'self' one) plus the basic columns, populated with
            the rows of PREFIX_Objects.
        """
        dbPrefix = self.tablePrefixes[tableName]

        # Basic columns, followed by the ones read from PREFIX_Classes
        columns = [Column('id', int, renderType=COL_RENDER_ID),
                   Column('enabled', bool, renderType=COL_RENDER_CHECKBOX),
                   Column('label', str),
                   Column('comment', str),
                   Column('creation', str)]

        # CTF FIX: labels with these endings are also rendered as images
        ctfImageSuffixes = ('_psdFile', '_enhanced_psd',
                            '_ctfmodel_quadrant', '_ctfmodel_halfplane')

        db = SqliteDb()
        db._createConnection(self._dbName, 1000)
        try:
            db.executeCommand("SELECT * FROM %sClasses;" % dbPrefix)
            # This will store the images columns to join
            # the _index and the _filename
            imgCols = {}
            for row in db._iterResults():
                renderType = COL_RENDER_NONE
                colName = row['column_name']
                colLabel = row['label_property']

                if colLabel == 'self':
                    continue

                # Keep track of _index and _filename pairs to mark as renderable images
                if colLabel.endswith('_index'):
                    imgCols[colLabel.replace('_index', '')] = colName
                elif colLabel.endswith('_filename'):
                    # TODO: Maybe not all the labels endswith "_filename"
                    # have to be rendered.
                    # for example in the RotSpectra with '_representative._filename'
                    labelPrefix = colLabel.replace('_filename', '')
                    if labelPrefix in imgCols:
                        renderType = COL_RENDER_IMAGE
                        # filename column name -> index column name
                        imgCols[colName] = imgCols[labelPrefix]
                elif colLabel.endswith(ctfImageSuffixes):
                    renderType = COL_RENDER_IMAGE

                if row['class_name'] == 'Boolean':
                    renderType = COL_RENDER_CHECKBOX
                columns.append(Column(colName, str, label=colLabel, renderType=renderType))

            table = Table(*columns)

            checkedImgCols = {}  # Check if the image columns are volumes
            # FIXME: Move this to scipion-em? Maybe remove the whole module that is not used?
            from pwem.emlib.image import ImageHandler
            ih = ImageHandler()

            # projectPath defaults to None in DataSet; fall back to '' so
            # that os.path.join does not fail and filenames are used as is.
            projectPath = self.projectPath or ''

            # Populate the table in the DataSet
            db.executeCommand("SELECT * FROM %sObjects;" % dbPrefix)
            for row in db._iterResults():
                rowDict = dict(row)
                # Only values of existing keys are replaced while iterating
                for k, v in rowDict.items():
                    if v is None:
                        rowDict[k] = ''
                    # Set the index@filename for images columns values
                    if k in imgCols:
                        colName = imgCols[k]
                        index = rowDict[colName]
                        filename = os.path.join(projectPath, rowDict[k])
                        filepath = filename.replace(":mrc", "")
                        if not checkedImgCols.get(colName, False):
                            if os.path.exists(filepath):
                                _, _, z, _ = ih.getDimensions((index, filename))
                                if z > 1:
                                    table.getColumn(k).setRenderType(COL_RENDER_VOLUME)
                            # NOTE(review): the dimensions check is marked as
                            # done even if the file was not found - confirm
                            # against the upstream source.
                            checkedImgCols[colName] = True
                        if index:
                            rowDict[k] = '%06d@%s' % (index, filename)
                table.addRow(row['id'], **rowDict)
        finally:
            # The connection was previously left open after loading
            db.close()

        return table
class SingleFileDataSet(DataSet):
    """ DataSet implementation for single files such as Images or Volumes.

    The dataset has one single table, registered under the empty name,
    that is built once in the constructor.
    """

    def __init__(self, filename):
        """ Store the filename and build the single table for it. """
        self._filename = filename
        self._tableName = ""
        onlyTableNames = [self._tableName]
        DataSet.__init__(self, onlyTableNames)
        self._table = self._createSingleTable()

    def _createSingleTable(self):
        """ Build a table with a 'filename' column and one row (id 1)
        holding the filename of this dataset.
        """
        # FIXME: for single images we need to read the dimensions
        filenameColumn = Column('filename', str,
                                renderType=COL_RENDER_VOLUME)
        singleTable = Table(filenameColumn)
        singleTable.addRow(1, filename=self._filename)
        return singleTable

    def _loadTable(self, tableName):
        """ Return the single table, whatever the tableName is. """
        return self._table