Source code for pyworkflow.object

# **************************************************************************
# *
# * Authors:     J.M. De la Rosa Trevin (jmdelarosa@cnb.csic.es)
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 3 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABIlITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************
"""
This modules holds the base classes for the ORM implementation.
The Object class is the root in the hierarchy and some other
basic classes.
"""
import logging
logger = logging.getLogger(__name__)

from collections import OrderedDict
import datetime as dt
from os.path import getmtime

from pyworkflow.utils import getListFromValues, getListFromRangeString
from pyworkflow.utils.reflection import getSubclasses


# Binary relations always involve two objects, we 
# call them parent-child objects, the following
# constants reflect which direction of the relation we refer
RELATION_CHILDS = 0
RELATION_PARENTS = 1

# There are other relations related to data provenance,
# which object were used to produce a result (source of it)
RELATION_SOURCE = 'relation_datasource'
RELATION_TRANSFORM = 'relation_transform'


# Then column name of the parent id relation
OBJECT_PARENT_ID = 'object_parent_id'


[docs]class Object(object): """ All objects in our Domain should inherit from this class that will contain all base properties""" def __init__(self, value=None, **kwargs): object.__init__(self) if len(kwargs) == 0: self._objIsPointer = False # True if will be treated as a reference for storage self._objId = None # Unique identifier of this object in some context self._objParentId = None # identifier of the parent object self._objName = '' # The name of the object will contains the whole path of ancestors self._objLabel = '' # This will serve to label the objects self._objComment = '' # Performance: self._objTag = None # This attribute serve to make some annotation on the object. self._objDoStore = True else: self._objIsPointer = kwargs.get('objIsPointer', False) # True if will be treated as a reference for storage self._objId = kwargs.get('objId', None) # Unique identifier of this object in some context self._objParentId = kwargs.get('objParentId', None) # identifier of the parent object self._objName = kwargs.get('objName', '') # The name of the object will contains the whole path of ancestors self._objLabel = kwargs.get('objLabel', '') # This will serve to label the objects self._objComment = kwargs.get('objComment', '') #Performance: self._objTag = kwargs.get('objTag', None) # This attribute serve to make some annotation on the object. self._objDoStore = kwargs.get('objDoStore', True) # True if this object will be stored from his parent self._objCreation = None self._objParent = None # Reference to parent object self._objEnabled = True self.set(value)
[docs] @classmethod def getClassName(cls): return cls.__name__
[docs] def getClass(self): return type(self)
[docs] @classmethod def getDoc(cls): return cls.__doc__ or ''
# FIXME: This function should be renamed to hasAttribute when we address that issue
[docs] def hasAttributeExt(self, attrName): attrList = attrName.split('.') obj = self for partName in attrList: obj = getattr(obj, partName, None) if obj is None: return False return True
# FIXME: This function is not symmetric with setAttributeValue
[docs] def hasAttribute(self, attrName): return hasattr(self, attrName)
# FIXME: This function is not symmetric with setAttributeValue
[docs] def getAttributeValue(self, attrName, defaultValue=None): """ Get the attribute value given its name. Equivalent to getattr(self, name).get() """ attr = getattr(self, attrName, None) if attr is None: value = defaultValue elif callable(attr): value = attr() elif isinstance(attr, Object): value = attr.get() else: value = attr # behave well for non-Object attributes return value
[docs] def setAttributeValue(self, attrName, value, ignoreMissing=True): """ Set the attribute value given its name. Equivalent to setattr(self, name).set(value) If the attrName contains dot: x.y it will be equivalent to getattr(getattr(self, 'x'), 'y').set(value) If ignoreMissing is True, non-existing attrName will not raise an exception. """ attrList = attrName.split('.') obj = self for partName in attrList: obj = getattr(obj, partName, None) if obj is None: if ignoreMissing: return raise Exception("Object.setAttributeValue: %s has no attribute %s." % (self.__class__.__name__, attrName)) obj.set(value)
[docs] def getAttributes(self): """Return the list of attributes that are subclasses of Object""" for name in vars(self): value = getattr(self, name) if isinstance(value, Object): yield name, value
[docs] def getAttributesToStore(self): """Return the list of attributes than are subclasses of Object and will be stored""" for key, attr in self.getAttributes(): try: if attr is not None and attr._objDoStore: yield key, attr except Exception as e: logger.info("Object.getAttributesToStore: attribute '%s' (%s) seems to " "be overwritten, since '_objDoStore' was not found. Ignoring attribute." % (key, type(attr))) logger.debug("Exception: %s" % e)
[docs] def isPointer(self): """If this is true, the value field is a pointer to another object""" return self._objIsPointer
def _convertValue(self, value): """Convert a value to desired scalar type""" return value
[docs] def set(self, value): """Set the internal value, if it is different from None call the convert function in subclasses""" if value is not None: value = self._convertValue(value) self._objValue = value
[docs] def get(self): """Return internal value""" return self._objValue
[docs] def trace(self, callback): """ Add an observer when the set method is called. """ if self.set == self.__setTrace: pass else: self.__set = self.set self.set = self.__setTrace self.__setCallback = callback
def __setTrace(self, value): self.__set(value) self.__setCallback()
[docs] def getObjValue(self): """Return the internal value for storage. This is a good place to do some update of the internal value before been stored""" return self._objValue
[docs] def getObjId(self): """Return object id""" return self._objId
[docs] def setObjId(self, newId): """Set the object id""" self._objId = newId
[docs] def copyObjId(self, other): """ Copy the object id form other to self. """ self.setObjId(other.getObjId())
[docs] def hasObjId(self): return self._objId is not None
[docs] def cleanObjId(self): """ This function will set to None this object id and the id of all its children attributes. This function should be used when retrieving an object from a mapper and storing in a different one. """ self.setObjId(None) for _, attr in self.getAttributesToStore(): attr.cleanObjId()
[docs] def getObjParentId(self): return self._objParentId
[docs] def hasObjParentId(self): return self._objParentId is not None
[docs] def getObjLabel(self): """ Return the label associated with this object""" return self._objLabel
[docs] def setObjLabel(self, label): """ Set the label to better identify this object""" self._objLabel = label
[docs] def getObjComment(self): """ Return the comment associated with this object""" return self._objComment
[docs] def setObjComment(self, comment): """ Set the comment to better identify this object""" self._objComment = comment
[docs] def setObjCreation(self, creation): """ Set the creation time of the object. """ self._objCreation = creation
[docs] def getObjCreation(self): """ Return the stored creation time of the object. """ return self._objCreation
[docs] def getObjCreationAsDate(self): """ Return the stored creation time of the object as date """ return String.getDatetime(self._objCreation)
[docs] def strId(self): """String representation of id""" return str(self._objId)
[docs] def getName(self): # TODO: REMOVE THIS FUNCTION, SINCE IT DOES NOT COMPLAIN WITH _objX naming return self._objName
[docs] def getObjName(self): return self._objName
[docs] def setEnabled(self, enabled): self._objEnabled = bool(enabled)
[docs] def isEnabled(self): """Return if object is enabled""" return self._objEnabled
[docs] def getNameId(self): """ Return an unique and readable id that identifies this object. """ label = self.getObjLabel() if len(label) > 0: return label elif self.hasObjId(): return '%s.%s' % (self.getName(), self.strId()) return ''
[docs] def getLastName(self): """ If the name contains parent path, remove it and only return the attribute name in its parent. """ if '.' in self._objName: return self._objName.split('.')[-1] return self._objName
[docs] def setName(self, name): self._objName = name
[docs] def hasValue(self): return True
[docs] def getStore(self): """Return True if the object will be stored by the mapper""" return self._objDoStore
[docs] def setStore(self, value): """set the store flag""" self._objDoStore = value
def __hash__(self): # Issue found in scipion-em-xmipp/xmipp3/protocols/protocol_extract_particles_pairs.py (_setupBasicProperties) # TypeError: unhashable type: 'Micrograph' # Solution: Hashability makes an object usable as a dictionary key and a set member, because these data # structures use the hash value internally. All of Python’s immutable built_in objects are hashable, while no # mutable containers (such as lists or dictionaries) are. Objects which are instances of user-defined classes # are hashable by default; they all compare unequal, and their hash value is their id(). # https://docs.python.org/3.1/glossary.html return id(self) def __eq__(self, other): """Comparison for scalars should be by value and for other objects by reference""" if self._objValue is None: return object.__eq__(self, other) return self._objValue == other._objValue
[docs] def equalAttributes(self, other, ignore=[], verbose=False): """Compare that all attributes are equal""" for k, v1 in self.getAttributes(): # v1 = getattr(self, k) # This is necessary because of FakedObject simulation of getattr # Skip comparison of attribute names in 'ignore' list if k in ignore: continue v2 = getattr(other, k) if issubclass(type(v1), Object): comp = v1.equalAttributes(v2, ignore=ignore, verbose=verbose) else: comp = v1 == v2 if not comp: if verbose: logger.info("Different attributes: self.%s = %s, other.%s = %s" % (k, v1, k, v2)) return False return True
[docs] def copyAttributes(self, other, *attrNames): """ Copy attributes in attrNames from other to self. If the name X is in attrNames, it would be equivalent to: self.X.set(other.X.get()) This method is more useful for Scalar attributes. There are two paths for Pointer and PointerList. """ for name in attrNames: attr = getattr(self, name, None) otherAttr = getattr(other, name) if attr is None: setattr(self, name, otherAttr.clone()) elif isinstance(attr, Pointer): attr.copy(otherAttr) elif isinstance(attr, PointerList): for pointer in otherAttr: attr.append(pointer) elif isinstance(attr, Scalar) and otherAttr.hasPointer(): attr.copy(otherAttr) else: attr.set(otherAttr.get())
def __getObjDict(self, prefix, objDict, includeClass, includePointers=True): if prefix: prefix += '.' for k, v in self.getAttributesToStore(): self.fillObjDict(prefix, objDict, includeClass, k, v, includePointers=includePointers)
[docs] @staticmethod def fillObjDict(prefix, objDict, includeClass, k, v, includePointers=False): if not v.isPointer(): kPrefix = prefix + k if includeClass: objDict[kPrefix] = (v.getClassName(), v.getObjValue()) else: objDict[kPrefix] = v.getObjValue() if not isinstance(v, Scalar): v.__getObjDict(kPrefix, objDict, includeClass) # Is a pointer ... elif includePointers and not v.pointsNone(): # Should we take into account the prefix?? objDict[k]=v.getUniqueId()
[docs] def getObjDict(self, includeClass=False, includeBasic=False, includePointers=False): """ Return all attributes and values in a dictionary. Nested attributes will be separated with a dot in the dict key. :param includeClass: if True, the values will be a tuple (ClassName, value) otherwise only the values of the attributes :param includeBasic: if True include the id, label and comment. :param includePointers: If true pointer are also added using Pointer.getUniqueId --> "2.outputTomograms" :return a dictionary includeBasic example:: object.id: objId object.label: objLabel object.comment: objComment """ d = OrderedDict() if includeClass: d['self'] = (self.getClassName(),) if includeBasic: d['object.id'] = self.getObjId() d['object.label'] = self.getObjLabel() d['object.comment'] = self.getObjComment() self.__getObjDict('', d, includeClass, includePointers=includePointers ) return d
[docs] def setAttributesFromDict(self, attrDict, setBasic=True, ignoreMissing=False): """ Set object attributes from the dict obtained from getObjDict. WARNING: this function is yet experimental and not fully tested. """ if setBasic: self.setObjId(attrDict.get('object.id', None)) self.setObjLabel(attrDict.get('object.label', '')) self.setObjComment(attrDict.get('object.comment', '')) for attrName, value in attrDict.items(): if not attrName.startswith('object.'): self.setAttributeValue(attrName, value, ignoreMissing)
def __getMappedDict(self, prefix, objDict): if prefix: prefix += '.' for k, v in self.getAttributesToStore(): if not v.isPointer(): kPrefix = prefix + k objDict[kPrefix] = v if not isinstance(v, Scalar): v.__getMappedDict(kPrefix, objDict)
[docs] def getMappedDict(self): d = OrderedDict() self.__getMappedDict('', d) return d
[docs] def getNestedValue(self, key): """ Retrieve the value of nested attributes like: _ctfModel.defocusU. """ attr = self for p in key.split('.'): attr = getattr(attr, p) return attr.get()
[docs] def getValuesFromDict(self, objDict): """ Retrieve the values of the attributes for each of the keys that comes in the objDict. """ return [self.getNestedValue(k) for k in objDict if k != 'self']
[docs] def getValuesFromMappedDict(self, mappedDict): return [v.getObjValue() for v in mappedDict.values()]
[docs] def copy(self, other, copyId=True, ignoreAttrs=[], copyEnable=False): """ Copy all attributes values from one object to the other. The attributes will be created if needed with the corresponding type. :param other: the other object from which to make the copy. :param copyId: if true, the _objId will be also copied. :param ignoreAttrs: pass a list with attributes names to ignore. :param copyEnable: Pass true if you want enabled flag to be copied """ copyDict = {'internalPointers': []} self._copy(other, copyDict, copyId, ignoreAttrs=ignoreAttrs, copyEnable=copyEnable) self._updatePointers(copyDict) return copyDict
def _updatePointers(self, copyDict): """ Update the internal pointers after a copy. If there are pointers to other object in the copy the references should be updated. """ for ptr in copyDict['internalPointers']: pointedId = ptr.getObjValue().getObjId() if pointedId in copyDict: ptr.set(copyDict[pointedId]) def _copy(self, other, copyDict, copyId, level=1, ignoreAttrs=[], copyEnable=False): """ Recursively clone all attributes from one object to the other. (Currently, we are not deleting attributes missing in the 'other' object.) Params: :param copyDict: this dict is used to store the ids map between 'other' and 'self' attributes. It is used for update pointers and relations later on. This will only work if the ids of 'other' attributes has been properly set. :param copyId: Pass true to copy the id :param level: (1), depth to the copy :param ignoreAttrs: List with attributes to ignore :param copyEnable: (False) pass true to copy the enable flag """ # Copy basic object data # self._objName = other._objName if copyId: self._objId = other._objId self._objValue = other._objValue self._objLabel = other._objLabel self._objComment = other._objComment if copyEnable: self._objEnabled = other._objEnabled # Copy attributes recursively for name, attr in other.getAttributes(): if name not in ignoreAttrs: myAttr = getattr(self, name, None) if myAttr is None: myAttr = attr.getClass()() setattr(self, name, myAttr) myAttr._copy(attr, copyDict, copyId, level+2) # Store the attr in the copyDict if attr.hasObjId(): # storing in copyDict with id=", attr.getObjId() copyDict[attr.getObjId()] = myAttr # Use the copyDict to fix the reference in the copying object # if the pointed one is inside the same object if myAttr.isPointer() and myAttr.hasValue(): copyDict['internalPointers'].append(myAttr)
[docs] def clone(self, copyEnable=False): """ Clones the object :param copyEnable (False): clone also the enable flag""" #NOTE: Maybe be, as default, we should be cloning the enable, but we found it is not doing so. # So for now it is not cloned, probably to void carying out the enable which may be a decision by one method that should not be # tranferred to the next method. Only Subsetting protocols should respect this. clone = self.getClass()() clone.copy(self, copyEnable=copyEnable) return clone
[docs] def evalCondition(self, condition): """ Check if condition is meet. Examples of condition:: "hasCTF" "hasCTF and not hasAlignment" :param condition: the condition string, it can contain variables or methods without arguments to be evaluated. :return The value of the condition evaluated with values """ # Split in possible tokens import re tokens = re.split('\W+', condition) condStr = condition for t in tokens: if self.hasAttribute(t): condStr = condStr.replace(t, str(self.getAttributeValue(t))) return eval(condStr)
[docs] def printAll(self, name=None, level=0): """Print object and all its attributes. Mainly for debugging""" tab = ' ' * (level*3) idStr = '' # ' (id = %s, pid = %s)' % (self.getObjId(), self._objParentId) if name is None: logger.info(f"{tab} {self.getClassName()} {idStr}") else: if name == 'submitTemplate': # Skip this because very large value value = '...' else: value = self.getObjValue() logger.info(f"{tab} {name} = {value} {idStr}") for k, v in self.getAttributes(): v.printAll(k, level + 1)
[docs] def printObjDict(self, includeClasses=False): """Print object dictionary. Mainly for debugging""" import pprint pp = pprint.PrettyPrinter(indent=4) pp.pprint(dict(self.getObjDict(includeClasses)))
[docs]class OrderedObject(Object): """Legacy class, to be removed. Object should give same functionality and is faster""" pass
[docs]class Scalar(Object): """Base class for basic types"""
[docs] def hasValue(self): return self._objValue is not None
[docs] def equalAttributes(self, other, ignore=[], verbose=False): """Compare that all attributes are equal""" return self._objValue == other._objValue
def __str__(self): """String representation of the scalar value""" return str(self.get()) def __cmp(self, other): """ Comparison implementation for scalars. """ a = self.get() b = other.get() if isinstance(other, Object) else other # cmp does not longer exist in Python3, so we will follow # the recommend replacement: (a > b) - (a < b) # https://docs.python.org/3.0/whatsnew/3.0.html#ordering-comparisons # Consider None values if a is None and b is None: return True elif a is None or b is None: return False else: return (a > b) - (a < b) def __eq__(self, other): """ Comparison for scalars should be by value while for other objects by reference. """ return self.__cmp(other) == 0 def __ne__(self, other): return self.__cmp(other) != 0 def __lt__(self, other): return self.__cmp(other) < 0 def __le__(self, other): return self.__cmp(other) <= 0 def __gt__(self, other): return self.__cmp(other) > 0 def __ge__(self, other): return self.__cmp(other) >= 0
[docs] def get(self, default=None): """Get the value, if internal value is None the default argument passed is returned. """ if self.hasPointer(): # Get pointed value pointedValue = self._pointer.get() return default if pointedValue is None else pointedValue.get(default) if self.hasValue(): return self._objValue return default
def _copy(self, other, *args, **kwargs): if other.hasPointer(): self.setPointer(other.getPointer()) else: self.set(other.get())
[docs] def swap(self, other): """ Swap the contained value between self and other objects. """ tmp = self._objValue self._objValue = other._objValue other._objValue = tmp
[docs] def sum(self, value): self._objValue += self._convertValue(value)
[docs] def multiply(self, value): self._objValue *= value
[docs] def setPointer(self, pointer): """ Set an internal pointer from this Scalar. Then, the value (retrieved with get) will be obtained from the pointed object. """ if pointer is None: if self.hasPointer(): delattr(self, "_pointer") else: self._pointer = pointer
[docs] def hasPointer(self): """ Return True if this Scalar has an internal pointer from where the value will be retrieved with the get() method. """ return hasattr(self, '_pointer')
[docs] def getPointer(self): """ Return the internal pointer of this Scalar or None. """ return getattr(self, '_pointer', None)
[docs]class Integer(Scalar): """Integer object""" def _convertValue(self, value): return int(value)
[docs] def increment(self): """ Add 1 to the current value. """ self._objValue += 1
def __float__(self): return float(self.get()) def __int__(self): return self.get() def __long__(self): return int(self.get())
[docs]class String(Scalar): """String object. """ DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S" FS = ".%f" # Femto seconds
[docs] @classmethod def getDatetime(cls, strValue, formatStr=None, fs=True): """ Converts the given string value to a datetime object. :param strValue: string representation of the date :param formatStr: if None, uses the default cls:String.DATETIME_FORMAT. :param fs: Use femto seconds or not, only when format=None """ if formatStr is None: try: formatStr = cls.DATETIME_FORMAT if fs: formatStr += cls.FS datetime = dt.datetime.strptime(strValue, formatStr) except Exception as ex: # Maybe the %f (femtoseconds) is not working # let's try to format without it datetime = dt.datetime.strptime(strValue, cls.DATETIME_FORMAT) else: datetime = dt.datetime.strptime(strValue, formatStr) return datetime
def _convertValue(self, value): return str(value)
[docs] def empty(self): """ Return true if None or len == 0 """ if not self.hasValue(): return True return len(self.get().strip()) == 0
[docs] def datetime(self, formatStr=None, fs=True): """ Get the datetime from this object string value. """ return String.getDatetime(self._objValue, formatStr, fs)
[docs] def getListFromValues(self, length=None, caster=int): """ Returns a list from a string with values as described at getListFromValues. Useful for """ return getListFromValues(self._objValue, length, caster)
[docs] def getListFromRange(self): """ Returns a list from a string with values as described at getListFromRangeString. Useful for NumericRangeParam params""" return getListFromRangeString(self._objValue)
[docs]class Float(Scalar): """Float object""" EQUAL_PRECISION = 0.001
[docs] @classmethod def setPrecision(cls, newPrecision): """ Set the precision to compare float values. Mainly used for testing purposes. """ cls.EQUAL_PRECISION = newPrecision
def _convertValue(self, value): return float(value)
[docs] def equalAttributes(self, other, ignore=[], verbose=False): """Compare that all attributes are equal""" # If both float has some value distinct of None # then we should compare the absolute value of difference # against the EQUAL_PRECISION to say equal or not if self.hasValue() and other.hasValue(): return abs(self._objValue - other._objValue) < self.EQUAL_PRECISION # If one has None as value, then both should have None # to have equal attributes if not self.hasValue() and not other.hasValue(): return True return False
def __float__(self): return self.get()
[docs]class Boolean(Scalar): """Boolean object""" def _convertValue(self, value): t = type(value) if t is bool: return value if t is str: v = value.strip().lower() return v == 'true' or v == '1' return bool(value) def __nonzero__(self): if not self.hasValue(): return False return self.get() def __bool__(self): return self.get()
[docs]class Pointer(Object): """Reference object to other one""" EXTENDED_ATTR = '__attribute__' EXTENDED_ITEMID = '__itemid__' _ERRORS = {} def __init__(self, value=None, **kwargs): Object.__init__(self, value, objIsPointer=True, **kwargs) # The _extended attribute will be used to point to attributes of a # pointed object or the id of an item inside a set self._extended = String() if 'extended' in kwargs: self.setExtended(kwargs.get('extended')) def __str__(self): """String representation of a pointer""" if self.hasValue(): className = self.getObjValue().getClassName() strId = self.getObjValue().strId() return '-> %s (%s)' % (className, strId) return '-> None' def __clean(self, extended): """ Remove old attributes conventions. """ # TODO: This replacements are needed now by backward compatibility # reasons, when we used __attribute__ and __item__ to mark both cases # in a future the following two lines can be removed. ext = extended.replace(self.EXTENDED_ATTR, '') ext = ext.replace(self.EXTENDED_ITEMID, '') return ext
[docs] def hasValue(self): return self._objValue is not None
[docs] def get(self, default=None): """ Get the pointed object. By default, all pointers store a "pointed object" value. The _extended attribute allows to also point to internal attributes or items (in case of sets) of the pointed object. """ extended = self._extended.get() if extended: ext = self.__clean(extended) parts = ext.split('.') value = self._objValue for p in parts: if hasattr(value, "__getitem__") and p.isdigit(): value = value[int(p)] # item case else: value = getattr(value, p, None) if value is None: break else: value = self._objValue return value
[docs] def set(self, other, cleanExtended=True): """ Set the pointer value but cleaning the extended property. """ Object.set(self, other) # This check is needed because set is call from the Object constructor # when this attribute is not setup yet (a dirty patch, I know) if cleanExtended and hasattr(self, '_extended'): self._extended.set(None)
[docs] def hasExtended(self): return bool(self._extended.get()) # consider empty string as false
[docs] def getExtended(self): return self.__clean(self._extended.get(''))
[docs] def setExtended(self, attribute): """ Set the attribute name of the "pointed object" that will be the result of the get() action. """ self._extended.set(attribute)
[docs] def getExtendedParts(self): """ Return the extended components as a list. """ if self.hasExtended(): return self.getExtended().split('.') else: return []
[docs] def setExtendedParts(self, parts): """ Set the extended attribute but using a list as input. """ self.setExtended('.'.join(parts))
[docs] def addExtended(self, attribute): """ Similar to setExtended, but concatenating more extensions instead of replacing the previous value. """ if self.hasExtended(): self._extended.set('%s.%s' % (self._extended.get(), attribute)) else: self.setExtended(attribute)
[docs] def removeExtended(self): """ Remove the last part of the extended attribute. """ if self.hasExtended(): parts = self.getExtendedParts() self.setExtendedParts(parts[:-1])
[docs] def getAttributes(self): yield '_extended', getattr(self, '_extended')
[docs] def pointsNone(self): return self.get() is None
[docs] def getUniqueId(self): """ Return the unique id concatenating the id of the direct pointed object plus the extended attribute. """ uniqueId = self.getObjValue().strId() if self.hasExtended(): uniqueId += '.%s' % self.getExtended() return uniqueId
[docs]class List(Object, list): ITEM_PREFIX = '__item__' """Class to store a list of objects""" def __init__(self, value=None, **kwargs): list.__init__(self) Object.__init__(self, value, **kwargs) def __getattr__(self, name): if name.startswith(self.ITEM_PREFIX): i = self._stringToIndex(name) if i < len(self): return self[i] raise AttributeError("List object has not attribute: " + name) def __setattr__(self, name, value): if name.startswith('__item__') or len(name) == 0: self.append(value) else: object.__setattr__(self, name, value)
[docs] def getAttributes(self): # First yield all attributes not contained in the list for name, attr in Object.getAttributes(self): yield name, attr # Now yield elements contained in the list for i, item in enumerate(self): yield self._indexToString(i), item
def _indexToString(self, i): """Return the way the string index is generated. String indexes will start in 1, that's why i+1 """ return "%s%06d" % (self.ITEM_PREFIX, i+1) def _stringToIndex(self, strIndex): """ From the string index representation obtain the index. For symmetry the number in the index string will be decreased in 1. """ return int(strIndex.split(self.ITEM_PREFIX)[1]) - 1 def __len__(self): return list.__len__(self)
[docs] def getSize(self): # Just to have similar API than Set return len(self)
[docs] def isEmpty(self): return len(self) == 0
[docs] def clear(self): del self[:]
def _convertValue(self, value): """Value should be a list.""" if not (isinstance(value, list)): raise Exception("List.set: value should be a list.") self.clear() for item in value: self.append(item) return None
[docs]class PointerList(List): def __init__(self, value=None, **kwargs): List.__init__(self, value, **kwargs)
[docs] def append(self, value): """ Append Pointer of objects to the list. If value is a Pointer, just add it to the list. If is another subclass of Object, create a Pointer first and append the pointer. """ if isinstance(value, Pointer): pointer = value elif isinstance(value, Object): pointer = Pointer() pointer.set(value) else: raise Exception("Only subclasses of Object can be added to PointerList\n" " Passing value: %s, type: %s" % (value, type(value))) List.append(self, pointer)
[docs]class CsvList(Scalar, list): """This class will store a list of objects in a single DB row separated by comma. pType: the type of the list elements, int, bool, str""" def __init__(self, pType=str, **kwargs): Scalar.__init__(self, **kwargs) list.__init__(self) self._pType = pType def _convertValue(self, value): """ Value should be a str with comma separated values or a list. """ self.clear() if value: if isinstance(value, str): for s in value.split(','): self.append(self._pType(s)) elif isinstance(value, list) or isinstance(value, tuple): for s in value: self.append(self._pType(s)) else: raise Exception("CsvList.set: Invalid value type: ", type(value))
[docs] def equalAttributes(self, other, ignore=[], verbose=False): """Compare that all attributes are equal""" return self == other
[docs] def getObjValue(self): self._objValue = ','.join(map(str, self)) return self._objValue
[docs] def get(self): return self.getObjValue()
def __str__(self): return list.__str__(self)
[docs] def isEmpty(self): return len(self) == 0
[docs] def clear(self): del self[:]
def __eq__(self, other): """ Comparison for scalars should be by value and for other objects by reference. """ return all(a == b for a, b in zip(self, other))
[docs]class Set(Object): """ This class will be a container implementation for elements. It will use an extra sqlite file to store the elements. All items will have a unique id that identifies each element in the set. """ ITEM_TYPE = None # This property should be defined to know the item type # This will be used for stream Set where data is populated on the fly STREAM_OPEN = 1 STREAM_CLOSED = 2 indexes = ['_index'] def __init__(self, filename=None, prefix='', mapperClass=None, classesDict=None, **kwargs): # Use the object value to store the filename super().__init__(**kwargs) self._mapper = None self._idCount = 0 self._size = Integer(0) # cached value of the number of images # It is a bit contradictory that initially a set is Closed # but this is the default behaviour of the Set before Streaming extension self._streamState = Integer(self.STREAM_CLOSED) self.setMapperClass(mapperClass) self._mapperPath = CsvList() # sqlite filename self._representative = None self._classesDict = classesDict self._indexes = kwargs.get('indexes', []) # If filename is passed in the constructor, it means that # we want to create a new object, so we need to delete it if # the file exists if filename: self._mapperPath.set('%s, %s' % (filename, prefix)) self.load()
[docs] def copy(self, other, copyId=True, ignoreAttrs=['_mapperPath', '_size', '_streamState']): """ Copies the attributes of the set :param other: Set to copy attributes from :param copyId: True. Copies the objId. :param ignoreAttrs: Attributes list to ignore while copying. _mapperPath, _size and _streamState are ignored by default. """ # Note: By default this behaves properly for cloning non identical objects. # This is to clone a set from another new set that is based on "other". # For exact clone, we need to pass empty ignoreAttrs. This case happens in Protocol.__tryUpdateOutputSet super().copy(other, copyId, ignoreAttrs)
def _getMapper(self): """ This method will open the connection to the items database on demand. That's why this method should be used instead of accessing directly _mapper. """ if self._mapper is None: self.load() return self._mapper
[docs] def aggregate(self, operations, operationLabel, groupByLabels=None): """ This method operate on sets of values. They are used with a GROUP BY clause to group values into subsets :param operations: list of aggregate function such as COUNT, MAX, MIN,... :param operationLabel: label to use by the aggregate function :param groupByLabels: list of labels to group :return: the aggregated value of each group """ return self._getMapper().aggregate(operations, operationLabel, groupByLabels)
[docs] def setMapperClass(self, MapperClass): """ Set the mapper to be used for storage. """ if MapperClass is None: from pyworkflow.mapper.sqlite import SqliteFlatMapper MapperClass = SqliteFlatMapper Object.__setattr__(self, '_MapperClass', MapperClass)
[docs] def getItem(self, field, value): """ Alternative to [] to get a single item form the set. :param field: attribute of the item to look up for. Should be a unique identifier :param value: value to look for""" return self.__getitem__({field:value})
def __getitem__(self, itemId): """ Get the image with the given id. NOTE: Performance warning: this method is expensive in terms of performance use of iterItems is preferred. Use this one only for a single item retrieval. :param itemId: the objId (integer) of the item. Alternatively itemId could be a dictionary like {_alternative_identifier: 'RG-1234'} where '_alternative_identifier' is an attribute of the item and 'RG-1234' the value to look for only the first matching item is returned in case there are more """ closedMapper = self._mapper is None if isinstance(itemId, dict): for obj in self._getMapper().selectBy(**itemId): item = obj break else: item = self._getMapper().selectById(itemId) if closedMapper: self.close() return item def __contains__(self, itemId): """ element in Set """ return self._getMapper().exists(itemId)
[docs] def iterItems(self, orderBy='id', direction='ASC', where=None, limit=None, iterate=True): return self._getMapper().selectAll(orderBy=orderBy, direction=direction, where=where, limit=limit, iterate=iterate) # has flat mapper, iterate is true
[docs] def getFirstItem(self): """ Return the first item in the Set. """ # This function is used in many contexts where the mapper can be # left open and could be problematic locking the db for other processes # So, we look if the mapper was closed before, in which case # we will close it after that closedMapper = self._mapper is None firstItem = self._getMapper().selectFirst() if closedMapper: self.close() return firstItem
def __iter__(self): """ Iterate over the set of images. """ return self.iterItems() def __len__(self): return self._size.get()
[docs] def getSize(self): """Return the number of images""" return self._size.get()
[docs] def isEmpty(self): return self.getSize() == 0
[docs] def getFileName(self): if len(self._mapperPath): return self._mapperPath[0] return None
[docs] def getPrefix(self): if len(self._mapperPath) > 1: return self._mapperPath[1] return None
[docs] def write(self, properties=True): """ Commit the changes made to the Set underlying database. :param properties: this flag controls when to write Set attributes to special table 'Properties' in the database. False value is use for example in SetOfClasses for not writing each Class2D properties. """ if properties: self._getMapper().setProperty('self', self.getClassName()) objDict = self.getObjDict() for key, value in objDict.items(): self._getMapper().setProperty(key, value) self._getMapper().commit()
def _loadClassesDict(self): return self._classesDict or globals()
[docs] def setClassesDict(self, classesDict): """ Set the dictionary with classes where to look for classes names. """ self._classesDict = classesDict
[docs] def load(self): """ Load extra data from files. """ if self._mapperPath.isEmpty(): raise Exception("Set.load: mapper path and prefix not set.") fn, prefix = self._mapperPath self._mapper = self._MapperClass(fn, self._loadClassesDict(), prefix, self._indexes) self._size.set(self._mapper.count()) self._idCount = self._mapper.maxId()
def __del__(self): # Close connections to db when destroy this object if self._mapper is not None: self.close()
[docs] def close(self): if self._mapper is not None: self._mapper.close() self._mapper = None
[docs] def clear(self): self._mapper.clear() self._idCount = 0 self._size.set(0)
[docs] def append(self, item): """ Add an item to the set. If the item has already an id, use it. If not, keep a counter with the max id and assign the next one. """ # The _idCount and _size properties work fine # under the assumption that once a Set is stored, # then it is read-only (no more appends). # # Anyway, this can be easily changed by updating # both from the underlying sqlite when reading a set. if not item.hasObjId(): self._idCount += 1 item.setObjId(self._idCount) else: self._idCount = max(self._idCount, item.getObjId()) self._insertItem(item) self._size.increment()
def _insertItem(self, item): self._getMapper().insert(item)
[docs] def update(self, item): """ Update an existing item. """ self._getMapper().update(item)
def __str__(self): return "%-20s (%d items%s)" % (self.getClassName(), self.getSize(), self._appendStreamState()) def _appendStreamState(self): return "" if self.isStreamClosed() else ", open set"
[docs] def getSubset(self, n): """ Return a subset of n element, making a clone of each. """ subset = [] for i, item in enumerate(self): subset.append(item.clone()) if i == n: break return subset
[docs] def setRepresentative(self, representative): self._representative = representative
[docs] def getRepresentative(self): return self._representative
[docs] def hasRepresentative(self): """ Return true if have a representative image. """ return self._representative is not None
[docs] def equalItemAttributes(self, other, ignore=[], verbose=False): """Compare that all items in self and other return True for equalAttributes. """ return all(x.getObjId() == y.getObjId() and x.equalAttributes(y, ignore=ignore, verbose=verbose) for x, y in zip(self, other))
[docs] def hasProperty(self, key): return self._getMapper().hasProperty(key)
[docs] def getProperty(self, key, defaultValue=None): return self._getMapper().getProperty(key, defaultValue)
[docs] def loadProperty(self, propertyName, defaultValue=None): """ Get the value of a property and set its value as an object attribute. """ try: self.setAttributeValue(propertyName, self.getProperty(propertyName, defaultValue)) except Exception as e: # There could be an exception with "extra" set properties. We tolerate it but warn the developers # This happens in streaming scenarios where properties are only coming from the set sqlite. logger.warning("DEVELOPERS: %s property could not be loaded from the disk. " "Is the property an extended property?. It is lost from now on." % propertyName)
[docs] def loadAllProperties(self): """ Retrieve all properties stored by the mapper. """ for key in self._getMapper().getPropertyKeys(): if key != 'self': self.loadProperty(key)
[docs] def getIdSet(self): """ Return a Python set object containing all ids. """ return set(self.getUniqueValues('id'))
[docs] def getFiles(self): files = set() if self.getFileName(): files.add(self.getFileName()) return files
[docs] def getStreamState(self): return self._streamState.get()
[docs] def setStreamState(self, newState): self._streamState.set(newState)
[docs] def isStreamOpen(self): return self.getStreamState() == self.STREAM_OPEN
[docs] def isStreamClosed(self): return self.getStreamState() == self.STREAM_CLOSED
[docs] def enableAppend(self): """ By default, when a Set is loaded, it is opened in read-only mode, so no new insertions are allowed. This function will allow to append more items to an existing set. """ self._getMapper().enableAppend()
# ******* Streaming helpers to deal with sets **********
[docs] def hasChangedSince(self, time): """ Returns if the set has changed since the timestamp passed as parameter. It will check the last modified time of the file this set uses to persists. :parameter time: timestamp to compare to the last modification time """ if time is None: return True # Get the file name localFile = self.getFileName() #self.lastCheck = getattr(self, 'lastCheck', now) # Get the last time it was modified modTime = dt.datetime.fromtimestamp(getmtime(localFile)) return time < modTime
[docs] def getUniqueValues(self, attributes, where=None): """ Returns a list (for a single label) or a dictionary with unique values for the passed labels. If more than one label is passed it will be unique rows similar ti SQL unique clause. :param attributes (string or list) item attribute/s to retrieve unique row values :param where (string) condition to filter the results""" return self._getMapper().unique(attributes, where)
[docs] def fmtDate(self, date): """ Formats a python date to a valid string for the mapper""" return self._getMapper().fmtDate(date)
[docs] @staticmethod def isItemEnabled(item): """ Returns if the item is enabled...to be used as a callback. In some other cases (new user subsets) this method will be replaced""" return item.isEnabled()
[docs]def ObjectWrap(value): """This function will act as a simple Factory to create objects from Python basic types""" t = type(value) if issubclass(t, Object): return value if t is int: return Integer(value) if t is bool: return Boolean(value) if t is float: return Float(value) if t is list: o = CsvList() o.set(value) return o if t is None: return None # If it is str, unicode or unknown type, convert to string return String(value)
[docs]class Dict(dict): """ Simple wrapper around dict class to have a default value. """ def __init__(self, default=None): self._default = default dict.__init__(self) def __getitem__(self, key): """ Get the image with the given id. """ return self.get(key, self._default) def __contains__(self, item): return True
# Define a global dict with all basic defined objects OBJECTS_DICT = getSubclasses(Object, globals())