Source code for pyworkflow.mapper.xmlmapper

# **************************************************************************
# *
# * Authors:     J.M. De la Rosa Trevin (
# *              Roberto Marabini       (
# *
# * Unidad de  Bioinformatica of Centro Nacional de Biotecnologia , CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 3 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address ''
# *
# **************************************************************************

from os.path import exists

    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET

from .mapper import Mapper

[docs]class XmlMapper(Mapper): """Mapper for XML""" def __init__(self, filename, dictClasses=None, rootName='ALL', **args): self.filename = filename if exists(filename): self._read() else: self._create(rootName, **args) Mapper.__init__(self, dictClasses) # Objects map (id should be defined) self.objDict = {} # Store some objects during parsing # that have some Pointer attributes and need to be fixed later self.pendingPtrDict = {} # This dictionary serve to define how to write classes # for example if the pair 'Integer': 'attribute' is present # all Integer will be store as attributes in the xml # possible values are: # 'attribute', 'class_only', 'class_name', 'name_class', 'name_only' self.classTags = {} # Counter to provide default objects id's self.objCount = 0
[docs] def setClassTag(self, classname, tag): self.classTags[classname] = tag
[docs] def indent(self, elem, level=0): i = "\n" + level*" " if len(elem): if not elem.text or not elem.text.strip(): elem.text = i + " " if not elem.tail or not elem.tail.strip(): elem.tail = i for _elem in elem: self.indent(_elem, level+1) if not _elem.tail or not _elem.tail.strip(): _elem.tail = i else: # TODO: generalize the matching of consecutive items if level and (not elem.tail or not elem.tail.strip()): for ii in ['t11', 't12', 't13', 't21', 't22', 't23', 't31', 't32', 't33']: if elem.tag == ii: elem.tail = " " return elem.tail = i
def _create(self, rootName, **args): self.root = ET.Element(rootName) if 'header' in args: self.root.append(ET.Comment(args.get('header'))) if 'version' in args: self.root.set("version", str(args.get('version'))) def _read(self): if self.filename is None: raise Exception(" filename is None.") if not exists(self.filename): raise Exception(" filename '%s' does not exists.") self.tree = ET.parse(self.filename) self.root = self.tree.getroot()
[docs] def strId(self, objId): """Create an unique id from the objId, specially for dictionaries, where all keys and values will be converted to string""" if type(objId) is dict: ordKeys = objId.keys() ordKeys.sort() sid = '' for k in ordKeys: sid += '%s:%s ' % (k, objId[k]) return sid return str(objId)
[docs] def selectById(self, objId): """Retrieve an object give the id""" return self.objDict.get(self.strId(objId), None)
def _addObjectToDict(self, obj, objDict): """Add object if have id""" if obj.hasObjId(): key = self.strId(obj.getObjId()) else: self.objCount += 1 key = '%s_%06d' % (obj.getClassName(), self.objCount) objDict[key] = obj
[docs] def selectAll(self, iterate=False): """Select object from storage""" for child in self.root: obj = self._buildObjectFromClass(child.tag) self.fillObject(obj, child) self._addObjectToDict(obj, self.objDict) # set properly primary keys for obj in self.pendingPtrDict.values(): for key, attr in obj.getAttributesToStore(): if attr.isPointer(): ptr = self.selectById(attr._objId) attr._objId = None attr.set(ptr) return self.objDict.values()
[docs] def setChildObject(self, obj, childName, childClass=None): childObj = getattr(obj, childName, None) if childObj is None: if childClass is None: raise Exception('Attribute "%s" was not found in parent object and classname not stored' % childName) childObj = self._buildObjectFromClass(childClass) setattr(obj, childName, childObj) return childObj
[docs] def fillObject(self, obj, objElem): # Set attributes first if not obj.isPointer(): for k, v in objElem.attrib.items(): if k not in ['id', 'classname', 'attrname']: childObj = self.setChildObject(obj, k) childObj.set(v) # Set tree childs attributes for child in objElem: if 'classname' in child.attrib: childName = child.tag childClass = child.attrib['classname'] elif 'attrname' in child.attrib: childName = child.attrib['attrname'] childClass = child.tag else: tagKey = '%s.%s' % (obj.getClassName(), child.tag) tag = self.classTags.get(tagKey, None) if tag == 'name_only' or child.tag not in self.dictClasses: childName = child.tag childClass = None else: childName = '' childClass = child.tag childObj = self.setChildObject(obj, childName, childClass) if child.text and len(child.text.strip()): childObj.set(child.text.strip()) else: # does have attributes? attributes = child.attrib if attributes and childObj.isPointer(): childObj._objId = attributes # save obj in auxiliary file self._addObjectToDict(obj, self.pendingPtrDict) self.fillObject(childObj, child)
[docs] def commit(self): if self.filename is None: raise Exception("XmlMapper.commit: filename is None") self.indent(self.root) tree = ET.ElementTree(self.root) tree.write(self.filename, xml_declaration=True, encoding='utf-8')
[docs] def setObjectId(self, objElem, objId): # Set attributes of this object element if objId: if type(objId) is dict: for k, v in objId.items(): if v: objElem.set(k, str(v)) else: objElem.set('id', str(objId))
[docs] def insert(self, obj): """Insert a new object into the system""" objElem = self.addSubElement(self.root, obj.getClassName(), obj._objValue) self.setObjectId(objElem, obj.getObjId()) # Insert object childs self.insertObjectWithChilds(obj, objElem)
[docs] def addSubElement(self, parentElem, name, value=None): childElem = ET.SubElement(parentElem, name) if value is not None: childElem.text = str(value) return childElem
[docs] def insertObjectWithChilds(self, obj, parentElem): for key, attr in obj.getAttributesToStore(): if attr.hasValue(): attrClass = attr.getClassName() objClass = obj.getClassName() allKey = '%s.ALL' % objClass # First try with .ALL tag = self.classTags.get(allKey, '') allKey = 'ALL.%s' % attrClass tag = self.classTags.get(allKey, tag) allKey = 'ALL.%s' % key # also with ALL.attribute tag = self.classTags.get(allKey, tag) classKey = '%s.%s' % (objClass, attrClass) # Second, with .childClass tag = self.classTags.get(classKey, tag) attrKey = '%s.%s' % (objClass, key) # Finally with .attribute tag = self.classTags.get(attrKey, tag) if tag == 'attribute': parentElem.set(key, str(attr)) else: if attr.isPointer(): childElem = self.addSubElement(parentElem, key) self.setObjectId(childElem, attr.get().getObjId()) else: # Select if we want to use the classname or elem name # as element tagname if tag.startswith('class'): name = attrClass else: name = key childElem = self.addSubElement(parentElem, name, attr._objValue) if tag.endswith('class'): childElem.set('classname', attrClass) elif tag.endswith('name'): childElem.set('attrname', key) self.insertObjectWithChilds(attr, childElem)
[docs] def updateFrom(self, obj): """Update object data with storage info""" pass
[docs] def updateTo(self, obj): """Update storage with object info""" pass