Creating referenceable objects from XML

Gerard Flanagan grflanagan at yahoo.co.uk
Mon Dec 5 08:34:04 EST 2005


Michael Williams wrote:
> Hi All,
>
> I'm looking for a quality Python XML implementation.  All of the DOM
> and SAX implementations I've come across so far are rather
> convoluted.  Are there any quality implementations that will (after
> parsing the XML) return an object that is accessible by name? Such as
> the following:
>
>
>
>
> xml = """
> <book>
> 	<title>MyBook</title>
> 	<author>the author</author>
> </book>
> """
>
>
>
>
>
> And after parsing the XML allow me to access it as so:
>
> book.title
>
> I need it to somehow convert my XML to an intuitively referenceable
> object.  Any ideas?  I could even do it myself if I knew the
> mechanism by which Python classes do this (create variables on the fly).
>
> Thanks in advance!

Michael

Here's an approach using ElementTree that worked for me.  It's not
generic, and it's a bit brittle (e.g. it won't handle missing nodes), but
it might do for a simple, flat schema or for a prototype.

All the best

Gerard

(TOY CODE - NOT TESTED MUCH)

from elementtree import ElementTree

class ElementWrapper(object):
    """Thin holder for a single ElementTree element, kept in ``self.element``.

    Reading the ``xml`` property serialises the held element; assigning
    markup to it parses the markup and replaces the held element.
    """

    def __init__(self, element=None):
        self.element = element

    def __tostring(self):
        # Getter half of ``xml``.
        serialised = ElementTree.tostring(self.element)
        return serialised

    def __fromstring(self, xml):
        # Setter half of ``xml``: parse the markup and keep the result.
        parsed = ElementTree.fromstring(xml)
        self.element = parsed

    xml = property(fget=__tostring, fset=__fromstring)

    def __str__(self):
        # The string form is whatever the ``xml`` property yields.
        return self.xml

    def parse(self, infile):
        """Parse *infile* and hold the root element of the resulting tree."""
        self.element = ElementTree.parse(infile).getroot()

    def write(self, outfile):
        """Wrap the held element in a tree and write that tree to *outfile*."""
        document = ElementTree.ElementTree(self.element)
        document.write(outfile)

########### (separate module; the import below expects the class above to live in elementwrapper.py)


from elementtree.ElementTree import Element
from elementwrapper import ElementWrapper

# XML namespace URI.  Every tag that MailDocument creates is qualified with
# it using the '{uri}Tag' notation.
xmlns = 'http://schemas/email/0.1'


class MailDocument(ElementWrapper):
    """Mail message stored as a namespaced ``Mail`` element.

    The root element holds seven children in a fixed order: Date, From,
    Subject, To, Cc, Body, Attachments.  Every accessor addresses its child
    by position, so a document loaded through ``xml`` or ``parse`` has to
    keep that layout (a missing node is not handled).
    """

    # Positions of the children under the root, in the order
    # __build_element appends them.
    _DATE, _FROM, _SUBJECT, _TO, _CC, _BODY, _ATTACHMENTS = range(7)

    def __build_element(self):
        """Create the empty Mail skeleton and store it in ``self.element``."""
        root = Element('{%s}Mail' % xmlns)
        for tag in ('Date', 'From', 'Subject', 'To', 'Cc', 'Body',
                    'Attachments'):
            root.append(Element('{%s}%s' % (xmlns, tag)))
        self.element = root

    #####################################################
    #    Properties
    #
    def __get_uid(self):
        return self.element.get('id')

    def __set_uid(self, id=''):
        self.element.set('id', id)

    def __get_date(self):
        return self.element[self._DATE].text

    def __set_date(self, value=''):
        self.element[self._DATE].text = value

    def __get_from(self):
        sender = self.element[self._FROM]
        return sender.get('address'), sender.get('name')

    def __get_subject(self):
        return self.element[self._SUBJECT].text

    def __set_subject(self, value=''):
        self.element[self._SUBJECT].text = value

    def __get_body(self):
        return self.element[self._BODY].text

    def __set_body(self, value=''):
        self.element[self._BODY].text = value

    # ``uid`` is the root's ``id`` attribute; ``From`` is a read-only
    # (address, name) tuple -- change it with set_from_header().
    uid = property(__get_uid, __set_uid)
    From = property(__get_from)
    subject = property(__get_subject, __set_subject)
    date = property(__get_date, __set_date)
    body = property(__get_body, __set_body)

    def set_from_header(self, address='', name=''):
        """Set the ``address`` and ``name`` attributes of the From child."""
        sender = self.element[self._FROM]
        sender.set('address', address)
        sender.set('name', name)
    #
    #    End Properties
    #####################################################

    #####################################################
    #    Lists
    #
    def add_to_header(self, address='', name=''):
        """Append a ``mailto`` entry to the To child."""
        self.__add_mailto(self.element[self._TO], address, name)

    def remove_to_header(self, index):
        """Remove the To entry at position *index*."""
        recipients = self.element[self._TO]
        recipients.remove(recipients[index])

    def add_cc_header(self, address='', name=''):
        """Append a ``mailto`` entry to the Cc child."""
        self.__add_mailto(self.element[self._CC], address, name)

    def remove_cc_header(self, index):
        """Remove the Cc entry at position *index*."""
        recipients = self.element[self._CC]
        recipients.remove(recipients[index])

    def add_attachment(self, filename='', fileuri='', filetype=''):
        """Append a ``Uri`` entry to the Attachments child.

        *filename* becomes the element text; *fileuri* and *filetype* are
        stored in the ``value`` and ``type`` attributes.
        """
        elem = Element("{%s}Uri" % xmlns, value=fileuri, type=filetype)
        elem.text = filename
        self.element[self._ATTACHMENTS].append(elem)

    def remove_attachment(self, index):
        """Remove the attachment entry at position *index*."""
        attachments = self.element[self._ATTACHMENTS]
        attachments.remove(attachments[index])

    def __add_mailto(self, element, Address='', Name=''):
        """Append a ``mailto`` child carrying address/name to *element*."""
        element.append(
            Element("{%s}mailto" % xmlns, address=Address, name=Name))

    def __address_pairs(self, container):
        """Return ``(address, name)`` for every child of *container*."""
        return [(item.get('address'), item.get('name'))
                for item in container]

    def get_to_headers(self):
        """Return the To entries as a list of ``(address, name)`` tuples."""
        return self.__address_pairs(self.element[self._TO])

    def get_cc_headers(self):
        """Return the Cc entries as a list of ``(address, name)`` tuples."""
        return self.__address_pairs(self.element[self._CC])

    def get_attachments(self):
        """Return attachments as ``(filename, uri, type)`` tuples.

        Reads ``self.element`` like every other accessor; the name-mangled
        ``self.__element`` is never assigned, and the result list has to be
        the one that is filled and returned.
        """
        return [(item.text, item.get('value'), item.get('type'))
                for item in self.element[self._ATTACHMENTS]]
    #
    #    End Lists
    ########################################################

    ########################################################
    #    Initialise
    #
    def __init__(self):
        """Build the empty skeleton and set every scalar field to ''."""
        self.__build_element()
        self.__set_uid()
        self.__set_date()
        self.__set_subject()
        self.set_from_header()
        self.__set_body()
    #
    #    End Initialise
    ########################################################

# Sample Mail document for the __main__ demo below.  It uses the same
# namespace URI as ``xmlns`` and the same child order that MailDocument
# builds (Date, From, Subject, To, Cc, Body, Attachments).
xml_test ='''
<mail:Mail xmlns:mail="http://schemas/email/0.1">
<mail:Date>10/10/05</mail:Date>
<mail:From address='me at org.org' name='Mr. Jones'/>
<mail:Subject>just a note</mail:Subject>
<mail:To>
    <mail:mailto address='you at org.org' name='Mrs Jones' />
    <mail:mailto address='a.nother at org.org' name='Alan Nother' />
</mail:To>
<mail:Cc></mail:Cc>
<mail:Body>hi there,
just a note to say hi there!</mail:Body>
<mail:Attachments></mail:Attachments>
</mail:Mail>
'''
if __name__ == '__main__':
    # Start from an empty document, then replace its content with the
    # sample markup through the ``xml`` property.
    mail = MailDocument()
    mail.xml = xml_test
    #mail.parse('test/data/test.xml')

    # Scalar fields: show the sender, then overwrite it and show it again.
    print('From: ' + mail.From[0])
    print('Subject: ' + mail.subject)
    mail.set_from_header('new at new.com')
    print('From: ' + mail.From[0])

    # List fields: add two recipients and list them all.
    mail.add_to_header('aaa.bbb at ccc', 'aaaaaa')
    mail.add_to_header('fff.ggg at hhh', 'ffffff')
    print('To:')
    for hdr in mail.get_to_headers():
        print(hdr)

    # Drop the second recipient and list again.
    mail.remove_to_header(1)
    print('To:')
    for hdr in mail.get_to_headers():
        print(hdr)
    #mail.write('test_copy.xml')




More information about the Python-list mailing list