Subclassing cElementTree.XMLTreeBuilder

mukappa mkeller.dci at gmail.com
Thu Jan 25 07:37:34 EST 2007


On Jan 25, 4:32 am, "Fredrik Lundh" <fred... at pythonware.com> wrote:
> "mukappa" wrote:
> > Is it possible to subclass cElementTree.XMLTreeBuilder?
>
> no, it's a factory function.  if you need to extend it, you'll have to wrap it.  but
> I'm not sure I see the use case; what is it you're trying to do here?
>
> </F>

I'm trying to parse XMPP stanzas coming in over a socket.  Since
cElementTree is more efficient, I would like to use it when available.

Here is what I have now:

import logging
from elementtree import ElementTree

class StanzaTreeBuilder(ElementTree.XMLTreeBuilder):
    """Capture stanza elements (<stream> children) from parser events.

    Overrides the parent's _start/_start_list/_end hooks so that every
    element the parent builds is also reported to start()/end(), which
    track the nesting depth and publish completed stanzas.

    Attributes:
        namespaces: stack of (prefix, uri) pairs, most recent first.
        level: current nesting depth; 0 before the opening <stream>.
        stanza: the most recently captured element (the opening <stream>
            first, then each completed child of it), or None.  The
            consumer is expected to reset it to None after reading.
        streamelem: the opening <stream> element, None until it is seen.

    Private methods are lifted from the old
        elementtree.XMLTreeBuilder.FancyTreeBuilder
    """

    # ElementTree reports namespaced tags as "{uri}localname".
    _STREAM_TAG = "{http://etherx.jabber.org/streams}stream"

    def __init__(self, html=0):
        """Initialize state variables and hook the namespace callbacks."""
        ElementTree.XMLTreeBuilder.__init__(self, html)
        self._parser.StartNamespaceDeclHandler = self._start_ns
        self._parser.EndNamespaceDeclHandler = self._end_ns
        self.namespaces = []
        self.level = 0
        self.stanza = None
        self.streamelem = None

    def _start(self, tag, attrib_in):
        """Let the parent build the element, then report it to start()."""
        elem = ElementTree.XMLTreeBuilder._start(self, tag, attrib_in)
        self.start(elem)

    def _start_list(self, tag, attrib_in):
        """Same as _start(), for the parent's list-style attribute hook."""
        elem = ElementTree.XMLTreeBuilder._start_list(self, tag, attrib_in)
        self.start(elem)

    def _end(self, tag):
        """Let the parent close the element, then report it to end()."""
        elem = ElementTree.XMLTreeBuilder._end(self, tag)
        self.end(elem)

    def _start_ns(self, prefix, value):
        """Push a namespace declaration onto the front of the stack."""
        self.namespaces.insert(0, (prefix, value))

    def _end_ns(self, prefix):
        """Pop the innermost namespace declaration and sanity-check it."""
        # Pop outside the assert: assert statements are stripped under
        # "python -O", and the stack must shrink regardless.
        ended_prefix = self.namespaces.pop(0)[0]
        assert ended_prefix == prefix, "implementation confused"

    def start(self, element):
        """Track nesting level (capture open <stream> on first call)."""
        logging.debug("start(%s): %s",
                      self.level, ElementTree.tostring(element))
        if self.streamelem is None:
            # The very first element must be the XMPP stream root.  Compare
            # against the qualified "{uri}local" form ElementTree uses; the
            # bare namespace URI never matches a real tag.
            assert element.tag == self._STREAM_TAG, (
                "unexpected root element: %r" % (element.tag,))
            self.streamelem = element
            # Publish the (still open) <stream> so the consumer can read
            # its attributes before any stanza arrives.
            self.stanza = element
        self.level += 1

    def end(self, element):
        """Capture stanza when nesting level is 1."""
        self.level -= 1
        if self.level == 1:
            # A direct child of <stream> just closed: that is a stanza.
            self.stanza = element
            # Detach finished children so the stream element does not grow
            # for the lifetime of the connection.  Note that clear() also
            # resets the stream element's own attributes and text.
            self.streamelem.clear()
        logging.debug("end(%s): %s",
                      self.level, ElementTree.tostring(element))

def stanzagen(socket):
    """Yield open <stream ...> on first call, then complete stanzas.

    The caller needs the 'id' attribute from the opening <stream>, and
    after that one stanza per call.  Works by reading the socket one byte
    at a time and feeding it to a StanzaTreeBuilder, yielding each
    captured element at the earliest opportunity.  Reading a single byte
    per recv() is deliberate: the builder only remembers the most recent
    capture, so a larger chunk could complete several stanzas at once.

    Stanzas are returned as ElementTree Elements to give them a natural
    pythonic feel.

    Args:
        socket: object with a recv(n) method; an empty result means EOF.

    Yields:
        The captured Element, or None when recv() raised (treated as
        "nothing waiting", e.g. a timeout on a non-blocking socket).

    Raises:
        Whatever the parser raises from feed()/close(); the offending
        data is logged first.

    The generator exits on EOF.

    see
    http://online.effbot.org/2004_12_01_archive.htm#element-generator
    """
    p = StanzaTreeBuilder()
    while 1:
        # Keep the recv() failure path separate from the parse failure
        # path, so a stale byte from an earlier iteration cannot decide
        # which one we are in.
        try:
            data = socket.recv(1)
        except Exception:
            # "except Exception" (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate instead of being turned into None.
            # NOTE(review): permanent socket errors are also reported as
            # None here, as before; the caller must detect a dead peer.
            yield None
            continue

        try:
            if not data:
                logging.info("EOF from peer")
                p.close()
                break
            p.feed(data)
        except Exception:
            logging.error("error reading: %s", data)
            raise

        if p.stanza is not None:
            # Hand the capture over and clear the slot for the next one.
            stanza, p.stanza = p.stanza, None
            yield stanza




More information about the Python-list mailing list