[Python-checkins] cpython: Issue #14007: implemented the 'element_factory' feature of TreeBuilder in _elementtree, with a test.

eli.bendersky python-checkins at python.org
Wed May 30 16:59:10 CEST 2012


http://hg.python.org/cpython/rev/20b8f0ee3d64
changeset:   77246:20b8f0ee3d64
user:        Eli Bendersky <eliben at gmail.com>
date:        Wed May 30 17:57:50 2012 +0300
summary:
  Issue #14007: implemented the 'element_factory' feature of TreeBuilder in
_elementtree, with a test.

files:
  Doc/library/xml.etree.elementtree.rst |    6 +-
  Lib/test/test_xml_etree.py            |   18 +-
  Modules/_elementtree.c                |  105 +++++++++----
  3 files changed, 91 insertions(+), 38 deletions(-)


diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst
--- a/Doc/library/xml.etree.elementtree.rst
+++ b/Doc/library/xml.etree.elementtree.rst
@@ -716,9 +716,9 @@
    Generic element structure builder.  This builder converts a sequence of
    start, data, and end method calls to a well-formed element structure.  You
    can use this class to build an element structure using a custom XML parser,
-   or a parser for some other XML-like format.  The *element_factory* is called
-   to create new :class:`Element` instances when given.
-
+   or a parser for some other XML-like format.  *element_factory*, when given,
+   must be a callable accepting two positional arguments: a tag and
+   a dict of attributes.  It is expected to return a new element instance.
 
    .. method:: close()
 
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1959,6 +1959,8 @@
         ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
         '<html>text</html>')
 
+    sample2 = '''<toplevel>sometext</toplevel>'''
+
     def test_dummy_builder(self):
         class BaseDummyBuilder:
             def close(self):
@@ -1993,11 +1995,19 @@
         e = parser.close()
         self.assertEqual(e.tag, 'html')
 
-    # XXX in _elementtree, the constructor of TreeBuilder expects no
-    # arguments
-    @unittest.expectedFailure
     def test_element_factory(self):
-        tb = ET.TreeBuilder(element_factory=lambda: ET.Element())
+        lst = []
+        def myfactory(tag, attrib):
+            nonlocal lst
+            lst.append(tag)
+            return ET.Element(tag, attrib)
+
+        tb = ET.TreeBuilder(element_factory=myfactory)
+        parser = ET.XMLParser(target=tb)
+        parser.feed(self.sample2)
+        parser.close()
+
+        self.assertEqual(lst, ['toplevel'])
 
     @unittest.expectedFailure   # XXX issue 14007 with C ElementTree
     def test_doctype(self):
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -191,6 +191,15 @@
     return result;
 }
 
+/* Is the given object an empty dictionary?
+*/
+static int
+is_empty_dict(PyObject *obj)
+{
+    return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
+}
+
+
 /* -------------------------------------------------------------------- */
 /* the Element type */
 
@@ -297,14 +306,9 @@
     self = PyObject_GC_New(ElementObject, &Element_Type);
     if (self == NULL)
         return NULL;
-
-    /* use None for empty dictionaries */
-    if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
-        attrib = Py_None;
-
     self->extra = NULL;
 
-    if (attrib != Py_None) {
+    if (attrib != Py_None && !is_empty_dict(attrib)) {
         if (create_extra(self, attrib) < 0) {
             PyObject_Del(self);
             return NULL;
@@ -416,22 +420,14 @@
 
     self_elem = (ElementObject *)self;
 
-    /* Use None for empty dictionaries */
-    if (PyDict_CheckExact(attrib) && PyDict_Size(attrib) == 0) {
-        Py_INCREF(Py_None);
-        attrib = Py_None;
-    }
-
-    if (attrib != Py_None) {
+    if (attrib != Py_None && !is_empty_dict(attrib)) {
         if (create_extra(self_elem, attrib) < 0) {
             PyObject_Del(self_elem);
             return -1;
         }
     }
 
-    /* If create_extra needed attrib, it took a reference to it, so we can
-     * release ours anyway.
-    */
+    /* We own a reference to attrib here and it's no longer needed. */
     Py_DECREF(attrib);
 
     /* Replace the objects already pointed to by tag, text and tail. */
@@ -1813,6 +1809,8 @@
     PyObject *stack; /* element stack */
     Py_ssize_t index; /* current stack size (0 means empty) */
 
+    PyObject *element_factory;
+
     /* element tracing */
     PyObject *events; /* list of events, or NULL if not collecting */
     PyObject *start_event_obj; /* event objects (NULL to ignore) */
@@ -1841,6 +1839,7 @@
         t->last = (ElementObject *)Py_None;
 
         t->data = NULL;
+        t->element_factory = NULL;
         t->stack = PyList_New(20);
         if (!t->stack) {
             Py_DECREF(t->this);
@@ -1859,11 +1858,38 @@
 static int
 treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
 {
+    static char *kwlist[] = {"element_factory", NULL};
+    PyObject *element_factory = NULL;
+    TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
+                                     &element_factory)) {
+        return -1;
+    }
+
+    if (element_factory) {
+        Py_INCREF(element_factory);
+        Py_XDECREF(self_tb->element_factory);
+        self_tb->element_factory = element_factory;
+    }
+
     return 0;
 }
 
-static void
-treebuilder_dealloc(TreeBuilderObject *self)
+static int
+treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
+{
+    Py_VISIT(self->root);
+    Py_VISIT(self->this);
+    Py_VISIT(self->last);
+    Py_VISIT(self->data);
+    Py_VISIT(self->stack);
+    Py_VISIT(self->element_factory);
+    return 0;
+}
+
+static int
+treebuilder_gc_clear(TreeBuilderObject *self)
 {
     Py_XDECREF(self->end_ns_event_obj);
     Py_XDECREF(self->start_ns_event_obj);
@@ -1874,8 +1900,16 @@
     Py_XDECREF(self->data);
     Py_DECREF(self->last);
     Py_DECREF(self->this);
+    Py_CLEAR(self->element_factory);
     Py_XDECREF(self->root);
-
+    return 0;
+}
+
+static void
+treebuilder_dealloc(TreeBuilderObject *self)
+{
+    PyObject_GC_UnTrack(self);
+    treebuilder_gc_clear(self);
     Py_TYPE(self)->tp_free((PyObject *)self);
 }
 
@@ -1904,9 +1938,14 @@
         self->data = NULL;
     }
 
-    node = create_new_element(tag, attrib);
-    if (!node)
+    if (self->element_factory) {
+        node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
+    } else {
+        node = create_new_element(tag, attrib);
+    }
+    if (!node) {
         return NULL;
+    }
 
     this = (PyObject*) self->this;
 
@@ -2180,10 +2219,11 @@
     0,                                              /* tp_getattro */
     0,                                              /* tp_setattro */
     0,                                              /* tp_as_buffer */
-    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,       /* tp_flags */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
+                                                    /* tp_flags */
     0,                                              /* tp_doc */
-    0,                                              /* tp_traverse */
-    0,                                              /* tp_clear */
+    (traverseproc)treebuilder_gc_traverse,          /* tp_traverse */
+    (inquiry)treebuilder_gc_clear,                  /* tp_clear */
     0,                                              /* tp_richcompare */
     0,                                              /* tp_weaklistoffset */
     0,                                              /* tp_iter */
@@ -2443,17 +2483,20 @@
         attrib = Py_None;
     }
 
-    if (TreeBuilder_CheckExact(self->target))
+    /* If we get None, pass an empty dictionary on */
+    if (attrib == Py_None) {
+        Py_DECREF(attrib);
+        attrib = PyDict_New();
+        if (!attrib)
+            return;
+    }
+
+    if (TreeBuilder_CheckExact(self->target)) {
         /* shortcut */
         res = treebuilder_handle_start((TreeBuilderObject*) self->target,
                                        tag, attrib);
+    }
     else if (self->handle_start) {
-        if (attrib == Py_None) {
-            Py_DECREF(attrib);
-            attrib = PyDict_New();
-            if (!attrib)
-                return;
-        }
         res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
     } else
         res = NULL;

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list