[Python-checkins] cpython: _elementtree.XMLParser._setevents should support any sequence, not just tuples

eli.bendersky python-checkins at python.org
Sun May 19 18:01:57 CEST 2013


http://hg.python.org/cpython/rev/b6c333579c2b
changeset:   83843:b6c333579c2b
user:        Eli Bendersky <eliben at gmail.com>
date:        Sun May 19 09:01:49 2013 -0700
summary:
  _elementtree.XMLParser._setevents should support any sequence, not just tuples

Also clean up some code around this

files:
  Lib/test/test_xml_etree.py   |  35 +++++++++
  Lib/xml/etree/ElementTree.py |  29 ++++---
  Modules/_elementtree.c       |  92 +++++++++++------------
  3 files changed, 96 insertions(+), 60 deletions(-)


diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -979,6 +979,21 @@
         parser.eof_received()
         self.assertEqual(parser.root.tag, '{namespace}root')
 
+    def test_ns_events(self):
+        parser = ET.IncrementalParser(events=('start-ns', 'end-ns'))
+        self._feed(parser, "<!-- comment -->\n")
+        self._feed(parser, "<root xmlns='namespace'>\n")
+        self.assertEqual(
+            list(parser.events()),
+            [('start-ns', ('', 'namespace'))])
+        self._feed(parser, "<element key='value'>text</element")
+        self._feed(parser, ">\n")
+        self._feed(parser, "<element>text</element>tail\n")
+        self._feed(parser, "<empty-element/>\n")
+        self._feed(parser, "</root>\n")
+        self.assertEqual(list(parser.events()), [('end-ns', None)])
+        parser.eof_received()
+
     def test_events(self):
         parser = ET.IncrementalParser(events=())
         self._feed(parser, "<root/>\n")
@@ -1026,6 +1041,26 @@
         parser.eof_received()
         self.assertEqual(parser.root.tag, 'root')
 
+    def test_events_sequence(self):
+        # Test that events can be any iterable, not just a tuple or list
+        eventset = {'end', 'start'}
+        parser = ET.IncrementalParser(events=eventset)
+        self._feed(parser, "<foo>bar</foo>")
+        self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
+
+        class DummyIter:
+            def __init__(self):
+                self.events = iter(['start', 'end', 'start-ns'])
+            def __iter__(self):
+                return self
+            def __next__(self):
+                return next(self.events)
+
+        parser = ET.IncrementalParser(events=DummyIter())
+        self._feed(parser, "<foo>bar</foo>")
+        self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
+
+
     def test_unknown_event(self):
         with self.assertRaises(ValueError):
             ET.IncrementalParser(events=('start', 'end', 'bogus'))
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -1498,33 +1498,38 @@
         except AttributeError:
             pass # unknown
 
-    def _setevents(self, event_list, events):
+    def _setevents(self, events_queue, events_to_report):
         # Internal API for IncrementalParser
+        # events_to_report: a list of events to report during parsing (same as
+        # the *events* of IncrementalParser's constructor).
+        # events_queue: a list of actual parsing events that will be populated
+        # by the underlying parser.
+        #
         parser = self._parser
-        append = event_list.append
-        for event in events:
-            if event == "start":
+        append = events_queue.append
+        for event_name in events_to_report:
+            if event_name == "start":
                 parser.ordered_attributes = 1
                 parser.specified_attributes = 1
-                def handler(tag, attrib_in, event=event, append=append,
+                def handler(tag, attrib_in, event=event_name, append=append,
                             start=self._start_list):
                     append((event, start(tag, attrib_in)))
                 parser.StartElementHandler = handler
-            elif event == "end":
-                def handler(tag, event=event, append=append,
+            elif event_name == "end":
+                def handler(tag, event=event_name, append=append,
                             end=self._end):
                     append((event, end(tag)))
                 parser.EndElementHandler = handler
-            elif event == "start-ns":
-                def handler(prefix, uri, event=event, append=append):
+            elif event_name == "start-ns":
+                def handler(prefix, uri, event=event_name, append=append):
                     append((event, (prefix or "", uri or "")))
                 parser.StartNamespaceDeclHandler = handler
-            elif event == "end-ns":
-                def handler(prefix, event=event, append=append):
+            elif event_name == "end-ns":
+                def handler(prefix, event=event_name, append=append):
                     append((event, None))
                 parser.EndNamespaceDeclHandler = handler
             else:
-                raise ValueError("unknown event %r" % event)
+                raise ValueError("unknown event %r" % event_name)
 
     def _raiseerror(self, value):
         err = ParseError(value)
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -3431,14 +3431,14 @@
 xmlparser_setevents(XMLParserObject *self, PyObject* args)
 {
     /* activate element event reporting */
-
-    Py_ssize_t i;
-    TreeBuilderObject* target;
-
-    PyObject* events; /* event collector */
-    PyObject* event_set = Py_None;
-    if (!PyArg_ParseTuple(args, "O!|O:_setevents",  &PyList_Type, &events,
-                          &event_set))
+    Py_ssize_t i, seqlen;
+    TreeBuilderObject *target;
+
+    PyObject *events_queue;
+    PyObject *events_to_report = Py_None;
+    PyObject *events_seq;
+    if (!PyArg_ParseTuple(args, "O!|O:_setevents",  &PyList_Type, &events_queue,
+                          &events_to_report))
         return NULL;
 
     if (!TreeBuilder_CheckExact(self->target)) {
@@ -3452,9 +3452,9 @@
 
     target = (TreeBuilderObject*) self->target;
 
-    Py_INCREF(events);
+    Py_INCREF(events_queue);
     Py_XDECREF(target->events);
-    target->events = events;
+    target->events = events_queue;
 
     /* clear out existing events */
     Py_CLEAR(target->start_event_obj);
@@ -3462,69 +3462,65 @@
     Py_CLEAR(target->start_ns_event_obj);
     Py_CLEAR(target->end_ns_event_obj);
 
-    if (event_set == Py_None) {
+    if (events_to_report == Py_None) {
         /* default is "end" only */
         target->end_event_obj = PyUnicode_FromString("end");
         Py_RETURN_NONE;
     }
 
-    if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
-        goto error;
-
-    for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
-        PyObject* item = PyTuple_GET_ITEM(event_set, i);
-        char* event;
-        if (PyUnicode_Check(item)) {
-            event = _PyUnicode_AsString(item);
-            if (event == NULL)
-                goto error;
-        } else if (PyBytes_Check(item))
-            event = PyBytes_AS_STRING(item);
-        else {
-            goto error;
+    if (!(events_seq = PySequence_Fast(events_to_report,
+                                       "events must be a sequence"))) {
+        return NULL;
+    }
+
+    seqlen = PySequence_Size(events_seq);
+    for (i = 0; i < seqlen; ++i) {
+        PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
+        char *event_name = NULL;
+        if (PyUnicode_Check(event_name_obj)) {
+            event_name = _PyUnicode_AsString(event_name_obj);
+        } else if (PyBytes_Check(event_name_obj)) {
+            event_name = PyBytes_AS_STRING(event_name_obj);
         }
-        if (strcmp(event, "start") == 0) {
-            Py_INCREF(item);
-            target->start_event_obj = item;
-        } else if (strcmp(event, "end") == 0) {
-            Py_INCREF(item);
+
+        if (event_name == NULL) {
+            Py_DECREF(events_seq);
+            PyErr_Format(PyExc_ValueError, "invalid events sequence");
+            return NULL;
+        } else if (strcmp(event_name, "start") == 0) {
+            Py_INCREF(event_name_obj);
+            target->start_event_obj = event_name_obj;
+        } else if (strcmp(event_name, "end") == 0) {
+            Py_INCREF(event_name_obj);
             Py_XDECREF(target->end_event_obj);
-            target->end_event_obj = item;
-        } else if (strcmp(event, "start-ns") == 0) {
-            Py_INCREF(item);
+            target->end_event_obj = event_name_obj;
+        } else if (strcmp(event_name, "start-ns") == 0) {
+            Py_INCREF(event_name_obj);
             Py_XDECREF(target->start_ns_event_obj);
-            target->start_ns_event_obj = item;
+            target->start_ns_event_obj = event_name_obj;
             EXPAT(SetNamespaceDeclHandler)(
                 self->parser,
                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
                 );
-        } else if (strcmp(event, "end-ns") == 0) {
-            Py_INCREF(item);
+        } else if (strcmp(event_name, "end-ns") == 0) {
+            Py_INCREF(event_name_obj);
             Py_XDECREF(target->end_ns_event_obj);
-            target->end_ns_event_obj = item;
+            target->end_ns_event_obj = event_name_obj;
             EXPAT(SetNamespaceDeclHandler)(
                 self->parser,
                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
                 );
         } else {
-            PyErr_Format(
-                PyExc_ValueError,
-                "unknown event '%s'", event
-                );
+            Py_DECREF(events_seq);
+            PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
             return NULL;
         }
     }
 
+    Py_DECREF(events_seq);
     Py_RETURN_NONE;
-
-  error:
-    PyErr_SetString(
-        PyExc_TypeError,
-        "invalid event tuple"
-        );
-    return NULL;
 }
 
 static PyMethodDef xmlparser_methods[] = {

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list