[Python-checkins] cpython: Closes Issue #14246: _elementtree parser will now handle io.StringIO

eli.bendersky python-checkins at python.org
Fri Mar 16 04:55:14 CET 2012


http://hg.python.org/cpython/rev/7bdf5c96fdc0
changeset:   75721:7bdf5c96fdc0
user:        Eli Bendersky <eliben at gmail.com>
date:        Fri Mar 16 05:53:30 2012 +0200
summary:
  Closes Issue #14246: _elementtree parser will now handle io.StringIO

files:
  Lib/test/test_xml_etree.py |  14 ++++++++++++++
  Modules/_elementtree.c     |  23 ++++++++++++++++++++++-
  2 files changed, 36 insertions(+), 1 deletions(-)


diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -16,6 +16,7 @@
 
 import sys
 import html
+import io
 import unittest
 
 from test import support
@@ -2026,6 +2027,18 @@
         del e[::2]
         self.assertEqual(self._subelem_tags(e), ['a1'])
 
+
+class StringIOTest(unittest.TestCase):
+    def test_read_from_stringio(self):
+        tree = ET.ElementTree()
+        stream = io.StringIO()
+        stream.write('''<?xml version="1.0"?><site></site>''')
+        stream.seek(0)
+        tree.parse(stream)
+
+        self.assertEqual(tree.getroot().tag, 'site')
+
+
 # --------------------------------------------------------------------
 
 
@@ -2077,6 +2090,7 @@
 
     test_classes = [
         ElementSlicingTest,
+        StringIOTest,
         ElementTreeTest,
         TreeBuilderTest]
     if module is pyET:
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -2682,6 +2682,7 @@
 
     PyObject* reader;
     PyObject* buffer;
+    PyObject* temp;
     PyObject* res;
 
     PyObject* fileobj;
@@ -2703,7 +2704,27 @@
             return NULL;
         }
 
-        if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
+        if (PyUnicode_CheckExact(buffer)) {
+            /* A unicode object is encoded into bytes using UTF-8 */
+            if (PyUnicode_GET_SIZE(buffer) == 0) {
+                Py_DECREF(buffer);
+                break;
+            }
+            temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
+            if (!temp) {
+                /* Propagate exception from PyUnicode_AsEncodedString */
+                Py_DECREF(buffer);
+                Py_DECREF(reader);
+                return NULL;
+            }
+
+            /* Here we no longer need the original buffer since it contains
+             * unicode. Make it point to the encoded bytes object.
+            */
+            Py_DECREF(buffer);
+            buffer = temp;
+        }
+        else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
             Py_DECREF(buffer);
             break;
         }

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list