[Python-checkins] gh-91810: Fix regression with writing an XML declaration with encoding='unicode' (GH-93426) (GH-93790)

ambv webhook-mailer at python.org
Thu Jun 16 12:16:52 EDT 2022


https://github.com/python/cpython/commit/a27f3ccea5d83917ef5d9d4c6b43d65e96c06ed3
commit: a27f3ccea5d83917ef5d9d4c6b43d65e96c06ed3
branch: 3.10
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: ambv <lukasz at langa.pl>
date: 2022-06-16T18:16:42+02:00
summary:

gh-91810: Fix regression with writing an XML declaration with encoding='unicode' (GH-93426) (GH-93790)

Suppress writing an XML declaration in open files in ElementTree.write()
with encoding='unicode' and xml_declaration=None.

If file patch is passed to ElementTree.write() with encoding='unicode',
always open a new file in UTF-8.
(cherry picked from commit d7db9dc3cc5b44d0b4ce000571fecf58089a01ec)

Co-authored-by: Serhiy Storchaka <storchaka at gmail.com>

files:
A Misc/NEWS.d/next/Library/2022-06-02-08-40-58.gh-issue-91810.Gtk44w.rst
M Lib/test/test_xml_etree.py
M Lib/xml/etree/ElementTree.py

diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 9f5b5b73412f0..d7b5e5208c53f 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -3739,13 +3739,7 @@ def test_write_to_filename_as_unicode(self):
         tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
         tree.write(TESTFN, encoding='unicode')
         with open(TESTFN, 'rb') as f:
-            data = f.read()
-            expected = "<site>\xf8</site>".encode(encoding, 'xmlcharrefreplace')
-            if encoding.lower() in ('utf-8', 'ascii'):
-                self.assertEqual(data, expected)
-            else:
-                self.assertIn(b"<?xml version='1.0' encoding=", data)
-                self.assertIn(expected, data)
+            self.assertEqual(f.read(), b"<site>\xc3\xb8</site>")
 
     def test_write_to_text_file(self):
         self.addCleanup(os_helper.unlink, TESTFN)
@@ -3760,17 +3754,13 @@ def test_write_to_text_file(self):
             tree.write(f, encoding='unicode')
             self.assertFalse(f.closed)
         with open(TESTFN, 'rb') as f:
-            self.assertEqual(f.read(),  convlinesep(
-                             b'''<?xml version='1.0' encoding='ascii'?>\n'''
-                             b'''<site>ø</site>'''))
+            self.assertEqual(f.read(),  b'''<site>ø</site>''')
 
         with open(TESTFN, 'w', encoding='ISO-8859-1') as f:
             tree.write(f, encoding='unicode')
             self.assertFalse(f.closed)
         with open(TESTFN, 'rb') as f:
-            self.assertEqual(f.read(),  convlinesep(
-                             b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n'''
-                             b'''<site>\xf8</site>'''))
+            self.assertEqual(f.read(), b'''<site>\xf8</site>''')
 
     def test_write_to_binary_file(self):
         self.addCleanup(os_helper.unlink, TESTFN)
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index 58da7c8d15623..2503d9ee76ab6 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -731,6 +731,7 @@ def write(self, file_or_filename,
         with _get_writer(file_or_filename, encoding) as (write, declared_encoding):
             if method == "xml" and (xml_declaration or
                     (xml_declaration is None and
+                     encoding.lower() != "unicode" and
                      declared_encoding.lower() not in ("utf-8", "us-ascii"))):
                 write("<?xml version='1.0' encoding='%s'?>\n" % (
                     declared_encoding,))
@@ -757,13 +758,10 @@ def _get_writer(file_or_filename, encoding):
     except AttributeError:
         # file_or_filename is a file name
         if encoding.lower() == "unicode":
-            file = open(file_or_filename, "w",
-                        errors="xmlcharrefreplace")
-        else:
-            file = open(file_or_filename, "w", encoding=encoding,
-                        errors="xmlcharrefreplace")
-        with file:
-            yield file.write, file.encoding
+            encoding="utf-8"
+        with open(file_or_filename, "w", encoding=encoding,
+                  errors="xmlcharrefreplace") as file:
+            yield file.write, encoding
     else:
         # file_or_filename is a file-like object
         # encoding determines if it is a text or binary writer
diff --git a/Misc/NEWS.d/next/Library/2022-06-02-08-40-58.gh-issue-91810.Gtk44w.rst b/Misc/NEWS.d/next/Library/2022-06-02-08-40-58.gh-issue-91810.Gtk44w.rst
new file mode 100644
index 0000000000000..e40005886afc3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-06-02-08-40-58.gh-issue-91810.Gtk44w.rst
@@ -0,0 +1,2 @@
+Suppress writing an XML declaration in open files in ``ElementTree.write()``
+with ``encoding='unicode'`` and ``xml_declaration=None``.



More information about the Python-checkins mailing list