[Python-checkins] cpython (3.3): Issue #25709: Fixed problem with in-place string concatenation and utf-8 cache.

georg.brandl python-checkins at python.org
Thu Feb 11 12:24:42 EST 2016


https://hg.python.org/cpython/rev/b9c8f1c80f47
changeset:   100231:b9c8f1c80f47
branch:      3.3
parent:      99898:c5cae7366835
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Thu Dec 03 01:02:03 2015 +0200
summary:
  Issue #25709: Fixed problem with in-place string concatenation and utf-8 cache.

files:
  Lib/test/test_unicode.py |  17 +++++++++++++++++
  Misc/NEWS                |   5 +++++
  Objects/unicodeobject.c  |   5 +++++
  3 files changed, 27 insertions(+), 0 deletions(-)


diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2303,6 +2303,23 @@
                 self.assertNotEqual(abc, abcdef)
                 self.assertEqual(abcdef.decode('unicode_internal'), text)
 
+    @support.cpython_only
+    def test_pep393_utf8_caching_bug(self):
+        # Issue #25709: Problem with string concatenation and utf-8 cache
+        from _testcapi import getargs_s_hash
+        for k in 0x24, 0xa4, 0x20ac, 0x1f40d:
+            s = ''
+            for i in range(5):
+                # Due to a CPython-specific optimization, the 's' string can be
+                # resized in-place.
+                s += chr(k)
+                # Parsing with the "s#" format code indirectly calls
+                # PyUnicode_AsUTF8AndSize(), which creates the UTF-8
+                # encoded string cached in the Unicode object.
+                self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
+                # Check that the second call returns the same result
+                self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
+
 
 class StringModuleTest(unittest.TestCase):
     def test_formatter_parser(self):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,8 +10,13 @@
 Core and Builtins
 -----------------
 
+- Issue #25709: Fixed problem with in-place string concatenation and utf-8 cache.
+
 - Issue #24407: Fix crash when dict is mutated while being updated.
 
+- Issue #24097: Fixed crash in object.__reduce__() if slot name is freed inside
+  __getattr__.
+
 - Issue #24096: Make warnings.warn_explicit more robust against mutation of the
   warnings.filters list.
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -679,6 +679,11 @@
     }
     new_size = (struct_size + (length + 1) * char_size);
 
+    if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
+        PyObject_DEL(_PyUnicode_UTF8(unicode));
+        _PyUnicode_UTF8(unicode) = NULL;
+        _PyUnicode_UTF8_LENGTH(unicode) = 0;
+    }
     _Py_DEC_REFTOTAL;
     _Py_ForgetReference(unicode);
 

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list