[Python-checkins] r80032 - in python/branches/release31-maint: Lib/pickle.py Lib/pickletools.py Lib/test/pickletester.py Misc/NEWS Modules/_pickle.c
victor.stinner
python-checkins at python.org
Tue Apr 13 13:09:22 CEST 2010
Author: victor.stinner
Date: Tue Apr 13 13:09:22 2010
New Revision: 80032
Log:
Merged revisions 80031 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k
........
r80031 | victor.stinner | 2010-04-13 13:07:24 +0200 (mar., 13 avril 2010) | 4 lines
Issue #8383: pickle and pickletools use the surrogatepass error handler when
encoding and decoding unicode strings as UTF-8, to support lone surrogates and
stay compatible with Python 2.x and 3.0.
........
Modified:
python/branches/release31-maint/ (props changed)
python/branches/release31-maint/Lib/pickle.py
python/branches/release31-maint/Lib/pickletools.py
python/branches/release31-maint/Lib/test/pickletester.py
python/branches/release31-maint/Misc/NEWS
python/branches/release31-maint/Modules/_pickle.c
Modified: python/branches/release31-maint/Lib/pickle.py
==============================================================================
--- python/branches/release31-maint/Lib/pickle.py (original)
+++ python/branches/release31-maint/Lib/pickle.py Tue Apr 13 13:09:22 2010
@@ -499,7 +499,7 @@
def save_str(self, obj, pack=struct.pack):
if self.bin:
- encoded = obj.encode('utf-8')
+ encoded = obj.encode('utf-8', 'surrogatepass')
n = len(encoded)
self.write(BINUNICODE + pack("<i", n) + encoded)
else:
@@ -966,7 +966,7 @@
def load_binunicode(self):
len = mloads(b'i' + self.read(4))
- self.append(str(self.read(len), 'utf-8'))
+ self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
dispatch[BINUNICODE[0]] = load_binunicode
def load_short_binstring(self):
Modified: python/branches/release31-maint/Lib/pickletools.py
==============================================================================
--- python/branches/release31-maint/Lib/pickletools.py (original)
+++ python/branches/release31-maint/Lib/pickletools.py Tue Apr 13 13:09:22 2010
@@ -469,7 +469,7 @@
raise ValueError("unicodestring4 byte count < 0: %d" % n)
data = f.read(n)
if len(data) == n:
- return str(data, 'utf-8')
+ return str(data, 'utf-8', 'surrogatepass')
raise ValueError("expected %d bytes in a unicodestring4, but only %d "
"remain" % (n, len(data)))
Modified: python/branches/release31-maint/Lib/test/pickletester.py
==============================================================================
--- python/branches/release31-maint/Lib/test/pickletester.py (original)
+++ python/branches/release31-maint/Lib/test/pickletester.py Tue Apr 13 13:09:22 2010
@@ -515,7 +515,9 @@
def test_unicode(self):
endcases = ['', '<\\u>', '<\\\u1234>', '<\n>',
- '<\\>', '<\\\U00012345>']
+ '<\\>', '<\\\U00012345>',
+ # surrogates
+ '<\udc80>']
for proto in protocols:
for u in endcases:
p = self.dumps(u, proto)
Modified: python/branches/release31-maint/Misc/NEWS
==============================================================================
--- python/branches/release31-maint/Misc/NEWS (original)
+++ python/branches/release31-maint/Misc/NEWS Tue Apr 13 13:09:22 2010
@@ -28,6 +28,10 @@
Library
-------
+- Issue #8383: pickle and pickletools use the surrogatepass error handler when
+ encoding and decoding unicode strings as UTF-8, to support lone surrogates
+ and stay compatible with Python 2.x and 3.0.
+
- Issue #8179: Fix macpath.realpath() on a non-existing path.
- Issue #8139: ossaudiodev didn't initialize its types properly, therefore
Modified: python/branches/release31-maint/Modules/_pickle.c
==============================================================================
--- python/branches/release31-maint/Modules/_pickle.c (original)
+++ python/branches/release31-maint/Modules/_pickle.c Tue Apr 13 13:09:22 2010
@@ -1227,7 +1227,9 @@
if (self->bin) {
char pdata[5];
- encoded = PyUnicode_AsUTF8String(obj);
+ encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
+ PyUnicode_GET_SIZE(obj),
+ "surrogatepass");
if (encoded == NULL)
goto error;
@@ -3352,7 +3354,7 @@
if (unpickler_read(self, &s, size) < 0)
return -1;
- str = PyUnicode_DecodeUTF8(s, size, NULL);
+ str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
if (str == NULL)
return -1;
More information about the Python-checkins mailing list