[Python-checkins] r68256 - in python/branches/release30-maint: Lib/test/pickletester.py Modules/_pickle.c Objects/unicodeobject.c
georg.brandl
python-checkins at python.org
Sun Jan 4 00:34:15 CET 2009
Author: georg.brandl
Date: Sun Jan 4 00:34:15 2009
New Revision: 68256
Log:
Merged revisions 67937-67938 via svnmerge from
svn+ssh://svn.python.org/python/branches/py3k
........
r67937 | alexandre.vassalotti | 2008-12-27 08:32:41 +0100 (Sat, 27 Dec 2008) | 3 lines
Update copy of PyUnicode_EncodeRawUnicodeEscape in _pickle.
Add astral character test case.
........
r67938 | alexandre.vassalotti | 2008-12-27 10:09:15 +0100 (Sat, 27 Dec 2008) | 3 lines
Fix wrong bytes type conversion in PyUnicode_AsUnicodeEscapeString.
Fix wrong bytes type conversion in PyUnicode_AsRawUnicodeEscapeString.
........
Modified:
python/branches/release30-maint/ (props changed)
python/branches/release30-maint/Lib/test/pickletester.py
python/branches/release30-maint/Modules/_pickle.c
python/branches/release30-maint/Objects/unicodeobject.c
Modified: python/branches/release30-maint/Lib/test/pickletester.py
==============================================================================
--- python/branches/release30-maint/Lib/test/pickletester.py (original)
+++ python/branches/release30-maint/Lib/test/pickletester.py Sun Jan 4 00:34:15 2009
@@ -484,14 +484,21 @@
self.assertRaises(ValueError, self.loads, buf)
def test_unicode(self):
- endcases = ['', '<\\u>', '<\\\u1234>', '<\n>',
- '<\\>', '<\\\U00012345>']
+ endcases = ['', '<\\u>', '<\\\u1234>', '<\n>', '<\\>',
+ '<\\\U00012345>']
for proto in protocols:
for u in endcases:
p = self.dumps(u, proto)
u2 = self.loads(p)
self.assertEqual(u2, u)
+ def test_unicode_high_plane(self):
+ t = '\U00012345'
+ for proto in protocols:
+ p = self.dumps(t, proto)
+ t2 = self.loads(p)
+ self.assertEqual(t2, t)
+
def test_bytes(self):
for proto in protocols:
for u in b'', b'xyz', b'xyz'*100:
Modified: python/branches/release30-maint/Modules/_pickle.c
==============================================================================
--- python/branches/release30-maint/Modules/_pickle.c (original)
+++ python/branches/release30-maint/Modules/_pickle.c Sun Jan 4 00:34:15 2009
@@ -1109,16 +1109,21 @@
static const char *hexdigits = "0123456789abcdef";
#ifdef Py_UNICODE_WIDE
- repr = PyBytes_FromStringAndSize(NULL, 10 * size);
+ const Py_ssize_t expandsize = 10;
#else
- repr = PyBytes_FromStringAndSize(NULL, 6 * size);
+ const Py_ssize_t expandsize = 6;
#endif
+
+ if (size > PY_SSIZE_T_MAX / expandsize)
+ return PyErr_NoMemory();
+
+ repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
if (repr == NULL)
return NULL;
if (size == 0)
goto done;
- p = q = PyBytes_AS_STRING(repr);
+ p = q = PyByteArray_AS_STRING(repr);
while (size-- > 0) {
Py_UNICODE ch = *s++;
#ifdef Py_UNICODE_WIDE
@@ -1136,6 +1141,32 @@
*p++ = hexdigits[ch & 15];
}
else
+#else
+ /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
+ if (ch >= 0xD800 && ch < 0xDC00) {
+ Py_UNICODE ch2;
+ Py_UCS4 ucs;
+
+ ch2 = *s++;
+ size--;
+ if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
+ ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
+ *p++ = '\\';
+ *p++ = 'U';
+ *p++ = hexdigits[(ucs >> 28) & 0xf];
+ *p++ = hexdigits[(ucs >> 24) & 0xf];
+ *p++ = hexdigits[(ucs >> 20) & 0xf];
+ *p++ = hexdigits[(ucs >> 16) & 0xf];
+ *p++ = hexdigits[(ucs >> 12) & 0xf];
+ *p++ = hexdigits[(ucs >> 8) & 0xf];
+ *p++ = hexdigits[(ucs >> 4) & 0xf];
+ *p++ = hexdigits[ucs & 0xf];
+ continue;
+ }
+ /* Fall through: isolated surrogates are copied as-is */
+ s--;
+ size++;
+ }
#endif
/* Map 16-bit characters to '\uxxxx' */
if (ch >= 256 || ch == '\\' || ch == '\n') {
@@ -1146,14 +1177,14 @@
*p++ = hexdigits[(ch >> 4) & 0xf];
*p++ = hexdigits[ch & 15];
}
- /* Copy everything else as-is */
+ /* Copy everything else as-is */
else
*p++ = (char) ch;
}
size = p - q;
done:
- result = PyBytes_FromStringAndSize(PyBytes_AS_STRING(repr), size);
+ result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Py_DECREF(repr);
return result;
}
Modified: python/branches/release30-maint/Objects/unicodeobject.c
==============================================================================
--- python/branches/release30-maint/Objects/unicodeobject.c (original)
+++ python/branches/release30-maint/Objects/unicodeobject.c Sun Jan 4 00:34:15 2009
@@ -3257,20 +3257,14 @@
PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
{
- PyObject *s, *result;
+ PyObject *s;
if (!PyUnicode_Check(unicode)) {
PyErr_BadArgument();
return NULL;
}
s = PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode));
-
- if (!s)
- return NULL;
- result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(s),
- PyByteArray_GET_SIZE(s));
- Py_DECREF(s);
- return result;
+ return s;
}
/* --- Raw Unicode Escape Codec ------------------------------------------- */
@@ -3482,7 +3476,7 @@
PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
{
- PyObject *s, *result;
+ PyObject *s;
if (!PyUnicode_Check(unicode)) {
PyErr_BadArgument();
return NULL;
@@ -3490,12 +3484,7 @@
s = PyUnicode_EncodeRawUnicodeEscape(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode));
- if (!s)
- return NULL;
- result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(s),
- PyByteArray_GET_SIZE(s));
- Py_DECREF(s);
- return result;
+ return s;
}
/* --- Unicode Internal Codec ------------------------------------------- */
More information about the Python-checkins
mailing list