[pypy-commit] pypy py3.5: Fix 2BYTE case in _PyUnicode_Ready(): don't prepend a BOM to the data
rlamy
pypy.commits at gmail.com
Sat Sep 16 13:29:09 EDT 2017
Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch: py3.5
Changeset: r92410:b6ba2262940e
Date: 2017-09-16 18:28 +0100
http://bitbucket.org/pypy/pypy/changeset/b6ba2262940e/
Log: Fix 2BYTE case in _PyUnicode_Ready(): don't prepend a BOM to the data
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -190,6 +190,26 @@
b = s.encode('utf-32')[4:] # Skip the BOM
assert module.from_ucs4(b) == s
+ def test_substring(self):
+ module = self.import_extension('foo', [
+ ("slice_start", "METH_VARARGS",
+ '''
+ PyObject* text;
+ Py_ssize_t start, length;
+ if (!PyArg_ParseTuple(args, "On", &text, &start))
+ return NULL;
+ if (PyUnicode_READY(text) == -1) return NULL;
+ length = PyUnicode_GET_LENGTH(text);
+ if (start > length) return PyLong_FromSsize_t(start);
+ return PyUnicode_FromKindAndData(PyUnicode_KIND(text),
+ PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text),
+ length-start);
+ ''')])
+ s = 'aАbБcСdД'
+ assert module.slice_start(s, 2) == 'bБcСdД'
+ s = 'xx\N{PILE OF POO}'
+ assert module.slice_start(s, 2) == '\N{PILE OF POO}'
+
def test_aswidecharstring(self):
module = self.import_extension('foo', [
("aswidecharstring", "METH_O",
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -1,6 +1,6 @@
from pypy.interpreter.error import OperationError, oefmt
from rpython.rtyper.lltypesystem import rffi, lltype
-from rpython.rlib.runicode import unicode_encode_latin_1, unicode_encode_utf_16
+from rpython.rlib.runicode import unicode_encode_latin_1, unicode_encode_utf_16_helper
from rpython.rlib.rarithmetic import widen
from pypy.module.unicodedata import unicodedb
@@ -289,8 +289,9 @@
set_utf8_len(py_obj, 0)
elif maxchar < 65536:
# XXX: assumes that sizeof(wchar_t) == 4
- ucs2_str = unicode_encode_utf_16(
- w_obj._value, len(w_obj._value), errors='strict')
+ ucs2_str = unicode_encode_utf_16_helper(
+ w_obj._value, len(w_obj._value), errors='strict',
+ byteorder=runicode.BYTEORDER)
ucs2_data = cts.cast('Py_UCS2 *', rffi.str2charp(ucs2_str))
set_data(py_obj, cts.cast('void*', ucs2_data))
set_len(py_obj, get_wsize(py_obj))
More information about the pypy-commit mailing list