[pypy-commit] pypy py3.5: Add inefficient implementation of PyUnicode_FromKindAndData()
rlamy
pypy.commits at gmail.com
Fri Sep 15 16:58:39 EDT 2017
Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch: py3.5
Changeset: r92406:e27c61e1a09a
Date: 2017-09-15 21:58 +0100
http://bitbucket.org/pypy/pypy/changeset/e27c61e1a09a/
Log: Add inefficient implementation of PyUnicode_FromKindAndData()
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -154,6 +154,42 @@
res = module.test_unicode_format(1, "xyz")
assert res == "bla 1 ble xyz\n"
+ def test_fromkind(self):
+ module = self.import_extension('foo', [
+ ('from_ucs1', 'METH_O',
+ """
+ char* p;
+ Py_ssize_t size;
+ if (PyBytes_AsStringAndSize(args, &p, &size) < 0)
+ return NULL;
+ return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, p, size);
+ """),
+ ('from_ucs2', 'METH_O',
+ """
+ char* p;
+ Py_ssize_t size;
+ if (PyBytes_AsStringAndSize(args, &p, &size) < 0)
+ return NULL;
+ return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, p, size/2);
+ """),
+ ('from_ucs4', 'METH_O',
+ """
+ char* p;
+ Py_ssize_t size;
+ if (PyBytes_AsStringAndSize(args, &p, &size) < 0)
+ return NULL;
+ return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, p, size/4);
+ """)])
+ res = module.from_ucs1(b'spam')
+ assert res == 'spam'
+ s = "späm"
+ b = s.encode('utf-16')[2:] # Skip the BOM
+ s2 = module.from_ucs2(b)
+ assert module.from_ucs2(b) == s
+ s = "x\N{PILE OF POO}x"
+ b = s.encode('utf-32')[4:] # Skip the BOM
+ assert module.from_ucs4(b) == s
+
def test_aswidecharstring(self):
module = self.import_extension('foo', [
("aswidecharstring", "METH_O",
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -13,7 +13,8 @@
PyObject, PyObjectP, Py_DecRef, make_ref, from_ref, track_reference,
make_typedescr, get_typedescr, as_pyobj)
from pypy.module.cpyext.bytesobject import PyBytes_Check, PyBytes_FromObject
-from pypy.module._codecs.interp_codecs import CodecState
+from pypy.module._codecs.interp_codecs import (
+ CodecState, latin_1_decode, utf_16_decode, utf_32_decode)
from pypy.objspace.std import unicodeobject
from rpython.rlib import rstring, runicode
from rpython.tool.sourcetools import func_renamer
@@ -34,7 +35,7 @@
dealloc=unicode_dealloc,
realize=unicode_realize)
-# Buffer for the default encoding (used by PyUnicde_GetDefaultEncoding)
+# Buffer for the default encoding (used by PyUnicode_GetDefaultEncoding)
DEFAULT_ENCODING_SIZE = 100
default_encoding = lltype.malloc(rffi.CCHARP.TO, DEFAULT_ENCODING_SIZE,
flavor='raw', zero=True)
@@ -307,6 +308,26 @@
set_ready(py_obj, 1)
return 0
+@cts.decl("""PyObject* PyUnicode_FromKindAndData(
+ int kind, const void *buffer, Py_ssize_t size)""")
+def PyUnicode_FromKindAndData(space, kind, data, size):
+ if size < 0:
+ raise oefmt(space.w_ValueError, "size must be positive")
+ if kind == _1BYTE_KIND:
+ value = rffi.charpsize2str(data, size)
+ w_res = latin_1_decode(space, value, w_final=space.w_False)
+ elif kind == _2BYTE_KIND:
+ value = rffi.charpsize2str(data, 2 * size)
+ w_res = utf_16_decode(space, value, w_final=space.w_False)
+ elif kind == _4BYTE_KIND:
+ value = rffi.charpsize2str(data, 4 * size)
+ w_res = utf_32_decode(space, value, w_final=space.w_False)
+ else:
+ raise oefmt(space.w_SystemError, "invalid kind")
+ w_ret = space.unpackiterable(w_res)[0]
+ _PyUnicode_Ready(space, w_ret)
+ return w_ret
+
@cts.decl("Py_UNICODE * PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)")
def PyUnicode_AsUnicodeAndSize(space, ref, psize):
"""Return a read-only pointer to the Unicode object's internal Py_UNICODE
More information about the pypy-commit
mailing list