[pypy-commit] pypy default: hg merge cffi-char16-char32
arigo
pypy.commits at gmail.com
Mon Jun 5 02:25:20 EDT 2017
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r91518:6a4af0b6b51c
Date: 2017-06-05 08:24 +0200
http://bitbucket.org/pypy/pypy/changeset/6a4af0b6b51c/
Log: hg merge cffi-char16-char32
Support the char16_t and char32_t types in cffi. This means
reintroducing some surrogate handling in one of the two directions,
depending on the size of unichar.
diff --git a/lib_pypy/cffi/_cffi_include.h b/lib_pypy/cffi/_cffi_include.h
--- a/lib_pypy/cffi/_cffi_include.h
+++ b/lib_pypy/cffi/_cffi_include.h
@@ -159,9 +159,9 @@
#define _cffi_from_c_struct \
((PyObject *(*)(char *, struct _cffi_ctypedescr *))_cffi_exports[18])
#define _cffi_to_c_wchar_t \
- ((wchar_t(*)(PyObject *))_cffi_exports[19])
+ ((_cffi_wchar_t(*)(PyObject *))_cffi_exports[19])
#define _cffi_from_c_wchar_t \
- ((PyObject *(*)(wchar_t))_cffi_exports[20])
+ ((PyObject *(*)(_cffi_wchar_t))_cffi_exports[20])
#define _cffi_to_c_long_double \
((long double(*)(PyObject *))_cffi_exports[21])
#define _cffi_to_c__Bool \
@@ -174,7 +174,11 @@
#define _CFFI_CPIDX 25
#define _cffi_call_python \
((void(*)(struct _cffi_externpy_s *, char *))_cffi_exports[_CFFI_CPIDX])
-#define _CFFI_NUM_EXPORTS 26
+#define _cffi_to_c_wchar3216_t \
+ ((int(*)(PyObject *))_cffi_exports[26])
+#define _cffi_from_c_wchar3216_t \
+ ((PyObject *(*)(int))_cffi_exports[27])
+#define _CFFI_NUM_EXPORTS 28
struct _cffi_ctypedescr;
@@ -215,6 +219,46 @@
return NULL;
}
+
+#ifdef HAVE_WCHAR_H
+typedef wchar_t _cffi_wchar_t;
+#else
+typedef uint16_t _cffi_wchar_t; /* same random pick as _cffi_backend.c */
+#endif
+
+_CFFI_UNUSED_FN static uint16_t _cffi_to_c_char16_t(PyObject *o)
+{
+ if (sizeof(_cffi_wchar_t) == 2)
+ return (uint16_t)_cffi_to_c_wchar_t(o);
+ else
+ return (uint16_t)_cffi_to_c_wchar3216_t(o);
+}
+
+_CFFI_UNUSED_FN static PyObject *_cffi_from_c_char16_t(uint16_t x)
+{
+ if (sizeof(_cffi_wchar_t) == 2)
+ return _cffi_from_c_wchar_t(x);
+ else
+ return _cffi_from_c_wchar3216_t(x);
+}
+
+_CFFI_UNUSED_FN static int _cffi_to_c_char32_t(PyObject *o)
+{
+ if (sizeof(_cffi_wchar_t) == 4)
+ return (int)_cffi_to_c_wchar_t(o);
+ else
+ return (int)_cffi_to_c_wchar3216_t(o);
+}
+
+_CFFI_UNUSED_FN static PyObject *_cffi_from_c_char32_t(int x)
+{
+ if (sizeof(_cffi_wchar_t) == 4)
+ return _cffi_from_c_wchar_t(x);
+ else
+ return _cffi_from_c_wchar3216_t(x);
+}
+
+
/********** end CPython-specific section **********/
#else
_CFFI_UNUSED_FN
diff --git a/lib_pypy/cffi/cffi_opcode.py b/lib_pypy/cffi/cffi_opcode.py
--- a/lib_pypy/cffi/cffi_opcode.py
+++ b/lib_pypy/cffi/cffi_opcode.py
@@ -107,9 +107,10 @@
PRIM_UINTMAX = 47
PRIM_FLOATCOMPLEX = 48
PRIM_DOUBLECOMPLEX = 49
+PRIM_CHAR16 = 50
+PRIM_CHAR32 = 51
-
-_NUM_PRIM = 50
+_NUM_PRIM = 52
_UNKNOWN_PRIM = -1
_UNKNOWN_FLOAT_PRIM = -2
_UNKNOWN_LONG_DOUBLE = -3
@@ -135,6 +136,8 @@
'double _Complex': PRIM_DOUBLECOMPLEX,
'_Bool': PRIM_BOOL,
'wchar_t': PRIM_WCHAR,
+ 'char16_t': PRIM_CHAR16,
+ 'char32_t': PRIM_CHAR32,
'int8_t': PRIM_INT8,
'uint8_t': PRIM_UINT8,
'int16_t': PRIM_INT16,
diff --git a/lib_pypy/cffi/model.py b/lib_pypy/cffi/model.py
--- a/lib_pypy/cffi/model.py
+++ b/lib_pypy/cffi/model.py
@@ -122,6 +122,8 @@
'_Bool': 'i',
# the following types are not primitive in the C sense
'wchar_t': 'c',
+ 'char16_t': 'c',
+ 'char32_t': 'c',
'int8_t': 'i',
'uint8_t': 'i',
'int16_t': 'i',
diff --git a/lib_pypy/cffi/parse_c_type.h b/lib_pypy/cffi/parse_c_type.h
--- a/lib_pypy/cffi/parse_c_type.h
+++ b/lib_pypy/cffi/parse_c_type.h
@@ -81,8 +81,10 @@
#define _CFFI_PRIM_UINTMAX 47
#define _CFFI_PRIM_FLOATCOMPLEX 48
#define _CFFI_PRIM_DOUBLECOMPLEX 49
+#define _CFFI_PRIM_CHAR16 50
+#define _CFFI_PRIM_CHAR32 51
-#define _CFFI__NUM_PRIM 50
+#define _CFFI__NUM_PRIM 52
#define _CFFI__UNKNOWN_PRIM (-1)
#define _CFFI__UNKNOWN_FLOAT_PRIM (-2)
#define _CFFI__UNKNOWN_LONG_DOUBLE (-3)
diff --git a/lib_pypy/cffi/recompiler.py b/lib_pypy/cffi/recompiler.py
--- a/lib_pypy/cffi/recompiler.py
+++ b/lib_pypy/cffi/recompiler.py
@@ -3,8 +3,9 @@
from .error import VerificationError
from .cffi_opcode import *
-VERSION = "0x2601"
-VERSION_EMBEDDED = "0x2701"
+VERSION_BASE = 0x2601
+VERSION_EMBEDDED = 0x2701
+VERSION_CHAR16CHAR32 = 0x2801
class GlobalExpr:
@@ -126,6 +127,10 @@
self.ffi = ffi
self.module_name = module_name
self.target_is_python = target_is_python
+ self._version = VERSION_BASE
+
+ def needs_version(self, ver):
+ self._version = max(self._version, ver)
def collect_type_table(self):
self._typesdict = {}
@@ -304,9 +309,7 @@
prnt('#endif')
lines = self._rel_readlines('_embedding.h')
prnt(''.join(lines))
- version = VERSION_EMBEDDED
- else:
- version = VERSION
+ self.needs_version(VERSION_EMBEDDED)
#
# then paste the C source given by the user, verbatim.
prnt('/************************************************************/')
@@ -405,7 +408,7 @@
prnt(' _cffi_call_python_org = '
'(void(*)(struct _cffi_externpy_s *, char *))p[1];')
prnt(' }')
- prnt(' p[0] = (const void *)%s;' % version)
+ prnt(' p[0] = (const void *)0x%x;' % self._version)
prnt(' p[1] = &_cffi_type_context;')
prnt('}')
# on Windows, distutils insists on putting init_cffi_xyz in
@@ -423,21 +426,22 @@
prnt('PyMODINIT_FUNC')
prnt('PyInit_%s(void)' % (base_module_name,))
prnt('{')
- prnt(' return _cffi_init("%s", %s, &_cffi_type_context);' % (
- self.module_name, version))
+ prnt(' return _cffi_init("%s", 0x%x, &_cffi_type_context);' % (
+ self.module_name, self._version))
prnt('}')
prnt('#else')
prnt('PyMODINIT_FUNC')
prnt('init%s(void)' % (base_module_name,))
prnt('{')
- prnt(' _cffi_init("%s", %s, &_cffi_type_context);' % (
- self.module_name, version))
+ prnt(' _cffi_init("%s", 0x%x, &_cffi_type_context);' % (
+ self.module_name, self._version))
prnt('}')
prnt('#endif')
prnt()
prnt('#ifdef __GNUC__')
prnt('# pragma GCC visibility pop')
prnt('#endif')
+ self._version = None
def _to_py(self, x):
if isinstance(x, str):
@@ -476,7 +480,8 @@
prnt('from %s import ffi as _ffi%d' % (included_module_name, i))
prnt()
prnt("ffi = _cffi_backend.FFI('%s'," % (self.module_name,))
- prnt(" _version = %s," % (VERSION,))
+ prnt(" _version = 0x%x," % (self._version,))
+ self._version = None
#
# the '_types' keyword argument
self.cffi_types = tuple(self.cffi_types) # don't change any more
@@ -515,8 +520,11 @@
# double' here, and _cffi_to_c_double would lose precision
converter = '(%s)_cffi_to_c_double' % (tp.get_c_name(''),)
else:
- converter = '(%s)_cffi_to_c_%s' % (tp.get_c_name(''),
+ cname = tp.get_c_name('')
+ converter = '(%s)_cffi_to_c_%s' % (cname,
tp.name.replace(' ', '_'))
+ if cname in ('char16_t', 'char32_t'):
+ self.needs_version(VERSION_CHAR16CHAR32)
errvalue = '-1'
#
elif isinstance(tp, model.PointerType):
@@ -573,7 +581,10 @@
elif isinstance(tp, model.UnknownFloatType):
return '_cffi_from_c_double(%s)' % (var,)
elif tp.name != 'long double' and not tp.is_complex_type():
- return '_cffi_from_c_%s(%s)' % (tp.name.replace(' ', '_'), var)
+ cname = tp.name.replace(' ', '_')
+ if cname in ('char16_t', 'char32_t'):
+ self.needs_version(VERSION_CHAR16CHAR32)
+ return '_cffi_from_c_%s(%s)' % (cname, var)
else:
return '_cffi_from_c_deref((char *)&%s, _cffi_type(%d))' % (
var, self._gettypenum(tp))
diff --git a/lib_pypy/cffi/vengine_cpy.py b/lib_pypy/cffi/vengine_cpy.py
--- a/lib_pypy/cffi/vengine_cpy.py
+++ b/lib_pypy/cffi/vengine_cpy.py
@@ -808,7 +808,8 @@
#include <stddef.h>
/* this block of #ifs should be kept exactly identical between
- c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */
+ c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py
+ and cffi/_cffi_include.h */
#if defined(_MSC_VER)
# include <malloc.h> /* for alloca() */
# if _MSC_VER < 1600 /* MSVC < 2010 */
@@ -842,11 +843,13 @@
# include <stdint.h>
# endif
# if _MSC_VER < 1800 /* MSVC < 2013 */
- typedef unsigned char _Bool;
+# ifndef __cplusplus
+ typedef unsigned char _Bool;
+# endif
# endif
#else
# include <stdint.h>
-# if (defined (__SVR4) && defined (__sun)) || defined(_AIX)
+# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) || defined(__hpux)
# include <alloca.h>
# endif
#endif
diff --git a/lib_pypy/cffi/vengine_gen.py b/lib_pypy/cffi/vengine_gen.py
--- a/lib_pypy/cffi/vengine_gen.py
+++ b/lib_pypy/cffi/vengine_gen.py
@@ -627,7 +627,8 @@
#include <sys/types.h> /* XXX for ssize_t on some platforms */
/* this block of #ifs should be kept exactly identical between
- c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */
+ c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py
+ and cffi/_cffi_include.h */
#if defined(_MSC_VER)
# include <malloc.h> /* for alloca() */
# if _MSC_VER < 1600 /* MSVC < 2010 */
@@ -661,11 +662,13 @@
# include <stdint.h>
# endif
# if _MSC_VER < 1800 /* MSVC < 2013 */
- typedef unsigned char _Bool;
+# ifndef __cplusplus
+ typedef unsigned char _Bool;
+# endif
# endif
#else
# include <stdint.h>
-# if (defined (__SVR4) && defined (__sun)) || defined(_AIX)
+# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) || defined(__hpux)
# include <alloca.h>
# endif
#endif
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -6,5 +6,6 @@
.. startrev: 558bd00b3dd8
.. branch: cffi-complex
+.. branch: cffi-char16-char32
-Part of the upgrade to cffi 1.11
+The two ``cffi-*`` branches are part of the upgrade to cffi 1.11.
diff --git a/pypy/module/_cffi_backend/cffi1_module.py b/pypy/module/_cffi_backend/cffi1_module.py
--- a/pypy/module/_cffi_backend/cffi1_module.py
+++ b/pypy/module/_cffi_backend/cffi1_module.py
@@ -9,7 +9,7 @@
VERSION_MIN = 0x2601
-VERSION_MAX = 0x27FF
+VERSION_MAX = 0x28FF
VERSION_EXPORT = 0x0A03
diff --git a/pypy/module/_cffi_backend/cffi_opcode.py b/pypy/module/_cffi_backend/cffi_opcode.py
--- a/pypy/module/_cffi_backend/cffi_opcode.py
+++ b/pypy/module/_cffi_backend/cffi_opcode.py
@@ -107,8 +107,10 @@
PRIM_UINTMAX = 47
PRIM_FLOATCOMPLEX = 48
PRIM_DOUBLECOMPLEX = 49
+PRIM_CHAR16 = 50
+PRIM_CHAR32 = 51
-_NUM_PRIM = 50
+_NUM_PRIM = 52
_UNKNOWN_PRIM = -1
_UNKNOWN_FLOAT_PRIM = -2
_UNKNOWN_LONG_DOUBLE = -3
@@ -131,8 +133,12 @@
'float': PRIM_FLOAT,
'double': PRIM_DOUBLE,
'long double': PRIM_LONGDOUBLE,
+ 'float _Complex': PRIM_FLOATCOMPLEX,
+ 'double _Complex': PRIM_DOUBLECOMPLEX,
'_Bool': PRIM_BOOL,
'wchar_t': PRIM_WCHAR,
+ 'char16_t': PRIM_CHAR16,
+ 'char32_t': PRIM_CHAR32,
'int8_t': PRIM_INT8,
'uint8_t': PRIM_UINT8,
'int16_t': PRIM_INT16,
diff --git a/pypy/module/_cffi_backend/ctypearray.py b/pypy/module/_cffi_backend/ctypearray.py
--- a/pypy/module/_cffi_backend/ctypearray.py
+++ b/pypy/module/_cffi_backend/ctypearray.py
@@ -36,8 +36,7 @@
datasize = self.size
#
if datasize < 0:
- from pypy.module._cffi_backend import misc
- w_init, length = misc.get_new_array_length(space, w_init)
+ w_init, length = self.get_new_array_length(w_init)
try:
datasize = ovfcheck(length * self.ctitem.size)
except OverflowError:
@@ -53,6 +52,29 @@
self.convert_from_object(ptr, w_init)
return cdata
+ def get_new_array_length(self, w_value):
+ space = self.space
+ if (space.isinstance_w(w_value, space.w_list) or
+ space.isinstance_w(w_value, space.w_tuple)):
+ return (w_value, space.int_w(space.len(w_value)))
+ elif space.isinstance_w(w_value, space.w_bytes):
+ # from a string, we add the null terminator
+ s = space.bytes_w(w_value)
+ return (w_value, len(s) + 1)
+ elif space.isinstance_w(w_value, space.w_unicode):
+ from pypy.module._cffi_backend import wchar_helper
+ u = space.unicode_w(w_value)
+ if self.ctitem.size == 2:
+ length = wchar_helper.unicode_size_as_char16(u)
+ else:
+ length = wchar_helper.unicode_size_as_char32(u)
+ return (w_value, length + 1)
+ else:
+ explicitlength = space.getindex_w(w_value, space.w_OverflowError)
+ if explicitlength < 0:
+ raise oefmt(space.w_ValueError, "negative array length")
+ return (space.w_None, explicitlength)
+
def _check_subscript_index(self, w_cdata, i):
space = self.space
if i < 0:
diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py
--- a/pypy/module/_cffi_backend/ctypeprim.py
+++ b/pypy/module/_cffi_backend/ctypeprim.py
@@ -10,7 +10,7 @@
from rpython.rtyper.tool import rfficache
from pypy.interpreter.error import oefmt
-from pypy.module._cffi_backend import cdataobj, misc
+from pypy.module._cffi_backend import cdataobj, misc, wchar_helper
from pypy.module._cffi_backend.ctypeobj import W_CType
@@ -42,11 +42,13 @@
def cast_unicode(self, w_ob):
space = self.space
s = space.unicode_w(w_ob)
- if len(s) != 1:
+ try:
+ ordinal = wchar_helper.unicode_to_ordinal(s)
+ except ValueError:
raise oefmt(space.w_TypeError,
"cannot cast unicode string of length %d to ctype '%s'",
len(s), self.name)
- return ord(s[0])
+ return intmask(ordinal)
def cast(self, w_ob):
from pypy.module._cffi_backend import ctypeptr
@@ -148,53 +150,83 @@
return self.space.newbytes(s)
-# XXX explicitly use an integer type instead of lltype.UniChar here,
-# because for now the latter is defined as unsigned by RPython (even
-# though it may be signed when 'wchar_t' is written to C).
-WCHAR_INT = {(2, False): rffi.USHORT,
- (4, False): rffi.UINT,
- (4, True): rffi.INT}[rffi.sizeof(lltype.UniChar),
- rfficache.signof_c_type('wchar_t')]
-WCHAR_INTP = rffi.CArrayPtr(WCHAR_INT)
+class W_CTypePrimitiveUniChar(W_CTypePrimitiveCharOrUniChar):
+ _attrs_ = ['is_signed_wchar']
+ _immutable_fields_ = ['is_signed_wchar']
-class W_CTypePrimitiveUniChar(W_CTypePrimitiveCharOrUniChar):
- _attrs_ = []
+ _wchar_is_signed = rfficache.signof_c_type('wchar_t')
- if rffi.r_wchar_t.SIGN:
- def write_raw_integer_data(self, w_cdata, value):
- w_cdata.write_raw_signed_data(value)
+ def __init__(self, space, size, name, name_position, align):
+ W_CTypePrimitiveCharOrUniChar.__init__(self, space, size, name,
+ name_position, align)
+ self.is_signed_wchar = self._wchar_is_signed and (name == "wchar_t")
+ # "char16_t" and "char32_t" are always unsigned
def cast_to_int(self, cdata):
- unichardata = rffi.cast(WCHAR_INTP, cdata)
- return self.space.newint(unichardata[0])
+ if self.is_signed_wchar:
+ value = misc.read_raw_long_data(cdata, self.size)
+ return self.space.newint(value)
+ else:
+ value = misc.read_raw_ulong_data(cdata, self.size)
+ if self.size < rffi.sizeof(lltype.Signed):
+ return self.space.newint(intmask(value))
+ else:
+ return self.space.newint(value) # r_uint => 'long' object
def convert_to_object(self, cdata):
- unichardata = rffi.cast(rffi.CWCHARP, cdata)
- return self.space.newunicode(unichardata[0])
+ if self.is_signed_wchar:
+ unichardata = rffi.cast(rffi.CWCHARP, cdata)
+ return self.space.newunicode(unichardata[0])
+ else:
+ value = misc.read_raw_ulong_data(cdata, self.size) # r_uint
+ try:
+ u = wchar_helper.ordinal_to_unicode(value)
+ except wchar_helper.OutOfRange as e:
+ raise oefmt(self.space.w_ValueError,
+ "char32_t out of range for "
+ "conversion to unicode: %s", hex(e.ordinal))
+ return self.space.newunicode(u)
def string(self, cdataobj, maxlen):
with cdataobj as ptr:
w_res = self.convert_to_object(ptr)
return w_res
- def _convert_to_unichar(self, w_ob):
+ def _convert_to_charN_t(self, w_ob):
+ # returns a r_uint. If self.size == 2, it is smaller than 0x10000
space = self.space
if space.isinstance_w(w_ob, space.w_unicode):
- s = space.unicode_w(w_ob)
- if len(s) == 1:
- return s[0]
- if (isinstance(w_ob, cdataobj.W_CData) and
- isinstance(w_ob.ctype, W_CTypePrimitiveUniChar)):
+ u = space.unicode_w(w_ob)
+ try:
+ ordinal = wchar_helper.unicode_to_ordinal(u)
+ except ValueError:
+ pass
+ else:
+ if self.size == 2 and ordinal > 0xffff:
+ raise self._convert_error("single character <= 0xFFFF",
+ w_ob)
+ return ordinal
+ elif (isinstance(w_ob, cdataobj.W_CData) and
+ isinstance(w_ob.ctype, W_CTypePrimitiveUniChar) and
+ w_ob.ctype.size == self.size):
with w_ob as ptr:
- return rffi.cast(rffi.CWCHARP, ptr)[0]
+ return misc.read_raw_ulong_data(ptr, self.size)
raise self._convert_error("unicode string of length 1", w_ob)
def convert_from_object(self, cdata, w_ob):
- value = self._convert_to_unichar(w_ob)
- rffi.cast(rffi.CWCHARP, cdata)[0] = value
+ ordinal = self._convert_to_charN_t(w_ob)
+ misc.write_raw_unsigned_data(cdata, ordinal, self.size)
def unpack_ptr(self, w_ctypeptr, ptr, length):
- u = rffi.wcharpsize2unicode(rffi.cast(rffi.CWCHARP, ptr), length)
+ if self.size == 2:
+ u = wchar_helper.unicode_from_char16(ptr, length)
+ else:
+ try:
+ u = wchar_helper.unicode_from_char32(ptr, length)
+ except wchar_helper.OutOfRange as e:
+ raise oefmt(self.space.w_ValueError,
+ "char32_t out of range for "
+ "conversion to unicode: %s", hex(e.ordinal))
return self.space.newunicode(u)
diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py
--- a/pypy/module/_cffi_backend/ctypeptr.py
+++ b/pypy/module/_cffi_backend/ctypeptr.py
@@ -4,9 +4,9 @@
from rpython.rlib import rposix
from rpython.rlib.rarithmetic import ovfcheck
-from rpython.rtyper.annlowlevel import llstr, llunicode
+from rpython.rtyper.annlowlevel import llstr
from rpython.rtyper.lltypesystem import lltype, rffi
-from rpython.rtyper.lltypesystem.rstr import copy_string_to_raw, copy_unicode_to_raw
+from rpython.rtyper.lltypesystem.rstr import copy_string_to_raw
from pypy.interpreter.error import OperationError, oefmt, wrap_oserror
from pypy.module._cffi_backend import cdataobj, misc, ctypeprim, ctypevoid
@@ -88,18 +88,28 @@
if n != self.length:
cdata[n] = '\x00'
elif isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveUniChar):
+ from pypy.module._cffi_backend import wchar_helper
if not space.isinstance_w(w_ob, space.w_unicode):
raise self._convert_error("unicode or list or tuple", w_ob)
s = space.unicode_w(w_ob)
- n = len(s)
+ if self.ctitem.size == 2:
+ n = wchar_helper.unicode_size_as_char16(s)
+ else:
+ n = wchar_helper.unicode_size_as_char32(s)
if self.length >= 0 and n > self.length:
raise oefmt(space.w_IndexError,
"initializer unicode string is too long for '%s' "
"(got %d characters)", self.name, n)
- unichardata = rffi.cast(rffi.CWCHARP, cdata)
- copy_unicode_to_raw(llunicode(s), unichardata, 0, n)
- if n != self.length:
- unichardata[n] = u'\x00'
+ add_final_zero = (n != self.length)
+ if self.ctitem.size == 2:
+ try:
+ wchar_helper.unicode_to_char16(s, cdata, n, add_final_zero)
+ except wchar_helper.OutOfRange as e:
+ raise oefmt(self.space.w_ValueError,
+ "unicode character ouf of range for "
+ "conversion to char16_t: %s", hex(e.ordinal))
+ else:
+ wchar_helper.unicode_to_char32(s, cdata, n, add_final_zero)
else:
raise self._convert_error("list or tuple", w_ob)
@@ -134,12 +144,12 @@
#
# pointer to a wchar_t: builds and returns a unicode
if self.is_unichar_ptr_or_array():
- cdata = rffi.cast(rffi.CWCHARP, ptr)
- if length < 0:
- u = rffi.wcharp2unicode(cdata)
+ from pypy.module._cffi_backend import wchar_helper
+ if self.ctitem.size == 2:
+ length = wchar_helper.measure_length_16(ptr, length)
else:
- u = rffi.wcharp2unicoden(cdata, length)
- return space.newunicode(u)
+ length = wchar_helper.measure_length_32(ptr, length)
+ return self.ctitem.unpack_ptr(self, ptr, length)
#
return W_CType.string(self, cdataobj, maxlen)
@@ -302,9 +312,18 @@
if (space.isinstance_w(w_init, space.w_list) or
space.isinstance_w(w_init, space.w_tuple)):
length = space.int_w(space.len(w_init))
- elif space.isinstance_w(w_init, space.w_basestring):
+ elif space.isinstance_w(w_init, space.w_bytes):
# from a string, we add the null terminator
- length = space.int_w(space.len(w_init)) + 1
+ s = space.bytes_w(w_init)
+ length = len(s) + 1
+ elif space.isinstance_w(w_init, space.w_unicode):
+ from pypy.module._cffi_backend import wchar_helper
+ u = space.unicode_w(w_init)
+ if self.ctitem.size == 2:
+ length = wchar_helper.unicode_size_as_char16(u)
+ else:
+ length = wchar_helper.unicode_size_as_char32(u)
+ length += 1
elif self.is_file:
result = self.prepare_file(w_init)
if result:
diff --git a/pypy/module/_cffi_backend/ctypestruct.py b/pypy/module/_cffi_backend/ctypestruct.py
--- a/pypy/module/_cffi_backend/ctypestruct.py
+++ b/pypy/module/_cffi_backend/ctypestruct.py
@@ -244,7 +244,7 @@
ct = self.ctype
if isinstance(ct, ctypearray.W_CTypeArray) and ct.length < 0:
space = ct.space
- w_ob, varsizelength = misc.get_new_array_length(space, w_ob)
+ w_ob, varsizelength = ct.get_new_array_length(w_ob)
if optvarsize != -1:
# in this mode, the only purpose of this function is to compute
# the real size of the structure from a var-sized C99 array
diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py
--- a/pypy/module/_cffi_backend/misc.py
+++ b/pypy/module/_cffi_backend/misc.py
@@ -290,21 +290,6 @@
# ____________________________________________________________
-def get_new_array_length(space, w_value):
- if (space.isinstance_w(w_value, space.w_list) or
- space.isinstance_w(w_value, space.w_tuple)):
- return (w_value, space.int_w(space.len(w_value)))
- elif space.isinstance_w(w_value, space.w_basestring):
- # from a string, we add the null terminator
- return (w_value, space.int_w(space.len(w_value)) + 1)
- else:
- explicitlength = space.getindex_w(w_value, space.w_OverflowError)
- if explicitlength < 0:
- raise oefmt(space.w_ValueError, "negative array length")
- return (space.w_None, explicitlength)
-
-# ____________________________________________________________
-
@specialize.arg(0)
def _raw_memcopy_tp(TPP, source, dest):
# in its own function: LONGLONG may make the whole function jit-opaque
diff --git a/pypy/module/_cffi_backend/newtype.py b/pypy/module/_cffi_backend/newtype.py
--- a/pypy/module/_cffi_backend/newtype.py
+++ b/pypy/module/_cffi_backend/newtype.py
@@ -111,6 +111,9 @@
eptype("size_t", rffi.SIZE_T, ctypeprim.W_CTypePrimitiveUnsigned)
eptype("ssize_t", rffi.SSIZE_T, ctypeprim.W_CTypePrimitiveSigned)
+eptypesize("char16_t", 2, ctypeprim.W_CTypePrimitiveUniChar)
+eptypesize("char32_t", 4, ctypeprim.W_CTypePrimitiveUniChar)
+
_WCTSigned = ctypeprim.W_CTypePrimitiveSigned
_WCTUnsign = ctypeprim.W_CTypePrimitiveUnsigned
diff --git a/pypy/module/_cffi_backend/realize_c_type.py b/pypy/module/_cffi_backend/realize_c_type.py
--- a/pypy/module/_cffi_backend/realize_c_type.py
+++ b/pypy/module/_cffi_backend/realize_c_type.py
@@ -73,6 +73,8 @@
"uintmax_t",
"float _Complex",
"double _Complex",
+ "char16_t",
+ "char32_t",
]
assert len(NAMES) == cffi_opcode._NUM_PRIM
diff --git a/pypy/module/_cffi_backend/src/parse_c_type.c b/pypy/module/_cffi_backend/src/parse_c_type.c
--- a/pypy/module/_cffi_backend/src/parse_c_type.c
+++ b/pypy/module/_cffi_backend/src/parse_c_type.c
@@ -505,6 +505,7 @@
case '1':
if (size == 8 && !memcmp(p, "uint16", 6)) return _CFFI_PRIM_UINT16;
+ if (size == 8 && !memcmp(p, "char16", 6)) return _CFFI_PRIM_CHAR16;
break;
case '2':
@@ -513,6 +514,7 @@
case '3':
if (size == 8 && !memcmp(p, "uint32", 6)) return _CFFI_PRIM_UINT32;
+ if (size == 8 && !memcmp(p, "char32", 6)) return _CFFI_PRIM_CHAR32;
break;
case '4':
diff --git a/pypy/module/_cffi_backend/src/parse_c_type.h b/pypy/module/_cffi_backend/src/parse_c_type.h
--- a/pypy/module/_cffi_backend/src/parse_c_type.h
+++ b/pypy/module/_cffi_backend/src/parse_c_type.h
@@ -80,8 +80,10 @@
#define _CFFI_PRIM_UINTMAX 47
#define _CFFI_PRIM_FLOATCOMPLEX 48
#define _CFFI_PRIM_DOUBLECOMPLEX 49
+#define _CFFI_PRIM_CHAR16 50
+#define _CFFI_PRIM_CHAR32 51
-#define _CFFI__NUM_PRIM 50
+#define _CFFI__NUM_PRIM 52
#define _CFFI__UNKNOWN_PRIM (-1)
#define _CFFI__UNKNOWN_FLOAT_PRIM (-2)
#define _CFFI__UNKNOWN_LONG_DOUBLE (-3)
diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py
--- a/pypy/module/_cffi_backend/test/_backend_test_c.py
+++ b/pypy/module/_cffi_backend/test/_backend_test_c.py
@@ -1925,7 +1925,11 @@
assert string(a, 8).startswith(b'ABC') # may contain additional garbage
def test_string_wchar():
- BWChar = new_primitive_type("wchar_t")
+ for typename in ["wchar_t", "char16_t", "char32_t"]:
+ _test_string_wchar_variant(typename)
+
+def _test_string_wchar_variant(typename):
+ BWChar = new_primitive_type(typename)
assert string(cast(BWChar, 42)) == u+'*'
assert string(cast(BWChar, 0x4253)) == u+'\u4253'
assert string(cast(BWChar, 0)) == u+'\x00'
@@ -2087,22 +2091,44 @@
py.test.raises(TypeError, newp, BStructPtr, [cast(BFunc2, 0)])
def test_wchar():
- BWChar = new_primitive_type("wchar_t")
+ _test_wchar_variant("wchar_t")
+ if sys.platform.startswith("linux"):
+ BWChar = new_primitive_type("wchar_t")
+ assert sizeof(BWChar) == 4
+ assert int(cast(BWChar, -1)) == -1 # signed, on linux
+
+def test_char16():
+ BChar16 = new_primitive_type("char16_t")
+ assert sizeof(BChar16) == 2
+ _test_wchar_variant("char16_t")
+ assert int(cast(BChar16, -1)) == 0xffff # always unsigned
+
+def test_char32():
+ BChar32 = new_primitive_type("char32_t")
+ assert sizeof(BChar32) == 4
+ _test_wchar_variant("char32_t")
+ assert int(cast(BChar32, -1)) == 0xffffffff # always unsigned
+
+def _test_wchar_variant(typename):
+ BWChar = new_primitive_type(typename)
BInt = new_primitive_type("int")
pyuni4 = {1: True, 2: False}[len(u+'\U00012345')]
wchar4 = {2: False, 4: True}[sizeof(BWChar)]
- assert str(cast(BWChar, 0x45)) == "<cdata 'wchar_t' %s'E'>" % (
- mandatory_u_prefix,)
- assert str(cast(BWChar, 0x1234)) == "<cdata 'wchar_t' %s'\u1234'>" % (
- mandatory_u_prefix,)
- if wchar4:
- if not _hacked_pypy_uni4():
+ assert str(cast(BWChar, 0x45)) == "<cdata '%s' %s'E'>" % (
+ typename, mandatory_u_prefix)
+ assert str(cast(BWChar, 0x1234)) == "<cdata '%s' %s'\u1234'>" % (
+ typename, mandatory_u_prefix)
+ if not _hacked_pypy_uni4():
+ if wchar4:
x = cast(BWChar, 0x12345)
- assert str(x) == "<cdata 'wchar_t' %s'\U00012345'>" % (
- mandatory_u_prefix,)
+ assert str(x) == "<cdata '%s' %s'\U00012345'>" % (
+ typename, mandatory_u_prefix)
assert int(x) == 0x12345
- else:
- assert not pyuni4
+ else:
+ x = cast(BWChar, 0x18345)
+ assert str(x) == "<cdata '%s' %s'\u8345'>" % (
+ typename, mandatory_u_prefix)
+ assert int(x) == 0x8345
#
BWCharP = new_pointer_type(BWChar)
BStruct = new_struct_type("struct foo_s")
@@ -2117,9 +2143,9 @@
s.a1 = u+'\u1234'
assert s.a1 == u+'\u1234'
if pyuni4:
- assert wchar4
- s.a1 = u+'\U00012345'
- assert s.a1 == u+'\U00012345'
+ if wchar4:
+ s.a1 = u+'\U00012345'
+ assert s.a1 == u+'\U00012345'
elif wchar4:
if not _hacked_pypy_uni4():
s.a1 = cast(BWChar, 0x12345)
@@ -2154,17 +2180,17 @@
py.test.raises(IndexError, 'a[4]')
#
w = cast(BWChar, 'a')
- assert repr(w) == "<cdata 'wchar_t' %s'a'>" % mandatory_u_prefix
+ assert repr(w) == "<cdata '%s' %s'a'>" % (typename, mandatory_u_prefix)
assert str(w) == repr(w)
assert string(w) == u+'a'
assert int(w) == ord('a')
w = cast(BWChar, 0x1234)
- assert repr(w) == "<cdata 'wchar_t' %s'\u1234'>" % mandatory_u_prefix
+ assert repr(w) == "<cdata '%s' %s'\u1234'>" % (typename, mandatory_u_prefix)
assert str(w) == repr(w)
assert string(w) == u+'\u1234'
assert int(w) == 0x1234
w = cast(BWChar, u+'\u8234')
- assert repr(w) == "<cdata 'wchar_t' %s'\u8234'>" % mandatory_u_prefix
+ assert repr(w) == "<cdata '%s' %s'\u8234'>" % (typename, mandatory_u_prefix)
assert str(w) == repr(w)
assert string(w) == u+'\u8234'
assert int(w) == 0x8234
@@ -2172,8 +2198,8 @@
assert repr(w) == "<cdata 'int' 4660>"
if wchar4 and not _hacked_pypy_uni4():
w = cast(BWChar, u+'\U00012345')
- assert repr(w) == "<cdata 'wchar_t' %s'\U00012345'>" % (
- mandatory_u_prefix,)
+ assert repr(w) == "<cdata '%s' %s'\U00012345'>" % (
+ typename, mandatory_u_prefix)
assert str(w) == repr(w)
assert string(w) == u+'\U00012345'
assert int(w) == 0x12345
@@ -2200,7 +2226,7 @@
py.test.raises(RuntimeError, string, q)
#
def cb(p):
- assert repr(p).startswith("<cdata 'wchar_t *' 0x")
+ assert repr(p).startswith("<cdata '%s *' 0x" % typename)
return len(string(p))
BFunc = new_function_type((BWCharP,), BInt, False)
f = callback(BFunc, cb, -42)
@@ -2213,6 +2239,27 @@
x = cast(BWChar, -1)
py.test.raises(ValueError, string, x)
+def test_wchar_variants_mix():
+ BWChar = new_primitive_type("wchar_t")
+ BChar16 = new_primitive_type("char16_t")
+ BChar32 = new_primitive_type("char32_t")
+ assert int(cast(BChar32, cast(BChar16, -2))) == 0xfffe
+ assert int(cast(BWChar, cast(BChar16, -2))) == 0xfffe
+ assert int(cast(BChar16, cast(BChar32, 0x0001f345))) == 0xf345
+ assert int(cast(BChar16, cast(BWChar, 0x0001f345))) == 0xf345
+ #
+ BChar16A = new_array_type(new_pointer_type(BChar16), None)
+ BChar32A = new_array_type(new_pointer_type(BChar32), None)
+ x = cast(BChar32, 'A')
+ py.test.raises(TypeError, newp, BChar16A, [x])
+ x = cast(BChar16, 'A')
+ py.test.raises(TypeError, newp, BChar32A, [x])
+ #
+ a = newp(BChar16A, u+'\U00012345')
+ assert len(a) == 3
+ a = newp(BChar32A, u+'\U00012345')
+ assert len(a) == 2 # even if the Python unicode string above is 2 chars
+
def test_keepalive_struct():
# exception to the no-keepalive rule: p=newp(BStructPtr) returns a
# pointer owning the memory, and p[0] returns a pointer to the
@@ -3439,14 +3486,15 @@
py.test.raises(TypeError, "p[1:5] = u+'XYZT'")
py.test.raises(TypeError, "p[1:5] = [1, 2, 3, 4]")
#
- BUniChar = new_primitive_type("wchar_t")
- BArray = new_array_type(new_pointer_type(BUniChar), None)
- p = newp(BArray, u+"foobar")
- p[2:5] = [u+"*", u+"Z", u+"T"]
- p[1:3] = u+"XY"
- assert list(p) == [u+"f", u+"X", u+"Y", u+"Z", u+"T", u+"r", u+"\x00"]
- py.test.raises(TypeError, "p[1:5] = b'XYZT'")
- py.test.raises(TypeError, "p[1:5] = [1, 2, 3, 4]")
+ for typename in ["wchar_t", "char16_t", "char32_t"]:
+ BUniChar = new_primitive_type(typename)
+ BArray = new_array_type(new_pointer_type(BUniChar), None)
+ p = newp(BArray, u+"foobar")
+ p[2:5] = [u+"*", u+"Z", u+"T"]
+ p[1:3] = u+"XY"
+ assert list(p) == [u+"f", u+"X", u+"Y", u+"Z", u+"T", u+"r", u+"\x00"]
+ py.test.raises(TypeError, "p[1:5] = b'XYZT'")
+ py.test.raises(TypeError, "p[1:5] = [1, 2, 3, 4]")
def test_void_p_arithmetic():
BVoid = new_void_type()
@@ -3759,10 +3807,12 @@
p0 = p
assert unpack(p, 10) == b"abc\x00def\x00\x00\x00"
assert unpack(p+1, 5) == b"bc\x00de"
- BWChar = new_primitive_type("wchar_t")
- BArray = new_array_type(new_pointer_type(BWChar), 10) # wchar_t[10]
- p = newp(BArray, u"abc\x00def")
- assert unpack(p, 10) == u"abc\x00def\x00\x00\x00"
+
+ for typename in ["wchar_t", "char16_t", "char32_t"]:
+ BWChar = new_primitive_type(typename)
+ BArray = new_array_type(new_pointer_type(BWChar), 10) # wchar_t[10]
+ p = newp(BArray, u"abc\x00def")
+ assert unpack(p, 10) == u"abc\x00def\x00\x00\x00"
for typename, samples in [
("uint8_t", [0, 2**8-1]),
diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py
--- a/pypy/module/_cffi_backend/test/test_ffi_obj.py
+++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py
@@ -555,3 +555,11 @@
import _cffi_backend as _cffi1_backend
ffi = _cffi1_backend.FFI()
raises(ffi.error, ffi.cast, "int[-5]", 0)
+
+ def test_char32_t(self):
+ import _cffi_backend as _cffi1_backend
+ ffi = _cffi1_backend.FFI()
+ z = ffi.new("char32_t[]", u'\U00012345')
+ assert len(z) == 2
+ assert ffi.cast("int *", z)[0] == 0x12345
+ assert list(z) == [u'\U00012345', u'\x00'] # maybe a 2-unichars str
diff --git a/pypy/module/_cffi_backend/wchar_helper.py b/pypy/module/_cffi_backend/wchar_helper.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_cffi_backend/wchar_helper.py
@@ -0,0 +1,192 @@
+from rpython.rlib.objectmodel import specialize
+from rpython.rlib.rarithmetic import r_uint, r_ulonglong, intmask
+from rpython.rtyper.annlowlevel import llunicode
+from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rtyper.lltypesystem.rstr import copy_unicode_to_raw
+
+SIZE_UNICODE = rffi.sizeof(lltype.UniChar)
+
+
+if SIZE_UNICODE == 4:
+ def ordinal_to_unicode(ordinal): # 'ordinal' is a r_uint
+ return unichr(intmask(ordinal))
+else:
+ def ordinal_to_unicode(ordinal): # 'ordinal' is a r_uint
+ if ordinal <= 0xffff:
+ return unichr(intmask(ordinal))
+ elif ordinal <= 0x10ffff:
+ ordinal = intmask(ordinal - 0x10000)
+ return (unichr(0xD800 | (ordinal >> 10)) +
+ unichr(0xDC00 | (ordinal & 0x3FF)))
+ else:
+ raise OutOfRange(ordinal)
+
+def is_surrogate(u, index):
+ return (unichr(0xD800) <= u[index + 0] <= unichr(0xDBFF) and
+ unichr(0xDC00) <= u[index + 1] <= unichr(0xDFFF))
+
+def as_surrogate(u, index):
+ ordinal = (ord(u[index + 0]) - 0xD800) << 10
+ ordinal |= (ord(u[index + 1]) - 0xDC00)
+ return r_uint(ordinal + 0x10000)
+
+def unicode_to_ordinal(u):
+ if len(u) == 1:
+ u = ord(u[0])
+ return r_uint(u)
+ elif SIZE_UNICODE == 2:
+ if len(u) == 2 and is_surrogate(u, 0):
+ return r_uint(as_surrogate(u, 0))
+ raise ValueError
+
+
+class OutOfRange(Exception):
+ ordinal = 0
+
+ def __init__(self, ordinal):
+ ordinal = intmask(rffi.cast(rffi.INT, ordinal))
+ self.ordinal = ordinal
+
+def _unicode_from_wchar(ptr, length):
+ return rffi.wcharpsize2unicode(rffi.cast(rffi.CWCHARP, ptr), length)
+
+
+if SIZE_UNICODE == 2:
+ def unicode_from_char32(ptr, length):
+ # 'ptr' is a pointer to 'length' 32-bit integers
+ ptr = rffi.cast(rffi.UINTP, ptr)
+ alloc = length
+ for i in range(length):
+ if rffi.cast(lltype.Unsigned, ptr[i]) > 0xFFFF:
+ alloc += 1
+
+ u = [u'\x00'] * alloc
+ j = 0
+ for i in range(length):
+ ordinal = rffi.cast(lltype.Unsigned, ptr[i])
+ if ordinal > 0xFFFF:
+ if ordinal > 0x10FFFF:
+ raise OutOfRange(ordinal)
+ ordinal = intmask(ordinal - 0x10000)
+ u[j] = unichr(0xD800 | (ordinal >> 10))
+ j += 1
+ u[j] = unichr(0xDC00 | (ordinal & 0x3FF))
+ j += 1
+ else:
+ u[j] = unichr(intmask(ordinal))
+ j += 1
+ assert j == len(u)
+ return u''.join(u)
+
+ unicode_from_char16 = _unicode_from_wchar
+
+else:
+ unicode_from_char32 = _unicode_from_wchar
+
+ def unicode_from_char16(ptr, length):
+ # 'ptr' is a pointer to 'length' 16-bit integers
+ ptr = rffi.cast(rffi.USHORTP, ptr)
+ u = [u'\x00'] * length
+ i = 0
+ j = 0
+ while j < length:
+ ch = intmask(ptr[j])
+ j += 1
+ if 0xD800 <= ch <= 0xDBFF and j < length:
+ ch2 = intmask(ptr[j])
+ if 0xDC00 <= ch2 <= 0xDFFF:
+ ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000
+ j += 1
+ u[i] = unichr(ch)
+ i += 1
+ del u[i:]
+ return u''.join(u)
+
+
+@specialize.ll()
+def _measure_length(ptr, maxlen):
+ result = 0
+ if maxlen < 0:
+ while intmask(ptr[result]) != 0:
+ result += 1
+ else:
+ while result < maxlen and intmask(ptr[result]) != 0:
+ result += 1
+ return result
+
+def measure_length_16(ptr, maxlen=-1):
+ return _measure_length(rffi.cast(rffi.USHORTP, ptr), maxlen)
+
+def measure_length_32(ptr, maxlen=-1):
+ return _measure_length(rffi.cast(rffi.UINTP, ptr), maxlen)
+
+
+def unicode_size_as_char16(u):
+ result = len(u)
+ if SIZE_UNICODE == 4:
+ for i in range(result):
+ if ord(u[i]) > 0xFFFF:
+ result += 1
+ return result
+
+def unicode_size_as_char32(u):
+ result = len(u)
+ if SIZE_UNICODE == 2 and result > 1:
+ for i in range(result - 1):
+ if is_surrogate(u, i):
+ result -= 1
+ return result
+
+
+def _unicode_to_wchar(u, target_ptr, target_length, add_final_zero):
+ # 'target_ptr' is a raw pointer to 'target_length' wchars;
+ # we assume here that target_length == len(u).
+ unichardata = rffi.cast(rffi.CWCHARP, target_ptr)
+ copy_unicode_to_raw(llunicode(u), unichardata, 0, target_length)
+ if add_final_zero:
+ unichardata[target_length] = u'\x00'
+
+
+if SIZE_UNICODE == 2:
+ def unicode_to_char32(u, target_ptr, target_length, add_final_zero):
+ # 'target_ptr' is a raw pointer to 'target_length' 32-bit integers;
+ # we assume here that target_length == unicode_size_as_char32(u).
+ ptr = rffi.cast(rffi.UINTP, target_ptr)
+ src_index = 0
+ last_surrogate_pos = len(u) - 2
+ for i in range(target_length):
+ if src_index <= last_surrogate_pos and is_surrogate(u, src_index):
+ ordinal = as_surrogate(u, src_index)
+ src_index += 2
+ else:
+ ordinal = r_uint(ord(u[src_index]))
+ src_index += 1
+ ptr[i] = rffi.cast(rffi.UINT, ordinal)
+ if add_final_zero:
+ ptr[target_length] = rffi.cast(rffi.UINT, 0)
+
+ unicode_to_char16 = _unicode_to_wchar
+
+else:
+ unicode_to_char32 = _unicode_to_wchar
+
+ def unicode_to_char16(u, target_ptr, target_length, add_final_zero):
+ # 'target_ptr' is a raw pointer to 'target_length' 16-bit integers;
+ # we assume here that target_length == unicode_size_as_char16(u).
+ ptr = rffi.cast(rffi.USHORTP, target_ptr)
+ for uc in u:
+ ordinal = ord(uc)
+ if ordinal > 0xFFFF:
+ if ordinal > 0x10FFFF:
+ raise OutOfRange(ordinal)
+ ordinal -= 0x10000
+ ptr[0] = rffi.cast(rffi.USHORT, 0xD800 | (ordinal >> 10))
+ ptr[1] = rffi.cast(rffi.USHORT, 0xDC00 | (ordinal & 0x3FF))
+ ptr = rffi.ptradd(ptr, 2)
+ else:
+ ptr[0] = rffi.cast(rffi.USHORT, ordinal)
+ ptr = rffi.ptradd(ptr, 1)
+ assert ptr == (
+ rffi.ptradd(rffi.cast(rffi.USHORTP, target_ptr), target_length))
+ if add_final_zero:
+ ptr[0] = rffi.cast(rffi.USHORT, 0)
diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ffi_backend.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ffi_backend.py
--- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ffi_backend.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ffi_backend.py
@@ -2,6 +2,7 @@
import py, sys, platform
import pytest
from pypy.module.test_lib_pypy.cffi_tests.cffi0 import backend_tests, test_function, test_ownlib
+from pypy.module.test_lib_pypy.cffi_tests.support import u
from cffi import FFI
import _cffi_backend
@@ -398,6 +399,8 @@
"double",
"long double",
"wchar_t",
+ "char16_t",
+ "char32_t",
"_Bool",
"int8_t",
"uint8_t",
@@ -509,3 +512,43 @@
py.test.raises(TypeError, cd)
py.test.raises(TypeError, cd, ffi.NULL)
py.test.raises(TypeError, cd, ffi.typeof("void *"))
+
+ def test_explicitly_defined_char16_t(self):
+ ffi = FFI()
+ ffi.cdef("typedef uint16_t char16_t;")
+ x = ffi.cast("char16_t", 1234)
+ assert ffi.typeof(x) is ffi.typeof("uint16_t")
+
+ def test_char16_t(self):
+ ffi = FFI()
+ x = ffi.new("char16_t[]", 5)
+ assert len(x) == 5 and ffi.sizeof(x) == 10
+ x[2] = u+'\u1324'
+ assert x[2] == u+'\u1324'
+ y = ffi.new("char16_t[]", u+'\u1234\u5678')
+ assert len(y) == 3
+ assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00']
+ assert ffi.string(y) == u+'\u1234\u5678'
+ z = ffi.new("char16_t[]", u+'\U00012345')
+ assert len(z) == 3
+ assert list(z) == [u+'\ud808', u+'\udf45', u+'\x00']
+ assert ffi.string(z) == u+'\U00012345'
+
+ def test_char32_t(self):
+ ffi = FFI()
+ x = ffi.new("char32_t[]", 5)
+ assert len(x) == 5 and ffi.sizeof(x) == 20
+ x[3] = u+'\U00013245'
+ assert x[3] == u+'\U00013245'
+ y = ffi.new("char32_t[]", u+'\u1234\u5678')
+ assert len(y) == 3
+ assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00']
+ py_uni = u+'\U00012345'
+ z = ffi.new("char32_t[]", py_uni)
+ assert len(z) == 2
+ assert list(z) == [py_uni, u+'\x00'] # maybe a 2-unichars string
+ assert ffi.string(z) == py_uni
+ if len(py_uni) == 1: # 4-bytes unicodes in Python
+ s = ffi.new("char32_t[]", u+'\ud808\udf00')
+ assert len(s) == 3
+ assert list(s) == [u+'\ud808', u+'\udf00', u+'\x00']
diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ownlib.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ownlib.py
--- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ownlib.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ownlib.py
@@ -3,6 +3,7 @@
import subprocess, weakref
from cffi import FFI
from cffi.backend_ctypes import CTypesBackend
+from pypy.module.test_lib_pypy.cffi_tests.support import u
SOURCE = """\
@@ -93,6 +94,15 @@
}
EXPORT int my_array[7] = {0, 1, 2, 3, 4, 5, 6};
+
+EXPORT unsigned short foo_2bytes(unsigned short a)
+{
+ return (unsigned short)(a + 42);
+}
+EXPORT unsigned int foo_4bytes(unsigned int a)
+{
+ return (unsigned int)(a + 42);
+}
"""
class TestOwnLib(object):
@@ -301,3 +311,18 @@
pfn = ffi.addressof(lib, "test_getting_errno")
assert ffi.typeof(pfn) == ffi.typeof("int(*)(void)")
assert pfn == lib.test_getting_errno
+
+ def test_char16_char32_t(self):
+ if self.module is None:
+ py.test.skip("fix the auto-generation of the tiny test lib")
+ if self.Backend is CTypesBackend:
+ py.test.skip("not implemented with the ctypes backend")
+ ffi = FFI(backend=self.Backend())
+ ffi.cdef("""
+ char16_t foo_2bytes(char16_t);
+ char32_t foo_4bytes(char32_t);
+ """)
+ lib = ffi.dlopen(self.module)
+ assert lib.foo_2bytes(u+'\u1234') == u+'\u125e'
+ assert lib.foo_4bytes(u+'\u1234') == u+'\u125e'
+ assert lib.foo_4bytes(u+'\U00012345') == u+'\U0001236f'
diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py
--- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py
@@ -242,7 +242,7 @@
F = tp.is_float_type()
X = tp.is_complex_type()
I = tp.is_integer_type()
- assert C == (typename in ('char', 'wchar_t'))
+ assert C == (typename in ('char', 'wchar_t', 'char16_t', 'char32_t'))
assert F == (typename in ('float', 'double', 'long double'))
assert X == (typename in ('float _Complex', 'double _Complex'))
assert I + F + C + X == 1 # one and only one of them is true
@@ -385,6 +385,10 @@
lib = ffi.verify("wchar_t foo(wchar_t x) { return x+1; }")
assert lib.foo(uniexample1) == uniexample2
+def test_char16_char32_type():
+ py.test.skip("XXX test or fully prevent char16_t and char32_t from "
+ "working in ffi.verify() mode")
+
def test_no_argument():
ffi = FFI()
ffi.cdef("int foo(void);")
diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_new_ffi_1.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_new_ffi_1.py
--- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_new_ffi_1.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_new_ffi_1.py
@@ -1673,6 +1673,8 @@
"double",
"long double",
"wchar_t",
+ "char16_t",
+ "char32_t",
"_Bool",
"int8_t",
"uint8_t",
@@ -1743,3 +1745,30 @@
exec("from _test_import_from_lib import *", d)
assert (sorted([x for x in d.keys() if not x.startswith('__')]) ==
['ffi', 'lib'])
+
+ def test_char16_t(self):
+ x = ffi.new("char16_t[]", 5)
+ assert len(x) == 5 and ffi.sizeof(x) == 10
+ x[2] = u+'\u1324'
+ assert x[2] == u+'\u1324'
+ y = ffi.new("char16_t[]", u+'\u1234\u5678')
+ assert len(y) == 3
+ assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00']
+ assert ffi.string(y) == u+'\u1234\u5678'
+ z = ffi.new("char16_t[]", u+'\U00012345')
+ assert len(z) == 3
+ assert list(z) == [u+'\ud808', u+'\udf45', u+'\x00']
+ assert ffi.string(z) == u+'\U00012345'
+
+ def test_char32_t(self):
+ x = ffi.new("char32_t[]", 5)
+ assert len(x) == 5 and ffi.sizeof(x) == 20
+ x[3] = u+'\U00013245'
+ assert x[3] == u+'\U00013245'
+ y = ffi.new("char32_t[]", u+'\u1234\u5678')
+ assert len(y) == 3
+ assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00']
+ z = ffi.new("char32_t[]", u+'\U00012345')
+ assert len(z) == 2
+ assert list(z) == [u+'\U00012345', u+'\x00'] # maybe a 2-unichars strin
+ assert ffi.string(z) == u+'\U00012345'
diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py
--- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py
@@ -25,13 +25,14 @@
assert ''.join(map(str, recomp.cffi_types)) == expected_output
def verify(ffi, module_name, source, *args, **kwds):
+ no_cpp = kwds.pop('no_cpp', False)
kwds.setdefault('undef_macros', ['NDEBUG'])
module_name = '_CFFI_' + module_name
ffi.set_source(module_name, source)
- if not os.environ.get('NO_CPP'): # test the .cpp mode too
+ if not os.environ.get('NO_CPP') and not no_cpp: # test the .cpp mode too
kwds.setdefault('source_extension', '.cpp')
source = 'extern "C" {\n%s\n}' % (source,)
- else:
+ elif sys.platform != 'win32':
# add '-Werror' to the existing 'extra_compile_args' flags
kwds['extra_compile_args'] = (kwds.get('extra_compile_args', []) +
['-Werror'])
@@ -2010,7 +2011,7 @@
lib = verify(ffi, "test_function_returns_float_complex", """
#include <complex.h>
static float _Complex f1(float a, float b) { return a + I*2.0*b; }
- """)
+ """, no_cpp=True) # <complex.h> fails on some systems with C++
result = lib.f1(1.25, 5.1)
assert type(result) == complex
assert result.real == 1.25 # exact
@@ -2024,7 +2025,7 @@
lib = verify(ffi, "test_function_returns_double_complex", """
#include <complex.h>
static double _Complex f1(double a, double b) { return a + I*2.0*b; }
- """)
+ """, no_cpp=True) # <complex.h> fails on some systems with C++
result = lib.f1(1.25, 5.1)
assert type(result) == complex
assert result.real == 1.25 # exact
@@ -2038,7 +2039,7 @@
lib = verify(ffi, "test_function_argument_float_complex", """
#include <complex.h>
static float f1(float _Complex x) { return cabsf(x); }
- """)
+ """, no_cpp=True) # <complex.h> fails on some systems with C++
x = complex(12.34, 56.78)
result = lib.f1(x)
assert abs(result - abs(x)) < 1e-5
@@ -2051,7 +2052,7 @@
lib = verify(ffi, "test_function_argument_double_complex", """
#include <complex.h>
static double f1(double _Complex x) { return cabs(x); }
- """)
+ """, no_cpp=True) # <complex.h> fails on some systems with C++
x = complex(12.34, 56.78)
result = lib.f1(x)
assert abs(result - abs(x)) < 1e-11
@@ -2251,3 +2252,34 @@
int f(int a) { return a + 40; }
""", extra_compile_args=['-fvisibility=hidden'])
assert lib.f(2) == 42
+
+def test_override_default_definition():
+ ffi = FFI()
+ ffi.cdef("typedef long int16_t, char16_t;")
+ lib = verify(ffi, "test_override_default_definition", "")
+ assert ffi.typeof("int16_t") is ffi.typeof("char16_t") is ffi.typeof("long")
+
+def test_char16_char32_type(no_cpp=False):
+ ffi = FFI()
+ ffi.cdef("""
+ char16_t foo_2bytes(char16_t);
+ char32_t foo_4bytes(char32_t);
+ """)
+ lib = verify(ffi, "test_char16_char32_type" + no_cpp * "_nocpp", """
+ #if !defined(__cplusplus) || __cplusplus < 201103L
+ typedef uint_least16_t char16_t;
+ typedef uint_least32_t char32_t;
+ #endif
+
+ char16_t foo_2bytes(char16_t a) { return (char16_t)(a + 42); }
+ char32_t foo_4bytes(char32_t a) { return (char32_t)(a + 42); }
+ """, no_cpp=no_cpp)
+ assert lib.foo_2bytes(u+'\u1234') == u+'\u125e'
+ assert lib.foo_4bytes(u+'\u1234') == u+'\u125e'
+ assert lib.foo_4bytes(u+'\U00012345') == u+'\U0001236f'
+ py.test.raises(TypeError, lib.foo_2bytes, u+'\U00012345')
+ py.test.raises(TypeError, lib.foo_2bytes, 1234)
+ py.test.raises(TypeError, lib.foo_4bytes, 1234)
+
+def test_char16_char32_plain_c():
+ test_char16_char32_type(no_cpp=True)
diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py
--- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py
@@ -222,7 +222,7 @@
F = tp.is_float_type()
X = tp.is_complex_type()
I = tp.is_integer_type()
- assert C == (typename in ('char', 'wchar_t'))
+ assert C == (typename in ('char', 'wchar_t', 'char16_t', 'char32_t'))
assert F == (typename in ('float', 'double', 'long double'))
assert X == (typename in ('float _Complex', 'double _Complex'))
assert I + F + C + X == 1 # one and only one of them is true
More information about the pypy-commit mailing list