[pypy-commit] pypy default: hg merge cffi-char16-char32

arigo pypy.commits at gmail.com
Mon Jun 5 02:25:20 EDT 2017


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r91518:6a4af0b6b51c
Date: 2017-06-05 08:24 +0200
http://bitbucket.org/pypy/pypy/changeset/6a4af0b6b51c/

Log:	hg merge cffi-char16-char32

	Support the char16_t and char32_t types in cffi. This means
	reintroducing some surrogate handling in one of the two directions,
	depending on the size of unichar.

diff --git a/lib_pypy/cffi/_cffi_include.h b/lib_pypy/cffi/_cffi_include.h
--- a/lib_pypy/cffi/_cffi_include.h
+++ b/lib_pypy/cffi/_cffi_include.h
@@ -159,9 +159,9 @@
 #define _cffi_from_c_struct                                              \
     ((PyObject *(*)(char *, struct _cffi_ctypedescr *))_cffi_exports[18])
 #define _cffi_to_c_wchar_t                                               \
-    ((wchar_t(*)(PyObject *))_cffi_exports[19])
+    ((_cffi_wchar_t(*)(PyObject *))_cffi_exports[19])
 #define _cffi_from_c_wchar_t                                             \
-    ((PyObject *(*)(wchar_t))_cffi_exports[20])
+    ((PyObject *(*)(_cffi_wchar_t))_cffi_exports[20])
 #define _cffi_to_c_long_double                                           \
     ((long double(*)(PyObject *))_cffi_exports[21])
 #define _cffi_to_c__Bool                                                 \
@@ -174,7 +174,11 @@
 #define _CFFI_CPIDX  25
 #define _cffi_call_python                                                \
     ((void(*)(struct _cffi_externpy_s *, char *))_cffi_exports[_CFFI_CPIDX])
-#define _CFFI_NUM_EXPORTS 26
+#define _cffi_to_c_wchar3216_t                                           \
+    ((int(*)(PyObject *))_cffi_exports[26])
+#define _cffi_from_c_wchar3216_t                                         \
+    ((PyObject *(*)(int))_cffi_exports[27])
+#define _CFFI_NUM_EXPORTS 28
 
 struct _cffi_ctypedescr;
 
@@ -215,6 +219,46 @@
     return NULL;
 }
 
+
+#ifdef HAVE_WCHAR_H
+typedef wchar_t _cffi_wchar_t;
+#else
+typedef uint16_t _cffi_wchar_t;   /* same random pick as _cffi_backend.c */
+#endif
+
+_CFFI_UNUSED_FN static uint16_t _cffi_to_c_char16_t(PyObject *o)
+{
+    if (sizeof(_cffi_wchar_t) == 2)
+        return (uint16_t)_cffi_to_c_wchar_t(o);
+    else
+        return (uint16_t)_cffi_to_c_wchar3216_t(o);
+}
+
+_CFFI_UNUSED_FN static PyObject *_cffi_from_c_char16_t(uint16_t x)
+{
+    if (sizeof(_cffi_wchar_t) == 2)
+        return _cffi_from_c_wchar_t(x);
+    else
+        return _cffi_from_c_wchar3216_t(x);
+}
+
+_CFFI_UNUSED_FN static int _cffi_to_c_char32_t(PyObject *o)
+{
+    if (sizeof(_cffi_wchar_t) == 4)
+        return (int)_cffi_to_c_wchar_t(o);
+    else
+        return (int)_cffi_to_c_wchar3216_t(o);
+}
+
+_CFFI_UNUSED_FN static PyObject *_cffi_from_c_char32_t(int x)
+{
+    if (sizeof(_cffi_wchar_t) == 4)
+        return _cffi_from_c_wchar_t(x);
+    else
+        return _cffi_from_c_wchar3216_t(x);
+}
+
+
 /**********  end CPython-specific section  **********/
 #else
 _CFFI_UNUSED_FN
diff --git a/lib_pypy/cffi/cffi_opcode.py b/lib_pypy/cffi/cffi_opcode.py
--- a/lib_pypy/cffi/cffi_opcode.py
+++ b/lib_pypy/cffi/cffi_opcode.py
@@ -107,9 +107,10 @@
 PRIM_UINTMAX       = 47
 PRIM_FLOATCOMPLEX  = 48
 PRIM_DOUBLECOMPLEX = 49
+PRIM_CHAR16        = 50
+PRIM_CHAR32        = 51
 
-
-_NUM_PRIM          = 50
+_NUM_PRIM          = 52
 _UNKNOWN_PRIM          = -1
 _UNKNOWN_FLOAT_PRIM    = -2
 _UNKNOWN_LONG_DOUBLE   = -3
@@ -135,6 +136,8 @@
     'double _Complex':    PRIM_DOUBLECOMPLEX,
     '_Bool':              PRIM_BOOL,
     'wchar_t':            PRIM_WCHAR,
+    'char16_t':           PRIM_CHAR16,
+    'char32_t':           PRIM_CHAR32,
     'int8_t':             PRIM_INT8,
     'uint8_t':            PRIM_UINT8,
     'int16_t':            PRIM_INT16,
diff --git a/lib_pypy/cffi/model.py b/lib_pypy/cffi/model.py
--- a/lib_pypy/cffi/model.py
+++ b/lib_pypy/cffi/model.py
@@ -122,6 +122,8 @@
         '_Bool':              'i',
         # the following types are not primitive in the C sense
         'wchar_t':            'c',
+        'char16_t':           'c',
+        'char32_t':           'c',
         'int8_t':             'i',
         'uint8_t':            'i',
         'int16_t':            'i',
diff --git a/lib_pypy/cffi/parse_c_type.h b/lib_pypy/cffi/parse_c_type.h
--- a/lib_pypy/cffi/parse_c_type.h
+++ b/lib_pypy/cffi/parse_c_type.h
@@ -81,8 +81,10 @@
 #define _CFFI_PRIM_UINTMAX      47
 #define _CFFI_PRIM_FLOATCOMPLEX 48
 #define _CFFI_PRIM_DOUBLECOMPLEX 49
+#define _CFFI_PRIM_CHAR16       50
+#define _CFFI_PRIM_CHAR32       51
 
-#define _CFFI__NUM_PRIM         50
+#define _CFFI__NUM_PRIM         52
 #define _CFFI__UNKNOWN_PRIM           (-1)
 #define _CFFI__UNKNOWN_FLOAT_PRIM     (-2)
 #define _CFFI__UNKNOWN_LONG_DOUBLE    (-3)
diff --git a/lib_pypy/cffi/recompiler.py b/lib_pypy/cffi/recompiler.py
--- a/lib_pypy/cffi/recompiler.py
+++ b/lib_pypy/cffi/recompiler.py
@@ -3,8 +3,9 @@
 from .error import VerificationError
 from .cffi_opcode import *
 
-VERSION = "0x2601"
-VERSION_EMBEDDED = "0x2701"
+VERSION_BASE = 0x2601
+VERSION_EMBEDDED = 0x2701
+VERSION_CHAR16CHAR32 = 0x2801
 
 
 class GlobalExpr:
@@ -126,6 +127,10 @@
         self.ffi = ffi
         self.module_name = module_name
         self.target_is_python = target_is_python
+        self._version = VERSION_BASE
+
+    def needs_version(self, ver):
+        self._version = max(self._version, ver)
 
     def collect_type_table(self):
         self._typesdict = {}
@@ -304,9 +309,7 @@
             prnt('#endif')
             lines = self._rel_readlines('_embedding.h')
             prnt(''.join(lines))
-            version = VERSION_EMBEDDED
-        else:
-            version = VERSION
+            self.needs_version(VERSION_EMBEDDED)
         #
         # then paste the C source given by the user, verbatim.
         prnt('/************************************************************/')
@@ -405,7 +408,7 @@
             prnt('        _cffi_call_python_org = '
                  '(void(*)(struct _cffi_externpy_s *, char *))p[1];')
             prnt('    }')
-        prnt('    p[0] = (const void *)%s;' % version)
+        prnt('    p[0] = (const void *)0x%x;' % self._version)
         prnt('    p[1] = &_cffi_type_context;')
         prnt('}')
         # on Windows, distutils insists on putting init_cffi_xyz in
@@ -423,21 +426,22 @@
         prnt('PyMODINIT_FUNC')
         prnt('PyInit_%s(void)' % (base_module_name,))
         prnt('{')
-        prnt('  return _cffi_init("%s", %s, &_cffi_type_context);' % (
-            self.module_name, version))
+        prnt('  return _cffi_init("%s", 0x%x, &_cffi_type_context);' % (
+            self.module_name, self._version))
         prnt('}')
         prnt('#else')
         prnt('PyMODINIT_FUNC')
         prnt('init%s(void)' % (base_module_name,))
         prnt('{')
-        prnt('  _cffi_init("%s", %s, &_cffi_type_context);' % (
-            self.module_name, version))
+        prnt('  _cffi_init("%s", 0x%x, &_cffi_type_context);' % (
+            self.module_name, self._version))
         prnt('}')
         prnt('#endif')
         prnt()
         prnt('#ifdef __GNUC__')
         prnt('#  pragma GCC visibility pop')
         prnt('#endif')
+        self._version = None
 
     def _to_py(self, x):
         if isinstance(x, str):
@@ -476,7 +480,8 @@
             prnt('from %s import ffi as _ffi%d' % (included_module_name, i))
         prnt()
         prnt("ffi = _cffi_backend.FFI('%s'," % (self.module_name,))
-        prnt("    _version = %s," % (VERSION,))
+        prnt("    _version = 0x%x," % (self._version,))
+        self._version = None
         #
         # the '_types' keyword argument
         self.cffi_types = tuple(self.cffi_types)    # don't change any more
@@ -515,8 +520,11 @@
                 # double' here, and _cffi_to_c_double would loose precision
                 converter = '(%s)_cffi_to_c_double' % (tp.get_c_name(''),)
             else:
-                converter = '(%s)_cffi_to_c_%s' % (tp.get_c_name(''),
+                cname = tp.get_c_name('')
+                converter = '(%s)_cffi_to_c_%s' % (cname,
                                                    tp.name.replace(' ', '_'))
+                if cname in ('char16_t', 'char32_t'):
+                    self.needs_version(VERSION_CHAR16CHAR32)
             errvalue = '-1'
         #
         elif isinstance(tp, model.PointerType):
@@ -573,7 +581,10 @@
             elif isinstance(tp, model.UnknownFloatType):
                 return '_cffi_from_c_double(%s)' % (var,)
             elif tp.name != 'long double' and not tp.is_complex_type():
-                return '_cffi_from_c_%s(%s)' % (tp.name.replace(' ', '_'), var)
+                cname = tp.name.replace(' ', '_')
+                if cname in ('char16_t', 'char32_t'):
+                    self.needs_version(VERSION_CHAR16CHAR32)
+                return '_cffi_from_c_%s(%s)' % (cname, var)
             else:
                 return '_cffi_from_c_deref((char *)&%s, _cffi_type(%d))' % (
                     var, self._gettypenum(tp))
diff --git a/lib_pypy/cffi/vengine_cpy.py b/lib_pypy/cffi/vengine_cpy.py
--- a/lib_pypy/cffi/vengine_cpy.py
+++ b/lib_pypy/cffi/vengine_cpy.py
@@ -808,7 +808,8 @@
 #include <stddef.h>
 
 /* this block of #ifs should be kept exactly identical between
-   c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */
+   c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py
+   and cffi/_cffi_include.h */
 #if defined(_MSC_VER)
 # include <malloc.h>   /* for alloca() */
 # if _MSC_VER < 1600   /* MSVC < 2010 */
@@ -842,11 +843,13 @@
 #  include <stdint.h>
 # endif
 # if _MSC_VER < 1800   /* MSVC < 2013 */
-   typedef unsigned char _Bool;
+#  ifndef __cplusplus
+    typedef unsigned char _Bool;
+#  endif
 # endif
 #else
 # include <stdint.h>
-# if (defined (__SVR4) && defined (__sun)) || defined(_AIX)
+# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) || defined(__hpux)
 #  include <alloca.h>
 # endif
 #endif
diff --git a/lib_pypy/cffi/vengine_gen.py b/lib_pypy/cffi/vengine_gen.py
--- a/lib_pypy/cffi/vengine_gen.py
+++ b/lib_pypy/cffi/vengine_gen.py
@@ -627,7 +627,8 @@
 #include <sys/types.h>   /* XXX for ssize_t on some platforms */
 
 /* this block of #ifs should be kept exactly identical between
-   c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */
+   c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py
+   and cffi/_cffi_include.h */
 #if defined(_MSC_VER)
 # include <malloc.h>   /* for alloca() */
 # if _MSC_VER < 1600   /* MSVC < 2010 */
@@ -661,11 +662,13 @@
 #  include <stdint.h>
 # endif
 # if _MSC_VER < 1800   /* MSVC < 2013 */
-   typedef unsigned char _Bool;
+#  ifndef __cplusplus
+    typedef unsigned char _Bool;
+#  endif
 # endif
 #else
 # include <stdint.h>
-# if (defined (__SVR4) && defined (__sun)) || defined(_AIX)
+# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) || defined(__hpux)
 #  include <alloca.h>
 # endif
 #endif
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -6,5 +6,6 @@
 .. startrev: 558bd00b3dd8
 
 .. branch: cffi-complex
+.. branch: cffi-char16-char32
 
-Part of the upgrade to cffi 1.11
+The two ``cffi-*`` branches are part of the upgrade to cffi 1.11.
diff --git a/pypy/module/_cffi_backend/cffi1_module.py b/pypy/module/_cffi_backend/cffi1_module.py
--- a/pypy/module/_cffi_backend/cffi1_module.py
+++ b/pypy/module/_cffi_backend/cffi1_module.py
@@ -9,7 +9,7 @@
 
 
 VERSION_MIN    = 0x2601
-VERSION_MAX    = 0x27FF
+VERSION_MAX    = 0x28FF
 
 VERSION_EXPORT = 0x0A03
 
diff --git a/pypy/module/_cffi_backend/cffi_opcode.py b/pypy/module/_cffi_backend/cffi_opcode.py
--- a/pypy/module/_cffi_backend/cffi_opcode.py
+++ b/pypy/module/_cffi_backend/cffi_opcode.py
@@ -107,8 +107,10 @@
 PRIM_UINTMAX       = 47
 PRIM_FLOATCOMPLEX  = 48
 PRIM_DOUBLECOMPLEX = 49
+PRIM_CHAR16        = 50
+PRIM_CHAR32        = 51
 
-_NUM_PRIM          = 50
+_NUM_PRIM          = 52
 _UNKNOWN_PRIM          = -1
 _UNKNOWN_FLOAT_PRIM    = -2
 _UNKNOWN_LONG_DOUBLE   = -3
@@ -131,8 +133,12 @@
     'float':              PRIM_FLOAT,
     'double':             PRIM_DOUBLE,
     'long double':        PRIM_LONGDOUBLE,
+    'float _Complex':     PRIM_FLOATCOMPLEX,
+    'double _Complex':    PRIM_DOUBLECOMPLEX,
     '_Bool':              PRIM_BOOL,
     'wchar_t':            PRIM_WCHAR,
+    'char16_t':           PRIM_CHAR16,
+    'char32_t':           PRIM_CHAR32,
     'int8_t':             PRIM_INT8,
     'uint8_t':            PRIM_UINT8,
     'int16_t':            PRIM_INT16,
diff --git a/pypy/module/_cffi_backend/ctypearray.py b/pypy/module/_cffi_backend/ctypearray.py
--- a/pypy/module/_cffi_backend/ctypearray.py
+++ b/pypy/module/_cffi_backend/ctypearray.py
@@ -36,8 +36,7 @@
         datasize = self.size
         #
         if datasize < 0:
-            from pypy.module._cffi_backend import misc
-            w_init, length = misc.get_new_array_length(space, w_init)
+            w_init, length = self.get_new_array_length(w_init)
             try:
                 datasize = ovfcheck(length * self.ctitem.size)
             except OverflowError:
@@ -53,6 +52,29 @@
                 self.convert_from_object(ptr, w_init)
         return cdata
 
+    def get_new_array_length(self, w_value):
+        space = self.space
+        if (space.isinstance_w(w_value, space.w_list) or
+            space.isinstance_w(w_value, space.w_tuple)):
+            return (w_value, space.int_w(space.len(w_value)))
+        elif space.isinstance_w(w_value, space.w_bytes):
+            # from a string, we add the null terminator
+            s = space.bytes_w(w_value)
+            return (w_value, len(s) + 1)
+        elif space.isinstance_w(w_value, space.w_unicode):
+            from pypy.module._cffi_backend import wchar_helper
+            u = space.unicode_w(w_value)
+            if self.ctitem.size == 2:
+                length = wchar_helper.unicode_size_as_char16(u)
+            else:
+                length = wchar_helper.unicode_size_as_char32(u)
+            return (w_value, length + 1)
+        else:
+            explicitlength = space.getindex_w(w_value, space.w_OverflowError)
+            if explicitlength < 0:
+                raise oefmt(space.w_ValueError, "negative array length")
+            return (space.w_None, explicitlength)
+
     def _check_subscript_index(self, w_cdata, i):
         space = self.space
         if i < 0:
diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py
--- a/pypy/module/_cffi_backend/ctypeprim.py
+++ b/pypy/module/_cffi_backend/ctypeprim.py
@@ -10,7 +10,7 @@
 from rpython.rtyper.tool import rfficache
 
 from pypy.interpreter.error import oefmt
-from pypy.module._cffi_backend import cdataobj, misc
+from pypy.module._cffi_backend import cdataobj, misc, wchar_helper
 from pypy.module._cffi_backend.ctypeobj import W_CType
 
 
@@ -42,11 +42,13 @@
     def cast_unicode(self, w_ob):
         space = self.space
         s = space.unicode_w(w_ob)
-        if len(s) != 1:
+        try:
+            ordinal = wchar_helper.unicode_to_ordinal(s)
+        except ValueError:
             raise oefmt(space.w_TypeError,
                         "cannot cast unicode string of length %d to ctype '%s'",
                         len(s), self.name)
-        return ord(s[0])
+        return intmask(ordinal)
 
     def cast(self, w_ob):
         from pypy.module._cffi_backend import ctypeptr
@@ -148,53 +150,83 @@
         return self.space.newbytes(s)
 
 
-# XXX explicitly use an integer type instead of lltype.UniChar here,
-# because for now the latter is defined as unsigned by RPython (even
-# though it may be signed when 'wchar_t' is written to C).
-WCHAR_INT = {(2, False): rffi.USHORT,
-             (4, False): rffi.UINT,
-             (4, True): rffi.INT}[rffi.sizeof(lltype.UniChar),
-                                  rfficache.signof_c_type('wchar_t')]
-WCHAR_INTP = rffi.CArrayPtr(WCHAR_INT)
+class W_CTypePrimitiveUniChar(W_CTypePrimitiveCharOrUniChar):
+    _attrs_            = ['is_signed_wchar']
+    _immutable_fields_ = ['is_signed_wchar']
 
-class W_CTypePrimitiveUniChar(W_CTypePrimitiveCharOrUniChar):
-    _attrs_ = []
+    _wchar_is_signed = rfficache.signof_c_type('wchar_t')
 
-    if rffi.r_wchar_t.SIGN:
-        def write_raw_integer_data(self, w_cdata, value):
-            w_cdata.write_raw_signed_data(value)
+    def __init__(self, space, size, name, name_position, align):
+        W_CTypePrimitiveCharOrUniChar.__init__(self, space, size, name,
+                                               name_position, align)
+        self.is_signed_wchar = self._wchar_is_signed and (name == "wchar_t")
+        # "char16_t" and "char32_t" are always unsigned
 
     def cast_to_int(self, cdata):
-        unichardata = rffi.cast(WCHAR_INTP, cdata)
-        return self.space.newint(unichardata[0])
+        if self.is_signed_wchar:
+            value = misc.read_raw_long_data(cdata, self.size)
+            return self.space.newint(value)
+        else:
+            value = misc.read_raw_ulong_data(cdata, self.size)
+            if self.size < rffi.sizeof(lltype.Signed):
+                return self.space.newint(intmask(value))
+            else:
+                return self.space.newint(value)    # r_uint => 'long' object
 
     def convert_to_object(self, cdata):
-        unichardata = rffi.cast(rffi.CWCHARP, cdata)
-        return self.space.newunicode(unichardata[0])
+        if self.is_signed_wchar:
+            unichardata = rffi.cast(rffi.CWCHARP, cdata)
+            return self.space.newunicode(unichardata[0])
+        else:
+            value = misc.read_raw_ulong_data(cdata, self.size)   # r_uint
+            try:
+                u = wchar_helper.ordinal_to_unicode(value)
+            except wchar_helper.OutOfRange as e:
+                raise oefmt(self.space.w_ValueError,
+                            "char32_t out of range for "
+                            "conversion to unicode: %s", hex(e.ordinal))
+            return self.space.newunicode(u)
 
     def string(self, cdataobj, maxlen):
         with cdataobj as ptr:
             w_res = self.convert_to_object(ptr)
         return w_res
 
-    def _convert_to_unichar(self, w_ob):
+    def _convert_to_charN_t(self, w_ob):
+        # returns a r_uint.  If self.size == 2, it is smaller than 0x10000
         space = self.space
         if space.isinstance_w(w_ob, space.w_unicode):
-            s = space.unicode_w(w_ob)
-            if len(s) == 1:
-                return s[0]
-        if (isinstance(w_ob, cdataobj.W_CData) and
-               isinstance(w_ob.ctype, W_CTypePrimitiveUniChar)):
+            u = space.unicode_w(w_ob)
+            try:
+                ordinal = wchar_helper.unicode_to_ordinal(u)
+            except ValueError:
+                pass
+            else:
+                if self.size == 2 and ordinal > 0xffff:
+                    raise self._convert_error("single character <= 0xFFFF",
+                                              w_ob)
+                return ordinal
+        elif (isinstance(w_ob, cdataobj.W_CData) and
+               isinstance(w_ob.ctype, W_CTypePrimitiveUniChar) and
+               w_ob.ctype.size == self.size):
             with w_ob as ptr:
-                return rffi.cast(rffi.CWCHARP, ptr)[0]
+                return misc.read_raw_ulong_data(ptr, self.size)
         raise self._convert_error("unicode string of length 1", w_ob)
 
     def convert_from_object(self, cdata, w_ob):
-        value = self._convert_to_unichar(w_ob)
-        rffi.cast(rffi.CWCHARP, cdata)[0] = value
+        ordinal = self._convert_to_charN_t(w_ob)
+        misc.write_raw_unsigned_data(cdata, ordinal, self.size)
 
     def unpack_ptr(self, w_ctypeptr, ptr, length):
-        u = rffi.wcharpsize2unicode(rffi.cast(rffi.CWCHARP, ptr), length)
+        if self.size == 2:
+            u = wchar_helper.unicode_from_char16(ptr, length)
+        else:
+            try:
+                u = wchar_helper.unicode_from_char32(ptr, length)
+            except wchar_helper.OutOfRange as e:
+                raise oefmt(self.space.w_ValueError,
+                            "char32_t out of range for "
+                            "conversion to unicode: %s", hex(e.ordinal))
         return self.space.newunicode(u)
 
 
diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py
--- a/pypy/module/_cffi_backend/ctypeptr.py
+++ b/pypy/module/_cffi_backend/ctypeptr.py
@@ -4,9 +4,9 @@
 
 from rpython.rlib import rposix
 from rpython.rlib.rarithmetic import ovfcheck
-from rpython.rtyper.annlowlevel import llstr, llunicode
+from rpython.rtyper.annlowlevel import llstr
 from rpython.rtyper.lltypesystem import lltype, rffi
-from rpython.rtyper.lltypesystem.rstr import copy_string_to_raw, copy_unicode_to_raw
+from rpython.rtyper.lltypesystem.rstr import copy_string_to_raw
 
 from pypy.interpreter.error import OperationError, oefmt, wrap_oserror
 from pypy.module._cffi_backend import cdataobj, misc, ctypeprim, ctypevoid
@@ -88,18 +88,28 @@
             if n != self.length:
                 cdata[n] = '\x00'
         elif isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveUniChar):
+            from pypy.module._cffi_backend import wchar_helper
             if not space.isinstance_w(w_ob, space.w_unicode):
                 raise self._convert_error("unicode or list or tuple", w_ob)
             s = space.unicode_w(w_ob)
-            n = len(s)
+            if self.ctitem.size == 2:
+                n = wchar_helper.unicode_size_as_char16(s)
+            else:
+                n = wchar_helper.unicode_size_as_char32(s)
             if self.length >= 0 and n > self.length:
                 raise oefmt(space.w_IndexError,
                             "initializer unicode string is too long for '%s' "
                             "(got %d characters)", self.name, n)
-            unichardata = rffi.cast(rffi.CWCHARP, cdata)
-            copy_unicode_to_raw(llunicode(s), unichardata, 0, n)
-            if n != self.length:
-                unichardata[n] = u'\x00'
+            add_final_zero = (n != self.length)
+            if self.ctitem.size == 2:
+                try:
+                    wchar_helper.unicode_to_char16(s, cdata, n, add_final_zero)
+                except wchar_helper.OutOfRange as e:
+                    raise oefmt(self.space.w_ValueError,
+                                "unicode character ouf of range for "
+                                "conversion to char16_t: %s", hex(e.ordinal))
+            else:
+                wchar_helper.unicode_to_char32(s, cdata, n, add_final_zero)
         else:
             raise self._convert_error("list or tuple", w_ob)
 
@@ -134,12 +144,12 @@
                 #
                 # pointer to a wchar_t: builds and returns a unicode
                 if self.is_unichar_ptr_or_array():
-                    cdata = rffi.cast(rffi.CWCHARP, ptr)
-                    if length < 0:
-                        u = rffi.wcharp2unicode(cdata)
+                    from pypy.module._cffi_backend import wchar_helper
+                    if self.ctitem.size == 2:
+                        length = wchar_helper.measure_length_16(ptr, length)
                     else:
-                        u = rffi.wcharp2unicoden(cdata, length)
-                    return space.newunicode(u)
+                        length = wchar_helper.measure_length_32(ptr, length)
+                    return self.ctitem.unpack_ptr(self, ptr, length)
         #
         return W_CType.string(self, cdataobj, maxlen)
 
@@ -302,9 +312,18 @@
         if (space.isinstance_w(w_init, space.w_list) or
             space.isinstance_w(w_init, space.w_tuple)):
             length = space.int_w(space.len(w_init))
-        elif space.isinstance_w(w_init, space.w_basestring):
+        elif space.isinstance_w(w_init, space.w_bytes):
             # from a string, we add the null terminator
-            length = space.int_w(space.len(w_init)) + 1
+            s = space.bytes_w(w_init)
+            length = len(s) + 1
+        elif space.isinstance_w(w_init, space.w_unicode):
+            from pypy.module._cffi_backend import wchar_helper
+            u = space.unicode_w(w_init)
+            if self.ctitem.size == 2:
+                length = wchar_helper.unicode_size_as_char16(u)
+            else:
+                length = wchar_helper.unicode_size_as_char32(u)
+            length += 1
         elif self.is_file:
             result = self.prepare_file(w_init)
             if result:
diff --git a/pypy/module/_cffi_backend/ctypestruct.py b/pypy/module/_cffi_backend/ctypestruct.py
--- a/pypy/module/_cffi_backend/ctypestruct.py
+++ b/pypy/module/_cffi_backend/ctypestruct.py
@@ -244,7 +244,7 @@
         ct = self.ctype
         if isinstance(ct, ctypearray.W_CTypeArray) and ct.length < 0:
             space = ct.space
-            w_ob, varsizelength = misc.get_new_array_length(space, w_ob)
+            w_ob, varsizelength = ct.get_new_array_length(w_ob)
             if optvarsize != -1:
                 # in this mode, the only purpose of this function is to compute
                 # the real size of the structure from a var-sized C99 array
diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py
--- a/pypy/module/_cffi_backend/misc.py
+++ b/pypy/module/_cffi_backend/misc.py
@@ -290,21 +290,6 @@
 
 # ____________________________________________________________
 
-def get_new_array_length(space, w_value):
-    if (space.isinstance_w(w_value, space.w_list) or
-        space.isinstance_w(w_value, space.w_tuple)):
-        return (w_value, space.int_w(space.len(w_value)))
-    elif space.isinstance_w(w_value, space.w_basestring):
-        # from a string, we add the null terminator
-        return (w_value, space.int_w(space.len(w_value)) + 1)
-    else:
-        explicitlength = space.getindex_w(w_value, space.w_OverflowError)
-        if explicitlength < 0:
-            raise oefmt(space.w_ValueError, "negative array length")
-        return (space.w_None, explicitlength)
-
-# ____________________________________________________________
-
 @specialize.arg(0)
 def _raw_memcopy_tp(TPP, source, dest):
     # in its own function: LONGLONG may make the whole function jit-opaque
diff --git a/pypy/module/_cffi_backend/newtype.py b/pypy/module/_cffi_backend/newtype.py
--- a/pypy/module/_cffi_backend/newtype.py
+++ b/pypy/module/_cffi_backend/newtype.py
@@ -111,6 +111,9 @@
 eptype("size_t",    rffi.SIZE_T,    ctypeprim.W_CTypePrimitiveUnsigned)
 eptype("ssize_t",   rffi.SSIZE_T,   ctypeprim.W_CTypePrimitiveSigned)
 
+eptypesize("char16_t", 2, ctypeprim.W_CTypePrimitiveUniChar)
+eptypesize("char32_t", 4, ctypeprim.W_CTypePrimitiveUniChar)
+
 _WCTSigned = ctypeprim.W_CTypePrimitiveSigned
 _WCTUnsign = ctypeprim.W_CTypePrimitiveUnsigned
 
diff --git a/pypy/module/_cffi_backend/realize_c_type.py b/pypy/module/_cffi_backend/realize_c_type.py
--- a/pypy/module/_cffi_backend/realize_c_type.py
+++ b/pypy/module/_cffi_backend/realize_c_type.py
@@ -73,6 +73,8 @@
         "uintmax_t",
         "float _Complex",
         "double _Complex",
+        "char16_t",
+        "char32_t",
         ]
     assert len(NAMES) == cffi_opcode._NUM_PRIM
 
diff --git a/pypy/module/_cffi_backend/src/parse_c_type.c b/pypy/module/_cffi_backend/src/parse_c_type.c
--- a/pypy/module/_cffi_backend/src/parse_c_type.c
+++ b/pypy/module/_cffi_backend/src/parse_c_type.c
@@ -505,6 +505,7 @@
 
     case '1':
         if (size == 8 && !memcmp(p, "uint16", 6)) return _CFFI_PRIM_UINT16;
+        if (size == 8 && !memcmp(p, "char16", 6)) return _CFFI_PRIM_CHAR16;
         break;
 
     case '2':
@@ -513,6 +514,7 @@
 
     case '3':
         if (size == 8 && !memcmp(p, "uint32", 6)) return _CFFI_PRIM_UINT32;
+        if (size == 8 && !memcmp(p, "char32", 6)) return _CFFI_PRIM_CHAR32;
         break;
 
     case '4':
diff --git a/pypy/module/_cffi_backend/src/parse_c_type.h b/pypy/module/_cffi_backend/src/parse_c_type.h
--- a/pypy/module/_cffi_backend/src/parse_c_type.h
+++ b/pypy/module/_cffi_backend/src/parse_c_type.h
@@ -80,8 +80,10 @@
 #define _CFFI_PRIM_UINTMAX      47
 #define _CFFI_PRIM_FLOATCOMPLEX 48
 #define _CFFI_PRIM_DOUBLECOMPLEX 49
+#define _CFFI_PRIM_CHAR16       50
+#define _CFFI_PRIM_CHAR32       51
 
-#define _CFFI__NUM_PRIM         50
+#define _CFFI__NUM_PRIM         52
 #define _CFFI__UNKNOWN_PRIM           (-1)
 #define _CFFI__UNKNOWN_FLOAT_PRIM     (-2)
 #define _CFFI__UNKNOWN_LONG_DOUBLE    (-3)
diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py
--- a/pypy/module/_cffi_backend/test/_backend_test_c.py
+++ b/pypy/module/_cffi_backend/test/_backend_test_c.py
@@ -1925,7 +1925,11 @@
         assert string(a, 8).startswith(b'ABC')  # may contain additional garbage
 
 def test_string_wchar():
-    BWChar = new_primitive_type("wchar_t")
+    for typename in ["wchar_t", "char16_t", "char32_t"]:
+        _test_string_wchar_variant(typename)
+
+def _test_string_wchar_variant(typename):
+    BWChar = new_primitive_type(typename)
     assert string(cast(BWChar, 42)) == u+'*'
     assert string(cast(BWChar, 0x4253)) == u+'\u4253'
     assert string(cast(BWChar, 0)) == u+'\x00'
@@ -2087,22 +2091,44 @@
     py.test.raises(TypeError, newp, BStructPtr, [cast(BFunc2, 0)])
 
 def test_wchar():
-    BWChar = new_primitive_type("wchar_t")
+    _test_wchar_variant("wchar_t")
+    if sys.platform.startswith("linux"):
+        BWChar = new_primitive_type("wchar_t")
+        assert sizeof(BWChar) == 4
+        assert int(cast(BWChar, -1)) == -1        # signed, on linux
+
+def test_char16():
+    BChar16 = new_primitive_type("char16_t")
+    assert sizeof(BChar16) == 2
+    _test_wchar_variant("char16_t")
+    assert int(cast(BChar16, -1)) == 0xffff       # always unsigned
+
+def test_char32():
+    BChar32 = new_primitive_type("char32_t")
+    assert sizeof(BChar32) == 4
+    _test_wchar_variant("char32_t")
+    assert int(cast(BChar32, -1)) == 0xffffffff   # always unsigned
+
+def _test_wchar_variant(typename):
+    BWChar = new_primitive_type(typename)
     BInt = new_primitive_type("int")
     pyuni4 = {1: True, 2: False}[len(u+'\U00012345')]
     wchar4 = {2: False, 4: True}[sizeof(BWChar)]
-    assert str(cast(BWChar, 0x45)) == "<cdata 'wchar_t' %s'E'>" % (
-        mandatory_u_prefix,)
-    assert str(cast(BWChar, 0x1234)) == "<cdata 'wchar_t' %s'\u1234'>" % (
-        mandatory_u_prefix,)
-    if wchar4:
-        if not _hacked_pypy_uni4():
+    assert str(cast(BWChar, 0x45)) == "<cdata '%s' %s'E'>" % (
+        typename, mandatory_u_prefix)
+    assert str(cast(BWChar, 0x1234)) == "<cdata '%s' %s'\u1234'>" % (
+        typename, mandatory_u_prefix)
+    if not _hacked_pypy_uni4():
+        if wchar4:
             x = cast(BWChar, 0x12345)
-            assert str(x) == "<cdata 'wchar_t' %s'\U00012345'>" % (
-                mandatory_u_prefix,)
+            assert str(x) == "<cdata '%s' %s'\U00012345'>" % (
+                typename, mandatory_u_prefix)
             assert int(x) == 0x12345
-    else:
-        assert not pyuni4
+        else:
+            x = cast(BWChar, 0x18345)
+            assert str(x) == "<cdata '%s' %s'\u8345'>" % (
+                typename, mandatory_u_prefix)
+            assert int(x) == 0x8345
     #
     BWCharP = new_pointer_type(BWChar)
     BStruct = new_struct_type("struct foo_s")
@@ -2117,9 +2143,9 @@
     s.a1 = u+'\u1234'
     assert s.a1 == u+'\u1234'
     if pyuni4:
-        assert wchar4
-        s.a1 = u+'\U00012345'
-        assert s.a1 == u+'\U00012345'
+        if wchar4:
+            s.a1 = u+'\U00012345'
+            assert s.a1 == u+'\U00012345'
     elif wchar4:
         if not _hacked_pypy_uni4():
             s.a1 = cast(BWChar, 0x12345)
@@ -2154,17 +2180,17 @@
         py.test.raises(IndexError, 'a[4]')
     #
     w = cast(BWChar, 'a')
-    assert repr(w) == "<cdata 'wchar_t' %s'a'>" % mandatory_u_prefix
+    assert repr(w) == "<cdata '%s' %s'a'>" % (typename, mandatory_u_prefix)
     assert str(w) == repr(w)
     assert string(w) == u+'a'
     assert int(w) == ord('a')
     w = cast(BWChar, 0x1234)
-    assert repr(w) == "<cdata 'wchar_t' %s'\u1234'>" % mandatory_u_prefix
+    assert repr(w) == "<cdata '%s' %s'\u1234'>" % (typename, mandatory_u_prefix)
     assert str(w) == repr(w)
     assert string(w) == u+'\u1234'
     assert int(w) == 0x1234
     w = cast(BWChar, u+'\u8234')
-    assert repr(w) == "<cdata 'wchar_t' %s'\u8234'>" % mandatory_u_prefix
+    assert repr(w) == "<cdata '%s' %s'\u8234'>" % (typename, mandatory_u_prefix)
     assert str(w) == repr(w)
     assert string(w) == u+'\u8234'
     assert int(w) == 0x8234
@@ -2172,8 +2198,8 @@
     assert repr(w) == "<cdata 'int' 4660>"
     if wchar4 and not _hacked_pypy_uni4():
         w = cast(BWChar, u+'\U00012345')
-        assert repr(w) == "<cdata 'wchar_t' %s'\U00012345'>" % (
-            mandatory_u_prefix,)
+        assert repr(w) == "<cdata '%s' %s'\U00012345'>" % (
+            typename, mandatory_u_prefix)
         assert str(w) == repr(w)
         assert string(w) == u+'\U00012345'
         assert int(w) == 0x12345
@@ -2200,7 +2226,7 @@
     py.test.raises(RuntimeError, string, q)
     #
     def cb(p):
-        assert repr(p).startswith("<cdata 'wchar_t *' 0x")
+        assert repr(p).startswith("<cdata '%s *' 0x" % typename)
         return len(string(p))
     BFunc = new_function_type((BWCharP,), BInt, False)
     f = callback(BFunc, cb, -42)
@@ -2213,6 +2239,27 @@
         x = cast(BWChar, -1)
         py.test.raises(ValueError, string, x)
 
+def test_wchar_variants_mix():
+    BWChar  = new_primitive_type("wchar_t")
+    BChar16 = new_primitive_type("char16_t")
+    BChar32 = new_primitive_type("char32_t")
+    assert int(cast(BChar32, cast(BChar16, -2))) == 0xfffe
+    assert int(cast(BWChar, cast(BChar16, -2))) == 0xfffe
+    assert int(cast(BChar16, cast(BChar32, 0x0001f345))) == 0xf345
+    assert int(cast(BChar16, cast(BWChar, 0x0001f345))) == 0xf345
+    #
+    BChar16A = new_array_type(new_pointer_type(BChar16), None)
+    BChar32A = new_array_type(new_pointer_type(BChar32), None)
+    x = cast(BChar32, 'A')
+    py.test.raises(TypeError, newp, BChar16A, [x])
+    x = cast(BChar16, 'A')
+    py.test.raises(TypeError, newp, BChar32A, [x])
+    #
+    a = newp(BChar16A, u+'\U00012345')
+    assert len(a) == 3
+    a = newp(BChar32A, u+'\U00012345')
+    assert len(a) == 2   # even if the Python unicode string above is 2 chars
+
 def test_keepalive_struct():
     # exception to the no-keepalive rule: p=newp(BStructPtr) returns a
     # pointer owning the memory, and p[0] returns a pointer to the
@@ -3439,14 +3486,15 @@
     py.test.raises(TypeError, "p[1:5] = u+'XYZT'")
     py.test.raises(TypeError, "p[1:5] = [1, 2, 3, 4]")
     #
-    BUniChar = new_primitive_type("wchar_t")
-    BArray = new_array_type(new_pointer_type(BUniChar), None)
-    p = newp(BArray, u+"foobar")
-    p[2:5] = [u+"*", u+"Z", u+"T"]
-    p[1:3] = u+"XY"
-    assert list(p) == [u+"f", u+"X", u+"Y", u+"Z", u+"T", u+"r", u+"\x00"]
-    py.test.raises(TypeError, "p[1:5] = b'XYZT'")
-    py.test.raises(TypeError, "p[1:5] = [1, 2, 3, 4]")
+    for typename in ["wchar_t", "char16_t", "char32_t"]:
+        BUniChar = new_primitive_type(typename)
+        BArray = new_array_type(new_pointer_type(BUniChar), None)
+        p = newp(BArray, u+"foobar")
+        p[2:5] = [u+"*", u+"Z", u+"T"]
+        p[1:3] = u+"XY"
+        assert list(p) == [u+"f", u+"X", u+"Y", u+"Z", u+"T", u+"r", u+"\x00"]
+        py.test.raises(TypeError, "p[1:5] = b'XYZT'")
+        py.test.raises(TypeError, "p[1:5] = [1, 2, 3, 4]")
 
 def test_void_p_arithmetic():
     BVoid = new_void_type()
@@ -3759,10 +3807,12 @@
     p0 = p
     assert unpack(p, 10) == b"abc\x00def\x00\x00\x00"
     assert unpack(p+1, 5) == b"bc\x00de"
-    BWChar = new_primitive_type("wchar_t")
-    BArray = new_array_type(new_pointer_type(BWChar), 10)   # wchar_t[10]
-    p = newp(BArray, u"abc\x00def")
-    assert unpack(p, 10) == u"abc\x00def\x00\x00\x00"
+
+    for typename in ["wchar_t", "char16_t", "char32_t"]:
+        BWChar = new_primitive_type(typename)
+        BArray = new_array_type(new_pointer_type(BWChar), 10)   # wchar_t[10]
+        p = newp(BArray, u"abc\x00def")
+        assert unpack(p, 10) == u"abc\x00def\x00\x00\x00"
 
     for typename, samples in [
             ("uint8_t",  [0, 2**8-1]),
diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py
--- a/pypy/module/_cffi_backend/test/test_ffi_obj.py
+++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py
@@ -555,3 +555,11 @@
         import _cffi_backend as _cffi1_backend
         ffi = _cffi1_backend.FFI()
         raises(ffi.error, ffi.cast, "int[-5]", 0)
+
+    def test_char32_t(self):
+        import _cffi_backend as _cffi1_backend
+        ffi = _cffi1_backend.FFI()
+        z = ffi.new("char32_t[]", u'\U00012345')
+        assert len(z) == 2
+        assert ffi.cast("int *", z)[0] == 0x12345
+        assert list(z) == [u'\U00012345', u'\x00']   # maybe a 2-unichars str
diff --git a/pypy/module/_cffi_backend/wchar_helper.py b/pypy/module/_cffi_backend/wchar_helper.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_cffi_backend/wchar_helper.py
@@ -0,0 +1,192 @@
+from rpython.rlib.objectmodel import specialize
+from rpython.rlib.rarithmetic import r_uint, r_ulonglong, intmask
+from rpython.rtyper.annlowlevel import llunicode
+from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rtyper.lltypesystem.rstr import copy_unicode_to_raw
+
+SIZE_UNICODE = rffi.sizeof(lltype.UniChar)
+
+
+if SIZE_UNICODE == 4:
+    def ordinal_to_unicode(ordinal):    # 'ordinal' is a r_uint
+        return unichr(intmask(ordinal))
+else:
+    def ordinal_to_unicode(ordinal):    # 'ordinal' is a r_uint
+        if ordinal <= 0xffff:
+            return unichr(intmask(ordinal))
+        elif ordinal <= 0x10ffff:
+            ordinal = intmask(ordinal - 0x10000)
+            return (unichr(0xD800 | (ordinal >> 10)) +
+                    unichr(0xDC00 | (ordinal & 0x3FF)))
+        else:
+            raise OutOfRange(ordinal)
+
+def is_surrogate(u, index):
+    return (unichr(0xD800) <= u[index + 0] <= unichr(0xDBFF) and
+            unichr(0xDC00) <= u[index + 1] <= unichr(0xDFFF))
+
+def as_surrogate(u, index):
+    ordinal = (ord(u[index + 0]) - 0xD800) << 10
+    ordinal |= (ord(u[index + 1]) - 0xDC00)
+    return r_uint(ordinal + 0x10000)
+
+def unicode_to_ordinal(u):
+    if len(u) == 1:
+        u = ord(u[0])
+        return r_uint(u)
+    elif SIZE_UNICODE == 2:
+        if len(u) == 2 and is_surrogate(u, 0):
+            return r_uint(as_surrogate(u, 0))
+    raise ValueError
+
+
+class OutOfRange(Exception):
+    ordinal = 0
+
+    def __init__(self, ordinal):
+        ordinal = intmask(rffi.cast(rffi.INT, ordinal))
+        self.ordinal = ordinal
+
+def _unicode_from_wchar(ptr, length):
+    return rffi.wcharpsize2unicode(rffi.cast(rffi.CWCHARP, ptr), length)
+
+
+if SIZE_UNICODE == 2:
+    def unicode_from_char32(ptr, length):
+        # 'ptr' is a pointer to 'length' 32-bit integers
+        ptr = rffi.cast(rffi.UINTP, ptr)
+        alloc = length
+        for i in range(length):
+            if rffi.cast(lltype.Unsigned, ptr[i]) > 0xFFFF:
+                alloc += 1
+
+        u = [u'\x00'] * alloc
+        j = 0
+        for i in range(length):
+            ordinal = rffi.cast(lltype.Unsigned, ptr[i])
+            if ordinal > 0xFFFF:
+                if ordinal > 0x10FFFF:
+                    raise OutOfRange(ordinal)
+                ordinal = intmask(ordinal - 0x10000)
+                u[j] = unichr(0xD800 | (ordinal >> 10))
+                j += 1
+                u[j] = unichr(0xDC00 | (ordinal & 0x3FF))
+                j += 1
+            else:
+                u[j] = unichr(intmask(ordinal))
+                j += 1
+        assert j == len(u)
+        return u''.join(u)
+
+    unicode_from_char16 = _unicode_from_wchar
+
+else:
+    unicode_from_char32 = _unicode_from_wchar
+
+    def unicode_from_char16(ptr, length):
+        # 'ptr' is a pointer to 'length' 16-bit integers
+        ptr = rffi.cast(rffi.USHORTP, ptr)
+        u = [u'\x00'] * length
+        i = 0
+        j = 0
+        while j < length:
+            ch = intmask(ptr[j])
+            j += 1
+            if 0xD800 <= ch <= 0xDBFF and j < length:
+                ch2 = intmask(ptr[j])
+                if 0xDC00 <= ch2 <= 0xDFFF:
+                    ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000
+                    j += 1
+            u[i] = unichr(ch)
+            i += 1
+        del u[i:]
+        return u''.join(u)
+
+
+ at specialize.ll()
+def _measure_length(ptr, maxlen):
+    result = 0
+    if maxlen < 0:
+        while intmask(ptr[result]) != 0:
+            result += 1
+    else:
+        while result < maxlen and intmask(ptr[result]) != 0:
+            result += 1
+    return result
+
+def measure_length_16(ptr, maxlen=-1):
+    return _measure_length(rffi.cast(rffi.USHORTP, ptr), maxlen)
+
+def measure_length_32(ptr, maxlen=-1):
+    return _measure_length(rffi.cast(rffi.UINTP, ptr), maxlen)
+
+
+def unicode_size_as_char16(u):
+    result = len(u)
+    if SIZE_UNICODE == 4:
+        for i in range(result):
+            if ord(u[i]) > 0xFFFF:
+                result += 1
+    return result
+
+def unicode_size_as_char32(u):
+    result = len(u)
+    if SIZE_UNICODE == 2 and result > 1:
+        for i in range(result - 1):
+            if is_surrogate(u, i):
+                result -= 1
+    return result
+
+
+def _unicode_to_wchar(u, target_ptr, target_length, add_final_zero):
+    # 'target_ptr' is a raw pointer to 'target_length' wchars;
+    # we assume here that target_length == len(u).
+    unichardata = rffi.cast(rffi.CWCHARP, target_ptr)
+    copy_unicode_to_raw(llunicode(u), unichardata, 0, target_length)
+    if add_final_zero:
+        unichardata[target_length] = u'\x00'
+
+
+if SIZE_UNICODE == 2:
+    def unicode_to_char32(u, target_ptr, target_length, add_final_zero):
+        # 'target_ptr' is a raw pointer to 'target_length' 32-bit integers;
+        # we assume here that target_length == unicode_size_as_char32(u).
+        ptr = rffi.cast(rffi.UINTP, target_ptr)
+        src_index = 0
+        last_surrogate_pos = len(u) - 2
+        for i in range(target_length):
+            if src_index <= last_surrogate_pos and is_surrogate(u, src_index):
+                ordinal = as_surrogate(u, src_index)
+                src_index += 2
+            else:
+                ordinal = r_uint(ord(u[src_index]))
+                src_index += 1
+            ptr[i] = rffi.cast(rffi.UINT, ordinal)
+        if add_final_zero:
+            ptr[target_length] = rffi.cast(rffi.UINT, 0)
+
+    unicode_to_char16 = _unicode_to_wchar
+
+else:
+    unicode_to_char32 = _unicode_to_wchar
+
+    def unicode_to_char16(u, target_ptr, target_length, add_final_zero):
+        # 'target_ptr' is a raw pointer to 'target_length' 16-bit integers;
+        # we assume here that target_length == unicode_size_as_char16(u).
+        ptr = rffi.cast(rffi.USHORTP, target_ptr)
+        for uc in u:
+            ordinal = ord(uc)
+            if ordinal > 0xFFFF:
+                if ordinal > 0x10FFFF:
+                    raise OutOfRange(ordinal)
+                ordinal -= 0x10000
+                ptr[0] = rffi.cast(rffi.USHORT, 0xD800 | (ordinal >> 10))
+                ptr[1] = rffi.cast(rffi.USHORT, 0xDC00 | (ordinal & 0x3FF))
+                ptr = rffi.ptradd(ptr, 2)
+            else:
+                ptr[0] = rffi.cast(rffi.USHORT, ordinal)
+                ptr = rffi.ptradd(ptr, 1)
+        assert ptr == (
+            rffi.ptradd(rffi.cast(rffi.USHORTP, target_ptr), target_length))
+        if add_final_zero:
+            ptr[0] = rffi.cast(rffi.USHORT, 0)
diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ffi_backend.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ffi_backend.py
--- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ffi_backend.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ffi_backend.py
@@ -2,6 +2,7 @@
 import py, sys, platform
 import pytest
 from pypy.module.test_lib_pypy.cffi_tests.cffi0 import backend_tests, test_function, test_ownlib
+from pypy.module.test_lib_pypy.cffi_tests.support import u
 from cffi import FFI
 import _cffi_backend
 
@@ -398,6 +399,8 @@
             "double",
             "long double",
             "wchar_t",
+            "char16_t",
+            "char32_t",
             "_Bool",
             "int8_t",
             "uint8_t",
@@ -509,3 +512,43 @@
             py.test.raises(TypeError, cd)
             py.test.raises(TypeError, cd, ffi.NULL)
             py.test.raises(TypeError, cd, ffi.typeof("void *"))
+
+    def test_explicitly_defined_char16_t(self):
+        ffi = FFI()
+        ffi.cdef("typedef uint16_t char16_t;")
+        x = ffi.cast("char16_t", 1234)
+        assert ffi.typeof(x) is ffi.typeof("uint16_t")
+
+    def test_char16_t(self):
+        ffi = FFI()
+        x = ffi.new("char16_t[]", 5)
+        assert len(x) == 5 and ffi.sizeof(x) == 10
+        x[2] = u+'\u1324'
+        assert x[2] == u+'\u1324'
+        y = ffi.new("char16_t[]", u+'\u1234\u5678')
+        assert len(y) == 3
+        assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00']
+        assert ffi.string(y) == u+'\u1234\u5678'
+        z = ffi.new("char16_t[]", u+'\U00012345')
+        assert len(z) == 3
+        assert list(z) == [u+'\ud808', u+'\udf45', u+'\x00']
+        assert ffi.string(z) == u+'\U00012345'
+
+    def test_char32_t(self):
+        ffi = FFI()
+        x = ffi.new("char32_t[]", 5)
+        assert len(x) == 5 and ffi.sizeof(x) == 20
+        x[3] = u+'\U00013245'
+        assert x[3] == u+'\U00013245'
+        y = ffi.new("char32_t[]", u+'\u1234\u5678')
+        assert len(y) == 3
+        assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00']
+        py_uni = u+'\U00012345'
+        z = ffi.new("char32_t[]", py_uni)
+        assert len(z) == 2
+        assert list(z) == [py_uni, u+'\x00']    # maybe a 2-unichars string
+        assert ffi.string(z) == py_uni
+        if len(py_uni) == 1:    # 4-bytes unicodes in Python
+            s = ffi.new("char32_t[]", u+'\ud808\udf00')
+            assert len(s) == 3
+            assert list(s) == [u+'\ud808', u+'\udf00', u+'\x00']
diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ownlib.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ownlib.py
--- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ownlib.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ownlib.py
@@ -3,6 +3,7 @@
 import subprocess, weakref
 from cffi import FFI
 from cffi.backend_ctypes import CTypesBackend
+from pypy.module.test_lib_pypy.cffi_tests.support import u
 
 
 SOURCE = """\
@@ -93,6 +94,15 @@
 }
 
 EXPORT int my_array[7] = {0, 1, 2, 3, 4, 5, 6};
+
+EXPORT unsigned short foo_2bytes(unsigned short a)
+{
+    return (unsigned short)(a + 42);
+}
+EXPORT unsigned int foo_4bytes(unsigned int a)
+{
+    return (unsigned int)(a + 42);
+}
 """
 
 class TestOwnLib(object):
@@ -301,3 +311,18 @@
         pfn = ffi.addressof(lib, "test_getting_errno")
         assert ffi.typeof(pfn) == ffi.typeof("int(*)(void)")
         assert pfn == lib.test_getting_errno
+
+    def test_char16_char32_t(self):
+        if self.module is None:
+            py.test.skip("fix the auto-generation of the tiny test lib")
+        if self.Backend is CTypesBackend:
+            py.test.skip("not implemented with the ctypes backend")
+        ffi = FFI(backend=self.Backend())
+        ffi.cdef("""
+            char16_t foo_2bytes(char16_t);
+            char32_t foo_4bytes(char32_t);
+        """)
+        lib = ffi.dlopen(self.module)
+        assert lib.foo_2bytes(u+'\u1234') == u+'\u125e'
+        assert lib.foo_4bytes(u+'\u1234') == u+'\u125e'
+        assert lib.foo_4bytes(u+'\U00012345') == u+'\U0001236f'
diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py
--- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py
@@ -242,7 +242,7 @@
         F = tp.is_float_type()
         X = tp.is_complex_type()
         I = tp.is_integer_type()
-        assert C == (typename in ('char', 'wchar_t'))
+        assert C == (typename in ('char', 'wchar_t', 'char16_t', 'char32_t'))
         assert F == (typename in ('float', 'double', 'long double'))
         assert X == (typename in ('float _Complex', 'double _Complex'))
         assert I + F + C + X == 1      # one and only one of them is true
@@ -385,6 +385,10 @@
     lib = ffi.verify("wchar_t foo(wchar_t x) { return x+1; }")
     assert lib.foo(uniexample1) == uniexample2
 
+def test_char16_char32_type():
+    py.test.skip("XXX test or fully prevent char16_t and char32_t from "
+                 "working in ffi.verify() mode")
+
 def test_no_argument():
     ffi = FFI()
     ffi.cdef("int foo(void);")
diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_new_ffi_1.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_new_ffi_1.py
--- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_new_ffi_1.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_new_ffi_1.py
@@ -1673,6 +1673,8 @@
             "double",
             "long double",
             "wchar_t",
+            "char16_t",
+            "char32_t",
             "_Bool",
             "int8_t",
             "uint8_t",
@@ -1743,3 +1745,30 @@
         exec("from _test_import_from_lib import *", d)
         assert (sorted([x for x in d.keys() if not x.startswith('__')]) ==
                 ['ffi', 'lib'])
+
+    def test_char16_t(self):
+        x = ffi.new("char16_t[]", 5)
+        assert len(x) == 5 and ffi.sizeof(x) == 10
+        x[2] = u+'\u1324'
+        assert x[2] == u+'\u1324'
+        y = ffi.new("char16_t[]", u+'\u1234\u5678')
+        assert len(y) == 3
+        assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00']
+        assert ffi.string(y) == u+'\u1234\u5678'
+        z = ffi.new("char16_t[]", u+'\U00012345')
+        assert len(z) == 3
+        assert list(z) == [u+'\ud808', u+'\udf45', u+'\x00']
+        assert ffi.string(z) == u+'\U00012345'
+
+    def test_char32_t(self):
+        x = ffi.new("char32_t[]", 5)
+        assert len(x) == 5 and ffi.sizeof(x) == 20
+        x[3] = u+'\U00013245'
+        assert x[3] == u+'\U00013245'
+        y = ffi.new("char32_t[]", u+'\u1234\u5678')
+        assert len(y) == 3
+        assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00']
+        z = ffi.new("char32_t[]", u+'\U00012345')
+        assert len(z) == 2
+        assert list(z) == [u+'\U00012345', u+'\x00'] # maybe a 2-unichars strin
+        assert ffi.string(z) == u+'\U00012345'
diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py
--- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py
@@ -25,13 +25,14 @@
     assert ''.join(map(str, recomp.cffi_types)) == expected_output
 
 def verify(ffi, module_name, source, *args, **kwds):
+    no_cpp = kwds.pop('no_cpp', False)
     kwds.setdefault('undef_macros', ['NDEBUG'])
     module_name = '_CFFI_' + module_name
     ffi.set_source(module_name, source)
-    if not os.environ.get('NO_CPP'):     # test the .cpp mode too
+    if not os.environ.get('NO_CPP') and not no_cpp:   # test the .cpp mode too
         kwds.setdefault('source_extension', '.cpp')
         source = 'extern "C" {\n%s\n}' % (source,)
-    else:
+    elif sys.platform != 'win32':
         # add '-Werror' to the existing 'extra_compile_args' flags
         kwds['extra_compile_args'] = (kwds.get('extra_compile_args', []) +
                                       ['-Werror'])
@@ -2010,7 +2011,7 @@
     lib = verify(ffi, "test_function_returns_float_complex", """
         #include <complex.h>
         static float _Complex f1(float a, float b) { return a + I*2.0*b; }
-    """)
+    """, no_cpp=True)    # <complex.h> fails on some systems with C++
     result = lib.f1(1.25, 5.1)
     assert type(result) == complex
     assert result.real == 1.25   # exact
@@ -2024,7 +2025,7 @@
     lib = verify(ffi, "test_function_returns_double_complex", """
         #include <complex.h>
         static double _Complex f1(double a, double b) { return a + I*2.0*b; }
-    """)
+    """, no_cpp=True)    # <complex.h> fails on some systems with C++
     result = lib.f1(1.25, 5.1)
     assert type(result) == complex
     assert result.real == 1.25   # exact
@@ -2038,7 +2039,7 @@
     lib = verify(ffi, "test_function_argument_float_complex", """
         #include <complex.h>
         static float f1(float _Complex x) { return cabsf(x); }
-    """)
+    """, no_cpp=True)    # <complex.h> fails on some systems with C++
     x = complex(12.34, 56.78)
     result = lib.f1(x)
     assert abs(result - abs(x)) < 1e-5
@@ -2051,7 +2052,7 @@
     lib = verify(ffi, "test_function_argument_double_complex", """
         #include <complex.h>
         static double f1(double _Complex x) { return cabs(x); }
-    """)
+    """, no_cpp=True)    # <complex.h> fails on some systems with C++
     x = complex(12.34, 56.78)
     result = lib.f1(x)
     assert abs(result - abs(x)) < 1e-11
@@ -2251,3 +2252,34 @@
     int f(int a) { return a + 40; }
     """, extra_compile_args=['-fvisibility=hidden'])
     assert lib.f(2) == 42
+
+def test_override_default_definition():
+    ffi = FFI()
+    ffi.cdef("typedef long int16_t, char16_t;")
+    lib = verify(ffi, "test_override_default_definition", "")
+    assert ffi.typeof("int16_t") is ffi.typeof("char16_t") is ffi.typeof("long")
+
+def test_char16_char32_type(no_cpp=False):
+    ffi = FFI()
+    ffi.cdef("""
+        char16_t foo_2bytes(char16_t);
+        char32_t foo_4bytes(char32_t);
+    """)
+    lib = verify(ffi, "test_char16_char32_type" + no_cpp * "_nocpp", """
+    #if !defined(__cplusplus) || __cplusplus < 201103L
+    typedef uint_least16_t char16_t;
+    typedef uint_least32_t char32_t;
+    #endif
+    
+    char16_t foo_2bytes(char16_t a) { return (char16_t)(a + 42); }
+    char32_t foo_4bytes(char32_t a) { return (char32_t)(a + 42); }
+    """, no_cpp=no_cpp)
+    assert lib.foo_2bytes(u+'\u1234') == u+'\u125e'
+    assert lib.foo_4bytes(u+'\u1234') == u+'\u125e'
+    assert lib.foo_4bytes(u+'\U00012345') == u+'\U0001236f'
+    py.test.raises(TypeError, lib.foo_2bytes, u+'\U00012345')
+    py.test.raises(TypeError, lib.foo_2bytes, 1234)
+    py.test.raises(TypeError, lib.foo_4bytes, 1234)
+
+def test_char16_char32_plain_c():
+    test_char16_char32_type(no_cpp=True)
diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py
--- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py
@@ -222,7 +222,7 @@
         F = tp.is_float_type()
         X = tp.is_complex_type()
         I = tp.is_integer_type()
-        assert C == (typename in ('char', 'wchar_t'))
+        assert C == (typename in ('char', 'wchar_t', 'char16_t', 'char32_t'))
         assert F == (typename in ('float', 'double', 'long double'))
         assert X == (typename in ('float _Complex', 'double _Complex'))
         assert I + F + C + X == 1      # one and only one of them is true


More information about the pypy-commit mailing list