[pypy-commit] cffi char16_char32_t: in-progress

arigo pypy.commits at gmail.com
Wed May 31 10:42:25 EDT 2017


Author: Armin Rigo <arigo at tunes.org>
Branch: char16_char32_t
Changeset: r2955:983e7f434f13
Date: 2017-05-31 16:42 +0200
http://bitbucket.org/cffi/cffi/changeset/983e7f434f13/

Log:	in-progress

diff --git a/c/cffi1_module.c b/c/cffi1_module.c
--- a/c/cffi1_module.c
+++ b/c/cffi1_module.c
@@ -2,8 +2,9 @@
 #include "parse_c_type.c"
 #include "realize_c_type.c"
 
-#define CFFI_VERSION_MIN    0x2601
-#define CFFI_VERSION_MAX    0x27FF
+#define CFFI_VERSION_MIN            0x2601
+#define CFFI_VERSION_CHAR16CHAR32   0x2801
+#define CFFI_VERSION_MAX            0x28FF
 
 typedef struct FFIObject_s FFIObject;
 typedef struct LibObject_s LibObject;
@@ -183,6 +184,8 @@
     num_exports = 25;
     if (ctx->flags & 1)    /* set to mean that 'extern "Python"' is used */
         num_exports = 26;
+    if (version >= CFFI_VERSION_CHAR16CHAR32)
+        num_exports = 28;
     memcpy(exports, (char *)cffi_exports, num_exports * sizeof(void *));
 
     /* make the module object */
diff --git a/c/realize_c_type.c b/c/realize_c_type.c
--- a/c/realize_c_type.c
+++ b/c/realize_c_type.c
@@ -153,6 +153,8 @@
         "uintmax_t",
         "float _Complex",
         "double _Complex",
+        "char16_t",
+        "char32_t",
     };
     PyObject *x;
 
diff --git a/cffi/_cffi_include.h b/cffi/_cffi_include.h
--- a/cffi/_cffi_include.h
+++ b/cffi/_cffi_include.h
@@ -62,11 +62,16 @@
     typedef unsigned char _Bool;
 #  endif
 # endif
+# if _MSC_VER < 1900 || !defined(__cplusplus)   /* MSVC < 2015, or plain C */
+    typedef uint16_t char16_t;
+    typedef int32_t char32_t;
+# endif
 #else
 # include <stdint.h>
 # if (defined (__SVR4) && defined (__sun)) || defined(_AIX) || defined(__hpux)
 #  include <alloca.h>
 # endif
+# include <uchar.h>
 #endif
 
 #ifdef __GNUC__
@@ -174,7 +179,11 @@
 #define _CFFI_CPIDX  25
 #define _cffi_call_python                                                \
     ((void(*)(struct _cffi_externpy_s *, char *))_cffi_exports[_CFFI_CPIDX])
-#define _CFFI_NUM_EXPORTS 26
+#define _cffi_to_c_wchar3216_t                                           \
+    ((int(*)(PyObject *))_cffi_exports[26])
+#define _cffi_from_c_wchar3216_t                                         \
+    ((PyObject *(*)(int))_cffi_exports[27])
+#define _CFFI_NUM_EXPORTS 28
 
 struct _cffi_ctypedescr;
 
@@ -215,6 +224,46 @@
     return NULL;
 }
 
+
+#ifdef HAVE_WCHAR_H
+typedef wchar_t _cffi_wchar_t;
+#else
+typedef uint16_t _cffi_wchar_t;   /* same random pick as _cffi_backend.c */
+#endif
+
+_CFFI_UNUSED_FN static int _cffi_to_c_char16_t(PyObject *o)
+{
+    if (sizeof(_cffi_wchar_t) == 2)
+        return _cffi_to_c_wchar_t(o);
+    else
+        return _cffi_to_c_wchar3216_t(o);
+}
+
+_CFFI_UNUSED_FN static PyObject *_cffi_from_c_char16_t(int x)
+{
+    if (sizeof(_cffi_wchar_t) == 2)
+        return _cffi_from_c_wchar_t(x);
+    else
+        return _cffi_from_c_wchar3216_t(x);
+}
+
+_CFFI_UNUSED_FN static int _cffi_to_c_char32_t(PyObject *o)
+{
+    if (sizeof(_cffi_wchar_t) == 4)
+        return _cffi_to_c_wchar_t(o);
+    else
+        return _cffi_to_c_wchar3216_t(o);
+}
+
+_CFFI_UNUSED_FN static PyObject *_cffi_from_c_char32_t(int x)
+{
+    if (sizeof(_cffi_wchar_t) == 4)
+        return _cffi_from_c_wchar_t(x);
+    else
+        return _cffi_from_c_wchar3216_t(x);
+}
+
+
 /**********  end CPython-specific section  **********/
 #else
 _CFFI_UNUSED_FN
diff --git a/cffi/parse_c_type.h b/cffi/parse_c_type.h
--- a/cffi/parse_c_type.h
+++ b/cffi/parse_c_type.h
@@ -81,8 +81,10 @@
 #define _CFFI_PRIM_UINTMAX      47
 #define _CFFI_PRIM_FLOATCOMPLEX 48
 #define _CFFI_PRIM_DOUBLECOMPLEX 49
+#define _CFFI_PRIM_CHAR16       50
+#define _CFFI_PRIM_CHAR32       51
 
-#define _CFFI__NUM_PRIM         50
+#define _CFFI__NUM_PRIM         52
 #define _CFFI__UNKNOWN_PRIM           (-1)
 #define _CFFI__UNKNOWN_FLOAT_PRIM     (-2)
 #define _CFFI__UNKNOWN_LONG_DOUBLE    (-3)
diff --git a/cffi/recompiler.py b/cffi/recompiler.py
--- a/cffi/recompiler.py
+++ b/cffi/recompiler.py
@@ -3,8 +3,9 @@
 from .error import VerificationError
 from .cffi_opcode import *
 
-VERSION = "0x2601"
-VERSION_EMBEDDED = "0x2701"
+VERSION_BASE = 0x2601
+VERSION_EMBEDDED = 0x2701
+VERSION_CHAR16CHAR32 = 0x2801
 
 
 class GlobalExpr:
@@ -126,6 +127,10 @@
         self.ffi = ffi
         self.module_name = module_name
         self.target_is_python = target_is_python
+        self._version = VERSION_BASE
+
+    def needs_version(self, ver):
+        self._version = max(self._version, ver)
 
     def collect_type_table(self):
         self._typesdict = {}
@@ -304,9 +309,7 @@
             prnt('#endif')
             lines = self._rel_readlines('_embedding.h')
             prnt(''.join(lines))
-            version = VERSION_EMBEDDED
-        else:
-            version = VERSION
+            self.needs_version(VERSION_EMBEDDED)
         #
         # then paste the C source given by the user, verbatim.
         prnt('/************************************************************/')
@@ -405,7 +408,7 @@
             prnt('        _cffi_call_python_org = '
                  '(void(*)(struct _cffi_externpy_s *, char *))p[1];')
             prnt('    }')
-        prnt('    p[0] = (const void *)%s;' % version)
+        prnt('    p[0] = (const void *)0x%x;' % self._version)
         prnt('    p[1] = &_cffi_type_context;')
         prnt('}')
         # on Windows, distutils insists on putting init_cffi_xyz in
@@ -423,21 +426,22 @@
         prnt('PyMODINIT_FUNC')
         prnt('PyInit_%s(void)' % (base_module_name,))
         prnt('{')
-        prnt('  return _cffi_init("%s", %s, &_cffi_type_context);' % (
-            self.module_name, version))
+        prnt('  return _cffi_init("%s", 0x%x, &_cffi_type_context);' % (
+            self.module_name, self._version))
         prnt('}')
         prnt('#else')
         prnt('PyMODINIT_FUNC')
         prnt('init%s(void)' % (base_module_name,))
         prnt('{')
-        prnt('  _cffi_init("%s", %s, &_cffi_type_context);' % (
-            self.module_name, version))
+        prnt('  _cffi_init("%s", 0x%x, &_cffi_type_context);' % (
+            self.module_name, self._version))
         prnt('}')
         prnt('#endif')
         prnt()
         prnt('#ifdef __GNUC__')
         prnt('#  pragma GCC visibility pop')
         prnt('#endif')
+        self._version = None
 
     def _to_py(self, x):
         if isinstance(x, str):
@@ -476,7 +480,8 @@
             prnt('from %s import ffi as _ffi%d' % (included_module_name, i))
         prnt()
         prnt("ffi = _cffi_backend.FFI('%s'," % (self.module_name,))
-        prnt("    _version = %s," % (VERSION,))
+        prnt("    _version = 0x%x," % (self._version,))
+        self._version = None
         #
         # the '_types' keyword argument
         self.cffi_types = tuple(self.cffi_types)    # don't change any more
@@ -515,8 +520,11 @@
                 # double' here, and _cffi_to_c_double would loose precision
                 converter = '(%s)_cffi_to_c_double' % (tp.get_c_name(''),)
             else:
-                converter = '(%s)_cffi_to_c_%s' % (tp.get_c_name(''),
+                cname = tp.get_c_name('')
+                converter = '(%s)_cffi_to_c_%s' % (cname,
                                                    tp.name.replace(' ', '_'))
+                if cname in ('char16_t', 'char32_t'):
+                    self.needs_version(VERSION_CHAR16CHAR32)
             errvalue = '-1'
         #
         elif isinstance(tp, model.PointerType):
@@ -573,7 +581,10 @@
             elif isinstance(tp, model.UnknownFloatType):
                 return '_cffi_from_c_double(%s)' % (var,)
             elif tp.name != 'long double' and not tp.is_complex_type():
-                return '_cffi_from_c_%s(%s)' % (tp.name.replace(' ', '_'), var)
+                cname = tp.name.replace(' ', '_')
+                if cname in ('char16_t', 'char32_t'):
+                    self.needs_version(VERSION_CHAR16CHAR32)
+                return '_cffi_from_c_%s(%s)' % (cname, var)
             else:
                 return '_cffi_from_c_deref((char *)&%s, _cffi_type(%d))' % (
                     var, self._gettypenum(tp))
diff --git a/testing/cffi1/test_new_ffi_1.py b/testing/cffi1/test_new_ffi_1.py
--- a/testing/cffi1/test_new_ffi_1.py
+++ b/testing/cffi1/test_new_ffi_1.py
@@ -1744,3 +1744,30 @@
         exec("from _test_import_from_lib import *", d)
         assert (sorted([x for x in d.keys() if not x.startswith('__')]) ==
                 ['ffi', 'lib'])
+
+    def test_char16_t(self):
+        x = ffi.new("char16_t[]", 5)
+        assert len(x) == 5 and ffi.sizeof(x) == 10
+        x[2] = u+'\u1324'
+        assert x[2] == u+'\u1324'
+        y = ffi.new("char16_t[]", u+'\u1234\u5678')
+        assert len(y) == 3
+        assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00']
+        assert ffi.string(y) == u+'\u1234\u5678'
+        z = ffi.new("char16_t[]", u+'\U00012345')
+        assert len(z) == 3
+        assert list(z) == [u+'\ud808', u+'\udf45', u+'\x00']
+        assert ffi.string(z) == u+'\U00012345'
+
+    def test_char32_t(self):
+        x = ffi.new("char32_t[]", 5)
+        assert len(x) == 5 and ffi.sizeof(x) == 20
+        x[3] = u+'\U00013245'
+        assert x[3] == u+'\U00013245'
+        y = ffi.new("char32_t[]", u+'\u1234\u5678')
+        assert len(y) == 3
+        assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00']
+        z = ffi.new("char32_t[]", u+'\U00012345')
+        assert len(z) == 2
+        assert list(z) == [u+'\U00012345', u+'\x00'] # maybe a 2-unichars string
+        assert ffi.string(z) == u+'\U00012345'
diff --git a/testing/cffi1/test_recompiler.py b/testing/cffi1/test_recompiler.py
--- a/testing/cffi1/test_recompiler.py
+++ b/testing/cffi1/test_recompiler.py
@@ -24,10 +24,11 @@
     assert ''.join(map(str, recomp.cffi_types)) == expected_output
 
 def verify(ffi, module_name, source, *args, **kwds):
+    no_cpp = kwds.pop('no_cpp', False)
     kwds.setdefault('undef_macros', ['NDEBUG'])
     module_name = '_CFFI_' + module_name
     ffi.set_source(module_name, source)
-    if not os.environ.get('NO_CPP'):     # test the .cpp mode too
+    if not os.environ.get('NO_CPP') and not no_cpp:   # test the .cpp mode too
         kwds.setdefault('source_extension', '.cpp')
         source = 'extern "C" {\n%s\n}' % (source,)
     else:
@@ -2256,3 +2257,20 @@
     ffi.cdef("typedef long int16_t, char16_t;")
     lib = verify(ffi, "test_override_default_definition", "")
     assert ffi.typeof("int16_t") is ffi.typeof("char16_t") is ffi.typeof("long")
+
+def test_char16_char32_type(no_cpp=False):
+    ffi = FFI()
+    ffi.cdef("""
+        char16_t foo_2bytes(char16_t);
+        char32_t foo_4bytes(char32_t);
+    """)
+    lib = verify(ffi, "test_char16_char32_type" + no_cpp * "_nocpp", """
+    char16_t foo_2bytes(char16_t a) { return (char16_t)(a + 42); }
+    char32_t foo_4bytes(char32_t a) { return (char32_t)(a + 42); }
+    """, no_cpp=no_cpp)
+    assert lib.foo_2bytes(u+'\u1234') == u+'\u125e'
+    assert lib.foo_4bytes(u+'\u1234') == u+'\u125e'
+    assert lib.foo_4bytes(u+'\U00012345') == u+'\U0001236f'
+
+def test_char16_char32_plain_c():
+    test_char16_char32_type(no_cpp=True)


More information about the pypy-commit mailing list