[pypy-commit] pypy unicode-utf8: add test; the AVX and SSE4 versions are compiled in and only used when the platform supports them at runtime

Tue Mar 14 08:07:26 EDT 2017

Author: Richard Plangger <planrichi at gmail.com>
Branch: unicode-utf8
Changeset: r90682:b2dd71846ca0
Date: 2017-03-14 13:06 +0100
http://bitbucket.org/pypy/pypy/changeset/b2dd71846ca0/

Log:	add test; the AVX and SSE4 versions are compiled in and only used
	when the platform supports them at runtime

diff --git a/rpython/rlib/rutf8/capi.py b/rpython/rlib/rutf8/capi.py
--- a/rpython/rlib/rutf8/capi.py
+++ b/rpython/rlib/rutf8/capi.py
@@ -4,6 +4,7 @@
 from rpython.rtyper.lltypesystem import lltype, rffi
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
 from rpython.rtyper.tool import rffi_platform as platform
+from rpython.translator.platform import platform as trans_plaform
 
 ROOT = py.path.local(rpythonroot).join('rpython', 'rlib', 'rutf8')
 SRC = ROOT.join('src')
@@ -12,33 +13,42 @@
     _libs = ['dl']
 else:
     _libs = []
-eci_kwds = dict(
-    include_dirs = [SRC],
-    includes = ['utf8.h'],
-    libraries = _libs,
-    separate_module_files = [SRC.join('utf8.c')],)
-global_eci = ExternalCompilationInfo(**eci_kwds)
+
 
 IDXTAB = lltype.ForwardReference()
 IDXTAB.become(rffi.CStruct("fu8_idxtab",
                            ('character_step', rffi.INT),
-                           ('byte_positions', lltype.Ptr(rffi.SIZE_T)),
+                           ('byte_positions', rffi.SIZE_TP),
                            ('bytepos_table_length', rffi.SIZE_T)))
-IDXTABPP = lltype.Ptr(lltype.Ptr(IDXTAB))
+IDXTABP = lltype.Ptr(IDXTAB)
 
 def setup():
-    compile_extra = ['-DRPYTHON_LL2CTYPES']
-    platform.verify_eci(ExternalCompilationInfo(
-        compile_extra=compile_extra,
-        **eci_kwds))
+    compile_extra = ['-DRPYTHON_LL2CTYPES', '-DALLOW_SURROGATES=0', '-fPIC']
+    eci_kwds = dict(
+        include_dirs = [SRC],
+        includes = ['utf8.h'],
+        libraries = _libs,
+        compile_extra = compile_extra)
+    # compile the SSE4.1 and AVX version
+    compile_extra.append('-msse4.1')
+    ofile_eci = ExternalCompilationInfo(**eci_kwds)
+    sse4_o, = trans_plaform._compile_o_files([SRC.join('utf8-sse4.c')], ofile_eci)
+    compile_extra.pop()
+    compile_extra.append('-mavx2')
+    ofile_eci = ExternalCompilationInfo(**eci_kwds)
+    avx_o, = trans_plaform._compile_o_files([SRC.join('utf8-avx.c')], ofile_eci)
+    del ofile_eci
 
-    eci = global_eci
-    count_utf8_code_points = rffi.llexternal("fu8_count_utf8_codepoints",
-                                  [rffi.CCHARP, rffi.SIZE_T],
+    eci_kwds['separate_module_files'] = [SRC.join('utf8.c')]
+    eci_kwds['link_files'] = [sse4_o.strpath, avx_o.strpath]
+    eci = ExternalCompilationInfo(**eci_kwds)
+    platform.verify_eci(eci)
+    count_utf8_codepoints = rffi.llexternal("fu8_count_utf8_codepoints",
+                                  [rffi.CCHARP, rffi.SSIZE_T],
                                   rffi.SSIZE_T, compilation_info=eci,
                                   _nowrapper=True)
     index2byteposition = rffi.llexternal("fu8_idx2bytepos",
-                                  [rffi.SIZE_T, rffi.CCHARP, rffi.SIZE_T, IDXTABPP],
+                                  [rffi.SIZE_T, rffi.CCHARP, rffi.SIZE_T, IDXTABP],
                                   rffi.SSIZE_T, compilation_info=eci,
                                   _nowrapper=True)
 
diff --git a/rpython/rlib/rutf8/src/utf8-avx.c b/rpython/rlib/rutf8/src/utf8-avx.c
--- a/rpython/rlib/rutf8/src/utf8-avx.c
+++ b/rpython/rlib/rutf8/src/utf8-avx.c
@@ -61,8 +61,9 @@
     printf("\n");
 }
 
-ssize_t count_utf8_codepoints_avx(const uint8_t * encoded, size_t len)
+ssize_t fu8_count_utf8_codepoints_avx(const char * utf8, size_t len)
 {
+    const uint8_t * encoded = (const uint8_t*)utf8;
     __builtin_prefetch(encoded, 0, 0);
     size_t num_codepoints = 0;
     __m256i chunk;
@@ -244,7 +245,7 @@
         return num_codepoints;
     }
 
-    ssize_t result = count_utf8_codepoints_seq(encoded, len);
+    ssize_t result = fu8_count_utf8_codepoints_seq(encoded, len);
     if (result == -1) {
         return -1;
     }
diff --git a/rpython/rlib/rutf8/src/utf8-scalar.c b/rpython/rlib/rutf8/src/utf8-scalar.c
--- a/rpython/rlib/rutf8/src/utf8-scalar.c
+++ b/rpython/rlib/rutf8/src/utf8-scalar.c
@@ -17,9 +17,10 @@
     return 0;
 }
 
-ssize_t count_utf8_codepoints_seq(const uint8_t * encoded, size_t len) {
+ssize_t fu8_count_utf8_codepoints_seq(const char * utf8, size_t len) {
     size_t num_codepoints = 0;
     uint8_t byte = 0;
+    const uint8_t * encoded = (const uint8_t*)utf8;
     const uint8_t * endptr = encoded + len;
 
     while (encoded < endptr) {
diff --git a/rpython/rlib/rutf8/src/utf8-sse4.c b/rpython/rlib/rutf8/src/utf8-sse4.c
--- a/rpython/rlib/rutf8/src/utf8-sse4.c
+++ b/rpython/rlib/rutf8/src/utf8-sse4.c
@@ -40,8 +40,9 @@
 }
 
 
-ssize_t count_utf8_codepoints_sse4(const uint8_t * encoded, size_t len)
+ssize_t fu8_count_utf8_codepoints_sse4(const char * utf8, size_t len)
 {
+    const uint8_t * encoded = (const uint8_t*)utf8;
     __builtin_prefetch(encoded, 0, 0);
     size_t num_codepoints = 0;
     __m128i chunk;
@@ -222,7 +223,7 @@
         return num_codepoints;
     }
 
-    ssize_t result = count_utf8_codepoints_seq(encoded, len);
+    ssize_t result = fu8_count_utf8_codepoints_seq(encoded, len);
     if (result == -1) {
         return -1;
     }
diff --git a/rpython/rlib/rutf8/src/utf8.c b/rpython/rlib/rutf8/src/utf8.c
--- a/rpython/rlib/rutf8/src/utf8.c
+++ b/rpython/rlib/rutf8/src/utf8.c
@@ -37,7 +37,7 @@
     }
 }
 
-ssize_t count_utf8_codepoints(const uint8_t * encoded, size_t len)
+ssize_t fu8_count_utf8_codepoints(const char * utf8, size_t len)
 {
     if (instruction_set == -1) {
         detect_instructionset();
@@ -45,15 +45,15 @@
 
     if (len >= 32 && (instruction_set & ISET_AVX2) != 0) {
         // to the MOON!
-        return count_utf8_codepoints_avx(encoded, len);
+        return fu8_count_utf8_codepoints_avx(utf8, len);
     }
     if (len >= 16 && (instruction_set == ISET_SSE4) != 0) {
         // speed!!
-        return count_utf8_codepoints_sse4(encoded, len);
+        return fu8_count_utf8_codepoints_sse4(utf8, len);
     }
 
     // oh no, just do it sequentially!
-    return count_utf8_codepoints_seq(encoded, len);
+    return fu8_count_utf8_codepoints_seq(utf8, len);
 }
 
 typedef struct fu8_idxtab {
diff --git a/rpython/rlib/rutf8/src/utf8.h b/rpython/rlib/rutf8/src/utf8.h
--- a/rpython/rlib/rutf8/src/utf8.h
+++ b/rpython/rlib/rutf8/src/utf8.h
@@ -4,6 +4,20 @@
 #include <stdint.h>
 #include <stddef.h>
 
+#ifdef RPYTHON_LL2CTYPES
+   /* only for testing: ll2ctypes sets RPY_EXTERN from the command-line */
+#ifndef RPY_EXTERN
+#  define RPY_EXTERN RPY_EXPORTED
+#endif
+
+#ifdef _WIN32
+#  define RPY_EXPORTED __declspec(dllexport)
+#else
+#  define RPY_EXPORTED  extern __attribute__((visibility("default")))
+#endif
+
+#endif
+
 /**
  * Returns -1 if the given string is not a valid utf8 encoded string.
  * Otherwise returns the amount code point in the given string.
@@ -12,14 +26,14 @@
  * The above documentation also applies for several vectorized implementations
  * found below.
  *
- * count_utf8_codepoints dispatches amongst several
+ * fu8_count_utf8_codepoints dispatches amongst several
  * implementations (e.g. seq, SSE4, AVX)
  */
 // TODO rename (fu8 prefix)
-ssize_t fu8_count_utf8_codepoints(const uint8_t * encoded, size_t len);
-ssize_t fu8_count_utf8_codepoints_seq(const uint8_t * encoded, size_t len);
-ssize_t fu8_count_utf8_codepoints_sse4(const uint8_t * encoded, size_t len);
-ssize_t fu8_count_utf8_codepoints_avx(const uint8_t * encoded, size_t len);
+RPY_EXTERN ssize_t fu8_count_utf8_codepoints(const char * utf8, size_t len);
+RPY_EXTERN ssize_t fu8_count_utf8_codepoints_seq(const char * utf8, size_t len);
+RPY_EXTERN ssize_t fu8_count_utf8_codepoints_sse4(const char * utf8, size_t len);
+RPY_EXTERN ssize_t fu8_count_utf8_codepoints_avx(const char * utf8, size_t len);
 
 
 struct fu8_idxtab;
@@ -41,11 +55,11 @@
  * table to speed up indexing.
  *
  */
-ssize_t fu8_idx2bytepos(size_t index,
+RPY_EXTERN ssize_t fu8_idx2bytepos(size_t index,
                         const uint8_t * utf8, size_t bytelen,
                         size_t cplen,
                         struct fu8_idxtab ** tab);
-void fu8_free_idxtab(struct fu8_idxtab * t);
-ssize_t fu8_idx2bytepso_sse4(size_t index,
+RPY_EXTERN void fu8_free_idxtab(struct fu8_idxtab * t);
+RPY_EXTERN ssize_t fu8_idx2bytepso_sse4(size_t index,
                              const uint8_t * utf8, size_t len,
                              struct fu8_idxtab ** t);