[pypy-commit] pypy unicode-utf8: add test, avx+sse4 version are compiled in and only used when the platform at runtime supports it
plan_rich
pypy.commits at gmail.com
Tue Mar 14 08:07:26 EDT 2017
Author: Richard Plangger <planrichi at gmail.com>
Branch: unicode-utf8
Changeset: r90682:b2dd71846ca0
Date: 2017-03-14 13:06 +0100
http://bitbucket.org/pypy/pypy/changeset/b2dd71846ca0/
Log: add test; the AVX and SSE4 versions are compiled in and only used when the
platform supports them at runtime
diff --git a/rpython/rlib/rutf8/capi.py b/rpython/rlib/rutf8/capi.py
--- a/rpython/rlib/rutf8/capi.py
+++ b/rpython/rlib/rutf8/capi.py
@@ -4,6 +4,7 @@
from rpython.rtyper.lltypesystem import lltype, rffi
from rpython.translator.tool.cbuild import ExternalCompilationInfo
from rpython.rtyper.tool import rffi_platform as platform
+from rpython.translator.platform import platform as trans_plaform
ROOT = py.path.local(rpythonroot).join('rpython', 'rlib', 'rutf8')
SRC = ROOT.join('src')
@@ -12,33 +13,42 @@
_libs = ['dl']
else:
_libs = []
-eci_kwds = dict(
- include_dirs = [SRC],
- includes = ['utf8.h'],
- libraries = _libs,
- separate_module_files = [SRC.join('utf8.c')],)
-global_eci = ExternalCompilationInfo(**eci_kwds)
+
IDXTAB = lltype.ForwardReference()
IDXTAB.become(rffi.CStruct("fu8_idxtab",
('character_step', rffi.INT),
- ('byte_positions', lltype.Ptr(rffi.SIZE_T)),
+ ('byte_positions', rffi.SIZE_TP),
('bytepos_table_length', rffi.SIZE_T)))
-IDXTABPP = lltype.Ptr(lltype.Ptr(IDXTAB))
+IDXTABP = lltype.Ptr(IDXTAB)
def setup():
- compile_extra = ['-DRPYTHON_LL2CTYPES']
- platform.verify_eci(ExternalCompilationInfo(
- compile_extra=compile_extra,
- **eci_kwds))
+ compile_extra = ['-DRPYTHON_LL2CTYPES', '-DALLOW_SURROGATES=0', '-fPIC']
+ eci_kwds = dict(
+ include_dirs = [SRC],
+ includes = ['utf8.h'],
+ libraries = _libs,
+ compile_extra = compile_extra)
+ # compile the SSE4.1 and AVX2 versions
+ compile_extra.append('-msse4.1')
+ ofile_eci = ExternalCompilationInfo(**eci_kwds)
+ sse4_o, = trans_plaform._compile_o_files([SRC.join('utf8-sse4.c')], ofile_eci)
+ compile_extra.pop()
+ compile_extra.append('-mavx2')
+ ofile_eci = ExternalCompilationInfo(**eci_kwds)
+ avx_o, = trans_plaform._compile_o_files([SRC.join('utf8-avx.c')], ofile_eci)
+ del ofile_eci
- eci = global_eci
- count_utf8_code_points = rffi.llexternal("fu8_count_utf8_codepoints",
- [rffi.CCHARP, rffi.SIZE_T],
+ eci_kwds['separate_module_files'] = [SRC.join('utf8.c')]
+ eci_kwds['link_files'] = [sse4_o.strpath, avx_o.strpath]
+ eci = ExternalCompilationInfo(**eci_kwds)
+ platform.verify_eci(eci)
+ count_utf8_codepoints = rffi.llexternal("fu8_count_utf8_codepoints",
+ [rffi.CCHARP, rffi.SSIZE_T],
rffi.SSIZE_T, compilation_info=eci,
_nowrapper=True)
index2byteposition = rffi.llexternal("fu8_idx2bytepos",
- [rffi.SIZE_T, rffi.CCHARP, rffi.SIZE_T, IDXTABPP],
+ [rffi.SIZE_T, rffi.CCHARP, rffi.SIZE_T, IDXTABP],
rffi.SSIZE_T, compilation_info=eci,
_nowrapper=True)
diff --git a/rpython/rlib/rutf8/src/utf8-avx.c b/rpython/rlib/rutf8/src/utf8-avx.c
--- a/rpython/rlib/rutf8/src/utf8-avx.c
+++ b/rpython/rlib/rutf8/src/utf8-avx.c
@@ -61,8 +61,9 @@
printf("\n");
}
-ssize_t count_utf8_codepoints_avx(const uint8_t * encoded, size_t len)
+ssize_t fu8_count_utf8_codepoints_avx(const char * utf8, size_t len)
{
+ const uint8_t * encoded = (const uint8_t*)utf8;
__builtin_prefetch(encoded, 0, 0);
size_t num_codepoints = 0;
__m256i chunk;
@@ -244,7 +245,7 @@
return num_codepoints;
}
- ssize_t result = count_utf8_codepoints_seq(encoded, len);
+ ssize_t result = fu8_count_utf8_codepoints_seq(encoded, len);
if (result == -1) {
return -1;
}
diff --git a/rpython/rlib/rutf8/src/utf8-scalar.c b/rpython/rlib/rutf8/src/utf8-scalar.c
--- a/rpython/rlib/rutf8/src/utf8-scalar.c
+++ b/rpython/rlib/rutf8/src/utf8-scalar.c
@@ -17,9 +17,10 @@
return 0;
}
-ssize_t count_utf8_codepoints_seq(const uint8_t * encoded, size_t len) {
+ssize_t fu8_count_utf8_codepoints_seq(const char * utf8, size_t len) {
size_t num_codepoints = 0;
uint8_t byte = 0;
+ const uint8_t * encoded = (const uint8_t*)utf8;
const uint8_t * endptr = encoded + len;
while (encoded < endptr) {
diff --git a/rpython/rlib/rutf8/src/utf8-sse4.c b/rpython/rlib/rutf8/src/utf8-sse4.c
--- a/rpython/rlib/rutf8/src/utf8-sse4.c
+++ b/rpython/rlib/rutf8/src/utf8-sse4.c
@@ -40,8 +40,9 @@
}
-ssize_t count_utf8_codepoints_sse4(const uint8_t * encoded, size_t len)
+ssize_t fu8_count_utf8_codepoints_sse4(const char * utf8, size_t len)
{
+ const uint8_t * encoded = (const uint8_t*)utf8;
__builtin_prefetch(encoded, 0, 0);
size_t num_codepoints = 0;
__m128i chunk;
@@ -222,7 +223,7 @@
return num_codepoints;
}
- ssize_t result = count_utf8_codepoints_seq(encoded, len);
+ ssize_t result = fu8_count_utf8_codepoints_seq(encoded, len);
if (result == -1) {
return -1;
}
diff --git a/rpython/rlib/rutf8/src/utf8.c b/rpython/rlib/rutf8/src/utf8.c
--- a/rpython/rlib/rutf8/src/utf8.c
+++ b/rpython/rlib/rutf8/src/utf8.c
@@ -37,7 +37,7 @@
}
}
-ssize_t count_utf8_codepoints(const uint8_t * encoded, size_t len)
+ssize_t fu8_count_utf8_codepoints(const char * utf8, size_t len)
{
if (instruction_set == -1) {
detect_instructionset();
@@ -45,15 +45,15 @@
if (len >= 32 && (instruction_set & ISET_AVX2) != 0) {
// to the MOON!
- return count_utf8_codepoints_avx(encoded, len);
+ return fu8_count_utf8_codepoints_avx(utf8, len);
}
if (len >= 16 && (instruction_set == ISET_SSE4) != 0) {
// speed!!
- return count_utf8_codepoints_sse4(encoded, len);
+ return fu8_count_utf8_codepoints_sse4(utf8, len);
}
// oh no, just do it sequentially!
- return count_utf8_codepoints_seq(encoded, len);
+ return fu8_count_utf8_codepoints_seq(utf8, len);
}
typedef struct fu8_idxtab {
diff --git a/rpython/rlib/rutf8/src/utf8.h b/rpython/rlib/rutf8/src/utf8.h
--- a/rpython/rlib/rutf8/src/utf8.h
+++ b/rpython/rlib/rutf8/src/utf8.h
@@ -4,6 +4,20 @@
#include <stdint.h>
#include <stddef.h>
+#ifdef RPYTHON_LL2CTYPES
+ /* only for testing: ll2ctypes sets RPY_EXTERN from the command-line */
+#ifndef RPY_EXTERN
+# define RPY_EXTERN RPY_EXPORTED
+#endif
+
+#ifdef _WIN32
+# define RPY_EXPORTED __declspec(dllexport)
+#else
+# define RPY_EXPORTED extern __attribute__((visibility("default")))
+#endif
+
+#endif
+
/**
* Returns -1 if the given string is not a valid utf8 encoded string.
* Otherwise returns the number of code points in the given string.
@@ -12,14 +26,14 @@
* The above documentation also applies for several vectorized implementations
* found below.
*
- * count_utf8_codepoints dispatches amongst several
+ * fu8_count_utf8_codepoints dispatches amongst several
* implementations (e.g. seq, SSE4, AVX)
*/
// TODO rename (fu8 prefix)
-ssize_t fu8_count_utf8_codepoints(const uint8_t * encoded, size_t len);
-ssize_t fu8_count_utf8_codepoints_seq(const uint8_t * encoded, size_t len);
-ssize_t fu8_count_utf8_codepoints_sse4(const uint8_t * encoded, size_t len);
-ssize_t fu8_count_utf8_codepoints_avx(const uint8_t * encoded, size_t len);
+RPY_EXTERN ssize_t fu8_count_utf8_codepoints(const char * utf8, size_t len);
+RPY_EXTERN ssize_t fu8_count_utf8_codepoints_seq(const char * utf8, size_t len);
+RPY_EXTERN ssize_t fu8_count_utf8_codepoints_sse4(const char * utf8, size_t len);
+RPY_EXTERN ssize_t fu8_count_utf8_codepoints_avx(const char * utf8, size_t len);
struct fu8_idxtab;
@@ -41,11 +55,11 @@
* table to speed up indexing.
*
*/
-ssize_t fu8_idx2bytepos(size_t index,
+RPY_EXTERN ssize_t fu8_idx2bytepos(size_t index,
const uint8_t * utf8, size_t bytelen,
size_t cplen,
struct fu8_idxtab ** tab);
-void fu8_free_idxtab(struct fu8_idxtab * t);
-ssize_t fu8_idx2bytepso_sse4(size_t index,
+RPY_EXTERN void fu8_free_idxtab(struct fu8_idxtab * t);
+RPY_EXTERN ssize_t fu8_idx2bytepso_sse4(size_t index,
const uint8_t * utf8, size_t len,
struct fu8_idxtab ** t);
More information about the pypy-commit
mailing list