[pypy-commit] pypy py3.6: merge default into py3.6
mattip
pypy.commits at gmail.com
Fri Feb 15 07:57:06 EST 2019
Author: Matti Picus <matti.picus at gmail.com>
Branch: py3.6
Changeset: r96019:bf156a807410
Date: 2019-02-15 14:56 +0200
http://bitbucket.org/pypy/pypy/changeset/bf156a807410/
Log: merge default into py3.6
diff --git a/rpython/rlib/rarithmetic.py b/rpython/rlib/rarithmetic.py
--- a/rpython/rlib/rarithmetic.py
+++ b/rpython/rlib/rarithmetic.py
@@ -729,7 +729,9 @@
""" The JIT special-cases this too. """
from rpython.rtyper.lltypesystem import lltype
from rpython.rtyper.lltypesystem.lloperation import llop
- return llop.int_force_ge_zero(lltype.Signed, n)
+ n = llop.int_force_ge_zero(lltype.Signed, n)
+ assert n >= 0
+ return n
def int_c_div(x, y):
"""Return the result of the C-style 'x / y'. This differs from the
diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py
--- a/rpython/rlib/rsre/rsre_core.py
+++ b/rpython/rlib/rsre/rsre_core.py
@@ -151,7 +151,10 @@
# The following methods are provided to be overriden in
# Utf8MatchContext. The non-utf8 implementation is provided
# by the FixedMatchContext abstract subclass, in order to use
- # the same @not_rpython safety trick as above.
+ # the same @not_rpython safety trick as above. If you get a
+ # "not_rpython" error during translation, either consider
+ # calling the methods xxx_indirect() instead of xxx(), or if
+ # applicable add the @specializectx decorator.
ZERO = 0
@not_rpython
def next(self, position):
@@ -460,8 +463,7 @@
ptr = self.start_ptr
if not self.next_char_ok(ctx, pattern, ptr, self.ppos3):
return
- assert not isinstance(ctx, AbstractMatchContext)
- self.start_ptr = ctx.next(ptr)
+ self.start_ptr = ctx.next_indirect(ptr)
return self.find_first_result(ctx, pattern)
def next_char_ok(self, ctx, pattern, ptr, ppos):
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -19,7 +19,7 @@
from rpython.rlib.objectmodel import enforceargs, we_are_translated, specialize
from rpython.rlib.objectmodel import always_inline, dont_inline, try_inline
from rpython.rlib.rstring import StringBuilder
-from rpython.rlib import jit, types
+from rpython.rlib import jit, types, rarithmetic
from rpython.rlib.signature import signature, finishsigs
from rpython.rlib.types import char, none
from rpython.rlib.rarithmetic import r_uint
@@ -117,6 +117,12 @@
# chinese wikipedia, they're anywhere between 10% and 30% slower.
# In extreme cases (small, only chinese text), they're 40% slower
+# The following was found by hand to be more optimal than both,
+# on x86-64...
+_is_64bit = sys.maxint > 2**32
+_constant_ncp = rarithmetic.r_uint64(0xffff0000ffffffff)
+
+@always_inline
def next_codepoint_pos(code, pos):
"""Gives the position of the next codepoint after pos.
Assumes valid utf8. 'pos' must be before the end of the string.
@@ -125,6 +131,11 @@
chr1 = ord(code[pos])
if chr1 <= 0x7F:
return pos + 1
+ if _is_64bit and not jit.we_are_jitted():
+ # optimized for Intel x86-64 by hand
+ return pos + 1 + (
+ ((chr1 > 0xDF) << 1) +
+ rarithmetic.intmask((_constant_ncp >> (chr1 & 0x3F)) & 1))
if chr1 <= 0xDF:
return pos + 2
if chr1 <= 0xEF:
@@ -162,7 +173,6 @@
ordch1 = ord(code[pos])
if ordch1 <= 0x7F or pos +1 >= lgt:
return ordch1
-
ordch2 = ord(code[pos+1])
if ordch1 <= 0xDF or pos +2 >= lgt:
# 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz
@@ -518,7 +528,7 @@
break
return storage
-@jit.dont_look_inside
+@jit.elidable
def codepoint_position_at_index(utf8, storage, index):
""" Return byte index of a character inside utf8 encoded string, given
storage of type UTF8_INDEX_STORAGE. The index must be smaller than
@@ -546,7 +556,7 @@
pos = next_codepoint_pos(utf8, pos)
return pos
-@jit.dont_look_inside
+@jit.elidable
def codepoint_at_index(utf8, storage, index):
""" Return codepoint of a character inside utf8 encoded string, given
storage of type UTF8_INDEX_STORAGE
@@ -564,7 +574,7 @@
bytepos = next_codepoint_pos(utf8, bytepos)
return codepoint_at_pos(utf8, bytepos)
-@jit.dont_look_inside
+@jit.elidable
def codepoint_index_at_byte_position(utf8, storage, bytepos):
""" Return the character index for which
codepoint_position_at_index(index) == bytepos.
More information about the pypy-commit
mailing list