[pypy-commit] pypy unicode-utf8: Test and fix
arigo
pypy.commits at gmail.com
Mon Dec 11 16:08:20 EST 2017
Author: Armin Rigo <arigo at tunes.org>
Branch: unicode-utf8
Changeset: r93373:ebaac96d17ab
Date: 2017-12-11 22:07 +0100
http://bitbucket.org/pypy/pypy/changeset/ebaac96d17ab/
Log: Test and fix
diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py
--- a/pypy/module/_sre/test/test_app_sre.py
+++ b/pypy/module/_sre/test/test_app_sre.py
@@ -124,6 +124,7 @@
assert ["a", "u"] == re.findall("b(.)", "abalbus")
assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus")
assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs")
+ assert [u"xyz"] == re.findall(u".*yz", u"xyz")
def test_finditer(self):
import re
diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py
--- a/rpython/rlib/rsre/rsre_core.py
+++ b/rpython/rlib/rsre/rsre_core.py
@@ -915,10 +915,10 @@
@specializectx
def find_repetition_end(ctx, ppos, ptr, maxcount, marks):
end = ctx.end
+ # First get rid of the cases where we don't have room for any match.
+ if maxcount <= 0 or ptr >= end:
+ return ptr
ptrp1 = ctx.next(ptr)
- # First get rid of the cases where we don't have room for any match.
- if maxcount <= 0 or ptrp1 > end:
- return ptr
# Check the first character directly. If it doesn't match, we are done.
# The idea is to be fast for cases like re.search("b+"), where we expect
# the common case to be a non-match. It's much faster with the JIT to
@@ -1202,12 +1202,14 @@
def regular_search(ctx, base):
start = ctx.match_start
- while start <= ctx.end:
+ while True:
ctx.jitdriver_RegularSearch.jit_merge_point(ctx=ctx, start=start,
base=base)
if sre_match(ctx, base, start, None) is not None:
ctx.match_start = start
return True
+ if start >= ctx.end:
+ break
start = ctx.next_indirect(start)
return False
More information about the pypy-commit mailing list