From pypy.commits at gmail.com Fri Dec 1 08:06:21 2017 From: pypy.commits at gmail.com (arigo) Date: Fri, 01 Dec 2017 05:06:21 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Test for replace_count Message-ID: <5a2153cd.d2a1df0a.64b37.b800@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93231:ea8cbee5a965 Date: 2017-12-01 14:05 +0100 http://bitbucket.org/pypy/pypy/changeset/ea8cbee5a965/ Log: Test for replace_count diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py --- a/rpython/rlib/test/test_rstring.py +++ b/rpython/rlib/test/test_rstring.py @@ -1,7 +1,7 @@ import sys, py from rpython.rlib.rstring import StringBuilder, UnicodeBuilder, split, rsplit -from rpython.rlib.rstring import replace, startswith, endswith +from rpython.rlib.rstring import replace, startswith, endswith, replace_count from rpython.rlib.rstring import find, rfind, count from rpython.rlib.buffer import StringBuffer from rpython.rtyper.test.tool import BaseRtypingTest @@ -93,9 +93,13 @@ def check_replace(value, sub, *args, **kwargs): result = kwargs['res'] assert replace(value, sub, *args) == result + assert replace(list(value), sub, *args) == list(result) + count = value.count(sub) + if len(args) >= 2: + count = min(count, args[1]) + assert replace_count(value, sub, *args) == (result, count) + assert replace_count(value, sub, *args, isutf8=True) == (result, count) - assert replace(list(value), sub, *args) == list(result) - check_replace('one!two!three!', '!', '@', 1, res='one@two!three!') check_replace('one!two!three!', '!', '', res='onetwothree') check_replace('one!two!three!', '!', '@', 2, res='one@two@three!') From pypy.commits at gmail.com Fri Dec 1 08:06:23 2017 From: pypy.commits at gmail.com (arigo) Date: Fri, 01 Dec 2017 05:06:23 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Get rid of this function (there is codepoints_in_utf8()) Message-ID: <5a2153cf.83871c0a.6d29.2b4c@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93232:a868253965d3 Date: 2017-12-01 14:05 +0100 http://bitbucket.org/pypy/pypy/changeset/a868253965d3/ Log: Get rid of this function (there is codepoints_in_utf8()) diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -253,7 +253,7 @@ else: assert isutf8 from rpython.rlib import rutf8 - cnt = rutf8.compute_length_utf8(input) + 1 + cnt = rutf8.codepoints_in_utf8(input) + 1 if cnt > maxsplit and maxsplit > 0: cnt = maxsplit diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -120,13 +120,6 @@ assert pos >= 0 return pos -def compute_length_utf8(s): - continuation_bytes = 0 - for i in range(len(s)): - if 0x80 <= ord(s[i]) <= 0xBF: # count the continuation bytes - continuation_bytes += 1 - return len(s) - continuation_bytes - def codepoint_at_pos(code, pos): """ Give a codepoint in code at pos - assumes valid utf8, no checking! 
""" From pypy.commits at gmail.com Fri Dec 1 12:20:13 2017 From: pypy.commits at gmail.com (arigo) Date: Fri, 01 Dec 2017 09:20:13 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: Start of step 1 Message-ID: <5a218f4d.4195df0a.e85b0.e994@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93233:3c9e5313506f Date: 2017-12-01 18:19 +0100 http://bitbucket.org/pypy/pypy/changeset/3c9e5313506f/ Log: Start of step 1 Making sure that we don't use integer arithmetic uncheckedly on string positions, because they are going to be byte positions with utf8. In-progress. diff --git a/rpython/rlib/debug.py b/rpython/rlib/debug.py --- a/rpython/rlib/debug.py +++ b/rpython/rlib/debug.py @@ -316,14 +316,21 @@ class ExpectedRegularInt(Exception): pass +class NegativeArgumentNotAllowed(Exception): + pass + def check_nonneg(x): """Give a translation-time error if 'x' is not known to be non-negative. To help debugging, this also gives a translation-time error if 'x' is actually typed as an r_uint (in which case the call to check_nonneg() is a bit strange and probably unexpected). """ - assert type(x)(-1) < 0 # otherwise, 'x' is a r_uint or similar - assert x >= 0 + try: + assert type(x)(-1) < 0 # otherwise, 'x' is a r_uint or similar + except NegativeArgumentNotAllowed: + pass + else: + assert x >= 0 return x class Entry(ExtRegistryEntry): diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -83,6 +83,9 @@ def __init__(self, msg): self.msg = msg +class EndOfString(Exception): + pass + class AbstractMatchContext(object): """Abstract base class""" _immutable_fields_ = ['pattern[*]', 'flags', 'end'] @@ -135,8 +138,12 @@ """Similar to str().""" raise NotImplementedError + def debug_check_pos(self, pos): + pass + def get_mark(self, gid): - return find_mark(self.match_marks, gid) + mark = find_mark(self.match_marks, gid) + return self.slowly_convert_byte_pos_to_index(mark) def flatten_marks(self): # for testing @@ -317,7 +324,7 @@ ctx.jitdriver_RepeatOne.jit_merge_point( self=self, ptr=ptr, ctx=ctx, nextppos=nextppos) result = sre_match(ctx, nextppos, ptr, self.start_marks) - ptr -= 1 + ptr = ctx.prev_or_minus1(ptr) if result is not None: self.subresult = result self.start_ptr = ptr @@ -331,28 +338,31 @@ reds=['ptr', 'self', 'ctx'], debugprint=(2, 0)) # indices in 'greens' - def __init__(self, nextppos, ppos3, maxptr, ptr, marks): + def __init__(self, nextppos, ppos3, max_count, ptr, marks): self.nextppos = nextppos self.ppos3 = ppos3 - self.maxptr = maxptr + self.max_count = max_count self.start_ptr = ptr self.start_marks = marks def find_first_result(self, ctx): ptr = self.start_ptr nextppos = self.nextppos + max_count = self.max_count ppos3 = self.ppos3 - while ptr <= self.maxptr: + while max_count >= 0: ctx.jitdriver_MinRepeatOne.jit_merge_point( self=self, ptr=ptr, ctx=ctx, nextppos=nextppos, ppos3=ppos3) result = sre_match(ctx, nextppos, ptr, self.start_marks) if result is not None: self.subresult = result self.start_ptr = ptr + self.max_count = max_count return self if not self.next_char_ok(ctx, ptr, ppos3): break - ptr += 1 + ptr = ctx.next(ptr) + max_count -= 1 def find_next_result(self, ctx): ptr = self.start_ptr @@ -520,6 +530,7 @@ need all results; in that case we use the method move_to_next_result() of the MatchResult.""" while True: + ctx.debug_check_pos(ptr) op = ctx.pat(ppos) ppos += 1 @@ -551,22 +562,25 @@ # if ptr >= ctx.end or rsre_char.is_linebreak(ctx.str(ptr)): return - 
ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_ANY_ALL: # match anything # if ptr >= ctx.end: return - ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_ASSERT: # assert subpattern # <0=skip> <1=back> - ptr1 = ptr - ctx.pat(ppos+1) + try: + ptr1 = ctx.prev_n(ptr, ctx.pat(ppos+1), ctx.ZERO) + except EndOfString: + return saved = ctx.fullmatch_only ctx.fullmatch_only = False - stop = ptr1 < 0 or sre_match(ctx, ppos + 2, ptr1, marks) is None + stop = sre_match(ctx, ppos + 2, ptr1, marks) is None ctx.fullmatch_only = saved if stop: return @@ -576,14 +590,17 @@ elif op == OPCODE_ASSERT_NOT: # assert not subpattern # <0=skip> <1=back> - ptr1 = ptr - ctx.pat(ppos+1) - saved = ctx.fullmatch_only - ctx.fullmatch_only = False - stop = (ptr1 >= 0 and sre_match(ctx, ppos + 2, ptr1, marks) - is not None) - ctx.fullmatch_only = saved - if stop: - return + try: + ptr1 = ctx.prev_n(ptr, ctx.pat(ppos+1), ctx.ZERO) + except EndOfString: + pass + else: + saved = ctx.fullmatch_only + ctx.fullmatch_only = False + stop = sre_match(ctx, ppos + 2, ptr1, marks) is not None + ctx.fullmatch_only = saved + if stop: + return ppos += ctx.pat(ppos) elif op == OPCODE_AT: @@ -661,7 +678,7 @@ elif op == OPCODE_INFO: # optimization info block # <0=skip> <1=flags> <2=min> ... - if (ctx.end - ptr) < ctx.pat(ppos+2): + if ctx.maximum_distance(ptr, ctx.end) < ctx.pat(ppos+2): return ppos += ctx.pat(ppos) @@ -674,7 +691,7 @@ if ptr >= ctx.end or ctx.str(ptr) != ctx.pat(ppos): return ppos += 1 - ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_LITERAL_IGNORE: # match literal string, ignoring case @@ -743,8 +760,9 @@ # use the MAX_REPEAT operator. # <1=min> <2=max> item tail start = ptr - minptr = start + ctx.pat(ppos+1) - if minptr > ctx.end: + try: + minptr = ctx.next_n(start, ctx.pat(ppos+1), ctx.end) + except EndOfString: return # cannot match ptr = find_repetition_end(ctx, ppos+3, start, ctx.pat(ppos+2), marks) @@ -765,7 +783,7 @@ start = ptr min = ctx.pat(ppos+1) if min > 0: - minptr = ptr + min + min_count = ptr + min if minptr > ctx.end: return # cannot match # count using pattern min as the maximum @@ -773,14 +791,12 @@ if ptr < minptr: return # did not match minimum number of times - maxptr = ctx.end + max_count = sys.maxint max = ctx.pat(ppos+2) if max != rsre_char.MAXREPEAT: - maxptr1 = start + max - if maxptr1 <= maxptr: - maxptr = maxptr1 + max_count = max nextppos = ppos + ctx.pat(ppos) - result = MinRepeatOneMatchResult(nextppos, ppos+3, maxptr, + result = MinRepeatOneMatchResult(nextppos, ppos+3, max_count, ptr, marks) return result.find_first_result(ctx) @@ -818,7 +834,7 @@ @specializectx def find_repetition_end(ctx, ppos, ptr, maxcount, marks): end = ctx.end - ptrp1 = ptr + 1 + ptrp1 = ctx.next(ptr) # First get rid of the cases where we don't have room for any match. 
if maxcount <= 0 or ptrp1 > end: return ptr @@ -904,7 +920,7 @@ ctx.jitdriver_MatchIn.jit_merge_point(ctx=ctx, ptr=ptr, end=end, ppos=ppos) if ptr < end and checkerfn(ctx, ptr, ppos): - ptr += 1 + ptr = ctx.next(ptr) else: return ptr elif checkerfn == match_IN_IGNORE: @@ -927,7 +943,7 @@ @specializectx def fre(ctx, ptr, end, ppos): while ptr < end and checkerfn(ctx, ptr, ppos): - ptr += 1 + ptr = ctx.next(ptr) return ptr fre = func_with_new_name(fre, 'fre_' + checkerfn.__name__) return fre diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rsre/test/support.py @@ -0,0 +1,102 @@ +import sys, random +from rpython.rlib import debug +from rpython.rlib.rsre.rsre_core import _adjust, match_context +from rpython.rlib.rsre.rsre_core import StrMatchContext, EndOfString + + +class Position(object): + def __init__(self, p): + assert isinstance(p, int) + if p < 0: + raise debug.NegativeArgumentNotAllowed( + "making a Position with byte index %r" % p) + self._p = p + def __repr__(self): + return '' % (self._p) + def __cmp__(self, other): + if not isinstance(other, (Position, MinusOnePosition)): + raise TypeError("cannot compare %r with %r" % (self, other)) + return cmp(self._p, other._p) + +class MinusOnePosition(object): + _p = -1 + def __repr__(self): + return '' + def __cmp__(self, other): + if not isinstance(other, (Position, MinusOnePosition)): + raise TypeError("cannot compare %r with %r" % (self, other)) + return cmp(self._p, other._p) + + +class MatchContextForTests(StrMatchContext): + """Concrete subclass for matching in a plain string, tweaked for tests""" + + ZERO = Position(0) + MINUS1 = MinusOnePosition() + EXACT_DISTANCE = False + + def next(self, position): + assert isinstance(position, Position) + return Position(position._p + 1) + + def prev_or_minus1(self, position): + assert isinstance(position, Position) + if position._p == 0: + return self.MINUS1 + return Position(position._p - 1) + + def next_n(self, position, n, end_position): + assert isinstance(position, Position) + assert isinstance(end_position, Position) + assert position._p <= end_position._p + r = position._p + n + if r > end_position._p: + raise EndOfString + return Position(r) + + def prev_n(self, position, n, start_position): + assert isinstance(position, Position) + assert isinstance(start_position, Position) + assert position._p >= start_position._p + r = position._p - n + if r < start_position._p: + raise EndOfString + return Position(r) + + def slowly_convert_byte_pos_to_index(self, position): + assert isinstance(position, Position) + return position._p + + def str(self, position): + assert isinstance(position, Position) + return ord(self._string[position._p]) + + def debug_check_pos(self, position): + assert isinstance(position, Position) + + #def minimum_distance(self, position_low, position_high): + # """Return an estimate. The real value may be higher.""" + # assert isinstance(position_low, Position) + # assert isinstance(position_high, Position) + # dist = position_high._p - position_low._p + # if dist == 0: + # return 0 + # return random.randrange(1, dist + 1) + + def maximum_distance(self, position_low, position_high): + """Return an estimate. 
The real value may be lower.""" + assert isinstance(position_low, Position) + assert isinstance(position_high, Position) + return position_high._p - position_low._p + random.randrange(0, 10) + + +def match(pattern, string, start=0, end=sys.maxint, flags=0, fullmatch=False): + start, end = _adjust(start, end, len(string)) + start = Position(start) + end = Position(end) + ctx = MatchContextForTests(pattern, string, start, end, flags) + ctx.fullmatch_only = fullmatch + if match_context(ctx): + return ctx + else: + return None diff --git a/rpython/rlib/rsre/test/test_match.py b/rpython/rlib/rsre/test/test_match.py --- a/rpython/rlib/rsre/test/test_match.py +++ b/rpython/rlib/rsre/test/test_match.py @@ -1,6 +1,7 @@ import re, random, py -from rpython.rlib.rsre import rsre_core, rsre_char +from rpython.rlib.rsre import rsre_char from rpython.rlib.rsre.rpy import get_code, VERSION +from rpython.rlib.rsre.test.support import match def get_code_and_re(regexp): @@ -16,61 +17,61 @@ def test_or(self): r = get_code(r"a|bc|def") - assert rsre_core.match(r, "a") - assert rsre_core.match(r, "bc") - assert rsre_core.match(r, "def") - assert not rsre_core.match(r, "ghij") + assert match(r, "a") + assert match(r, "bc") + assert match(r, "def") + assert not match(r, "ghij") def test_any(self): r = get_code(r"ab.cd") - assert rsre_core.match(r, "abXcdef") - assert not rsre_core.match(r, "ab\ncdef") - assert not rsre_core.match(r, "abXcDef") + assert match(r, "abXcdef") + assert not match(r, "ab\ncdef") + assert not match(r, "abXcDef") def test_any_repetition(self): r = get_code(r"ab.*cd") - assert rsre_core.match(r, "abXXXXcdef") - assert rsre_core.match(r, "abcdef") - assert not rsre_core.match(r, "abX\nXcdef") - assert not rsre_core.match(r, "abXXXXcDef") + assert match(r, "abXXXXcdef") + assert match(r, "abcdef") + assert not match(r, "abX\nXcdef") + assert not match(r, "abXXXXcDef") def test_any_all(self): r = get_code(r"(?s)ab.cd") - assert rsre_core.match(r, "abXcdef") - assert rsre_core.match(r, "ab\ncdef") - assert not rsre_core.match(r, "ab\ncDef") + assert match(r, "abXcdef") + assert match(r, "ab\ncdef") + assert not match(r, "ab\ncDef") def test_any_all_repetition(self): r = get_code(r"(?s)ab.*cd") - assert rsre_core.match(r, "abXXXXcdef") - assert rsre_core.match(r, "abcdef") - assert rsre_core.match(r, "abX\nXcdef") - assert not rsre_core.match(r, "abX\nXcDef") + assert match(r, "abXXXXcdef") + assert match(r, "abcdef") + assert match(r, "abX\nXcdef") + assert not match(r, "abX\nXcDef") def test_assert(self): r = get_code(r"abc(?=def)(.)") - res = rsre_core.match(r, "abcdefghi") + res = match(r, "abcdefghi") assert res is not None and res.get_mark(1) == 4 - assert not rsre_core.match(r, "abcdeFghi") + assert not match(r, "abcdeFghi") def test_assert_not(self): r = get_code(r"abc(?!def)(.)") - res = rsre_core.match(r, "abcdeFghi") + res = match(r, "abcdeFghi") assert res is not None and res.get_mark(1) == 4 - assert not rsre_core.match(r, "abcdefghi") + assert not match(r, "abcdefghi") def test_lookbehind(self): r = get_code(r"([a-z]*)(?<=de)") - assert rsre_core.match(r, "ade") - res = rsre_core.match(r, "adefg") + assert match(r, "ade") + res = match(r, "adefg") assert res is not None and res.get_mark(1) == 3 - assert not rsre_core.match(r, "abc") - assert not rsre_core.match(r, "X") - assert not rsre_core.match(r, "eX") + assert not match(r, "abc") + assert not match(r, "X") + assert not match(r, "eX") def test_negative_lookbehind(self): def found(s): - res = rsre_core.match(r, s) + res = 
match(r, s) assert res is not None return res.get_mark(1) r = get_code(r"([a-z]*)(? OPCODE_RANGE_IGNORE - assert rsre_core.match(r, u"\U00010428") + assert match(r, u"\U00010428") From pypy.commits at gmail.com Fri Dec 1 14:07:37 2017 From: pypy.commits at gmail.com (pjenvey) Date: Fri, 01 Dec 2017 11:07:37 -0800 (PST) Subject: [pypy-commit] pypy default: fill in struct_rusage's full name Message-ID: <5a21a879.4fabdf0a.ecada.458f@mx.google.com> Author: Philip Jenvey Branch: Changeset: r93234:49077d47e615 Date: 2017-12-01 11:06 -0800 http://bitbucket.org/pypy/pypy/changeset/49077d47e615/ Log: fill in struct_rusage's full name diff --git a/lib_pypy/resource.py b/lib_pypy/resource.py --- a/lib_pypy/resource.py +++ b/lib_pypy/resource.py @@ -20,6 +20,7 @@ or via the attributes ru_utime, ru_stime, ru_maxrss, and so on.""" __metaclass__ = _structseq.structseqtype + name = "resource.struct_rusage" ru_utime = _structseq.structseqfield(0, "user time used") ru_stime = _structseq.structseqfield(1, "system time used") From pypy.commits at gmail.com Fri Dec 1 16:17:26 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 01 Dec 2017 13:17:26 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix handling of arguments containing null bytes in zipimporter methods Message-ID: <5a21c6e6.c7a4df0a.6e59d.ba0f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93235:e6985c577de2 Date: 2017-12-01 21:16 +0000 http://bitbucket.org/pypy/pypy/changeset/e6985c577de2/ Log: Fix handling of arguments containing null bytes in zipimporter methods diff --git a/pypy/module/imp/importing.py b/pypy/module/imp/importing.py --- a/pypy/module/imp/importing.py +++ b/pypy/module/imp/importing.py @@ -292,7 +292,7 @@ ext + PYC_TAG + '.pyc') return result -#@signature(types.str0(), returns=types.str0()) + at signature(types.str0(), returns=types.any()) def make_source_pathname(pathname): "Given the path to a .pyc file, return the path to its .py file." 
# (...)/__pycache__/foo..pyc -> (...)/foo.py diff --git a/pypy/module/zipimport/interp_zipimport.py b/pypy/module/zipimport/interp_zipimport.py --- a/pypy/module/zipimport/interp_zipimport.py +++ b/pypy/module/zipimport/interp_zipimport.py @@ -1,6 +1,15 @@ import os import stat +from rpython.annotator.model import s_Str0 +from rpython.rlib.objectmodel import enforceargs +from rpython.rlib.unroll import unrolling_iterable +from rpython.rlib.rzipfile import RZipFile, BadZipfile +from rpython.rlib.rzlib import RZlibError +from rpython.rlib.rstring import assert_str0 +from rpython.rlib.signature import signature, finishsigs +from rpython.rlib import types + from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec @@ -8,9 +17,6 @@ from pypy.interpreter.module import Module from pypy.module.imp import importing from pypy.module.zlib.interp_zlib import zlib_error -from rpython.rlib.unroll import unrolling_iterable -from rpython.rlib.rzipfile import RZipFile, BadZipfile -from rpython.rlib.rzlib import RZlibError ZIPSEP = '/' # note that zipfiles always use slash, but for OSes with other @@ -116,6 +122,7 @@ zip_cache = W_ZipCache() + at finishsigs class W_ZipImporter(W_Root): def __init__(self, space, name, filename, zip_file, prefix): self.space = space @@ -138,12 +145,14 @@ filename = filename.replace(os.path.sep, ZIPSEP) return filename + @signature(types.self(), types.str0(), returns=types.str0()) def corr_zname(self, fname): if ZIPSEP != os.path.sep: return fname.replace(ZIPSEP, os.path.sep) else: return fname + @enforceargs(filename=s_Str0, typecheck=False) def import_py_file(self, space, modname, filename, buf, pkgpath): w_mod = Module(space, space.newtext(modname)) real_name = self.filename + os.path.sep + self.corr_zname(filename) @@ -194,20 +203,21 @@ return False return True + @enforceargs(filename=s_Str0, typecheck=False) def import_pyc_file(self, space, modname, filename, buf, pkgpath): magic = importing._get_long(buf[:4]) timestamp = importing._get_long(buf[4:8]) if not self.can_use_pyc(space, filename, magic, timestamp): return None # zipimport ignores the size field - buf = buf[12:] # XXX ugly copy, should use sequential read instead + buf = buf[12:] # XXX ugly copy, should use sequential read instead w_mod = Module(space, space.newtext(modname)) real_name = self.filename + os.path.sep + self.corr_zname(filename) space.setattr(w_mod, space.newtext('__loader__'), self) importing._prepare_module(space, w_mod, real_name, pkgpath) - result = importing.load_compiled_module(space, space.newtext(modname), w_mod, - real_name, magic, timestamp, - buf) + result = importing.load_compiled_module( + space, space.newtext(modname), + w_mod, real_name, magic, timestamp, buf) return result def have_modulefile(self, space, filename): @@ -227,14 +237,14 @@ return self def make_filename(self, fullname): - startpos = fullname.rfind('.') + 1 # 0 when not found + startpos = fullname.rfind('.') + 1 # 0 when not found assert startpos >= 0 subname = fullname[startpos:] if ZIPSEP == os.path.sep: return self.prefix + subname.replace('.', '/') else: - return self.prefix.replace(os.path.sep, ZIPSEP) + \ - subname.replace('.', '/') + return (self.prefix.replace(os.path.sep, ZIPSEP) + + subname.replace('.', '/')) def make_co_filename(self, filename): """ @@ -248,6 +258,12 @@ fullname = space.text_w(w_fullname) filename = self.make_filename(fullname) for compiled, is_package, ext in ENUMERATE_EXTS: + if 
'\x00' in filename: + # Special case to make the annotator happy: + # filenames inside ZIPs shouldn't contain NULs so no module can + # possibly be found in this case + break + filename = assert_str0(filename) fname = filename + ext try: buf = self.zip_file.read(fname) @@ -302,6 +318,12 @@ fullname = space.text_w(w_fullname) filename = self.make_filename(fullname) for compiled, _, ext in ENUMERATE_EXTS: + if '\x00' in filename: + # Special case to make the annotator happy: + # filenames inside ZIPs shouldn't contain NULs so no module can + # possibly be found in this case + break + filename = assert_str0(filename) if self.have_modulefile(space, filename + ext): w_source = self.get_data(space, filename + ext) source = space.bytes_w(w_source) @@ -328,6 +350,12 @@ filename = self.make_filename(fullname) found = False for compiled, _, ext in ENUMERATE_EXTS: + if '\x00' in filename: + # Special case to make the annotator happy: + # filenames inside ZIPs shouldn't contain NULs so no module can + # possibly be found in this case + break + filename = assert_str0(filename) fname = filename + ext if self.have_modulefile(space, fname): if not compiled: @@ -349,6 +377,12 @@ fullname = space.text_w(w_fullname) filename = self.make_filename(fullname) for _, is_package, ext in ENUMERATE_EXTS: + if '\x00' in filename: + # Special case to make the annotator happy: + # filenames inside ZIPs shouldn't contain NULs so no module can + # possibly be found in this case + break + filename = assert_str0(filename) if self.have_modulefile(space, filename + ext): return space.newfilename(self.filename + os.path.sep + self.corr_zname(filename + ext)) @@ -361,6 +395,12 @@ fullname = space.text_w(w_fullname) filename = self.make_filename(fullname) for _, is_package, ext in ENUMERATE_EXTS: + if '\x00' in filename: + # Special case to make the annotator happy: + # filenames inside ZIPs shouldn't contain NULs so no module can + # possibly be found in this case + break + filename = assert_str0(filename) if self.have_modulefile(space, filename + ext): return space.newbool(is_package) raise oefmt(get_error(space), @@ -373,7 +413,14 @@ return space.newfilename(self.filename) def _find_loader(self, space, fullname): + if '\x00' in fullname: + # Special case to make the annotator happy: + # filenames inside ZIPs shouldn't contain NULs so no module can + # possibly be found in this case + return False, None + fullname = assert_str0(fullname) filename = self.make_filename(fullname) + filename = assert_str0(filename) for _, _, ext in ENUMERATE_EXTS: if self.have_modulefile(space, filename + ext): return True, None diff --git a/pypy/module/zipimport/test/test_zipimport.py b/pypy/module/zipimport/test/test_zipimport.py --- a/pypy/module/zipimport/test/test_zipimport.py +++ b/pypy/module/zipimport/test/test_zipimport.py @@ -394,10 +394,19 @@ assert z.get_code('ä') raises(ImportError, z.get_code, 'xx') mod = z.load_module('ä') + #assert z.load_module('ä') is mod assert z.get_filename('ä') == mod.__file__ raises(ImportError, z.load_module, 'xx') raises(ImportError, z.get_filename, 'xx') assert z.archive == self.zipfile + # PyPy fix: check null byte behavior: + import sys + if '__pypy__' in sys.builtin_module_names: + raises(ImportError, z.is_package, 'ä\0 b') + raises(ImportError, z.get_source, 'ä\0 b') + raises(ImportError, z.get_code, 'ä\0 b') + raises(ImportError, z.load_module, 'ä\0 b') + raises(ImportError, z.get_filename, 'ä\0 b') def test_co_filename(self): self.writefile('mymodule.py', """ From pypy.commits at gmail.com Fri Dec 
1 22:12:50 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 01 Dec 2017 19:12:50 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix: StringIO.seek() may set the position beyond the end of the buffer Message-ID: <5a221a32.923e1c0a.49364.1670@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93236:981544a8f028 Date: 2017-12-02 03:12 +0000 http://bitbucket.org/pypy/pypy/changeset/981544a8f028/ Log: fix: StringIO.seek() may set the position beyond the end of the buffer diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -35,7 +35,8 @@ def _convert_limit(self, limit): if limit < 0 or limit > len(self.data) - self.pos: limit = len(self.data) - self.pos - assert limit >= 0 + if limit < 0: # happens when self.pos > len(self.data) + limit = 0 return limit def readline_universal(self, limit): diff --git a/pypy/module/_io/test/test_stringio.py b/pypy/module/_io/test/test_stringio.py --- a/pypy/module/_io/test/test_stringio.py +++ b/pypy/module/_io/test/test_stringio.py @@ -259,6 +259,8 @@ assert line == s i += 1 assert i == 10 + sio.seek(len(s) * 10 +1) + assert list(sio) == [] sio = io.StringIO(s * 2) sio.close() raises(ValueError, next, sio) From pypy.commits at gmail.com Fri Dec 1 22:53:18 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 01 Dec 2017 19:53:18 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Disable a test that seems to segfault for unclear reasons Message-ID: <5a2223ae.0485df0a.e1bb0.583e@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93237:9db39ab84107 Date: 2017-12-02 03:52 +0000 http://bitbucket.org/pypy/pypy/changeset/9db39ab84107/ Log: Disable a test that seems to segfault for unclear reasons diff --git a/pypy/module/cpyext/test/test_memoryobject.py b/pypy/module/cpyext/test/test_memoryobject.py --- a/pypy/module/cpyext/test/test_memoryobject.py +++ b/pypy/module/cpyext/test/test_memoryobject.py @@ -66,6 +66,7 @@ result = module.fillinfo() assert b"hello, world." 
== result + @pytest.mark.skip(reason="segfaults on linux buildslave") def test_0d(self): module = self.import_extension('foo', [ ("create_view", "METH_VARARGS", From pypy.commits at gmail.com Fri Dec 1 23:40:55 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 01 Dec 2017 20:40:55 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix test Message-ID: <5a222ed7.90a9df0a.6473d.4720@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93238:c31995f66da2 Date: 2017-12-02 04:40 +0000 http://bitbucket.org/pypy/pypy/changeset/c31995f66da2/ Log: fix test diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py --- a/pypy/module/cpyext/test/test_typeobject.py +++ b/pypy/module/cpyext/test/test_typeobject.py @@ -454,6 +454,7 @@ assert module.tp_descr_set(p) is True def test_text_signature(self): + import sys module = self.import_module(name='docstrings') assert module.SomeType.__text_signature__ == '()' assert module.SomeType.__doc__ == 'A type with a signature' From pypy.commits at gmail.com Fri Dec 1 23:54:44 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 01 Dec 2017 20:54:44 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix tests Message-ID: <5a223214.8b951c0a.29a92.2d8f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93239:13acefc43cbf Date: 2017-12-02 04:54 +0000 http://bitbucket.org/pypy/pypy/changeset/13acefc43cbf/ Log: fix tests diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -504,9 +504,9 @@ def test_encode_fsdefault(self, space): w_u = space.wrap(u'späm') - w_s = PyUnicode_EncodeFSDefault(space, w_u) - if w_s is None: - PyErr_Clear(space) + try: + w_s = PyUnicode_EncodeFSDefault(space, w_u) + except OperationError: py.test.skip("Requires a unicode-aware fsencoding") with rffi.scoped_str2charp(space.str_w(w_s)) as encoded: w_decoded = PyUnicode_DecodeFSDefaultAndSize(space, encoded, space.len_w(w_s)) @@ -623,8 +623,11 @@ def test_fromobject(self, space): w_u = space.wrap(u'a') assert PyUnicode_FromObject(space, w_u) is w_u - assert space.unwrap( - PyUnicode_FromObject(space, space.newbytes('test'))) == "b'test'" + with raises_w(space, TypeError): + PyUnicode_FromObject(space, space.newbytes('test')) + with raises_w(space, TypeError): + PyUnicode_FromObject(space, space.newint(42)) + def test_decode(self, space): b_text = rffi.str2charp('caf\x82xx') From pypy.commits at gmail.com Sat Dec 2 00:10:05 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 01 Dec 2017 21:10:05 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix rffi nonsense Message-ID: <5a2235ad.0ab8df0a.762be.6f1c@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93240:9847200316ed Date: 2017-12-02 05:09 +0000 http://bitbucket.org/pypy/pypy/changeset/9847200316ed/ Log: fix rffi nonsense diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -700,8 +700,8 @@ value = 1 else: value = 0 - pendian = lltype.malloc(rffi.INTP.TO, 1, flavor='raw') - pendian[0] = rffi.cast(rffi.INT, value) + pendian = lltype.malloc(INT_realP.TO, 1, flavor='raw') + pendian[0] = rffi.cast(rffi.INT_real, value) else: pendian = None @@ -736,8 +736,8 @@ value = 1 else: value = 0 - pendian = lltype.malloc(rffi.INTP.TO, 1, flavor='raw') - pendian[0] = 
rffi.cast(rffi.INT, value) + pendian = lltype.malloc(INT_realP.TO, 1, flavor='raw') + pendian[0] = rffi.cast(rffi.INT_real, value) else: pendian = None From pypy.commits at gmail.com Sun Dec 3 09:13:16 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 03 Dec 2017 06:13:16 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: in-progress Message-ID: <5a24067c.4dd91c0a.1ae71.575e@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93241:87a98889b109 Date: 2017-12-03 15:12 +0100 http://bitbucket.org/pypy/pypy/changeset/87a98889b109/ Log: in-progress diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -324,7 +324,10 @@ ctx.jitdriver_RepeatOne.jit_merge_point( self=self, ptr=ptr, ctx=ctx, nextppos=nextppos) result = sre_match(ctx, nextppos, ptr, self.start_marks) - ptr = ctx.prev_or_minus1(ptr) + try: + ptr = ctx.prev(ptr) + except EndOfString: + ptr = -1 if result is not None: self.subresult = result self.start_ptr = ptr @@ -440,12 +443,12 @@ min = ctx.pat(ppos+1) if enum is not None: # matched one more 'item'. record it and continue. - last_match_length = ctx.match_end - ptr + last_match_zero_length = (ctx.match_end == ptr) self.pending = Pending(ptr, marks, enum, self.pending) self.num_pending += 1 ptr = ctx.match_end marks = ctx.match_marks - if last_match_length == 0 and self.num_pending >= min: + if last_match_zero_length and self.num_pending >= min: # zero-width protection: after an empty match, if there # are enough matches, don't try to match more. Instead, # fall through to trying to match 'tail'. @@ -629,30 +632,30 @@ elif op == OPCODE_GROUPREF: # match backreference # - startptr, length = get_group_ref(marks, ctx.pat(ppos)) - if length < 0: + startptr, length_bytes = get_group_ref(ctx, marks, ctx.pat(ppos)) + if length_bytes < 0: return # group was not previously defined - if not match_repeated(ctx, ptr, startptr, length): + if not match_repeated(ctx, ptr, startptr, length_bytes): return # no match - ptr += length + ptr = ctx.go_forward_by_bytes(ptr, length_bytes) ppos += 1 elif op == OPCODE_GROUPREF_IGNORE: # match backreference # - startptr, length = get_group_ref(marks, ctx.pat(ppos)) - if length < 0: + startptr, length_bytes = get_group_ref(ctx, marks, ctx.pat(ppos)) + if length_bytes < 0: return # group was not previously defined - if not match_repeated_ignore(ctx, ptr, startptr, length): + ptr = match_repeated_ignore(ctx, ptr, startptr, length_bytes) + if ptr < ctx.ZERO: return # no match - ptr += length ppos += 1 elif op == OPCODE_GROUPREF_EXISTS: # conditional match depending on the existence of a group # codeyes codeno ... 
- _, length = get_group_ref(marks, ctx.pat(ppos)) - if length >= 0: + _, length_bytes = get_group_ref(ctx, marks, ctx.pat(ppos)) + if length_bytes >= 0: ppos += 2 # jump to 'codeyes' else: ppos += ctx.pat(ppos+1) # jump to 'codeno' @@ -664,7 +667,7 @@ ctx.str(ptr)): return ppos += ctx.pat(ppos) - ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_IN_IGNORE: # match set member (or non_member), ignoring case @@ -673,7 +676,7 @@ ctx.lowstr(ptr)): return ppos += ctx.pat(ppos) - ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_INFO: # optimization info block @@ -699,7 +702,7 @@ if ptr >= ctx.end or ctx.lowstr(ptr) != ctx.pat(ppos): return ppos += 1 - ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_MARK: # set mark @@ -804,32 +807,36 @@ raise Error("bad pattern code %d" % op) -def get_group_ref(marks, groupnum): +def get_group_ref(ctx, marks, groupnum): gid = groupnum * 2 startptr = find_mark(marks, gid) - if startptr < 0: + if startptr < ctx.ZERO: return 0, -1 endptr = find_mark(marks, gid + 1) - length = endptr - startptr # < 0 if endptr < startptr (or if endptr=-1) - return startptr, length + length_bytes = ctx.bytes_difference(endptr, startptr) + # < 0 if endptr < startptr (or if endptr=-1) + return startptr, length_bytes @specializectx -def match_repeated(ctx, ptr, oldptr, length): - if ptr + length > ctx.end: +def match_repeated(ctx, ptr, oldptr, length_bytes): + if ctx.bytes_difference(ctx.end, ptr) < length_bytes: return False - for i in range(length): - if ctx.str(ptr + i) != ctx.str(oldptr + i): + for i in range(length_bytes): + if ctx.get_single_byte(ptr, i) != ctx.get_single_byte(oldptr, i): return False return True @specializectx -def match_repeated_ignore(ctx, ptr, oldptr, length): - if ptr + length > ctx.end: - return False - for i in range(length): - if ctx.lowstr(ptr + i) != ctx.lowstr(oldptr + i): - return False - return True +def match_repeated_ignore(ctx, ptr, oldptr, length_bytes): + oldend = ctx.go_forward_by_bytes(oldptr, length_bytes) + while oldptr < oldend: + if ptr >= ctx.end: + return -1 + if ctx.lowstr(ptr) != ctx.lowstr(oldptr): + return -1 + ptr = ctx.next(ptr) + oldptr = ctx.next(oldptr) + return ptr @specializectx def find_repetition_end(ctx, ppos, ptr, maxcount, marks): @@ -934,7 +941,7 @@ ctx.jitdriver_MatchInIgnore.jit_merge_point(ctx=ctx, ptr=ptr, end=end, ppos=ppos) if ptr < end and checkerfn(ctx, ptr, ppos): - ptr += 1 + ptr = ctx.next(ptr) else: return ptr else: @@ -996,9 +1003,8 @@ return at_non_boundary(ctx, ptr) elif atcode == AT_END: - remaining_chars = ctx.end - ptr - return remaining_chars <= 0 or ( - remaining_chars == 1 and rsre_char.is_linebreak(ctx.str(ptr))) + return (ptr == ctx.end or + (ctx.next(ptr) == ctx.end and rsre_char.is_linebreak(ctx.str(ptr)))) elif atcode == AT_END_LINE: return ptr == ctx.end or rsre_char.is_linebreak(ctx.str(ptr)) diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py --- a/rpython/rlib/rsre/test/support.py +++ b/rpython/rlib/rsre/test/support.py @@ -14,35 +14,27 @@ def __repr__(self): return '' % (self._p) def __cmp__(self, other): - if not isinstance(other, (Position, MinusOnePosition)): - raise TypeError("cannot compare %r with %r" % (self, other)) - return cmp(self._p, other._p) - -class MinusOnePosition(object): - _p = -1 - def __repr__(self): - return '' - def __cmp__(self, other): - if not isinstance(other, (Position, MinusOnePosition)): - raise TypeError("cannot compare %r with %r" % (self, other)) - return cmp(self._p, other._p) + if isinstance(other, Position): + return 
cmp(self._p, other._p) + if type(other) is int and other == -1: + return cmp(self._p, -1) + raise TypeError("cannot compare %r with %r" % (self, other)) class MatchContextForTests(StrMatchContext): """Concrete subclass for matching in a plain string, tweaked for tests""" ZERO = Position(0) - MINUS1 = MinusOnePosition() EXACT_DISTANCE = False def next(self, position): assert isinstance(position, Position) return Position(position._p + 1) - def prev_or_minus1(self, position): + def prev(self, position): assert isinstance(position, Position) if position._p == 0: - return self.MINUS1 + raise EndOfString return Position(position._p - 1) def next_n(self, position, n, end_position): @@ -89,6 +81,21 @@ assert isinstance(position_high, Position) return position_high._p - position_low._p + random.randrange(0, 10) + def bytes_difference(self, position1, position2): + assert isinstance(position1, Position) + assert isinstance(position2, Position) + return position1._p - position2._p + + def get_single_byte(self, base_position, index): + assert isinstance(base_position, Position) + assert isinstance(index, int) + return ord(self._string[base_position._p + index]) + + def go_forward_by_bytes(self, base_position, index): + assert isinstance(base_position, Position) + assert isinstance(index, int) + return Position(base_position._p + index) + def match(pattern, string, start=0, end=sys.maxint, flags=0, fullmatch=False): start, end = _adjust(start, end, len(string)) From pypy.commits at gmail.com Sun Dec 3 09:21:22 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 03 Dec 2017 06:21:22 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: in-progress. test_match passes again Message-ID: <5a240862.e1acdf0a.492d7.a594@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93242:dd8e0cdfa795 Date: 2017-12-03 15:20 +0100 http://bitbucket.org/pypy/pypy/changeset/dd8e0cdfa795/ Log: in-progress. test_match passes again diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -149,7 +149,8 @@ # for testing if self.match_marks_flat is None: self._compute_flattened_marks() - return self.match_marks_flat + return [self.slowly_convert_byte_pos_to_index(i) + for i in self.match_marks_flat] def _compute_flattened_marks(self): self.match_marks_flat = [self.match_start, self.match_end] @@ -371,7 +372,7 @@ ptr = self.start_ptr if not self.next_char_ok(ctx, ptr, self.ppos3): return - self.start_ptr = ptr + 1 + self.start_ptr = ctx.next(ptr) return self.find_first_result(ctx) def next_char_ok(self, ctx, ptr, ppos): @@ -717,7 +718,7 @@ if ptr >= ctx.end or ctx.str(ptr) == ctx.pat(ppos): return ppos += 1 - ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_NOT_LITERAL_IGNORE: # match if it's not a literal string, ignoring case @@ -725,7 +726,7 @@ if ptr >= ctx.end or ctx.lowstr(ptr) == ctx.pat(ppos): return ppos += 1 - ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_REPEAT: # general repeat. 
in this version of the re module, all the work @@ -786,9 +787,10 @@ start = ptr min = ctx.pat(ppos+1) if min > 0: - min_count = ptr + min - if minptr > ctx.end: - return # cannot match + try: + minptr = ctx.next_n(ptr, min, ctx.end) + except EndOfString: + return # cannot match # count using pattern min as the maximum ptr = find_repetition_end(ctx, ppos+3, ptr, min, marks) if ptr < minptr: @@ -990,11 +992,12 @@ def sre_at(ctx, atcode, ptr): if (atcode == AT_BEGINNING or atcode == AT_BEGINNING_STRING): - return ptr == 0 + return ptr == ctx.ZERO elif atcode == AT_BEGINNING_LINE: - prevptr = ptr - 1 - return prevptr < 0 or rsre_char.is_linebreak(ctx.str(prevptr)) + if ptr <= ctx.ZERO: + return True + return rsre_char.is_linebreak(ctx.str(ctx.prev(ptr))) elif atcode == AT_BOUNDARY: return at_boundary(ctx, ptr) diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py --- a/rpython/rlib/rsre/test/support.py +++ b/rpython/rlib/rsre/test/support.py @@ -56,6 +56,8 @@ return Position(r) def slowly_convert_byte_pos_to_index(self, position): + if type(position) is int and position == -1: + return -1 assert isinstance(position, Position) return position._p @@ -107,3 +109,6 @@ return ctx else: return None + +def fullmatch(pattern, string, start=0, end=sys.maxint, flags=0): + return match(pattern, string, start, end, flags, fullmatch=True) diff --git a/rpython/rlib/rsre/test/test_match.py b/rpython/rlib/rsre/test/test_match.py --- a/rpython/rlib/rsre/test/test_match.py +++ b/rpython/rlib/rsre/test/test_match.py @@ -1,7 +1,7 @@ import re, random, py from rpython.rlib.rsre import rsre_char from rpython.rlib.rsre.rpy import get_code, VERSION -from rpython.rlib.rsre.test.support import match +from rpython.rlib.rsre.test.support import match, fullmatch, Position def get_code_and_re(regexp): @@ -267,7 +267,7 @@ print r m = match(r, "abbbbbbbbbcdef") assert m - assert m.match_end == 11 + assert m.match_end == Position(11) def test_empty_maxuntil(self): r = get_code("\\{\\{((?:.*?)+)\\}\\}") @@ -276,30 +276,30 @@ def test_fullmatch_1(self): r = get_code(r"ab*c") - assert not rsre_core.fullmatch(r, "abbbcdef") - assert rsre_core.fullmatch(r, "abbbc") + assert not fullmatch(r, "abbbcdef") + assert fullmatch(r, "abbbc") def test_fullmatch_2(self): r = get_code(r"a(b*?)") - match = rsre_core.fullmatch(r, "abbb") + match = fullmatch(r, "abbb") assert match.group(1) == "bbb" - assert not rsre_core.fullmatch(r, "abbbc") + assert not fullmatch(r, "abbbc") def test_fullmatch_3(self): r = get_code(r"a((bp)*?)c") - match = rsre_core.fullmatch(r, "abpbpbpc") + match = fullmatch(r, "abpbpbpc") assert match.group(1) == "bpbpbp" def test_fullmatch_4(self): r = get_code(r"a((bp)*)c") - match = rsre_core.fullmatch(r, "abpbpbpc") + match = fullmatch(r, "abpbpbpc") assert match.group(1) == "bpbpbp" def test_fullmatch_assertion(self): r = get_code(r"(?=a).b") - assert rsre_core.fullmatch(r, "ab") + assert fullmatch(r, "ab") r = get_code(r"(?!a)..") - assert not rsre_core.fullmatch(r, "ab") + assert not fullmatch(r, "ab") def test_range_ignore(self): from rpython.rlib.unicodedata import unicodedb From pypy.commits at gmail.com Sun Dec 3 09:24:36 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 03 Dec 2017 06:24:36 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: fix for test_ext_opcode Message-ID: <5a240924.8a5b1c0a.b482d.a785@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93243:19e4a5fa4aa4 Date: 2017-12-03 15:24 +0100 
http://bitbucket.org/pypy/pypy/changeset/19e4a5fa4aa4/ Log: fix for test_ext_opcode diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -627,7 +627,7 @@ if (ptr == ctx.end or not rsre_char.category_dispatch(ctx.pat(ppos), ctx.str(ptr))): return - ptr += 1 + ptr = ctx.next(ptr) ppos += 1 elif op == OPCODE_GROUPREF: @@ -887,7 +887,7 @@ if end1 <= end: end = end1 while ptr < end and sre_match(ctx, ppos, ptr, marks) is not None: - ptr += 1 + ptr = ctx.next(ptr) return ptr @specializectx diff --git a/rpython/rlib/rsre/test/test_ext_opcode.py b/rpython/rlib/rsre/test/test_ext_opcode.py --- a/rpython/rlib/rsre/test/test_ext_opcode.py +++ b/rpython/rlib/rsre/test/test_ext_opcode.py @@ -5,6 +5,7 @@ from rpython.rlib.rsre import rsre_core from rpython.rlib.rsre.rsre_char import MAXREPEAT +from rpython.rlib.rsre.test.support import match, Position # import OPCODE_XX as XX for name, value in rsre_core.__dict__.items(): @@ -17,10 +18,10 @@ # it's a valid optimization because \1 is always one character long r = [MARK, 0, ANY, MARK, 1, REPEAT_ONE, 6, 0, MAXREPEAT, GROUPREF, 0, SUCCESS, SUCCESS] - assert rsre_core.match(r, "aaa").match_end == 3 + assert match(r, "aaa").match_end == Position(3) def test_min_repeat_one_with_backref(): # Python 3.5 compiles "(.)\1*?b" using MIN_REPEAT_ONE r = [MARK, 0, ANY, MARK, 1, MIN_REPEAT_ONE, 6, 0, MAXREPEAT, GROUPREF, 0, SUCCESS, LITERAL, 98, SUCCESS] - assert rsre_core.match(r, "aaab").match_end == 4 + assert match(r, "aaab").match_end == Position(4) From pypy.commits at gmail.com Sun Dec 3 09:44:59 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 03 Dec 2017 06:44:59 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: in-progress Message-ID: <5a240deb.99451c0a.c8436.3f15@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93244:4b6473b3ea05 Date: 2017-12-03 15:44 +0100 http://bitbucket.org/pypy/pypy/changeset/4b6473b3ea05/ Log: in-progress diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -1164,7 +1164,7 @@ if sre_match(ctx, base, start, None) is not None: ctx.match_start = start return True - start += 1 + start = ctx.next(start) return False install_jitdriver_spec('FastSearch', @@ -1183,6 +1183,8 @@ prefix_len = ctx.pat(5) assert prefix_len >= 0 i = 0 + j = 0 + past_start_positions = [0] * (prefix_len - 1) while True: ctx.jitdriver_FastSearch.jit_merge_point(ctx=ctx, string_position=string_position, i=i, prefix_len=prefix_len) @@ -1196,10 +1198,26 @@ i += 1 if i == prefix_len: # found a potential match - start = string_position + 1 - prefix_len - assert start >= 0 + + # This would be 'start = string_position + 1 - prefix_len' + # but it's probably faster to record the 'prefix_len' + # most recent locations, for utf8 + start = past_start_positions[j] + assert start >= ctx.ZERO prefix_skip = ctx.pat(6) - ptr = start + prefix_skip + if prefix_skip >= prefix_len - 1: + try: + ptr = ctx.next_n(string_position, + prefix_skip - (prefix_len - 1), + ctx.end) + except EndOfString: + ptr = -1 + else: + assert prefix_skip < prefix_len - 1 + j_prefix_skip = j + prefix_skip + if j_prefix_skip >= prefix_len - 1: + j_prefix_skip -= (prefix_len - 1) + ptr = past_start_positions[j_prefix_skip] #flags = ctx.pat(2) #if flags & rsre_char.SRE_INFO_LITERAL: # # matched all of pure literal pattern @@ -1209,11 +1227,16 @@ # return True pattern_offset = 
ctx.pat(1) + 1 ppos_start = pattern_offset + 2 * prefix_skip - if sre_match(ctx, ppos_start, ptr, None) is not None: + if (ptr >= ctx.ZERO and + sre_match(ctx, ppos_start, ptr, None) is not None): ctx.match_start = start return True overlap_offset = prefix_len + (7 - 1) i = ctx.pat(overlap_offset + i) - string_position += 1 + past_start_positions[j] = string_position + string_position = ctx.next(string_position) if string_position >= ctx.end: return False + j += 1 + if j == prefix_len - 1: + j = 0 diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py --- a/rpython/rlib/rsre/test/support.py +++ b/rpython/rlib/rsre/test/support.py @@ -1,6 +1,6 @@ import sys, random from rpython.rlib import debug -from rpython.rlib.rsre.rsre_core import _adjust, match_context +from rpython.rlib.rsre.rsre_core import _adjust, match_context, search_context from rpython.rlib.rsre.rsre_core import StrMatchContext, EndOfString @@ -112,3 +112,13 @@ def fullmatch(pattern, string, start=0, end=sys.maxint, flags=0): return match(pattern, string, start, end, flags, fullmatch=True) + +def search(pattern, string, start=0, end=sys.maxint, flags=0): + start, end = _adjust(start, end, len(string)) + start = Position(start) + end = Position(end) + ctx = MatchContextForTests(pattern, string, start, end, flags) + if search_context(ctx): + return ctx + else: + return None diff --git a/rpython/rlib/rsre/test/test_search.py b/rpython/rlib/rsre/test/test_search.py --- a/rpython/rlib/rsre/test/test_search.py +++ b/rpython/rlib/rsre/test/test_search.py @@ -1,44 +1,44 @@ import re, py -from rpython.rlib.rsre import rsre_core from rpython.rlib.rsre.test.test_match import get_code, get_code_and_re +from rpython.rlib.rsre.test.support import search, match class TestSearch: def test_code1(self): r_code1 = get_code(r'[abc][def][ghi]') - res = rsre_core.search(r_code1, "fooahedixxx") + res = search(r_code1, "fooahedixxx") assert res is None - res = rsre_core.search(r_code1, "fooahcdixxx") + res = search(r_code1, "fooahcdixxx") assert res is not None assert res.span() == (5, 8) def test_code2(self): r_code2 = get_code(r'\s*(.*?)') - res = rsre_core.search(r_code2, "foo bar abcdef") + res = search(r_code2, "foo bar abcdef") assert res is not None assert res.span() == (8, 34) def test_pure_literal(self): r_code3 = get_code(r'foobar') - res = rsre_core.search(r_code3, "foo bar foobar baz") + res = search(r_code3, "foo bar foobar baz") assert res is not None assert res.span() == (8, 14) def test_code3(self): r_code1 = get_code(r'\s*(.*?)') - res = rsre_core.match(r_code1, " abcdef") + res = match(r_code1, " abcdef") assert res is not None def test_max_until_0_65535(self): r_code2 = get_code(r'(?:xy)*xy') - #res = rsre_core.match(r_code2, 'def') + #res = match(r_code2, 'def') #assert res is None - #res = rsre_core.match(r_code2, 'xydef') + #res = match(r_code2, 'xydef') #assert res is not None - res = rsre_core.match(r_code2, 'xyxyxydef') + res = match(r_code2, 'xyxyxydef') assert res is not None - res = rsre_core.match(r_code2, '' + 'xy'*1000 + 'def') + res = match(r_code2, '' + 'xy'*1000 + 'def') assert res is not None def test_max_until_3_5(self): @@ -46,18 +46,18 @@ for i in range(8): s = '' + 'xy'*i + 'defdefdefdefdef' assert (r.match(s) is not None) is (3 <= i-1 <= 5) - res = rsre_core.match(r_code2, s) + res = match(r_code2, s) assert (res is not None) is (3 <= i-1 <= 5) def test_min_until_0_65535(self): r_code2 = get_code(r'(?:xy)*?xy') - res = rsre_core.match(r_code2, 'def') + res = match(r_code2, 'def') 
assert res is None - res = rsre_core.match(r_code2, 'xydef') + res = match(r_code2, 'xydef') assert res is not None - res = rsre_core.match(r_code2, 'xyxyxydef') + res = match(r_code2, 'xyxyxydef') assert res is not None - res = rsre_core.match(r_code2, '' + 'xy'*1000 + 'def') + res = match(r_code2, '' + 'xy'*1000 + 'def') assert res is not None def test_min_until_3_5(self): @@ -65,44 +65,44 @@ for i in range(8): s = '' + 'xy'*i + 'defdefdefdefdef' assert (r.match(s) is not None) is (3 <= i-1 <= 5) - res = rsre_core.match(r_code2, s) + res = match(r_code2, s) assert (res is not None) is (3 <= i-1 <= 5) def test_min_repeat_one(self): r_code3 = get_code(r'.{3,5}?y') for i in range(8): - res = rsre_core.match(r_code3, '' + 'x'*i + 'y') + res = match(r_code3, '' + 'x'*i + 'y') assert (res is not None) is (3 <= i <= 5) def test_simple_group(self): r_code4 = get_code(r'(x.)') - res = rsre_core.match(r_code4, 'xadef') + res = match(r_code4, 'xadef') assert res is not None assert res.get_mark(0) == 5 assert res.get_mark(1) == 7 def test_max_until_groups(self): r_code4 = get_code(r'(x.)*xy') - res = rsre_core.match(r_code4, 'xaxbxydef') + res = match(r_code4, 'xaxbxydef') assert res is not None assert res.get_mark(0) == 7 assert res.get_mark(1) == 9 def test_group_branch(self): r_code5 = get_code(r'(ab|c)') - res = rsre_core.match(r_code5, 'abdef') + res = match(r_code5, 'abdef') assert (res.get_mark(0), res.get_mark(1)) == (5, 7) - res = rsre_core.match(r_code5, 'cdef') + res = match(r_code5, 'cdef') assert (res.get_mark(0), res.get_mark(1)) == (5, 6) - res = rsre_core.match(r_code5, 'dedef') + res = match(r_code5, 'dedef') assert res is None def test_group_branch_max_until(self): r_code6 = get_code(r'(ab|c)*a') - res = rsre_core.match(r_code6, 'ccabcccabadef') + res = match(r_code6, 'ccabcccabadef') assert (res.get_mark(0), res.get_mark(1)) == (12, 14) r_code7 = get_code(r'((ab)|(c))*a') - res = rsre_core.match(r_code7, 'ccabcccabadef') + res = match(r_code7, 'ccabcccabadef') assert (res.get_mark(0), res.get_mark(1)) == (12, 14) assert (res.get_mark(2), res.get_mark(3)) == (12, 14) assert (res.get_mark(4), res.get_mark(5)) == (11, 12) @@ -113,7 +113,7 @@ assert match.span(1) == (12, 13) assert match.span(3) == (12, 13) assert match.span(2) == (8, 9) - res = rsre_core.match(r_code7, 'bbbabbbb') + res = match(r_code7, 'bbbabbbb') assert (res.get_mark(0), res.get_mark(1)) == (12, 13) assert (res.get_mark(4), res.get_mark(5)) == (12, 13) assert (res.get_mark(2), res.get_mark(3)) == (8, 9) @@ -124,7 +124,7 @@ assert match.span(1) == (6, 7) assert match.span(3) == (6, 7) assert match.span(2) == (5, 6) - res = rsre_core.match(r_code8, 'ab') + res = match(r_code8, 'ab') assert (res.get_mark(0), res.get_mark(1)) == (6, 7) assert (res.get_mark(4), res.get_mark(5)) == (6, 7) assert (res.get_mark(2), res.get_mark(3)) == (5, 6) @@ -134,7 +134,7 @@ match = r9.match('xyzxc') assert match.span(1) == (3, 4) assert match.span(2) == (-1, -1) - res = rsre_core.match(r_code9, 'xyzxc') + res = match(r_code9, 'xyzxc') assert (res.get_mark(0), res.get_mark(1)) == (3, 4) assert (res.get_mark(2), res.get_mark(3)) == (-1, -1) @@ -143,7 +143,7 @@ match = r9.match('xycxyzxc') assert match.span(2) == (6, 7) #assert match.span(3) == (1, 2) --- bug of CPython - res = rsre_core.match(r_code9, 'xycxyzxc') + res = match(r_code9, 'xycxyzxc') assert (res.get_mark(2), res.get_mark(3)) == (6, 7) assert (res.get_mark(4), res.get_mark(5)) == (1, 2) @@ -151,19 +151,19 @@ r_code, r = get_code_and_re(r'(a?)+y') assert r.match('y') assert 
r.match('aaayaaay').span() == (0, 4) - res = rsre_core.match(r_code, 'y') + res = match(r_code, 'y') assert res - res = rsre_core.match(r_code, 'aaayaaay') + res = match(r_code, 'aaayaaay') assert res and res.span() == (0, 4) # r_code, r = get_code_and_re(r'(a?){4,6}y') assert r.match('y') - res = rsre_core.match(r_code, 'y') + res = match(r_code, 'y') assert res # r_code, r = get_code_and_re(r'(a?)*y') assert r.match('y') - res = rsre_core.match(r_code, 'y') + res = match(r_code, 'y') assert res def test_empty_maxuntil_2(self): @@ -173,24 +173,24 @@ py.test.skip("older version of the stdlib: %s" % (e,)) assert r.match('XfooXbarX').span() == (0, 5) assert r.match('XfooXbarX').span(1) == (4, 4) - res = rsre_core.match(r_code, 'XfooXbarX') + res = match(r_code, 'XfooXbarX') assert res.span() == (0, 5) assert res.span(1) == (4, 4) def test_empty_minuntil(self): r_code, r = get_code_and_re(r'(a?)+?y') #assert not r.match('z') -- CPython bug (at least 2.5) eats all memory - res = rsre_core.match(r_code, 'z') + res = match(r_code, 'z') assert not res # r_code, r = get_code_and_re(r'(a?){4,6}?y') assert not r.match('z') - res = rsre_core.match(r_code, 'z') + res = match(r_code, 'z') assert not res # r_code, r = get_code_and_re(r'(a?)*?y') #assert not r.match('z') -- CPython bug (at least 2.5) eats all memory - res = rsre_core.match(r_code, 'z') + res = match(r_code, 'z') assert not res def test_empty_search(self): @@ -198,7 +198,7 @@ for j in range(-2, 6): for i in range(-2, 6): match = r.search('abc', i, j) - res = rsre_core.search(r_code, 'abc', i, j) + res = search(r_code, 'abc', i, j) jk = min(max(j, 0), 3) ik = min(max(i, 0), 3) if ik <= jk: From pypy.commits at gmail.com Sun Dec 3 09:49:30 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 03 Dec 2017 06:49:30 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: fix Message-ID: <5a240efa.424a1c0a.aad5b.3d60@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93245:02e89a1160e5 Date: 2017-12-03 15:48 +0100 http://bitbucket.org/pypy/pypy/changeset/02e89a1160e5/ Log: fix diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -799,7 +799,8 @@ max_count = sys.maxint max = ctx.pat(ppos+2) if max != rsre_char.MAXREPEAT: - max_count = max + max_count = max - min + assert max_count >= 0 nextppos = ppos + ctx.pat(ppos) result = MinRepeatOneMatchResult(nextppos, ppos+3, max_count, ptr, marks) @@ -868,9 +869,10 @@ # Else we really need to count how many times it matches. 
if maxcount != rsre_char.MAXREPEAT: # adjust end - end1 = ptr + maxcount - if end1 <= end: - end = end1 + try: + end = ctx.next_n(ptr, maxcount, end) + except EndOfString: + pass op = ctx.pat(ppos) for op1, fre in unroll_fre_checker: if op1 == op: From pypy.commits at gmail.com Sun Dec 3 09:50:37 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 03 Dec 2017 06:50:37 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: test_search passes Message-ID: <5a240f3d.43c0df0a.8745.0062@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93246:f8c971bd7900 Date: 2017-12-03 15:50 +0100 http://bitbucket.org/pypy/pypy/changeset/f8c971bd7900/ Log: test_search passes diff --git a/rpython/rlib/rsre/test/test_search.py b/rpython/rlib/rsre/test/test_search.py --- a/rpython/rlib/rsre/test/test_search.py +++ b/rpython/rlib/rsre/test/test_search.py @@ -1,6 +1,6 @@ import re, py from rpython.rlib.rsre.test.test_match import get_code, get_code_and_re -from rpython.rlib.rsre.test.support import search, match +from rpython.rlib.rsre.test.support import search, match, Position class TestSearch: @@ -109,10 +109,10 @@ def test_group_7(self): r_code7, r7 = get_code_and_re(r'((a)?(b))*') - match = r7.match('bbbabbbb') - assert match.span(1) == (12, 13) - assert match.span(3) == (12, 13) - assert match.span(2) == (8, 9) + m = r7.match('bbbabbbb') + assert m.span(1) == (12, 13) + assert m.span(3) == (12, 13) + assert m.span(2) == (8, 9) res = match(r_code7, 'bbbabbbb') assert (res.get_mark(0), res.get_mark(1)) == (12, 13) assert (res.get_mark(4), res.get_mark(5)) == (12, 13) @@ -120,10 +120,10 @@ def test_group_branch_repeat_complex_case(self): r_code8, r8 = get_code_and_re(r'((a)|(b))*') - match = r8.match('ab') - assert match.span(1) == (6, 7) - assert match.span(3) == (6, 7) - assert match.span(2) == (5, 6) + m = r8.match('ab') + assert m.span(1) == (6, 7) + assert m.span(3) == (6, 7) + assert m.span(2) == (5, 6) res = match(r_code8, 'ab') assert (res.get_mark(0), res.get_mark(1)) == (6, 7) assert (res.get_mark(4), res.get_mark(5)) == (6, 7) @@ -131,17 +131,17 @@ def test_minuntil_lastmark_restore(self): r_code9, r9 = get_code_and_re(r'(x|yz)+?(y)??c') - match = r9.match('xyzxc') - assert match.span(1) == (3, 4) - assert match.span(2) == (-1, -1) + m = r9.match('xyzxc') + assert m.span(1) == (3, 4) + assert m.span(2) == (-1, -1) res = match(r_code9, 'xyzxc') assert (res.get_mark(0), res.get_mark(1)) == (3, 4) assert (res.get_mark(2), res.get_mark(3)) == (-1, -1) def test_minuntil_bug(self): r_code9, r9 = get_code_and_re(r'((x|yz)+?(y)??c)*') - match = r9.match('xycxyzxc') - assert match.span(2) == (6, 7) + m = r9.match('xycxyzxc') + assert m.span(2) == (6, 7) #assert match.span(3) == (1, 2) --- bug of CPython res = match(r_code9, 'xycxyzxc') assert (res.get_mark(2), res.get_mark(3)) == (6, 7) @@ -205,7 +205,8 @@ assert match is not None assert match.span() == (ik, ik) assert res is not None - assert res.match_start == ik and res.match_end == ik + assert res.match_start == Position(ik) + assert res.match_end == Position(ik) else: assert match is None assert res is None From pypy.commits at gmail.com Sun Dec 3 09:56:50 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 03 Dec 2017 06:56:50 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: test_zexternal Message-ID: <5a2410b2.8db6df0a.1e1f.c0d5@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93247:3a51da0f6752 Date: 2017-12-03 15:56 +0100 http://bitbucket.org/pypy/pypy/changeset/3a51da0f6752/ Log: 
test_zexternal diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -997,9 +997,11 @@ return ptr == ctx.ZERO elif atcode == AT_BEGINNING_LINE: - if ptr <= ctx.ZERO: + try: + prevptr = ctx.prev(ptr) + except EndOfString: return True - return rsre_char.is_linebreak(ctx.str(ctx.prev(ptr))) + return rsre_char.is_linebreak(ctx.str(prevptr)) elif atcode == AT_BOUNDARY: return at_boundary(ctx, ptr) @@ -1034,18 +1036,26 @@ def _make_boundary(word_checker): @specializectx def at_boundary(ctx, ptr): - if ctx.end == 0: + if ctx.end == ctx.ZERO: return False - prevptr = ptr - 1 - that = prevptr >= 0 and word_checker(ctx.str(prevptr)) + try: + prevptr = ctx.prev(ptr) + except EndOfString: + that = False + else: + that = word_checker(ctx.str(prevptr)) this = ptr < ctx.end and word_checker(ctx.str(ptr)) return this != that @specializectx def at_non_boundary(ctx, ptr): - if ctx.end == 0: + if ctx.end == ctx.ZERO: return False - prevptr = ptr - 1 - that = prevptr >= 0 and word_checker(ctx.str(prevptr)) + try: + prevptr = ctx.prev(ptr) + except EndOfString: + that = False + else: + that = word_checker(ctx.str(prevptr)) this = ptr < ctx.end and word_checker(ctx.str(ptr)) return this == that return at_boundary, at_non_boundary @@ -1127,7 +1137,7 @@ if sre_match(ctx, base, start, None) is not None: ctx.match_start = start return True - start += 1 + start = ctx.next(start) return False install_jitdriver_spec("LiteralSearch", @@ -1144,11 +1154,12 @@ while start < ctx.end: ctx.jitdriver_LiteralSearch.jit_merge_point(ctx=ctx, start=start, base=base, character=character) + start1 = ctx.next(start) if ctx.str(start) == character: - if sre_match(ctx, base, start + 1, None) is not None: + if sre_match(ctx, base, start1, None) is not None: ctx.match_start = start return True - start += 1 + start = start1 return False install_jitdriver_spec("CharsetSearch", diff --git a/rpython/rlib/rsre/test/test_zexternal.py b/rpython/rlib/rsre/test/test_zexternal.py --- a/rpython/rlib/rsre/test/test_zexternal.py +++ b/rpython/rlib/rsre/test/test_zexternal.py @@ -1,6 +1,6 @@ import re from rpython.rlib.rsre.test.test_match import get_code -from rpython.rlib.rsre import rsre_core +from rpython.rlib.rsre.test import support def test_external_match(): @@ -31,11 +31,11 @@ raise Exception("this should have been a syntax error") # if use_search: - result = rsre_core.search(obj, s) + result = support.search(obj, s) else: # Emulate a poor man's search() with repeated match()s for i in range(len(s)+1): - result = rsre_core.match(obj, s, start=i) + result = support.match(obj, s, start=i) if result: break # From pypy.commits at gmail.com Sun Dec 3 10:04:39 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 03 Dec 2017 07:04:39 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: tweak Message-ID: <5a241287.89c0df0a.5d27b.7647@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93248:7efa0e2d1448 Date: 2017-12-03 16:04 +0100 http://bitbucket.org/pypy/pypy/changeset/7efa0e2d1448/ Log: tweak diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -1219,12 +1219,10 @@ assert start >= ctx.ZERO prefix_skip = ctx.pat(6) if prefix_skip >= prefix_len - 1: - try: - ptr = ctx.next_n(string_position, - prefix_skip - (prefix_len - 1), - ctx.end) - except EndOfString: - ptr = -1 + assert prefix_skip <= prefix_len + ptr = string_position 
+ if prefix_skip == prefix_len: + ptr = ctx.next(ptr) else: assert prefix_skip < prefix_len - 1 j_prefix_skip = j + prefix_skip @@ -1240,8 +1238,7 @@ # return True pattern_offset = ctx.pat(1) + 1 ppos_start = pattern_offset + 2 * prefix_skip - if (ptr >= ctx.ZERO and - sre_match(ctx, ppos_start, ptr, None) is not None): + if sre_match(ctx, ppos_start, ptr, None) is not None: ctx.match_start = start return True overlap_offset = prefix_len + (7 - 1) From pypy.commits at gmail.com Sun Dec 3 10:08:45 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 03 Dec 2017 07:08:45 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: Be safer against empty-array access Message-ID: <5a24137d.8bdf1c0a.6395a.740a@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93249:38318346b17b Date: 2017-12-03 16:08 +0100 http://bitbucket.org/pypy/pypy/changeset/38318346b17b/ Log: Be safer against empty-array access diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -1194,13 +1194,17 @@ if string_position >= ctx.end: return False prefix_len = ctx.pat(5) - assert prefix_len >= 0 + assert prefix_len > 0 i = 0 j = 0 - past_start_positions = [0] * (prefix_len - 1) + past_start_positions = [0] * prefix_len while True: ctx.jitdriver_FastSearch.jit_merge_point(ctx=ctx, string_position=string_position, i=i, prefix_len=prefix_len) + past_start_positions[j] = string_position + j += 1 + if j == prefix_len: + j = 0 char_ord = ctx.str(string_position) if char_ord != ctx.pat(7 + i): if i > 0: @@ -1224,10 +1228,9 @@ if prefix_skip == prefix_len: ptr = ctx.next(ptr) else: - assert prefix_skip < prefix_len - 1 j_prefix_skip = j + prefix_skip - if j_prefix_skip >= prefix_len - 1: - j_prefix_skip -= (prefix_len - 1) + if j_prefix_skip >= prefix_len: + j_prefix_skip -= prefix_len ptr = past_start_positions[j_prefix_skip] #flags = ctx.pat(2) #if flags & rsre_char.SRE_INFO_LITERAL: @@ -1243,10 +1246,6 @@ return True overlap_offset = prefix_len + (7 - 1) i = ctx.pat(overlap_offset + i) - past_start_positions[j] = string_position string_position = ctx.next(string_position) if string_position >= ctx.end: return False - j += 1 - if j == prefix_len - 1: - j = 0 From pypy.commits at gmail.com Sun Dec 3 10:21:28 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 03 Dec 2017 07:21:28 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: Simplify again the code here Message-ID: <5a241678.0abadf0a.a9a6c.0f6a@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93250:28c87cb229b6 Date: 2017-12-03 16:21 +0100 http://bitbucket.org/pypy/pypy/changeset/28c87cb229b6/ Log: Simplify again the code here diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -1196,15 +1196,9 @@ prefix_len = ctx.pat(5) assert prefix_len > 0 i = 0 - j = 0 - past_start_positions = [0] * prefix_len while True: ctx.jitdriver_FastSearch.jit_merge_point(ctx=ctx, string_position=string_position, i=i, prefix_len=prefix_len) - past_start_positions[j] = string_position - j += 1 - if j == prefix_len: - j = 0 char_ord = ctx.str(string_position) if char_ord != ctx.pat(7 + i): if i > 0: @@ -1215,23 +1209,14 @@ i += 1 if i == prefix_len: # found a potential match - - # This would be 'start = string_position + 1 - prefix_len' - # but it's probably faster to record the 'prefix_len' - # most recent locations, for utf8 - start = 
past_start_positions[j] - assert start >= ctx.ZERO + # start = string_position + 1 - prefix_len: computed later + ptr = string_position prefix_skip = ctx.pat(6) - if prefix_skip >= prefix_len - 1: - assert prefix_skip <= prefix_len - ptr = string_position - if prefix_skip == prefix_len: - ptr = ctx.next(ptr) + if prefix_skip == prefix_len: + ptr = ctx.next(ptr) else: - j_prefix_skip = j + prefix_skip - if j_prefix_skip >= prefix_len: - j_prefix_skip -= prefix_len - ptr = past_start_positions[j_prefix_skip] + assert prefix_skip < prefix_len + ptr = ctx.prev_n(ptr, prefix_len-1 - prefix_skip, ctx.ZERO) #flags = ctx.pat(2) #if flags & rsre_char.SRE_INFO_LITERAL: # # matched all of pure literal pattern @@ -1242,6 +1227,7 @@ pattern_offset = ctx.pat(1) + 1 ppos_start = pattern_offset + 2 * prefix_skip if sre_match(ctx, ppos_start, ptr, None) is not None: + start = ctx.prev_n(ptr, prefix_skip, ctx.ZERO) ctx.match_start = start return True overlap_offset = prefix_len + (7 - 1) From pypy.commits at gmail.com Sun Dec 3 12:49:47 2017 From: pypy.commits at gmail.com (cfbolz) Date: Sun, 03 Dec 2017 09:49:47 -0800 (PST) Subject: [pypy-commit] pypy intbound-improvements: improve the bounds computation of modulo Message-ID: <5a24393b.64b8df0a.65e9f.0af7@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: intbound-improvements Changeset: r93251:2ecf080bbdb4 Date: 2017-12-02 22:32 +0100 http://bitbucket.org/pypy/pypy/changeset/2ecf080bbdb4/ Log: improve the bounds computation of modulo diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -210,14 +210,26 @@ def mod_bound(self, other): r = IntUnbounded() - if other.is_constant(): - val = other.getint() - if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos - r.make_ge(IntBound(0, 0)) - r.make_lt(IntBound(val, val)) - else: # with Python's modulo: neg < (x % neg) <= 0 - r.make_gt(IntBound(val, val)) - r.make_le(IntBound(0, 0)) + if not other.has_upper and not other.has_lower: + return r # nothing known about other + if other.known_nonnegative(): + # with Python's modulo: 0 <= (x % pos) < pos + r.make_ge(IntBound(0, 0)) + if other.has_upper: + r.make_lt(IntBound(other.upper, other.upper)) + elif other.has_upper and other.upper <= 0: + # with Python's modulo: neg < (x % neg) <= 0 + r.make_le(IntBound(0, 0)) + if other.has_lower: + r.make_gt(IntBound(other.lower, other.lower)) + else: + # the interval straddles 0, so we know this: + # other.lower < x % other < other.upper + if other.has_upper: + r.make_lt(IntBound(other.upper, other.upper)) + if other.has_lower: + r.make_gt(IntBound(other.lower, other.lower)) + pass return r def lshift_bound(self, other): diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -5,7 +5,7 @@ import sys from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck -from hypothesis import given, strategies +from hypothesis import given, strategies, settings special_values = ( range(-100, 100) + @@ -23,7 +23,7 @@ ints = strategies.builds( int, # strategies.integers sometimes returns a long? 
special_values | strategies.integers( - min_value=int(-sys.maxint-1), max_value=sys.maxint)) + min_value=int(-sys.maxint-1), max_value=sys.maxint)) ints_or_none = strategies.none() | ints @@ -39,7 +39,7 @@ return IntBound(a, b) def const(a): - return bound(a,a) + return bound(a, a) def build_bound_with_contained_number(a, b, c): @@ -335,6 +335,67 @@ if n2 != 0: assert b3.contains(n1 % n2) # Python-style div +def test_mod_bound_explicit(): + # % positive + a = bound(1, 5).mod_bound(bound(1, 5)) + assert a.contains(0) + assert a.contains(4) + assert not a.contains(-1) + assert not a.contains(5) + + a = bound(1, 5).mod_bound(bound(1, None)) + assert a.contains(0) + assert a.contains(4) + assert not a.contains(-1) + assert a.contains(100000) + + # % negative + a = bound(1, 5).mod_bound(bound(-6, -1)) + assert a.contains(0) + assert a.contains(-5) + assert not a.contains(-6) + assert not a.contains(1) + + a = bound(1, 5).mod_bound(bound(None, -1)) + assert a.contains(0) + assert a.contains(-5) + assert a.contains(-60000) + assert not a.contains(1) + + # % neither + a = bound(1, 5).mod_bound(bound(-6, 10)) + assert a.contains(0) + assert a.contains(-5) + assert a.contains(9) + assert not a.contains(-6) + assert not a.contains(10) + + a = bound(1, 5).mod_bound(bound(None, 5)) + assert a.contains(0) + assert a.contains(4) + assert a.contains(-5) + assert a.contains(-60000) + assert not a.contains(5) + + a = bound(1, 5).mod_bound(bound(None, 0)) + assert a.contains(0) + assert a.contains(-5) + assert a.contains(-60000) + assert not a.contains(1) + + a = bound(1, 5).mod_bound(bound(-4, None)) + assert a.contains(0) + assert a.contains(-3) + assert a.contains(60000) + assert not a.contains(-4) + + a = bound(1, 5).mod_bound(bound(0, None)) + assert a.contains(0) + assert a.contains(5) + assert a.contains(60000) + assert not a.contains(-1) + + def test_sub_bound(): for _, _, b1 in some_bounds(): for _, _, b2 in some_bounds(): From pypy.commits at gmail.com Sun Dec 3 12:49:50 2017 From: pypy.commits at gmail.com (cfbolz) Date: Sun, 03 Dec 2017 09:49:50 -0800 (PST) Subject: [pypy-commit] pypy intbound-improvements: separate implementations of or and xor, improve upper bounds on both Message-ID: <5a24393e.e393df0a.34177.ed74@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: intbound-improvements Changeset: r93252:3485b3be23ea Date: 2017-12-03 11:17 +0100 http://bitbucket.org/pypy/pypy/changeset/3485b3be23ea/ Log: separate implementations of or and xor, improve upper bounds on both diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -62,18 +62,22 @@ postprocess_GUARD_FALSE = _postprocess_guard_true_false_value postprocess_GUARD_VALUE = _postprocess_guard_true_false_value - def optimize_INT_OR_or_XOR(self, op): + def optimize_INT_OR(self, op): v1 = self.get_box_replacement(op.getarg(0)) v2 = self.get_box_replacement(op.getarg(1)) if v1 is v2: - if op.getopnum() == rop.INT_OR: - self.make_equal_to(op, v1) - else: - self.make_constant_int(op, 0) + self.make_equal_to(op, v1) return None return self.emit(op) - def postprocess_INT_OR_or_XOR(self, op): + def optimize_INT_XOR(self, op): + v1 = self.get_box_replacement(op.getarg(0)) + v2 = self.get_box_replacement(op.getarg(1)) + if v1 is v2: + self.make_constant_int(op, 0) + return self.emit(op) + + def postprocess_INT_OR(self, op): v1 = self.get_box_replacement(op.getarg(0)) b1 = 
self.getintbound(v1) v2 = self.get_box_replacement(op.getarg(1)) @@ -81,11 +85,13 @@ b = b1.or_bound(b2) self.getintbound(op).intersect(b) - optimize_INT_OR = optimize_INT_OR_or_XOR - optimize_INT_XOR = optimize_INT_OR_or_XOR - - postprocess_INT_OR = postprocess_INT_OR_or_XOR - postprocess_INT_XOR = postprocess_INT_OR_or_XOR + def postprocess_INT_XOR(self, op): + v1 = self.get_box_replacement(op.getarg(0)) + b1 = self.getintbound(v1) + v2 = self.get_box_replacement(op.getarg(1)) + b2 = self.getintbound(v2) + b = b1.xor_bound(b2) + self.getintbound(op).intersect(b) def optimize_INT_AND(self, op): return self.emit(op) diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -1,8 +1,6 @@ import sys from rpython.rlib.rarithmetic import ovfcheck, LONG_BIT, maxint, is_valid_int from rpython.rlib.objectmodel import we_are_translated -from rpython.rtyper.lltypesystem import lltype -from rpython.rtyper.lltypesystem.lloperation import llop from rpython.jit.metainterp.resoperation import rop, ResOperation from rpython.jit.metainterp.optimizeopt.info import AbstractInfo, INFO_NONNULL,\ INFO_UNKNOWN, INFO_NULL @@ -25,6 +23,15 @@ n |= n >> 32 return n +def upper_bound_or_xor(upper1, upper2): + pow2 = next_pow2_m1(upper1 | upper2) + try: + # addition gives an ok (but not tight) upper bound of | and ^ + add = ovfcheck(upper1 + upper2) + except OverflowError: + return pow2 + else: + return min(pow2, add) class IntBound(AbstractInfo): _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower') @@ -277,13 +284,27 @@ r = IntUnbounded() if self.known_nonnegative() and \ other.known_nonnegative(): + r.make_ge(IntBound(0, 0)) if self.has_upper and other.has_upper: - mostsignificant = self.upper | other.upper - r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) + r.intersect(IntBound(0, upper_bound_or_xor(self.upper, other.upper))) + if self.has_lower and other.has_lower: + # max of the two lower bounds gives an ok (but not tight) lower + # bound of or + lower = max(self.lower, other.lower) + r.make_ge(IntBound(lower, lower)) + return r + + def xor_bound(self, other): + r = IntUnbounded() + if self.known_nonnegative() and \ + other.known_nonnegative(): + if self.has_upper and other.has_upper: + r.intersect(IntBound(0, upper_bound_or_xor(self.upper, other.upper))) else: r.make_ge(IntBound(0, 0)) return r + def contains(self, val): if not we_are_translated(): assert not isinstance(val, long) diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -418,6 +418,15 @@ if b1.contains(n1) and b2.contains(n2): assert b3.contains(n1 & n2) +def test_or_bound_explicit(): + a = bound(0b10, 0b100) + b = bound(0, 0b10) + c = a.or_bound(b) + assert c.contains(0b10) + assert c.contains(0b100 | 0b10) + assert not c.contains(1) + assert not c.contains(0b111) + def test_or_bound(): for _, _, b1 in some_bounds(): for _, _, b2 in some_bounds(): @@ -426,7 +435,24 @@ for n2 in nbr: if b1.contains(n1) and b2.contains(n2): assert b3.contains(n1 | n2) - assert b3.contains(n1 ^ n2) # we use it for xor too + +def test_xor_bound_explicit(): + a = bound(0b10, 0b100) + b = bound(0, 0b10) + c = a.or_bound(b) + assert c.contains(0b10) + assert c.contains(0b100 | 0b10) + assert not 
c.contains(-1) + assert not c.contains(0b111) + +def test_xor_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.xor_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 ^ n2) def test_next_pow2_m1(): @@ -515,5 +541,11 @@ b3 = b1.or_bound(b2) r = n1 | n2 assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_xor_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.xor_bound(b2) r = n1 ^ n2 assert b3.contains(r) From pypy.commits at gmail.com Sun Dec 3 12:49:52 2017 From: pypy.commits at gmail.com (cfbolz) Date: Sun, 03 Dec 2017 09:49:52 -0800 (PST) Subject: [pypy-commit] pypy intbound-improvements: refactor the .make_??( methods to include versions that take constants as Message-ID: <5a243940.1098df0a.69bbc.730e@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: intbound-improvements Changeset: r93253:3dd506aecf8a Date: 2017-12-03 18:33 +0100 http://bitbucket.org/pypy/pypy/changeset/3dd506aecf8a/ Log: refactor the .make_??( methods to include versions that take constants as arguments (as opposed to bounds), make those versions the primitives diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -49,28 +49,51 @@ # Returns True if the bound was updated def make_le(self, other): if other.has_upper: - if not self.has_upper or other.upper < self.upper: - self.has_upper = True - self.upper = other.upper - return True + self.make_le_const(other.upper) return False def make_lt(self, other): - return self.make_le(other.add(-1)) + if other.has_upper: + return self.make_lt_const(other.upper) + return False def make_ge(self, other): if other.has_lower: - if not self.has_lower or other.lower > self.lower: - self.has_lower = True - self.lower = other.lower - return True + return self.make_ge_const(other.lower) return False + def make_gt(self, other): + if other.has_lower: + return self.make_gt_const(other.lower) + return False + + def make_le_const(self, other): + if not self.has_upper or other < self.upper: + self.has_upper = True + self.upper = other + return True + return False + + def make_lt_const(self, other): + try: + other = ovfcheck(other - 1) + except OverflowError: + return False + return self.make_le_const(other) + def make_ge_const(self, other): - return self.make_ge(ConstIntBound(other)) + if not self.has_lower or other > self.lower: + self.has_lower = True + self.lower = other + return True + return False def make_gt_const(self, other): - return self.make_gt(ConstIntBound(other)) + try: + other = ovfcheck(other + 1) + except OverflowError: + return False + return self.make_ge_const(other) def make_eq_const(self, intval): self.has_upper = True @@ -78,9 +101,6 @@ self.upper = intval self.lower = intval - def make_gt(self, other): - return self.make_ge(other.add(1)) - def is_constant(self): return self.has_upper and self.has_lower and self.lower == self.upper @@ -221,21 +241,21 @@ return r # nothing known about other if other.known_nonnegative(): # with Python's modulo: 0 <= (x % pos) < pos - r.make_ge(IntBound(0, 0)) + r.make_ge_const(0) if other.has_upper: - r.make_lt(IntBound(other.upper, other.upper)) + r.make_lt_const(other.upper) elif other.has_upper and other.upper <= 0: # with Python's modulo: neg < (x % neg) <= 0 - r.make_le(IntBound(0, 0)) + r.make_le_const(0) if 
other.has_lower: - r.make_gt(IntBound(other.lower, other.lower)) + r.make_gt_const(other.lower) else: # the interval straddles 0, so we know this: # other.lower < x % other < other.upper if other.has_upper: - r.make_lt(IntBound(other.upper, other.upper)) + r.make_lt_const(other.upper) if other.has_lower: - r.make_gt(IntBound(other.lower, other.lower)) + r.make_gt_const(other.lower) pass return r @@ -273,7 +293,7 @@ pos2 = other.known_nonnegative() r = IntUnbounded() if pos1 or pos2: - r.make_ge(IntBound(0, 0)) + r.make_ge_const(0) if pos1: r.make_le(self) if pos2: @@ -284,14 +304,14 @@ r = IntUnbounded() if self.known_nonnegative() and \ other.known_nonnegative(): - r.make_ge(IntBound(0, 0)) + r.make_ge_const(0) if self.has_upper and other.has_upper: r.intersect(IntBound(0, upper_bound_or_xor(self.upper, other.upper))) if self.has_lower and other.has_lower: # max of the two lower bounds gives an ok (but not tight) lower # bound of or lower = max(self.lower, other.lower) - r.make_ge(IntBound(lower, lower)) + r.make_ge_const(lower) return r def xor_bound(self, other): @@ -301,7 +321,7 @@ if self.has_upper and other.has_upper: r.intersect(IntBound(0, upper_bound_or_xor(self.upper, other.upper))) else: - r.make_ge(IntBound(0, 0)) + r.make_ge_const(0) return r diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -130,7 +130,7 @@ assert b.known_lt(c) -def test_make(): +def test_make(): for _, _, b1 in some_bounds(): for _, _, b2 in some_bounds(): lt = IntUnbounded() @@ -169,7 +169,7 @@ assert not le.known_gt(c) assert not le.known_ge(c) - + ge = IntUnbounded() ge.make_ge(b1) ge.make_ge(b2) @@ -192,13 +192,13 @@ else: assert not gl.known_ge(c) assert not gl.known_gt(c) - if b2.known_le(c): + if b2.known_le(c): assert gl.known_le(c) else: assert not gl.known_le(c) assert not gl.known_lt(c) -def test_intersect(): +def test_intersect(): for _, _, b1 in some_bounds(): for _, _, b2 in some_bounds(): b = copy(b1) @@ -208,7 +208,7 @@ assert b.contains(n) else: assert not b.contains(n) - + def test_add(): for _, _, b1 in some_bounds(): for n1 in nbr: From pypy.commits at gmail.com Sun Dec 3 12:49:54 2017 From: pypy.commits at gmail.com (cfbolz) Date: Sun, 03 Dec 2017 09:49:54 -0800 (PST) Subject: [pypy-commit] pypy intbound-improvements: remove copy-pasted comment Message-ID: <5a243942.a180df0a.a1e2.ef2a@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: intbound-improvements Changeset: r93254:157377fea9d2 Date: 2017-12-03 18:42 +0100 http://bitbucket.org/pypy/pypy/changeset/157377fea9d2/ Log: remove copy-pasted comment diff --git a/rpython/jit/metainterp/optimizeopt/rewrite.py b/rpython/jit/metainterp/optimizeopt/rewrite.py --- a/rpython/jit/metainterp/optimizeopt/rewrite.py +++ b/rpython/jit/metainterp/optimizeopt/rewrite.py @@ -834,8 +834,6 @@ self.make_constant_int(op, 0) self.last_emitted_operation = REMOVED return True - # This is Python's integer division: 'x // (2**shift)' can always - # be replaced with 'x >> shift', even for negative values of x if not b2.is_constant(): return False val = b2.getint() From pypy.commits at gmail.com Sun Dec 3 13:18:34 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 03 Dec 2017 10:18:34 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Blind change, trying to fix test.test_memoryio.CStringIOTest failures Message-ID: 
<5a243ffa.425a1c0a.87c1c.42fd@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93255:82c3b0528d78 Date: 2017-12-03 18:17 +0000 http://bitbucket.org/pypy/pypy/changeset/82c3b0528d78/ Log: Blind change, trying to fix test.test_memoryio.CStringIOTest failures diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -43,6 +43,8 @@ # Universal newline search. Find any of \r, \r\n, \n limit = self._convert_limit(limit) start = self.pos + if start >= len(self.data): + return u'' end = start + limit pos = start while pos < end: @@ -65,6 +67,8 @@ def readline(self, marker, limit): start = self.pos limit = self._convert_limit(limit) + if start >= len(self.data): + return u'' end = start + limit found = False for pos in range(start, end - len(marker) + 1): From pypy.commits at gmail.com Sun Dec 3 13:23:35 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 03 Dec 2017 10:23:35 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix test Message-ID: <5a244127.dc361c0a.fb416.a3e5@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93256:18ca07086209 Date: 2017-12-03 18:22 +0000 http://bitbucket.org/pypy/pypy/changeset/18ca07086209/ Log: fix test diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -340,7 +340,6 @@ #if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4) const wchar_t wtext[2] = {(wchar_t)0x10ABCDu}; size_t wtextlen = 1; - const wchar_t invalid[1] = {(wchar_t)0x110000u}; #else const wchar_t wtext[3] = {(wchar_t)0xDBEAu, (wchar_t)0xDFCDu}; size_t wtextlen = 2; From pypy.commits at gmail.com Sun Dec 3 13:38:16 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 03 Dec 2017 10:38:16 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix test: pick an obscure module that won't have been imported yet Message-ID: <5a244498.493f1c0a.73580.a9d0@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93257:5afa98675f99 Date: 2017-12-03 18:37 +0000 http://bitbucket.org/pypy/pypy/changeset/5afa98675f99/ Log: Fix test: pick an obscure module that won't have been imported yet diff --git a/pypy/module/cpyext/test/test_import.py b/pypy/module/cpyext/test/test_import.py --- a/pypy/module/cpyext/test/test_import.py +++ b/pypy/module/cpyext/test/test_import.py @@ -22,7 +22,7 @@ space.wrap('__name__'))) == 'foobar' def test_getmoduledict(self, space, api): - testmod = "contextlib" + testmod = "imghdr" w_pre_dict = PyImport_GetModuleDict(space, ) assert not space.contains_w(w_pre_dict, space.wrap(testmod)) From pypy.commits at gmail.com Sun Dec 3 13:45:35 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 03 Dec 2017 10:45:35 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix bad merge Message-ID: <5a24464f.b5a0df0a.bf8f.682c@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93258:7f17056199b6 Date: 2017-12-03 18:44 +0000 http://bitbucket.org/pypy/pypy/changeset/7f17056199b6/ Log: fix bad merge diff --git a/pypy/module/thread/test/test_import_lock.py b/pypy/module/thread/test/test_import_lock.py --- a/pypy/module/thread/test/test_import_lock.py +++ b/pypy/module/thread/test/test_import_lock.py @@ -101,8 +101,8 @@ importhook(space, 'sys') assert importlock.count == 0 # A new module - importhook(space, 're') - assert importlock.count >= 9 + importhook(space, 'time') + assert importlock.count >= 1 # Import 
it again previous_count = importlock.count importhook(space, "time") From pypy.commits at gmail.com Sun Dec 3 14:24:19 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 03 Dec 2017 11:24:19 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-test: backout 26f1724ee623, to get meaningful test results Message-ID: <5a244f63.01a4df0a.47934.ee43@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8-test Changeset: r93259:9d2a2229c128 Date: 2017-12-03 19:15 +0000 http://bitbucket.org/pypy/pypy/changeset/9d2a2229c128/ Log: backout 26f1724ee623, to get meaningful test results diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1760,6 +1760,10 @@ def utf8_w(self, w_obj): return w_obj.utf8_w(self) + def unicode_w(self, w_obj): + # XXX: kill me! + return w_obj.utf8_w(self).decode('utf-8') + def convert_to_w_unicode(self, w_obj): return w_obj.convert_to_w_unicode(self) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -377,6 +377,13 @@ assert isinstance(s, str) # on pypy3, this decodes the byte string return W_BytesObject(s) # with the filesystem encoding + def newunicode(self, unistr): + # XXX: kill me! + assert isinstance(unistr, unicode) + utf8s = unistr.encode("utf-8") + length, flag = rutf8.check_utf8(utf8s, True) + return self.newutf8(utf8s, length, flag) + def type(self, w_obj): jit.promote(w_obj.__class__) return w_obj.getclass(self) From pypy.commits at gmail.com Sun Dec 3 15:00:33 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 03 Dec 2017 12:00:33 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix test Message-ID: <5a2457e1.bab0df0a.696db.d3b6@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93260:6025539c2c58 Date: 2017-12-03 19:59 +0000 http://bitbucket.org/pypy/pypy/changeset/6025539c2c58/ Log: fix test diff --git a/pypy/module/pypyjit/test_pypy_c/test_jitlogparser.py b/pypy/module/pypyjit/test_pypy_c/test_jitlogparser.py --- a/pypy/module/pypyjit/test_pypy_c/test_jitlogparser.py +++ b/pypy/module/pypyjit/test_pypy_c/test_jitlogparser.py @@ -51,10 +51,12 @@ # do not care for _optimize_charset or _mk_bitmap continue assert loop.count > 0 - if ' is_prime, ' in loop.comment: + if 'is_prime' in loop.comment: is_prime_loops.append(loop) - elif ' fn_with_bridges, ' in loop.comment: + elif 'fn_with_bridges' in loop.comment: fn_with_bridges_loops.append(loop) + elif 'tuple.contains' in loop.comment: + pass else: assert ' bridge ' in loop.comment key = mangle_descr(loop.descr) From pypy.commits at gmail.com Sun Dec 3 15:41:57 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 03 Dec 2017 12:41:57 -0800 (PST) Subject: [pypy-commit] pypy default: space.unwrap() -> space.unicode_w() Message-ID: <5a246195.22a8df0a.e1935.891e@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93261:9344c533d95d Date: 2017-12-03 20:41 +0000 http://bitbucket.org/pypy/pypy/changeset/9344c533d95d/ Log: space.unwrap() -> space.unicode_w() diff --git a/pypy/module/cpyext/test/test_codecs.py b/pypy/module/cpyext/test/test_codecs.py --- a/pypy/module/cpyext/test/test_codecs.py +++ b/pypy/module/cpyext/test/test_codecs.py @@ -11,5 +11,5 @@ w_encoded = space.call_method(w_encoder, 'encode', space.wrap(u'späm')) w_decoder = PyCodec_IncrementalDecoder(space, utf8, None) w_decoded = space.call_method(w_decoder, 'decode', w_encoded) - assert space.unwrap(w_decoded) == u'späm' + assert 
space.unicode_w(w_decoded) == u'späm' rffi.free_charp(utf8) diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py --- a/pypy/module/cpyext/test/test_eval.py +++ b/pypy/module/cpyext/test/test_eval.py @@ -131,7 +131,7 @@ finally: rffi.free_charp(buf) w_a = space.getitem(w_globals, space.wrap("a")) - assert space.unwrap(w_a) == u'caf\xe9' + assert space.unicode_w(w_a) == u'caf\xe9' lltype.free(flags, flavor='raw') def test_run_file(self, space): diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py --- a/pypy/module/cpyext/test/test_object.py +++ b/pypy/module/cpyext/test/test_object.py @@ -8,7 +8,7 @@ from pypy.module.cpyext.object import ( PyObject_IsTrue, PyObject_Not, PyObject_GetAttrString, PyObject_DelAttrString, PyObject_GetAttr, PyObject_DelAttr, - PyObject_GetItem, + PyObject_GetItem, PyObject_IsInstance, PyObject_IsSubclass, PyObject_AsFileDescriptor, PyObject_Hash, PyObject_Cmp, PyObject_Unicode ) @@ -209,9 +209,9 @@ PyObject_Cmp(space, w(u"\xe9"), w("\xe9"), ptr) def test_unicode(self, space, api): - assert space.unwrap(api.PyObject_Unicode(None)) == u"" - assert space.unwrap(api.PyObject_Unicode(space.wrap([]))) == u"[]" - assert space.unwrap(api.PyObject_Unicode(space.wrap("e"))) == u"e" + assert space.unicode_w(api.PyObject_Unicode(None)) == u"" + assert space.unicode_w(api.PyObject_Unicode(space.wrap([]))) == u"[]" + assert space.unicode_w(api.PyObject_Unicode(space.wrap("e"))) == u"e" with raises_w(space, UnicodeDecodeError): PyObject_Unicode(space, space.wrap("\xe9")) @@ -562,7 +562,7 @@ PyObject *a = PyTuple_GetItem(args, 0); PyObject *b = PyTuple_GetItem(args, 1); int res = PyObject_RichCompareBool(a, b, Py_EQ); - return PyLong_FromLong(res); + return PyLong_FromLong(res); """),]) a = float('nan') b = float('nan') diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -178,7 +178,7 @@ array = rffi.cast(rffi.CWCHARP, PyUnicode_AS_DATA(space, word)) array2 = PyUnicode_AS_UNICODE(space, word) array3 = PyUnicode_AsUnicode(space, word) - for (i, char) in enumerate(space.unwrap(word)): + for (i, char) in enumerate(space.unicode_w(word)): assert array[i] == char assert array2[i] == char assert array3[i] == char @@ -216,12 +216,12 @@ def test_fromstring(self, space): s = rffi.str2charp(u'sp\x09m'.encode("utf-8")) w_res = PyUnicode_FromString(space, s) - assert space.unwrap(w_res) == u'sp\x09m' + assert space.unicode_w(w_res) == u'sp\x09m' res = PyUnicode_FromStringAndSize(space, s, 4) w_res = from_ref(space, res) Py_DecRef(space, res) - assert space.unwrap(w_res) == u'sp\x09m' + assert space.unicode_w(w_res) == u'sp\x09m' rffi.free_charp(s) def test_unicode_resize(self, space): @@ -256,17 +256,17 @@ u = rffi.str2charp(u'sp\x134m'.encode("utf-8")) w_u = PyUnicode_DecodeUTF8(space, u, 5, None) assert space.type(w_u) is space.w_unicode - assert space.unwrap(w_u) == u'sp\x134m' + assert space.unicode_w(w_u) == u'sp\x134m' w_u = PyUnicode_DecodeUTF8(space, u, 2, None) assert space.type(w_u) is space.w_unicode - assert space.unwrap(w_u) == 'sp' + assert space.unicode_w(w_u) == 'sp' rffi.free_charp(u) def test_encode_utf8(self, space): u = rffi.unicode2wcharp(u'sp\x09m') w_s = PyUnicode_EncodeUTF8(space, u, 4, None) - assert space.unwrap(w_s) == u'sp\x09m'.encode('utf-8') + assert space.unicode_w(w_s) == u'sp\x09m'.encode('utf-8') rffi.free_wcharp(u) def 
test_encode_decimal(self, space): @@ -364,18 +364,18 @@ def test_fromobject(self, space): w_u = space.wrap(u'a') assert PyUnicode_FromObject(space, w_u) is w_u - assert space.unwrap( + assert space.unicode_w( PyUnicode_FromObject(space, space.wrap('test'))) == 'test' def test_decode(self, space): b_text = rffi.str2charp('caf\x82xx') b_encoding = rffi.str2charp('cp437') - assert space.unwrap( + assert space.unicode_w( PyUnicode_Decode(space, b_text, 4, b_encoding, None)) == u'caf\xe9' w_text = PyUnicode_FromEncodedObject(space, space.wrap("test"), b_encoding, None) assert space.isinstance_w(w_text, space.w_unicode) - assert space.unwrap(w_text) == "test" + assert space.unicode_w(w_text) == "test" with raises_w(space, TypeError): PyUnicode_FromEncodedObject(space, space.wrap(u"test"), @@ -391,7 +391,8 @@ u_text = u'abcdefg' s_text = space.str_w(PyUnicode_AsEncodedString(space, space.wrap(u_text), null_charp, null_charp)) b_text = rffi.str2charp(s_text) - assert space.unwrap(PyUnicode_Decode(space, b_text, len(s_text), null_charp, null_charp)) == u_text + assert space.unicode_w(PyUnicode_Decode( + space, b_text, len(s_text), null_charp, null_charp)) == u_text with raises_w(space, TypeError): PyUnicode_FromEncodedObject( space, space.wrap(u_text), null_charp, None) @@ -508,7 +509,7 @@ def test_concat(self, space): w_res = PyUnicode_Concat(space, space.wrap(u'a'), space.wrap(u'b')) - assert space.unwrap(w_res) == u'ab' + assert space.unicode_w(w_res) == u'ab' def test_copy(self, space): w_x = space.wrap(u"abcd\u0660") @@ -579,29 +580,30 @@ w_format = space.wrap(u'hi %s') w_args = space.wrap((u'test',)) w_formated = PyUnicode_Format(space, w_format, w_args) - assert space.unwrap(w_formated) == space.unwrap(space.mod(w_format, w_args)) + assert (space.unicode_w(w_formated) == + space.unicode_w(space.mod(w_format, w_args))) def test_join(self, space): w_sep = space.wrap(u'') w_seq = space.wrap([u'a', u'b']) w_joined = PyUnicode_Join(space, w_sep, w_seq) - assert space.unwrap(w_joined) == u'ab' + assert space.unicode_w(w_joined) == u'ab' def test_fromordinal(self, space): w_char = PyUnicode_FromOrdinal(space, 65) - assert space.unwrap(w_char) == u'A' + assert space.unicode_w(w_char) == u'A' w_char = PyUnicode_FromOrdinal(space, 0) - assert space.unwrap(w_char) == u'\0' + assert space.unicode_w(w_char) == u'\0' w_char = PyUnicode_FromOrdinal(space, 0xFFFF) - assert space.unwrap(w_char) == u'\uFFFF' + assert space.unicode_w(w_char) == u'\uFFFF' def test_replace(self, space): w_str = space.wrap(u"abababab") w_substr = space.wrap(u"a") w_replstr = space.wrap(u"z") - assert u"zbzbabab" == space.unwrap( + assert u"zbzbabab" == space.unicode_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, 2)) - assert u"zbzbzbzb" == space.unwrap( + assert u"zbzbzbzb" == space.unicode_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, -1)) def test_tailmatch(self, space): diff --git a/pypy/module/unicodedata/test/test_hyp.py b/pypy/module/unicodedata/test/test_hyp.py --- a/pypy/module/unicodedata/test/test_hyp.py +++ b/pypy/module/unicodedata/test/test_hyp.py @@ -10,7 +10,7 @@ def normalize(s): w_s = space.newunicode(s) w_res = ucd.normalize(space, NF_code, w_s) - return space.unwrap(w_res) + return space.unicode_w(w_res) return normalize all_forms = ['NFC', 'NFD', 'NFKC', 'NFKD'] From pypy.commits at gmail.com Sun Dec 3 15:44:16 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 03 Dec 2017 12:44:16 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: hg merge default Message-ID: 
<5a246220.c8e81c0a.3a07d.60d5@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93262:56cea686737b Date: 2017-12-03 20:43 +0000 http://bitbucket.org/pypy/pypy/changeset/56cea686737b/ Log: hg merge default diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py --- a/extra_tests/test_textio.py +++ b/extra_tests/test_textio.py @@ -1,28 +1,48 @@ from hypothesis import given, strategies as st from io import BytesIO, TextIOWrapper +import os -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace('\r\n', '\n') + text = text.replace('\r', '\n') + return text.replace('\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline_universal( + draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + lines = draw(st.lists( + st.text(st.characters(blacklist_characters='\r\n')), + min_size=n_lines, max_size=n_lines)) + limits = [] + for line in lines: + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + endings = draw(st.lists( + st.sampled_from(['\n', '\r', '\r\n']), + min_size=n_lines, max_size=n_lines)) + return ( + ''.join(line + ending for line, ending in zip(lines, endings)), + limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(txt, mode, limit): + at given(data=st_readline_universal(), + mode=st.sampled_from(['\r', '\n', '\r\n', '', None])) +def test_readline(data, mode): + txt, limits = data textio = TextIOWrapper( - BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode) + BytesIO(txt.encode('utf-8', 'surrogatepass')), + encoding='utf-8', errors='surrogatepass', newline=mode) lines = [] - while True: + for limit in limits: line = textio.readline(limit) - if limit > 0: - assert len(line) < limit + if limit >= 0: + assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + if mode is None: + txt = translate_newlines(txt) + assert txt.startswith(u''.join(lines)) diff --git a/lib_pypy/resource.py b/lib_pypy/resource.py --- a/lib_pypy/resource.py +++ b/lib_pypy/resource.py @@ -20,6 +20,7 @@ or via the attributes ru_utime, ru_stime, ru_maxrss, and so on.""" __metaclass__ = _structseq.structseqtype + name = "resource.struct_rusage" ru_utime = _structseq.structseqfield(0, "user time used") ru_stime = _structseq.structseqfield(1, "system time used") diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -26,3 +26,6 @@ .. branch: fix-vmprof-stacklet-switch Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -25,8 +25,10 @@ This compiler, while the standard one for Python 2.7, is deprecated. Microsoft has made it available as the `Microsoft Visual C++ Compiler for Python 2.7`_ (the link -was checked in Nov 2016). Note that the compiler suite will be installed in -``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python``. +was checked in Nov 2016). 
Note that the compiler suite may be installed in +``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python`` +or in +``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python``. A current version of ``setuptools`` will be able to find it there. For Windows 10, you must right-click the download, and under ``Properties`` -> ``Compatibility`` mark it as ``Run run this program in comatibility mode for`` @@ -41,7 +43,6 @@ ----------------------------------- We routinely test translation using v9, also known as Visual Studio 2008. -Our buildbot is still using the Express Edition, not the compiler noted above. Other configurations may work as well. The translation scripts will set up the appropriate environment variables @@ -81,6 +82,30 @@ .. _build instructions: http://pypy.org/download.html#building-from-source +Setting Up Visual Studio for building SSL in Python3 +---------------------------------------------------- + +On Python3, the ``ssl`` module is based on ``cffi``, and requires a build step after +translation. However ``distutils`` does not support the Micorosft-provided Visual C +compiler, and ``cffi`` depends on ``distutils`` to find the compiler. The +traditional solution to this problem is to install the ``setuptools`` module +via running ``-m ensurepip`` which installs ``pip`` and ``setuptools``. However +``pip`` requires ``ssl``. So we have a chicken-and-egg problem: ``ssl`` depends on +``cffi`` which depends on ``setuptools``, which depends on ``ensurepip``, which +depends on ``ssl``. + +In order to solve this, the buildbot sets an environment varaible that helps +``distutils`` find the compiler without ``setuptools``:: + + set VS90COMNTOOLS=C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0\VC\bin + +or whatever is appropriate for your machine. 
Note that this is not enough, you +must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the +``...\9.0\VC`` directory, and edit it, changing the lines that set +``VCINSTALLDIR`` and ``WindowsSdkDir``:: + set VCINSTALLDIR=%~dp0\ + set WindowsSdkDir=%~dp0\..\WinSDK\ + Preparing Windows for the large build ------------------------------------- diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -361,6 +361,7 @@ while scanned < limit: try: ch = self.next_char() + scanned += 1 except StopIteration: return False if ch == '\n': @@ -746,7 +747,7 @@ remnant = None continue - if limit > 0: + if limit >= 0: remaining = limit - builder.getlength() assert remaining >= 0 else: diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -1,41 +1,54 @@ import pytest try: - from hypothesis import given, strategies as st, assume + from hypothesis import given, strategies as st except ImportError: pytest.skip("hypothesis required") +import os from pypy.module._io.interp_bytesio import W_BytesIO from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace(u'\r\n', u'\n') + text = text.replace(u'\r', u'\n') + return text.replace(u'\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline(draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + fragments = [] + limits = [] + for _ in range(n_lines): + line = draw(st.text(st.characters(blacklist_characters=u'\r\n'))) + fragments.append(line) + ending = draw(st.sampled_from([u'\n', u'\r', u'\r\n'])) + fragments.append(ending) + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + return (u''.join(fragments), limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(space, txt, mode, limit): - assume(limit != 0) + at given(data=st_readline(), + mode=st.sampled_from(['\r', '\n', '\r\n', ''])) +def test_readline(space, data, mode): + txt, limits = data w_stream = W_BytesIO(space) w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) w_textio = W_TextIOWrapper(space) w_textio.descr_init( - space, w_stream, encoding='utf-8', + space, w_stream, + encoding='utf-8', w_errors=space.newtext('surrogatepass'), w_newline=space.newtext(mode)) lines = [] - while True: + for limit in limits: w_line = w_textio.readline_w(space, space.newint(limit)) line = space.utf8_w(w_line).decode('utf-8') - if limit > 0: + if limit >= 0: assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + assert txt.startswith(u''.join(lines)) @given(st.text()) def test_read_buffer(text): diff --git a/pypy/module/cpyext/test/test_codecs.py b/pypy/module/cpyext/test/test_codecs.py --- a/pypy/module/cpyext/test/test_codecs.py +++ b/pypy/module/cpyext/test/test_codecs.py @@ -11,5 +11,5 @@ w_encoded = space.call_method(w_encoder, 'encode', space.wrap(u'späm')) w_decoder = PyCodec_IncrementalDecoder(space, utf8, None) w_decoded = space.call_method(w_decoder, 
'decode', w_encoded) - assert space.unwrap(w_decoded) == u'späm' + assert space.unicode_w(w_decoded) == u'späm' rffi.free_charp(utf8) diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py --- a/pypy/module/cpyext/test/test_eval.py +++ b/pypy/module/cpyext/test/test_eval.py @@ -131,7 +131,7 @@ finally: rffi.free_charp(buf) w_a = space.getitem(w_globals, space.wrap("a")) - assert space.unwrap(w_a) == u'caf\xe9' + assert space.unicode_w(w_a) == u'caf\xe9' lltype.free(flags, flavor='raw') def test_run_file(self, space): diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py --- a/pypy/module/cpyext/test/test_object.py +++ b/pypy/module/cpyext/test/test_object.py @@ -8,7 +8,7 @@ from pypy.module.cpyext.object import ( PyObject_IsTrue, PyObject_Not, PyObject_GetAttrString, PyObject_DelAttrString, PyObject_GetAttr, PyObject_DelAttr, - PyObject_GetItem, + PyObject_GetItem, PyObject_IsInstance, PyObject_IsSubclass, PyObject_AsFileDescriptor, PyObject_Hash, PyObject_Cmp, PyObject_Unicode ) @@ -209,9 +209,9 @@ PyObject_Cmp(space, w(u"\xe9"), w("\xe9"), ptr) def test_unicode(self, space, api): - assert space.unwrap(api.PyObject_Unicode(None)) == u"" - assert space.unwrap(api.PyObject_Unicode(space.wrap([]))) == u"[]" - assert space.unwrap(api.PyObject_Unicode(space.wrap("e"))) == u"e" + assert space.unicode_w(api.PyObject_Unicode(None)) == u"" + assert space.unicode_w(api.PyObject_Unicode(space.wrap([]))) == u"[]" + assert space.unicode_w(api.PyObject_Unicode(space.wrap("e"))) == u"e" with raises_w(space, UnicodeDecodeError): PyObject_Unicode(space, space.wrap("\xe9")) @@ -562,7 +562,7 @@ PyObject *a = PyTuple_GetItem(args, 0); PyObject *b = PyTuple_GetItem(args, 1); int res = PyObject_RichCompareBool(a, b, Py_EQ); - return PyLong_FromLong(res); + return PyLong_FromLong(res); """),]) a = float('nan') b = float('nan') diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -178,7 +178,7 @@ array = rffi.cast(rffi.CWCHARP, PyUnicode_AS_DATA(space, word)) array2 = PyUnicode_AS_UNICODE(space, word) array3 = PyUnicode_AsUnicode(space, word) - for (i, char) in enumerate(space.unwrap(word)): + for (i, char) in enumerate(space.unicode_w(word)): assert array[i] == char assert array2[i] == char assert array3[i] == char @@ -216,12 +216,12 @@ def test_fromstring(self, space): s = rffi.str2charp(u'sp\x09m'.encode("utf-8")) w_res = PyUnicode_FromString(space, s) - assert space.unwrap(w_res) == u'sp\x09m' + assert space.unicode_w(w_res) == u'sp\x09m' res = PyUnicode_FromStringAndSize(space, s, 4) w_res = from_ref(space, res) Py_DecRef(space, res) - assert space.unwrap(w_res) == u'sp\x09m' + assert space.unicode_w(w_res) == u'sp\x09m' rffi.free_charp(s) def test_unicode_resize(self, space): @@ -256,17 +256,17 @@ u = rffi.str2charp(u'sp\x134m'.encode("utf-8")) w_u = PyUnicode_DecodeUTF8(space, u, 5, None) assert space.type(w_u) is space.w_unicode - assert space.unwrap(w_u) == u'sp\x134m' + assert space.unicode_w(w_u) == u'sp\x134m' w_u = PyUnicode_DecodeUTF8(space, u, 2, None) assert space.type(w_u) is space.w_unicode - assert space.unwrap(w_u) == 'sp' + assert space.unicode_w(w_u) == 'sp' rffi.free_charp(u) def test_encode_utf8(self, space): u = rffi.unicode2wcharp(u'sp\x09m') w_s = PyUnicode_EncodeUTF8(space, u, 4, None) - assert space.unwrap(w_s) == u'sp\x09m'.encode('utf-8') + assert 
space.unicode_w(w_s) == u'sp\x09m'.encode('utf-8') rffi.free_wcharp(u) def test_encode_decimal(self, space): @@ -364,18 +364,18 @@ def test_fromobject(self, space): w_u = space.wrap(u'a') assert PyUnicode_FromObject(space, w_u) is w_u - assert space.unwrap( + assert space.unicode_w( PyUnicode_FromObject(space, space.wrap('test'))) == 'test' def test_decode(self, space): b_text = rffi.str2charp('caf\x82xx') b_encoding = rffi.str2charp('cp437') - assert space.unwrap( + assert space.unicode_w( PyUnicode_Decode(space, b_text, 4, b_encoding, None)) == u'caf\xe9' w_text = PyUnicode_FromEncodedObject(space, space.wrap("test"), b_encoding, None) assert space.isinstance_w(w_text, space.w_unicode) - assert space.unwrap(w_text) == "test" + assert space.unicode_w(w_text) == "test" with raises_w(space, TypeError): PyUnicode_FromEncodedObject(space, space.wrap(u"test"), @@ -391,7 +391,8 @@ u_text = u'abcdefg' s_text = space.str_w(PyUnicode_AsEncodedString(space, space.wrap(u_text), null_charp, null_charp)) b_text = rffi.str2charp(s_text) - assert space.unwrap(PyUnicode_Decode(space, b_text, len(s_text), null_charp, null_charp)) == u_text + assert space.unicode_w(PyUnicode_Decode( + space, b_text, len(s_text), null_charp, null_charp)) == u_text with raises_w(space, TypeError): PyUnicode_FromEncodedObject( space, space.wrap(u_text), null_charp, None) @@ -508,7 +509,7 @@ def test_concat(self, space): w_res = PyUnicode_Concat(space, space.wrap(u'a'), space.wrap(u'b')) - assert space.unwrap(w_res) == u'ab' + assert space.unicode_w(w_res) == u'ab' def test_copy(self, space): w_x = space.wrap(u"abcd\u0660") @@ -579,29 +580,30 @@ w_format = space.wrap(u'hi %s') w_args = space.wrap((u'test',)) w_formated = PyUnicode_Format(space, w_format, w_args) - assert space.unwrap(w_formated) == space.unwrap(space.mod(w_format, w_args)) + assert (space.unicode_w(w_formated) == + space.unicode_w(space.mod(w_format, w_args))) def test_join(self, space): w_sep = space.wrap(u'') w_seq = space.wrap([u'a', u'b']) w_joined = PyUnicode_Join(space, w_sep, w_seq) - assert space.unwrap(w_joined) == u'ab' + assert space.unicode_w(w_joined) == u'ab' def test_fromordinal(self, space): w_char = PyUnicode_FromOrdinal(space, 65) - assert space.unwrap(w_char) == u'A' + assert space.unicode_w(w_char) == u'A' w_char = PyUnicode_FromOrdinal(space, 0) - assert space.unwrap(w_char) == u'\0' + assert space.unicode_w(w_char) == u'\0' w_char = PyUnicode_FromOrdinal(space, 0xFFFF) - assert space.unwrap(w_char) == u'\uFFFF' + assert space.unicode_w(w_char) == u'\uFFFF' def test_replace(self, space): w_str = space.wrap(u"abababab") w_substr = space.wrap(u"a") w_replstr = space.wrap(u"z") - assert u"zbzbabab" == space.unwrap( + assert u"zbzbabab" == space.unicode_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, 2)) - assert u"zbzbzbzb" == space.unwrap( + assert u"zbzbzbzb" == space.unicode_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, -1)) def test_tailmatch(self, space): diff --git a/pypy/module/unicodedata/test/test_hyp.py b/pypy/module/unicodedata/test/test_hyp.py --- a/pypy/module/unicodedata/test/test_hyp.py +++ b/pypy/module/unicodedata/test/test_hyp.py @@ -10,7 +10,7 @@ def normalize(s): w_s = space.newunicode(s) w_res = ucd.normalize(space, NF_code, w_s) - return space.unwrap(w_res) + return space.unicode_w(w_res) return normalize all_forms = ['NFC', 'NFD', 'NFKC', 'NFKD'] diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- 
a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -25,19 +25,6 @@ return (1 << ((byte_size << 3) - 1)) - 1 -IS_64_BIT = sys.maxint > 2**32 - -def next_pow2_m1(n): - """Calculate next power of 2 greater than n minus one.""" - n |= n >> 1 - n |= n >> 2 - n |= n >> 4 - n |= n >> 8 - n |= n >> 16 - if IS_64_BIT: - n |= n >> 32 - return n - class OptIntBounds(Optimization): """Keeps track of the bounds placed on integers by guards and remove @@ -50,7 +37,7 @@ return dispatch_postprocess(self, op) def propagate_bounds_backward(self, box): - # FIXME: This takes care of the instruction where box is the reuslt + # FIXME: This takes care of the instruction where box is the result # but the bounds produced by all instructions where box is # an argument might also be tighten b = self.getintbound(box) @@ -91,14 +78,8 @@ b1 = self.getintbound(v1) v2 = self.get_box_replacement(op.getarg(1)) b2 = self.getintbound(v2) - if b1.known_ge(IntBound(0, 0)) and \ - b2.known_ge(IntBound(0, 0)): - r = self.getintbound(op) - if b1.has_upper and b2.has_upper: - mostsignificant = b1.upper | b2.upper - r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) - else: - r.make_ge(IntBound(0, 0)) + b = b1.or_bound(b2) + self.getintbound(op).intersect(b) optimize_INT_OR = optimize_INT_OR_or_XOR optimize_INT_XOR = optimize_INT_OR_or_XOR @@ -112,15 +93,8 @@ def postprocess_INT_AND(self, op): b1 = self.getintbound(op.getarg(0)) b2 = self.getintbound(op.getarg(1)) - r = self.getintbound(op) - pos1 = b1.known_ge(IntBound(0, 0)) - pos2 = b2.known_ge(IntBound(0, 0)) - if pos1 or pos2: - r.make_ge(IntBound(0, 0)) - if pos1: - r.make_le(b1) - if pos2: - r.make_le(b2) + b = b1.and_bound(b2) + self.getintbound(op).intersect(b) def optimize_INT_SUB(self, op): return self.emit(op) @@ -211,16 +185,10 @@ r.intersect(b1.py_div_bound(b2)) def post_call_INT_PY_MOD(self, op): + b1 = self.getintbound(op.getarg(1)) b2 = self.getintbound(op.getarg(2)) - if b2.is_constant(): - val = b2.getint() - r = self.getintbound(op) - if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos - r.make_ge(IntBound(0, 0)) - r.make_lt(IntBound(val, val)) - else: # with Python's modulo: neg < (x % neg) <= 0 - r.make_gt(IntBound(val, val)) - r.make_le(IntBound(0, 0)) + r = self.getintbound(op) + r.intersect(b1.mod_bound(b2)) def optimize_INT_LSHIFT(self, op): return self.emit(op) @@ -436,7 +404,7 @@ def optimize_INT_FORCE_GE_ZERO(self, op): b = self.getintbound(op.getarg(0)) - if b.known_ge(IntBound(0, 0)): + if b.known_nonnegative(): self.make_equal_to(op, op.getarg(0)) else: return self.emit(op) @@ -647,7 +615,7 @@ if r.is_constant(): if r.getint() == valnonzero: b1 = self.getintbound(op.getarg(0)) - if b1.known_ge(IntBound(0, 0)): + if b1.known_nonnegative(): b1.make_gt(IntBound(0, 0)) self.propagate_bounds_backward(op.getarg(0)) elif r.getint() == valzero: diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -12,6 +12,19 @@ MAXINT = maxint MININT = -maxint - 1 +IS_64_BIT = sys.maxint > 2**32 + +def next_pow2_m1(n): + """Calculate next power of 2 greater than n minus one.""" + n |= n >> 1 + n |= n >> 2 + n |= n >> 4 + n |= n >> 8 + n |= n >> 16 + if IS_64_BIT: + n |= n >> 32 + return n + class IntBound(AbstractInfo): _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower') @@ -92,6 +105,9 @@ def known_ge(self, other): return other.known_le(self) 
+ def known_nonnegative(self): + return self.has_lower and 0 <= self.lower + def intersect(self, other): r = False @@ -192,10 +208,22 @@ else: return IntUnbounded() + def mod_bound(self, other): + r = IntUnbounded() + if other.is_constant(): + val = other.getint() + if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos + r.make_ge(IntBound(0, 0)) + r.make_lt(IntBound(val, val)) + else: # with Python's modulo: neg < (x % neg) <= 0 + r.make_gt(IntBound(val, val)) + r.make_le(IntBound(0, 0)) + return r + def lshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): try: vals = (ovfcheck(self.upper << other.upper), @@ -211,7 +239,7 @@ def rshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): vals = (self.upper >> other.upper, self.upper >> other.lower, @@ -221,7 +249,32 @@ else: return IntUnbounded() + def and_bound(self, other): + pos1 = self.known_nonnegative() + pos2 = other.known_nonnegative() + r = IntUnbounded() + if pos1 or pos2: + r.make_ge(IntBound(0, 0)) + if pos1: + r.make_le(self) + if pos2: + r.make_le(other) + return r + + def or_bound(self, other): + r = IntUnbounded() + if self.known_nonnegative() and \ + other.known_nonnegative(): + if self.has_upper and other.has_upper: + mostsignificant = self.upper | other.upper + r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) + else: + r.make_ge(IntBound(0, 0)) + return r + def contains(self, val): + if not we_are_translated(): + assert not isinstance(val, long) if not isinstance(val, int): if ((not self.has_lower or self.lower == MININT) and not self.has_upper or self.upper == MAXINT): @@ -282,7 +335,7 @@ guards.append(op) def is_bool(self): - return (self.bounded() and self.known_ge(ConstIntBound(0)) and + return (self.bounded() and self.known_nonnegative() and self.known_le(ConstIntBound(1))) def make_bool(self): @@ -297,7 +350,7 @@ if self.known_gt(IntBound(0, 0)) or \ self.known_lt(IntBound(0, 0)): return INFO_NONNULL - if self.known_ge(IntBound(0, 0)) and \ + if self.known_nonnegative() and \ self.known_le(IntBound(0, 0)): return INFO_NULL return INFO_UNKNOWN diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -1,12 +1,34 @@ from rpython.jit.metainterp.optimizeopt.intutils import IntBound, IntUpperBound, \ - IntLowerBound, IntUnbounded -from rpython.jit.metainterp.optimizeopt.intbounds import next_pow2_m1 + IntLowerBound, IntUnbounded, next_pow2_m1 from copy import copy import sys -from rpython.rlib.rarithmetic import LONG_BIT +from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck -def bound(a,b): +from hypothesis import given, strategies + +special_values = ( + range(-100, 100) + + [2 ** i for i in range(1, LONG_BIT)] + + [-2 ** i for i in range(1, LONG_BIT)] + + [2 ** i - 1 for i in range(1, LONG_BIT)] + + [-2 ** i - 1 for i in range(1, LONG_BIT)] + + [2 ** i + 1 for i in range(1, LONG_BIT)] + + [-2 ** i + 1 for i in range(1, LONG_BIT)] + + [sys.maxint, -sys.maxint-1]) + +special_values = strategies.sampled_from( + [int(v) for v in special_values if type(int(v)) is int]) 
+ +ints = strategies.builds( + int, # strategies.integers sometimes returns a long? + special_values | strategies.integers( + min_value=int(-sys.maxint-1), max_value=sys.maxint)) + +ints_or_none = strategies.none() | ints + + +def bound(a, b): if a is None and b is None: return IntUnbounded() elif a is None: @@ -14,11 +36,55 @@ elif b is None: return IntLowerBound(a) else: - return IntBound(a,b) + return IntBound(a, b) def const(a): return bound(a,a) + +def build_bound_with_contained_number(a, b, c): + a, b, c = sorted([a, b, c]) + r = bound(a, c) + assert r.contains(b) + return r, b + +bound_with_contained_number = strategies.builds( + build_bound_with_contained_number, + ints_or_none, + ints_or_none, + ints +) + +unbounded = strategies.builds( + lambda x: (bound(None, None), int(x)), + ints +) + +lower_bounded = strategies.builds( + lambda x, y: (bound(min(x, y), None), max(x, y)), + ints, + ints +) + +upper_bounded = strategies.builds( + lambda x, y: (bound(None, max(x, y)), min(x, y)), + ints, + ints +) + +bounded = strategies.builds( + build_bound_with_contained_number, + ints, ints, ints +) + +constant = strategies.builds( + lambda x: (const(x), x), + ints +) + +bound_with_contained_number = strategies.one_of( + unbounded, lower_bounded, upper_bounded, constant, bounded) + def some_bounds(): brd = [None] + range(-2, 3) for lower in brd: @@ -240,8 +306,6 @@ def test_div_bound(): - from rpython.rtyper.lltypesystem import lltype - from rpython.rtyper.lltypesystem.lloperation import llop for _, _, b1 in some_bounds(): for _, _, b2 in some_bounds(): b3 = b1.py_div_bound(b2) @@ -261,6 +325,15 @@ assert a.contains(-3) assert a.contains(0) +def test_mod_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.mod_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style div def test_sub_bound(): for _, _, b1 in some_bounds(): @@ -275,6 +348,25 @@ assert not a.contains(-1) assert not a.contains(4) +def test_and_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.and_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 & n2) + +def test_or_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.or_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 | n2) + assert b3.contains(n1 ^ n2) # we use it for xor too + def test_next_pow2_m1(): assert next_pow2_m1(0) == 0 @@ -285,3 +377,82 @@ assert next_pow2_m1(80) == 127 assert next_pow2_m1((1 << 32) - 5) == (1 << 32) - 1 assert next_pow2_m1((1 << 64) - 1) == (1 << 64) - 1 + + + at given(bound_with_contained_number, bound_with_contained_number) +def test_add_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.add_bound(b2) + try: + r = ovfcheck(n1 + n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_sub_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.sub_bound(b2) + try: + r = ovfcheck(n1 - n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mul_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mul_bound(b2) + try: + r = ovfcheck(n1 * n2) + except 
OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_div_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.py_div_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 / n2) # Python-style div + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mod_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mod_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style mod + + at given(bound_with_contained_number, bound_with_contained_number) +def test_and_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.and_bound(b2) + r = n1 & n2 + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_or_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.or_bound(b2) + r = n1 | n2 + assert b3.contains(r) + r = n1 ^ n2 + assert b3.contains(r) diff --git a/rpython/translator/platform/test/test_platform.py b/rpython/translator/platform/test/test_platform.py --- a/rpython/translator/platform/test/test_platform.py +++ b/rpython/translator/platform/test/test_platform.py @@ -113,8 +113,10 @@ def test_environment_inheritance(self): # make sure that environment is inherited cmd = 'import os; print os.environ["_SOME_VARIABLE_%d"]' + env = {'_SOME_VARIABLE_1':'xyz'} + env['PATH'] = os.environ['PATH'] res = self.platform.execute(sys.executable, ['-c', cmd % 1], - env={'_SOME_VARIABLE_1':'xyz'}) + env=env) assert 'xyz' in res.out os.environ['_SOME_VARIABLE_2'] = 'zyz' try: diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -10,21 +10,13 @@ rpydir = str(py.path.local(rpython.__file__).join('..')) def _get_compiler_type(cc, x64_flag): - import subprocess if not cc: cc = os.environ.get('CC','') if not cc: return MsvcPlatform(x64=x64_flag) elif cc.startswith('mingw') or cc == 'gcc': return MingwPlatform(cc) - else: - return MsvcPlatform(cc=cc, x64=x64_flag) - try: - subprocess.check_output([cc, '--version']) - except: - raise ValueError("Could not find compiler specified by cc option '%s'," - " it must be a valid exe file on your path" % cc) - return MingwPlatform(cc) + return MsvcPlatform(cc=cc, x64=x64_flag) def Windows(cc=None): return _get_compiler_type(cc, False) @@ -74,6 +66,11 @@ vcvars = os.path.join(vcbindir, 'amd64', 'vcvarsamd64.bat') else: vcvars = os.path.join(toolsdir, 'vsvars32.bat') + if not os.path.exists(vcvars): + # even msdn does not know which to run + # see https://msdn.microsoft.com/en-us/library/1700bbwd(v=vs.90).aspx + # wich names both + vcvars = os.path.join(toolsdir, 'vcvars32.bat') import subprocess try: @@ -95,25 +92,21 @@ key, value = line.split('=', 1) if key.upper() in ['PATH', 'INCLUDE', 'LIB']: env[key.upper()] = value - ## log.msg("Updated environment with %s" % (vcvars,)) + log.msg("Updated environment with %s" % (vcvars,)) return env def find_msvc_env(x64flag=False): + vcvers = [140, 100, 90, 80, 71, 70] # First, try to get the compiler which served to compile python msc_pos = sys.version.find('MSC v.') if msc_pos != -1: msc_ver = int(sys.version[msc_pos+6:msc_pos+10]) - # 1300 -> 70, 1310 -> 71, 1400 -> 80, 1500 -> 90 + # 1500 -> 90, 1900 -> 140 vsver = (msc_ver / 10) - 60 + vcvers.insert(0, vsver) + errs = [] + for vsver in 
vcvers: env = _get_msvc_env(vsver, x64flag) - - if env is not None: - return env - - # Then, try any other version - for vsver in (100, 90, 80, 71, 70): # All the versions I know - env = _get_msvc_env(vsver, x64flag) - if env is not None: return env log.error("Could not find a Microsoft Compiler") From pypy.commits at gmail.com Sun Dec 3 15:44:18 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 03 Dec 2017 12:44:18 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-test: hg merge unicode-utf8 Message-ID: <5a246222.0ab8df0a.762be.d849@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8-test Changeset: r93263:bdba860831ef Date: 2017-12-03 20:43 +0000 http://bitbucket.org/pypy/pypy/changeset/bdba860831ef/ Log: hg merge unicode-utf8 diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py --- a/extra_tests/test_textio.py +++ b/extra_tests/test_textio.py @@ -1,28 +1,48 @@ from hypothesis import given, strategies as st from io import BytesIO, TextIOWrapper +import os -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace('\r\n', '\n') + text = text.replace('\r', '\n') + return text.replace('\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline_universal( + draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + lines = draw(st.lists( + st.text(st.characters(blacklist_characters='\r\n')), + min_size=n_lines, max_size=n_lines)) + limits = [] + for line in lines: + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + endings = draw(st.lists( + st.sampled_from(['\n', '\r', '\r\n']), + min_size=n_lines, max_size=n_lines)) + return ( + ''.join(line + ending for line, ending in zip(lines, endings)), + limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(txt, mode, limit): + at given(data=st_readline_universal(), + mode=st.sampled_from(['\r', '\n', '\r\n', '', None])) +def test_readline(data, mode): + txt, limits = data textio = TextIOWrapper( - BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode) + BytesIO(txt.encode('utf-8', 'surrogatepass')), + encoding='utf-8', errors='surrogatepass', newline=mode) lines = [] - while True: + for limit in limits: line = textio.readline(limit) - if limit > 0: - assert len(line) < limit + if limit >= 0: + assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + if mode is None: + txt = translate_newlines(txt) + assert txt.startswith(u''.join(lines)) diff --git a/lib_pypy/resource.py b/lib_pypy/resource.py --- a/lib_pypy/resource.py +++ b/lib_pypy/resource.py @@ -20,6 +20,7 @@ or via the attributes ru_utime, ru_stime, ru_maxrss, and so on.""" __metaclass__ = _structseq.structseqtype + name = "resource.struct_rusage" ru_utime = _structseq.structseqfield(0, "user time used") ru_stime = _structseq.structseqfield(1, "system time used") diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -26,3 +26,6 @@ .. branch: fix-vmprof-stacklet-switch Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. 
branch: win32-vcvars + diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -25,8 +25,10 @@ This compiler, while the standard one for Python 2.7, is deprecated. Microsoft has made it available as the `Microsoft Visual C++ Compiler for Python 2.7`_ (the link -was checked in Nov 2016). Note that the compiler suite will be installed in -``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python``. +was checked in Nov 2016). Note that the compiler suite may be installed in +``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python`` +or in +``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python``. A current version of ``setuptools`` will be able to find it there. For Windows 10, you must right-click the download, and under ``Properties`` -> ``Compatibility`` mark it as ``Run run this program in comatibility mode for`` @@ -41,7 +43,6 @@ ----------------------------------- We routinely test translation using v9, also known as Visual Studio 2008. -Our buildbot is still using the Express Edition, not the compiler noted above. Other configurations may work as well. The translation scripts will set up the appropriate environment variables @@ -81,6 +82,30 @@ .. _build instructions: http://pypy.org/download.html#building-from-source +Setting Up Visual Studio for building SSL in Python3 +---------------------------------------------------- + +On Python3, the ``ssl`` module is based on ``cffi``, and requires a build step after +translation. However ``distutils`` does not support the Micorosft-provided Visual C +compiler, and ``cffi`` depends on ``distutils`` to find the compiler. The +traditional solution to this problem is to install the ``setuptools`` module +via running ``-m ensurepip`` which installs ``pip`` and ``setuptools``. However +``pip`` requires ``ssl``. So we have a chicken-and-egg problem: ``ssl`` depends on +``cffi`` which depends on ``setuptools``, which depends on ``ensurepip``, which +depends on ``ssl``. + +In order to solve this, the buildbot sets an environment varaible that helps +``distutils`` find the compiler without ``setuptools``:: + + set VS90COMNTOOLS=C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0\VC\bin + +or whatever is appropriate for your machine. 
Note that this is not enough, you +must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the +``...\9.0\VC`` directory, and edit it, changing the lines that set +``VCINSTALLDIR`` and ``WindowsSdkDir``:: + set VCINSTALLDIR=%~dp0\ + set WindowsSdkDir=%~dp0\..\WinSDK\ + Preparing Windows for the large build ------------------------------------- diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -361,6 +361,7 @@ while scanned < limit: try: ch = self.next_char() + scanned += 1 except StopIteration: return False if ch == '\n': @@ -746,7 +747,7 @@ remnant = None continue - if limit > 0: + if limit >= 0: remaining = limit - builder.getlength() assert remaining >= 0 else: diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -1,41 +1,54 @@ import pytest try: - from hypothesis import given, strategies as st, assume + from hypothesis import given, strategies as st except ImportError: pytest.skip("hypothesis required") +import os from pypy.module._io.interp_bytesio import W_BytesIO from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace(u'\r\n', u'\n') + text = text.replace(u'\r', u'\n') + return text.replace(u'\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline(draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + fragments = [] + limits = [] + for _ in range(n_lines): + line = draw(st.text(st.characters(blacklist_characters=u'\r\n'))) + fragments.append(line) + ending = draw(st.sampled_from([u'\n', u'\r', u'\r\n'])) + fragments.append(ending) + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + return (u''.join(fragments), limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(space, txt, mode, limit): - assume(limit != 0) + at given(data=st_readline(), + mode=st.sampled_from(['\r', '\n', '\r\n', ''])) +def test_readline(space, data, mode): + txt, limits = data w_stream = W_BytesIO(space) w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) w_textio = W_TextIOWrapper(space) w_textio.descr_init( - space, w_stream, encoding='utf-8', + space, w_stream, + encoding='utf-8', w_errors=space.newtext('surrogatepass'), w_newline=space.newtext(mode)) lines = [] - while True: + for limit in limits: w_line = w_textio.readline_w(space, space.newint(limit)) line = space.utf8_w(w_line).decode('utf-8') - if limit > 0: + if limit >= 0: assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + assert txt.startswith(u''.join(lines)) @given(st.text()) def test_read_buffer(text): diff --git a/pypy/module/cpyext/test/test_codecs.py b/pypy/module/cpyext/test/test_codecs.py --- a/pypy/module/cpyext/test/test_codecs.py +++ b/pypy/module/cpyext/test/test_codecs.py @@ -11,5 +11,5 @@ w_encoded = space.call_method(w_encoder, 'encode', space.wrap(u'späm')) w_decoder = PyCodec_IncrementalDecoder(space, utf8, None) w_decoded = space.call_method(w_decoder, 
'decode', w_encoded) - assert space.unwrap(w_decoded) == u'späm' + assert space.unicode_w(w_decoded) == u'späm' rffi.free_charp(utf8) diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py --- a/pypy/module/cpyext/test/test_eval.py +++ b/pypy/module/cpyext/test/test_eval.py @@ -131,7 +131,7 @@ finally: rffi.free_charp(buf) w_a = space.getitem(w_globals, space.wrap("a")) - assert space.unwrap(w_a) == u'caf\xe9' + assert space.unicode_w(w_a) == u'caf\xe9' lltype.free(flags, flavor='raw') def test_run_file(self, space): diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py --- a/pypy/module/cpyext/test/test_object.py +++ b/pypy/module/cpyext/test/test_object.py @@ -8,7 +8,7 @@ from pypy.module.cpyext.object import ( PyObject_IsTrue, PyObject_Not, PyObject_GetAttrString, PyObject_DelAttrString, PyObject_GetAttr, PyObject_DelAttr, - PyObject_GetItem, + PyObject_GetItem, PyObject_IsInstance, PyObject_IsSubclass, PyObject_AsFileDescriptor, PyObject_Hash, PyObject_Cmp, PyObject_Unicode ) @@ -209,9 +209,9 @@ PyObject_Cmp(space, w(u"\xe9"), w("\xe9"), ptr) def test_unicode(self, space, api): - assert space.unwrap(api.PyObject_Unicode(None)) == u"" - assert space.unwrap(api.PyObject_Unicode(space.wrap([]))) == u"[]" - assert space.unwrap(api.PyObject_Unicode(space.wrap("e"))) == u"e" + assert space.unicode_w(api.PyObject_Unicode(None)) == u"" + assert space.unicode_w(api.PyObject_Unicode(space.wrap([]))) == u"[]" + assert space.unicode_w(api.PyObject_Unicode(space.wrap("e"))) == u"e" with raises_w(space, UnicodeDecodeError): PyObject_Unicode(space, space.wrap("\xe9")) @@ -562,7 +562,7 @@ PyObject *a = PyTuple_GetItem(args, 0); PyObject *b = PyTuple_GetItem(args, 1); int res = PyObject_RichCompareBool(a, b, Py_EQ); - return PyLong_FromLong(res); + return PyLong_FromLong(res); """),]) a = float('nan') b = float('nan') diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -178,7 +178,7 @@ array = rffi.cast(rffi.CWCHARP, PyUnicode_AS_DATA(space, word)) array2 = PyUnicode_AS_UNICODE(space, word) array3 = PyUnicode_AsUnicode(space, word) - for (i, char) in enumerate(space.unwrap(word)): + for (i, char) in enumerate(space.unicode_w(word)): assert array[i] == char assert array2[i] == char assert array3[i] == char @@ -216,12 +216,12 @@ def test_fromstring(self, space): s = rffi.str2charp(u'sp\x09m'.encode("utf-8")) w_res = PyUnicode_FromString(space, s) - assert space.unwrap(w_res) == u'sp\x09m' + assert space.unicode_w(w_res) == u'sp\x09m' res = PyUnicode_FromStringAndSize(space, s, 4) w_res = from_ref(space, res) Py_DecRef(space, res) - assert space.unwrap(w_res) == u'sp\x09m' + assert space.unicode_w(w_res) == u'sp\x09m' rffi.free_charp(s) def test_unicode_resize(self, space): @@ -256,17 +256,17 @@ u = rffi.str2charp(u'sp\x134m'.encode("utf-8")) w_u = PyUnicode_DecodeUTF8(space, u, 5, None) assert space.type(w_u) is space.w_unicode - assert space.unwrap(w_u) == u'sp\x134m' + assert space.unicode_w(w_u) == u'sp\x134m' w_u = PyUnicode_DecodeUTF8(space, u, 2, None) assert space.type(w_u) is space.w_unicode - assert space.unwrap(w_u) == 'sp' + assert space.unicode_w(w_u) == 'sp' rffi.free_charp(u) def test_encode_utf8(self, space): u = rffi.unicode2wcharp(u'sp\x09m') w_s = PyUnicode_EncodeUTF8(space, u, 4, None) - assert space.unwrap(w_s) == u'sp\x09m'.encode('utf-8') + assert 
space.unicode_w(w_s) == u'sp\x09m'.encode('utf-8') rffi.free_wcharp(u) def test_encode_decimal(self, space): @@ -364,18 +364,18 @@ def test_fromobject(self, space): w_u = space.wrap(u'a') assert PyUnicode_FromObject(space, w_u) is w_u - assert space.unwrap( + assert space.unicode_w( PyUnicode_FromObject(space, space.wrap('test'))) == 'test' def test_decode(self, space): b_text = rffi.str2charp('caf\x82xx') b_encoding = rffi.str2charp('cp437') - assert space.unwrap( + assert space.unicode_w( PyUnicode_Decode(space, b_text, 4, b_encoding, None)) == u'caf\xe9' w_text = PyUnicode_FromEncodedObject(space, space.wrap("test"), b_encoding, None) assert space.isinstance_w(w_text, space.w_unicode) - assert space.unwrap(w_text) == "test" + assert space.unicode_w(w_text) == "test" with raises_w(space, TypeError): PyUnicode_FromEncodedObject(space, space.wrap(u"test"), @@ -391,7 +391,8 @@ u_text = u'abcdefg' s_text = space.str_w(PyUnicode_AsEncodedString(space, space.wrap(u_text), null_charp, null_charp)) b_text = rffi.str2charp(s_text) - assert space.unwrap(PyUnicode_Decode(space, b_text, len(s_text), null_charp, null_charp)) == u_text + assert space.unicode_w(PyUnicode_Decode( + space, b_text, len(s_text), null_charp, null_charp)) == u_text with raises_w(space, TypeError): PyUnicode_FromEncodedObject( space, space.wrap(u_text), null_charp, None) @@ -508,7 +509,7 @@ def test_concat(self, space): w_res = PyUnicode_Concat(space, space.wrap(u'a'), space.wrap(u'b')) - assert space.unwrap(w_res) == u'ab' + assert space.unicode_w(w_res) == u'ab' def test_copy(self, space): w_x = space.wrap(u"abcd\u0660") @@ -579,29 +580,30 @@ w_format = space.wrap(u'hi %s') w_args = space.wrap((u'test',)) w_formated = PyUnicode_Format(space, w_format, w_args) - assert space.unwrap(w_formated) == space.unwrap(space.mod(w_format, w_args)) + assert (space.unicode_w(w_formated) == + space.unicode_w(space.mod(w_format, w_args))) def test_join(self, space): w_sep = space.wrap(u'') w_seq = space.wrap([u'a', u'b']) w_joined = PyUnicode_Join(space, w_sep, w_seq) - assert space.unwrap(w_joined) == u'ab' + assert space.unicode_w(w_joined) == u'ab' def test_fromordinal(self, space): w_char = PyUnicode_FromOrdinal(space, 65) - assert space.unwrap(w_char) == u'A' + assert space.unicode_w(w_char) == u'A' w_char = PyUnicode_FromOrdinal(space, 0) - assert space.unwrap(w_char) == u'\0' + assert space.unicode_w(w_char) == u'\0' w_char = PyUnicode_FromOrdinal(space, 0xFFFF) - assert space.unwrap(w_char) == u'\uFFFF' + assert space.unicode_w(w_char) == u'\uFFFF' def test_replace(self, space): w_str = space.wrap(u"abababab") w_substr = space.wrap(u"a") w_replstr = space.wrap(u"z") - assert u"zbzbabab" == space.unwrap( + assert u"zbzbabab" == space.unicode_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, 2)) - assert u"zbzbzbzb" == space.unwrap( + assert u"zbzbzbzb" == space.unicode_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, -1)) def test_tailmatch(self, space): diff --git a/pypy/module/unicodedata/test/test_hyp.py b/pypy/module/unicodedata/test/test_hyp.py --- a/pypy/module/unicodedata/test/test_hyp.py +++ b/pypy/module/unicodedata/test/test_hyp.py @@ -10,7 +10,7 @@ def normalize(s): w_s = space.newunicode(s) w_res = ucd.normalize(space, NF_code, w_s) - return space.unwrap(w_res) + return space.unicode_w(w_res) return normalize all_forms = ['NFC', 'NFD', 'NFKC', 'NFKD'] diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- 
a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -25,19 +25,6 @@ return (1 << ((byte_size << 3) - 1)) - 1 -IS_64_BIT = sys.maxint > 2**32 - -def next_pow2_m1(n): - """Calculate next power of 2 greater than n minus one.""" - n |= n >> 1 - n |= n >> 2 - n |= n >> 4 - n |= n >> 8 - n |= n >> 16 - if IS_64_BIT: - n |= n >> 32 - return n - class OptIntBounds(Optimization): """Keeps track of the bounds placed on integers by guards and remove @@ -50,7 +37,7 @@ return dispatch_postprocess(self, op) def propagate_bounds_backward(self, box): - # FIXME: This takes care of the instruction where box is the reuslt + # FIXME: This takes care of the instruction where box is the result # but the bounds produced by all instructions where box is # an argument might also be tighten b = self.getintbound(box) @@ -91,14 +78,8 @@ b1 = self.getintbound(v1) v2 = self.get_box_replacement(op.getarg(1)) b2 = self.getintbound(v2) - if b1.known_ge(IntBound(0, 0)) and \ - b2.known_ge(IntBound(0, 0)): - r = self.getintbound(op) - if b1.has_upper and b2.has_upper: - mostsignificant = b1.upper | b2.upper - r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) - else: - r.make_ge(IntBound(0, 0)) + b = b1.or_bound(b2) + self.getintbound(op).intersect(b) optimize_INT_OR = optimize_INT_OR_or_XOR optimize_INT_XOR = optimize_INT_OR_or_XOR @@ -112,15 +93,8 @@ def postprocess_INT_AND(self, op): b1 = self.getintbound(op.getarg(0)) b2 = self.getintbound(op.getarg(1)) - r = self.getintbound(op) - pos1 = b1.known_ge(IntBound(0, 0)) - pos2 = b2.known_ge(IntBound(0, 0)) - if pos1 or pos2: - r.make_ge(IntBound(0, 0)) - if pos1: - r.make_le(b1) - if pos2: - r.make_le(b2) + b = b1.and_bound(b2) + self.getintbound(op).intersect(b) def optimize_INT_SUB(self, op): return self.emit(op) @@ -211,16 +185,10 @@ r.intersect(b1.py_div_bound(b2)) def post_call_INT_PY_MOD(self, op): + b1 = self.getintbound(op.getarg(1)) b2 = self.getintbound(op.getarg(2)) - if b2.is_constant(): - val = b2.getint() - r = self.getintbound(op) - if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos - r.make_ge(IntBound(0, 0)) - r.make_lt(IntBound(val, val)) - else: # with Python's modulo: neg < (x % neg) <= 0 - r.make_gt(IntBound(val, val)) - r.make_le(IntBound(0, 0)) + r = self.getintbound(op) + r.intersect(b1.mod_bound(b2)) def optimize_INT_LSHIFT(self, op): return self.emit(op) @@ -436,7 +404,7 @@ def optimize_INT_FORCE_GE_ZERO(self, op): b = self.getintbound(op.getarg(0)) - if b.known_ge(IntBound(0, 0)): + if b.known_nonnegative(): self.make_equal_to(op, op.getarg(0)) else: return self.emit(op) @@ -647,7 +615,7 @@ if r.is_constant(): if r.getint() == valnonzero: b1 = self.getintbound(op.getarg(0)) - if b1.known_ge(IntBound(0, 0)): + if b1.known_nonnegative(): b1.make_gt(IntBound(0, 0)) self.propagate_bounds_backward(op.getarg(0)) elif r.getint() == valzero: diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -12,6 +12,19 @@ MAXINT = maxint MININT = -maxint - 1 +IS_64_BIT = sys.maxint > 2**32 + +def next_pow2_m1(n): + """Calculate next power of 2 greater than n minus one.""" + n |= n >> 1 + n |= n >> 2 + n |= n >> 4 + n |= n >> 8 + n |= n >> 16 + if IS_64_BIT: + n |= n >> 32 + return n + class IntBound(AbstractInfo): _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower') @@ -92,6 +105,9 @@ def known_ge(self, other): return other.known_le(self) 
+ def known_nonnegative(self): + return self.has_lower and 0 <= self.lower + def intersect(self, other): r = False @@ -192,10 +208,22 @@ else: return IntUnbounded() + def mod_bound(self, other): + r = IntUnbounded() + if other.is_constant(): + val = other.getint() + if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos + r.make_ge(IntBound(0, 0)) + r.make_lt(IntBound(val, val)) + else: # with Python's modulo: neg < (x % neg) <= 0 + r.make_gt(IntBound(val, val)) + r.make_le(IntBound(0, 0)) + return r + def lshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): try: vals = (ovfcheck(self.upper << other.upper), @@ -211,7 +239,7 @@ def rshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): vals = (self.upper >> other.upper, self.upper >> other.lower, @@ -221,7 +249,32 @@ else: return IntUnbounded() + def and_bound(self, other): + pos1 = self.known_nonnegative() + pos2 = other.known_nonnegative() + r = IntUnbounded() + if pos1 or pos2: + r.make_ge(IntBound(0, 0)) + if pos1: + r.make_le(self) + if pos2: + r.make_le(other) + return r + + def or_bound(self, other): + r = IntUnbounded() + if self.known_nonnegative() and \ + other.known_nonnegative(): + if self.has_upper and other.has_upper: + mostsignificant = self.upper | other.upper + r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) + else: + r.make_ge(IntBound(0, 0)) + return r + def contains(self, val): + if not we_are_translated(): + assert not isinstance(val, long) if not isinstance(val, int): if ((not self.has_lower or self.lower == MININT) and not self.has_upper or self.upper == MAXINT): @@ -282,7 +335,7 @@ guards.append(op) def is_bool(self): - return (self.bounded() and self.known_ge(ConstIntBound(0)) and + return (self.bounded() and self.known_nonnegative() and self.known_le(ConstIntBound(1))) def make_bool(self): @@ -297,7 +350,7 @@ if self.known_gt(IntBound(0, 0)) or \ self.known_lt(IntBound(0, 0)): return INFO_NONNULL - if self.known_ge(IntBound(0, 0)) and \ + if self.known_nonnegative() and \ self.known_le(IntBound(0, 0)): return INFO_NULL return INFO_UNKNOWN diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -1,12 +1,34 @@ from rpython.jit.metainterp.optimizeopt.intutils import IntBound, IntUpperBound, \ - IntLowerBound, IntUnbounded -from rpython.jit.metainterp.optimizeopt.intbounds import next_pow2_m1 + IntLowerBound, IntUnbounded, next_pow2_m1 from copy import copy import sys -from rpython.rlib.rarithmetic import LONG_BIT +from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck -def bound(a,b): +from hypothesis import given, strategies + +special_values = ( + range(-100, 100) + + [2 ** i for i in range(1, LONG_BIT)] + + [-2 ** i for i in range(1, LONG_BIT)] + + [2 ** i - 1 for i in range(1, LONG_BIT)] + + [-2 ** i - 1 for i in range(1, LONG_BIT)] + + [2 ** i + 1 for i in range(1, LONG_BIT)] + + [-2 ** i + 1 for i in range(1, LONG_BIT)] + + [sys.maxint, -sys.maxint-1]) + +special_values = strategies.sampled_from( + [int(v) for v in special_values if type(int(v)) is int]) 
+ +ints = strategies.builds( + int, # strategies.integers sometimes returns a long? + special_values | strategies.integers( + min_value=int(-sys.maxint-1), max_value=sys.maxint)) + +ints_or_none = strategies.none() | ints + + +def bound(a, b): if a is None and b is None: return IntUnbounded() elif a is None: @@ -14,11 +36,55 @@ elif b is None: return IntLowerBound(a) else: - return IntBound(a,b) + return IntBound(a, b) def const(a): return bound(a,a) + +def build_bound_with_contained_number(a, b, c): + a, b, c = sorted([a, b, c]) + r = bound(a, c) + assert r.contains(b) + return r, b + +bound_with_contained_number = strategies.builds( + build_bound_with_contained_number, + ints_or_none, + ints_or_none, + ints +) + +unbounded = strategies.builds( + lambda x: (bound(None, None), int(x)), + ints +) + +lower_bounded = strategies.builds( + lambda x, y: (bound(min(x, y), None), max(x, y)), + ints, + ints +) + +upper_bounded = strategies.builds( + lambda x, y: (bound(None, max(x, y)), min(x, y)), + ints, + ints +) + +bounded = strategies.builds( + build_bound_with_contained_number, + ints, ints, ints +) + +constant = strategies.builds( + lambda x: (const(x), x), + ints +) + +bound_with_contained_number = strategies.one_of( + unbounded, lower_bounded, upper_bounded, constant, bounded) + def some_bounds(): brd = [None] + range(-2, 3) for lower in brd: @@ -240,8 +306,6 @@ def test_div_bound(): - from rpython.rtyper.lltypesystem import lltype - from rpython.rtyper.lltypesystem.lloperation import llop for _, _, b1 in some_bounds(): for _, _, b2 in some_bounds(): b3 = b1.py_div_bound(b2) @@ -261,6 +325,15 @@ assert a.contains(-3) assert a.contains(0) +def test_mod_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.mod_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style div def test_sub_bound(): for _, _, b1 in some_bounds(): @@ -275,6 +348,25 @@ assert not a.contains(-1) assert not a.contains(4) +def test_and_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.and_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 & n2) + +def test_or_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.or_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 | n2) + assert b3.contains(n1 ^ n2) # we use it for xor too + def test_next_pow2_m1(): assert next_pow2_m1(0) == 0 @@ -285,3 +377,82 @@ assert next_pow2_m1(80) == 127 assert next_pow2_m1((1 << 32) - 5) == (1 << 32) - 1 assert next_pow2_m1((1 << 64) - 1) == (1 << 64) - 1 + + + at given(bound_with_contained_number, bound_with_contained_number) +def test_add_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.add_bound(b2) + try: + r = ovfcheck(n1 + n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_sub_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.sub_bound(b2) + try: + r = ovfcheck(n1 - n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mul_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mul_bound(b2) + try: + r = ovfcheck(n1 * n2) + except 
OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_div_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.py_div_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 / n2) # Python-style div + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mod_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mod_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style mod + + at given(bound_with_contained_number, bound_with_contained_number) +def test_and_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.and_bound(b2) + r = n1 & n2 + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_or_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.or_bound(b2) + r = n1 | n2 + assert b3.contains(r) + r = n1 ^ n2 + assert b3.contains(r) diff --git a/rpython/translator/platform/test/test_platform.py b/rpython/translator/platform/test/test_platform.py --- a/rpython/translator/platform/test/test_platform.py +++ b/rpython/translator/platform/test/test_platform.py @@ -113,8 +113,10 @@ def test_environment_inheritance(self): # make sure that environment is inherited cmd = 'import os; print os.environ["_SOME_VARIABLE_%d"]' + env = {'_SOME_VARIABLE_1':'xyz'} + env['PATH'] = os.environ['PATH'] res = self.platform.execute(sys.executable, ['-c', cmd % 1], - env={'_SOME_VARIABLE_1':'xyz'}) + env=env) assert 'xyz' in res.out os.environ['_SOME_VARIABLE_2'] = 'zyz' try: diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -10,21 +10,13 @@ rpydir = str(py.path.local(rpython.__file__).join('..')) def _get_compiler_type(cc, x64_flag): - import subprocess if not cc: cc = os.environ.get('CC','') if not cc: return MsvcPlatform(x64=x64_flag) elif cc.startswith('mingw') or cc == 'gcc': return MingwPlatform(cc) - else: - return MsvcPlatform(cc=cc, x64=x64_flag) - try: - subprocess.check_output([cc, '--version']) - except: - raise ValueError("Could not find compiler specified by cc option '%s'," - " it must be a valid exe file on your path" % cc) - return MingwPlatform(cc) + return MsvcPlatform(cc=cc, x64=x64_flag) def Windows(cc=None): return _get_compiler_type(cc, False) @@ -74,6 +66,11 @@ vcvars = os.path.join(vcbindir, 'amd64', 'vcvarsamd64.bat') else: vcvars = os.path.join(toolsdir, 'vsvars32.bat') + if not os.path.exists(vcvars): + # even msdn does not know which to run + # see https://msdn.microsoft.com/en-us/library/1700bbwd(v=vs.90).aspx + # wich names both + vcvars = os.path.join(toolsdir, 'vcvars32.bat') import subprocess try: @@ -95,25 +92,21 @@ key, value = line.split('=', 1) if key.upper() in ['PATH', 'INCLUDE', 'LIB']: env[key.upper()] = value - ## log.msg("Updated environment with %s" % (vcvars,)) + log.msg("Updated environment with %s" % (vcvars,)) return env def find_msvc_env(x64flag=False): + vcvers = [140, 100, 90, 80, 71, 70] # First, try to get the compiler which served to compile python msc_pos = sys.version.find('MSC v.') if msc_pos != -1: msc_ver = int(sys.version[msc_pos+6:msc_pos+10]) - # 1300 -> 70, 1310 -> 71, 1400 -> 80, 1500 -> 90 + # 1500 -> 90, 1900 -> 140 vsver = (msc_ver / 10) - 60 + vcvers.insert(0, vsver) + errs = [] + for vsver in 
vcvers: env = _get_msvc_env(vsver, x64flag) - - if env is not None: - return env - - # Then, try any other version - for vsver in (100, 90, 80, 71, 70): # All the versions I know - env = _get_msvc_env(vsver, x64flag) - if env is not None: return env log.error("Could not find a Microsoft Compiler") From pypy.commits at gmail.com Sun Dec 3 16:25:18 2017 From: pypy.commits at gmail.com (fijal) Date: Sun, 03 Dec 2017 13:25:18 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix the warnings module, add we_are_translated for now Message-ID: <5a246bbe.c7a4df0a.6e59d.acaf@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93264:f312060a4a01 Date: 2017-12-03 16:27 +0100 http://bitbucket.org/pypy/pypy/changeset/f312060a4a01/ Log: fix the warnings module, add we_are_translated for now diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -596,9 +596,9 @@ return space.w_None wcharp_addr = rffi.cast(rffi.CWCHARP, address) if maxlength == -1: - s = rffi.wcharp2unicode(wcharp_addr) + s = rffi.wcharp2utf8(wcharp_addr) else: - s = rffi.wcharp2unicoden(wcharp_addr, maxlength) + s = rffi.wcharpsize2utf8(wcharp_addr, maxlength) return space.newunicode(s) @unwrap_spec(address=r_uint, maxlength=int) diff --git a/pypy/module/_warnings/interp_warnings.py b/pypy/module/_warnings/interp_warnings.py --- a/pypy/module/_warnings/interp_warnings.py +++ b/pypy/module/_warnings/interp_warnings.py @@ -1,3 +1,6 @@ + +from rpython.rlib import rutf8 + from pypy.interpreter.gateway import unwrap_spec, WrappedDefault from pypy.interpreter.error import OperationError, oefmt @@ -208,10 +211,11 @@ except OperationError as e: if e.async(space): raise - message = u"%s:%d: %s: %s\n" % (space.unicode_w(w_filename), lineno, - space.unicode_w(w_name), - space.unicode_w(w_text)) - w_message = space.newunicode(message) + message = "%s:%d: %s: %s\n" % (space.utf8_w(w_filename), lineno, + space.utf8_w(w_name), + space.utf8_w(w_text)) + lgt, flag = rutf8.check_utf8(message, True) + w_message = space.newutf8(message, lgt, flag) else: w_message = space.newtext(message) space.call_method(w_stderr, "write", w_message) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -49,14 +49,15 @@ self._index_storage = rutf8.null_storage() # XXX checking, remove before any performance measurments # ifdef not_running_in_benchmark - lgt, flag_check = rutf8.check_utf8(utf8str, True) - assert lgt == length - if flag_check == rutf8.FLAG_ASCII: - # there are cases where we copy part of REULAR that happens - # to be ascii - assert flag in (rutf8.FLAG_ASCII, rutf8.FLAG_REGULAR) - else: - assert flag == flag_check + if not we_are_translated(): + lgt, flag_check = rutf8.check_utf8(utf8str, True) + assert lgt == length + if flag_check == rutf8.FLAG_ASCII: + # there are cases where we copy part of REULAR that happens + # to be ascii + assert flag in (rutf8.FLAG_ASCII, rutf8.FLAG_REGULAR) + else: + assert flag == flag_check # the storage can be one of: # - null, unicode with no surrogates # - rutf8.UTF8_HAS_SURROGATES @@ -1173,7 +1174,7 @@ s = space.charbuf_w(w_obj) unicodehelper.check_ascii_or_raise(space, s) return space.newutf8(s, len(s), rutf8.FLAG_ASCII) - if encoding == 'utf-8': + if encoding == 'utf-8' or encoding == 'utf8': s = space.charbuf_w(w_obj) lgt, flag = unicodehelper.check_utf8_or_raise(space, s) 
return space.newutf8(s, lgt, flag) diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1019,7 +1019,7 @@ s = StringBuilder(size) for i in range(size): rutf8.unichr_as_utf8_append(s, ord(w[i])) - return s.build() + return s.build() def utf82wcharp(utf8, utf8len): from rpython.rlib import rutf8 From pypy.commits at gmail.com Sun Dec 3 16:25:22 2017 From: pypy.commits at gmail.com (fijal) Date: Sun, 03 Dec 2017 13:25:22 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix exceptions module Message-ID: <5a246bc2.4f82df0a.ed9df.18b8@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93266:afec0f2bdfff Date: 2017-12-03 16:37 +0100 http://bitbucket.org/pypy/pypy/changeset/afec0f2bdfff/ Log: fix exceptions module diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py --- a/pypy/module/exceptions/interp_exceptions.py +++ b/pypy/module/exceptions/interp_exceptions.py @@ -719,7 +719,7 @@ def descr_init(self, space, w_encoding, w_object, w_start, w_end, w_reason): # typechecking space.realtext_w(w_encoding) - space.utf8_w(w_object) + space.realutf8_w(w_object) space.int_w(w_start) space.int_w(w_end) space.realtext_w(w_reason) From pypy.commits at gmail.com Sun Dec 3 16:25:20 2017 From: pypy.commits at gmail.com (fijal) Date: Sun, 03 Dec 2017 13:25:20 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: "fix" the operator module Message-ID: <5a246bc0.48d31c0a.a607e.b2a5@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93265:cdf9331a2d41 Date: 2017-12-03 16:35 +0100 http://bitbucket.org/pypy/pypy/changeset/cdf9331a2d41/ Log: "fix" the operator module diff --git a/pypy/module/operator/tscmp.py b/pypy/module/operator/tscmp.py --- a/pypy/module/operator/tscmp.py +++ b/pypy/module/operator/tscmp.py @@ -45,15 +45,15 @@ Note: If a and b are of different lengths, or if an error occurs, a timing attack could theoretically reveal information about the types and lengths of a and b--but not their values. + + XXX note that here the strings have to have the same length as UTF8, + not only as unicode. 
Not sure how to do better """ if (space.isinstance_w(w_a, space.w_unicode) and space.isinstance_w(w_b, space.w_unicode)): - a = space.unicode_w(w_a) - b = space.unicode_w(w_b) - with rffi.scoped_nonmoving_unicodebuffer(a) as a_buf: - with rffi.scoped_nonmoving_unicodebuffer(b) as b_buf: - result = pypy_tscmp_wide(a_buf, b_buf, len(a), len(b)) - return space.newbool(rffi.cast(lltype.Bool, result)) + a = space.utf8_w(w_a) + b = space.utf8_w(w_b) + return space.newbool(_compare_two_strings(a, b)) return compare_digest_buffer(space, w_a, w_b) @@ -68,7 +68,10 @@ a = a_buf.as_str() b = b_buf.as_str() + return space.newbool(_compare_two_strings(a, b)) + +def _compare_two_strings(a, b): with rffi.scoped_nonmovingbuffer(a) as a_buf: with rffi.scoped_nonmovingbuffer(b) as b_buf: result = pypy_tscmp(a_buf, b_buf, len(a), len(b)) - return space.newbool(rffi.cast(lltype.Bool, result)) + return rffi.cast(lltype.Bool, result) From pypy.commits at gmail.com Sun Dec 3 16:25:28 2017 From: pypy.commits at gmail.com (fijal) Date: Sun, 03 Dec 2017 13:25:28 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge Message-ID: <5a246bc8.919bdf0a.c917c.b93d@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93269:43b46a2c0239 Date: 2017-12-03 22:24 +0100 http://bitbucket.org/pypy/pypy/changeset/43b46a2c0239/ Log: merge diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py --- a/extra_tests/test_textio.py +++ b/extra_tests/test_textio.py @@ -1,28 +1,48 @@ from hypothesis import given, strategies as st from io import BytesIO, TextIOWrapper +import os -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace('\r\n', '\n') + text = text.replace('\r', '\n') + return text.replace('\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline_universal( + draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + lines = draw(st.lists( + st.text(st.characters(blacklist_characters='\r\n')), + min_size=n_lines, max_size=n_lines)) + limits = [] + for line in lines: + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + endings = draw(st.lists( + st.sampled_from(['\n', '\r', '\r\n']), + min_size=n_lines, max_size=n_lines)) + return ( + ''.join(line + ending for line, ending in zip(lines, endings)), + limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(txt, mode, limit): + at given(data=st_readline_universal(), + mode=st.sampled_from(['\r', '\n', '\r\n', '', None])) +def test_readline(data, mode): + txt, limits = data textio = TextIOWrapper( - BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode) + BytesIO(txt.encode('utf-8', 'surrogatepass')), + encoding='utf-8', errors='surrogatepass', newline=mode) lines = [] - while True: + for limit in limits: line = textio.readline(limit) - if limit > 0: - assert len(line) < limit + if limit >= 0: + assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + if mode is None: + txt = translate_newlines(txt) + assert txt.startswith(u''.join(lines)) diff --git a/lib_pypy/resource.py b/lib_pypy/resource.py --- a/lib_pypy/resource.py +++ b/lib_pypy/resource.py @@ -20,6 +20,7 @@ or via the attributes ru_utime, ru_stime, ru_maxrss, and so on.""" 
__metaclass__ = _structseq.structseqtype + name = "resource.struct_rusage" ru_utime = _structseq.structseqfield(0, "user time used") ru_stime = _structseq.structseqfield(1, "system time used") diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -26,3 +26,6 @@ .. branch: fix-vmprof-stacklet-switch Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -25,8 +25,10 @@ This compiler, while the standard one for Python 2.7, is deprecated. Microsoft has made it available as the `Microsoft Visual C++ Compiler for Python 2.7`_ (the link -was checked in Nov 2016). Note that the compiler suite will be installed in -``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python``. +was checked in Nov 2016). Note that the compiler suite may be installed in +``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python`` +or in +``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python``. A current version of ``setuptools`` will be able to find it there. For Windows 10, you must right-click the download, and under ``Properties`` -> ``Compatibility`` mark it as ``Run run this program in comatibility mode for`` @@ -41,7 +43,6 @@ ----------------------------------- We routinely test translation using v9, also known as Visual Studio 2008. -Our buildbot is still using the Express Edition, not the compiler noted above. Other configurations may work as well. The translation scripts will set up the appropriate environment variables @@ -81,6 +82,30 @@ .. _build instructions: http://pypy.org/download.html#building-from-source +Setting Up Visual Studio for building SSL in Python3 +---------------------------------------------------- + +On Python3, the ``ssl`` module is based on ``cffi``, and requires a build step after +translation. However ``distutils`` does not support the Micorosft-provided Visual C +compiler, and ``cffi`` depends on ``distutils`` to find the compiler. The +traditional solution to this problem is to install the ``setuptools`` module +via running ``-m ensurepip`` which installs ``pip`` and ``setuptools``. However +``pip`` requires ``ssl``. So we have a chicken-and-egg problem: ``ssl`` depends on +``cffi`` which depends on ``setuptools``, which depends on ``ensurepip``, which +depends on ``ssl``. + +In order to solve this, the buildbot sets an environment varaible that helps +``distutils`` find the compiler without ``setuptools``:: + + set VS90COMNTOOLS=C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0\VC\bin + +or whatever is appropriate for your machine. 
Note that this is not enough, you +must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the +``...\9.0\VC`` directory, and edit it, changing the lines that set +``VCINSTALLDIR`` and ``WindowsSdkDir``:: + set VCINSTALLDIR=%~dp0\ + set WindowsSdkDir=%~dp0\..\WinSDK\ + Preparing Windows for the large build ------------------------------------- diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -361,6 +361,7 @@ while scanned < limit: try: ch = self.next_char() + scanned += 1 except StopIteration: return False if ch == '\n': @@ -746,7 +747,7 @@ remnant = None continue - if limit > 0: + if limit >= 0: remaining = limit - builder.getlength() assert remaining >= 0 else: diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -1,41 +1,54 @@ import pytest try: - from hypothesis import given, strategies as st, assume + from hypothesis import given, strategies as st except ImportError: pytest.skip("hypothesis required") +import os from pypy.module._io.interp_bytesio import W_BytesIO from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace(u'\r\n', u'\n') + text = text.replace(u'\r', u'\n') + return text.replace(u'\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline(draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + fragments = [] + limits = [] + for _ in range(n_lines): + line = draw(st.text(st.characters(blacklist_characters=u'\r\n'))) + fragments.append(line) + ending = draw(st.sampled_from([u'\n', u'\r', u'\r\n'])) + fragments.append(ending) + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + return (u''.join(fragments), limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(space, txt, mode, limit): - assume(limit != 0) + at given(data=st_readline(), + mode=st.sampled_from(['\r', '\n', '\r\n', ''])) +def test_readline(space, data, mode): + txt, limits = data w_stream = W_BytesIO(space) w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) w_textio = W_TextIOWrapper(space) w_textio.descr_init( - space, w_stream, encoding='utf-8', + space, w_stream, + encoding='utf-8', w_errors=space.newtext('surrogatepass'), w_newline=space.newtext(mode)) lines = [] - while True: + for limit in limits: w_line = w_textio.readline_w(space, space.newint(limit)) line = space.utf8_w(w_line).decode('utf-8') - if limit > 0: + if limit >= 0: assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + assert txt.startswith(u''.join(lines)) @given(st.text()) def test_read_buffer(text): diff --git a/pypy/module/cpyext/test/test_codecs.py b/pypy/module/cpyext/test/test_codecs.py --- a/pypy/module/cpyext/test/test_codecs.py +++ b/pypy/module/cpyext/test/test_codecs.py @@ -11,5 +11,5 @@ w_encoded = space.call_method(w_encoder, 'encode', space.wrap(u'späm')) w_decoder = PyCodec_IncrementalDecoder(space, utf8, None) w_decoded = space.call_method(w_decoder, 
'decode', w_encoded) - assert space.unwrap(w_decoded) == u'späm' + assert space.unicode_w(w_decoded) == u'späm' rffi.free_charp(utf8) diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py --- a/pypy/module/cpyext/test/test_eval.py +++ b/pypy/module/cpyext/test/test_eval.py @@ -131,7 +131,7 @@ finally: rffi.free_charp(buf) w_a = space.getitem(w_globals, space.wrap("a")) - assert space.unwrap(w_a) == u'caf\xe9' + assert space.unicode_w(w_a) == u'caf\xe9' lltype.free(flags, flavor='raw') def test_run_file(self, space): diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py --- a/pypy/module/cpyext/test/test_object.py +++ b/pypy/module/cpyext/test/test_object.py @@ -8,7 +8,7 @@ from pypy.module.cpyext.object import ( PyObject_IsTrue, PyObject_Not, PyObject_GetAttrString, PyObject_DelAttrString, PyObject_GetAttr, PyObject_DelAttr, - PyObject_GetItem, + PyObject_GetItem, PyObject_IsInstance, PyObject_IsSubclass, PyObject_AsFileDescriptor, PyObject_Hash, PyObject_Cmp, PyObject_Unicode ) @@ -209,9 +209,9 @@ PyObject_Cmp(space, w(u"\xe9"), w("\xe9"), ptr) def test_unicode(self, space, api): - assert space.unwrap(api.PyObject_Unicode(None)) == u"" - assert space.unwrap(api.PyObject_Unicode(space.wrap([]))) == u"[]" - assert space.unwrap(api.PyObject_Unicode(space.wrap("e"))) == u"e" + assert space.unicode_w(api.PyObject_Unicode(None)) == u"" + assert space.unicode_w(api.PyObject_Unicode(space.wrap([]))) == u"[]" + assert space.unicode_w(api.PyObject_Unicode(space.wrap("e"))) == u"e" with raises_w(space, UnicodeDecodeError): PyObject_Unicode(space, space.wrap("\xe9")) @@ -562,7 +562,7 @@ PyObject *a = PyTuple_GetItem(args, 0); PyObject *b = PyTuple_GetItem(args, 1); int res = PyObject_RichCompareBool(a, b, Py_EQ); - return PyLong_FromLong(res); + return PyLong_FromLong(res); """),]) a = float('nan') b = float('nan') diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -178,7 +178,7 @@ array = rffi.cast(rffi.CWCHARP, PyUnicode_AS_DATA(space, word)) array2 = PyUnicode_AS_UNICODE(space, word) array3 = PyUnicode_AsUnicode(space, word) - for (i, char) in enumerate(space.unwrap(word)): + for (i, char) in enumerate(space.unicode_w(word)): assert array[i] == char assert array2[i] == char assert array3[i] == char @@ -216,12 +216,12 @@ def test_fromstring(self, space): s = rffi.str2charp(u'sp\x09m'.encode("utf-8")) w_res = PyUnicode_FromString(space, s) - assert space.unwrap(w_res) == u'sp\x09m' + assert space.unicode_w(w_res) == u'sp\x09m' res = PyUnicode_FromStringAndSize(space, s, 4) w_res = from_ref(space, res) Py_DecRef(space, res) - assert space.unwrap(w_res) == u'sp\x09m' + assert space.unicode_w(w_res) == u'sp\x09m' rffi.free_charp(s) def test_unicode_resize(self, space): @@ -256,17 +256,17 @@ u = rffi.str2charp(u'sp\x134m'.encode("utf-8")) w_u = PyUnicode_DecodeUTF8(space, u, 5, None) assert space.type(w_u) is space.w_unicode - assert space.unwrap(w_u) == u'sp\x134m' + assert space.unicode_w(w_u) == u'sp\x134m' w_u = PyUnicode_DecodeUTF8(space, u, 2, None) assert space.type(w_u) is space.w_unicode - assert space.unwrap(w_u) == 'sp' + assert space.unicode_w(w_u) == 'sp' rffi.free_charp(u) def test_encode_utf8(self, space): u = rffi.unicode2wcharp(u'sp\x09m') w_s = PyUnicode_EncodeUTF8(space, u, 4, None) - assert space.unwrap(w_s) == u'sp\x09m'.encode('utf-8') + assert 
space.unicode_w(w_s) == u'sp\x09m'.encode('utf-8') rffi.free_wcharp(u) def test_encode_decimal(self, space): @@ -364,18 +364,18 @@ def test_fromobject(self, space): w_u = space.wrap(u'a') assert PyUnicode_FromObject(space, w_u) is w_u - assert space.unwrap( + assert space.unicode_w( PyUnicode_FromObject(space, space.wrap('test'))) == 'test' def test_decode(self, space): b_text = rffi.str2charp('caf\x82xx') b_encoding = rffi.str2charp('cp437') - assert space.unwrap( + assert space.unicode_w( PyUnicode_Decode(space, b_text, 4, b_encoding, None)) == u'caf\xe9' w_text = PyUnicode_FromEncodedObject(space, space.wrap("test"), b_encoding, None) assert space.isinstance_w(w_text, space.w_unicode) - assert space.unwrap(w_text) == "test" + assert space.unicode_w(w_text) == "test" with raises_w(space, TypeError): PyUnicode_FromEncodedObject(space, space.wrap(u"test"), @@ -391,7 +391,8 @@ u_text = u'abcdefg' s_text = space.str_w(PyUnicode_AsEncodedString(space, space.wrap(u_text), null_charp, null_charp)) b_text = rffi.str2charp(s_text) - assert space.unwrap(PyUnicode_Decode(space, b_text, len(s_text), null_charp, null_charp)) == u_text + assert space.unicode_w(PyUnicode_Decode( + space, b_text, len(s_text), null_charp, null_charp)) == u_text with raises_w(space, TypeError): PyUnicode_FromEncodedObject( space, space.wrap(u_text), null_charp, None) @@ -508,7 +509,7 @@ def test_concat(self, space): w_res = PyUnicode_Concat(space, space.wrap(u'a'), space.wrap(u'b')) - assert space.unwrap(w_res) == u'ab' + assert space.unicode_w(w_res) == u'ab' def test_copy(self, space): w_x = space.wrap(u"abcd\u0660") @@ -579,29 +580,30 @@ w_format = space.wrap(u'hi %s') w_args = space.wrap((u'test',)) w_formated = PyUnicode_Format(space, w_format, w_args) - assert space.unwrap(w_formated) == space.unwrap(space.mod(w_format, w_args)) + assert (space.unicode_w(w_formated) == + space.unicode_w(space.mod(w_format, w_args))) def test_join(self, space): w_sep = space.wrap(u'') w_seq = space.wrap([u'a', u'b']) w_joined = PyUnicode_Join(space, w_sep, w_seq) - assert space.unwrap(w_joined) == u'ab' + assert space.unicode_w(w_joined) == u'ab' def test_fromordinal(self, space): w_char = PyUnicode_FromOrdinal(space, 65) - assert space.unwrap(w_char) == u'A' + assert space.unicode_w(w_char) == u'A' w_char = PyUnicode_FromOrdinal(space, 0) - assert space.unwrap(w_char) == u'\0' + assert space.unicode_w(w_char) == u'\0' w_char = PyUnicode_FromOrdinal(space, 0xFFFF) - assert space.unwrap(w_char) == u'\uFFFF' + assert space.unicode_w(w_char) == u'\uFFFF' def test_replace(self, space): w_str = space.wrap(u"abababab") w_substr = space.wrap(u"a") w_replstr = space.wrap(u"z") - assert u"zbzbabab" == space.unwrap( + assert u"zbzbabab" == space.unicode_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, 2)) - assert u"zbzbzbzb" == space.unwrap( + assert u"zbzbzbzb" == space.unicode_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, -1)) def test_tailmatch(self, space): diff --git a/pypy/module/unicodedata/test/test_hyp.py b/pypy/module/unicodedata/test/test_hyp.py --- a/pypy/module/unicodedata/test/test_hyp.py +++ b/pypy/module/unicodedata/test/test_hyp.py @@ -10,7 +10,7 @@ def normalize(s): w_s = space.newunicode(s) w_res = ucd.normalize(space, NF_code, w_s) - return space.unwrap(w_res) + return space.unicode_w(w_res) return normalize all_forms = ['NFC', 'NFD', 'NFKC', 'NFKD'] diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- 
a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -25,19 +25,6 @@ return (1 << ((byte_size << 3) - 1)) - 1 -IS_64_BIT = sys.maxint > 2**32 - -def next_pow2_m1(n): - """Calculate next power of 2 greater than n minus one.""" - n |= n >> 1 - n |= n >> 2 - n |= n >> 4 - n |= n >> 8 - n |= n >> 16 - if IS_64_BIT: - n |= n >> 32 - return n - class OptIntBounds(Optimization): """Keeps track of the bounds placed on integers by guards and remove @@ -50,7 +37,7 @@ return dispatch_postprocess(self, op) def propagate_bounds_backward(self, box): - # FIXME: This takes care of the instruction where box is the reuslt + # FIXME: This takes care of the instruction where box is the result # but the bounds produced by all instructions where box is # an argument might also be tighten b = self.getintbound(box) @@ -91,14 +78,8 @@ b1 = self.getintbound(v1) v2 = self.get_box_replacement(op.getarg(1)) b2 = self.getintbound(v2) - if b1.known_ge(IntBound(0, 0)) and \ - b2.known_ge(IntBound(0, 0)): - r = self.getintbound(op) - if b1.has_upper and b2.has_upper: - mostsignificant = b1.upper | b2.upper - r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) - else: - r.make_ge(IntBound(0, 0)) + b = b1.or_bound(b2) + self.getintbound(op).intersect(b) optimize_INT_OR = optimize_INT_OR_or_XOR optimize_INT_XOR = optimize_INT_OR_or_XOR @@ -112,15 +93,8 @@ def postprocess_INT_AND(self, op): b1 = self.getintbound(op.getarg(0)) b2 = self.getintbound(op.getarg(1)) - r = self.getintbound(op) - pos1 = b1.known_ge(IntBound(0, 0)) - pos2 = b2.known_ge(IntBound(0, 0)) - if pos1 or pos2: - r.make_ge(IntBound(0, 0)) - if pos1: - r.make_le(b1) - if pos2: - r.make_le(b2) + b = b1.and_bound(b2) + self.getintbound(op).intersect(b) def optimize_INT_SUB(self, op): return self.emit(op) @@ -211,16 +185,10 @@ r.intersect(b1.py_div_bound(b2)) def post_call_INT_PY_MOD(self, op): + b1 = self.getintbound(op.getarg(1)) b2 = self.getintbound(op.getarg(2)) - if b2.is_constant(): - val = b2.getint() - r = self.getintbound(op) - if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos - r.make_ge(IntBound(0, 0)) - r.make_lt(IntBound(val, val)) - else: # with Python's modulo: neg < (x % neg) <= 0 - r.make_gt(IntBound(val, val)) - r.make_le(IntBound(0, 0)) + r = self.getintbound(op) + r.intersect(b1.mod_bound(b2)) def optimize_INT_LSHIFT(self, op): return self.emit(op) @@ -436,7 +404,7 @@ def optimize_INT_FORCE_GE_ZERO(self, op): b = self.getintbound(op.getarg(0)) - if b.known_ge(IntBound(0, 0)): + if b.known_nonnegative(): self.make_equal_to(op, op.getarg(0)) else: return self.emit(op) @@ -647,7 +615,7 @@ if r.is_constant(): if r.getint() == valnonzero: b1 = self.getintbound(op.getarg(0)) - if b1.known_ge(IntBound(0, 0)): + if b1.known_nonnegative(): b1.make_gt(IntBound(0, 0)) self.propagate_bounds_backward(op.getarg(0)) elif r.getint() == valzero: diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -12,6 +12,19 @@ MAXINT = maxint MININT = -maxint - 1 +IS_64_BIT = sys.maxint > 2**32 + +def next_pow2_m1(n): + """Calculate next power of 2 greater than n minus one.""" + n |= n >> 1 + n |= n >> 2 + n |= n >> 4 + n |= n >> 8 + n |= n >> 16 + if IS_64_BIT: + n |= n >> 32 + return n + class IntBound(AbstractInfo): _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower') @@ -92,6 +105,9 @@ def known_ge(self, other): return other.known_le(self) 
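(Editorial aside, not part of the changeset above.) A plain-Python sketch of why the new or_bound is sound for non-negative operands: if 0 <= x <= xu and 0 <= y <= yu, then every set bit of x | y (or of x ^ y) is covered by the all-ones mask next_pow2_m1(xu | yu), so the result can never exceed that mask. A brute-force check of the claim:

    def next_pow2_m1(n):
        # same bit-smearing trick as the helper moved into intutils.py
        for shift in (1, 2, 4, 8, 16, 32):
            n |= n >> shift
        return n

    for xu in range(32):
        for yu in range(32):
            mask = next_pow2_m1(xu | yu)
            for x in range(xu + 1):
                for y in range(yu + 1):
                    assert (x | y) <= mask
                    assert (x ^ y) <= mask   # the same bound is reused for XOR
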
+ def known_nonnegative(self): + return self.has_lower and 0 <= self.lower + def intersect(self, other): r = False @@ -192,10 +208,22 @@ else: return IntUnbounded() + def mod_bound(self, other): + r = IntUnbounded() + if other.is_constant(): + val = other.getint() + if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos + r.make_ge(IntBound(0, 0)) + r.make_lt(IntBound(val, val)) + else: # with Python's modulo: neg < (x % neg) <= 0 + r.make_gt(IntBound(val, val)) + r.make_le(IntBound(0, 0)) + return r + def lshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): try: vals = (ovfcheck(self.upper << other.upper), @@ -211,7 +239,7 @@ def rshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): vals = (self.upper >> other.upper, self.upper >> other.lower, @@ -221,7 +249,32 @@ else: return IntUnbounded() + def and_bound(self, other): + pos1 = self.known_nonnegative() + pos2 = other.known_nonnegative() + r = IntUnbounded() + if pos1 or pos2: + r.make_ge(IntBound(0, 0)) + if pos1: + r.make_le(self) + if pos2: + r.make_le(other) + return r + + def or_bound(self, other): + r = IntUnbounded() + if self.known_nonnegative() and \ + other.known_nonnegative(): + if self.has_upper and other.has_upper: + mostsignificant = self.upper | other.upper + r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) + else: + r.make_ge(IntBound(0, 0)) + return r + def contains(self, val): + if not we_are_translated(): + assert not isinstance(val, long) if not isinstance(val, int): if ((not self.has_lower or self.lower == MININT) and not self.has_upper or self.upper == MAXINT): @@ -282,7 +335,7 @@ guards.append(op) def is_bool(self): - return (self.bounded() and self.known_ge(ConstIntBound(0)) and + return (self.bounded() and self.known_nonnegative() and self.known_le(ConstIntBound(1))) def make_bool(self): @@ -297,7 +350,7 @@ if self.known_gt(IntBound(0, 0)) or \ self.known_lt(IntBound(0, 0)): return INFO_NONNULL - if self.known_ge(IntBound(0, 0)) and \ + if self.known_nonnegative() and \ self.known_le(IntBound(0, 0)): return INFO_NULL return INFO_UNKNOWN diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -1,12 +1,34 @@ from rpython.jit.metainterp.optimizeopt.intutils import IntBound, IntUpperBound, \ - IntLowerBound, IntUnbounded -from rpython.jit.metainterp.optimizeopt.intbounds import next_pow2_m1 + IntLowerBound, IntUnbounded, next_pow2_m1 from copy import copy import sys -from rpython.rlib.rarithmetic import LONG_BIT +from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck -def bound(a,b): +from hypothesis import given, strategies + +special_values = ( + range(-100, 100) + + [2 ** i for i in range(1, LONG_BIT)] + + [-2 ** i for i in range(1, LONG_BIT)] + + [2 ** i - 1 for i in range(1, LONG_BIT)] + + [-2 ** i - 1 for i in range(1, LONG_BIT)] + + [2 ** i + 1 for i in range(1, LONG_BIT)] + + [-2 ** i + 1 for i in range(1, LONG_BIT)] + + [sys.maxint, -sys.maxint-1]) + +special_values = strategies.sampled_from( + [int(v) for v in special_values if type(int(v)) is int]) 
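(Editorial aside, not part of the diff.) The range comments that mod_bound encodes rely on Python's modulo taking the sign of the divisor: for a positive divisor d the result lies in [0, d), and for a negative d in (d, 0]. A quick brute-force check in plain Python:

    # verify the two cases used by mod_bound:
    #   0 <= (x % pos) < pos      and      neg < (x % neg) <= 0
    for dividend in range(-20, 21):
        for divisor in (-7, -3, -1, 1, 3, 7):
            r = dividend % divisor
            if divisor > 0:
                assert 0 <= r < divisor
            else:
                assert divisor < r <= 0
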
+ +ints = strategies.builds( + int, # strategies.integers sometimes returns a long? + special_values | strategies.integers( + min_value=int(-sys.maxint-1), max_value=sys.maxint)) + +ints_or_none = strategies.none() | ints + + +def bound(a, b): if a is None and b is None: return IntUnbounded() elif a is None: @@ -14,11 +36,55 @@ elif b is None: return IntLowerBound(a) else: - return IntBound(a,b) + return IntBound(a, b) def const(a): return bound(a,a) + +def build_bound_with_contained_number(a, b, c): + a, b, c = sorted([a, b, c]) + r = bound(a, c) + assert r.contains(b) + return r, b + +bound_with_contained_number = strategies.builds( + build_bound_with_contained_number, + ints_or_none, + ints_or_none, + ints +) + +unbounded = strategies.builds( + lambda x: (bound(None, None), int(x)), + ints +) + +lower_bounded = strategies.builds( + lambda x, y: (bound(min(x, y), None), max(x, y)), + ints, + ints +) + +upper_bounded = strategies.builds( + lambda x, y: (bound(None, max(x, y)), min(x, y)), + ints, + ints +) + +bounded = strategies.builds( + build_bound_with_contained_number, + ints, ints, ints +) + +constant = strategies.builds( + lambda x: (const(x), x), + ints +) + +bound_with_contained_number = strategies.one_of( + unbounded, lower_bounded, upper_bounded, constant, bounded) + def some_bounds(): brd = [None] + range(-2, 3) for lower in brd: @@ -240,8 +306,6 @@ def test_div_bound(): - from rpython.rtyper.lltypesystem import lltype - from rpython.rtyper.lltypesystem.lloperation import llop for _, _, b1 in some_bounds(): for _, _, b2 in some_bounds(): b3 = b1.py_div_bound(b2) @@ -261,6 +325,15 @@ assert a.contains(-3) assert a.contains(0) +def test_mod_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.mod_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style div def test_sub_bound(): for _, _, b1 in some_bounds(): @@ -275,6 +348,25 @@ assert not a.contains(-1) assert not a.contains(4) +def test_and_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.and_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 & n2) + +def test_or_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.or_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 | n2) + assert b3.contains(n1 ^ n2) # we use it for xor too + def test_next_pow2_m1(): assert next_pow2_m1(0) == 0 @@ -285,3 +377,82 @@ assert next_pow2_m1(80) == 127 assert next_pow2_m1((1 << 32) - 5) == (1 << 32) - 1 assert next_pow2_m1((1 << 64) - 1) == (1 << 64) - 1 + + + at given(bound_with_contained_number, bound_with_contained_number) +def test_add_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.add_bound(b2) + try: + r = ovfcheck(n1 + n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_sub_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.sub_bound(b2) + try: + r = ovfcheck(n1 - n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mul_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mul_bound(b2) + try: + r = ovfcheck(n1 * n2) + except 
OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_div_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.py_div_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 / n2) # Python-style div + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mod_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mod_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style mod + + at given(bound_with_contained_number, bound_with_contained_number) +def test_and_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.and_bound(b2) + r = n1 & n2 + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_or_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.or_bound(b2) + r = n1 | n2 + assert b3.contains(r) + r = n1 ^ n2 + assert b3.contains(r) diff --git a/rpython/translator/platform/test/test_platform.py b/rpython/translator/platform/test/test_platform.py --- a/rpython/translator/platform/test/test_platform.py +++ b/rpython/translator/platform/test/test_platform.py @@ -113,8 +113,10 @@ def test_environment_inheritance(self): # make sure that environment is inherited cmd = 'import os; print os.environ["_SOME_VARIABLE_%d"]' + env = {'_SOME_VARIABLE_1':'xyz'} + env['PATH'] = os.environ['PATH'] res = self.platform.execute(sys.executable, ['-c', cmd % 1], - env={'_SOME_VARIABLE_1':'xyz'}) + env=env) assert 'xyz' in res.out os.environ['_SOME_VARIABLE_2'] = 'zyz' try: diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -10,21 +10,13 @@ rpydir = str(py.path.local(rpython.__file__).join('..')) def _get_compiler_type(cc, x64_flag): - import subprocess if not cc: cc = os.environ.get('CC','') if not cc: return MsvcPlatform(x64=x64_flag) elif cc.startswith('mingw') or cc == 'gcc': return MingwPlatform(cc) - else: - return MsvcPlatform(cc=cc, x64=x64_flag) - try: - subprocess.check_output([cc, '--version']) - except: - raise ValueError("Could not find compiler specified by cc option '%s'," - " it must be a valid exe file on your path" % cc) - return MingwPlatform(cc) + return MsvcPlatform(cc=cc, x64=x64_flag) def Windows(cc=None): return _get_compiler_type(cc, False) @@ -74,6 +66,11 @@ vcvars = os.path.join(vcbindir, 'amd64', 'vcvarsamd64.bat') else: vcvars = os.path.join(toolsdir, 'vsvars32.bat') + if not os.path.exists(vcvars): + # even msdn does not know which to run + # see https://msdn.microsoft.com/en-us/library/1700bbwd(v=vs.90).aspx + # wich names both + vcvars = os.path.join(toolsdir, 'vcvars32.bat') import subprocess try: @@ -95,25 +92,21 @@ key, value = line.split('=', 1) if key.upper() in ['PATH', 'INCLUDE', 'LIB']: env[key.upper()] = value - ## log.msg("Updated environment with %s" % (vcvars,)) + log.msg("Updated environment with %s" % (vcvars,)) return env def find_msvc_env(x64flag=False): + vcvers = [140, 100, 90, 80, 71, 70] # First, try to get the compiler which served to compile python msc_pos = sys.version.find('MSC v.') if msc_pos != -1: msc_ver = int(sys.version[msc_pos+6:msc_pos+10]) - # 1300 -> 70, 1310 -> 71, 1400 -> 80, 1500 -> 90 + # 1500 -> 90, 1900 -> 140 vsver = (msc_ver / 10) - 60 + vcvers.insert(0, vsver) + errs = [] + for vsver in 
vcvers: env = _get_msvc_env(vsver, x64flag) - - if env is not None: - return env - - # Then, try any other version - for vsver in (100, 90, 80, 71, 70): # All the versions I know - env = _get_msvc_env(vsver, x64flag) - if env is not None: return env log.error("Could not find a Microsoft Compiler") From pypy.commits at gmail.com Sun Dec 3 16:25:24 2017 From: pypy.commits at gmail.com (fijal) Date: Sun, 03 Dec 2017 13:25:24 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: another untested part of exceptions module Message-ID: <5a246bc4.94ae1c0a.c415a.c7bb@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93267:9280e1159635 Date: 2017-12-03 16:40 +0100 http://bitbucket.org/pypy/pypy/changeset/9280e1159635/ Log: another untested part of exceptions module diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py --- a/pypy/module/exceptions/interp_exceptions.py +++ b/pypy/module/exceptions/interp_exceptions.py @@ -78,6 +78,7 @@ from pypy.interpreter.gateway import interp2app from pypy.interpreter.error import OperationError, oefmt from rpython.rlib import rwin32 +from rpython.rlib.rutf8 import FLAG_ASCII def readwrite_attrproperty_w(name, cls): @@ -126,7 +127,7 @@ return space.call_function(space.w_unicode, w_as_str) lgt = len(self.args_w) if lgt == 0: - return space.newunicode(u"") + return space.newutf8("", 0, FLAG_ASCII) if lgt == 1: return space.call_function(space.w_unicode, self.args_w[0]) else: From pypy.commits at gmail.com Sun Dec 3 16:25:26 2017 From: pypy.commits at gmail.com (fijal) Date: Sun, 03 Dec 2017 13:25:26 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: whack sre until it compiles Message-ID: <5a246bc6.43a4df0a.2c56e.3b29@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93268:831913dc603e Date: 2017-12-03 22:14 +0100 http://bitbucket.org/pypy/pypy/changeset/831913dc603e/ Log: whack sre until it compiles diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -6,7 +6,7 @@ from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault from pypy.interpreter.error import OperationError, oefmt from rpython.rlib.rarithmetic import intmask -from rpython.rlib import jit +from rpython.rlib import jit, rutf8 from rpython.rlib.rstring import StringBuilder from rpython.rlib.rutf8 import Utf8StringBuilder @@ -42,7 +42,9 @@ if isinstance(ctx, rsre_core.StrMatchContext): return space.newbytes(ctx._string[start:end]) elif isinstance(ctx, rsre_core.UnicodeMatchContext): - return space.newunicode(ctx._unicodestr[start:end]) + s = ctx._unicodestr[start:end] + lgt, flag = rutf8.check_utf8(s, True) + return space.newutf8(s, lgt, flag) else: # unreachable raise SystemError @@ -110,7 +112,9 @@ if endpos < pos: endpos = pos if space.isinstance_w(w_string, space.w_unicode): - unicodestr = space.unicode_w(w_string) + unicodestr = space.utf8_w(w_string) + # XXX will fail some tests, the length need to be adjusted for + # real char len etc if pos > len(unicodestr): pos = len(unicodestr) if endpos > len(unicodestr): @@ -341,7 +345,7 @@ unicodebuilder.get_flag()), n else: if space.isinstance_w(w_string, space.w_unicode): - w_emptystr = space.newunicode(u'') + w_emptystr = space.newutf8('', 0, rutf8.FLAG_ASCII) else: w_emptystr = space.newbytes('') w_item = space.call_method(w_emptystr, 'join', @@ -575,7 +579,8 @@ elif isinstance(ctx, rsre_core.StrMatchContext): return space.newbytes(ctx._string) elif 
isinstance(ctx, rsre_core.UnicodeMatchContext): - return space.newunicode(ctx._unicodestr) + lgt, flag = rutf8.check_utf8(ctx._unicodestr, True) + return space.newutf8(ctx._unicodestr, lgt, flag) else: raise SystemError diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -311,10 +311,10 @@ return res, flag raise CheckError(~res) -def get_utf8_length_flag(s): +def get_utf8_length_flag(s, start=0, end=-1): """ Get the length and flag out of valid utf8. For now just calls check_utf8 """ - return check_utf8(s, True) + return check_utf8(s, True, start, end) @jit.elidable def _check_utf8(s, allow_surrogates, start, stop): @@ -694,6 +694,12 @@ self._lgt += newlgt self._flag = combine_flags(self._flag, newflag) + def append_slice(self, s, start, end): + self._s.append_slice(s, start, end) + newlgt, newflag = get_utf8_length_flag(s, start, end) + self._lgt += newlgt + self._flag = combine_flags(self._flag, newflag) + @signature(char(), returns=none()) def append_char(self, s): # for characters, ascii From pypy.commits at gmail.com Sun Dec 3 16:26:30 2017 From: pypy.commits at gmail.com (fijal) Date: Sun, 03 Dec 2017 13:26:30 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: some benchmarks Message-ID: <5a246c06.90aa1c0a.ca4b1.ab5f@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93270:2d37182f156f Date: 2017-12-03 22:25 +0100 http://bitbucket.org/pypy/pypy/changeset/2d37182f156f/ Log: some benchmarks diff --git a/targetunicode-bench.py b/targetunicode-bench.py new file mode 100644 --- /dev/null +++ b/targetunicode-bench.py @@ -0,0 +1,45 @@ + +from rpython.rlib import rutf8 +from rpython.rlib.rstring import StringBuilder, UnicodeBuilder +from pypy.module.unicodedata import unicodedb + +l = ["u" * 100 + str(i) for i in range(100)] +u_l = [unicode("u" * 100 + str(i)) for i in range(100)] + +def descr_upper(s): + builder = rutf8.Utf8StringBuilder(len(s)) + for ch in rutf8.Utf8StringIterator(s): + ch = unicodedb.toupper(ch) + builder.append_code(ch) + return builder.build() +descr_upper._dont_inline_ = True + +def descr_upper_s(s): + builder = StringBuilder(len(s)) + for i in range(len(s)): + ch = s[i] + builder.append(chr(unicodedb.toupper(ord(ch)))) + return builder.build() + +def descr_upper_u(s): + builder = UnicodeBuilder(len(s)) + for ch in s: + builder.append(unichr(unicodedb.toupper(ord(ch)))) + return builder.build() + +def main(argv): + res_l = ["foo"] + res_l_2 = [u"foo"] + if len(argv) > 2 and argv[2] == "s": + for i in range(int(argv[1])): + res_l[0] = descr_upper_s(l[i % 100]) + if len(argv) > 2 and argv[2] == "u": + for i in range(int(argv[1])): + res_l_2[0] = descr_upper_u(u_l[i % 100]) + else: + for i in range(int(argv[1])): + res_l[0] = descr_upper(l[i % 100]) + return 0 + +def target(*args): + return main diff --git a/unicode-bench.py b/unicode-bench.py new file mode 100644 --- /dev/null +++ b/unicode-bench.py @@ -0,0 +1,48 @@ + +import time + +LGT = 100 + +unicodes = [unicode("u" * LGT + str(i)) for i in range(100)] +non_ascii_unicodes = [u"u" * LGT + unicode(i) + u"å" for i in range(100)] + +RANGE = 250000000 // LGT + +def upper(main_l): + l = [None] * 1000 + for i in xrange(RANGE): + l[i % 1000] = main_l[i % 100].upper() + +def lower(main_l): + l = [None] + for i in xrange(RANGE): + l[0] = main_l[i % 100].lower() + +def islower(main_l): + l = [None] + for i in xrange(RANGE * 3): + l[0] = main_l[i % 100].islower() + +def title(main_l): + l = [None] + for i in xrange(RANGE): + l[0] = main_l[i % 
100].title() + +def add(main_l): + l = [None] + for i in xrange(RANGE): + l[0] = main_l[i % 100] + u"foo" + +def find(main_l): + l = [None] + for i in xrange(RANGE): + l[0] = main_l[i % 100].find(u"foo") + +for func in [upper]:#, lower, isupper, islower]: + t0 = time.time() + func(unicodes) + t1 = time.time() + print "ascii %s %.2f" % (func.__name__, t1 - t0) + func(non_ascii_unicodes) + t2 = time.time() + print "non-ascii %s %.2f" % (func.__name__, t2 - t1) From pypy.commits at gmail.com Mon Dec 4 04:20:42 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 04 Dec 2017 01:20:42 -0800 (PST) Subject: [pypy-commit] pypy intbound-improvements: invert(invert(x)) == x Message-ID: <5a25136a.a1abdf0a.1dfb4.c112@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: intbound-improvements Changeset: r93271:cfd8399c3787 Date: 2017-12-03 19:27 +0100 http://bitbucket.org/pypy/pypy/changeset/cfd8399c3787/ Log: invert(invert(x)) == x diff --git a/rpython/jit/metainterp/optimizeopt/rewrite.py b/rpython/jit/metainterp/optimizeopt/rewrite.py --- a/rpython/jit/metainterp/optimizeopt/rewrite.py +++ b/rpython/jit/metainterp/optimizeopt/rewrite.py @@ -96,6 +96,10 @@ return False + def optimize_INT_INVERT(self, op): + self.optimizer.pure_from_args(rop.INT_INVERT, [op], op.getarg(0)) + return self.emit(op) + def optimize_INT_AND(self, op): b1 = self.getintbound(op.getarg(0)) b2 = self.getintbound(op.getarg(1)) diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py --- a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py @@ -3433,6 +3433,20 @@ """ self.optimize_loop(ops, expected) + def test_int_invert(self): + ops = """ + [i0] + i2 = int_invert(i0) + i3 = int_invert(i2) + jump(i3) + """ + expected = """ + [i0] + i2 = int_invert(i0) + jump(i0) + """ + self.optimize_loop(ops, expected) + def test_int_add_sub_constants_inverse(self): py.test.skip("reenable") import sys From pypy.commits at gmail.com Mon Dec 4 04:20:44 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 04 Dec 2017 01:20:44 -0800 (PST) Subject: [pypy-commit] pypy intbound-improvements: fix typo Message-ID: <5a25136c.89c0df0a.5d27b.3351@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: intbound-improvements Changeset: r93272:65738a183cd1 Date: 2017-12-03 19:57 +0100 http://bitbucket.org/pypy/pypy/changeset/65738a183cd1/ Log: fix typo diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -207,6 +207,7 @@ r = self.getintbound(op) b = b1.lshift_bound(b2) r.intersect(b) + print op, r # intbound.lshift_bound checks for an overflow and if the # lshift can be proven not to overflow sets b.has_upper and # b.has_lower diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -49,7 +49,7 @@ # Returns True if the bound was updated def make_le(self, other): if other.has_upper: - self.make_le_const(other.upper) + return self.make_le_const(other.upper) return False def make_lt(self, other): From pypy.commits at gmail.com Mon Dec 4 04:20:46 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 04 Dec 2017 01:20:46 -0800 (PST) Subject: [pypy-commit] pypy 
intbound-improvements: bounds propagation through int_invert Message-ID: <5a25136e.31a9df0a.42891.cc81@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: intbound-improvements Changeset: r93273:52f8b0936b0d Date: 2017-12-03 22:24 +0100 http://bitbucket.org/pypy/pypy/changeset/52f8b0936b0d/ Log: bounds propagation through int_invert diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -62,6 +62,15 @@ postprocess_GUARD_FALSE = _postprocess_guard_true_false_value postprocess_GUARD_VALUE = _postprocess_guard_true_false_value + def optimize_INT_INVERT(self, op): + return self.emit(op) + + def postprocess_INT_INVERT(self, op): + v1 = self.get_box_replacement(op.getarg(0)) + b1 = self.getintbound(v1) + b = b1.invert_bound() + self.getintbound(op).intersect(b) + def optimize_INT_OR(self, op): v1 = self.get_box_replacement(op.getarg(0)) v2 = self.get_box_replacement(op.getarg(1)) @@ -684,6 +693,13 @@ if b1.intersect(b): self.propagate_bounds_backward(op.getarg(0)) + def propagate_bounds_INT_INVERT(self, op): + b1 = self.getintbound(op.getarg(0)) + r = self.getintbound(op) + b = r.invert_bound() + if b1.intersect(b): + self.propagate_bounds_backward(op.getarg(0)) + propagate_bounds_INT_ADD_OVF = propagate_bounds_INT_ADD propagate_bounds_INT_SUB_OVF = propagate_bounds_INT_SUB propagate_bounds_INT_MUL_OVF = propagate_bounds_INT_MUL diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -324,6 +324,15 @@ r.make_ge_const(0) return r + def invert_bound(self): + r = IntUnbounded() + if self.has_upper: + r.has_lower = True + r.lower = ~self.upper + if self.has_lower: + r.has_upper = True + r.upper = ~self.lower + return r def contains(self, val): if not we_are_translated(): diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -455,6 +455,20 @@ assert b3.contains(n1 ^ n2) +def test_invert_bound_explicit(): + r = bound(-6, 12) + c = r.invert_bound() + assert c.upper == ~-6 + assert c.lower == ~12 + assert c.has_upper and c.has_lower + +def test_invert_bound(): + for _, _, b1 in some_bounds(): + b2 = b1.invert_bound() + for n in nbr: + if b1.contains(n): + assert b2.contains(~n) + def test_next_pow2_m1(): assert next_pow2_m1(0) == 0 assert next_pow2_m1(1) == 1 @@ -549,3 +563,9 @@ b3 = b1.xor_bound(b2) r = n1 ^ n2 assert b3.contains(r) + + at given(bound_with_contained_number) +def test_invert_bound_random(t1): + b1, n1 = t1 + b2 = b1.invert_bound() + assert b2.contains(~n1) diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py --- a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py @@ -3447,6 +3447,57 @@ """ self.optimize_loop(ops, expected) + def test_int_invert_bound(self): + ops = """ + [i0] + i1 = int_gt(i0, -5) + guard_true(i1) [] + i2 = int_lt(i0, 100) + guard_true(i2) [] + i3 = int_invert(i0) + i4 = int_gt(i3, -101) + guard_true(i4) [] + i5 = int_lt(i3, 4) + guard_true(i5) [] + jump(i3) + """ + expected = 
""" + [i0] + i1 = int_gt(i0, -5) + guard_true(i1) [] + i2 = int_lt(i0, 100) + guard_true(i2) [] + i3 = int_invert(i0) + jump(i3) + """ + self.optimize_loop(ops, expected) + + def test_int_invert_bound_backwards(self): + ops = """ + [i0] + i3 = int_invert(i0) + i4 = int_gt(i3, -101) + guard_true(i4) [] + i5 = int_lt(i3, 4) + guard_true(i5) [] + + i1 = int_gt(i0, -5) + guard_true(i1) [] + i2 = int_lt(i0, 100) + guard_true(i2) [] + jump(i3) + """ + expected = """ + [i0] + i3 = int_invert(i0) + i4 = int_gt(i3, -101) + guard_true(i4) [] + i5 = int_lt(i3, 4) + guard_true(i5) [] + jump(i3) + """ + self.optimize_loop(ops, expected) + def test_int_add_sub_constants_inverse(self): py.test.skip("reenable") import sys From pypy.commits at gmail.com Mon Dec 4 23:52:52 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 04 Dec 2017 20:52:52 -0800 (PST) Subject: [pypy-commit] pypy py3.5: xfail 2 mysterious buildslave-only test failures Message-ID: <5a262624.caa2df0a.d5134.e08c@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93274:44b347a2f013 Date: 2017-12-05 04:52 +0000 http://bitbucket.org/pypy/pypy/changeset/44b347a2f013/ Log: xfail 2 mysterious buildslave-only test failures diff --git a/pypy/tool/pytest/test/test_conftest1.py b/pypy/tool/pytest/test/test_conftest1.py --- a/pypy/tool/pytest/test/test_conftest1.py +++ b/pypy/tool/pytest/test/test_conftest1.py @@ -1,4 +1,3 @@ - import py import sys import subprocess @@ -61,6 +60,7 @@ assert len(failed) == 1 assert "app_test_code_in_docstring_failing" in failed[0] + @py.test.mark.xfail(reason='fails on buildslave') def test_docstring_runappdirect(self): passed, failed = subproc_run(innertest, '-k', 'test_code_in_docstring', @@ -71,6 +71,7 @@ assert "app_test_code_in_docstring_failing" in failed[0] assert "test_code_in_docstring_failing" in failed[1] + @py.test.mark.xfail(reason='fails on buildslave') def test_raises_inside_closure(self): passed, failed = subproc_run(innertest, '-k', 'app_test_raise_in_a_closure', '--runappdirect') From pypy.commits at gmail.com Tue Dec 5 13:13:09 2017 From: pypy.commits at gmail.com (arigo) Date: Tue, 05 Dec 2017 10:13:09 -0800 (PST) Subject: [pypy-commit] pypy memory-accounting: merge Message-ID: <5a26e1b5.1ba1df0a.621fb.5999@mx.google.com> Author: Armin Rigo Branch: memory-accounting Changeset: r93276:30c0f83e1e0f Date: 2017-12-05 19:12 +0100 http://bitbucket.org/pypy/pypy/changeset/30c0f83e1e0f/ Log: merge diff too long, truncating to 2000 out of 18247 lines diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -71,6 +71,8 @@ ^lib_pypy/.+.c$ ^lib_pypy/.+.o$ ^lib_pypy/.+.so$ +^lib_pypy/.+.pyd$ +^lib_pypy/Release/ ^pypy/doc/discussion/.+\.html$ ^include/.+\.h$ ^include/.+\.inl$ diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -40,3 +40,7 @@ 2875f328eae2216a87f3d6f335092832eb031f56 release-pypy3.5-v5.7.1 c925e73810367cd960a32592dd7f728f436c125c release-pypy2.7-v5.8.0 a37ecfe5f142bc971a86d17305cc5d1d70abec64 release-pypy3.5-v5.8.0 +03d614975835870da65ff0481e1edad68ebbcb8d release-pypy2.7-v5.9.0 +d72f9800a42b46a8056951b1da2426d2c2d8d502 release-pypy3.5-v5.9.0 +03d614975835870da65ff0481e1edad68ebbcb8d release-pypy2.7-v5.9.0 +84a2f3e6a7f88f2fe698e473998755b3bd1a12e2 release-pypy2.7-v5.9.0 diff --git a/_pytest/terminal.py b/_pytest/terminal.py --- a/_pytest/terminal.py +++ b/_pytest/terminal.py @@ -366,11 +366,11 @@ EXIT_OK, EXIT_TESTSFAILED, EXIT_INTERRUPTED, EXIT_USAGEERROR, EXIT_NOTESTSCOLLECTED) if exitstatus in summary_exit_codes: - 
self.config.hook.pytest_terminal_summary(terminalreporter=self) self.summary_errors() self.summary_failures() self.summary_warnings() self.summary_passes() + self.config.hook.pytest_terminal_summary(terminalreporter=self) if exitstatus == EXIT_INTERRUPTED: self._report_keyboardinterrupt() del self._keyboardinterrupt_memo diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt new file mode 100644 --- /dev/null +++ b/extra_tests/requirements.txt @@ -0,0 +1,2 @@ +pytest +hypothesis diff --git a/extra_tests/test_bytes.py b/extra_tests/test_bytes.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_bytes.py @@ -0,0 +1,84 @@ +from hypothesis import strategies as st +from hypothesis import given, example + +st_bytestring = st.binary() | st.binary().map(bytearray) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st_bytestring, st_bytestring) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def test_startswith_start(u, v, start): + expected = u[start:].startswith(v) if v else (start <= len(u)) + assert u.startswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st_bytestring, st_bytestring) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def test_endswith_2(u, v, start): + expected = u[start:].endswith(v) if v else (start <= len(u)) + assert u.endswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) 
is expected diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_textio.py @@ -0,0 +1,48 @@ +from hypothesis import given, strategies as st + +from io import BytesIO, TextIOWrapper +import os + +def translate_newlines(text): + text = text.replace('\r\n', '\n') + text = text.replace('\r', '\n') + return text.replace('\n', os.linesep) + + at st.composite +def st_readline_universal( + draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + lines = draw(st.lists( + st.text(st.characters(blacklist_characters='\r\n')), + min_size=n_lines, max_size=n_lines)) + limits = [] + for line in lines: + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + endings = draw(st.lists( + st.sampled_from(['\n', '\r', '\r\n']), + min_size=n_lines, max_size=n_lines)) + return ( + ''.join(line + ending for line, ending in zip(lines, endings)), + limits) + + at given(data=st_readline_universal(), + mode=st.sampled_from(['\r', '\n', '\r\n', '', None])) +def test_readline(data, mode): + txt, limits = data + textio = TextIOWrapper( + BytesIO(txt.encode('utf-8', 'surrogatepass')), + encoding='utf-8', errors='surrogatepass', newline=mode) + lines = [] + for limit in limits: + line = textio.readline(limit) + if limit >= 0: + assert len(line) <= limit + if line: + lines.append(line) + elif limit: + break + if mode is None: + txt = translate_newlines(txt) + assert txt.startswith(u''.join(lines)) diff --git a/extra_tests/test_unicode.py b/extra_tests/test_unicode.py --- a/extra_tests/test_unicode.py +++ b/extra_tests/test_unicode.py @@ -1,3 +1,4 @@ +import sys import pytest from hypothesis import strategies as st from hypothesis import given, settings, example @@ -32,3 +33,89 @@ @given(s=st.text()) def test_composition(s, norm1, norm2, norm3): assert normalize(norm2, normalize(norm1, s)) == normalize(norm3, s) + + at given(st.text(), st.text(), st.text()) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st.text(), st.text()) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_startswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].startswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.startswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at 
given(st.text(), st.text(), st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st.text(), st.text()) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_endswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].endswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.endswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -360,14 +360,15 @@ self._FuncPtr = _FuncPtr if handle is None: - if flags & _FUNCFLAG_CDECL: - pypy_dll = _ffi.CDLL(name, mode) - else: - pypy_dll = _ffi.WinDLL(name, mode) - self.__pypy_dll__ = pypy_dll - handle = int(pypy_dll) - if _sys.maxint > 2 ** 32: - handle = int(handle) # long -> int + handle = 0 + if flags & _FUNCFLAG_CDECL: + pypy_dll = _ffi.CDLL(name, mode, handle) + else: + pypy_dll = _ffi.WinDLL(name, mode, handle) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int self._handle = handle def __repr__(self): diff --git a/lib-python/2.7/inspect.py b/lib-python/2.7/inspect.py --- a/lib-python/2.7/inspect.py +++ b/lib-python/2.7/inspect.py @@ -40,6 +40,10 @@ import linecache from operator import attrgetter from collections import namedtuple +try: + from cpyext import is_cpyext_function as _is_cpyext_function +except ImportError: + _is_cpyext_function = lambda obj: False # These constants are from Include/code.h. 
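(Editorial aside, not part of the diffs.) The "CPython leaks implementation details" comments in the extra_tests above refer to this corner case: with an empty needle, Python 3's startswith() range-checks the start index against the original string instead of slicing first, while Python 2's unicode version short-circuits on the empty substring. In plain Python:

    import sys
    u = u'x'
    assert u[2:].startswith(u'') is True        # slicing clamps the out-of-range index
    if sys.version_info[0] >= 3:
        assert u.startswith(u'', 2) is False    # start > len(u) makes the match fail
    else:
        assert u.startswith(u'', 2) is True     # Python 2 unicode follows the slicing behaviour
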
CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS = 0x1, 0x2, 0x4, 0x8 @@ -230,7 +234,7 @@ __doc__ documentation string __name__ original name of this function or method __self__ instance to which a method is bound, or None""" - return isinstance(object, types.BuiltinFunctionType) + return isinstance(object, types.BuiltinFunctionType) or _is_cpyext_function(object) def isroutine(object): """Return true if the object is any kind of function or method.""" diff --git a/lib-python/2.7/test/test_urllib2net.py b/lib-python/2.7/test/test_urllib2net.py --- a/lib-python/2.7/test/test_urllib2net.py +++ b/lib-python/2.7/test/test_urllib2net.py @@ -286,7 +286,7 @@ self.assertEqual(u.fp._sock.fp._sock.gettimeout(), 120) u.close() - FTP_HOST = 'ftp://ftp.debian.org/debian/' + FTP_HOST = 'ftp://www.pythontest.net/' def test_ftp_basic(self): self.assertIsNone(socket.getdefaulttimeout()) diff --git a/lib-python/2.7/warnings.py b/lib-python/2.7/warnings.py --- a/lib-python/2.7/warnings.py +++ b/lib-python/2.7/warnings.py @@ -43,11 +43,12 @@ unicodetype = unicode except NameError: unicodetype = () + template = "%s: %s: %s\n" try: message = str(message) except UnicodeEncodeError: - pass - s = "%s: %s: %s\n" % (lineno, category.__name__, message) + template = unicode(template) + s = template % (lineno, category.__name__, message) line = linecache.getline(filename, lineno) if line is None else line if line: line = line.strip() diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -8,60 +8,63 @@ class ArrayMeta(_CDataMeta): def __new__(self, name, cls, typedict): res = type.__new__(self, name, cls, typedict) - if '_type_' in typedict: - ffiarray = _rawffi.Array(typedict['_type_']._ffishape_) - res._ffiarray = ffiarray - subletter = getattr(typedict['_type_'], '_type_', None) - if subletter == 'c': - def getvalue(self): - return _rawffi.charp2string(self._buffer.buffer, - self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, str): - _rawffi.rawstring2charp(self._buffer.buffer, val) - else: - for i in range(len(val)): - self[i] = val[i] - if len(val) < self._length_: - self._buffer[len(val)] = '\x00' - res.value = property(getvalue, setvalue) - def getraw(self): - return _rawffi.charp2rawstring(self._buffer.buffer, - self._length_) + if cls == (_CData,): # this is the Array class defined below + res._ffiarray = None + return res + if not hasattr(res, '_length_') or not isinstance(res._length_, int): + raise AttributeError( + "class must define a '_length_' attribute, " + "which must be a positive integer") + ffiarray = res._ffiarray = _rawffi.Array(res._type_._ffishape_) + subletter = getattr(res._type_, '_type_', None) + if subletter == 'c': + def getvalue(self): + return _rawffi.charp2string(self._buffer.buffer, + self._length_) + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, str): + _rawffi.rawstring2charp(self._buffer.buffer, val) + else: + for i in range(len(val)): + self[i] = val[i] + if len(val) < self._length_: + self._buffer[len(val)] = b'\x00' + res.value = property(getvalue, setvalue) - def setraw(self, buffer): - if len(buffer) > self._length_: - raise ValueError("%r too long" % (buffer,)) - _rawffi.rawstring2charp(self._buffer.buffer, buffer) - res.raw = property(getraw, setraw) - elif 
subletter == 'u': - def getvalue(self): - return _rawffi.wcharp2unicode(self._buffer.buffer, - self._length_) + def getraw(self): + return _rawffi.charp2rawstring(self._buffer.buffer, + self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, unicode): - target = self._buffer - else: - target = self - for i in range(len(val)): - target[i] = val[i] - if len(val) < self._length_: - target[len(val)] = u'\x00' - res.value = property(getvalue, setvalue) - - if '_length_' in typedict: - res._ffishape_ = (ffiarray, typedict['_length_']) - res._fficompositesize_ = res._sizeofinstances() - else: - res._ffiarray = None + def setraw(self, buffer): + if len(buffer) > self._length_: + raise ValueError("%r too long" % (buffer,)) + _rawffi.rawstring2charp(self._buffer.buffer, buffer) + res.raw = property(getraw, setraw) + elif subletter == 'u': + def getvalue(self): + return _rawffi.wcharp2unicode(self._buffer.buffer, + self._length_) + + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, unicode): + target = self._buffer + else: + target = self + for i in range(len(val)): + target[i] = val[i] + if len(val) < self._length_: + target[len(val)] = u'\x00' + res.value = property(getvalue, setvalue) + + res._ffishape_ = (ffiarray, res._length_) + res._fficompositesize_ = res._sizeofinstances() return res from_address = cdata_from_address @@ -156,7 +159,7 @@ l = [self[i] for i in range(start, stop, step)] letter = getattr(self._type_, '_type_', None) if letter == 'c': - return "".join(l) + return b"".join(l) if letter == 'u': return u"".join(l) return l diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -176,6 +176,10 @@ def _get_buffer_value(self): return self._buffer[0] + def _copy_to(self, addr): + target = type(self).from_address(addr)._buffer + target[0] = self._get_buffer_value() + def _to_ffi_param(self): if self.__class__._is_pointer_like(): return self._get_buffer_value() diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -114,7 +114,9 @@ cobj = self._type_.from_param(value) if ensure_objects(cobj) is not None: store_reference(self, index, cobj._objects) - self._subarray(index)[0] = cobj._get_buffer_value() + address = self._buffer[0] + address += index * sizeof(self._type_) + cobj._copy_to(address) def __nonzero__(self): return self._buffer[0] != 0 diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -291,6 +291,11 @@ def _get_buffer_value(self): return self._buffer.buffer + def _copy_to(self, addr): + from ctypes import memmove + origin = self._get_buffer_value() + memmove(addr, origin, self._fficompositesize_) + def _to_ffi_param(self): return self._buffer diff --git a/lib_pypy/_ctypes_test.py b/lib_pypy/_ctypes_test.py --- a/lib_pypy/_ctypes_test.py +++ b/lib_pypy/_ctypes_test.py @@ -21,5 +21,11 @@ with fp: imp.load_module('_ctypes_test', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass print('could not find _ctypes_test in %s' % output_dir) _pypy_testcapi.compile_shared('_ctypes_test.c', 
'_ctypes_test', output_dir) diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py --- a/lib_pypy/_sqlite3.py +++ b/lib_pypy/_sqlite3.py @@ -1028,21 +1028,25 @@ if '\0' in sql: raise ValueError("the query contains a null character") - first_word = sql.lstrip().split(" ")[0].upper() - if first_word == "": + + if sql: + first_word = sql.lstrip().split()[0].upper() + if first_word == '': + self._type = _STMT_TYPE_INVALID + if first_word == "SELECT": + self._type = _STMT_TYPE_SELECT + elif first_word == "INSERT": + self._type = _STMT_TYPE_INSERT + elif first_word == "UPDATE": + self._type = _STMT_TYPE_UPDATE + elif first_word == "DELETE": + self._type = _STMT_TYPE_DELETE + elif first_word == "REPLACE": + self._type = _STMT_TYPE_REPLACE + else: + self._type = _STMT_TYPE_OTHER + else: self._type = _STMT_TYPE_INVALID - elif first_word == "SELECT": - self._type = _STMT_TYPE_SELECT - elif first_word == "INSERT": - self._type = _STMT_TYPE_INSERT - elif first_word == "UPDATE": - self._type = _STMT_TYPE_UPDATE - elif first_word == "DELETE": - self._type = _STMT_TYPE_DELETE - elif first_word == "REPLACE": - self._type = _STMT_TYPE_REPLACE - else: - self._type = _STMT_TYPE_OTHER if isinstance(sql, unicode): sql = sql.encode('utf-8') diff --git a/lib_pypy/_testcapi.py b/lib_pypy/_testcapi.py --- a/lib_pypy/_testcapi.py +++ b/lib_pypy/_testcapi.py @@ -16,4 +16,10 @@ with fp: imp.load_module('_testcapi', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass _pypy_testcapi.compile_shared(cfile, '_testcapi', output_dir) diff --git a/lib_pypy/_tkinter/app.py b/lib_pypy/_tkinter/app.py --- a/lib_pypy/_tkinter/app.py +++ b/lib_pypy/_tkinter/app.py @@ -119,7 +119,7 @@ tklib.TCL_GLOBAL_ONLY) # This is used to get the application class for Tk 4.1 and up - argv0 = className.lower() + argv0 = className.lower().encode('ascii') tklib.Tcl_SetVar(self.interp, "argv0", argv0, tklib.TCL_GLOBAL_ONLY) @@ -180,6 +180,9 @@ if err == tklib.TCL_ERROR: self.raiseTclError() + def interpaddr(self): + return int(tkffi.cast('size_t', self.interp)) + def _var_invoke(self, func, *args, **kwargs): if self.threaded and self.thread_id != tklib.Tcl_GetCurrentThread(): # The current thread is not the interpreter thread. diff --git a/lib_pypy/cffi.egg-info/PKG-INFO b/lib_pypy/cffi.egg-info/PKG-INFO --- a/lib_pypy/cffi.egg-info/PKG-INFO +++ b/lib_pypy/cffi.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: cffi -Version: 1.11.1 +Version: 1.11.2 Summary: Foreign Function Interface for Python calling C code. Home-page: http://cffi.readthedocs.org Author: Armin Rigo, Maciej Fijalkowski diff --git a/lib_pypy/cffi/__init__.py b/lib_pypy/cffi/__init__.py --- a/lib_pypy/cffi/__init__.py +++ b/lib_pypy/cffi/__init__.py @@ -4,8 +4,8 @@ from .api import FFI from .error import CDefError, FFIError, VerificationError, VerificationMissing -__version__ = "1.11.1" -__version_info__ = (1, 11, 1) +__version__ = "1.11.2" +__version_info__ = (1, 11, 2) # The verifier module file names are based on the CRC32 of a string that # contains the following version number. 
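(Editorial aside, separate from the changesets.) The _sqlite3 statement-type detection above switches from split(" ") to split(): only the latter treats a newline or tab after the first keyword as a word boundary, so a query like the hypothetical one below is now classified as a SELECT instead of falling through to _STMT_TYPE_OTHER:

    sql = "SELECT\n* FROM t"
    assert sql.lstrip().split(" ")[0].upper() == "SELECT\n*"   # old: first "word" still contains the newline
    assert sql.lstrip().split()[0].upper() == "SELECT"         # new: split on any whitespace
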
It may be older than __version__ diff --git a/lib_pypy/cffi/_cffi_include.h b/lib_pypy/cffi/_cffi_include.h --- a/lib_pypy/cffi/_cffi_include.h +++ b/lib_pypy/cffi/_cffi_include.h @@ -238,9 +238,9 @@ _CFFI_UNUSED_FN static PyObject *_cffi_from_c_char16_t(uint16_t x) { if (sizeof(_cffi_wchar_t) == 2) - return _cffi_from_c_wchar_t(x); + return _cffi_from_c_wchar_t((_cffi_wchar_t)x); else - return _cffi_from_c_wchar3216_t(x); + return _cffi_from_c_wchar3216_t((int)x); } _CFFI_UNUSED_FN static int _cffi_to_c_char32_t(PyObject *o) @@ -254,7 +254,7 @@ _CFFI_UNUSED_FN static PyObject *_cffi_from_c_char32_t(int x) { if (sizeof(_cffi_wchar_t) == 4) - return _cffi_from_c_wchar_t(x); + return _cffi_from_c_wchar_t((_cffi_wchar_t)x); else return _cffi_from_c_wchar3216_t(x); } diff --git a/lib_pypy/cffi/_embedding.h b/lib_pypy/cffi/_embedding.h --- a/lib_pypy/cffi/_embedding.h +++ b/lib_pypy/cffi/_embedding.h @@ -247,7 +247,7 @@ if (f != NULL && f != Py_None) { PyFile_WriteString("\nFrom: " _CFFI_MODULE_NAME - "\ncompiled with cffi version: 1.11.1" + "\ncompiled with cffi version: 1.11.2" "\n_cffi_backend module: ", f); modules = PyImport_GetModuleDict(); mod = PyDict_GetItemString(modules, "_cffi_backend"); diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -119,7 +119,7 @@ To run untranslated tests, you need the Boehm garbage collector libgc. -On recent Debian and Ubuntu (like 17.04), this is the command to install +On recent Debian and Ubuntu (16.04 onwards), this is the command to install all build-time dependencies:: apt-get install gcc make libffi-dev pkg-config zlib1g-dev libbz2-dev \ @@ -127,7 +127,7 @@ tk-dev libgc-dev python-cffi \ liblzma-dev libncursesw5-dev # these two only needed on PyPy3 -On older Debian and Ubuntu (12.04 to 16.04):: +On older Debian and Ubuntu (12.04-14.04):: apt-get install gcc make libffi-dev pkg-config libz-dev libbz2-dev \ libsqlite3-dev libncurses-dev libexpat1-dev libssl-dev libgdbm-dev \ @@ -149,12 +149,23 @@ xz-devel # For lzma on PyPy3. (XXX plus the SLES11 version of libgdbm-dev and tk-dev) -On Mac OS X, most of these build-time dependencies are installed alongside +On Mac OS X:: + +Most of these build-time dependencies are installed alongside the Developer Tools. However, note that in order for the installation to find them you may need to run:: xcode-select --install +An exception is OpenSSL, which is no longer provided with the operating +system. It can be obtained via Homebrew (with ``$ brew install openssl``), +but it will not be available on the system path by default. The easiest +way to enable it for building pypy is to set an environment variable:: + + export PKG_CONFIG_PATH=$(brew --prefix)/opt/openssl/lib/pkgconfig + +After setting this, translation (described next) will find the OpenSSL libs +as expected. Run the translation ------------------- @@ -187,18 +198,18 @@ entire pypy interpreter. This step is currently singe threaded, and RAM hungry. As part of this step, the chain creates a large number of C code files and a Makefile to compile them in a - directory controlled by the ``PYPY_USESSION_DIR`` environment variable. + directory controlled by the ``PYPY_USESSION_DIR`` environment variable. 2. Create an executable ``pypy-c`` by running the Makefile. This step can - utilize all possible cores on the machine. -3. Copy the needed binaries to the current directory. -4. Generate c-extension modules for any cffi-based stdlib modules. + utilize all possible cores on the machine. +3. 
Copy the needed binaries to the current directory. +4. Generate c-extension modules for any cffi-based stdlib modules. The resulting executable behaves mostly like a normal Python interpreter (see :doc:`cpython_differences`), and is ready for testing, for use as a base interpreter for a new virtualenv, or for packaging into a binary suitable for installation on another machine running the same OS as the build -machine. +machine. Note that step 4 is merely done as a convenience, any of the steps may be rerun without rerunning the previous steps. @@ -255,7 +266,7 @@ * PyPy 2.5.1 or earlier: normal users would see permission errors. Installers need to run ``pypy -c "import gdbm"`` and other similar - commands at install time; the exact list is in + commands at install time; the exact list is in :source:`pypy/tool/release/package.py `. Users seeing a broken installation of PyPy can fix it after-the-fact if they have sudo rights, by running once e.g. ``sudo pypy -c "import gdbm``. diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst --- a/pypy/doc/faq.rst +++ b/pypy/doc/faq.rst @@ -182,6 +182,57 @@ technical difficulties. +What about numpy, numpypy, micronumpy? +-------------------------------------- + +Way back in 2011, the PyPy team `started to reimplement`_ numpy in PyPy. It +has two pieces: + + * the builtin module :source:`pypy/module/micronumpy`: this is written in + RPython and roughly covers the content of the ``numpy.core.multiarray`` + module. Confusingly enough, this is available in PyPy under the name + ``_numpypy``. It is included by default in all the official releases of + PyPy (but it might be dropped in the future). + + * a fork_ of the official numpy repository maintained by us and informally + called ``numpypy``: even more confusing, the name of the repo on bitbucket + is ``numpy``. The main difference with the upstream numpy, is that it is + based on the micronumpy module written in RPython, instead of of + ``numpy.core.multiarray`` which is written in C. + +Moreover, it is also possible to install the upstream version of ``numpy``: +its core is written in C and it runs on PyPy under the cpyext compatibility +layer. This is what you get if you do ``pypy -m pip install numpy``. + + +Should I install numpy or numpypy? +----------------------------------- + +TL;DR version: you should use numpy. You can install it by doing ``pypy -m pip +install numpy``. You might also be interested in using the experimental `PyPy +binary wheels`_ to save compilation time. + +The upstream ``numpy`` is written in C, and runs under the cpyext +compatibility layer. Nowadays, cpyext is mature enough that you can simply +use the upstream ``numpy``, since it passes 99.9% of the test suite. At the +moment of writing (October 2017) the main drawback of ``numpy`` is that cpyext +is infamously slow, and thus it has worse performance compared to +``numpypy``. However, we are actively working on improving it, as we expect to +reach the same speed, eventually. + +On the other hand, ``numpypy`` is more JIT-friendly and very fast to call, +since it is written in RPython: but it is a reimplementation, and it's hard to +be completely compatible: over the years the project slowly matured and +eventually it was able to call out to the LAPACK and BLAS libraries to speed +matrix calculations, and reached around an 80% parity with the upstream +numpy. However, 80% is far from 100%. Since cpyext/numpy compatibility is +progressing fast, we have discontinued support for ``numpypy``. + +.. 
_`started to reimplement`: https://morepypy.blogspot.co.il/2011/05/numpy-in-pypy-status-and-roadmap.html +.. _fork: https://bitbucket.org/pypy/numpy +.. _`PyPy binary wheels`: https://github.com/antocuni/pypy-wheels + + Is PyPy more clever than CPython about Tail Calls? -------------------------------------------------- diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst --- a/pypy/doc/project-ideas.rst +++ b/pypy/doc/project-ideas.rst @@ -240,9 +240,12 @@ **matplotlib** https://github.com/matplotlib/matplotlib - TODO: the tkagg backend does not work, which makes tests fail on downstream - projects like Pandas, SciPy. It uses id(obj) as a c-pointer to obj in - tkagg.py, which requires refactoring + Status: using the matplotlib branch of PyPy and the tkagg-cffi branch of + matplotlib from https://github.com/mattip/matplotlib/tree/tkagg-cffi, the + tkagg backend can function. + + TODO: the matplotlib branch passes numpy arrays by value (copying all the + data), this proof-of-concept needs help to become completely compliant **wxPython** https://bitbucket.org/amauryfa/wxpython-cffi diff --git a/pypy/doc/release-v5.9.0.rst b/pypy/doc/release-v5.9.0.rst --- a/pypy/doc/release-v5.9.0.rst +++ b/pypy/doc/release-v5.9.0.rst @@ -10,18 +10,24 @@ This new PyPy2.7 release includes the upstream stdlib version 2.7.13, and PyPy3.5 includes the upstream stdlib version 3.5.3. -Only a handful of failing tests remain in NumPy and Pandas on PyPy2.7, issues -that appeared as excessive memory use were cleared up and other incompatibilities -were resolved. +NumPy and Pandas now work on PyPy2.7 (together with Cython 0.27.1). Issues +that appeared as excessive memory +use were cleared up and other incompatibilities were resolved. The C-API +compatibility layer does slow down code which crosses the python-c interface +often, we have ideas on how it could be improved, and still recommend +using pure python on PyPy or interfacing via CFFI_. Many other modules +based on C-API exentions now work on PyPy as well. -Cython 0.27 (released last week) should support more projects with PyPy, both -on PyPy2.7 and PyPy3.5 beta. +Cython 0.27.1 (released very recently) supports more projects with PyPy, both +on PyPy2.7 and PyPy3.5 beta. Note version **0.27.1** is now the minimum +version that supports this version of PyPy, due to some interactions with +updated C-API interface code. We optimized the JSON parser for recurring string keys, which should decrease memory use to 50% and increase parsing speed by up to 15% for large JSON files with many repeating dictionary keys (which is quite common). -CFFI_, which is part of the PyPy release, has been updated to 1.11, +CFFI_, which is part of the PyPy release, has been updated to 1.11.1, improving an already great package for interfacing with C. CFFI now supports complex arguments in API mode, as well as ``char16_t`` and ``char32_t`` and has improved support for callbacks. 
@@ -145,6 +151,7 @@ * Issue 2590_: fix the bounds in the GC when allocating a lot of objects with finalizers * Replace magical NOT RPYTHON comment with a decorator * Implement ``socket.sendmsg()``/``.recvmsg()`` for py3.5 + * Add ``memory_pressure`` for ``_SSLSocket`` objects * Degredations @@ -163,7 +170,8 @@ * Add support for ``_PyNamespace_New``, ``PyMemoryView_FromMemory``, ``Py_EnterRecursiveCall`` raising RecursionError, ``PyObject_LengthHint``, - ``PyUnicode_FromKindAndData``, ``PyDict_SetDefault``, ``PyGenObject`` + ``PyUnicode_FromKindAndData``, ``PyDict_SetDefault``, ``PyGenObject``, + ``PyGenObject``, ``PyUnicode_Substring``, ``PyLong_FromUnicodeObject`` * Implement ``PyType_FromSpec`` (PEP 384) and fix issues with PEP 489 support * Support the new version of ``os.stat()`` on win32 * Use ``stat3()`` on Posix diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -3,4 +3,29 @@ =========================== .. this is a revision shortly after release-pypy2.7-v5.9.0 -.. startrev:899e5245de1e +.. startrev:d56dadcef996 + +.. branch: cppyy-packaging +Cleanup and improve cppyy packaging + +.. branch: docs-osx-brew-openssl + +.. branch: keep-debug-symbols +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests +Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + diff --git a/pypy/doc/whatsnew-pypy2-5.9.0.rst b/pypy/doc/whatsnew-pypy2-5.9.0.rst --- a/pypy/doc/whatsnew-pypy2-5.9.0.rst +++ b/pypy/doc/whatsnew-pypy2-5.9.0.rst @@ -85,3 +85,12 @@ .. branch: py_ssize_t Explicitly use Py_ssize_t as the Signed type in pypy c-api + +.. branch: cpyext-jit + +Differentiate the code to call METH_NOARGS, METH_O and METH_VARARGS in cpyext: +this allows to write specialized code which is much faster than previous +completely generic version. Moreover, let the JIT to look inside the cpyext +module: the net result is that cpyext calls are up to 7x faster. However, this +is true only for very simple situations: in all real life code, we are still +much slower than CPython (more optimizations to come) diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -25,8 +25,10 @@ This compiler, while the standard one for Python 2.7, is deprecated. Microsoft has made it available as the `Microsoft Visual C++ Compiler for Python 2.7`_ (the link -was checked in Nov 2016). Note that the compiler suite will be installed in -``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python``. +was checked in Nov 2016). Note that the compiler suite may be installed in +``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python`` +or in +``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python``. A current version of ``setuptools`` will be able to find it there. 
For Windows 10, you must right-click the download, and under ``Properties`` -> ``Compatibility`` mark it as ``Run run this program in comatibility mode for`` @@ -41,7 +43,6 @@ ----------------------------------- We routinely test translation using v9, also known as Visual Studio 2008. -Our buildbot is still using the Express Edition, not the compiler noted above. Other configurations may work as well. The translation scripts will set up the appropriate environment variables @@ -81,6 +82,30 @@ .. _build instructions: http://pypy.org/download.html#building-from-source +Setting Up Visual Studio for building SSL in Python3 +---------------------------------------------------- + +On Python3, the ``ssl`` module is based on ``cffi``, and requires a build step after +translation. However ``distutils`` does not support the Micorosft-provided Visual C +compiler, and ``cffi`` depends on ``distutils`` to find the compiler. The +traditional solution to this problem is to install the ``setuptools`` module +via running ``-m ensurepip`` which installs ``pip`` and ``setuptools``. However +``pip`` requires ``ssl``. So we have a chicken-and-egg problem: ``ssl`` depends on +``cffi`` which depends on ``setuptools``, which depends on ``ensurepip``, which +depends on ``ssl``. + +In order to solve this, the buildbot sets an environment varaible that helps +``distutils`` find the compiler without ``setuptools``:: + + set VS90COMNTOOLS=C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0\VC\bin + +or whatever is appropriate for your machine. Note that this is not enough, you +must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the +``...\9.0\VC`` directory, and edit it, changing the lines that set +``VCINSTALLDIR`` and ``WindowsSdkDir``:: + set VCINSTALLDIR=%~dp0\ + set WindowsSdkDir=%~dp0\..\WinSDK\ + Preparing Windows for the large build ------------------------------------- diff --git a/pypy/goal/getnightly.py b/pypy/goal/getnightly.py --- a/pypy/goal/getnightly.py +++ b/pypy/goal/getnightly.py @@ -15,7 +15,7 @@ arch = 'linux' cmd = 'wget "%s"' TAR_OPTIONS += ' --wildcards' - binfiles = "'*/bin/pypy' '*/bin/libpypy-c.so'" + binfiles = "'*/bin/pypy*' '*/bin/libpypy-c.so*'" if os.uname()[-1].startswith('arm'): arch += '-armhf-raspbian' elif sys.platform.startswith('darwin'): diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py --- a/pypy/interpreter/pyparser/future.py +++ b/pypy/interpreter/pyparser/future.py @@ -85,13 +85,17 @@ # permissive parsing of the given list of tokens; it relies on # the real parsing done afterwards to give errors. 
it.skip_newlines() - it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") - if it.skip(pygram.tokens.STRING): - it.skip_newlines() - while (it.skip_name("from") and + docstring_possible = True + while True: + it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") + if docstring_possible and it.skip(pygram.tokens.STRING): + it.skip_newlines() + docstring_possible = False + if not (it.skip_name("from") and it.skip_name("__future__") and it.skip_name("import")): + break it.skip(pygram.tokens.LPAR) # optionally # return in 'last_position' any line-column pair that points # somewhere inside the last __future__ import statement diff --git a/pypy/interpreter/pyparser/test/test_future.py b/pypy/interpreter/pyparser/test/test_future.py --- a/pypy/interpreter/pyparser/test/test_future.py +++ b/pypy/interpreter/pyparser/test/test_future.py @@ -208,3 +208,13 @@ 'from __future__ import with_statement;') f = run(s, (2, 23)) assert f == fut.CO_FUTURE_DIVISION | fut.CO_FUTURE_WITH_STATEMENT + +def test_future_doc_future(): + # for some reason people do this :-[ + s = ''' +from __future__ import generators +"Docstring" +from __future__ import division + ''' + f = run(s, (4, 24)) + assert f == fut.CO_FUTURE_DIVISION | fut.CO_GENERATOR_ALLOWED diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py --- a/pypy/module/_cffi_backend/__init__.py +++ b/pypy/module/_cffi_backend/__init__.py @@ -3,7 +3,7 @@ from rpython.rlib import rdynload, clibffi from rpython.rtyper.lltypesystem import rffi -VERSION = "1.11.1" +VERSION = "1.11.2" FFI_DEFAULT_ABI = clibffi.FFI_DEFAULT_ABI try: diff --git a/pypy/module/_cffi_backend/cffi1_module.py b/pypy/module/_cffi_backend/cffi1_module.py --- a/pypy/module/_cffi_backend/cffi1_module.py +++ b/pypy/module/_cffi_backend/cffi1_module.py @@ -1,4 +1,5 @@ from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.rlib import jit from pypy.interpreter.error import oefmt from pypy.interpreter.module import Module @@ -15,7 +16,7 @@ INITFUNCPTR = lltype.Ptr(lltype.FuncType([rffi.VOIDPP], lltype.Void)) - + at jit.dont_look_inside def load_cffi1_module(space, name, path, initptr): # This is called from pypy.module.cpyext.api.load_extension_module() from pypy.module._cffi_backend.call_python import get_ll_cffi_call_python diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -156,10 +156,11 @@ class W_CTypePtrBase(W_CTypePtrOrArray): # base class for both pointers and pointers-to-functions - _attrs_ = ['is_void_ptr', 'is_voidchar_ptr'] - _immutable_fields_ = ['is_void_ptr', 'is_voidchar_ptr'] + _attrs_ = ['is_void_ptr', 'is_voidchar_ptr', 'is_onebyte_ptr'] + _immutable_fields_ = ['is_void_ptr', 'is_voidchar_ptr', 'is_onebyte_ptr'] is_void_ptr = False is_voidchar_ptr = False + is_onebyte_ptr = False def convert_to_object(self, cdata): ptrdata = rffi.cast(rffi.CCHARPP, cdata)[0] @@ -179,12 +180,20 @@ if self.is_void_ptr or other.is_void_ptr: pass # cast from or to 'void *' elif self.is_voidchar_ptr or other.is_voidchar_ptr: - space = self.space - msg = ("implicit cast from '%s' to '%s' " - "will be forbidden in the future (check that the types " - "are as you expect; use an explicit ffi.cast() if they " - "are correct)" % (other.name, self.name)) - space.warn(space.newtext(msg), space.w_UserWarning) + # for backward compatibility, accept "char *" as either + # source of target. 
This is not what C does, though, + # so emit a warning that will eventually turn into an + # error. The warning is turned off if both types are + # pointers to single bytes. + if self.is_onebyte_ptr and other.is_onebyte_ptr: + pass # no warning + else: + space = self.space + msg = ("implicit cast from '%s' to '%s' " + "will be forbidden in the future (check that the types " + "are as you expect; use an explicit ffi.cast() if they " + "are correct)" % (other.name, self.name)) + space.warn(space.newtext(msg), space.w_UserWarning) else: raise self._convert_error("compatible pointer", w_ob) @@ -214,6 +223,7 @@ self.is_void_ptr = isinstance(ctitem, ctypevoid.W_CTypeVoid) self.is_voidchar_ptr = (self.is_void_ptr or isinstance(ctitem, ctypeprim.W_CTypePrimitiveChar)) + self.is_onebyte_ptr = (ctitem.size == 1) W_CTypePtrBase.__init__(self, space, size, extra, 2, ctitem) def newp(self, w_init, allocator): diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -1,7 +1,7 @@ # ____________________________________________________________ import sys -assert __version__ == "1.11.1", ("This test_c.py file is for testing a version" +assert __version__ == "1.11.2", ("This test_c.py file is for testing a version" " of cffi that differs from the one that we" " get from 'import _cffi_backend'") if sys.version_info < (3,): @@ -2099,7 +2099,8 @@ if sys.platform.startswith("linux"): BWChar = new_primitive_type("wchar_t") assert sizeof(BWChar) == 4 - assert int(cast(BWChar, -1)) == -1 # signed, on linux + # wchar_t is often signed on Linux, but not always (e.g. on ARM) + assert int(cast(BWChar, -1)) in (-1, 4294967295) def test_char16(): BChar16 = new_primitive_type("char16_t") @@ -3903,9 +3904,11 @@ BCharP = new_pointer_type(new_primitive_type("char")) BIntP = new_pointer_type(new_primitive_type("int")) BVoidP = new_pointer_type(new_void_type()) + BUCharP = new_pointer_type(new_primitive_type("unsigned char")) z1 = cast(BCharP, 0) z2 = cast(BIntP, 0) z3 = cast(BVoidP, 0) + z4 = cast(BUCharP, 0) with warnings.catch_warnings(record=True) as w: newp(new_pointer_type(BIntP), z1) # warn assert len(w) == 1 @@ -3919,6 +3922,12 @@ assert len(w) == 2 newp(new_pointer_type(BIntP), z3) # fine assert len(w) == 2 + newp(new_pointer_type(BCharP), z4) # fine (ignore signedness here) + assert len(w) == 2 + newp(new_pointer_type(BUCharP), z1) # fine (ignore signedness here) + assert len(w) == 2 + newp(new_pointer_type(BUCharP), z3) # fine + assert len(w) == 2 # check that the warnings are associated with lines in this file assert w[1].lineno == w[0].lineno + 4 diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -66,20 +66,17 @@ "position %d from error handler out of bounds", newpos) replace = space.unicode_w(w_replace) - return replace, newpos + if decode: + return replace, newpos + else: + return replace, None, newpos return call_errorhandler def make_decode_errorhandler(self, space): return self._make_errorhandler(space, True) def make_encode_errorhandler(self, space): - errorhandler = self._make_errorhandler(space, False) - def encode_call_errorhandler(errors, encoding, reason, input, startpos, - endpos): - replace, newpos = errorhandler(errors, encoding, reason, input, - startpos, endpos) - return replace, None, newpos - return 
encode_call_errorhandler + return self._make_errorhandler(space, False) def get_unicodedata_handler(self, space): if self.unicodedata_handler: diff --git a/pypy/module/_continuation/test/conftest.py b/pypy/module/_continuation/test/conftest.py new file mode 100644 --- /dev/null +++ b/pypy/module/_continuation/test/conftest.py @@ -0,0 +1,7 @@ +import pytest +import sys + +def pytest_configure(config): + if sys.platform.startswith('linux'): + from rpython.rlib.rvmprof.cintf import configure_libbacktrace_linux + configure_libbacktrace_linux() diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -8,6 +8,35 @@ cls.w_translated = cls.space.wrap( os.path.join(os.path.dirname(__file__), 'test_translated.py')) + cls.w_stack = cls.space.appexec([], """(): + import sys + def stack(f=None): + ''' + get the call-stack of the caller or the specified frame + ''' + if f is None: + f = sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame cycle + res.append('...') + break + if f.f_code.co_name == 'runtest': + # if we are running with -A, cut all the stack above + # the test function + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + #print res + return res + return stack + """) + if cls.runappdirect: + # make sure that "self.stack" does not pass the self + cls.w_stack = staticmethod(cls.w_stack.im_func) def test_new_empty(self): from _continuation import continulet @@ -290,66 +319,100 @@ def test_random_switching(self): from _continuation import continulet # + seen = [] + # def t1(c1): - return c1.switch() + seen.append(3) + res = c1.switch() + seen.append(6) + return res + # def s1(c1, n): + seen.append(2) assert n == 123 c2 = t1(c1) - return c1.switch('a') + 1 + seen.append(7) + res = c1.switch('a') + 1 + seen.append(10) + return res # def s2(c2, c1): + seen.append(5) res = c1.switch(c2) + seen.append(8) assert res == 'a' - return c2.switch('b') + 2 + res = c2.switch('b') + 2 + seen.append(12) + return res # def f(): + seen.append(1) c1 = continulet(s1, 123) c2 = continulet(s2, c1) c1.switch() + seen.append(4) res = c2.switch() + seen.append(9) assert res == 'b' res = c1.switch(1000) + seen.append(11) assert res == 1001 - return c2.switch(2000) + res = c2.switch(2000) + seen.append(13) + return res # res = f() assert res == 2002 + assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] def test_f_back(self): import sys from _continuation import continulet + stack = self.stack # - def g(c): + def bar(c): + assert stack() == ['bar', 'foo', 'test_f_back'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) + # + assert stack() == ['bar', 'foo', 'main', 'test_f_back'] c.switch(sys._getframe(1).f_back) - assert sys._getframe(2) is f3.f_back + # + assert stack() == ['bar', 'foo', 'main2', 'test_f_back'] + assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) - def f(c): - g(c) + def foo(c): + bar(c) # - c = continulet(f) - f1 = c.switch() - assert f1.f_code.co_name == 'g' - f2 = c.switch() - assert f2.f_code.co_name == 'f' - f3 = c.switch() - assert f3 is f2 - assert f1.f_back is f3 + assert stack() == ['test_f_back'] + c = continulet(foo) + f1_bar = c.switch() + assert f1_bar.f_code.co_name == 'bar' + f2_foo = c.switch() + assert f2_foo.f_code.co_name == 'foo' + f3_foo = c.switch() + assert f3_foo is f2_foo + assert f1_bar.f_back is f3_foo + # def 
main(): - f4 = c.switch() - assert f4.f_code.co_name == 'main', repr(f4.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f4_main = c.switch() + assert f4_main.f_code.co_name == 'main' + assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack() == ['main', 'test_f_back'] + assert stack(f1_bar) == ['bar', 'foo', '...'] + # def main2(): - f5 = c.switch() - assert f5.f_code.co_name == 'main2', repr(f5.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f5_main2 = c.switch() + assert f5_main2.f_code.co_name == 'main2' + assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack(f1_bar) == ['bar', 'foo', '...'] + # main() main2() res = c.switch() assert res is None - assert f3.f_back is None + assert f3_foo.f_back is None def test_traceback_is_complete(self): import sys diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -5,6 +5,7 @@ py.test.skip("to run on top of a translated pypy-c") import sys, random +from rpython.tool.udir import udir # ____________________________________________________________ @@ -92,6 +93,33 @@ from pypy.conftest import option if not option.runappdirect: py.test.skip("meant only for -A run") + cls.w_vmprof_file = cls.space.wrap(str(udir.join('profile.vmprof'))) + + def test_vmprof(self): + """ + The point of this test is to check that we do NOT segfault. In + particular, we need to ensure that vmprof does not sample the stack in + the middle of a switch, else we read nonsense. + """ + try: + import _vmprof + except ImportError: + py.test.skip("no _vmprof") + # + def switch_forever(c): + while True: + c.switch() + # + f = open(self.vmprof_file, 'w+b') + _vmprof.enable(f.fileno(), 1/250.0, False, False, False, False) + c = _continuation.continulet(switch_forever) + for i in range(10**7): + if i % 100000 == 0: + print i + c.switch() + _vmprof.disable() + f.close() + def _setup(): for _i in range(20): diff --git a/pypy/module/_cppyy/__init__.py b/pypy/module/_cppyy/__init__.py --- a/pypy/module/_cppyy/__init__.py +++ b/pypy/module/_cppyy/__init__.py @@ -1,28 +1,27 @@ from pypy.interpreter.mixedmodule import MixedModule class Module(MixedModule): - "This module provides runtime bindings to C++ code for which reflection\n\ - info has been generated. Current supported back-ends are Reflex and CINT.\n\ - See http://doc.pypy.org/en/latest/cppyy.html for full details." + "This module brigdes the cppyy frontend with its backend, through PyPy.\n\ + See http://cppyy.readthedocs.io/en/latest for full details." 
interpleveldefs = { - '_load_dictionary' : 'interp_cppyy.load_dictionary', '_resolve_name' : 'interp_cppyy.resolve_name', '_scope_byname' : 'interp_cppyy.scope_byname', - '_template_byname' : 'interp_cppyy.template_byname', + '_is_template' : 'interp_cppyy.is_template', '_std_string_name' : 'interp_cppyy.std_string_name', '_set_class_generator' : 'interp_cppyy.set_class_generator', '_set_function_generator': 'interp_cppyy.set_function_generator', '_register_class' : 'interp_cppyy.register_class', '_get_nullptr' : 'interp_cppyy.get_nullptr', - 'CPPInstanceBase' : 'interp_cppyy.W_CPPInstance', + 'CPPClassBase' : 'interp_cppyy.W_CPPClass', 'addressof' : 'interp_cppyy.addressof', + '_bind_object' : 'interp_cppyy._bind_object', 'bind_object' : 'interp_cppyy.bind_object', + 'move' : 'interp_cppyy.move', } appleveldefs = { '_init_pythonify' : 'pythonify._init_pythonify', - 'load_reflection_info' : 'pythonify.load_reflection_info', 'add_pythonization' : 'pythonify.add_pythonization', 'Template' : 'pythonify.CPPTemplate', } diff --git a/pypy/module/_cppyy/backend/create_cppyy_package.py b/pypy/module/_cppyy/backend/create_cppyy_package.py deleted file mode 100755 --- a/pypy/module/_cppyy/backend/create_cppyy_package.py +++ /dev/null @@ -1,649 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function - -import os, sys -import argparse, re, shutil, tarfile, urllib2 - - -DEBUG_TESTBUILD = False - -TARBALL_CACHE_DIR = 'releases' - -ROOT_KEEP = ['build', 'cmake', 'config', 'core', 'etc', 'interpreter', - 'io', 'LICENSE', 'net', 'Makefile', 'CMakeLists.txt', 'math', - 'main'] # main only needed in more recent root b/c of rootcling -ROOT_CORE_KEEP = ['CMakeLists.txt', 'base', 'clib', 'clingutils', 'cont', - 'dictgen', 'foundation', 'lzma', 'macosx', 'meta', - 'metacling', 'metautils', 'rootcling_stage1', 'textinput', - 'thread', 'unix', 'utils', 'winnt', 'zip'] -ROOT_IO_KEEP = ['CMakeLists.txt', 'io', 'rootpcm'] -ROOT_NET_KEEP = ['CMakeLists.txt', 'net'] -ROOT_MATH_KEEP = ['CMakeLists.txt', 'mathcore'] -ROOT_ETC_KEEP = ['Makefile.arch', 'class.rules', 'cmake', 'dictpch', - 'gdb-backtrace.sh', 'gitinfo.txt', 'helgrind-root.supp', - 'hostcert.conf', 'system.plugins-ios', - 'valgrind-root-python.supp', 'valgrind-root.supp', 'vmc'] - -ROOT_EXPLICIT_REMOVE = ['core/base/v7', 'math/mathcore/v7', 'io/io/v7'] - - -ERR_RELEASE_NOT_FOUND = 2 - - -# -## CLI arguments -# -class ReleaseValidation(argparse.Action): - def __call__(self, parser, namespace, value, option_string=None): - if not re.match(r'6\.\d\d\.\d\d', value): - raise argparse.ArgumentTypeError( - "release number should of the form '6.dd.dd'") - setattr(namespace, self.dest, value) - return value - -parser = argparse.ArgumentParser( - description='Build PyPi package for cppyy containing the minimum of ROOT') -parser.add_argument('-r', '--release', type=str, nargs='?', - action=ReleaseValidation, help='ROOT release to use') - -args = parser.parse_args() - - -# -## ROOT source pull and cleansing -# -def clean_directory(directory, keeplist, trim_cmake=True): - removed_entries = [] - for entry in os.listdir(directory): - if entry[0] == '.' 
or entry in keeplist: - continue - removed_entries.append(entry) - entry = os.path.join(directory, entry) - print('now removing', entry) - if os.path.isdir(entry): - shutil.rmtree(entry) - else: - os.remove(entry) - - if not trim_cmake: - return - - # now take the removed entries out of the CMakeLists.txt - if removed_entries: - inp = os.path.join(directory, 'CMakeLists.txt') - print('trimming', inp) - outp = inp+'.new' - new_cml = open(outp, 'w') - for line in open(inp).readlines(): - if ('add_subdirectory' in line) or\ - ('COMMAND' in line and 'copy' in line) or\ - ('ROOT_ADD_TEST_SUBDIRECTORY' in line) or\ - ('install(DIRECTORY' in line): - for sub in removed_entries: - if sub in line: - line = '#'+line - break - new_cml.write(line) - new_cml.close() - os.rename(outp, inp) - else: - print('reusing existing %s/CMakeLists.txt' % (directory,)) - - -class ReleaseValidation(argparse.Action): - def __call__(self, parser, namespace, value, option_string=None): - if not re.match(r'6\.\d\d\.\d\d', value): - raise argparse.ArgumentTypeError( - "release number should of the form '6.dd.dd'") - setattr(namespace, self.dest, value) - return value - -parser = argparse.ArgumentParser( - description='Build PyPi package for cppyy containing the minimum of ROOT') -parser.add_argument('-r', '--release', type=str, nargs='?', - action=ReleaseValidation, help='ROOT release to use') - -args = parser.parse_args() - -if not os.path.exists(TARBALL_CACHE_DIR): - os.mkdir(TARBALL_CACHE_DIR) - -if args.release: - # use provided release - fn = 'root_v%s.source.tar.gz' % args.release - addr = 'https://root.cern.ch/download/'+fn - if not os.path.exists(os.path.join(TARBALL_CACHE_DIR, fn)): - try: - print('retrieving', fn) - resp = urllib2.urlopen(addr, fn) - out = open(os.path.join(TARBALL_CACHE_DIR, fn), 'wb') - out.write(resp.read()) - out.close() - except urllib2.HTTPError: - print('release %s not found' % args.release) - sys.exit(ERR_RELEASE_NOT_FOUND) - else: - print('reusing', fn, 'from local directory') -else: - print('provide release ... getting latest release is not yet implemented ...') - sys.exit(1) - # get latest and set fn, args.release, etc. 
- -# construct version for package -args.version = '' -testnext = False -for c in args.release: - if testnext: - testnext = False - if c == '0': - continue - if c == '.': - testnext = True - args.version += c -args.version += '.0' - -fn = os.path.join(TARBALL_CACHE_DIR, fn) -pkgdir = os.path.join('root-'+args.release) -if not os.path.exists(pkgdir): - print('now extracting', args.release) - tf = tarfile.TarFile.gzopen(fn) - tf.extractall() - tf.close() -else: - print('reusing existing directory', pkgdir) - -# remove everything except for the listed set of libraries -os.chdir(pkgdir) - -clean_directory(os.path.curdir, ROOT_KEEP) -clean_directory('core', ROOT_CORE_KEEP) -clean_directory('etc', ROOT_ETC_KEEP, trim_cmake=False) -clean_directory('io', ROOT_IO_KEEP) -clean_directory('math', ROOT_MATH_KEEP) -clean_directory('net', ROOT_NET_KEEP) - - -# trim main (only need rootcling) -print('trimming main') -for entry in os.listdir('main/src'): - if entry != 'rootcling.cxx': - os.remove('main/src/'+entry) -inp = 'main/CMakeLists.txt' -outp = inp+'.new' -new_cml = open(outp, 'w') -for line in open(inp).readlines(): - if ('ROOT_EXECUTABLE' in line or\ - 'SET_TARGET_PROPERTIES' in line) and\ - not 'rootcling' in line: - line = '#'+line - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - - -# remove afterimage and ftgl explicitly -print('trimming externals') -for cmf in ['AfterImage', 'FTGL']: - os.remove('cmake/modules/Find%s.cmake' % (cmf,)) -inp = 'cmake/modules/SearchInstalledSoftware.cmake' -outp = inp+'.new' -now_stripping = False -new_cml = open(outp, 'w') -for line in open(inp).readlines(): - if '#---Check for ftgl if needed' == line[0:28] or\ - '#---Check for AfterImage' == line[0:24]: - now_stripping = True - elif '#---Check' == line[0:9]: - now_stripping = False - if now_stripping: - line = '#'+line - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - -inp = 'cmake/modules/RootBuildOptions.cmake' -outp = inp+'.new' -new_cml = open(outp, 'w') -for line in open(inp).readlines(): - if 'ROOT_BUILD_OPTION(builtin_ftgl' in line or\ - 'ROOT_BUILD_OPTION(builtin_afterimage' in line: - line = '#'+line - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - - -# remove testing and examples -print('trimming testing') -inp = 'CMakeLists.txt' -outp = inp+'.new' -now_stripping = False -new_cml = open(outp, 'w') -for line in open(inp).readlines(): - if '#---Configure Testing using CTest' == line[0:33] or\ - '#---hsimple.root' == line[0:16]: - now_stripping = True - elif '#---Packaging' == line[0:13] or\ - '#---version' == line[0:11]: - now_stripping = False - if now_stripping: - line = '#'+line - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - -print('trimming RootCPack') -inp = 'cmake/modules/RootCPack.cmake' -outp = inp+'.new' -new_cml = open(outp, 'w') -for line in open(inp): - if 'README.txt' in line: - line = '#'+line - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - -# some more explicit removes: -for dir_to_remove in ROOT_EXPLICIT_REMOVE: - try: - shutil.rmtree(dir_to_remove) - except OSError: - pass - -# special fixes -inp = 'core/base/src/TVirtualPad.cxx' -outp = inp+'.new' -new_cml = open(outp, 'w') -for line in open(inp): - if '#include "X3DBuffer.h"' == line[0:22]: - line = """//#include "X3DBuffer.h" -typedef struct _x3d_sizeof_ { - int numPoints; - int numSegs; - int numPolys; -} Size3D; -""" - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - -inp = 'math/mathcore/src/Fitter.cxx' -if os.path.exists(inp): - outp = inp+'.new' - 
new_cml = open(outp, 'w') - for line in open(inp): - if '#include "TF1.h"' in line: - continue - new_cml.write(line) - new_cml.close() - os.rename(outp, inp) - -# done -os.chdir(os.path.pardir) - -# debugging: run a test build -if DEBUG_TESTBUILD: - print('running a debug test build') - tb = "test_builddir" - if os.path.exists(tb): - shutil.rmtree(tb) - os.mkdir(tb) - os.chdir(tb) - os.system('cmake ../%s -DCMAKE_INSTALL_PREFIX=../install -Dminimal=ON -Dasimage=OFF' % pkgdir) - os.system('make -j 32') - - -# -## package creation -# -countdown = 0 -pidir = 'Package-'+args.release -print('creating package', pidir) -if not os.path.exists(pidir): - os.mkdir(pidir) -os.chdir(pidir); countdown += 1 - -print('creating LICENSE.txt') -with open('LICENSE.txt', 'w') as outp: - outp.write("""There are three main parts: - - LLVM: distributed under University of Illinois/NCSA Open Source License - https://opensource.org/licenses/UoI-NCSA.php - ROOT: distributed under LGPL 2.1 - https://root.cern.ch/license - Cppyy: distributed under LBNL BSD - https://fedoraproject.org/wiki/Licensing/LBNLBSD -""") - -print('creating MANIFEST.in') -with open('MANIFEST.in', 'w') as outp: - outp.write("""# Include the license file -include LICENSE.txt - -# Include the data files -recursive-include src * -""") - -print('creating README.rst') -with open('README.rst', 'w') as outp: - outp.write("""PyPy cling-support -================== - ----- - -Find the documentation here: - http://doc.pypy.org/en/latest/cppyy.html -""") - -print('creating setup.cfg') -with open('setup.cfg', 'w') as outp: - outp.write("""[bdist_wheel] -universal=0 -""") - -print('creating setup.py') -with open('setup.py', 'w') as outp: - outp.write("""import os, sys, subprocess -from setuptools import setup, find_packages -from distutils import log -from distutils.command.build import build as _build -from setuptools.command.install import install as _install -from distutils.sysconfig import get_python_lib -from distutils.errors import DistutilsSetupError -from codecs import open - -here = os.path.abspath(os.path.dirname(__file__)) -with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f: - long_description = f.read() - -builddir = None -def get_builddir(): - global builddir - if builddir is None: - topdir = os.getcwd() - builddir = os.path.join(topdir, 'builddir') - return builddir - -srcdir = None -def get_srcdir(): - global srcdir - if srcdir is None: - topdir = os.getcwd() - srcdir = os.path.join(topdir, 'src', 'backend') - return srcdir - -class my_cmake_build(_build): - def __init__(self, dist, *args, **kwargs): - _build.__init__(self, dist, *args, **kwargs) - # TODO: can't seem to find a better way of getting hold of - # the install_lib parameter during the build phase ... - prefix = '' - try: - prefix = dist.get_command_obj('install').install_lib - except AttributeError: - pass - if not prefix: - prefix = get_python_lib(1, 0) - self.prefix = os.path.join(prefix, 'cppyy_backend') - - def run(self): - # base run - _build.run(self) - - # custom run - log.info('Now building libcppyy_backend.so and dependencies') - builddir = get_builddir() - srcdir = get_srcdir() - if not os.path.exists(builddir): - log.info('Creating build directory %s ...' 
% builddir) - os.makedirs(builddir) - - os.chdir(builddir) - log.info('Running cmake for cppyy_backend') - if subprocess.call([ - 'cmake', srcdir, '-Dminimal=ON -Dasimage=OFF', - '-DCMAKE_INSTALL_PREFIX='+self.prefix]) != 0: - raise DistutilsSetupError('Failed to configure cppyy_backend') - - nprocs = os.getenv("MAKE_NPROCS") - if nprocs: - try: - ival = int(nprocs) - nprocs = '-j'+nprocs - except ValueError: - log.warn("Integer expected for MAKE_NPROCS, but got %s (ignored)", nprocs) - nprocs = '-j1' - else: - nprocs = '-j1' - log.info('Now building cppyy_backend and dependencies ...') - if subprocess.call(['make', nprocs]) != 0: - raise DistutilsSetupError('Failed to build cppyy_backend') - - log.info('build finished') - -class my_libs_install(_install): - def run(self): - # base install - _install.run(self) - - # custom install - log.info('Now installing libcppyy_backend.so and dependencies') - builddir = get_builddir() - if not os.path.exists(builddir): - raise DistutilsSetupError('Failed to find build dir!') - os.chdir(builddir) - - prefix = self.install_lib - log.info('Now installing in %s ...', prefix) - if subprocess.call(['make', 'install']) != 0: - raise DistutilsSetupError('Failed to install cppyy_backend') - - log.info('install finished') - - def get_outputs(self): - outputs = _install.get_outputs(self) - outputs.append(os.path.join(self.install_lib, 'cppyy_backend')) - return outputs - -setup( - name='PyPy-cppyy-backend', -""") - outp.write(" version='%s', # corresponds to ROOT %s, extra number is for packager\n"\ - % (args.version, args.release)) - outp.write(""" description='Cling support for PyPy', - long_description=long_description, - - url='http://pypy.org', - - # Author details - author='PyPy Developers', - author_email='pypy-dev at python.org', - - license='LLVM: UoI-NCSA; ROOT: LGPL 2.1; Cppyy: LBNL BSD', - - classifiers=[ - 'Development Status :: 4 - Beta', - - 'Intended Audience :: Developers', - - 'Topic :: Software Development', - 'Topic :: Software Development :: Interpreters', - - #'License :: OSI Approved :: MIT License', - - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: Implementation :: PyPy', - 'Programming Language :: C', - 'Programming Language :: C++', - - 'Natural Language :: English' - ], - - keywords='interpreter development', - - packages=find_packages('src', ['backend']), - include_package_data=True, - - extras_require={ - }, - - cmdclass = { - 'build': my_cmake_build, - 'install': my_libs_install, - }, -) -""") - - -print('creating src ... ROOT part') -if not os.path.exists('src'): - os.mkdir('src') -os.chdir('src'); countdown += 1 -if not os.path.exists('backend'): - src = os.path.join(os.path.pardir, os.path.pardir, pkgdir) - print('now copying', src) - shutil.copytree(src, 'backend') - -print('creating src ... 
cppyy part') -os.chdir('backend'); countdown += 1 -if not os.path.exists('cppyy'): - os.mkdir('cppyy') - os.chdir('cppyy'); countdown += 1 - - with open('CMakeLists.txt', 'w') as outp: - outp.write("""############################################################################ -# CMakeLists.txt file for building cppyy package -############################################################################ - From pypy.commits at gmail.com Tue Dec 5 13:13:01 2017 From: pypy.commits at gmail.com (arigo) Date: Tue, 05 Dec 2017 10:13:01 -0800 (PST) Subject: [pypy-commit] pypy memory-accounting: add peak memory usage reporting Message-ID: <5a26e1ad.8f9ddf0a.506d5.4f36@mx.google.com> Author: Armin Rigo Branch: memory-accounting Changeset: r93275:1cdb7e0ad36b Date: 2017-12-05 19:11 +0100 http://bitbucket.org/pypy/pypy/changeset/1cdb7e0ad36b/ Log: add peak memory usage reporting diff --git a/pypy/module/gc/app_referents.py b/pypy/module/gc/app_referents.py --- a/pypy/module/gc/app_referents.py +++ b/pypy/module/gc/app_referents.py @@ -52,8 +52,10 @@ class GcStats(object): def __init__(self, s): self._s = s - for item in ('total_gc_memory', 'jit_backend_used', 'total_memory_pressure', - 'total_allocated_memory', 'jit_backend_allocated'): + for item in ('total_gc_memory', 'jit_backend_used', + 'total_memory_pressure', + 'total_allocated_memory', 'jit_backend_allocated', + 'peak_memory', 'peak_allocated_memory'): setattr(self, item, self._format(getattr(self._s, item))) self.memory_used_sum = self._format(self._s.total_gc_memory + self._s.total_memory_pressure + self._s.jit_backend_used) @@ -68,21 +70,26 @@ def repr(self): return """Total memory consumed: -GC used: %s +GC used: %s (peak: %s) raw assembler used: %s memory pressure: %s ----------------------------- Total: %s Total memory allocated: -GC allocated: %s +GC allocated: %s (peak: %s) raw assembler allocated: %s memory pressure: %s ----------------------------- Total: %s -""" % (self.total_gc_memory, self.jit_backend_used, self.total_memory_pressure, +""" % (self.total_gc_memory, self.peak_memory, + self.jit_backend_used, + self.total_memory_pressure, self.memory_used_sum, - self.total_allocated_memory, self.jit_backend_allocated, self.total_memory_pressure, + + self.total_allocated_memory, self.peak_allocated_memory, + self.jit_backend_allocated, + self.total_memory_pressure, self.memory_allocated_sum) def get_stats(): diff --git a/pypy/module/gc/referents.py b/pypy/module/gc/referents.py --- a/pypy/module/gc/referents.py +++ b/pypy/module/gc/referents.py @@ -176,6 +176,8 @@ self.total_memory_pressure = rgc.get_stats(rgc.TOTAL_MEMORY_PRESSURE) self.total_gc_memory = rgc.get_stats(rgc.TOTAL_MEMORY) self.total_allocated_memory = rgc.get_stats(rgc.TOTAL_ALLOCATED_MEMORY) + self.peak_memory = rgc.get_stats(rgc.PEAK_MEMORY) + self.peak_allocated_memory = rgc.get_stats(rgc.PEAK_ALLOCATED_MEMORY) self.jit_backend_allocated = jit_hooks.stats_asmmemmgr_allocated(None) self.jit_backend_used = jit_hooks.stats_asmmemmgr_used(None) @@ -184,6 +186,10 @@ cls=W_GcStats, wrapfn="newint"), total_gc_memory=interp_attrproperty("total_gc_memory", cls=W_GcStats, wrapfn="newint"), + peak_allocated_memory=interp_attrproperty("peak_allocated_memory", + cls=W_GcStats, wrapfn="newint"), + peak_memory=interp_attrproperty("peak_memory", + cls=W_GcStats, wrapfn="newint"), total_allocated_memory=interp_attrproperty("total_allocated_memory", cls=W_GcStats, wrapfn="newint"), jit_backend_allocated=interp_attrproperty("jit_backend_allocated", diff --git 
a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -372,6 +372,7 @@ self.old_rawmalloced_objects = self.AddressStack() self.raw_malloc_might_sweep = self.AddressStack() self.rawmalloced_total_size = r_uint(0) + self.rawmalloced_peak_size = r_uint(0) self.gc_state = STATE_SCANNING # @@ -997,6 +998,8 @@ # Record the newly allocated object and its full malloced size. # The object is young or old depending on the argument. self.rawmalloced_total_size += r_uint(allocsize) + self.rawmalloced_peak_size = max(self.rawmalloced_total_size, + self.rawmalloced_peak_size) if alloc_young: if not self.young_rawmalloced_objects: self.young_rawmalloced_objects = self.AddressDict() @@ -1189,6 +1192,19 @@ """ return self.ac.total_memory_alloced + self.rawmalloced_total_size + def get_peak_memory_alloced(self): + """ Return the peak memory ever allocated. The peaks + can be at different times, but we just don't worry for now + """ + return self.ac.peak_memory_alloced + self.rawmalloced_peak_size + + def get_peak_memory_used(self): + """ Return the peak memory GC felt ever responsible for + """ + mem_allocated = max(self.ac.peak_memory_used, + self.ac.total_memory_used) + return mem_allocated + self.rawmalloced_peak_size + def threshold_reached(self, extra=0): return (self.next_major_collection_threshold - float(self.get_total_memory_used())) < float(extra) @@ -2161,6 +2177,8 @@ # size_gc_header = self.gcheaderbuilder.size_gc_header self.rawmalloced_total_size += r_uint(raw_malloc_usage(totalsize)) + self.rawmalloced_peak_size = max(self.rawmalloced_total_size, + self.rawmalloced_peak_size) self.old_rawmalloced_objects.append(arena + size_gc_header) return arena @@ -2929,6 +2947,10 @@ if stats_no == rgc.TOTAL_MEMORY: return intmask(self.get_total_memory_used() + self.nursery_size) + elif stats_no == rgc.PEAK_MEMORY: + return intmask(self.get_peak_memory_used() + self.nursery_size) + elif stats_no == rgc.PEAK_ALLOCATED_MEMORY: + return intmask(self.get_peak_memory_alloced() + self.nursery_size) elif stats_no == rgc.TOTAL_ALLOCATED_MEMORY: return intmask(self.get_total_memory_alloced() + self.nursery_size) elif stats_no == rgc.TOTAL_MEMORY_PRESSURE: diff --git a/rpython/memory/gc/minimarkpage.py b/rpython/memory/gc/minimarkpage.py --- a/rpython/memory/gc/minimarkpage.py +++ b/rpython/memory/gc/minimarkpage.py @@ -140,7 +140,9 @@ # the total memory used, counting every block in use, without # the additional bookkeeping stuff. self.total_memory_used = r_uint(0) + self.peak_memory_used = r_uint(0) self.total_memory_alloced = r_uint(0) + self.peak_memory_alloced = r_uint(0) def _new_page_ptr_list(self, length): @@ -295,6 +297,9 @@ # be a page-aligned address arena_base = llarena.arena_malloc(self.arena_size, False) self.total_memory_alloced += self.arena_size + self.peak_memory_alloced = max(self.total_memory_alloced, + self.peak_memory_alloced) + if not arena_base: out_of_memory("out of memory: couldn't allocate the next arena") arena_end = arena_base + self.arena_size @@ -321,6 +326,8 @@ """Prepare calls to mass_free_incremental(): moves the chained lists into 'self.old_xxx'. 
""" + self.peak_memory_used = max(self.peak_memory_used, + self.total_memory_used) self.total_memory_used = r_uint(0) # size_class = self.small_request_threshold >> WORD_POWER_2 diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py --- a/rpython/rlib/rgc.py +++ b/rpython/rlib/rgc.py @@ -650,7 +650,8 @@ else: return id(gcref._x) -TOTAL_MEMORY, TOTAL_ALLOCATED_MEMORY, TOTAL_MEMORY_PRESSURE = range(3) +(TOTAL_MEMORY, TOTAL_ALLOCATED_MEMORY, TOTAL_MEMORY_PRESSURE, + PEAK_MEMORY, PEAK_ALLOCATED_MEMORY) = range(5) @not_rpython def get_stats(stat_no): From pypy.commits at gmail.com Tue Dec 5 14:51:26 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 05 Dec 2017 11:51:26 -0800 (PST) Subject: [pypy-commit] pypy default: skip unicode tests of macos file names, as per cpython issue 31380 Message-ID: <5a26f8be.4a371c0a.c61d4.9c49@mx.google.com> Author: Matti Picus Branch: Changeset: r93277:9dad012e168e Date: 2017-12-05 20:56 +0200 http://bitbucket.org/pypy/pypy/changeset/9dad012e168e/ Log: skip unicode tests of macos file names, as per cpython issue 31380 diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -31,9 +31,15 @@ pdir.join('file2').write("test2") pdir.join('another_longer_file_name').write("test3") mod.pdir = pdir - unicode_dir = udir.ensure('fi\xc5\x9fier.txt', dir=True) + if sys.platform == 'darwin': + # see issue https://bugs.python.org/issue31380 + unicode_dir = udir.ensure('fixc5x9fier.txt', dir=True) + file_name = 'cafxe9' + else: + unicode_dir = udir.ensure('fi\xc5\x9fier.txt', dir=True) + file_name = 'caf\xe9' unicode_dir.join('somefile').write('who cares?') - unicode_dir.join('caf\xe9').write('who knows?') + unicode_dir.join(file_name).write('who knows?') mod.unicode_dir = unicode_dir # in applevel tests, os.stat uses the CPython os.stat. 
From pypy.commits at gmail.com Tue Dec 5 17:03:08 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 05 Dec 2017 14:03:08 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a27179c.8dd71c0a.9da4.a88d@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93278:e7bcb8094e51 Date: 2017-12-05 22:02 +0000 http://bitbucket.org/pypy/pypy/changeset/e7bcb8094e51/ Log: hg merge default diff --git a/lib_pypy/resource.py b/lib_pypy/resource.py --- a/lib_pypy/resource.py +++ b/lib_pypy/resource.py @@ -20,6 +20,7 @@ or via the attributes ru_utime, ru_stime, ru_maxrss, and so on.""" __metaclass__ = _structseq.structseqtype + name = "resource.struct_rusage" ru_utime = _structseq.structseqfield(0, "user time used") ru_stime = _structseq.structseqfield(1, "system time used") diff --git a/pypy/module/cpyext/test/test_codecs.py b/pypy/module/cpyext/test/test_codecs.py --- a/pypy/module/cpyext/test/test_codecs.py +++ b/pypy/module/cpyext/test/test_codecs.py @@ -11,5 +11,5 @@ w_encoded = space.call_method(w_encoder, 'encode', space.wrap(u'späm')) w_decoder = PyCodec_IncrementalDecoder(space, utf8, None) w_decoded = space.call_method(w_decoder, 'decode', w_encoded) - assert space.unwrap(w_decoded) == u'späm' + assert space.unicode_w(w_decoded) == u'späm' rffi.free_charp(utf8) diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py --- a/pypy/module/cpyext/test/test_eval.py +++ b/pypy/module/cpyext/test/test_eval.py @@ -131,7 +131,7 @@ finally: rffi.free_charp(buf) w_a = space.getitem(w_globals, space.wrap("a")) - assert space.unwrap(w_a) == u'caf\xe9' + assert space.unicode_w(w_a) == u'caf\xe9' lltype.free(flags, flavor='raw') def test_run_file(self, space): diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py --- a/pypy/module/cpyext/test/test_object.py +++ b/pypy/module/cpyext/test/test_object.py @@ -8,7 +8,7 @@ from pypy.module.cpyext.object import ( PyObject_IsTrue, PyObject_Not, PyObject_GetAttrString, PyObject_DelAttrString, PyObject_GetAttr, PyObject_DelAttr, - PyObject_GetItem, + PyObject_GetItem, PyObject_IsInstance, PyObject_IsSubclass, PyObject_AsFileDescriptor, PyObject_Hash) @@ -570,7 +570,7 @@ PyObject *a = PyTuple_GetItem(args, 0); PyObject *b = PyTuple_GetItem(args, 1); int res = PyObject_RichCompareBool(a, b, Py_EQ); - return PyLong_FromLong(res); + return PyLong_FromLong(res); """),]) a = float('nan') b = float('nan') diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -372,7 +372,7 @@ word = space.wrap(u'spam') array = rffi.cast(rffi.CWCHARP, PyUnicode_AsUnicode(space, word)) array2 = PyUnicode_AsUnicode(space, word) - for (i, char) in enumerate(space.unwrap(word)): + for (i, char) in enumerate(space.unicode_w(word)): assert array[i] == char assert array2[i] == char with raises_w(space, TypeError): @@ -409,12 +409,12 @@ def test_fromstring(self, space): s = rffi.str2charp(u'sp\x09m'.encode("utf-8")) w_res = PyUnicode_FromString(space, s) - assert space.unwrap(w_res) == u'sp\x09m' + assert space.unicode_w(w_res) == u'sp\x09m' res = PyUnicode_FromStringAndSize(space, s, 4) w_res = from_ref(space, res) Py_DecRef(space, res) - assert space.unwrap(w_res) == u'sp\x09m' + assert space.unicode_w(w_res) == u'sp\x09m' rffi.free_charp(s) def test_internfromstring(self, space): @@ -457,17 +457,17 @@ u = 
rffi.str2charp(u'sp\x134m'.encode("utf-8")) w_u = PyUnicode_DecodeUTF8(space, u, 5, None) assert space.type(w_u) is space.w_unicode - assert space.unwrap(w_u) == u'sp\x134m' + assert space.unicode_w(w_u) == u'sp\x134m' w_u = PyUnicode_DecodeUTF8(space, u, 2, None) assert space.type(w_u) is space.w_unicode - assert space.unwrap(w_u) == 'sp' + assert space.unicode_w(w_u) == 'sp' rffi.free_charp(u) def test_encode_utf8(self, space): u = rffi.unicode2wcharp(u'sp\x09m') w_s = PyUnicode_EncodeUTF8(space, u, 4, None) - assert space.unwrap(w_s) == u'sp\x09m'.encode('utf-8') + assert space.unicode_w(w_s) == u'sp\x09m'.encode('utf-8') rffi.free_wcharp(u) def test_encode_decimal(self, space): @@ -631,12 +631,12 @@ def test_decode(self, space): b_text = rffi.str2charp('caf\x82xx') b_encoding = rffi.str2charp('cp437') - assert space.unwrap( + assert space.unicode_w( PyUnicode_Decode(space, b_text, 4, b_encoding, None)) == u'caf\xe9' w_text = PyUnicode_FromEncodedObject(space, space.newbytes("test"), b_encoding, None) assert space.isinstance_w(w_text, space.w_unicode) - assert space.unwrap(w_text) == "test" + assert space.unicode_w(w_text) == "test" with raises_w(space, TypeError): PyUnicode_FromEncodedObject(space, space.wrap(u"test"), @@ -652,7 +652,8 @@ u_text = u'abcdefg' s_text = space.str_w(PyUnicode_AsEncodedString(space, space.wrap(u_text), null_charp, null_charp)) b_text = rffi.str2charp(s_text) - assert space.unwrap(PyUnicode_Decode(space, b_text, len(s_text), null_charp, null_charp)) == u_text + assert space.unicode_w(PyUnicode_Decode( + space, b_text, len(s_text), null_charp, null_charp)) == u_text with raises_w(space, TypeError): PyUnicode_FromEncodedObject( space, space.wrap(u_text), null_charp, None) @@ -769,7 +770,7 @@ def test_concat(self, space): w_res = PyUnicode_Concat(space, space.wrap(u'a'), space.wrap(u'b')) - assert space.unwrap(w_res) == u'ab' + assert space.unicode_w(w_res) == u'ab' def test_copy(self, space): w_x = space.wrap(u"abcd\u0660") @@ -840,29 +841,30 @@ w_format = space.wrap(u'hi %s') w_args = space.wrap((u'test',)) w_formated = PyUnicode_Format(space, w_format, w_args) - assert space.unwrap(w_formated) == space.unwrap(space.mod(w_format, w_args)) + assert (space.unicode_w(w_formated) == + space.unicode_w(space.mod(w_format, w_args))) def test_join(self, space): w_sep = space.wrap(u'') w_seq = space.wrap([u'a', u'b']) w_joined = PyUnicode_Join(space, w_sep, w_seq) - assert space.unwrap(w_joined) == u'ab' + assert space.unicode_w(w_joined) == u'ab' def test_fromordinal(self, space): w_char = PyUnicode_FromOrdinal(space, 65) - assert space.unwrap(w_char) == u'A' + assert space.unicode_w(w_char) == u'A' w_char = PyUnicode_FromOrdinal(space, 0) - assert space.unwrap(w_char) == u'\0' + assert space.unicode_w(w_char) == u'\0' w_char = PyUnicode_FromOrdinal(space, 0xFFFF) - assert space.unwrap(w_char) == u'\uFFFF' + assert space.unicode_w(w_char) == u'\uFFFF' def test_replace(self, space): w_str = space.wrap(u"abababab") w_substr = space.wrap(u"a") w_replstr = space.wrap(u"z") - assert u"zbzbabab" == space.unwrap( + assert u"zbzbabab" == space.unicode_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, 2)) - assert u"zbzbzbzb" == space.unwrap( + assert u"zbzbzbzb" == space.unicode_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, -1)) def test_tailmatch(self, space): diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -29,12 +29,20 @@ 
pdir.join('file2').write("test2") pdir.join('another_longer_file_name').write("test3") mod.pdir = pdir - bytes_dir = udir.ensure('fi\xc5\x9fier.txt', dir=True) + if sys.platform == 'darwin': + # see issue https://bugs.python.org/issue31380 + bytes_dir = udir.ensure('fixc5x9fier.txt', dir=True) + file_name = 'cafxe9' + surrogate_name = 'foo' + else: + bytes_dir = udir.ensure('fi\xc5\x9fier.txt', dir=True) + file_name = 'caf\xe9' + surrogate_name = 'foo\x80' bytes_dir.join('somefile').write('who cares?') - bytes_dir.join('caf\xe9').write('who knows?') + bytes_dir.join(file_name).write('who knows?') mod.bytes_dir = bytes_dir # an escaped surrogate - mod.esurrogate_dir = udir.ensure('foo\x80', dir=True) + mod.esurrogate_dir = udir.ensure(surrogate_name, dir=True) # in applevel tests, os.stat uses the CPython os.stat. # Be sure to return times with full precision diff --git a/pypy/module/unicodedata/test/test_hyp.py b/pypy/module/unicodedata/test/test_hyp.py --- a/pypy/module/unicodedata/test/test_hyp.py +++ b/pypy/module/unicodedata/test/test_hyp.py @@ -10,7 +10,7 @@ def normalize(s): w_s = space.newunicode(s) w_res = ucd.normalize(space, NF_code, w_s) - return space.unwrap(w_res) + return space.unicode_w(w_res) return normalize all_forms = ['NFC', 'NFD', 'NFKC', 'NFKD'] From pypy.commits at gmail.com Wed Dec 6 04:53:59 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 06 Dec 2017 01:53:59 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Should improve the situation already Message-ID: <5a27be37.17f71c0a.53b37.fa5c@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93279:87f548473353 Date: 2017-12-06 10:53 +0100 http://bitbucket.org/pypy/pypy/changeset/87f548473353/ Log: Should improve the situation already diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -17,6 +17,7 @@ import sys from rpython.rlib.objectmodel import enforceargs, we_are_translated, specialize +from rpython.rlib.objectmodel import always_inline from rpython.rlib.rstring import StringBuilder from rpython.rlib import jit from rpython.rlib.signature import signature @@ -734,6 +735,7 @@ def __iter__(self): return self + @always_inline def next(self): if self._pos == self._end: raise StopIteration From pypy.commits at gmail.com Wed Dec 6 04:59:37 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 06 Dec 2017 01:59:37 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: I _think_ it was meant to be like that, or else the interface is very obscure Message-ID: <5a27bf89.5de81c0a.a5ff5.1852@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93280:67e2516d5db6 Date: 2017-12-06 10:58 +0100 http://bitbucket.org/pypy/pypy/changeset/67e2516d5db6/ Log: I _think_ it was meant to be like that, or else the interface is very obscure diff --git a/targetunicode-bench.py b/targetunicode-bench.py --- a/targetunicode-bench.py +++ b/targetunicode-bench.py @@ -33,7 +33,7 @@ if len(argv) > 2 and argv[2] == "s": for i in range(int(argv[1])): res_l[0] = descr_upper_s(l[i % 100]) - if len(argv) > 2 and argv[2] == "u": + elif len(argv) > 2 and argv[2] == "u": for i in range(int(argv[1])): res_l_2[0] = descr_upper_u(u_l[i % 100]) else: From pypy.commits at gmail.com Wed Dec 6 05:14:25 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 06 Dec 2017 02:14:25 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: This is probably faster (needs non-ascii to verify) Message-ID: <5a27c301.86081c0a.97964.08da@mx.google.com> Author: Armin Rigo 
Branch: unicode-utf8 Changeset: r93281:91e03fd0b17e Date: 2017-12-06 11:13 +0100 http://bitbucket.org/pypy/pypy/changeset/91e03fd0b17e/ Log: This is probably faster (needs non-ascii to verify) diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -131,23 +131,20 @@ ordch2 = ord(code[pos+1]) if ordch1 <= 0xDF: # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz - return (((ordch1 & 0x1F) << 6) + # 0b00011111 - (ordch2 & 0x3F)) # 0b00111111 + return (ordch1 << 6) + ordch2 - ( + (0xC0 << 6) + 0x80 ) ordch3 = ord(code[pos+2]) if ordch1 <= 0xEF: # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz - return (((ordch1 & 0x0F) << 12) + # 0b00001111 - ((ordch2 & 0x3F) << 6) + # 0b00111111 - (ordch3 & 0x3F)) # 0b00111111 + return (ordch1 << 12) + (ordch2 << 6) + ordch3 - ( + (0xE0 << 12) + (0x80 << 6) + 0x80 ) ordch4 = ord(code[pos+3]) if True: # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz - return (((ordch1 & 0x07) << 18) + # 0b00000111 - ((ordch2 & 0x3F) << 12) + # 0b00111111 - ((ordch3 & 0x3F) << 6) + # 0b00111111 - (ordch4 & 0x3F)) # 0b00111111 + return (ordch1 << 18) + (ordch2 << 12) + (ordch3 << 6) + ordch4 - ( + (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 ) assert False, "unreachable" def codepoint_before_pos(code, pos): From pypy.commits at gmail.com Wed Dec 6 05:19:03 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 06 Dec 2017 02:19:03 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: This reduces the overhead from 50% to 35% Message-ID: <5a27c417.c2afdf0a.1ce51.db7b@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93282:cc3f32cc59be Date: 2017-12-06 11:18 +0100 http://bitbucket.org/pypy/pypy/changeset/cc3f32cc59be/ Log: This reduces the overhead from 50% to 35% diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -734,11 +734,43 @@ @always_inline def next(self): - if self._pos == self._end: + pos = self._pos + if pos == self._end: raise StopIteration - ret = codepoint_at_pos(self._utf8, self._pos) - self._pos = next_codepoint_pos(self._utf8, self._pos) - return ret + #----- sane-looking version: ------ + #ret = codepoint_at_pos(self._utf8, self._pos) + #self._pos = next_codepoint_pos(self._utf8, self._pos) + #return ret + #----- manually inlined version follows, with merged checks ----- + + code = self._utf8 + ordch1 = ord(code[pos]) + if ordch1 <= 0x7F: + self._pos = pos + 1 + return ordch1 + + ordch2 = ord(code[pos+1]) + if ordch1 <= 0xDF: + # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz + self._pos = pos + 2 + return (ordch1 << 6) + ordch2 - ( + (0xC0 << 6) + 0x80 ) + + ordch3 = ord(code[pos+2]) + if ordch1 <= 0xEF: + # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz + self._pos = pos + 3 + return (ordch1 << 12) + (ordch2 << 6) + ordch3 - ( + (0xE0 << 12) + (0x80 << 6) + 0x80 ) + + ordch4 = ord(code[pos+3]) + if True: + # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz + self._pos = pos + 4 + return (ordch1 << 18) + (ordch2 << 12) + (ordch3 << 6) + ordch4 - ( + (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 ) + assert False, "unreachable" + def decode_latin_1(s): if len(s) == 0: From pypy.commits at gmail.com Wed Dec 6 05:21:58 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 06 Dec 2017 02:21:58 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Another big speed-up Message-ID: <5a27c4c6.21b9df0a.1144b.0634@mx.google.com> Author: Armin Rigo Branch: 
unicode-utf8 Changeset: r93283:41d3807f2d87 Date: 2017-12-06 11:21 +0100 http://bitbucket.org/pypy/pypy/changeset/41d3807f2d87/ Log: Another big speed-up diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -680,11 +680,13 @@ return unicode_escape #, char_escape_helper class Utf8StringBuilder(object): + @always_inline def __init__(self, size=0): self._s = StringBuilder(size) self._lgt = 0 self._flag = FLAG_ASCII + @always_inline def append(self, s): # for strings self._s.append(s) @@ -692,6 +694,7 @@ self._lgt += newlgt self._flag = combine_flags(self._flag, newflag) + @always_inline def append_slice(self, s, start, end): self._s.append_slice(s, start, end) newlgt, newflag = get_utf8_length_flag(s, start, end) @@ -699,27 +702,33 @@ self._flag = combine_flags(self._flag, newflag) @signature(char(), returns=none()) + @always_inline def append_char(self, s): # for characters, ascii self._lgt += 1 self._s.append(s) + @always_inline def append_code(self, code): self._flag = combine_flags(self._flag, get_flag_from_code(code)) self._lgt += 1 unichr_as_utf8_append(self._s, code, True) + @always_inline def append_utf8(self, utf8, length, flag): self._flag = combine_flags(self._flag, flag) self._lgt += length self._s.append(utf8) + @always_inline def build(self): return self._s.build() + @always_inline def get_flag(self): return self._flag + @always_inline def get_length(self): return self._lgt From pypy.commits at gmail.com Wed Dec 6 05:43:10 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 06 Dec 2017 02:43:10 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Inline the ascii part of unichr_as_utf8_append() Message-ID: <5a27c9be.1288df0a.8cad7.0a02@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93284:6d7f2e710bd2 Date: 2017-12-06 11:42 +0100 http://bitbucket.org/pypy/pypy/changeset/6d7f2e710bd2/ Log: Inline the ascii part of unichr_as_utf8_append() diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -17,7 +17,7 @@ import sys from rpython.rlib.objectmodel import enforceargs, we_are_translated, specialize -from rpython.rlib.objectmodel import always_inline +from rpython.rlib.objectmodel import always_inline, dont_inline from rpython.rlib.rstring import StringBuilder from rpython.rlib import jit from rpython.rlib.signature import signature @@ -50,6 +50,7 @@ chr((0x80 | (code & 0x3f)))) raise ValueError + at always_inline def unichr_as_utf8_append(builder, code, allow_surrogates=False): """Encode code (numeric value) as utf8 encoded string and emit the result into the given StringBuilder. 
@@ -59,13 +60,40 @@ if code <= r_uint(0x7F): # Encode ASCII builder.append(chr(code)) - return + else: + # Encode non-ASCII, uses a function call + if allow_surrogates: + _nonascii_unichr_as_utf8_append(builder, code) + else: + _nonascii_unichr_as_utf8_append_nosurrogates(builder, code) + + at dont_inline +def _nonascii_unichr_as_utf8_append(builder, code): if code <= r_uint(0x07FF): builder.append(chr((0xc0 | (code >> 6)))) builder.append(chr((0x80 | (code & 0x3f)))) return if code <= r_uint(0xFFFF): - if not allow_surrogates and 0xd800 <= code <= 0xdfff: + builder.append(chr((0xe0 | (code >> 12)))) + builder.append(chr((0x80 | ((code >> 6) & 0x3f)))) + builder.append(chr((0x80 | (code & 0x3f)))) + return + if code <= r_uint(0x10FFFF): + builder.append(chr((0xf0 | (code >> 18)))) + builder.append(chr((0x80 | ((code >> 12) & 0x3f)))) + builder.append(chr((0x80 | ((code >> 6) & 0x3f)))) + builder.append(chr((0x80 | (code & 0x3f)))) + return + raise ValueError + + at dont_inline +def _nonascii_unichr_as_utf8_append_nosurrogates(builder, code): + if code <= r_uint(0x07FF): + builder.append(chr((0xc0 | (code >> 6)))) + builder.append(chr((0x80 | (code & 0x3f)))) + return + if code <= r_uint(0xFFFF): + if 0xd800 <= code <= 0xdfff: raise ValueError builder.append(chr((0xe0 | (code >> 12)))) builder.append(chr((0x80 | ((code >> 6) & 0x3f)))) @@ -79,6 +107,7 @@ return raise ValueError + # note - table lookups are really slow. Measured on various elements of obama # chinese wikipedia, they're anywhere between 10% and 30% slower. # In extreme cases (small, only chinese text), they're 40% slower From pypy.commits at gmail.com Wed Dec 6 08:21:35 2017 From: pypy.commits at gmail.com (fijal) Date: Wed, 06 Dec 2017 05:21:35 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: change always_inline to try_inline Message-ID: <5a27eedf.02be1c0a.45e5d.153d@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93285:0e829cf58e7b Date: 2017-12-06 15:20 +0200 http://bitbucket.org/pypy/pypy/changeset/0e829cf58e7b/ Log: change always_inline to try_inline diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -17,7 +17,7 @@ import sys from rpython.rlib.objectmodel import enforceargs, we_are_translated, specialize -from rpython.rlib.objectmodel import always_inline, dont_inline +from rpython.rlib.objectmodel import always_inline, dont_inline, try_inline from rpython.rlib.rstring import StringBuilder from rpython.rlib import jit from rpython.rlib.signature import signature @@ -50,7 +50,7 @@ chr((0x80 | (code & 0x3f)))) raise ValueError - at always_inline + at try_inline def unichr_as_utf8_append(builder, code, allow_surrogates=False): """Encode code (numeric value) as utf8 encoded string and emit the result into the given StringBuilder. 
@@ -737,7 +737,7 @@ self._lgt += 1 self._s.append(s) - @always_inline + @try_inline def append_code(self, code): self._flag = combine_flags(self._flag, get_flag_from_code(code)) self._lgt += 1 From pypy.commits at gmail.com Wed Dec 6 11:48:25 2017 From: pypy.commits at gmail.com (fijal) Date: Wed, 06 Dec 2017 08:48:25 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: use iterator for islower Message-ID: <5a281f59.cc5e1c0a.de6d2.6c8a@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93286:55238fb1d18a Date: 2017-12-06 18:47 +0200 http://bitbucket.org/pypy/pypy/changeset/55238fb1d18a/ Log: use iterator for islower diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py --- a/pypy/objspace/std/test/test_liststrategies.py +++ b/pypy/objspace/std/test/test_liststrategies.py @@ -7,6 +7,7 @@ IntOrFloatListStrategy) from pypy.objspace.std import listobject from pypy.objspace.std.test.test_listobject import TestW_ListObject +from rpython.rlib.rutf8 import FLAG_ASCII class TestW_ListStrategies(TestW_ListObject): @@ -600,9 +601,9 @@ def test_unicode(self): l1 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newbytes("zwei")]) assert isinstance(l1.strategy, BytesListStrategy) - l2 = W_ListObject(self.space, [self.space.newutf8("eins", 4, 2), self.space.newutf8("zwei", 4, 2)]) + l2 = W_ListObject(self.space, [self.space.newutf8("eins", 4, FLAG_ASCII), self.space.newutf8("zwei", 4, FLAG_ASCII)]) assert isinstance(l2.strategy, UnicodeListStrategy) - l3 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newutf8("zwei", 4, 2)]) + l3 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newutf8("zwei", 4, FLAG_ASCII)]) assert isinstance(l3.strategy, ObjectListStrategy) def test_listview_bytes(self): diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -524,16 +524,12 @@ def descr_islower(self, space): cased = False - val = self._utf8 - i = 0 - while i < len(val): - uchar = rutf8.codepoint_at_pos(val, i) + for uchar in rutf8.Utf8StringIterator(self._utf8): if (unicodedb.isupper(uchar) or unicodedb.istitle(uchar)): return space.w_False if not cased and unicodedb.islower(uchar): cased = True - i = rutf8.next_codepoint_pos(val, i) return space.newbool(cased) def descr_istitle(self, space): From pypy.commits at gmail.com Wed Dec 6 14:01:17 2017 From: pypy.commits at gmail.com (fijal) Date: Wed, 06 Dec 2017 11:01:17 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: small improvements Message-ID: <5a283e7d.130d1c0a.a93e3.89e1@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93287:f6b0e685b84d Date: 2017-12-06 21:00 +0200 http://bitbucket.org/pypy/pypy/changeset/f6b0e685b84d/ Log: small improvements diff --git a/unicode-bench.py b/unicode-bench.py --- a/unicode-bench.py +++ b/unicode-bench.py @@ -6,6 +6,9 @@ unicodes = [unicode("u" * LGT + str(i)) for i in range(100)] non_ascii_unicodes = [u"u" * LGT + unicode(i) + u"å" for i in range(100)] +long_string = u" " * 1000000 +unicodes = [long_string] * 100 + RANGE = 250000000 // LGT def upper(main_l): @@ -38,11 +41,37 @@ for i in xrange(RANGE): l[0] = main_l[i % 100].find(u"foo") -for func in [upper]:#, lower, isupper, islower]: +def split(main_l): + l = [None] + for i in xrange(RANGE): + l[0] = main_l[i % 100].split() + +def splitlines(main_l): + l = [None] + for i in xrange(RANGE): + l[0] = main_l[i % 100].splitlines() + 
+def iter(main_l): + l = [None] + for i in xrange(RANGE // 10000): + for elem in main_l[i % 100]: + l[0] = elem + +def indexing(main_l): + l = [None] + for i in xrange(RANGE * 10): + l[0] = main_l[i % 100][13] + +def isspace(main_l): + l = [None] + for i in xrange(RANGE // 10000): + l[0] = main_l[i % 100].isspace() + +for func in [isspace]:#, lower, isupper, islower]: t0 = time.time() func(unicodes) t1 = time.time() print "ascii %s %.2f" % (func.__name__, t1 - t0) - func(non_ascii_unicodes) - t2 = time.time() - print "non-ascii %s %.2f" % (func.__name__, t2 - t1) + #func(non_ascii_unicodes) + #t2 = time.time() + #print "non-ascii %s %.2f" % (func.__name__, t2 - t1) From pypy.commits at gmail.com Wed Dec 6 14:01:19 2017 From: pypy.commits at gmail.com (fijal) Date: Wed, 06 Dec 2017 11:01:19 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: more trivial use cases for Utf8StringIterator Message-ID: <5a283e7f.1ba1df0a.621fb.613c@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93288:0c5fc845224f Date: 2017-12-06 21:00 +0200 http://bitbucket.org/pypy/pypy/changeset/0c5fc845224f/ Log: more trivial use cases for Utf8StringIterator diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -449,10 +449,7 @@ def _is_generic_loop(self, space, v, func_name): func = getattr(self, func_name) val = self._utf8 - i = 0 - while i < len(val): - uchar = rutf8.codepoint_at_pos(val, i) - i = rutf8.next_codepoint_pos(val, i) + for uchar in rutf8.Utf8StringIterator(val): if not func(uchar): return space.w_False return space.w_True @@ -535,11 +532,7 @@ def descr_istitle(self, space): cased = False previous_is_cased = False - val = self._utf8 - i = 0 - while i < len(val): - uchar = rutf8.codepoint_at_pos(val, i) - i = rutf8.next_codepoint_pos(val, i) + for uchar in rutf8.Utf8StringIterator(self._utf8): if unicodedb.isupper(uchar) or unicodedb.istitle(uchar): if previous_is_cased: return space.w_False @@ -555,16 +548,12 @@ def descr_isupper(self, space): cased = False - i = 0 - val = self._utf8 - while i < len(val): - uchar = rutf8.codepoint_at_pos(val, i) + for uchar in rutf8.Utf8StringIterator(self._utf8): if (unicodedb.islower(uchar) or unicodedb.istitle(uchar)): return space.w_False if not cased and unicodedb.isupper(uchar): cased = True - i = rutf8.next_codepoint_pos(val, i) return space.newbool(cased) def descr_startswith(self, space, w_prefix, w_start=None, w_end=None): From pypy.commits at gmail.com Wed Dec 6 14:03:27 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 06 Dec 2017 11:03:27 -0800 (PST) Subject: [pypy-commit] cffi default: Issue #348 Message-ID: <5a283eff.08ea1c0a.fefe3.72ec@mx.google.com> Author: Armin Rigo Branch: Changeset: r3052:9b653b9a1c87 Date: 2017-12-06 20:02 +0100 http://bitbucket.org/cffi/cffi/changeset/9b653b9a1c87/ Log: Issue #348 Fix for MSVC complaining about very large strings. 
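The underlying problem is that MSVC caps how large a single C string literal may be (on the
	order of tens of kilobytes), and an embedded Python source file can easily exceed that.
	The fix below therefore emits the embedding source as an initialized char array of numeric
	byte values, with the original lines kept alongside as // comments, instead of one huge
	literal. A rough standalone sketch of that transformation follows; the helper name and the
	76-column wrap are illustrative only, the real logic is _print_string_literal_in_array()
	in the diff:

	    def emit_as_c_char_array(name, source):
	        # Build C source that stores `source` as a NUL-terminated char
	        # array, so no string literal is ever generated.
	        lines = ['static const char %s[] = {' % name]
	        for line in source.splitlines(True):
	            lines.append(('// ' + line).rstrip())   # keep a readable copy
	            chunk = ''
	            for c in line:
	                chunk += '%d,' % ord(c)
	                if len(chunk) >= 76:                 # wrap long rows
	                    lines.append(chunk)
	                    chunk = ''
	            if chunk:
	                lines.append(chunk)
	        lines.append('0 };')                         # trailing NUL
	        return '\n'.join(lines)

	    # emit_as_c_char_array('_CFFI_PYTHON_STARTUP_CODE', 'print("hi")\n')
	    # yields C the compiler accepts regardless of how long the source is.
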
diff --git a/cffi/recompiler.py b/cffi/recompiler.py --- a/cffi/recompiler.py +++ b/cffi/recompiler.py @@ -295,8 +295,9 @@ base_module_name = self.module_name.split('.')[-1] if self.ffi._embedding is not None: prnt('#define _CFFI_MODULE_NAME "%s"' % (self.module_name,)) - prnt('#define _CFFI_PYTHON_STARTUP_CODE %s' % - (self._string_literal(self.ffi._embedding),)) + prnt('static const char _CFFI_PYTHON_STARTUP_CODE[] = {') + self._print_string_literal_in_array(self.ffi._embedding) + prnt('0 };') prnt('#ifdef PYPY_VERSION') prnt('# define _CFFI_PYTHON_STARTUP_FUNC _cffi_pypyinit_%s' % ( base_module_name,)) @@ -1271,17 +1272,18 @@ _generate_cpy_extern_python_plus_c_ctx = \ _generate_cpy_extern_python_ctx - def _string_literal(self, s): - def _char_repr(c): - # escape with a '\' the characters '\', '"' or (for trigraphs) '?' - if c in '\\"?': return '\\' + c - if ' ' <= c < '\x7F': return c - if c == '\n': return '\\n' - return '\\%03o' % ord(c) - lines = [] - for line in s.splitlines(True) or ['']: - lines.append('"%s"' % ''.join([_char_repr(c) for c in line])) - return ' \\\n'.join(lines) + def _print_string_literal_in_array(self, s): + prnt = self._prnt + prnt('// # NB. this is not a string because of a size limit in MSVC') + for line in s.splitlines(True): + prnt(('// ' + line).rstrip()) + printed_line = '' + for c in line: + if len(printed_line) >= 76: + prnt(printed_line) + printed_line = '' + printed_line += '%d,' % (ord(c),) + prnt(printed_line) # ---------- # emitting the opcodes for individual types From pypy.commits at gmail.com Wed Dec 6 14:18:16 2017 From: pypy.commits at gmail.com (fijal) Date: Wed, 06 Dec 2017 11:18:16 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: try to improve latin1 handling Message-ID: <5a284278.f3c4df0a.dbd90.795b@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93289:a6a28d7e46a8 Date: 2017-12-06 21:17 +0200 http://bitbucket.org/pypy/pypy/changeset/a6a28d7e46a8/ Log: try to improve latin1 handling diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -149,37 +149,32 @@ def _utf8_encode_latin_1_slowpath(s, errors, errorhandler): res = StringBuilder(len(s)) - size = len(s) cur = 0 - i = 0 - while i < size: - if ord(s[i]) <= 0x7F: - res.append(s[i]) - i += 1 - cur += 1 - else: - oc = rutf8.codepoint_at_pos(s, i) - if oc <= 0xFF: - res.append(chr(oc)) + iter = rutf8.Utf8StringIterator(s) + try: + while True: + ch = iter.next() + if ch <= 0xFF: + res.append(chr(ch)) cur += 1 - i = rutf8.next_codepoint_pos(s, i) else: r, pos = errorhandler(errors, 'latin1', 'ordinal not in range(256)', s, cur, cur + 1) - for j in range(pos - cur): - i = rutf8.next_codepoint_pos(s, i) - j = 0 - while j < len(r): - c = rutf8.codepoint_at_pos(r, j) + for c in rutf8.Utf8StringIterator(r): if c > 0xFF: errorhandler("strict", 'latin1', 'ordinal not in range(256)', s, cur, cur + 1) - j = rutf8.next_codepoint_pos(r, j) res.append(chr(c)) + + for j in range(pos - cur - 1): + iter.next() + cur = pos + except StopIteration: + pass r = res.build() return r From pypy.commits at gmail.com Wed Dec 6 14:40:58 2017 From: pypy.commits at gmail.com (fijal) Date: Wed, 06 Dec 2017 11:40:58 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: refactor Message-ID: <5a2847ca.43a4df0a.d2806.6c70@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93290:c211485151ba Date: 2017-12-06 21:40 +0200 http://bitbucket.org/pypy/pypy/changeset/c211485151ba/ Log: refactor 
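The refactor reshapes the loop introduced in the previous changeset: instead of wrapping
	the whole encoding loop in one try/except StopIteration, the try now covers only the
	iter.next() call that is expected to run out, and end-of-string simply breaks out of an
	ordinary while loop. The intent is the same behaviour with a tighter exception scope.
	A simplified pure-Python sketch of that loop shape (the function name and the 'replace'
	handling are illustrative, not the real RPython error-handler protocol):

	    def encode_latin1_sketch(u, errors='replace'):
	        # Iterate code points by hand; the try covers only the call
	        # that is expected to raise StopIteration.
	        out = []
	        it = iter(u)              # stand-in for rutf8.Utf8StringIterator
	        while True:
	            try:
	                ch = next(it)
	            except StopIteration:
	                break
	            cp = ord(ch)
	            if cp <= 0xFF:
	                out.append(chr(cp))
	            elif errors == 'replace':
	                out.append('?')   # a real handler may emit several chars
	            else:
	                raise UnicodeEncodeError('latin-1', u, 0, 1,
	                                         'ordinal not in range(256)')
	        return ''.join(out)

	    # encode_latin1_sketch(u'caf\xe9\u1234') == 'caf\xe9?'
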
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -151,30 +151,30 @@ res = StringBuilder(len(s)) cur = 0 iter = rutf8.Utf8StringIterator(s) - try: - while True: + while True: + try: ch = iter.next() - if ch <= 0xFF: - res.append(chr(ch)) - cur += 1 - else: - r, pos = errorhandler(errors, 'latin1', - 'ordinal not in range(256)', s, cur, - cur + 1) + except StopIteration: + break + if ch <= 0xFF: + res.append(chr(ch)) + cur += 1 + else: + r, pos = errorhandler(errors, 'latin1', + 'ordinal not in range(256)', s, cur, + cur + 1) - for c in rutf8.Utf8StringIterator(r): - if c > 0xFF: - errorhandler("strict", 'latin1', - 'ordinal not in range(256)', s, - cur, cur + 1) - res.append(chr(c)) + for c in rutf8.Utf8StringIterator(r): + if c > 0xFF: + errorhandler("strict", 'latin1', + 'ordinal not in range(256)', s, + cur, cur + 1) + res.append(chr(c)) - for j in range(pos - cur - 1): - iter.next() + for j in range(pos - cur - 1): + iter.next() - cur = pos - except StopIteration: - pass + cur = pos r = res.build() return r From pypy.commits at gmail.com Wed Dec 6 17:22:26 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 06 Dec 2017 14:22:26 -0800 (PST) Subject: [pypy-commit] buildbot default: no need for non-JIT builds on ARM, schedule backend tests only for RPython changes Message-ID: <5a286da2.06aadf0a.60a1e.8331@mx.google.com> Author: Matti Picus Branch: Changeset: r1045:21cd87db82d9 Date: 2017-12-07 00:21 +0200 http://bitbucket.org/pypy/buildbot/changeset/21cd87db82d9/ Log: no need for non-JIT builds on ARM, schedule backend tests only for RPython changes diff --git a/bot2/pypybuildbot/arm_master.py b/bot2/pypybuildbot/arm_master.py --- a/bot2/pypybuildbot/arm_master.py +++ b/bot2/pypybuildbot/arm_master.py @@ -1,5 +1,5 @@ from buildbot.scheduler import Nightly, Triggerable -from pypybuildbot.util import load +from pypybuildbot.util import load, isRPython pypybuilds = load('pypybuildbot.builds') ARMCrossLock = pypybuilds.ARMCrossLock @@ -10,13 +10,13 @@ crosstranslationargs = ['--platform=arm', '--gcrootfinder=shadowstack'] crosstranslationjitargs = ['--jit-backend=arm'] # this one needs a larger timeout due to how it is run -pypyJitBackendOnlyOwnTestFactoryARM = pypybuilds.Own( +pypyJitBackendOnlyRPythonTestFactoryARM = pypybuilds.RPython( cherrypick=':'.join(["jit/backend/arm", "jit/backend/llsupport", "jit/backend/test", # kill this one in case it is too slow ]), timeout=36000) -pypyJitOnlyOwnTestFactoryARM = pypybuilds.Own(cherrypick="jit", timeout=2 * 3600) +pypyJitOnlyRPythonTestFactoryARM = pypybuilds.RPython(cherrypick="jit", timeout=2 * 3600) pypyOwnTestFactoryARM = pypybuilds.Own(timeout=2 * 3600) pypyRPythonTestFactoryARM = pypybuilds.RPython(timeout=2 * 3600) @@ -146,13 +146,20 @@ BUILDJITLINUXARMHF_RASPBIAN, # on hhu-cross-raspbianhf, uses 1 core BUILDJITLINUXARMHF_RARING, # on hhu-cross-raring-armhf, uses 1 core - BUILDLINUXARM, # on hhu-cross-armel, uses 1 core - BUILDLINUXARMHF_RASPBIAN, # on hhu-cross-raspbianhf, uses 1 core + #BUILDLINUXARM, # on hhu-cross-armel, uses 1 core + #BUILDLINUXARMHF_RASPBIAN, # on hhu-cross-raspbianhf, uses 1 core + ], branch=None, hour=0, minute=0, + ), + + Nightly("nightly-arm-0-01", [ JITBACKENDONLYLINUXARMEL, # on hhu-imx.53 JITBACKENDONLYLINUXARMHF, JITBACKENDONLYLINUXARMHF_v7, # on cubieboard-bob - ], branch=None, hour=0, minute=0), + ], branch='default', hour=0, minute=0, onlyIfChanged=True, + fileIsImportant=isRPython, + 
change_filter=filter.ChangeFilter(branch='default'), + ), Triggerable("APPLVLLINUXARM_scheduler", [ APPLVLLINUXARM, # triggered by BUILDLINUXARM, on hhu-beagleboard @@ -182,7 +189,7 @@ {"name": JITBACKENDONLYLINUXARMEL, "slavenames": ['hhu-i.mx53'], "builddir": JITBACKENDONLYLINUXARMEL, - "factory": pypyJitBackendOnlyOwnTestFactoryARM, + "factory": pypyJitBackendOnlyRPythonTestFactoryARM, "category": 'linux-armel', "locks": [ARMBoardLock.access('counting')], }, @@ -191,7 +198,7 @@ {"name": JITBACKENDONLYLINUXARMHF, "slavenames": ['hhu-raspberry-pi', 'hhu-pypy-pi', 'hhu-pypy-pi2'], "builddir": JITBACKENDONLYLINUXARMHF, - "factory": pypyJitBackendOnlyOwnTestFactoryARM, + "factory": pypyJitBackendOnlyRPythonTestFactoryARM, "category": 'linux-armhf', "locks": [ARMBoardLock.access('counting')], }, @@ -199,7 +206,7 @@ {"name": JITBACKENDONLYLINUXARMHF_v7, "slavenames": ['cubieboard-bob'], "builddir": JITBACKENDONLYLINUXARMHF_v7, - "factory": pypyJitBackendOnlyOwnTestFactoryARM, + "factory": pypyJitBackendOnlyRPythonTestFactoryARM, "category": 'linux-armhf', "locks": [ARMBoardLock.access('counting')], }, diff --git a/bot2/pypybuildbot/master.py b/bot2/pypybuildbot/master.py --- a/bot2/pypybuildbot/master.py +++ b/bot2/pypybuildbot/master.py @@ -11,7 +11,7 @@ #from buildbot import manhole from pypybuildbot.pypylist import PyPyList, NumpyStatusList from pypybuildbot.ircbot import IRC # side effects -from pypybuildbot.util import we_are_debugging +from pypybuildbot.util import we_are_debugging, isRPython from buildbot.changes import filter from buildbot.changes.hgpoller import HgPoller @@ -63,7 +63,7 @@ pypybuilds = load('pypybuildbot.builds') -# all ARM buildbot configuration si in arm_master.py +# all ARM buildbot configuration is in arm_master.py ARM = load('pypybuildbot.arm_master') TannitCPU = pypybuilds.TannitCPU @@ -271,13 +271,6 @@ 'hhu-pypy-pi2': {'max_builds': 1}, } -def isRPython(change): - for fname in change.files: - if fname.startswith('rpython'): - log.msg('fileIsImportant filter isRPython got "%s"' % fname) - return True - return False - BuildmasterConfig = { 'slavePortnum': slavePortnum, diff --git a/bot2/pypybuildbot/util.py b/bot2/pypybuildbot/util.py --- a/bot2/pypybuildbot/util.py +++ b/bot2/pypybuildbot/util.py @@ -17,3 +17,10 @@ if os.path.lexists(dst): os.remove(dst) os.symlink(src, dst) + +def isRPython(change): + for fname in change.files: + if fname.startswith('rpython'): + log.msg('fileIsImportant filter isRPython got "%s"' % fname) + return True + return False From pypy.commits at gmail.com Thu Dec 7 03:01:42 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 07 Dec 2017 00:01:42 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: Refix and test the standard StrMatchContext Message-ID: <5a28f566.d2a1df0a.81b99.fff3@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93291:fc5e50bec2b2 Date: 2017-12-07 09:01 +0100 http://bitbucket.org/pypy/pypy/changeset/fc5e50bec2b2/ Log: Refix and test the standard StrMatchContext diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -138,8 +138,40 @@ """Similar to str().""" raise NotImplementedError - def debug_check_pos(self, pos): - pass + # The following methods are provided to be overriden in + # Utf8MatchContext. The non-utf8 implementation is provided + # by the FixedMatchContext abstract subclass, in order to use + # the same @not_rpython safety trick as above. 
+ @not_rpython + def next(self, position): + raise NotImplementedError + @not_rpython + def prev(self, position): + raise NotImplementedError + @not_rpython + def next_n(self, position, n): + raise NotImplementedError + @not_rpython + def prev_n(self, position, n, start_position): + raise NotImplementedError + @not_rpython + def slowly_convert_byte_pos_to_index(self, position): + raise NotImplementedError + @not_rpython + def debug_check_pos(self, position): + raise NotImplementedError + @not_rpython + def maximum_distance(self, position_low, position_high): + raise NotImplementedError + @not_rpython + def bytes_difference(self, position1, position2): + raise NotImplementedError + @not_rpython + def get_single_byte(self, base_position, index): + raise NotImplementedError + @not_rpython + def go_forward_by_bytes(self, base_position, index): + raise NotImplementedError def get_mark(self, gid): mark = find_mark(self.match_marks, gid) @@ -186,13 +218,56 @@ def fresh_copy(self, start): raise NotImplementedError -class BufMatchContext(AbstractMatchContext): + +class FixedMatchContext(AbstractMatchContext): + """Abstract subclass to introduce the default implementation for + these position methods. The Utf8 subclass doesn't inherit from here.""" + + ZERO = 0 + + def next(self, position): + return position + 1 + + def prev(self, position): + if position == 0: + raise EndOfString + return position - 1 + + def next_n(self, position, n, end_position): + position += n + if position > end_position: + raise EndOfString + return position + + def prev_n(self, position, n, start_position): + position -= n + if position < start_position: + raise EndOfString + return position + + def slowly_convert_byte_pos_to_index(self, position): + return position + + def debug_check_pos(self, position): + pass + + def maximum_distance(self, position_low, position_high): + return position_high - position_low + + def bytes_difference(self, position1, position2): + return position1 - position2 + + def go_forward_by_bytes(self, base_position, index): + return base_position + index + + +class BufMatchContext(FixedMatchContext): """Concrete subclass for matching in a buffer.""" _immutable_fields_ = ["_buffer"] def __init__(self, pattern, buf, match_start, end, flags): - AbstractMatchContext.__init__(self, pattern, match_start, end, flags) + FixedMatchContext.__init__(self, pattern, match_start, end, flags) self._buffer = buf def str(self, index): @@ -203,17 +278,20 @@ c = self.str(index) return rsre_char.getlower(c, self.flags) + def get_single_byte(self, base_position, index): + return self.str(base_position + index) + def fresh_copy(self, start): return BufMatchContext(self.pattern, self._buffer, start, self.end, self.flags) -class StrMatchContext(AbstractMatchContext): +class StrMatchContext(FixedMatchContext): """Concrete subclass for matching in a plain string.""" _immutable_fields_ = ["_string"] def __init__(self, pattern, string, match_start, end, flags): - AbstractMatchContext.__init__(self, pattern, match_start, end, flags) + FixedMatchContext.__init__(self, pattern, match_start, end, flags) self._string = string if not we_are_translated() and isinstance(string, unicode): self.flags |= rsre_char.SRE_FLAG_UNICODE # for rsre_re.py @@ -226,17 +304,20 @@ c = self.str(index) return rsre_char.getlower(c, self.flags) + def get_single_byte(self, base_position, index): + return self.str(base_position + index) + def fresh_copy(self, start): return StrMatchContext(self.pattern, self._string, start, self.end, self.flags) -class 
UnicodeMatchContext(AbstractMatchContext): +class UnicodeMatchContext(FixedMatchContext): """Concrete subclass for matching in a unicode string.""" _immutable_fields_ = ["_unicodestr"] def __init__(self, pattern, unicodestr, match_start, end, flags): - AbstractMatchContext.__init__(self, pattern, match_start, end, flags) + FixedMatchContext.__init__(self, pattern, match_start, end, flags) self._unicodestr = unicodestr def str(self, index): @@ -247,6 +328,9 @@ c = self.str(index) return rsre_char.getlower(c, self.flags) + def get_single_byte(self, base_position, index): + return self.str(base_position + index) + def fresh_copy(self, start): return UnicodeMatchContext(self.pattern, self._unicodestr, start, self.end, self.flags) diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py --- a/rpython/rlib/rsre/test/support.py +++ b/rpython/rlib/rsre/test/support.py @@ -25,7 +25,6 @@ """Concrete subclass for matching in a plain string, tweaked for tests""" ZERO = Position(0) - EXACT_DISTANCE = False def next(self, position): assert isinstance(position, Position) diff --git a/rpython/rlib/rsre/test/test_search.py b/rpython/rlib/rsre/test/test_search.py --- a/rpython/rlib/rsre/test/test_search.py +++ b/rpython/rlib/rsre/test/test_search.py @@ -1,44 +1,45 @@ import re, py from rpython.rlib.rsre.test.test_match import get_code, get_code_and_re -from rpython.rlib.rsre.test.support import search, match, Position +from rpython.rlib.rsre.test import support +from rpython.rlib.rsre import rsre_core -class TestSearch: +class BaseTestSearch: def test_code1(self): r_code1 = get_code(r'[abc][def][ghi]') - res = search(r_code1, "fooahedixxx") + res = self.search(r_code1, "fooahedixxx") assert res is None - res = search(r_code1, "fooahcdixxx") + res = self.search(r_code1, "fooahcdixxx") assert res is not None assert res.span() == (5, 8) def test_code2(self): r_code2 = get_code(r'\s*(.*?)') - res = search(r_code2, "foo bar abcdef") + res = self.search(r_code2, "foo bar abcdef") assert res is not None assert res.span() == (8, 34) def test_pure_literal(self): r_code3 = get_code(r'foobar') - res = search(r_code3, "foo bar foobar baz") + res = self.search(r_code3, "foo bar foobar baz") assert res is not None assert res.span() == (8, 14) def test_code3(self): r_code1 = get_code(r'\s*(.*?)') - res = match(r_code1, " abcdef") + res = self.match(r_code1, " abcdef") assert res is not None def test_max_until_0_65535(self): r_code2 = get_code(r'(?:xy)*xy') - #res = match(r_code2, 'def') + #res = self.match(r_code2, 'def') #assert res is None - #res = match(r_code2, 'xydef') + #res = self.match(r_code2, 'xydef') #assert res is not None - res = match(r_code2, 'xyxyxydef') + res = self.match(r_code2, 'xyxyxydef') assert res is not None - res = match(r_code2, '' + 'xy'*1000 + 'def') + res = self.match(r_code2, '' + 'xy'*1000 + 'def') assert res is not None def test_max_until_3_5(self): @@ -46,18 +47,18 @@ for i in range(8): s = '' + 'xy'*i + 'defdefdefdefdef' assert (r.match(s) is not None) is (3 <= i-1 <= 5) - res = match(r_code2, s) + res = self.match(r_code2, s) assert (res is not None) is (3 <= i-1 <= 5) def test_min_until_0_65535(self): r_code2 = get_code(r'(?:xy)*?xy') - res = match(r_code2, 'def') + res = self.match(r_code2, 'def') assert res is None - res = match(r_code2, 'xydef') + res = self.match(r_code2, 'xydef') assert res is not None - res = match(r_code2, 'xyxyxydef') + res = self.match(r_code2, 'xyxyxydef') assert res is not None - res = match(r_code2, '' + 'xy'*1000 + 'def') + res = 
self.match(r_code2, '' + 'xy'*1000 + 'def') assert res is not None def test_min_until_3_5(self): @@ -65,44 +66,44 @@ for i in range(8): s = '' + 'xy'*i + 'defdefdefdefdef' assert (r.match(s) is not None) is (3 <= i-1 <= 5) - res = match(r_code2, s) + res = self.match(r_code2, s) assert (res is not None) is (3 <= i-1 <= 5) def test_min_repeat_one(self): r_code3 = get_code(r'.{3,5}?y') for i in range(8): - res = match(r_code3, '' + 'x'*i + 'y') + res = self.match(r_code3, '' + 'x'*i + 'y') assert (res is not None) is (3 <= i <= 5) def test_simple_group(self): r_code4 = get_code(r'(x.)') - res = match(r_code4, 'xadef') + res = self.match(r_code4, 'xadef') assert res is not None assert res.get_mark(0) == 5 assert res.get_mark(1) == 7 def test_max_until_groups(self): r_code4 = get_code(r'(x.)*xy') - res = match(r_code4, 'xaxbxydef') + res = self.match(r_code4, 'xaxbxydef') assert res is not None assert res.get_mark(0) == 7 assert res.get_mark(1) == 9 def test_group_branch(self): r_code5 = get_code(r'(ab|c)') - res = match(r_code5, 'abdef') + res = self.match(r_code5, 'abdef') assert (res.get_mark(0), res.get_mark(1)) == (5, 7) - res = match(r_code5, 'cdef') + res = self.match(r_code5, 'cdef') assert (res.get_mark(0), res.get_mark(1)) == (5, 6) - res = match(r_code5, 'dedef') + res = self.match(r_code5, 'dedef') assert res is None def test_group_branch_max_until(self): r_code6 = get_code(r'(ab|c)*a') - res = match(r_code6, 'ccabcccabadef') + res = self.match(r_code6, 'ccabcccabadef') assert (res.get_mark(0), res.get_mark(1)) == (12, 14) r_code7 = get_code(r'((ab)|(c))*a') - res = match(r_code7, 'ccabcccabadef') + res = self.match(r_code7, 'ccabcccabadef') assert (res.get_mark(0), res.get_mark(1)) == (12, 14) assert (res.get_mark(2), res.get_mark(3)) == (12, 14) assert (res.get_mark(4), res.get_mark(5)) == (11, 12) @@ -113,7 +114,7 @@ assert m.span(1) == (12, 13) assert m.span(3) == (12, 13) assert m.span(2) == (8, 9) - res = match(r_code7, 'bbbabbbb') + res = self.match(r_code7, 'bbbabbbb') assert (res.get_mark(0), res.get_mark(1)) == (12, 13) assert (res.get_mark(4), res.get_mark(5)) == (12, 13) assert (res.get_mark(2), res.get_mark(3)) == (8, 9) @@ -124,7 +125,7 @@ assert m.span(1) == (6, 7) assert m.span(3) == (6, 7) assert m.span(2) == (5, 6) - res = match(r_code8, 'ab') + res = self.match(r_code8, 'ab') assert (res.get_mark(0), res.get_mark(1)) == (6, 7) assert (res.get_mark(4), res.get_mark(5)) == (6, 7) assert (res.get_mark(2), res.get_mark(3)) == (5, 6) @@ -134,7 +135,7 @@ m = r9.match('xyzxc') assert m.span(1) == (3, 4) assert m.span(2) == (-1, -1) - res = match(r_code9, 'xyzxc') + res = self.match(r_code9, 'xyzxc') assert (res.get_mark(0), res.get_mark(1)) == (3, 4) assert (res.get_mark(2), res.get_mark(3)) == (-1, -1) @@ -142,8 +143,8 @@ r_code9, r9 = get_code_and_re(r'((x|yz)+?(y)??c)*') m = r9.match('xycxyzxc') assert m.span(2) == (6, 7) - #assert match.span(3) == (1, 2) --- bug of CPython - res = match(r_code9, 'xycxyzxc') + #assert self.match.span(3) == (1, 2) --- bug of CPython + res = self.match(r_code9, 'xycxyzxc') assert (res.get_mark(2), res.get_mark(3)) == (6, 7) assert (res.get_mark(4), res.get_mark(5)) == (1, 2) @@ -151,19 +152,19 @@ r_code, r = get_code_and_re(r'(a?)+y') assert r.match('y') assert r.match('aaayaaay').span() == (0, 4) - res = match(r_code, 'y') + res = self.match(r_code, 'y') assert res - res = match(r_code, 'aaayaaay') + res = self.match(r_code, 'aaayaaay') assert res and res.span() == (0, 4) # r_code, r = get_code_and_re(r'(a?){4,6}y') assert 
r.match('y') - res = match(r_code, 'y') + res = self.match(r_code, 'y') assert res # r_code, r = get_code_and_re(r'(a?)*y') assert r.match('y') - res = match(r_code, 'y') + res = self.match(r_code, 'y') assert res def test_empty_maxuntil_2(self): @@ -173,24 +174,24 @@ py.test.skip("older version of the stdlib: %s" % (e,)) assert r.match('XfooXbarX').span() == (0, 5) assert r.match('XfooXbarX').span(1) == (4, 4) - res = match(r_code, 'XfooXbarX') + res = self.match(r_code, 'XfooXbarX') assert res.span() == (0, 5) assert res.span(1) == (4, 4) def test_empty_minuntil(self): r_code, r = get_code_and_re(r'(a?)+?y') #assert not r.match('z') -- CPython bug (at least 2.5) eats all memory - res = match(r_code, 'z') + res = self.match(r_code, 'z') assert not res # r_code, r = get_code_and_re(r'(a?){4,6}?y') assert not r.match('z') - res = match(r_code, 'z') + res = self.match(r_code, 'z') assert not res # r_code, r = get_code_and_re(r'(a?)*?y') #assert not r.match('z') -- CPython bug (at least 2.5) eats all memory - res = match(r_code, 'z') + res = self.match(r_code, 'z') assert not res def test_empty_search(self): @@ -198,15 +199,26 @@ for j in range(-2, 6): for i in range(-2, 6): match = r.search('abc', i, j) - res = search(r_code, 'abc', i, j) + res = self.search(r_code, 'abc', i, j) jk = min(max(j, 0), 3) ik = min(max(i, 0), 3) if ik <= jk: assert match is not None assert match.span() == (ik, ik) assert res is not None - assert res.match_start == Position(ik) - assert res.match_end == Position(ik) + assert res.match_start == self.Position(ik) + assert res.match_end == self.Position(ik) else: assert match is None assert res is None + + +class TestSearchCustom(BaseTestSearch): + search = staticmethod(support.search) + match = staticmethod(support.match) + Position = support.Position + +class TestSearchStr(BaseTestSearch): + search = staticmethod(rsre_core.search) + match = staticmethod(rsre_core.match) + Position = staticmethod(lambda n: n) From pypy.commits at gmail.com Thu Dec 7 03:12:47 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 07 Dec 2017 00:12:47 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: Translation fix (nonneg issues) Message-ID: <5a28f7ff.759adf0a.c1100.4b69@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93292:4ab5b6fa7557 Date: 2017-12-07 09:12 +0100 http://bitbucket.org/pypy/pypy/changeset/4ab5b6fa7557/ Log: Translation fix (nonneg issues) diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -229,9 +229,10 @@ return position + 1 def prev(self, position): - if position == 0: + position -= 1 + if position < 0: raise EndOfString - return position - 1 + return position def next_n(self, position, n, end_position): position += n From pypy.commits at gmail.com Thu Dec 7 07:36:49 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 07 Dec 2017 04:36:49 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: use iterator and add some more obscure iterface for it Message-ID: <5a2935e1.03da1c0a.945a3.2aba@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93293:8c2d7104d91e Date: 2017-12-07 14:36 +0200 http://bitbucket.org/pypy/pypy/changeset/8c2d7104d91e/ Log: use iterator and add some more obscure iterface for it diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -770,6 +770,12 @@ def __iter__(self): return self + def get_pos(self): + return self._pos + + def 
done(self): + return self._pos == self._end + @always_inline def next(self): pos = self._pos diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1025,11 +1025,9 @@ from rpython.rlib import rutf8 w = lltype.malloc(CWCHARP.TO, utf8len + 1, flavor='raw') - i = 0 index = 0 - while i < len(utf8): - w[index] = unichr(rutf8.codepoint_at_pos(utf8, i)) - i = rutf8.next_codepoint_pos(utf8, i) + for ch in rutf8.Utf8StringIterator(utf8): + w[index] = unichr(ch) index += 1 w[index] = unichr(0) return w From pypy.commits at gmail.com Thu Dec 7 08:34:00 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 07 Dec 2017 05:34:00 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: add one item Message-ID: <5a294348.d2a1df0a.81b99.4fa3@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93294:2d147c5dd32c Date: 2017-12-07 15:33 +0200 http://bitbucket.org/pypy/pypy/changeset/2d147c5dd32c/ Log: add one item diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -9,5 +9,6 @@ * remove assertions from W_UnicodeObject.__init__ if all the builders pass * what to do with error handlers that go backwards. There were tests in test_codecs that would check for that +* improve performance of splitlines * fix _pypyjson to not use a wrapped dict when decoding an object From pypy.commits at gmail.com Thu Dec 7 09:53:35 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 07 Dec 2017 06:53:35 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: changes to array module before changing the world Message-ID: <5a2955ef.8284df0a.9ee9a.0ee1@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93295:1be16a6d5f37 Date: 2017-12-07 16:52 +0200 http://bitbucket.org/pypy/pypy/changeset/1be16a6d5f37/ Log: changes to array module before changing the world diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -1,4 +1,4 @@ -from rpython.rlib import jit, rgc +from rpython.rlib import jit, rgc, rutf8 from rpython.rlib.buffer import RawBuffer from rpython.rlib.objectmodel import keepalive_until_here from rpython.rlib.rarithmetic import ovfcheck, widen @@ -451,7 +451,7 @@ """ if self.typecode == 'u': buf = rffi.cast(UNICODE_ARRAY, self._buffer_as_unsigned()) - return space.newunicode(rffi.wcharpsize2unicode(buf, self.len)) + return space.newutf8(rffi.wcharpsize2unicode(buf, self.len)) else: raise oefmt(space.w_ValueError, "tounicode() may only be called on type 'u' arrays") @@ -797,7 +797,7 @@ TypeCode(rffi.UINT, 'int_w', True) types = { 'c': TypeCode(lltype.Char, 'bytes_w', method=''), - 'u': TypeCode(lltype.UniChar, 'unicode_w', method=''), + 'u': TypeCode(lltype.UniChar, 'utf8_len_w', method=''), 'b': TypeCode(rffi.SIGNEDCHAR, 'int_w', True, True), 'B': TypeCode(rffi.UCHAR, 'int_w', True), 'h': TypeCode(rffi.SHORT, 'int_w', True, True), @@ -895,11 +895,17 @@ "unsigned %d-byte integer out of range", mytype.bytes) return rffi.cast(mytype.itemtype, item) - if mytype.unwrap == 'bytes_w' or mytype.unwrap == 'unicode_w': + if mytype.unwrap == 'bytes_w': if len(item) != 1: raise oefmt(space.w_TypeError, "array item must be char") item = item[0] return rffi.cast(mytype.itemtype, item) + if mytype.unwrap == 'utf8_len_w': + utf8, lgt = item + if lgt != 1: + raise oefmt(space.w_TypeError, "array item must be char") + uchar = rutf8.codepoint_at_pos(utf8, 0) + return rffi.cast(mytype.itemtype, uchar) # 
# "regular" case: it fits in an rpython integer (lltype.Signed) # or it is a float @@ -1007,7 +1013,9 @@ elif mytype.typecode == 'c': return space.newbytes(item) elif mytype.typecode == 'u': - return space.newunicode(item) + code = ord(item) + return space.newutf8(rutf8.unichr_as_utf8(code), 1, + rutf8.get_flag_from_code(code)) assert 0, "unreachable" # interface From pypy.commits at gmail.com Thu Dec 7 10:45:27 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 07 Dec 2017 07:45:27 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: remove flag handling from rutf8 Message-ID: <5a296217.03251c0a.4be64.3778@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93296:1a4e2f08f746 Date: 2017-12-07 17:02 +0200 http://bitbucket.org/pypy/pypy/changeset/1a4e2f08f746/ Log: remove flag handling from rutf8 diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -328,18 +328,18 @@ def check_utf8(s, allow_surrogates, start=0, stop=-1): """Check that 's' is a utf-8-encoded byte string. - Returns the length (number of chars) and flag or raise CheckError. + Returns the length (number of chars) or raise CheckError. If allow_surrogates is False, then also raise if we see any. Note also codepoints_in_utf8(), which also computes the length faster by assuming that 's' is valid utf-8. """ - res, flag = _check_utf8(s, allow_surrogates, start, stop) + res = _check_utf8(s, allow_surrogates, start, stop) if res >= 0: - return res, flag + return res raise CheckError(~res) -def get_utf8_length_flag(s, start=0, end=-1): - """ Get the length and flag out of valid utf8. For now just calls check_utf8 +def get_utf8_length(s, start=0, end=-1): + """ Get the length out of valid utf8. For now just calls check_utf8 """ return check_utf8(s, True, start, end) @@ -347,7 +347,6 @@ def _check_utf8(s, allow_surrogates, start, stop): pos = start continuation_bytes = 0 - flag = FLAG_ASCII if stop < 0: end = len(s) else: @@ -359,44 +358,39 @@ if ordch1 <= 0x7F: continue - if flag == FLAG_ASCII: - flag = FLAG_REGULAR - if ordch1 <= 0xC1: - return ~(pos - 1), 0 + return ~(pos - 1) if ordch1 <= 0xDF: if pos >= end: - return ~(pos - 1), 0 + return ~(pos - 1) ordch2 = ord(s[pos]) pos += 1 if _invalid_byte_2_of_2(ordch2): - return ~(pos - 2), 0 + return ~(pos - 2) # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz continuation_bytes += 1 continue if ordch1 <= 0xEF: if (pos + 2) > end: - return ~(pos - 1), 0 + return ~(pos - 1) ordch2 = ord(s[pos]) ordch3 = ord(s[pos + 1]) pos += 2 if (_invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates) or _invalid_byte_3_of_3(ordch3)): - return ~(pos - 3), 0 + return ~(pos - 3) - if allow_surrogates and _surrogate_bytes(ordch1, ordch2): - flag = FLAG_HAS_SURROGATES # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz continuation_bytes += 2 continue if ordch1 <= 0xF4: if (pos + 3) > end: - return ~(pos - 1), 0 + return ~(pos - 1) ordch2 = ord(s[pos]) ordch3 = ord(s[pos + 1]) ordch4 = ord(s[pos + 2]) @@ -405,16 +399,16 @@ if (_invalid_byte_2_of_4(ordch1, ordch2) or _invalid_byte_3_of_4(ordch3) or _invalid_byte_4_of_4(ordch4)): - return ~(pos - 4), 0 + return ~(pos - 4) # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz continuation_bytes += 3 continue - return ~(pos - 1), 0 + return ~(pos - 1) assert pos == end assert pos - continuation_bytes >= 0 - return pos - continuation_bytes, flag + return pos - continuation_bytes def reencode_utf8_with_surrogates(utf8): """ Receiving valid UTF8 which contains 
surrogates, combine surrogate @@ -472,47 +466,14 @@ return False -UTF8_INDEX_STORAGE = lltype.GcStruct('utf8_loc', - ('flag', lltype.Signed), - ('contents', lltype.Ptr(lltype.GcArray(lltype.Struct('utf8_loc_elem', +UTF8_INDEX_STORAGE = lltype.GcArray(lltype.Struct('utf8_loc_elem', ('baseindex', lltype.Signed), ('ofs', lltype.FixedSizeArray(lltype.Char, 16)), - ))))) - -def get_flag_from_code(oc): - assert isinstance(oc, int) - if oc <= 0x7F: - return FLAG_ASCII - if 0xD800 <= oc <= 0xDFFF: - return FLAG_HAS_SURROGATES - return FLAG_REGULAR - -def combine_flags(one, two): - return one | two - -FLAG_ASCII = 0 # no bits -FLAG_REGULAR = 1 # bit 0 -FLAG_HAS_SURROGATES = 3 # bit 0 and bit 1 -# note that we never need index storage if we're pure ascii, but it's useful -# for passing into W_UnicodeObject.__init__ - -#ASCII_INDEX_STORAGE_BLOCKS = 5 -#ASCII_INDEX_STORAGE = lltype.malloc(UTF8_INDEX_STORAGE.contents.TO, -# ASCII_INDEX_STORAGE_BLOCKS, -# immortal=True) -#for _i in range(ASCII_INDEX_STORAGE_BLOCKS): -# ASCII_INDEX_STORAGE[_i].baseindex = _i * 64 -# for _j in range(16): -# ASCII_INDEX_STORAGE[_i].ofs[_j] = chr(_j * 4 + 1) + )) def null_storage(): return lltype.nullptr(UTF8_INDEX_STORAGE) -UTF8_IS_ASCII = lltype.malloc(UTF8_INDEX_STORAGE, immortal=True) -UTF8_IS_ASCII.contents = lltype.nullptr(UTF8_INDEX_STORAGE.contents.TO) -UTF8_HAS_SURROGATES = lltype.malloc(UTF8_INDEX_STORAGE, immortal=True) -UTF8_HAS_SURROGATES.contents = lltype.nullptr(UTF8_INDEX_STORAGE.contents.TO) - def create_utf8_index_storage(utf8, utf8len): """ Create an index storage which stores index of each 4th character in utf8 encoded unicode string. @@ -520,23 +481,21 @@ # if len(utf8) == utf8len < ASCII_INDEX_STORAGE_BLOCKS * 64: # return ASCII_INDEX_STORAGE arraysize = utf8len // 64 + 1 - storage = lltype.malloc(UTF8_INDEX_STORAGE) - contents = lltype.malloc(UTF8_INDEX_STORAGE.contents.TO, arraysize) - storage.contents = contents + storage = lltype.malloc(UTF8_INDEX_STORAGE, arraysize) baseindex = 0 current = 0 while True: - contents[current].baseindex = baseindex + storage[current].baseindex = baseindex next = baseindex for i in range(16): if utf8len == 0: next += 1 # assume there is an extra '\x00' character else: next = next_codepoint_pos(utf8, next) - contents[current].ofs[i] = chr(next - baseindex) + storage[current].ofs[i] = chr(next - baseindex) utf8len -= 4 if utf8len < 0: - assert current + 1 == len(contents) + assert current + 1 == len(storage) break next = next_codepoint_pos(utf8, next) next = next_codepoint_pos(utf8, next) @@ -556,8 +515,8 @@ this function. """ current = index >> 6 - ofs = ord(storage.contents[current].ofs[(index >> 2) & 0x0F]) - bytepos = storage.contents[current].baseindex + ofs + ofs = ord(storage[current].ofs[(index >> 2) & 0x0F]) + bytepos = storage[current].baseindex + ofs index &= 0x3 if index == 0: return prev_codepoint_pos(utf8, bytepos) @@ -575,8 +534,8 @@ storage of type UTF8_INDEX_STORAGE """ current = index >> 6 - ofs = ord(storage.contents[current].ofs[(index >> 2) & 0x0F]) - bytepos = storage.contents[current].baseindex + ofs + ofs = ord(storage[current].ofs[(index >> 2) & 0x0F]) + bytepos = storage[current].baseindex + ofs index &= 0x3 if index == 0: return codepoint_before_pos(utf8, bytepos) @@ -596,15 +555,15 @@ is not tiny either. 
""" index_min = 0 - index_max = len(storage.contents) - 1 + index_max = len(storage) - 1 while index_min < index_max: index_middle = (index_min + index_max + 1) // 2 - base_bytepos = storage.contents[index_middle].baseindex + base_bytepos = storage[index_middle].baseindex if bytepos < base_bytepos: index_max = index_middle - 1 else: index_min = index_middle - bytepos1 = storage.contents[index_min].baseindex + bytepos1 = storage[index_min].baseindex result = index_min << 6 while bytepos1 < bytepos: bytepos1 = next_codepoint_pos(utf8, bytepos1) @@ -713,22 +672,19 @@ def __init__(self, size=0): self._s = StringBuilder(size) self._lgt = 0 - self._flag = FLAG_ASCII @always_inline def append(self, s): # for strings self._s.append(s) - newlgt, newflag = get_utf8_length_flag(s) + newlgt = get_utf8_length(s) self._lgt += newlgt - self._flag = combine_flags(self._flag, newflag) @always_inline def append_slice(self, s, start, end): self._s.append_slice(s, start, end) - newlgt, newflag = get_utf8_length_flag(s, start, end) + newlgt = get_utf8_length(s, start, end) self._lgt += newlgt - self._flag = combine_flags(self._flag, newflag) @signature(char(), returns=none()) @always_inline @@ -739,13 +695,11 @@ @try_inline def append_code(self, code): - self._flag = combine_flags(self._flag, get_flag_from_code(code)) self._lgt += 1 unichr_as_utf8_append(self._s, code, True) @always_inline - def append_utf8(self, utf8, length, flag): - self._flag = combine_flags(self._flag, flag) + def append_utf8(self, utf8, length): self._lgt += length self._s.append(utf8) @@ -754,10 +708,6 @@ return self._s.build() @always_inline - def get_flag(self): - return self._flag - - @always_inline def get_length(self): return self._lgt diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -38,30 +38,25 @@ def test_check_utf8_valid(u, allow_surrogates): _test_check_utf8(u.encode('utf-8'), allow_surrogates) +def _has_surrogates(s): + for u in s.decode('utf8'): + if 0xD800 <= ord(u) <= 0xDFFF: + return True + return False + def _test_check_utf8(s, allow_surrogates): - def _has_surrogates(s): - for u in s.decode('utf8'): - if 0xD800 <= ord(u) <= 0xDFFF: - return True - return False - try: u, _ = runicode.str_decode_utf_8(s, len(s), None, final=True, allow_surrogates=allow_surrogates) valid = True except UnicodeDecodeError as e: valid = False - length, flag = rutf8._check_utf8(s, allow_surrogates, 0, len(s)) + length = rutf8._check_utf8(s, allow_surrogates, 0, len(s)) if length < 0: assert not valid assert ~(length) == e.start else: assert valid - if flag == rutf8.FLAG_ASCII: - s.decode('ascii') # assert did not raise - elif flag == rutf8.FLAG_HAS_SURROGATES: - assert allow_surrogates - assert _has_surrogates(s) if sys.maxunicode == 0x10FFFF or not _has_surrogates(s): assert length == len(u) @@ -155,60 +150,45 @@ assert result == expected @given(strategies.lists(strategies.characters())) -def test_get_utf8_length_flag(unichars): +def test_get_utf8_length(unichars): u = u''.join(unichars) exp_lgt = len(u) - exp_flag = rutf8.FLAG_ASCII - for c in u: - if ord(c) > 0x7F: - exp_flag = rutf8.FLAG_REGULAR - if 0xD800 <= ord(c) <= 0xDFFF: - exp_flag = rutf8.FLAG_HAS_SURROGATES - break - lgt, flag = rutf8.get_utf8_length_flag(''.join([c.encode('utf8') for c in u])) - if exp_flag != rutf8.FLAG_HAS_SURROGATES or sys.maxunicode > 0xffff: + s = ''.join([c.encode('utf8') for c in u]) + lgt = rutf8.get_utf8_length(s) + if not _has_surrogates(s) or 
sys.maxunicode > 0xffff: assert lgt == exp_lgt - assert flag == exp_flag def test_utf8_string_builder(): s = rutf8.Utf8StringBuilder() s.append("foo") s.append_char("x") - assert s.get_flag() == rutf8.FLAG_ASCII assert s.get_length() == 4 assert s.build() == "foox" s.append(u"\u1234".encode("utf8")) - assert s.get_flag() == rutf8.FLAG_REGULAR assert s.get_length() == 5 assert s.build().decode("utf8") == u"foox\u1234" s.append("foo") s.append_char("x") - assert s.get_flag() == rutf8.FLAG_REGULAR assert s.get_length() == 9 assert s.build().decode("utf8") == u"foox\u1234foox" s = rutf8.Utf8StringBuilder() s.append_code(0x1234) assert s.build().decode("utf8") == u"\u1234" - assert s.get_flag() == rutf8.FLAG_REGULAR assert s.get_length() == 1 s.append_code(0xD800) - assert s.get_flag() == rutf8.FLAG_HAS_SURROGATES assert s.get_length() == 2 s = rutf8.Utf8StringBuilder() - s.append_utf8("abc", 3, rutf8.FLAG_ASCII) - assert s.get_flag() == rutf8.FLAG_ASCII + s.append_utf8("abc", 3) assert s.get_length() == 3 assert s.build().decode("utf8") == u"abc" - s.append_utf8(u"\u1234".encode("utf8"), 1, rutf8.FLAG_REGULAR) + s.append_utf8(u"\u1234".encode("utf8"), 1) assert s.build().decode("utf8") == u"abc\u1234" - assert s.get_flag() == rutf8.FLAG_REGULAR assert s.get_length() == 4 s.append_code(0xD800) - assert s.get_flag() == rutf8.FLAG_HAS_SURROGATES assert s.get_length() == 5 @given(strategies.text()) From pypy.commits at gmail.com Thu Dec 7 10:45:30 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 07 Dec 2017 07:45:30 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: remove the flag Message-ID: <5a29621a.cd4a1c0a.ed16e.4dff@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93297:db2a8c9fccf1 Date: 2017-12-07 17:44 +0200 http://bitbucket.org/pypy/pypy/changeset/db2a8c9fccf1/ Log: remove the flag diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1087,8 +1087,11 @@ def newlist_utf8(self, list_u, is_ascii): l_w = [None] * len(list_u) for i, item in enumerate(list_u): - length, flag = rutf8.check_utf8(item, True) - l_w[i] = self.newutf8(item, length, flag) + if not is_ascii: + length = rutf8.check_utf8(item, True) + else: + length = len(item) + l_w[i] = self.newutf8(item, length) return self.newlist(l_w) def newlist_int(self, list_i): diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py --- a/pypy/interpreter/pyparser/parsestring.py +++ b/pypy/interpreter/pyparser/parsestring.py @@ -64,8 +64,8 @@ r = unicodehelper.decode_raw_unicode_escape(space, substr) else: r = unicodehelper.decode_unicode_escape(space, substr) - v, length, flag = r - return space.newutf8(v, length, flag) + v, length = r + return space.newutf8(v, length) need_encoding = (encoding is not None and encoding != "utf-8" and encoding != "utf8" and @@ -74,8 +74,8 @@ substr = s[ps : q] if rawmode or '\\' not in s[ps:]: if need_encoding: - lgt, flag = unicodehelper.check_utf8_or_raise(space, substr) - w_u = space.newutf8(substr, lgt, flag) + lgt = unicodehelper.check_utf8_or_raise(space, substr) + w_u = space.newutf8(substr, lgt) w_v = unicodehelper.encode(space, w_u, encoding) return w_v else: @@ -234,8 +234,8 @@ p = ps while p < end and ord(s[p]) & 0x80: p += 1 - lgt, flag = unicodehelper.check_utf8_or_raise(space, s, ps, p) - w_v = unicodehelper.encode(space, space.newutf8(s[ps:p], lgt, flag), + lgt = unicodehelper.check_utf8_or_raise(space, s, ps, p) + w_v = 
unicodehelper.encode(space, space.newutf8(s[ps:p], lgt), recode_encoding) v = space.bytes_w(w_v) return v, p diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -10,13 +10,13 @@ return str_decode_utf8(u, True, "strict", None) def test_decode_utf8(): - assert decode_utf8("abc") == ("abc", 3, 3, rutf8.FLAG_ASCII) - assert decode_utf8("\xe1\x88\xb4") == ("\xe1\x88\xb4", 3, 1, rutf8.FLAG_REGULAR) - assert decode_utf8("\xed\xa0\x80") == ("\xed\xa0\x80", 3, 1, rutf8.FLAG_HAS_SURROGATES) - assert decode_utf8("\xed\xb0\x80") == ("\xed\xb0\x80", 3, 1, rutf8.FLAG_HAS_SURROGATES) + assert decode_utf8("abc") == ("abc", 3, 3) + assert decode_utf8("\xe1\x88\xb4") == ("\xe1\x88\xb4", 3, 1) + assert decode_utf8("\xed\xa0\x80") == ("\xed\xa0\x80", 3, 1) + assert decode_utf8("\xed\xb0\x80") == ("\xed\xb0\x80", 3, 1) assert decode_utf8("\xed\xa0\x80\xed\xb0\x80") == ( - "\xed\xa0\x80\xed\xb0\x80", 6, 2, rutf8.FLAG_HAS_SURROGATES) - assert decode_utf8("\xf0\x90\x80\x80") == ("\xf0\x90\x80\x80", 4, 1, rutf8.FLAG_REGULAR) + "\xed\xa0\x80\xed\xb0\x80", 6, 2) + assert decode_utf8("\xf0\x90\x80\x80") == ("\xf0\x90\x80\x80", 4, 1) def test_utf8_encode_ascii(): assert utf8_encode_ascii("abc", "??", "??") == "abc" @@ -41,19 +41,19 @@ assert utf8_encode_ascii(u.encode("utf8"), "replace", eh) == u.encode("ascii", "replace") def test_str_decode_ascii(): - assert str_decode_ascii("abc", "??", True, "??") == ("abc", 3, 3, rutf8.FLAG_ASCII) + assert str_decode_ascii("abc", "??", True, "??") == ("abc", 3, 3) def eh(errors, encoding, reason, p, start, end): lst.append((errors, encoding, p, start, end)) return u"\u1234\u5678".encode("utf8"), end lst = [] input = "\xe8" exp = u"\u1234\u5678".encode("utf8") - assert str_decode_ascii(input, "??", True, eh) == (exp, 1, 2, rutf8.FLAG_REGULAR) + assert str_decode_ascii(input, "??", True, eh) == (exp, 1, 2) assert lst == [("??", "ascii", input, 0, 1)] lst = [] input = "\xe8\xe9abc\xea\xeb" assert str_decode_ascii(input, "??", True, eh) == ( - exp + exp + "abc" + exp + exp, 7, 11, rutf8.FLAG_REGULAR) + exp + exp + "abc" + exp + exp, 7, 11) assert lst == [("??", "ascii", input, 0, 1), ("??", "ascii", input, 1, 2), ("??", "ascii", input, 5, 6), diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -3,7 +3,6 @@ from pypy.interpreter.error import OperationError from rpython.rlib.objectmodel import specialize from rpython.rlib import rutf8 -from rpython.rlib.rutf8 import combine_flags from rpython.rlib.rarithmetic import r_uint, intmask from rpython.rlib.rstring import StringBuilder from pypy.module._codecs import interp_codecs @@ -26,10 +25,10 @@ # Fast version of the "strict" errors handler. 
def raise_unicode_exception_encode(errors, encoding, msg, utf8, startingpos, endingpos): - u_len, flag = rutf8.check_utf8(utf8, True) + u_len = rutf8.check_utf8(utf8, True) raise OperationError(space.w_UnicodeEncodeError, space.newtuple([space.newtext(encoding), - space.newutf8(utf8, u_len, flag), + space.newutf8(utf8, u_len), space.newint(startingpos), space.newint(endingpos), space.newtext(msg)])) @@ -55,18 +54,18 @@ def decode_unicode_escape(space, string): state = space.fromcache(interp_codecs.CodecState) unicodedata_handler = state.get_unicodedata_handler(space) - result_utf8, consumed, length, flag = str_decode_unicode_escape( + result_utf8, consumed, length = str_decode_unicode_escape( string, "strict", final=True, errorhandler=decode_error_handler(space), ud_handler=unicodedata_handler) - return result_utf8, length, flag + return result_utf8, length def decode_raw_unicode_escape(space, string): - result_utf8, consumed, lgt, flag = str_decode_raw_unicode_escape( + result_utf8, consumed, lgt = str_decode_raw_unicode_escape( string, "strict", final=True, errorhandler=decode_error_handler(space)) - return result_utf8, lgt, flag + return result_utf8, lgt def check_ascii_or_raise(space, string): try: @@ -83,19 +82,19 @@ # you still get two surrogate unicode characters in the result. # These are the Python2 rules; Python3 differs. try: - length, flag = rutf8.check_utf8(string, True, start, end) + length = rutf8.check_utf8(string, True, start, end) except rutf8.CheckError as e: # convert position into unicode position - lgt, flags = rutf8.check_utf8(string, True, start, stop=e.pos) + lgt = rutf8.check_utf8(string, True, start, stop=e.pos) decode_error_handler(space)('strict', 'utf8', 'invalid utf-8', string, start + lgt, start + lgt + 1) assert False, "unreachable" - return length, flag + return length def str_decode_ascii(s, errors, final, errorhandler): try: rutf8.check_ascii(s) - return s, len(s), len(s), rutf8.FLAG_ASCII + return s, len(s), len(s) except rutf8.CheckError: return _str_decode_ascii_slowpath(s, errors, final, errorhandler) @@ -112,13 +111,13 @@ res.append(ch) i += 1 ress = res.build() - lgt, flag = rutf8.check_utf8(ress, True) - return ress, len(s), lgt, flag + lgt = rutf8.check_utf8(ress, True) + return ress, len(s), lgt def str_decode_latin_1(s, errors, final, errorhandler): try: rutf8.check_ascii(s) - return s, len(s), len(s), rutf8.FLAG_ASCII + return s, len(s), len(s) except rutf8.CheckError: return _str_decode_latin_1_slowpath(s, errors, final, errorhandler) @@ -138,7 +137,7 @@ res.append_slice(s, start, end) i = end # cannot be ASCII, cannot have surrogates, I believe - return res.build(), len(s), len(s), rutf8.FLAG_REGULAR + return res.build(), len(s), len(s) def utf8_encode_latin_1(s, errors, errorhandler): try: @@ -336,8 +335,7 @@ res.append(r) r = res.build() - lgt, flag = rutf8.check_utf8(r, True) - return r, pos, lgt, flag + return r, pos, rutf8.check_utf8(r, True) hexdigits = "0123456789ABCDEFabcdef" @@ -350,7 +348,7 @@ endinpos += 1 res, pos = errorhandler(errors, encoding, message, s, pos-2, endinpos) - size, flag = rutf8.check_utf8(res, True) + size = rutf8.check_utf8(res, True) builder.append(res) else: try: @@ -361,7 +359,7 @@ endinpos += 1 res, pos = errorhandler(errors, encoding, message, s, pos-2, endinpos) - size, flag = rutf8.check_utf8(res, True) + size = rutf8.check_utf8(res, True) builder.append(res) else: # when we get here, chr is a 32-bit unicode character @@ -371,21 +369,19 @@ message = "illegal Unicode character" res, pos = 
errorhandler(errors, encoding, message, s, pos-2, pos+digits) - size, flag = rutf8.check_utf8(res, True) + size = rutf8.check_utf8(res, True) builder.append(res) else: - flag = rutf8.get_flag_from_code(intmask(chr)) pos += digits size = 1 - return pos, size, flag + return pos, size def str_decode_unicode_escape(s, errors, final, errorhandler, ud_handler): size = len(s) if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII + return '', 0, 0 - flag = rutf8.FLAG_ASCII builder = StringBuilder(size) pos = 0 outsize = 0 @@ -396,7 +392,6 @@ if ch != '\\': if ord(ch) > 0x7F: rutf8.unichr_as_utf8_append(builder, ord(ch)) - flag = combine_flags(rutf8.FLAG_REGULAR, flag) else: builder.append(ch) pos += 1 @@ -409,9 +404,8 @@ message = "\\ at end of string" res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, size) - newsize, newflag = rutf8.check_utf8(res, True) + newsize = rutf8.check_utf8(res, True) outsize + newsize - flag = combine_flags(flag, newflag) builder.append(res) continue @@ -464,7 +458,6 @@ outsize += 1 if x > 0x7F: rutf8.unichr_as_utf8_append(builder, x) - flag = combine_flags(rutf8.FLAG_REGULAR, flag) else: builder.append(chr(x)) # hex escapes @@ -472,27 +465,24 @@ elif ch == 'x': digits = 2 message = "truncated \\xXX escape" - pos, newsize, newflag = hexescape(builder, s, pos, digits, + pos, newsize = hexescape(builder, s, pos, digits, "unicodeescape", errorhandler, message, errors) - flag = combine_flags(flag, newflag) outsize += newsize # \uXXXX elif ch == 'u': digits = 4 message = "truncated \\uXXXX escape" - pos, newsize, newflag = hexescape(builder, s, pos, digits, + pos, newsize = hexescape(builder, s, pos, digits, "unicodeescape", errorhandler, message, errors) - flag = combine_flags(flag, newflag) outsize += newsize # \UXXXXXXXX elif ch == 'U': digits = 8 message = "truncated \\UXXXXXXXX escape" - pos, newsize, newflag = hexescape(builder, s, pos, digits, + pos, newsize = hexescape(builder, s, pos, digits, "unicodeescape", errorhandler, message, errors) - flag = combine_flags(flag, newflag) outsize += newsize # \N{name} @@ -512,29 +502,25 @@ if code < 0: res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) - newsize, newflag = rutf8.check_utf8(res, True) - flag = combine_flags(flag, newflag) + newsize = rutf8.check_utf8(res, True) outsize += newsize builder.append(res) continue pos = look + 1 outsize += 1 - flag = combine_flags(flag, rutf8.get_flag_from_code(code)) rutf8.unichr_as_utf8_append(builder, code, allow_surrogates=True) # xxx 'code' is probably always within range here... 
else: res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) - newsize, newflag = rutf8.check_utf8(res, True) - flag = combine_flags(flag, newflag) + newsize = rutf8.check_utf8(res, True) outsize += newsize builder.append(res) else: res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) - newsize, newflag = rutf8.check_utf8(res, True) - flag = combine_flags(flag, newflag) + newsize = rutf8.check_utf8(res, True) outsize += newsize builder.append(res) else: @@ -542,7 +528,7 @@ builder.append(ch) outsize += 2 - return builder.build(), pos, outsize, flag + return builder.build(), pos, outsize # ____________________________________________________________ # Raw unicode escape @@ -551,7 +537,7 @@ errorhandler=None): size = len(s) if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII + return '', 0, 0 result = StringBuilder(size) pos = 0 @@ -593,8 +579,8 @@ "rawunicodeescape", errorhandler, message, errors) r = result.build() - lgt, flag = rutf8.check_utf8(r, True) - return r, pos, lgt, flag + lgt = rutf8.check_utf8(r, True) + return r, pos, lgt _utf8_encode_unicode_escape = rutf8.make_utf8_escape_function() @@ -729,7 +715,7 @@ errorhandler=None): size = len(s) if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII + return '', 0, 0 inShift = False base64bits = 0 @@ -740,7 +726,6 @@ result = StringBuilder(size) pos = 0 shiftOutStartPos = 0 - flag = rutf8.FLAG_ASCII startinpos = 0 while pos < size: ch = s[pos] @@ -766,13 +751,11 @@ (outCh & 0x3FF)) + 0x10000 rutf8.unichr_as_utf8_append(result, code) outsize += 1 - flag = combine_flags(flag, rutf8.FLAG_REGULAR) surrogate = 0 continue else: rutf8.unichr_as_utf8_append(result, surrogate, allow_surrogates=True) - flag = rutf8.FLAG_HAS_SURROGATES outsize += 1 surrogate = 0 # Not done with outCh: falls back to next line @@ -780,8 +763,6 @@ # first surrogate surrogate = outCh else: - flag = combine_flags(flag, - rutf8.get_flag_from_code(outCh)) outsize += 1 assert outCh >= 0 rutf8.unichr_as_utf8_append(result, outCh, True) @@ -797,9 +778,8 @@ msg = "partial character in shift sequence" res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos) - reslen, resflags = rutf8.check_utf8(res, True) + reslen = rutf8.check_utf8(res, True) outsize += reslen - flag = combine_flags(flag, resflags) result.append(res) continue else: @@ -809,15 +789,13 @@ msg = "non-zero padding bits in shift sequence" res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos) - reslen, resflags = rutf8.check_utf8(res, True) + reslen = rutf8.check_utf8(res, True) outsize += reslen - flag = combine_flags(flag, resflags) result.append(res) continue if surrogate and _utf7_DECODE_DIRECT(ord(ch)): outsize += 1 - flag = rutf8.FLAG_HAS_SURROGATES rutf8.unichr_as_utf8_append(result, surrogate, True) surrogate = 0 @@ -849,9 +827,8 @@ pos += 1 msg = "unexpected special character" res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos) - reslen, resflags = rutf8.check_utf8(res, True) + reslen = rutf8.check_utf8(res, True) outsize += reslen - flag = combine_flags(flag, resflags) result.append(res) # end of string @@ -864,9 +841,8 @@ (base64bits > 0 and base64buffer != 0)): msg = "unterminated shift sequence" res, pos = errorhandler(errors, 'utf7', msg, s, shiftOutStartPos, pos) - reslen, resflags = rutf8.check_utf8(res, True) + reslen = rutf8.check_utf8(res, True) outsize += reslen - flag = combine_flags(flag, resflags) result.append(res) final_length = result.getlength() elif inShift: @@ -874,7 +850,7 @@ final_length = shiftOutStartPos # back off output 
assert final_length >= 0 - return result.build()[:final_length], pos, outsize, flag + return result.build()[:final_length], pos, outsize def utf8_encode_utf_7(s, errors, errorhandler): size = len(s) @@ -937,21 +913,21 @@ def str_decode_utf_16(s, errors, final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_16_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_16_helper(s, errors, final, errorhandler, "native") - return result, c, lgt, flag + return result, c, lgt def str_decode_utf_16_be(s, errors, final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_16_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_16_helper(s, errors, final, errorhandler, "big") - return result, c, lgt, flag + return result, c, lgt def str_decode_utf_16_le(s, errors, final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_16_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_16_helper(s, errors, final, errorhandler, "little") - return result, c, lgt, flag + return result, c, lgt def str_decode_utf_16_helper(s, errors, final=True, errorhandler=None, @@ -994,7 +970,7 @@ else: bo = 1 if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII, bo + return '', 0, 0, bo if bo == -1: # force little endian ihi = 1 @@ -1053,8 +1029,8 @@ s, pos - 2, pos) result.append(r) r = result.build() - lgt, flag = rutf8.check_utf8(r, True) - return result.build(), pos, lgt, flag, bo + lgt = rutf8.check_utf8(r, True) + return result.build(), pos, lgt, bo def _STORECHAR(result, CH, byteorder): hi = chr(((CH) >> 8) & 0xff) @@ -1143,21 +1119,21 @@ def str_decode_utf_32(s, errors, final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_32_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_32_helper(s, errors, final, errorhandler, "native") - return result, c, lgt, flag + return result, c, lgt def str_decode_utf_32_be(s, errors, final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_32_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_32_helper(s, errors, final, errorhandler, "big") - return result, c, lgt, flag + return result, c, lgt def str_decode_utf_32_le(s, errors, final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_32_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_32_helper(s, errors, final, errorhandler, "little") - return result, c, lgt, flag + return result, c, lgt BOM32_DIRECT = intmask(0x0000FEFF) BOM32_REVERSE = intmask(0xFFFE0000) @@ -1203,7 +1179,7 @@ else: bo = 1 if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII, bo + return '', 0, 0, bo if bo == -1: # force little endian iorder = [0, 1, 2, 3] @@ -1238,8 +1214,8 @@ rutf8.unichr_as_utf8_append(result, ch, allow_surrogates=True) pos += 4 r = result.build() - lgt, flag = rutf8.check_utf8(r, True) - return r, pos, lgt, flag, bo + lgt = rutf8.check_utf8(r, True) + return r, pos, lgt, bo def _STORECHAR32(result, CH, byteorder): c0 = chr(((CH) >> 24) & 0xff) @@ -1325,7 +1301,7 @@ errorhandler=None): size = len(s) if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII + return '', 0, 0 unicode_bytes = 4 if BYTEORDER == "little": @@ -1362,8 +1338,8 @@ rutf8.unichr_as_utf8_append(result, intmask(t), allow_surrogates=True) pos += unicode_bytes r = result.build() - lgt, flag = rutf8.check_utf8(r, True) - return r, pos, lgt, flag + lgt = rutf8.check_utf8(r, True) + return r, pos, lgt def utf8_encode_unicode_internal(s, errors, errorhandler): size = len(s) @@ -1404,7 +1380,7 @@ errorhandler=errorhandler) size = 
len(s) if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII + return '', 0, 0 pos = 0 result = StringBuilder(size) @@ -1421,8 +1397,8 @@ result.append(c) pos += 1 r = result.build() - lgt, flag = rutf8.check_utf8(r, True) - return r, pos, lgt, flag + lgt = rutf8.check_utf8(r, True) + return r, pos, lgt def utf8_encode_charmap(s, errors, errorhandler=None, mapping=None): diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py --- a/pypy/module/__builtin__/operation.py +++ b/pypy/module/__builtin__/operation.py @@ -26,14 +26,8 @@ "Return a Unicode string of one character with the given ordinal." if code < 0 or code > 0x10FFFF: raise oefmt(space.w_ValueError, "unichr() arg out of range") - elif code < 0x80: - flag = rutf8.FLAG_ASCII - elif 0xD800 <= code <= 0xDFFF: - flag = rutf8.FLAG_HAS_SURROGATES - else: - flag = rutf8.FLAG_REGULAR s = rutf8.unichr_as_utf8(code, allow_surrogates=True) - return space.newutf8(s, 1, flag) + return space.newutf8(s, 1) def len(space, w_obj): "len(object) -> integer\n\nReturn the number of items of a sequence or mapping." diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -183,8 +183,7 @@ raise oefmt(self.space.w_ValueError, "%s out of range for conversion to unicode: %s", self.name, s) - flag = rutf8.get_flag_from_code(intmask(value)) - return self.space.newutf8(utf8, 1, flag) + return self.space.newutf8(utf8, 1) def string(self, cdataobj, maxlen): with cdataobj as ptr: @@ -215,15 +214,15 @@ def unpack_ptr(self, w_ctypeptr, ptr, length): if self.size == 2: - utf8, lgt, flag = wchar_helper.utf8_from_char16(ptr, length) + utf8, lgt = wchar_helper.utf8_from_char16(ptr, length) else: try: - utf8, lgt, flag = wchar_helper.utf8_from_char32(ptr, length) + utf8, lgt = wchar_helper.utf8_from_char32(ptr, length) except wchar_helper.OutOfRange as e: raise oefmt(self.space.w_ValueError, "%s out of range for conversion to unicode: %s", self.name, hex(e.ordinal)) - return self.space.newutf8(utf8, lgt, flag) + return self.space.newutf8(utf8, lgt) class W_CTypePrimitiveSigned(W_CTypePrimitive): diff --git a/pypy/module/_cffi_backend/wchar_helper.py b/pypy/module/_cffi_backend/wchar_helper.py --- a/pypy/module/_cffi_backend/wchar_helper.py +++ b/pypy/module/_cffi_backend/wchar_helper.py @@ -19,16 +19,14 @@ ptr = rffi.cast(rffi.UINTP, ptr) u = StringBuilder(length) j = 0 - flag = rutf8.FLAG_ASCII while j < length: ch = intmask(ptr[j]) j += 1 - flag = rutf8.combine_flags(flag, rutf8.get_flag_from_code(ch)) try: rutf8.unichr_as_utf8_append(u, ch, allow_surrogates=True) except ValueError: raise OutOfRange(ch) - return u.build(), length, flag + return u.build(), length def utf8_from_char16(ptr, length): # 'ptr' is a pointer to 'length' 16-bit integers @@ -36,7 +34,6 @@ u = StringBuilder(length) j = 0 result_length = length - flag = rutf8.FLAG_ASCII while j < length: ch = intmask(ptr[j]) j += 1 @@ -46,9 +43,8 @@ ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000 j += 1 result_length -= 1 - flag = rutf8.combine_flags(flag, rutf8.get_flag_from_code(ch)) rutf8.unichr_as_utf8_append(u, ch, allow_surrogates=True) - return u.build(), result_length, flag + return u.build(), result_length @specialize.ll() diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -43,8 +43,8 @@ length = len(input) else: w_cls = 
space.w_UnicodeEncodeError - length, flag = rutf8.check_utf8(input, allow_surrogates=True) - w_input = space.newutf8(input, length, flag) + length = rutf8.check_utf8(input, allow_surrogates=True) + w_input = space.newutf8(input, length) w_exc = space.call_function( w_cls, space.newtext(encoding), @@ -192,7 +192,7 @@ def ignore_errors(space, w_exc): check_exception(space, w_exc) w_end = space.getattr(w_exc, space.newtext('end')) - return space.newtuple([space.newutf8('', 0, rutf8.FLAG_ASCII), w_end]) + return space.newtuple([space.newutf8('', 0), w_end]) REPLACEMENT = u'\ufffd'.encode('utf8') @@ -203,13 +203,13 @@ size = space.int_w(w_end) - space.int_w(w_start) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): text = '?' * size - return space.newtuple([space.newutf8(text, size, rutf8.FLAG_ASCII), w_end]) + return space.newtuple([space.newutf8(text, size), w_end]) elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError): text = REPLACEMENT - return space.newtuple([space.newutf8(text, 1, rutf8.FLAG_REGULAR), w_end]) + return space.newtuple([space.newutf8(text, 1), w_end]) elif space.isinstance_w(w_exc, space.w_UnicodeTranslateError): text = REPLACEMENT * size - return space.newtuple([space.newutf8(text, size, rutf8.FLAG_REGULAR), w_end]) + return space.newtuple([space.newutf8(text, size), w_end]) else: raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) @@ -237,8 +237,8 @@ builder.append(";") pos = rutf8.next_codepoint_pos(obj, pos) r = builder.build() - lgt, flag = rutf8.check_utf8(r, True) - return space.newtuple([space.newutf8(r, lgt, flag), w_end]) + lgt = rutf8.check_utf8(r, True) + return space.newtuple([space.newutf8(r, lgt), w_end]) else: raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) @@ -278,8 +278,8 @@ builder.append_slice(num, 2, lnum) pos = rutf8.next_codepoint_pos(obj, pos) r = builder.build() - lgt, flag = rutf8.check_utf8(r, True) - return space.newtuple([space.newutf8(r, lgt, flag), w_end]) + lgt = rutf8.check_utf8(r, True) + return space.newtuple([space.newutf8(r, lgt), w_end]) else: raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) @@ -417,9 +417,9 @@ final = space.is_true(w_final) state = space.fromcache(CodecState) func = getattr(unicodehelper, rname) - result, consumed, length, flag = func(string, errors, + result, consumed, length = func(string, errors, final, state.decode_error_handler) - return space.newtuple([space.newutf8(result, length, flag), + return space.newtuple([space.newutf8(result, length), space.newint(consumed)]) wrap_decoder.func_name = rname globals()[name] = wrap_decoder @@ -488,14 +488,14 @@ state = space.fromcache(CodecState) # call the fast version for checking try: - lgt, flag = rutf8.check_utf8(string, allow_surrogates=True) + lgt = rutf8.check_utf8(string, allow_surrogates=True) except rutf8.CheckError: - res, consumed, lgt, flag = unicodehelper.str_decode_utf8(string, + res, consumed, lgt = unicodehelper.str_decode_utf8(string, errors, final, state.decode_error_handler) - return space.newtuple([space.newutf8(res, lgt, flag), + return space.newtuple([space.newutf8(res, lgt), space.newint(consumed)]) else: - return space.newtuple([space.newutf8(string, lgt, flag), + return space.newtuple([space.newutf8(string, lgt), space.newint(len(string))]) @unwrap_spec(data='bufferstr', errors='text_or_none', byteorder=int, @@ -516,10 +516,10 @@ consumed = len(data) if final: consumed = 0 - res, consumed, lgt, flag, byteorder = 
str_decode_utf_16_helper( + res, consumed, lgt, byteorder = str_decode_utf_16_helper( data, errors, final, state.decode_error_handler, byteorder) - return space.newtuple([space.newutf8(res, lgt, flag), + return space.newtuple([space.newutf8(res, lgt), space.newint(consumed), space.newint(byteorder)]) @@ -539,10 +539,10 @@ consumed = len(data) if final: consumed = 0 - res, consumed, lgt, flag, byteorder = str_decode_utf_32_helper( + res, consumed, lgt, byteorder = str_decode_utf_32_helper( data, errors, final, state.decode_error_handler, byteorder) - return space.newtuple([space.newutf8(res, lgt, flag), + return space.newtuple([space.newutf8(res, lgt), space.newint(consumed), space.newint(byteorder)]) @@ -632,7 +632,7 @@ if errors is None: errors = 'strict' if len(string) == 0: - return space.newtuple([space.newutf8('', 0, rutf8.FLAG_ASCII), + return space.newtuple([space.newutf8('', 0), space.newint(0)]) if space.is_none(w_mapping): @@ -642,9 +642,9 @@ final = True state = space.fromcache(CodecState) - result, consumed, lgt, flag = unicodehelper.str_decode_charmap( + result, consumed, lgt = unicodehelper.str_decode_charmap( string, errors, final, state.decode_error_handler, mapping) - return space.newtuple([space.newutf8(result, lgt, flag), + return space.newtuple([space.newutf8(result, lgt), space.newint(consumed)]) @unwrap_spec(errors='text_or_none') @@ -708,12 +708,12 @@ unicode_name_handler = state.get_unicodedata_handler(space) - result, consumed, lgt, flag = unicodehelper.str_decode_unicode_escape( + result, consumed, lgt = unicodehelper.str_decode_unicode_escape( string, errors, final, state.decode_error_handler, unicode_name_handler) - return space.newtuple([space.newutf8(result, lgt, flag), space.newint(consumed)]) + return space.newtuple([space.newutf8(result, lgt), space.newint(consumed)]) # ____________________________________________________________ # Unicode-internal @@ -731,15 +731,15 @@ string = space.readbuf_w(w_string).as_str() if len(string) == 0: - return space.newtuple([space.newutf8('', 0, rutf8.FLAG_ASCII), + return space.newtuple([space.newutf8('', 0), space.newint(0)]) final = True state = space.fromcache(CodecState) - result, consumed, lgt, flag = unicodehelper.str_decode_unicode_internal( + result, consumed, lgt = unicodehelper.str_decode_unicode_internal( string, errors, final, state.decode_error_handler) - return space.newtuple([space.newutf8(result, lgt, flag), + return space.newtuple([space.newutf8(result, lgt), space.newint(consumed)]) # ____________________________________________________________ diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -11,8 +11,8 @@ from rpython.rlib.rarithmetic import intmask, r_uint, r_ulonglong from rpython.rlib.rbigint import rbigint from rpython.rlib.rstring import StringBuilder -from rpython.rlib.rutf8 import ( - FLAG_ASCII, check_utf8, next_codepoint_pos, codepoints_in_utf8) +from rpython.rlib.rutf8 import (check_utf8, next_codepoint_pos, + codepoints_in_utf8) STATE_ZERO, STATE_OK, STATE_DETACHED = range(3) @@ -31,22 +31,22 @@ def __init__(self, space): self.w_newlines_dict = { - SEEN_CR: space.newutf8("\r", 1, FLAG_ASCII), - SEEN_LF: space.newutf8("\n", 1, FLAG_ASCII), - SEEN_CRLF: space.newutf8("\r\n", 2, FLAG_ASCII), + SEEN_CR: space.newutf8("\r", 1), + SEEN_LF: space.newutf8("\n", 1), + SEEN_CRLF: space.newutf8("\r\n", 2), SEEN_CR | SEEN_LF: space.newtuple( - [space.newutf8("\r", 1, FLAG_ASCII), - 
space.newutf8("\n", 1, FLAG_ASCII)]), + [space.newutf8("\r", 1), + space.newutf8("\n", 1)]), SEEN_CR | SEEN_CRLF: space.newtuple( - [space.newutf8("\r", 1, FLAG_ASCII), - space.newutf8("\r\n", 2, FLAG_ASCII)]), + [space.newutf8("\r", 1), + space.newutf8("\r\n", 2)]), SEEN_LF | SEEN_CRLF: space.newtuple( - [space.newutf8("\n", 1, FLAG_ASCII), - space.newutf8("\r\n", 2, FLAG_ASCII)]), + [space.newutf8("\n", 1), + space.newutf8("\r\n", 2)]), SEEN_CR | SEEN_LF | SEEN_CRLF: space.newtuple( - [space.newutf8("\r", 1, FLAG_ASCII), - space.newutf8("\n", 1, FLAG_ASCII), - space.newutf8("\r\n", 2, FLAG_ASCII)]), + [space.newutf8("\r", 1), + space.newutf8("\n", 1), + space.newutf8("\r\n", 2)]), } @unwrap_spec(translate=int) @@ -98,7 +98,7 @@ output_len -= 1 if output_len == 0: - return space.newutf8("", 0, FLAG_ASCII) + return space.newutf8("", 0) # Record which newlines are read and do newline translation if # desired, all in one pass. @@ -153,8 +153,8 @@ output = builder.build() self.seennl |= seennl - lgt, flag = check_utf8(output, True) - return space.newutf8(output, lgt, flag) + lgt = check_utf8(output, True) + return space.newutf8(output, lgt) def reset_w(self, space): self.seennl = 0 diff --git a/pypy/module/_multibytecodec/interp_incremental.py b/pypy/module/_multibytecodec/interp_incremental.py --- a/pypy/module/_multibytecodec/interp_incremental.py +++ b/pypy/module/_multibytecodec/interp_incremental.py @@ -66,8 +66,8 @@ pos = c_codecs.pypy_cjk_dec_inbuf_consumed(self.decodebuf) assert 0 <= pos <= len(object) self.pending = object[pos:] - lgt, flag = rutf8.get_utf8_length_flag(output) - return space.newutf8(output, lgt, flag) + lgt = rutf8.get_utf8_length_flag(output) + return space.newutf8(output, lgt) @unwrap_spec(errors="text_or_none") diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py --- a/pypy/module/_multibytecodec/interp_multibytecodec.py +++ b/pypy/module/_multibytecodec/interp_multibytecodec.py @@ -78,12 +78,11 @@ space.newtext(e.reason)])) def wrap_unicodeencodeerror(space, e, input, inputlen, name): - _, flag = rutf8.check_utf8(input, True) raise OperationError( space.w_UnicodeEncodeError, space.newtuple([ space.newtext(name), - space.newutf8(input, inputlen, flag), + space.newutf8(input, inputlen), space.newint(e.start), space.newint(e.end), space.newtext(e.reason)])) diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -295,15 +295,15 @@ if bits & 0x80: # the 8th bit is set, it's an utf8 string content_utf8 = self.getslice(start, end) - lgt, flag = unicodehelper.check_utf8_or_raise(self.space, + lgt = unicodehelper.check_utf8_or_raise(self.space, content_utf8) - return self.space.newutf8(content_utf8, lgt, flag) + return self.space.newutf8(content_utf8, lgt) else: # ascii only, fast path (ascii is a strict subset of # latin1, and we already checked that all the chars are < # 128) return self.space.newutf8(self.getslice(start, end), - end - start, rutf8.FLAG_ASCII) + end - start) def decode_string_escaped(self, start): i = self.pos @@ -316,10 +316,10 @@ i += 1 if ch == '"': content_utf8 = builder.build() - lgt, f = unicodehelper.check_utf8_or_raise(self.space, + lgt = unicodehelper.check_utf8_or_raise(self.space, content_utf8) self.pos = i - return self.space.newutf8(content_utf8, lgt, f) + return self.space.newutf8(content_utf8, lgt) elif ch == '\\': i = 
self.decode_escape_sequence(i, builder) elif ch < '\x20': diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py --- a/pypy/module/_pypyjson/test/test__pypyjson.py +++ b/pypy/module/_pypyjson/test/test__pypyjson.py @@ -11,7 +11,7 @@ dec.close() class FakeSpace(object): - def newutf8(self, s, l, f): + def newutf8(self, s, l): return s def test_decode_key(): diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py --- a/pypy/module/_rawffi/alt/type_converter.py +++ b/pypy/module/_rawffi/alt/type_converter.py @@ -228,8 +228,7 @@ return space.newbytes(chr(ucharval)) elif w_ffitype.is_unichar(): wcharval = self.get_unichar(w_ffitype) - return space.newutf8(rutf8.unichr_as_utf8(wcharval), 1, - rutf8.get_flag_from_code(intmask(wcharval))) + return space.newutf8(rutf8.unichr_as_utf8(wcharval), 1) elif w_ffitype.is_double(): return self._float(w_ffitype) elif w_ffitype.is_singlefloat(): diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -43,8 +43,8 @@ return space.newbytes(ctx._string[start:end]) elif isinstance(ctx, rsre_core.UnicodeMatchContext): s = ctx._unicodestr[start:end] - lgt, flag = rutf8.check_utf8(s, True) - return space.newutf8(s, lgt, flag) + lgt = rutf8.check_utf8(s, True) + return space.newutf8(s, lgt) else: # unreachable raise SystemError @@ -341,11 +341,10 @@ else: assert unicodebuilder is not None return space.newutf8(unicodebuilder.build(), - unicodebuilder.get_length(), - unicodebuilder.get_flag()), n + unicodebuilder.get_length()), n else: if space.isinstance_w(w_string, space.w_unicode): - w_emptystr = space.newutf8('', 0, rutf8.FLAG_ASCII) + w_emptystr = space.newutf8('', 0) else: w_emptystr = space.newbytes('') w_item = space.call_method(w_emptystr, 'join', @@ -579,8 +578,8 @@ elif isinstance(ctx, rsre_core.StrMatchContext): return space.newbytes(ctx._string) elif isinstance(ctx, rsre_core.UnicodeMatchContext): - lgt, flag = rutf8.check_utf8(ctx._unicodestr, True) - return space.newutf8(ctx._unicodestr, lgt, flag) + lgt = rutf8.check_utf8(ctx._unicodestr, True) + return space.newutf8(ctx._unicodestr, lgt) else: raise SystemError diff --git a/pypy/module/_warnings/interp_warnings.py b/pypy/module/_warnings/interp_warnings.py --- a/pypy/module/_warnings/interp_warnings.py +++ b/pypy/module/_warnings/interp_warnings.py @@ -214,8 +214,8 @@ message = "%s:%d: %s: %s\n" % (space.utf8_w(w_filename), lineno, space.utf8_w(w_name), space.utf8_w(w_text)) - lgt, flag = rutf8.check_utf8(message, True) - w_message = space.newutf8(message, lgt, flag) + lgt = rutf8.check_utf8(message, True) + w_message = space.newutf8(message, lgt) else: w_message = space.newtext(message) space.call_method(w_stderr, "write", w_message) diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -1014,8 +1014,7 @@ return space.newbytes(item) elif mytype.typecode == 'u': code = ord(item) - return space.newutf8(rutf8.unichr_as_utf8(code), 1, - rutf8.get_flag_from_code(code)) + return space.newutf8(rutf8.unichr_as_utf8(code), 1) assert 0, "unreachable" # interface diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py --- a/pypy/module/exceptions/interp_exceptions.py +++ b/pypy/module/exceptions/interp_exceptions.py @@ -78,7 +78,6 @@ from pypy.interpreter.gateway 
import interp2app from pypy.interpreter.error import OperationError, oefmt from rpython.rlib import rwin32 -from rpython.rlib.rutf8 import FLAG_ASCII def readwrite_attrproperty_w(name, cls): @@ -127,7 +126,7 @@ return space.call_function(space.w_unicode, w_as_str) lgt = len(self.args_w) if lgt == 0: - return space.newutf8("", 0, FLAG_ASCII) + return space.newutf8("", 0) if lgt == 1: return space.call_function(space.w_unicode, self.args_w[0]) else: diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py --- a/pypy/module/pyexpat/interp_pyexpat.py +++ b/pypy/module/pyexpat/interp_pyexpat.py @@ -478,8 +478,8 @@ # I suppose this is a valid utf8, but there is noone to check # and noone to catch an error either try: - lgt, flag = rutf8.check_utf8(s, True) - return space.newutf8(s, lgt, flag) + lgt = rutf8.check_utf8(s, True) + return space.newutf8(s, lgt) except rutf8.CheckError: from pypy.interpreter import unicodehelper # get the correct error msg diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -209,7 +209,7 @@ def newbytes(self, x): return w_some_obj() - def newutf8(self, x, l, f): + def newutf8(self, x, l): return w_some_obj() def new_from_utf8(self, a): diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -195,11 +195,11 @@ w_dict = self.getdict(space) if w_dict is None: w_dict = space.w_None - s, _, lgt, flag = str_decode_latin_1(''.join(self.getdata()), 'strict', + s, _, lgt = str_decode_latin_1(''.join(self.getdata()), 'strict', True, None) return space.newtuple([ space.type(self), space.newtuple([ - space.newutf8(s, lgt, flag), space.newtext('latin-1')]), + space.newutf8(s, lgt), space.newtext('latin-1')]), w_dict]) @staticmethod diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -1197,7 +1197,7 @@ unerase = staticmethod(unerase) def wrap(self, unwrapped): - return self.space.newutf8(unwrapped, len(unwrapped), rutf8.FLAG_ASCII) + return self.space.newutf8(unwrapped, len(unwrapped)) def unwrap(self, wrapped): return self.space.utf8_w(wrapped) @@ -1239,7 +1239,7 @@ ## return self.space.newlist_bytes(self.listview_bytes(w_dict)) def wrapkey(space, key): - return space.newutf8(key, len(key), rutf8.FLAG_ASCII) + return space.newutf8(key, len(key)) ## @jit.look_inside_iff(lambda self, w_dict: ## w_dict_unrolling_heuristic(w_dict)) diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -198,8 +198,8 @@ if self.w_valuedict is None: raise oefmt(space.w_TypeError, "format requires a mapping") if do_unicode: - lgt, flag = rutf8.check_utf8(key, True) - w_key = space.newutf8(key, lgt, flag) + lgt = rutf8.check_utf8(key, True) + w_key = space.newutf8(key, lgt) else: w_key = space.newbytes(key) return space.getitem(self.w_valuedict, w_key) @@ -513,8 +513,8 @@ formatter = UnicodeFormatter(space, fmt, values_w, w_valuedict) result = formatter.format() # this can force strings, not sure if it's a problem or not - lgt, flag = rutf8.check_utf8(result, True) - return space.newutf8(result, lgt, flag) + lgt = rutf8.check_utf8(result, True) + return space.newutf8(result, lgt) def mod_format(space, w_format, w_values, 
do_unicode=False): if space.isinstance_w(w_values, space.w_tuple): diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py --- a/pypy/objspace/std/listobject.py +++ b/pypy/objspace/std/listobject.py @@ -1998,7 +1998,7 @@ def wrap(self, stringval): assert stringval is not None - return self.space.newutf8(stringval, len(stringval), rutf8.FLAG_ASCII) + return self.space.newutf8(stringval, len(stringval)) def unwrap(self, w_string): return self.space.utf8_w(w_string) diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py --- a/pypy/objspace/std/marshal_impl.py +++ b/pypy/objspace/std/marshal_impl.py @@ -403,8 +403,8 @@ @unmarshaller(TYPE_UNICODE) def unmarshal_unicode(space, u, tc): arg = u.get_str() - length, flag = unicodehelper.check_utf8_or_raise(space, arg) - return space.newutf8(arg, length, flag) + length = unicodehelper.check_utf8_or_raise(space, arg) + return space.newutf8(arg, length) @marshaller(W_SetObject) def marshal_set(space, w_set, m): diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py --- a/pypy/objspace/std/newformat.py +++ b/pypy/objspace/std/newformat.py @@ -51,8 +51,8 @@ if for_unicode: def wrap(self, u): - lgt, flag = rutf8.check_utf8(u, True) - return self.space.newutf8(u, lgt, flag) + lgt = rutf8.check_utf8(u, True) + return self.space.newutf8(u, lgt) else: def wrap(self, s): return self.space.newbytes(s) @@ -379,8 +379,8 @@ template = unicode_template_formatter(space, space.utf8_w(w_string)) r = template.build(args) - lgt, flag = rutf8.check_utf8(r, True) - return space.newutf8(r, lgt, flag) + lgt = rutf8.check_utf8(r, True) + return space.newutf8(r, lgt) else: template = str_template_formatter(space, space.bytes_w(w_string)) return space.newbytes(template.build(args)) @@ -416,8 +416,8 @@ if for_unicode: def wrap(self, u): - lgt, flag = rutf8.check_utf8(u, True) - return self.space.newutf8(u, lgt, flag) + lgt = rutf8.check_utf8(u, True) + return self.space.newutf8(u, lgt) else: def wrap(self, s): return self.space.newbytes(s) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -165,8 +165,8 @@ return self.newtext(x) if isinstance(x, unicode): x = x.encode('utf8') - lgt, flag = rutf8.check_utf8(x, True) - return self.newutf8(x, lgt, flag) + lgt = rutf8.check_utf8(x, True) + return self.newutf8(x, lgt) if isinstance(x, float): return W_FloatObject(x) if isinstance(x, W_Root): @@ -362,16 +362,10 @@ return self.w_None return self.newtext(s) - def newutf8(self, utf8s, length, flag): + def newutf8(self, utf8s, length): assert utf8s is not None assert isinstance(utf8s, str) - return W_UnicodeObject(utf8s, length, flag) - - def new_from_utf8(self, utf8s): - # XXX: kill me! 
- assert isinstance(utf8s, str) - length, flag = rutf8.check_utf8(utf8s, True) - return W_UnicodeObject(utf8s, length, flag) + return W_UnicodeObject(utf8s, length) def newfilename(self, s): assert isinstance(s, str) # on pypy3, this decodes the byte string diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py --- a/pypy/objspace/std/setobject.py +++ b/pypy/objspace/std/setobject.py @@ -1291,7 +1291,7 @@ return self.space.utf8_w(w_item) def wrap(self, item): - return self.space.newutf8(item, len(item), rutf8.FLAG_ASCII) + return self.space.newutf8(item, len(item)) def iter(self, w_set): return UnicodeIteratorImplementation(self.space, self, w_set) @@ -1495,7 +1495,7 @@ def next_entry(self): for key in self.iterator: - return self.space.newutf8(key, len(key), rutf8.FLAG_ASCII) + return self.space.newutf8(key, len(key)) else: return None diff --git a/pypy/objspace/std/test/test_index.py b/pypy/objspace/std/test/test_index.py --- a/pypy/objspace/std/test/test_index.py +++ b/pypy/objspace/std/test/test_index.py @@ -265,8 +265,7 @@ class AppTest_UnicodeTestCase(SeqTestCase, StringTestCase): def setup_method(self, method): SeqTestCase.setup_method(self, method) - self.w_seq = self.space.newutf8("this is a test", len("this is a test"), - rutf8.FLAG_ASCII) + self.w_seq = self.space.newutf8("this is a test", len("this is a test")) self.w_const = self.space.appexec([], """(): return unicode""") diff --git a/pypy/objspace/std/test/test_lengthhint.py b/pypy/objspace/std/test/test_lengthhint.py --- a/pypy/objspace/std/test/test_lengthhint.py +++ b/pypy/objspace/std/test/test_lengthhint.py @@ -74,8 +74,7 @@ self._test_length_hint(self.space.wrap('P' * self.SIZE)) def test_unicode(self): - self._test_length_hint(self.space.newutf8('Y' * self.SIZE, self.SIZE, - rutf8.FLAG_ASCII)) + self._test_length_hint(self.space.newutf8('Y' * self.SIZE, self.SIZE)) def test_tuple(self): self._test_length_hint(self.space.wrap(tuple(self.ITEMS))) diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py --- a/pypy/objspace/std/test/test_liststrategies.py +++ b/pypy/objspace/std/test/test_liststrategies.py @@ -7,7 +7,6 @@ IntOrFloatListStrategy) from pypy.objspace.std import listobject from pypy.objspace.std.test.test_listobject import TestW_ListObject -from rpython.rlib.rutf8 import FLAG_ASCII class TestW_ListStrategies(TestW_ListObject): @@ -601,9 +600,9 @@ def test_unicode(self): l1 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newbytes("zwei")]) assert isinstance(l1.strategy, BytesListStrategy) - l2 = W_ListObject(self.space, [self.space.newutf8("eins", 4, FLAG_ASCII), self.space.newutf8("zwei", 4, FLAG_ASCII)]) + l2 = W_ListObject(self.space, [self.space.newutf8("eins", 4), self.space.newutf8("zwei", 4)]) assert isinstance(l2.strategy, UnicodeListStrategy) - l3 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newutf8("zwei", 4, FLAG_ASCII)]) + l3 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newutf8("zwei", 4)]) assert isinstance(l3.strategy, ObjectListStrategy) def test_listview_bytes(self): diff --git a/pypy/objspace/std/test/test_obj.py b/pypy/objspace/std/test/test_obj.py --- a/pypy/objspace/std/test/test_obj.py +++ b/pypy/objspace/std/test/test_obj.py @@ -17,7 +17,7 @@ cls.w_cpython_apptest = space.wrap(option.runappdirect and not hasattr(sys, 'pypy_translation_info')) def w_unwrap_wrap_unicode(space, w_obj): - return space.newutf8(space.utf8_w(w_obj), w_obj._length, 
w_obj._get_flag()) + return space.newutf8(space.utf8_w(w_obj), w_obj._length) cls.w_unwrap_wrap_unicode = space.wrap(gateway.interp2app(w_unwrap_wrap_unicode)) def w_unwrap_wrap_str(space, w_obj): return space.wrap(space.str_w(w_obj)) diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -27,12 +27,12 @@ assert len(warnings) == 2 def test_listview_unicode(self): - w_str = self.space.newutf8('abcd', 4, rutf8.FLAG_ASCII) + w_str = self.space.newutf8('abcd', 4) assert self.space.listview_utf8(w_str) == list("abcd") def test_new_shortcut(self): space = self.space - w_uni = self.space.newutf8('abcd', 4, rutf8.FLAG_ASCII) + w_uni = self.space.newutf8('abcd', 4) w_new = space.call_method( space.w_unicode, "__new__", space.w_unicode, w_uni) assert w_new is w_uni @@ -44,8 +44,8 @@ return # skip this case v = u[start : start + len1] space = self.space - w_u = space.newutf8(u.encode('utf8'), len(u), rutf8.FLAG_REGULAR) - w_v = space.newutf8(v.encode('utf8'), len(v), rutf8.FLAG_REGULAR) + w_u = space.newutf8(u.encode('utf8'), len(u)) + w_v = space.newutf8(v.encode('utf8'), len(v)) expected = u.find(v, start, start + len1) try: w_index = space.call_method(w_u, 'index', w_v, diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -35,40 +35,22 @@ _immutable_fields_ = ['_utf8'] @enforceargs(utf8str=str) - def __init__(self, utf8str, length, flag): + def __init__(self, utf8str, length): assert isinstance(utf8str, str) assert length >= 0 self._utf8 = utf8str self._length = length - if flag == rutf8.FLAG_ASCII: - self._index_storage = rutf8.UTF8_IS_ASCII - elif flag == rutf8.FLAG_HAS_SURROGATES: - self._index_storage = rutf8.UTF8_HAS_SURROGATES - else: - assert flag == rutf8.FLAG_REGULAR - self._index_storage = rutf8.null_storage() + self._index_storage = rutf8.null_storage() # XXX checking, remove before any performance measurments # ifdef not_running_in_benchmark if not we_are_translated(): - lgt, flag_check = rutf8.check_utf8(utf8str, True) + lgt = rutf8.check_utf8(utf8str, True) assert lgt == length - if flag_check == rutf8.FLAG_ASCII: - # there are cases where we copy part of REULAR that happens - # to be ascii - assert flag in (rutf8.FLAG_ASCII, rutf8.FLAG_REGULAR) - else: - assert flag == flag_check - # the storage can be one of: - # - null, unicode with no surrogates - # - rutf8.UTF8_HAS_SURROGATES - # - rutf8.UTF8_IS_ASCII - # - malloced object, which means it has index, then - # _index_storage.flags determines the kind @staticmethod def from_utf8builder(builder): return W_UnicodeObject( - builder.build(), builder.get_length(), builder.get_flag()) + builder.build(), builder.get_length()) def __repr__(self): """representation for debugging purposes""" @@ -108,8 +90,6 @@ return space.text_w(space.str(self)) def utf8_w(self, space): - if self._has_surrogates(): - return rutf8.reencode_utf8_with_surrogates(self._utf8) return self._utf8 def readbuf_w(self, space): @@ -245,8 +225,7 @@ assert isinstance(w_value, W_UnicodeObject) w_newobj = space.allocate_instance(W_UnicodeObject, w_unicodetype) - W_UnicodeObject.__init__(w_newobj, w_value._utf8, w_value._length, - w_value._get_flag()) + W_UnicodeObject.__init__(w_newobj, w_value._utf8, w_value._length) if w_value._index_storage: # copy the storage if it's there w_newobj._index_storage = 
w_value._index_storage @@ -393,8 +372,7 @@ elif space.isinstance_w(w_newval, space.w_int): codepoint = space.int_w(w_newval) elif isinstance(w_newval, W_UnicodeObject): - builder.append_utf8( - w_newval._utf8, w_newval._length, w_newval._get_flag()) + builder.append_utf8(w_newval._utf8, w_newval._length) continue else: raise oefmt(space.w_TypeError, @@ -481,16 +459,16 @@ newlen += dist oldtoken = token - return W_UnicodeObject(expanded, newlen, self._get_flag()) + return W_UnicodeObject(expanded, newlen) _StringMethods_descr_join = descr_join def descr_join(self, space, w_list): l = space.listview_utf8(w_list) if l is not None and self.is_ascii(): if len(l) == 1: - return space.newutf8(l[0], len(l[0]), rutf8.FLAG_ASCII) + return space.newutf8(l[0], len(l[0])) s = self._utf8.join(l) - return space.newutf8(s, len(s), rutf8.FLAG_ASCII) + return space.newutf8(s, len(s)) return self._StringMethods_descr_join(space, w_list) def _join_return_one(self, space, w_obj): @@ -584,13 +562,6 @@ return True return endswith(value, prefix, start, end) - def _get_flag(self): - if self.is_ascii(): - return rutf8.FLAG_ASCII - elif self._has_surrogates(): - return rutf8.FLAG_HAS_SURROGATES - return rutf8.FLAG_REGULAR - def descr_add(self, space, w_other): try: w_other = self.convert_arg_to_w_unicode(space, w_other) @@ -598,9 +569,8 @@ if e.match(space, space.w_TypeError): return space.w_NotImplemented raise - flag = rutf8.combine_flags(self._get_flag(), w_other._get_flag()) return W_UnicodeObject(self._utf8 + w_other._utf8, - self._len() + w_other._len(), flag) + self._len() + w_other._len()) @jit.look_inside_iff(lambda self, space, list_w, size: jit.loop_unrolling_heuristic(list_w, size)) @@ -610,7 +580,6 @@ prealloc_size = len(value) * (size - 1) unwrapped = newlist_hint(size) - flag = self._get_flag() for i in range(size): w_s = list_w[i] if not (space.isinstance_w(w_s, space.w_bytes) or @@ -621,7 +590,6 @@ # XXX Maybe the extra copy here is okay? 
It was basically going to # happen anyway, what with being placed into the builder w_u = self.convert_arg_to_w_unicode(space, w_s) - flag = rutf8.combine_flags(flag, w_u._get_flag()) unwrapped.append(w_u._utf8) lgt += w_u._length prealloc_size += len(unwrapped[i]) @@ -631,7 +599,7 @@ if value and i != 0: sb.append(value) sb.append(unwrapped[i]) - return W_UnicodeObject(sb.build(), lgt, flag) + return W_UnicodeObject(sb.build(), lgt) @unwrap_spec(keepends=bool) def descr_splitlines(self, space, keepends=False): @@ -660,8 +628,7 @@ lgt += line_end_chars assert eol >= 0 assert sol >= 0 - # XXX we can do better with flags here, if we want to - strs_w.append(W_UnicodeObject(value[sol:eol], lgt, self._get_flag())) + strs_w.append(W_UnicodeObject(value[sol:eol], lgt)) return space.newlist(strs_w) def descr_upper(self, space): @@ -675,11 +642,11 @@ def descr_zfill(self, space, width): selfval = self._utf8 if len(selfval) == 0: - return W_UnicodeObject('0' * width, width, rutf8.FLAG_ASCII) + return W_UnicodeObject('0' * width, width) num_zeros = width - self._len() if num_zeros <= 0: # cannot return self, in case it is a subclass of str - return W_UnicodeObject(selfval, self._len(), self._get_flag()) + return W_UnicodeObject(selfval, self._len()) builder = StringBuilder(num_zeros + len(selfval)) if len(selfval) > 0 and (selfval[0] == '+' or selfval[0] == '-'): # copy sign to first position @@ -689,7 +656,7 @@ start = 0 builder.append_multiple_char('0', num_zeros) builder.append_slice(selfval, start, len(selfval)) - return W_UnicodeObject(builder.build(), width, self._get_flag()) + return W_UnicodeObject(builder.build(), width) @unwrap_spec(maxsplit=int) def descr_split(self, space, w_sep=None, maxsplit=-1): @@ -748,7 +715,7 @@ break i += 1 byte_pos = self._index_to_byte(start + i * step) - return W_UnicodeObject(builder.build(), sl, self._get_flag()) + return W_UnicodeObject(builder.build(), sl) def descr_getslice(self, space, w_start, w_stop): start, stop = normalize_simple_slice( @@ -765,8 +732,7 @@ assert stop >= 0 byte_start = self._index_to_byte(start) byte_stop = self._index_to_byte(stop) - return W_UnicodeObject(self._utf8[byte_start:byte_stop], stop - start, - self._get_flag()) + return W_UnicodeObject(self._utf8[byte_start:byte_stop], stop - start) def descr_capitalize(self, space): value = self._utf8 @@ -800,7 +766,7 @@ centered = value d = 0 - return W_UnicodeObject(centered, self._len() + d, self._get_flag()) + return W_UnicodeObject(centered, self._len() + d) def descr_count(self, space, w_sub, w_start=None, w_end=None): value = self._utf8 @@ -828,9 +794,9 @@ else: lgt, _ = rutf8.check_utf8(value, True, stop=pos) return space.newtuple( - [W_UnicodeObject(value[0:pos], lgt, self._get_flag()), w_sub, + [W_UnicodeObject(value[0:pos], lgt), w_sub, W_UnicodeObject(value[pos + len(sub._utf8):len(value)], - self._len() - lgt - sublen, self._get_flag())]) + self._len() - lgt - sublen)]) def descr_rpartition(self, space, w_sub): value = self._utf8 @@ -846,9 +812,9 @@ else: lgt, _ = rutf8.check_utf8(value, True, stop=pos) return space.newtuple( - [W_UnicodeObject(value[0:pos], lgt, self._get_flag()), w_sub, + [W_UnicodeObject(value[0:pos], lgt), w_sub, W_UnicodeObject(value[pos + len(sub._utf8):len(value)], - self._len() - lgt - sublen, self._get_flag())]) + self._len() - lgt - sublen)]) @unwrap_spec(count=int) def descr_replace(self, space, w_old, w_new, count=-1): @@ -866,9 +832,8 @@ except OverflowError: raise oefmt(space.w_OverflowError, "replace string is too long") - flag = 
rutf8.combine_flags(self._get_flag(), w_by._get_flag()) newlength = self._length + replacements * (w_by._length - w_sub._length) - return W_UnicodeObject(res, newlength, flag) + return W_UnicodeObject(res, newlength) def descr_mul(self, space, w_times): try: @@ -880,29 +845,19 @@ if times <= 0: return self._empty() if len(self._utf8) == 1: - return W_UnicodeObject(self._utf8[0] * times, times, - self._get_flag()) - return W_UnicodeObject(self._utf8 * times, times * self._len(), - self._get_flag()) + return W_UnicodeObject(self._utf8[0] * times, times) + return W_UnicodeObject(self._utf8 * times, times * self._len()) descr_rmul = descr_mul def _get_index_storage(self): # XXX write the correct jit.elidable - condition = (self._index_storage == rutf8.null_storage() or - not bool(self._index_storage.contents)) - if condition: + if self._index_storage == rutf8.null_storage(): storage = rutf8.create_utf8_index_storage(self._utf8, self._length) else: storage = self._index_storage if not jit.isconstant(self): - prev_storage = self._index_storage self._index_storage = storage - if prev_storage == rutf8.UTF8_HAS_SURROGATES: - flag = rutf8.FLAG_HAS_SURROGATES - else: - flag = rutf8.FLAG_REGULAR - self._index_storage.flag = flag return storage def _getitem_result(self, space, index): @@ -912,15 +867,15 @@ raise oefmt(space.w_IndexError, "string index out of range") start = self._index_to_byte(index) end = rutf8.next_codepoint_pos(self._utf8, start) - return W_UnicodeObject(self._utf8[start:end], 1, self._get_flag()) + return W_UnicodeObject(self._utf8[start:end], 1) def is_ascii(self): - return self._index_storage is rutf8.UTF8_IS_ASCII + return self._length == len(self._utf8) def _has_surrogates(self): - return (self._index_storage is rutf8.UTF8_HAS_SURROGATES or - (bool(self._index_storage) and - self._index_storage.flag == rutf8.FLAG_HAS_SURROGATES)) + if self.is_ascii(): + return False + return rutf8.has_surrogates(self._utf8) def _index_to_byte(self, index): if self.is_ascii(): @@ -988,7 +943,6 @@ if w_fillchar._len() != 1: raise oefmt(space.w_TypeError, "rjust() argument 2 must be a single character") - flag = rutf8.combine_flags(self._get_flag(), w_fillchar._get_flag()) d = width - lgt if d > 0: if len(w_fillchar._utf8) == 1: @@ -996,9 +950,9 @@ value = d * w_fillchar._utf8[0] + value else: value = d * w_fillchar._utf8 + value - return W_UnicodeObject(value, width, flag) + return W_UnicodeObject(value, width) - return W_UnicodeObject(value, lgt, flag) + return W_UnicodeObject(value, lgt) @unwrap_spec(width=int, w_fillchar=WrappedDefault(' ')) def descr_ljust(self, space, width, w_fillchar): @@ -1007,7 +961,6 @@ if w_fillchar._len() != 1: raise oefmt(space.w_TypeError, "ljust() argument 2 must be a single character") - flag = rutf8.combine_flags(self._get_flag(), w_fillchar._get_flag()) d = width - self._len() if d > 0: if len(w_fillchar._utf8) == 1: @@ -1015,9 +968,9 @@ value = value + d * w_fillchar._utf8[0] else: value = value + d * w_fillchar._utf8 - return W_UnicodeObject(value, width, flag) + return W_UnicodeObject(value, width) - return W_UnicodeObject(value, self._len(), flag) + return W_UnicodeObject(value, self._len()) def _utf8_sliced(self, start, stop, lgt): assert start >= 0 @@ -1025,7 +978,7 @@ #if start == 0 and stop == len(s) and space.is_w(space.type(orig_obj), # space.w_bytes): # return orig_obj - return W_UnicodeObject(self._utf8[start:stop], lgt, self._get_flag()) + return W_UnicodeObject(self._utf8[start:stop], lgt) def _strip_none(self, space, left, right): "internal 
function called by str_xstrip methods" @@ -1073,7 +1026,7 @@ return self._utf8_sliced(lpos, rpos, lgt) def descr_getnewargs(self, space): - return space.newtuple([W_UnicodeObject(self._utf8, self._length, self._get_flag())]) + return space.newtuple([W_UnicodeObject(self._utf8, self._length)]) _starts_ends_unicode = True @@ -1158,11 +1111,11 @@ if encoding == 'ascii': s = space.charbuf_w(w_obj) unicodehelper.check_ascii_or_raise(space, s) - return space.newutf8(s, len(s), rutf8.FLAG_ASCII) + return space.newutf8(s, len(s)) if encoding == 'utf-8' or encoding == 'utf8': s = space.charbuf_w(w_obj) - lgt, flag = unicodehelper.check_utf8_or_raise(space, s) - return space.newutf8(s, lgt, flag) + lgt = unicodehelper.check_utf8_or_raise(space, s) + return space.newutf8(s, lgt) w_codecs = space.getbuiltinmodule("_codecs") w_decode = space.getattr(w_codecs, space.newtext("decode")) if errors is None: @@ -1217,7 +1170,7 @@ return unicode_from_encoded_object(space, w_bytes, encoding, "strict") s = space.bytes_w(w_bytes) unicodehelper.check_ascii_or_raise(space, s) - return W_UnicodeObject(s, len(s), rutf8.FLAG_ASCII) + return W_UnicodeObject(s, len(s)) class UnicodeDocstrings: @@ -1764,7 +1717,7 @@ return [s for s in value] -W_UnicodeObject.EMPTY = W_UnicodeObject('', 0, rutf8.FLAG_ASCII) +W_UnicodeObject.EMPTY = W_UnicodeObject('', 0) # Helper for converting int/long From pypy.commits at gmail.com Thu Dec 7 11:03:53 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 07 Dec 2017 08:03:53 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: whack a few more places, handle surrogates correctly Message-ID: <5a296669.47b0df0a.ade9e.c227@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93298:8d468e08f3fe Date: 2017-12-07 18:03 +0200 http://bitbucket.org/pypy/pypy/changeset/8d468e08f3fe/ Log: whack a few more places, handle surrogates correctly diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -330,8 +330,7 @@ space = self.space if do_unicode: cp = rutf8.codepoint_at_pos(self.fmt, self.fmtpos - 1) - flag = rutf8.get_flag_from_code(cp) - w_s = space.newutf8(rutf8.unichr_as_utf8(cp), 1, flag) + w_s = space.newutf8(rutf8.unichr_as_utf8(cp), 1) else: cp = ord(self.fmt[self.fmtpos - 1]) w_s = space.newbytes(chr(cp)) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -170,7 +170,8 @@ def _istitle(self, ch): return unicodedb.isupper(ch) or unicodedb.istitle(ch) - def _isspace(self, ch): + @staticmethod + def _isspace(ch): return unicodedb.isspace(ch) def _isalpha(self, ch): @@ -188,8 +189,8 @@ def _iscased(self, ch): return unicodedb.iscased(ch) - def _islinebreak(self, s, pos): - return rutf8.islinebreak(s, pos) + def _islinebreak(self, ch): + return unicodedb.islinebreak(ch) @staticmethod @unwrap_spec(w_string=WrappedDefault("")) @@ -610,7 +611,7 @@ while pos < length: sol = pos lgt = 0 - while pos < length and not self._islinebreak(value, pos): + while pos < length and not self._islinebreak(rutf8.codepoint_at_pos(value, pos)): pos = rutf8.next_codepoint_pos(value, pos) lgt += 1 eol = pos @@ -792,7 +793,7 @@ if pos < 0: return space.newtuple([self, self._empty(), self._empty()]) else: - lgt, _ = rutf8.check_utf8(value, True, stop=pos) + lgt = rutf8.check_utf8(value, True, stop=pos) return space.newtuple( [W_UnicodeObject(value[0:pos], lgt), w_sub, 
W_UnicodeObject(value[pos + len(sub._utf8):len(value)], @@ -810,7 +811,7 @@ if pos < 0: return space.newtuple([self._empty(), self._empty(), self]) else: - lgt, _ = rutf8.check_utf8(value, True, stop=pos) + lgt = rutf8.check_utf8(value, True, stop=pos) return space.newtuple( [W_UnicodeObject(value[0:pos], lgt), w_sub, W_UnicodeObject(value[pos + len(sub._utf8):len(value)], @@ -1087,7 +1088,10 @@ return space.newbytes(s) if ((encoding is None and space.sys.defaultencoding == 'utf8') or encoding == 'utf-8' or encoding == 'utf8' or encoding == 'UTF-8'): - return space.newbytes(space.utf8_w(w_object)) + utf8 = space.utf8_w(w_object) + if rutf8.has_surrogates(utf8): + utf8 = rutf8.reencode_utf8_with_surrogates(utf8) + return space.newbytes(utf8) if w_encoder is None: from pypy.module._codecs.interp_codecs import lookup_codec w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0)) @@ -1728,14 +1732,12 @@ result = ['\0'] * w_unistr._length digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] - i = 0 res_pos = 0 - while i < len(unistr): - uchr = rutf8.codepoint_at_pos(unistr, i) - if rutf8.isspace(unistr, i): + iter = rutf8.Utf8StringIterator(unistr) + for uchr in iter: + if W_UnicodeObject._isspace(uchr): result[res_pos] = ' ' res_pos += 1 - i = rutf8.next_codepoint_pos(unistr, i) continue try: result[res_pos] = digits[unicodedb.decimal(uchr)] @@ -1744,14 +1746,14 @@ result[res_pos] = chr(uchr) else: w_encoding = space.newtext('decimal') - w_start = space.newint(i) - w_end = space.newint(i+1) + pos = iter.get_pos() + w_start = space.newint(pos) + w_end = space.newint(pos+1) w_reason = space.newtext('invalid decimal Unicode string') raise OperationError(space.w_UnicodeEncodeError, space.newtuple([w_encoding, w_unistr, w_start, w_end, w_reason])) - i = rutf8.next_codepoint_pos(unistr, i) res_pos += 1 return ''.join(result) diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -410,6 +410,13 @@ assert pos - continuation_bytes >= 0 return pos - continuation_bytes +def has_surrogates(utf8): + # XXX write a faster version maybe + for ch in Utf8StringIterator(utf8): + if 0xD800 <= ch <= 0xDBFF: + return True + return False + def reencode_utf8_with_surrogates(utf8): """ Receiving valid UTF8 which contains surrogates, combine surrogate pairs into correct UTF8 with pairs collpased. 
This is a rare case From pypy.commits at gmail.com Thu Dec 7 13:03:42 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 07 Dec 2017 10:03:42 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix test Message-ID: <5a29827e.4a8f1c0a.7243a.94aa@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93299:0451e5fe8470 Date: 2017-12-07 04:34 +0000 http://bitbucket.org/pypy/pypy/changeset/0451e5fe8470/ Log: fix test diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -466,8 +466,8 @@ def test_encode_utf8(self, space): u = rffi.unicode2wcharp(u'sp\x09m') - w_s = PyUnicode_EncodeUTF8(space, u, 4, None) - assert space.unicode_w(w_s) == u'sp\x09m'.encode('utf-8') + w_b = PyUnicode_EncodeUTF8(space, u, 4, None) + assert space.bytes_w(w_b) == u'sp\x09m'.encode('utf-8') rffi.free_wcharp(u) def test_encode_decimal(self, space): From pypy.commits at gmail.com Thu Dec 7 13:03:44 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 07 Dec 2017 10:03:44 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Allow -A imp tests to run on pypy Message-ID: <5a298280.05c41c0a.b3886.3db8@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93300:2cc79ebab1f2 Date: 2017-12-07 05:33 +0000 http://bitbucket.org/pypy/pypy/changeset/2cc79ebab1f2/ Log: Allow -A imp tests to run on pypy diff --git a/pypy/tool/pytest/apptest.py b/pypy/tool/pytest/apptest.py --- a/pypy/tool/pytest/apptest.py +++ b/pypy/tool/pytest/apptest.py @@ -28,7 +28,8 @@ 'struct': '_struct', 'thread': '_thread', 'operator': '_operator', - 'signal': '_signal'} + 'signal': '_signal', + 'imp': '_imp'} class AppError(Exception): def __init__(self, excinfo): From pypy.commits at gmail.com Thu Dec 7 13:03:46 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 07 Dec 2017 10:03:46 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Get the expected error when passing a bogus spec to _imp.create_dynamic() Message-ID: <5a298282.4191df0a.397b4.6f4f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93301:e30b20325b57 Date: 2017-12-07 05:40 +0000 http://bitbucket.org/pypy/pypy/changeset/e30b20325b57/ Log: Get the expected error when passing a bogus spec to _imp.create_dynamic() diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -1562,6 +1562,7 @@ from rpython.rlib import rdynload w_name = space.getattr(w_spec, space.newtext("name")) + name = space.text_w(w_name) path = space.text_w(space.getattr(w_spec, space.newtext("origin"))) if os.sep not in path: @@ -1577,7 +1578,6 @@ raise raise_import_error(space, space.newfilename(e.msg), w_name, w_path) look_for = None - name = space.text_w(w_name) # if space.config.objspace.usemodules._cffi_backend: basename = name.split('.')[-1] From pypy.commits at gmail.com Fri Dec 8 05:45:30 2017 From: pypy.commits at gmail.com (arigo) Date: Fri, 08 Dec 2017 02:45:30 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: in-progress Message-ID: <5a2a6d4a.910f1c0a.86570.7b08@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93302:cb5b89596a2f Date: 2017-12-08 11:44 +0100 http://bitbucket.org/pypy/pypy/changeset/cb5b89596a2f/ Log: in-progress diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -142,6 +142,7 @@ # Utf8MatchContext. 
The non-utf8 implementation is provided # by the FixedMatchContext abstract subclass, in order to use # the same @not_rpython safety trick as above. + ZERO = 0 @not_rpython def next(self, position): raise NotImplementedError @@ -221,9 +222,8 @@ class FixedMatchContext(AbstractMatchContext): """Abstract subclass to introduce the default implementation for - these position methods. The Utf8 subclass doesn't inherit from here.""" - - ZERO = 0 + these position methods. The Utf8MatchContext subclass doesn't + inherit from here.""" def next(self, position): return position + 1 diff --git a/rpython/rlib/rsre/rsre_utf8.py b/rpython/rlib/rsre/rsre_utf8.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rsre/rsre_utf8.py @@ -0,0 +1,59 @@ +from rpython.rlib.debug import check_nonneg +from rpython.rlib.rarithmetic import r_uint, intmask +from rpython.rlib.rsre.rsre_core import AbstractMatchContext, EndOfString +from rpython.rlib.rsre import rsre_char +from rpython.rlib import rutf8 + + +class Utf8MatchContext(AbstractMatchContext): + + def __init__(self, pattern, utf8string, match_start, end, flags): + AbstractMatchContext.__init__(self, pattern, match_start, end, flags) + self._utf8 = utf8string + + def str(self, index): + check_nonneg(index) + return rutf8.codepoint_at_pos(self._utf8, index) + + def lowstr(self, index): + c = self.str(index) + return rsre_char.getlower(c, self.flags) + + def get_single_byte(self, base_position, index): + return self.str(base_position + index) + + def fresh_copy(self, start): + return Utf8MatchContext(self.pattern, self._utf8, start, + self.end, self.flags) + + def next(self, position): + return rutf8.next_codepoint_pos(self._utf8, position) + + def prev(self, position): + if position <= 0: + raise EndOfString + upos = r_uint(position) + upos = rutf8.prev_codepoint_pos(self._utf8, upos) + position = intmask(upos) + assert position >= 0 + return position + + def next_n(self, position, n, end_position): + for i in range(n): + if position >= end_position: + raise EndOfString + position = rutf8.next_codepoint_pos(self._utf8, position) + return position + + def prev_n(self, position, n, start_position): + upos = r_uint(position) + for i in range(n): + if upos <= r_uint(start_position): + raise EndOfString + upos = rutf8.next_codepoint_pos(self._utf8, upos) + position = intmask(upos) + assert position >= 0 + return position + + def slowly_convert_byte_pos_to_index(self, position): + diff --git a/rpython/rlib/rsre/test/test_search.py b/rpython/rlib/rsre/test/test_search.py --- a/rpython/rlib/rsre/test/test_search.py +++ b/rpython/rlib/rsre/test/test_search.py @@ -1,7 +1,7 @@ import re, py from rpython.rlib.rsre.test.test_match import get_code, get_code_and_re from rpython.rlib.rsre.test import support -from rpython.rlib.rsre import rsre_core +from rpython.rlib.rsre import rsre_core, rsre_utf8 class BaseTestSearch: @@ -222,3 +222,8 @@ search = staticmethod(rsre_core.search) match = staticmethod(rsre_core.match) Position = staticmethod(lambda n: n) + +class TestSearchUtf8(BaseTestSearch): + search = staticmethod(rsre_utf8.utf8search) + match = staticmethod(rsre_utf8.utf8match) + Position = staticmethod(lambda n: n) # NB. 
only for plain ascii From pypy.commits at gmail.com Fri Dec 8 05:46:52 2017 From: pypy.commits at gmail.com (arigo) Date: Fri, 08 Dec 2017 02:46:52 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: hg merge unicode-utf8 Message-ID: <5a2a6d9c.530a1c0a.d018a.7a83@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93304:be4b4c164598 Date: 2017-12-08 11:46 +0100 http://bitbucket.org/pypy/pypy/changeset/be4b4c164598/ Log: hg merge unicode-utf8 diff too long, truncating to 2000 out of 3797 lines diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -9,5 +9,6 @@ * remove assertions from W_UnicodeObject.__init__ if all the builders pass * what to do with error handlers that go backwards. There were tests in test_codecs that would check for that +* improve performance of splitlines * fix _pypyjson to not use a wrapped dict when decoding an object diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py --- a/extra_tests/test_textio.py +++ b/extra_tests/test_textio.py @@ -1,28 +1,48 @@ from hypothesis import given, strategies as st from io import BytesIO, TextIOWrapper +import os -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace('\r\n', '\n') + text = text.replace('\r', '\n') + return text.replace('\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline_universal( + draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + lines = draw(st.lists( + st.text(st.characters(blacklist_characters='\r\n')), + min_size=n_lines, max_size=n_lines)) + limits = [] + for line in lines: + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + endings = draw(st.lists( + st.sampled_from(['\n', '\r', '\r\n']), + min_size=n_lines, max_size=n_lines)) + return ( + ''.join(line + ending for line, ending in zip(lines, endings)), + limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(txt, mode, limit): + at given(data=st_readline_universal(), + mode=st.sampled_from(['\r', '\n', '\r\n', '', None])) +def test_readline(data, mode): + txt, limits = data textio = TextIOWrapper( - BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode) + BytesIO(txt.encode('utf-8', 'surrogatepass')), + encoding='utf-8', errors='surrogatepass', newline=mode) lines = [] - while True: + for limit in limits: line = textio.readline(limit) - if limit > 0: - assert len(line) < limit + if limit >= 0: + assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + if mode is None: + txt = translate_newlines(txt) + assert txt.startswith(u''.join(lines)) diff --git a/lib_pypy/resource.py b/lib_pypy/resource.py --- a/lib_pypy/resource.py +++ b/lib_pypy/resource.py @@ -20,6 +20,7 @@ or via the attributes ru_utime, ru_stime, ru_maxrss, and so on.""" __metaclass__ = _structseq.structseqtype + name = "resource.struct_rusage" ru_utime = _structseq.structseqfield(0, "user time used") ru_stime = _structseq.structseqfield(1, "system time used") diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -26,3 +26,6 @@ .. branch: fix-vmprof-stacklet-switch Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. 
branch: win32-vcvars + diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -25,8 +25,10 @@ This compiler, while the standard one for Python 2.7, is deprecated. Microsoft has made it available as the `Microsoft Visual C++ Compiler for Python 2.7`_ (the link -was checked in Nov 2016). Note that the compiler suite will be installed in -``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python``. +was checked in Nov 2016). Note that the compiler suite may be installed in +``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python`` +or in +``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python``. A current version of ``setuptools`` will be able to find it there. For Windows 10, you must right-click the download, and under ``Properties`` -> ``Compatibility`` mark it as ``Run run this program in comatibility mode for`` @@ -41,7 +43,6 @@ ----------------------------------- We routinely test translation using v9, also known as Visual Studio 2008. -Our buildbot is still using the Express Edition, not the compiler noted above. Other configurations may work as well. The translation scripts will set up the appropriate environment variables @@ -81,6 +82,30 @@ .. _build instructions: http://pypy.org/download.html#building-from-source +Setting Up Visual Studio for building SSL in Python3 +---------------------------------------------------- + +On Python3, the ``ssl`` module is based on ``cffi``, and requires a build step after +translation. However ``distutils`` does not support the Micorosft-provided Visual C +compiler, and ``cffi`` depends on ``distutils`` to find the compiler. The +traditional solution to this problem is to install the ``setuptools`` module +via running ``-m ensurepip`` which installs ``pip`` and ``setuptools``. However +``pip`` requires ``ssl``. So we have a chicken-and-egg problem: ``ssl`` depends on +``cffi`` which depends on ``setuptools``, which depends on ``ensurepip``, which +depends on ``ssl``. + +In order to solve this, the buildbot sets an environment varaible that helps +``distutils`` find the compiler without ``setuptools``:: + + set VS90COMNTOOLS=C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0\VC\bin + +or whatever is appropriate for your machine. 
Note that this is not enough, you +must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the +``...\9.0\VC`` directory, and edit it, changing the lines that set +``VCINSTALLDIR`` and ``WindowsSdkDir``:: + set VCINSTALLDIR=%~dp0\ + set WindowsSdkDir=%~dp0\..\WinSDK\ + Preparing Windows for the large build ------------------------------------- diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1087,8 +1087,11 @@ def newlist_utf8(self, list_u, is_ascii): l_w = [None] * len(list_u) for i, item in enumerate(list_u): - length, flag = rutf8.check_utf8(item, True) - l_w[i] = self.newutf8(item, length, flag) + if not is_ascii: + length = rutf8.check_utf8(item, True) + else: + length = len(item) + l_w[i] = self.newutf8(item, length) return self.newlist(l_w) def newlist_int(self, list_i): diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py --- a/pypy/interpreter/pyparser/parsestring.py +++ b/pypy/interpreter/pyparser/parsestring.py @@ -64,8 +64,8 @@ r = unicodehelper.decode_raw_unicode_escape(space, substr) else: r = unicodehelper.decode_unicode_escape(space, substr) - v, length, flag = r - return space.newutf8(v, length, flag) + v, length = r + return space.newutf8(v, length) need_encoding = (encoding is not None and encoding != "utf-8" and encoding != "utf8" and @@ -74,8 +74,8 @@ substr = s[ps : q] if rawmode or '\\' not in s[ps:]: if need_encoding: - lgt, flag = unicodehelper.check_utf8_or_raise(space, substr) - w_u = space.newutf8(substr, lgt, flag) + lgt = unicodehelper.check_utf8_or_raise(space, substr) + w_u = space.newutf8(substr, lgt) w_v = unicodehelper.encode(space, w_u, encoding) return w_v else: @@ -234,8 +234,8 @@ p = ps while p < end and ord(s[p]) & 0x80: p += 1 - lgt, flag = unicodehelper.check_utf8_or_raise(space, s, ps, p) - w_v = unicodehelper.encode(space, space.newutf8(s[ps:p], lgt, flag), + lgt = unicodehelper.check_utf8_or_raise(space, s, ps, p) + w_v = unicodehelper.encode(space, space.newutf8(s[ps:p], lgt), recode_encoding) v = space.bytes_w(w_v) return v, p diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -10,13 +10,13 @@ return str_decode_utf8(u, True, "strict", None) def test_decode_utf8(): - assert decode_utf8("abc") == ("abc", 3, 3, rutf8.FLAG_ASCII) - assert decode_utf8("\xe1\x88\xb4") == ("\xe1\x88\xb4", 3, 1, rutf8.FLAG_REGULAR) - assert decode_utf8("\xed\xa0\x80") == ("\xed\xa0\x80", 3, 1, rutf8.FLAG_HAS_SURROGATES) - assert decode_utf8("\xed\xb0\x80") == ("\xed\xb0\x80", 3, 1, rutf8.FLAG_HAS_SURROGATES) + assert decode_utf8("abc") == ("abc", 3, 3) + assert decode_utf8("\xe1\x88\xb4") == ("\xe1\x88\xb4", 3, 1) + assert decode_utf8("\xed\xa0\x80") == ("\xed\xa0\x80", 3, 1) + assert decode_utf8("\xed\xb0\x80") == ("\xed\xb0\x80", 3, 1) assert decode_utf8("\xed\xa0\x80\xed\xb0\x80") == ( - "\xed\xa0\x80\xed\xb0\x80", 6, 2, rutf8.FLAG_HAS_SURROGATES) - assert decode_utf8("\xf0\x90\x80\x80") == ("\xf0\x90\x80\x80", 4, 1, rutf8.FLAG_REGULAR) + "\xed\xa0\x80\xed\xb0\x80", 6, 2) + assert decode_utf8("\xf0\x90\x80\x80") == ("\xf0\x90\x80\x80", 4, 1) def test_utf8_encode_ascii(): assert utf8_encode_ascii("abc", "??", "??") == "abc" @@ -41,19 +41,19 @@ assert utf8_encode_ascii(u.encode("utf8"), "replace", eh) == u.encode("ascii", "replace") def 
test_str_decode_ascii(): - assert str_decode_ascii("abc", "??", True, "??") == ("abc", 3, 3, rutf8.FLAG_ASCII) + assert str_decode_ascii("abc", "??", True, "??") == ("abc", 3, 3) def eh(errors, encoding, reason, p, start, end): lst.append((errors, encoding, p, start, end)) return u"\u1234\u5678".encode("utf8"), end lst = [] input = "\xe8" exp = u"\u1234\u5678".encode("utf8") - assert str_decode_ascii(input, "??", True, eh) == (exp, 1, 2, rutf8.FLAG_REGULAR) + assert str_decode_ascii(input, "??", True, eh) == (exp, 1, 2) assert lst == [("??", "ascii", input, 0, 1)] lst = [] input = "\xe8\xe9abc\xea\xeb" assert str_decode_ascii(input, "??", True, eh) == ( - exp + exp + "abc" + exp + exp, 7, 11, rutf8.FLAG_REGULAR) + exp + exp + "abc" + exp + exp, 7, 11) assert lst == [("??", "ascii", input, 0, 1), ("??", "ascii", input, 1, 2), ("??", "ascii", input, 5, 6), diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -3,7 +3,6 @@ from pypy.interpreter.error import OperationError from rpython.rlib.objectmodel import specialize from rpython.rlib import rutf8 -from rpython.rlib.rutf8 import combine_flags from rpython.rlib.rarithmetic import r_uint, intmask from rpython.rlib.rstring import StringBuilder from pypy.module._codecs import interp_codecs @@ -26,10 +25,10 @@ # Fast version of the "strict" errors handler. def raise_unicode_exception_encode(errors, encoding, msg, utf8, startingpos, endingpos): - u_len, flag = rutf8.check_utf8(utf8, True) + u_len = rutf8.check_utf8(utf8, True) raise OperationError(space.w_UnicodeEncodeError, space.newtuple([space.newtext(encoding), - space.newutf8(utf8, u_len, flag), + space.newutf8(utf8, u_len), space.newint(startingpos), space.newint(endingpos), space.newtext(msg)])) @@ -55,18 +54,18 @@ def decode_unicode_escape(space, string): state = space.fromcache(interp_codecs.CodecState) unicodedata_handler = state.get_unicodedata_handler(space) - result_utf8, consumed, length, flag = str_decode_unicode_escape( + result_utf8, consumed, length = str_decode_unicode_escape( string, "strict", final=True, errorhandler=decode_error_handler(space), ud_handler=unicodedata_handler) - return result_utf8, length, flag + return result_utf8, length def decode_raw_unicode_escape(space, string): - result_utf8, consumed, lgt, flag = str_decode_raw_unicode_escape( + result_utf8, consumed, lgt = str_decode_raw_unicode_escape( string, "strict", final=True, errorhandler=decode_error_handler(space)) - return result_utf8, lgt, flag + return result_utf8, lgt def check_ascii_or_raise(space, string): try: @@ -83,19 +82,19 @@ # you still get two surrogate unicode characters in the result. # These are the Python2 rules; Python3 differs. 
try: - length, flag = rutf8.check_utf8(string, True, start, end) + length = rutf8.check_utf8(string, True, start, end) except rutf8.CheckError as e: # convert position into unicode position - lgt, flags = rutf8.check_utf8(string, True, start, stop=e.pos) + lgt = rutf8.check_utf8(string, True, start, stop=e.pos) decode_error_handler(space)('strict', 'utf8', 'invalid utf-8', string, start + lgt, start + lgt + 1) assert False, "unreachable" - return length, flag + return length def str_decode_ascii(s, errors, final, errorhandler): try: rutf8.check_ascii(s) - return s, len(s), len(s), rutf8.FLAG_ASCII + return s, len(s), len(s) except rutf8.CheckError: return _str_decode_ascii_slowpath(s, errors, final, errorhandler) @@ -112,13 +111,13 @@ res.append(ch) i += 1 ress = res.build() - lgt, flag = rutf8.check_utf8(ress, True) - return ress, len(s), lgt, flag + lgt = rutf8.check_utf8(ress, True) + return ress, len(s), lgt def str_decode_latin_1(s, errors, final, errorhandler): try: rutf8.check_ascii(s) - return s, len(s), len(s), rutf8.FLAG_ASCII + return s, len(s), len(s) except rutf8.CheckError: return _str_decode_latin_1_slowpath(s, errors, final, errorhandler) @@ -138,7 +137,7 @@ res.append_slice(s, start, end) i = end # cannot be ASCII, cannot have surrogates, I believe - return res.build(), len(s), len(s), rutf8.FLAG_REGULAR + return res.build(), len(s), len(s) def utf8_encode_latin_1(s, errors, errorhandler): try: @@ -149,37 +148,32 @@ def _utf8_encode_latin_1_slowpath(s, errors, errorhandler): res = StringBuilder(len(s)) - size = len(s) cur = 0 - i = 0 - while i < size: - if ord(s[i]) <= 0x7F: - res.append(s[i]) - i += 1 + iter = rutf8.Utf8StringIterator(s) + while True: + try: + ch = iter.next() + except StopIteration: + break + if ch <= 0xFF: + res.append(chr(ch)) cur += 1 else: - oc = rutf8.codepoint_at_pos(s, i) - if oc <= 0xFF: - res.append(chr(oc)) - cur += 1 - i = rutf8.next_codepoint_pos(s, i) - else: - r, pos = errorhandler(errors, 'latin1', - 'ordinal not in range(256)', s, cur, - cur + 1) - for j in range(pos - cur): - i = rutf8.next_codepoint_pos(s, i) + r, pos = errorhandler(errors, 'latin1', + 'ordinal not in range(256)', s, cur, + cur + 1) - j = 0 - while j < len(r): - c = rutf8.codepoint_at_pos(r, j) - if c > 0xFF: - errorhandler("strict", 'latin1', - 'ordinal not in range(256)', s, - cur, cur + 1) - j = rutf8.next_codepoint_pos(r, j) - res.append(chr(c)) - cur = pos + for c in rutf8.Utf8StringIterator(r): + if c > 0xFF: + errorhandler("strict", 'latin1', + 'ordinal not in range(256)', s, + cur, cur + 1) + res.append(chr(c)) + + for j in range(pos - cur - 1): + iter.next() + + cur = pos r = res.build() return r @@ -341,8 +335,7 @@ res.append(r) r = res.build() - lgt, flag = rutf8.check_utf8(r, True) - return r, pos, lgt, flag + return r, pos, rutf8.check_utf8(r, True) hexdigits = "0123456789ABCDEFabcdef" @@ -355,7 +348,7 @@ endinpos += 1 res, pos = errorhandler(errors, encoding, message, s, pos-2, endinpos) - size, flag = rutf8.check_utf8(res, True) + size = rutf8.check_utf8(res, True) builder.append(res) else: try: @@ -366,7 +359,7 @@ endinpos += 1 res, pos = errorhandler(errors, encoding, message, s, pos-2, endinpos) - size, flag = rutf8.check_utf8(res, True) + size = rutf8.check_utf8(res, True) builder.append(res) else: # when we get here, chr is a 32-bit unicode character @@ -376,21 +369,19 @@ message = "illegal Unicode character" res, pos = errorhandler(errors, encoding, message, s, pos-2, pos+digits) - size, flag = rutf8.check_utf8(res, True) + size = 
rutf8.check_utf8(res, True) builder.append(res) else: - flag = rutf8.get_flag_from_code(intmask(chr)) pos += digits size = 1 - return pos, size, flag + return pos, size def str_decode_unicode_escape(s, errors, final, errorhandler, ud_handler): size = len(s) if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII + return '', 0, 0 - flag = rutf8.FLAG_ASCII builder = StringBuilder(size) pos = 0 outsize = 0 @@ -401,7 +392,6 @@ if ch != '\\': if ord(ch) > 0x7F: rutf8.unichr_as_utf8_append(builder, ord(ch)) - flag = combine_flags(rutf8.FLAG_REGULAR, flag) else: builder.append(ch) pos += 1 @@ -414,9 +404,8 @@ message = "\\ at end of string" res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, size) - newsize, newflag = rutf8.check_utf8(res, True) + newsize = rutf8.check_utf8(res, True) outsize + newsize - flag = combine_flags(flag, newflag) builder.append(res) continue @@ -469,7 +458,6 @@ outsize += 1 if x > 0x7F: rutf8.unichr_as_utf8_append(builder, x) - flag = combine_flags(rutf8.FLAG_REGULAR, flag) else: builder.append(chr(x)) # hex escapes @@ -477,27 +465,24 @@ elif ch == 'x': digits = 2 message = "truncated \\xXX escape" - pos, newsize, newflag = hexescape(builder, s, pos, digits, + pos, newsize = hexescape(builder, s, pos, digits, "unicodeescape", errorhandler, message, errors) - flag = combine_flags(flag, newflag) outsize += newsize # \uXXXX elif ch == 'u': digits = 4 message = "truncated \\uXXXX escape" - pos, newsize, newflag = hexescape(builder, s, pos, digits, + pos, newsize = hexescape(builder, s, pos, digits, "unicodeescape", errorhandler, message, errors) - flag = combine_flags(flag, newflag) outsize += newsize # \UXXXXXXXX elif ch == 'U': digits = 8 message = "truncated \\UXXXXXXXX escape" - pos, newsize, newflag = hexescape(builder, s, pos, digits, + pos, newsize = hexescape(builder, s, pos, digits, "unicodeescape", errorhandler, message, errors) - flag = combine_flags(flag, newflag) outsize += newsize # \N{name} @@ -517,29 +502,25 @@ if code < 0: res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) - newsize, newflag = rutf8.check_utf8(res, True) - flag = combine_flags(flag, newflag) + newsize = rutf8.check_utf8(res, True) outsize += newsize builder.append(res) continue pos = look + 1 outsize += 1 - flag = combine_flags(flag, rutf8.get_flag_from_code(code)) rutf8.unichr_as_utf8_append(builder, code, allow_surrogates=True) # xxx 'code' is probably always within range here... 
else: res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) - newsize, newflag = rutf8.check_utf8(res, True) - flag = combine_flags(flag, newflag) + newsize = rutf8.check_utf8(res, True) outsize += newsize builder.append(res) else: res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) - newsize, newflag = rutf8.check_utf8(res, True) - flag = combine_flags(flag, newflag) + newsize = rutf8.check_utf8(res, True) outsize += newsize builder.append(res) else: @@ -547,7 +528,7 @@ builder.append(ch) outsize += 2 - return builder.build(), pos, outsize, flag + return builder.build(), pos, outsize # ____________________________________________________________ # Raw unicode escape @@ -556,7 +537,7 @@ errorhandler=None): size = len(s) if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII + return '', 0, 0 result = StringBuilder(size) pos = 0 @@ -598,8 +579,8 @@ "rawunicodeescape", errorhandler, message, errors) r = result.build() - lgt, flag = rutf8.check_utf8(r, True) - return r, pos, lgt, flag + lgt = rutf8.check_utf8(r, True) + return r, pos, lgt _utf8_encode_unicode_escape = rutf8.make_utf8_escape_function() @@ -734,7 +715,7 @@ errorhandler=None): size = len(s) if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII + return '', 0, 0 inShift = False base64bits = 0 @@ -745,7 +726,6 @@ result = StringBuilder(size) pos = 0 shiftOutStartPos = 0 - flag = rutf8.FLAG_ASCII startinpos = 0 while pos < size: ch = s[pos] @@ -771,13 +751,11 @@ (outCh & 0x3FF)) + 0x10000 rutf8.unichr_as_utf8_append(result, code) outsize += 1 - flag = combine_flags(flag, rutf8.FLAG_REGULAR) surrogate = 0 continue else: rutf8.unichr_as_utf8_append(result, surrogate, allow_surrogates=True) - flag = rutf8.FLAG_HAS_SURROGATES outsize += 1 surrogate = 0 # Not done with outCh: falls back to next line @@ -785,8 +763,6 @@ # first surrogate surrogate = outCh else: - flag = combine_flags(flag, - rutf8.get_flag_from_code(outCh)) outsize += 1 assert outCh >= 0 rutf8.unichr_as_utf8_append(result, outCh, True) @@ -802,9 +778,8 @@ msg = "partial character in shift sequence" res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos) - reslen, resflags = rutf8.check_utf8(res, True) + reslen = rutf8.check_utf8(res, True) outsize += reslen - flag = combine_flags(flag, resflags) result.append(res) continue else: @@ -814,15 +789,13 @@ msg = "non-zero padding bits in shift sequence" res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos) - reslen, resflags = rutf8.check_utf8(res, True) + reslen = rutf8.check_utf8(res, True) outsize += reslen - flag = combine_flags(flag, resflags) result.append(res) continue if surrogate and _utf7_DECODE_DIRECT(ord(ch)): outsize += 1 - flag = rutf8.FLAG_HAS_SURROGATES rutf8.unichr_as_utf8_append(result, surrogate, True) surrogate = 0 @@ -854,9 +827,8 @@ pos += 1 msg = "unexpected special character" res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos) - reslen, resflags = rutf8.check_utf8(res, True) + reslen = rutf8.check_utf8(res, True) outsize += reslen - flag = combine_flags(flag, resflags) result.append(res) # end of string @@ -869,9 +841,8 @@ (base64bits > 0 and base64buffer != 0)): msg = "unterminated shift sequence" res, pos = errorhandler(errors, 'utf7', msg, s, shiftOutStartPos, pos) - reslen, resflags = rutf8.check_utf8(res, True) + reslen = rutf8.check_utf8(res, True) outsize += reslen - flag = combine_flags(flag, resflags) result.append(res) final_length = result.getlength() elif inShift: @@ -879,7 +850,7 @@ final_length = shiftOutStartPos # back off output 
assert final_length >= 0 - return result.build()[:final_length], pos, outsize, flag + return result.build()[:final_length], pos, outsize def utf8_encode_utf_7(s, errors, errorhandler): size = len(s) @@ -942,21 +913,21 @@ def str_decode_utf_16(s, errors, final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_16_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_16_helper(s, errors, final, errorhandler, "native") - return result, c, lgt, flag + return result, c, lgt def str_decode_utf_16_be(s, errors, final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_16_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_16_helper(s, errors, final, errorhandler, "big") - return result, c, lgt, flag + return result, c, lgt def str_decode_utf_16_le(s, errors, final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_16_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_16_helper(s, errors, final, errorhandler, "little") - return result, c, lgt, flag + return result, c, lgt def str_decode_utf_16_helper(s, errors, final=True, errorhandler=None, @@ -999,7 +970,7 @@ else: bo = 1 if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII, bo + return '', 0, 0, bo if bo == -1: # force little endian ihi = 1 @@ -1058,8 +1029,8 @@ s, pos - 2, pos) result.append(r) r = result.build() - lgt, flag = rutf8.check_utf8(r, True) - return result.build(), pos, lgt, flag, bo + lgt = rutf8.check_utf8(r, True) + return result.build(), pos, lgt, bo def _STORECHAR(result, CH, byteorder): hi = chr(((CH) >> 8) & 0xff) @@ -1148,21 +1119,21 @@ def str_decode_utf_32(s, errors, final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_32_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_32_helper(s, errors, final, errorhandler, "native") - return result, c, lgt, flag + return result, c, lgt def str_decode_utf_32_be(s, errors, final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_32_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_32_helper(s, errors, final, errorhandler, "big") - return result, c, lgt, flag + return result, c, lgt def str_decode_utf_32_le(s, errors, final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_32_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_32_helper(s, errors, final, errorhandler, "little") - return result, c, lgt, flag + return result, c, lgt BOM32_DIRECT = intmask(0x0000FEFF) BOM32_REVERSE = intmask(0xFFFE0000) @@ -1208,7 +1179,7 @@ else: bo = 1 if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII, bo + return '', 0, 0, bo if bo == -1: # force little endian iorder = [0, 1, 2, 3] @@ -1243,8 +1214,8 @@ rutf8.unichr_as_utf8_append(result, ch, allow_surrogates=True) pos += 4 r = result.build() - lgt, flag = rutf8.check_utf8(r, True) - return r, pos, lgt, flag, bo + lgt = rutf8.check_utf8(r, True) + return r, pos, lgt, bo def _STORECHAR32(result, CH, byteorder): c0 = chr(((CH) >> 24) & 0xff) @@ -1330,7 +1301,7 @@ errorhandler=None): size = len(s) if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII + return '', 0, 0 unicode_bytes = 4 if BYTEORDER == "little": @@ -1367,8 +1338,8 @@ rutf8.unichr_as_utf8_append(result, intmask(t), allow_surrogates=True) pos += unicode_bytes r = result.build() - lgt, flag = rutf8.check_utf8(r, True) - return r, pos, lgt, flag + lgt = rutf8.check_utf8(r, True) + return r, pos, lgt def utf8_encode_unicode_internal(s, errors, errorhandler): size = len(s) @@ -1409,7 +1380,7 @@ errorhandler=errorhandler) size = 
len(s) if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII + return '', 0, 0 pos = 0 result = StringBuilder(size) @@ -1426,8 +1397,8 @@ result.append(c) pos += 1 r = result.build() - lgt, flag = rutf8.check_utf8(r, True) - return r, pos, lgt, flag + lgt = rutf8.check_utf8(r, True) + return r, pos, lgt def utf8_encode_charmap(s, errors, errorhandler=None, mapping=None): diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py --- a/pypy/module/__builtin__/operation.py +++ b/pypy/module/__builtin__/operation.py @@ -26,14 +26,8 @@ "Return a Unicode string of one character with the given ordinal." if code < 0 or code > 0x10FFFF: raise oefmt(space.w_ValueError, "unichr() arg out of range") - elif code < 0x80: - flag = rutf8.FLAG_ASCII - elif 0xD800 <= code <= 0xDFFF: - flag = rutf8.FLAG_HAS_SURROGATES - else: - flag = rutf8.FLAG_REGULAR s = rutf8.unichr_as_utf8(code, allow_surrogates=True) - return space.newutf8(s, 1, flag) + return space.newutf8(s, 1) def len(space, w_obj): "len(object) -> integer\n\nReturn the number of items of a sequence or mapping." diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -183,8 +183,7 @@ raise oefmt(self.space.w_ValueError, "%s out of range for conversion to unicode: %s", self.name, s) - flag = rutf8.get_flag_from_code(intmask(value)) - return self.space.newutf8(utf8, 1, flag) + return self.space.newutf8(utf8, 1) def string(self, cdataobj, maxlen): with cdataobj as ptr: @@ -215,15 +214,15 @@ def unpack_ptr(self, w_ctypeptr, ptr, length): if self.size == 2: - utf8, lgt, flag = wchar_helper.utf8_from_char16(ptr, length) + utf8, lgt = wchar_helper.utf8_from_char16(ptr, length) else: try: - utf8, lgt, flag = wchar_helper.utf8_from_char32(ptr, length) + utf8, lgt = wchar_helper.utf8_from_char32(ptr, length) except wchar_helper.OutOfRange as e: raise oefmt(self.space.w_ValueError, "%s out of range for conversion to unicode: %s", self.name, hex(e.ordinal)) - return self.space.newutf8(utf8, lgt, flag) + return self.space.newutf8(utf8, lgt) class W_CTypePrimitiveSigned(W_CTypePrimitive): diff --git a/pypy/module/_cffi_backend/wchar_helper.py b/pypy/module/_cffi_backend/wchar_helper.py --- a/pypy/module/_cffi_backend/wchar_helper.py +++ b/pypy/module/_cffi_backend/wchar_helper.py @@ -19,16 +19,14 @@ ptr = rffi.cast(rffi.UINTP, ptr) u = StringBuilder(length) j = 0 - flag = rutf8.FLAG_ASCII while j < length: ch = intmask(ptr[j]) j += 1 - flag = rutf8.combine_flags(flag, rutf8.get_flag_from_code(ch)) try: rutf8.unichr_as_utf8_append(u, ch, allow_surrogates=True) except ValueError: raise OutOfRange(ch) - return u.build(), length, flag + return u.build(), length def utf8_from_char16(ptr, length): # 'ptr' is a pointer to 'length' 16-bit integers @@ -36,7 +34,6 @@ u = StringBuilder(length) j = 0 result_length = length - flag = rutf8.FLAG_ASCII while j < length: ch = intmask(ptr[j]) j += 1 @@ -46,9 +43,8 @@ ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000 j += 1 result_length -= 1 - flag = rutf8.combine_flags(flag, rutf8.get_flag_from_code(ch)) rutf8.unichr_as_utf8_append(u, ch, allow_surrogates=True) - return u.build(), result_length, flag + return u.build(), result_length @specialize.ll() diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -43,8 +43,8 @@ length = len(input) else: w_cls = 
space.w_UnicodeEncodeError - length, flag = rutf8.check_utf8(input, allow_surrogates=True) - w_input = space.newutf8(input, length, flag) + length = rutf8.check_utf8(input, allow_surrogates=True) + w_input = space.newutf8(input, length) w_exc = space.call_function( w_cls, space.newtext(encoding), @@ -192,7 +192,7 @@ def ignore_errors(space, w_exc): check_exception(space, w_exc) w_end = space.getattr(w_exc, space.newtext('end')) - return space.newtuple([space.newutf8('', 0, rutf8.FLAG_ASCII), w_end]) + return space.newtuple([space.newutf8('', 0), w_end]) REPLACEMENT = u'\ufffd'.encode('utf8') @@ -203,13 +203,13 @@ size = space.int_w(w_end) - space.int_w(w_start) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): text = '?' * size - return space.newtuple([space.newutf8(text, size, rutf8.FLAG_ASCII), w_end]) + return space.newtuple([space.newutf8(text, size), w_end]) elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError): text = REPLACEMENT - return space.newtuple([space.newutf8(text, 1, rutf8.FLAG_REGULAR), w_end]) + return space.newtuple([space.newutf8(text, 1), w_end]) elif space.isinstance_w(w_exc, space.w_UnicodeTranslateError): text = REPLACEMENT * size - return space.newtuple([space.newutf8(text, size, rutf8.FLAG_REGULAR), w_end]) + return space.newtuple([space.newutf8(text, size), w_end]) else: raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) @@ -237,8 +237,8 @@ builder.append(";") pos = rutf8.next_codepoint_pos(obj, pos) r = builder.build() - lgt, flag = rutf8.check_utf8(r, True) - return space.newtuple([space.newutf8(r, lgt, flag), w_end]) + lgt = rutf8.check_utf8(r, True) + return space.newtuple([space.newutf8(r, lgt), w_end]) else: raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) @@ -278,8 +278,8 @@ builder.append_slice(num, 2, lnum) pos = rutf8.next_codepoint_pos(obj, pos) r = builder.build() - lgt, flag = rutf8.check_utf8(r, True) - return space.newtuple([space.newutf8(r, lgt, flag), w_end]) + lgt = rutf8.check_utf8(r, True) + return space.newtuple([space.newutf8(r, lgt), w_end]) else: raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) @@ -417,9 +417,9 @@ final = space.is_true(w_final) state = space.fromcache(CodecState) func = getattr(unicodehelper, rname) - result, consumed, length, flag = func(string, errors, + result, consumed, length = func(string, errors, final, state.decode_error_handler) - return space.newtuple([space.newutf8(result, length, flag), + return space.newtuple([space.newutf8(result, length), space.newint(consumed)]) wrap_decoder.func_name = rname globals()[name] = wrap_decoder @@ -488,14 +488,14 @@ state = space.fromcache(CodecState) # call the fast version for checking try: - lgt, flag = rutf8.check_utf8(string, allow_surrogates=True) + lgt = rutf8.check_utf8(string, allow_surrogates=True) except rutf8.CheckError: - res, consumed, lgt, flag = unicodehelper.str_decode_utf8(string, + res, consumed, lgt = unicodehelper.str_decode_utf8(string, errors, final, state.decode_error_handler) - return space.newtuple([space.newutf8(res, lgt, flag), + return space.newtuple([space.newutf8(res, lgt), space.newint(consumed)]) else: - return space.newtuple([space.newutf8(string, lgt, flag), + return space.newtuple([space.newutf8(string, lgt), space.newint(len(string))]) @unwrap_spec(data='bufferstr', errors='text_or_none', byteorder=int, @@ -516,10 +516,10 @@ consumed = len(data) if final: consumed = 0 - res, consumed, lgt, flag, byteorder = 
str_decode_utf_16_helper( + res, consumed, lgt, byteorder = str_decode_utf_16_helper( data, errors, final, state.decode_error_handler, byteorder) - return space.newtuple([space.newutf8(res, lgt, flag), + return space.newtuple([space.newutf8(res, lgt), space.newint(consumed), space.newint(byteorder)]) @@ -539,10 +539,10 @@ consumed = len(data) if final: consumed = 0 - res, consumed, lgt, flag, byteorder = str_decode_utf_32_helper( + res, consumed, lgt, byteorder = str_decode_utf_32_helper( data, errors, final, state.decode_error_handler, byteorder) - return space.newtuple([space.newutf8(res, lgt, flag), + return space.newtuple([space.newutf8(res, lgt), space.newint(consumed), space.newint(byteorder)]) @@ -632,7 +632,7 @@ if errors is None: errors = 'strict' if len(string) == 0: - return space.newtuple([space.newutf8('', 0, rutf8.FLAG_ASCII), + return space.newtuple([space.newutf8('', 0), space.newint(0)]) if space.is_none(w_mapping): @@ -642,9 +642,9 @@ final = True state = space.fromcache(CodecState) - result, consumed, lgt, flag = unicodehelper.str_decode_charmap( + result, consumed, lgt = unicodehelper.str_decode_charmap( string, errors, final, state.decode_error_handler, mapping) - return space.newtuple([space.newutf8(result, lgt, flag), + return space.newtuple([space.newutf8(result, lgt), space.newint(consumed)]) @unwrap_spec(errors='text_or_none') @@ -708,12 +708,12 @@ unicode_name_handler = state.get_unicodedata_handler(space) - result, consumed, lgt, flag = unicodehelper.str_decode_unicode_escape( + result, consumed, lgt = unicodehelper.str_decode_unicode_escape( string, errors, final, state.decode_error_handler, unicode_name_handler) - return space.newtuple([space.newutf8(result, lgt, flag), space.newint(consumed)]) + return space.newtuple([space.newutf8(result, lgt), space.newint(consumed)]) # ____________________________________________________________ # Unicode-internal @@ -731,15 +731,15 @@ string = space.readbuf_w(w_string).as_str() if len(string) == 0: - return space.newtuple([space.newutf8('', 0, rutf8.FLAG_ASCII), + return space.newtuple([space.newutf8('', 0), space.newint(0)]) final = True state = space.fromcache(CodecState) - result, consumed, lgt, flag = unicodehelper.str_decode_unicode_internal( + result, consumed, lgt = unicodehelper.str_decode_unicode_internal( string, errors, final, state.decode_error_handler) - return space.newtuple([space.newutf8(result, lgt, flag), + return space.newtuple([space.newutf8(result, lgt), space.newint(consumed)]) # ____________________________________________________________ diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -11,8 +11,8 @@ from rpython.rlib.rarithmetic import intmask, r_uint, r_ulonglong from rpython.rlib.rbigint import rbigint from rpython.rlib.rstring import StringBuilder -from rpython.rlib.rutf8 import ( - FLAG_ASCII, check_utf8, next_codepoint_pos, codepoints_in_utf8) +from rpython.rlib.rutf8 import (check_utf8, next_codepoint_pos, + codepoints_in_utf8) STATE_ZERO, STATE_OK, STATE_DETACHED = range(3) @@ -31,22 +31,22 @@ def __init__(self, space): self.w_newlines_dict = { - SEEN_CR: space.newutf8("\r", 1, FLAG_ASCII), - SEEN_LF: space.newutf8("\n", 1, FLAG_ASCII), - SEEN_CRLF: space.newutf8("\r\n", 2, FLAG_ASCII), + SEEN_CR: space.newutf8("\r", 1), + SEEN_LF: space.newutf8("\n", 1), + SEEN_CRLF: space.newutf8("\r\n", 2), SEEN_CR | SEEN_LF: space.newtuple( - [space.newutf8("\r", 1, FLAG_ASCII), - 
space.newutf8("\n", 1, FLAG_ASCII)]), + [space.newutf8("\r", 1), + space.newutf8("\n", 1)]), SEEN_CR | SEEN_CRLF: space.newtuple( - [space.newutf8("\r", 1, FLAG_ASCII), - space.newutf8("\r\n", 2, FLAG_ASCII)]), + [space.newutf8("\r", 1), + space.newutf8("\r\n", 2)]), SEEN_LF | SEEN_CRLF: space.newtuple( - [space.newutf8("\n", 1, FLAG_ASCII), - space.newutf8("\r\n", 2, FLAG_ASCII)]), + [space.newutf8("\n", 1), + space.newutf8("\r\n", 2)]), SEEN_CR | SEEN_LF | SEEN_CRLF: space.newtuple( - [space.newutf8("\r", 1, FLAG_ASCII), - space.newutf8("\n", 1, FLAG_ASCII), - space.newutf8("\r\n", 2, FLAG_ASCII)]), + [space.newutf8("\r", 1), + space.newutf8("\n", 1), + space.newutf8("\r\n", 2)]), } @unwrap_spec(translate=int) @@ -98,7 +98,7 @@ output_len -= 1 if output_len == 0: - return space.newutf8("", 0, FLAG_ASCII) + return space.newutf8("", 0) # Record which newlines are read and do newline translation if # desired, all in one pass. @@ -153,8 +153,8 @@ output = builder.build() self.seennl |= seennl - lgt, flag = check_utf8(output, True) - return space.newutf8(output, lgt, flag) + lgt = check_utf8(output, True) + return space.newutf8(output, lgt) def reset_w(self, space): self.seennl = 0 @@ -361,6 +361,7 @@ while scanned < limit: try: ch = self.next_char() + scanned += 1 except StopIteration: return False if ch == '\n': @@ -746,7 +747,7 @@ remnant = None continue - if limit > 0: + if limit >= 0: remaining = limit - builder.getlength() assert remaining >= 0 else: diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -1,41 +1,54 @@ import pytest try: - from hypothesis import given, strategies as st, assume + from hypothesis import given, strategies as st except ImportError: pytest.skip("hypothesis required") +import os from pypy.module._io.interp_bytesio import W_BytesIO from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace(u'\r\n', u'\n') + text = text.replace(u'\r', u'\n') + return text.replace(u'\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline(draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + fragments = [] + limits = [] + for _ in range(n_lines): + line = draw(st.text(st.characters(blacklist_characters=u'\r\n'))) + fragments.append(line) + ending = draw(st.sampled_from([u'\n', u'\r', u'\r\n'])) + fragments.append(ending) + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + return (u''.join(fragments), limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(space, txt, mode, limit): - assume(limit != 0) + at given(data=st_readline(), + mode=st.sampled_from(['\r', '\n', '\r\n', ''])) +def test_readline(space, data, mode): + txt, limits = data w_stream = W_BytesIO(space) w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) w_textio = W_TextIOWrapper(space) w_textio.descr_init( - space, w_stream, encoding='utf-8', + space, w_stream, + encoding='utf-8', w_errors=space.newtext('surrogatepass'), w_newline=space.newtext(mode)) lines = [] - while True: + for limit in limits: w_line = 
w_textio.readline_w(space, space.newint(limit)) line = space.utf8_w(w_line).decode('utf-8') - if limit > 0: + if limit >= 0: assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + assert txt.startswith(u''.join(lines)) @given(st.text()) def test_read_buffer(text): diff --git a/pypy/module/_multibytecodec/interp_incremental.py b/pypy/module/_multibytecodec/interp_incremental.py --- a/pypy/module/_multibytecodec/interp_incremental.py +++ b/pypy/module/_multibytecodec/interp_incremental.py @@ -66,8 +66,8 @@ pos = c_codecs.pypy_cjk_dec_inbuf_consumed(self.decodebuf) assert 0 <= pos <= len(object) self.pending = object[pos:] - lgt, flag = rutf8.get_utf8_length_flag(output) - return space.newutf8(output, lgt, flag) + lgt = rutf8.get_utf8_length_flag(output) + return space.newutf8(output, lgt) @unwrap_spec(errors="text_or_none") diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py --- a/pypy/module/_multibytecodec/interp_multibytecodec.py +++ b/pypy/module/_multibytecodec/interp_multibytecodec.py @@ -78,12 +78,11 @@ space.newtext(e.reason)])) def wrap_unicodeencodeerror(space, e, input, inputlen, name): - _, flag = rutf8.check_utf8(input, True) raise OperationError( space.w_UnicodeEncodeError, space.newtuple([ space.newtext(name), - space.newutf8(input, inputlen, flag), + space.newutf8(input, inputlen), space.newint(e.start), space.newint(e.end), space.newtext(e.reason)])) diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -295,15 +295,15 @@ if bits & 0x80: # the 8th bit is set, it's an utf8 string content_utf8 = self.getslice(start, end) - lgt, flag = unicodehelper.check_utf8_or_raise(self.space, + lgt = unicodehelper.check_utf8_or_raise(self.space, content_utf8) - return self.space.newutf8(content_utf8, lgt, flag) + return self.space.newutf8(content_utf8, lgt) else: # ascii only, fast path (ascii is a strict subset of # latin1, and we already checked that all the chars are < # 128) return self.space.newutf8(self.getslice(start, end), - end - start, rutf8.FLAG_ASCII) + end - start) def decode_string_escaped(self, start): i = self.pos @@ -316,10 +316,10 @@ i += 1 if ch == '"': content_utf8 = builder.build() - lgt, f = unicodehelper.check_utf8_or_raise(self.space, + lgt = unicodehelper.check_utf8_or_raise(self.space, content_utf8) self.pos = i - return self.space.newutf8(content_utf8, lgt, f) + return self.space.newutf8(content_utf8, lgt) elif ch == '\\': i = self.decode_escape_sequence(i, builder) elif ch < '\x20': diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py --- a/pypy/module/_pypyjson/test/test__pypyjson.py +++ b/pypy/module/_pypyjson/test/test__pypyjson.py @@ -11,7 +11,7 @@ dec.close() class FakeSpace(object): - def newutf8(self, s, l, f): + def newutf8(self, s, l): return s def test_decode_key(): diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py --- a/pypy/module/_rawffi/alt/type_converter.py +++ b/pypy/module/_rawffi/alt/type_converter.py @@ -228,8 +228,7 @@ return space.newbytes(chr(ucharval)) elif w_ffitype.is_unichar(): wcharval = self.get_unichar(w_ffitype) - return space.newutf8(rutf8.unichr_as_utf8(wcharval), 1, - rutf8.get_flag_from_code(intmask(wcharval))) + return space.newutf8(rutf8.unichr_as_utf8(wcharval), 1) 
elif w_ffitype.is_double(): return self._float(w_ffitype) elif w_ffitype.is_singlefloat(): diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -596,9 +596,9 @@ return space.w_None wcharp_addr = rffi.cast(rffi.CWCHARP, address) if maxlength == -1: - s = rffi.wcharp2unicode(wcharp_addr) + s = rffi.wcharp2utf8(wcharp_addr) else: - s = rffi.wcharp2unicoden(wcharp_addr, maxlength) + s = rffi.wcharpsize2utf8(wcharp_addr, maxlength) return space.newunicode(s) @unwrap_spec(address=r_uint, maxlength=int) diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -41,7 +41,9 @@ if isinstance(ctx, rsre_core.StrMatchContext): return space.newbytes(ctx._string[start:end]) elif isinstance(ctx, rsre_core.UnicodeMatchContext): - return space.newunicode(ctx._unicodestr[start:end]) + s = ctx._unicodestr[start:end] + lgt = rutf8.check_utf8(s, True) + return space.newutf8(s, lgt) else: # unreachable raise SystemError @@ -340,11 +342,10 @@ else: assert unicodebuilder is not None return space.newutf8(unicodebuilder.build(), - unicodebuilder.get_length(), - unicodebuilder.get_flag()), n + unicodebuilder.get_length()), n else: if space.isinstance_w(w_string, space.w_unicode): - w_emptystr = space.newunicode(u'') + w_emptystr = space.newutf8('', 0) else: w_emptystr = space.newbytes('') w_item = space.call_method(w_emptystr, 'join', @@ -578,7 +579,8 @@ elif isinstance(ctx, rsre_core.StrMatchContext): return space.newbytes(ctx._string) elif isinstance(ctx, rsre_core.UnicodeMatchContext): - return space.newunicode(ctx._unicodestr) + lgt = rutf8.check_utf8(ctx._unicodestr, True) + return space.newutf8(ctx._unicodestr, lgt) else: raise SystemError diff --git a/pypy/module/_warnings/interp_warnings.py b/pypy/module/_warnings/interp_warnings.py --- a/pypy/module/_warnings/interp_warnings.py +++ b/pypy/module/_warnings/interp_warnings.py @@ -1,3 +1,6 @@ + +from rpython.rlib import rutf8 + from pypy.interpreter.gateway import unwrap_spec, WrappedDefault from pypy.interpreter.error import OperationError, oefmt @@ -208,10 +211,11 @@ except OperationError as e: if e.async(space): raise - message = u"%s:%d: %s: %s\n" % (space.unicode_w(w_filename), lineno, - space.unicode_w(w_name), - space.unicode_w(w_text)) - w_message = space.newunicode(message) + message = "%s:%d: %s: %s\n" % (space.utf8_w(w_filename), lineno, + space.utf8_w(w_name), + space.utf8_w(w_text)) + lgt = rutf8.check_utf8(message, True) + w_message = space.newutf8(message, lgt) else: w_message = space.newtext(message) space.call_method(w_stderr, "write", w_message) diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -1,4 +1,4 @@ -from rpython.rlib import jit, rgc +from rpython.rlib import jit, rgc, rutf8 from rpython.rlib.buffer import RawBuffer from rpython.rlib.objectmodel import keepalive_until_here from rpython.rlib.rarithmetic import ovfcheck, widen @@ -451,7 +451,7 @@ """ if self.typecode == 'u': buf = rffi.cast(UNICODE_ARRAY, self._buffer_as_unsigned()) - return space.newunicode(rffi.wcharpsize2unicode(buf, self.len)) + return space.newutf8(rffi.wcharpsize2unicode(buf, self.len)) else: raise oefmt(space.w_ValueError, "tounicode() may only be called on type 'u' arrays") @@ -797,7 +797,7 @@ TypeCode(rffi.UINT, 'int_w', True) 
types = { 'c': TypeCode(lltype.Char, 'bytes_w', method=''), - 'u': TypeCode(lltype.UniChar, 'unicode_w', method=''), + 'u': TypeCode(lltype.UniChar, 'utf8_len_w', method=''), 'b': TypeCode(rffi.SIGNEDCHAR, 'int_w', True, True), 'B': TypeCode(rffi.UCHAR, 'int_w', True), 'h': TypeCode(rffi.SHORT, 'int_w', True, True), @@ -895,11 +895,17 @@ "unsigned %d-byte integer out of range", mytype.bytes) return rffi.cast(mytype.itemtype, item) - if mytype.unwrap == 'bytes_w' or mytype.unwrap == 'unicode_w': + if mytype.unwrap == 'bytes_w': if len(item) != 1: raise oefmt(space.w_TypeError, "array item must be char") item = item[0] return rffi.cast(mytype.itemtype, item) + if mytype.unwrap == 'utf8_len_w': + utf8, lgt = item + if lgt != 1: + raise oefmt(space.w_TypeError, "array item must be char") + uchar = rutf8.codepoint_at_pos(utf8, 0) + return rffi.cast(mytype.itemtype, uchar) # # "regular" case: it fits in an rpython integer (lltype.Signed) # or it is a float @@ -1007,7 +1013,8 @@ elif mytype.typecode == 'c': return space.newbytes(item) elif mytype.typecode == 'u': - return space.newunicode(item) + code = ord(item) + return space.newutf8(rutf8.unichr_as_utf8(code), 1) assert 0, "unreachable" # interface diff --git a/pypy/module/cpyext/test/test_codecs.py b/pypy/module/cpyext/test/test_codecs.py --- a/pypy/module/cpyext/test/test_codecs.py +++ b/pypy/module/cpyext/test/test_codecs.py @@ -11,5 +11,5 @@ w_encoded = space.call_method(w_encoder, 'encode', space.wrap(u'späm')) w_decoder = PyCodec_IncrementalDecoder(space, utf8, None) w_decoded = space.call_method(w_decoder, 'decode', w_encoded) - assert space.unwrap(w_decoded) == u'späm' + assert space.unicode_w(w_decoded) == u'späm' rffi.free_charp(utf8) diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py --- a/pypy/module/cpyext/test/test_eval.py +++ b/pypy/module/cpyext/test/test_eval.py @@ -131,7 +131,7 @@ finally: rffi.free_charp(buf) w_a = space.getitem(w_globals, space.wrap("a")) - assert space.unwrap(w_a) == u'caf\xe9' + assert space.unicode_w(w_a) == u'caf\xe9' lltype.free(flags, flavor='raw') def test_run_file(self, space): diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py --- a/pypy/module/cpyext/test/test_object.py +++ b/pypy/module/cpyext/test/test_object.py @@ -8,7 +8,7 @@ from pypy.module.cpyext.object import ( PyObject_IsTrue, PyObject_Not, PyObject_GetAttrString, PyObject_DelAttrString, PyObject_GetAttr, PyObject_DelAttr, - PyObject_GetItem, + PyObject_GetItem, PyObject_IsInstance, PyObject_IsSubclass, PyObject_AsFileDescriptor, PyObject_Hash, PyObject_Cmp, PyObject_Unicode ) @@ -209,9 +209,9 @@ PyObject_Cmp(space, w(u"\xe9"), w("\xe9"), ptr) def test_unicode(self, space, api): - assert space.unwrap(api.PyObject_Unicode(None)) == u"" - assert space.unwrap(api.PyObject_Unicode(space.wrap([]))) == u"[]" - assert space.unwrap(api.PyObject_Unicode(space.wrap("e"))) == u"e" + assert space.unicode_w(api.PyObject_Unicode(None)) == u"" + assert space.unicode_w(api.PyObject_Unicode(space.wrap([]))) == u"[]" + assert space.unicode_w(api.PyObject_Unicode(space.wrap("e"))) == u"e" with raises_w(space, UnicodeDecodeError): PyObject_Unicode(space, space.wrap("\xe9")) @@ -562,7 +562,7 @@ PyObject *a = PyTuple_GetItem(args, 0); PyObject *b = PyTuple_GetItem(args, 1); int res = PyObject_RichCompareBool(a, b, Py_EQ); - return PyLong_FromLong(res); + return PyLong_FromLong(res); """),]) a = float('nan') b = float('nan') diff --git a/pypy/module/cpyext/test/test_unicodeobject.py 
b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -178,7 +178,7 @@ array = rffi.cast(rffi.CWCHARP, PyUnicode_AS_DATA(space, word)) array2 = PyUnicode_AS_UNICODE(space, word) array3 = PyUnicode_AsUnicode(space, word) - for (i, char) in enumerate(space.unwrap(word)): + for (i, char) in enumerate(space.unicode_w(word)): assert array[i] == char assert array2[i] == char assert array3[i] == char @@ -216,12 +216,12 @@ def test_fromstring(self, space): s = rffi.str2charp(u'sp\x09m'.encode("utf-8")) w_res = PyUnicode_FromString(space, s) - assert space.unwrap(w_res) == u'sp\x09m' + assert space.unicode_w(w_res) == u'sp\x09m' res = PyUnicode_FromStringAndSize(space, s, 4) w_res = from_ref(space, res) Py_DecRef(space, res) - assert space.unwrap(w_res) == u'sp\x09m' + assert space.unicode_w(w_res) == u'sp\x09m' rffi.free_charp(s) def test_unicode_resize(self, space): @@ -256,17 +256,17 @@ u = rffi.str2charp(u'sp\x134m'.encode("utf-8")) w_u = PyUnicode_DecodeUTF8(space, u, 5, None) assert space.type(w_u) is space.w_unicode - assert space.unwrap(w_u) == u'sp\x134m' + assert space.unicode_w(w_u) == u'sp\x134m' w_u = PyUnicode_DecodeUTF8(space, u, 2, None) assert space.type(w_u) is space.w_unicode - assert space.unwrap(w_u) == 'sp' + assert space.unicode_w(w_u) == 'sp' rffi.free_charp(u) def test_encode_utf8(self, space): u = rffi.unicode2wcharp(u'sp\x09m') w_s = PyUnicode_EncodeUTF8(space, u, 4, None) - assert space.unwrap(w_s) == u'sp\x09m'.encode('utf-8') + assert space.unicode_w(w_s) == u'sp\x09m'.encode('utf-8') rffi.free_wcharp(u) def test_encode_decimal(self, space): @@ -364,18 +364,18 @@ def test_fromobject(self, space): w_u = space.wrap(u'a') assert PyUnicode_FromObject(space, w_u) is w_u - assert space.unwrap( + assert space.unicode_w( PyUnicode_FromObject(space, space.wrap('test'))) == 'test' def test_decode(self, space): b_text = rffi.str2charp('caf\x82xx') b_encoding = rffi.str2charp('cp437') - assert space.unwrap( + assert space.unicode_w( PyUnicode_Decode(space, b_text, 4, b_encoding, None)) == u'caf\xe9' w_text = PyUnicode_FromEncodedObject(space, space.wrap("test"), b_encoding, None) assert space.isinstance_w(w_text, space.w_unicode) - assert space.unwrap(w_text) == "test" + assert space.unicode_w(w_text) == "test" with raises_w(space, TypeError): PyUnicode_FromEncodedObject(space, space.wrap(u"test"), @@ -391,7 +391,8 @@ u_text = u'abcdefg' s_text = space.str_w(PyUnicode_AsEncodedString(space, space.wrap(u_text), null_charp, null_charp)) b_text = rffi.str2charp(s_text) - assert space.unwrap(PyUnicode_Decode(space, b_text, len(s_text), null_charp, null_charp)) == u_text + assert space.unicode_w(PyUnicode_Decode( + space, b_text, len(s_text), null_charp, null_charp)) == u_text with raises_w(space, TypeError): PyUnicode_FromEncodedObject( space, space.wrap(u_text), null_charp, None) @@ -508,7 +509,7 @@ def test_concat(self, space): w_res = PyUnicode_Concat(space, space.wrap(u'a'), space.wrap(u'b')) - assert space.unwrap(w_res) == u'ab' + assert space.unicode_w(w_res) == u'ab' def test_copy(self, space): w_x = space.wrap(u"abcd\u0660") @@ -579,29 +580,30 @@ w_format = space.wrap(u'hi %s') w_args = space.wrap((u'test',)) w_formated = PyUnicode_Format(space, w_format, w_args) - assert space.unwrap(w_formated) == space.unwrap(space.mod(w_format, w_args)) + assert (space.unicode_w(w_formated) == + space.unicode_w(space.mod(w_format, w_args))) def test_join(self, space): w_sep = 
space.wrap(u'') w_seq = space.wrap([u'a', u'b']) w_joined = PyUnicode_Join(space, w_sep, w_seq) - assert space.unwrap(w_joined) == u'ab' + assert space.unicode_w(w_joined) == u'ab' def test_fromordinal(self, space): w_char = PyUnicode_FromOrdinal(space, 65) - assert space.unwrap(w_char) == u'A' + assert space.unicode_w(w_char) == u'A' w_char = PyUnicode_FromOrdinal(space, 0) - assert space.unwrap(w_char) == u'\0' + assert space.unicode_w(w_char) == u'\0' w_char = PyUnicode_FromOrdinal(space, 0xFFFF) - assert space.unwrap(w_char) == u'\uFFFF' + assert space.unicode_w(w_char) == u'\uFFFF' def test_replace(self, space): w_str = space.wrap(u"abababab") w_substr = space.wrap(u"a") w_replstr = space.wrap(u"z") - assert u"zbzbabab" == space.unwrap( + assert u"zbzbabab" == space.unicode_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, 2)) - assert u"zbzbzbzb" == space.unwrap( + assert u"zbzbzbzb" == space.unicode_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, -1)) def test_tailmatch(self, space): diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py --- a/pypy/module/exceptions/interp_exceptions.py +++ b/pypy/module/exceptions/interp_exceptions.py @@ -126,7 +126,7 @@ return space.call_function(space.w_unicode, w_as_str) lgt = len(self.args_w) if lgt == 0: - return space.newunicode(u"") + return space.newutf8("", 0) if lgt == 1: return space.call_function(space.w_unicode, self.args_w[0]) else: @@ -719,7 +719,7 @@ def descr_init(self, space, w_encoding, w_object, w_start, w_end, w_reason): # typechecking space.realtext_w(w_encoding) - space.utf8_w(w_object) + space.realutf8_w(w_object) space.int_w(w_start) space.int_w(w_end) space.realtext_w(w_reason) diff --git a/pypy/module/operator/tscmp.py b/pypy/module/operator/tscmp.py --- a/pypy/module/operator/tscmp.py +++ b/pypy/module/operator/tscmp.py @@ -45,15 +45,15 @@ Note: If a and b are of different lengths, or if an error occurs, a timing attack could theoretically reveal information about the types and lengths of a and b--but not their values. + + XXX note that here the strings have to have the same length as UTF8, + not only as unicode. 
Not sure how to do better """ if (space.isinstance_w(w_a, space.w_unicode) and space.isinstance_w(w_b, space.w_unicode)): - a = space.unicode_w(w_a) - b = space.unicode_w(w_b) - with rffi.scoped_nonmoving_unicodebuffer(a) as a_buf: - with rffi.scoped_nonmoving_unicodebuffer(b) as b_buf: - result = pypy_tscmp_wide(a_buf, b_buf, len(a), len(b)) - return space.newbool(rffi.cast(lltype.Bool, result)) + a = space.utf8_w(w_a) + b = space.utf8_w(w_b) + return space.newbool(_compare_two_strings(a, b)) return compare_digest_buffer(space, w_a, w_b) @@ -68,7 +68,10 @@ a = a_buf.as_str() b = b_buf.as_str() + return space.newbool(_compare_two_strings(a, b)) + +def _compare_two_strings(a, b): with rffi.scoped_nonmovingbuffer(a) as a_buf: with rffi.scoped_nonmovingbuffer(b) as b_buf: result = pypy_tscmp(a_buf, b_buf, len(a), len(b)) - return space.newbool(rffi.cast(lltype.Bool, result)) + return rffi.cast(lltype.Bool, result) diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py --- a/pypy/module/pyexpat/interp_pyexpat.py +++ b/pypy/module/pyexpat/interp_pyexpat.py @@ -478,8 +478,8 @@ # I suppose this is a valid utf8, but there is noone to check # and noone to catch an error either try: - lgt, flag = rutf8.check_utf8(s, True) - return space.newutf8(s, lgt, flag) + lgt = rutf8.check_utf8(s, True) + return space.newutf8(s, lgt) except rutf8.CheckError: from pypy.interpreter import unicodehelper # get the correct error msg diff --git a/pypy/module/unicodedata/test/test_hyp.py b/pypy/module/unicodedata/test/test_hyp.py --- a/pypy/module/unicodedata/test/test_hyp.py +++ b/pypy/module/unicodedata/test/test_hyp.py @@ -10,7 +10,7 @@ def normalize(s): w_s = space.newunicode(s) w_res = ucd.normalize(space, NF_code, w_s) - return space.unwrap(w_res) + return space.unicode_w(w_res) return normalize all_forms = ['NFC', 'NFD', 'NFKC', 'NFKD'] diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -209,7 +209,7 @@ def newbytes(self, x): return w_some_obj() - def newutf8(self, x, l, f): + def newutf8(self, x, l): return w_some_obj() def new_from_utf8(self, a): diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -195,11 +195,11 @@ w_dict = self.getdict(space) if w_dict is None: w_dict = space.w_None - s, _, lgt, flag = str_decode_latin_1(''.join(self.getdata()), 'strict', + s, _, lgt = str_decode_latin_1(''.join(self.getdata()), 'strict', True, None) return space.newtuple([ space.type(self), space.newtuple([ - space.newutf8(s, lgt, flag), space.newtext('latin-1')]), + space.newutf8(s, lgt), space.newtext('latin-1')]), w_dict]) @staticmethod diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -1197,7 +1197,7 @@ unerase = staticmethod(unerase) def wrap(self, unwrapped): - return self.space.newutf8(unwrapped, len(unwrapped), rutf8.FLAG_ASCII) + return self.space.newutf8(unwrapped, len(unwrapped)) def unwrap(self, wrapped): return self.space.utf8_w(wrapped) @@ -1239,7 +1239,7 @@ ## return self.space.newlist_bytes(self.listview_bytes(w_dict)) def wrapkey(space, key): - return space.newutf8(key, len(key), rutf8.FLAG_ASCII) + return space.newutf8(key, len(key)) ## @jit.look_inside_iff(lambda self, w_dict: ## w_dict_unrolling_heuristic(w_dict)) 
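In the strategy changes just above, passing len(key) as the codepoint count to newutf8() is only valid because these strategies hold ASCII-only strings (hence the dropped rutf8.FLAG_ASCII argument), where the utf8 byte length and the number of codepoints coincide. A rough illustration in plain Python, where codepoints_in_utf8_py stands in for the rutf8.codepoints_in_utf8 helper used elsewhere in these patches:

    def codepoints_in_utf8_py(s):
        # count codepoints by skipping utf8 continuation bytes (0x80..0xBF)
        return sum(1 for c in s if not 0x80 <= ord(c) < 0xC0)

    ascii_key = "eins"                       # ASCII-only, as in the strategies
    assert codepoints_in_utf8_py(ascii_key) == len(ascii_key)   # 4 == 4

    utf8_key = u"zw\xf6lf".encode("utf-8")   # non-ASCII: 5 codepoints, 6 bytes
    assert len(utf8_key) == 6
    assert codepoints_in_utf8_py(utf8_key) == 5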
diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -198,8 +198,8 @@ if self.w_valuedict is None: raise oefmt(space.w_TypeError, "format requires a mapping") if do_unicode: - lgt, flag = rutf8.check_utf8(key, True) - w_key = space.newutf8(key, lgt, flag) + lgt = rutf8.check_utf8(key, True) + w_key = space.newutf8(key, lgt) else: w_key = space.newbytes(key) return space.getitem(self.w_valuedict, w_key) @@ -330,8 +330,7 @@ space = self.space if do_unicode: cp = rutf8.codepoint_at_pos(self.fmt, self.fmtpos - 1) - flag = rutf8.get_flag_from_code(cp) - w_s = space.newutf8(rutf8.unichr_as_utf8(cp), 1, flag) + w_s = space.newutf8(rutf8.unichr_as_utf8(cp), 1) else: cp = ord(self.fmt[self.fmtpos - 1]) w_s = space.newbytes(chr(cp)) @@ -513,8 +512,8 @@ formatter = UnicodeFormatter(space, fmt, values_w, w_valuedict) result = formatter.format() # this can force strings, not sure if it's a problem or not - lgt, flag = rutf8.check_utf8(result, True) - return space.newutf8(result, lgt, flag) + lgt = rutf8.check_utf8(result, True) + return space.newutf8(result, lgt) def mod_format(space, w_format, w_values, do_unicode=False): if space.isinstance_w(w_values, space.w_tuple): diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py --- a/pypy/objspace/std/listobject.py +++ b/pypy/objspace/std/listobject.py @@ -1998,7 +1998,7 @@ def wrap(self, stringval): assert stringval is not None - return self.space.newutf8(stringval, len(stringval), rutf8.FLAG_ASCII) + return self.space.newutf8(stringval, len(stringval)) def unwrap(self, w_string): return self.space.utf8_w(w_string) diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py --- a/pypy/objspace/std/marshal_impl.py +++ b/pypy/objspace/std/marshal_impl.py @@ -403,8 +403,8 @@ @unmarshaller(TYPE_UNICODE) def unmarshal_unicode(space, u, tc): arg = u.get_str() - length, flag = unicodehelper.check_utf8_or_raise(space, arg) - return space.newutf8(arg, length, flag) + length = unicodehelper.check_utf8_or_raise(space, arg) + return space.newutf8(arg, length) @marshaller(W_SetObject) def marshal_set(space, w_set, m): diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py --- a/pypy/objspace/std/newformat.py +++ b/pypy/objspace/std/newformat.py @@ -51,8 +51,8 @@ if for_unicode: def wrap(self, u): - lgt, flag = rutf8.check_utf8(u, True) - return self.space.newutf8(u, lgt, flag) + lgt = rutf8.check_utf8(u, True) + return self.space.newutf8(u, lgt) else: def wrap(self, s): return self.space.newbytes(s) @@ -379,8 +379,8 @@ template = unicode_template_formatter(space, space.utf8_w(w_string)) r = template.build(args) - lgt, flag = rutf8.check_utf8(r, True) - return space.newutf8(r, lgt, flag) + lgt = rutf8.check_utf8(r, True) + return space.newutf8(r, lgt) else: template = str_template_formatter(space, space.bytes_w(w_string)) return space.newbytes(template.build(args)) @@ -416,8 +416,8 @@ if for_unicode: def wrap(self, u): - lgt, flag = rutf8.check_utf8(u, True) - return self.space.newutf8(u, lgt, flag) + lgt = rutf8.check_utf8(u, True) + return self.space.newutf8(u, lgt) else: def wrap(self, s): return self.space.newbytes(s) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -165,8 +165,8 @@ return self.newtext(x) if isinstance(x, unicode): x = x.encode('utf8') - lgt, flag = rutf8.check_utf8(x, True) - 
return self.newutf8(x, lgt, flag) + lgt = rutf8.check_utf8(x, True) + return self.newutf8(x, lgt) if isinstance(x, float): return W_FloatObject(x) if isinstance(x, W_Root): @@ -362,16 +362,10 @@ return self.w_None return self.newtext(s) - def newutf8(self, utf8s, length, flag): + def newutf8(self, utf8s, length): assert utf8s is not None assert isinstance(utf8s, str) - return W_UnicodeObject(utf8s, length, flag) - - def new_from_utf8(self, utf8s): - # XXX: kill me! - assert isinstance(utf8s, str) - length, flag = rutf8.check_utf8(utf8s, True) - return W_UnicodeObject(utf8s, length, flag) + return W_UnicodeObject(utf8s, length) def newfilename(self, s): assert isinstance(s, str) # on pypy3, this decodes the byte string diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py --- a/pypy/objspace/std/setobject.py +++ b/pypy/objspace/std/setobject.py @@ -1291,7 +1291,7 @@ return self.space.utf8_w(w_item) def wrap(self, item): - return self.space.newutf8(item, len(item), rutf8.FLAG_ASCII) + return self.space.newutf8(item, len(item)) def iter(self, w_set): return UnicodeIteratorImplementation(self.space, self, w_set) @@ -1495,7 +1495,7 @@ def next_entry(self): for key in self.iterator: - return self.space.newutf8(key, len(key), rutf8.FLAG_ASCII) + return self.space.newutf8(key, len(key)) else: return None diff --git a/pypy/objspace/std/test/test_index.py b/pypy/objspace/std/test/test_index.py --- a/pypy/objspace/std/test/test_index.py +++ b/pypy/objspace/std/test/test_index.py @@ -265,8 +265,7 @@ class AppTest_UnicodeTestCase(SeqTestCase, StringTestCase): def setup_method(self, method): SeqTestCase.setup_method(self, method) - self.w_seq = self.space.newutf8("this is a test", len("this is a test"), - rutf8.FLAG_ASCII) + self.w_seq = self.space.newutf8("this is a test", len("this is a test")) self.w_const = self.space.appexec([], """(): return unicode""") diff --git a/pypy/objspace/std/test/test_lengthhint.py b/pypy/objspace/std/test/test_lengthhint.py --- a/pypy/objspace/std/test/test_lengthhint.py +++ b/pypy/objspace/std/test/test_lengthhint.py @@ -74,8 +74,7 @@ self._test_length_hint(self.space.wrap('P' * self.SIZE)) def test_unicode(self): - self._test_length_hint(self.space.newutf8('Y' * self.SIZE, self.SIZE, - rutf8.FLAG_ASCII)) + self._test_length_hint(self.space.newutf8('Y' * self.SIZE, self.SIZE)) def test_tuple(self): self._test_length_hint(self.space.wrap(tuple(self.ITEMS))) diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py --- a/pypy/objspace/std/test/test_liststrategies.py +++ b/pypy/objspace/std/test/test_liststrategies.py @@ -600,9 +600,9 @@ def test_unicode(self): l1 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newbytes("zwei")]) assert isinstance(l1.strategy, BytesListStrategy) - l2 = W_ListObject(self.space, [self.space.newutf8("eins", 4, 2), self.space.newutf8("zwei", 4, 2)]) + l2 = W_ListObject(self.space, [self.space.newutf8("eins", 4), self.space.newutf8("zwei", 4)]) assert isinstance(l2.strategy, UnicodeListStrategy) - l3 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newutf8("zwei", 4, 2)]) + l3 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newutf8("zwei", 4)]) assert isinstance(l3.strategy, ObjectListStrategy) def test_listview_bytes(self): diff --git a/pypy/objspace/std/test/test_obj.py b/pypy/objspace/std/test/test_obj.py --- a/pypy/objspace/std/test/test_obj.py +++ b/pypy/objspace/std/test/test_obj.py @@ -17,7 +17,7 @@ 
cls.w_cpython_apptest = space.wrap(option.runappdirect and not hasattr(sys, 'pypy_translation_info')) def w_unwrap_wrap_unicode(space, w_obj): - return space.newutf8(space.utf8_w(w_obj), w_obj._length, w_obj._get_flag()) + return space.newutf8(space.utf8_w(w_obj), w_obj._length) cls.w_unwrap_wrap_unicode = space.wrap(gateway.interp2app(w_unwrap_wrap_unicode)) def w_unwrap_wrap_str(space, w_obj): return space.wrap(space.str_w(w_obj)) diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -27,12 +27,12 @@ assert len(warnings) == 2 def test_listview_unicode(self): - w_str = self.space.newutf8('abcd', 4, rutf8.FLAG_ASCII) + w_str = self.space.newutf8('abcd', 4) assert self.space.listview_utf8(w_str) == list("abcd") From pypy.commits at gmail.com Fri Dec 8 05:46:49 2017 From: pypy.commits at gmail.com (arigo) Date: Fri, 08 Dec 2017 02:46:49 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: in-progress Message-ID: <5a2a6d99.4a361c0a.75239.7a31@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93303:0fd38947b59e Date: 2017-12-08 11:45 +0100 http://bitbucket.org/pypy/pypy/changeset/0fd38947b59e/ Log: in-progress diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -6,9 +6,8 @@ from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault from pypy.interpreter.error import OperationError, oefmt from rpython.rlib.rarithmetic import intmask -from rpython.rlib import jit +from rpython.rlib import jit, rutf8 from rpython.rlib.rstring import StringBuilder -from rpython.rlib.rutf8 import Utf8StringBuilder # ____________________________________________________________ # @@ -110,11 +109,15 @@ if endpos < pos: endpos = pos if space.isinstance_w(w_string, space.w_unicode): - unicodestr = space.unicode_w(w_string) - if pos > len(unicodestr): - pos = len(unicodestr) - if endpos > len(unicodestr): - endpos = len(unicodestr) + utf8str, length = space.utf8_len_w(w_string) + if pos >= length: + bytepos = len(utf8str) + else: + bytepos = rutf8.codepoint_at_index(..) 
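Here pos and endpos arrive as codepoint indices, while the utf8-based match context works on byte positions, so make_ctx has to translate one into the other; the completed call using the string's index storage appears in the follow-up changeset further down. A rough sketch of that translation in plain Python, without the index-storage acceleration, assuming valid utf8 and an in-range index:

    def byte_pos_of_codepoint(utf8, index):
        # walk codepoint by codepoint; the lead byte tells how wide each one is
        pos = 0
        for _ in range(index):
            c = ord(utf8[pos])
            if c < 0x80:
                pos += 1
            elif c < 0xE0:
                pos += 2
            elif c < 0xF0:
                pos += 3
            else:
                pos += 4
        return pos

    s = u"a\u20acb".encode("utf-8")     # 'a\xe2\x82\xacb': 3 codepoints, 5 bytes
    assert byte_pos_of_codepoint(s, 1) == 1
    assert byte_pos_of_codepoint(s, 2) == 4   # codepoint 2 starts at byte 4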
+ + pos = length + if endpos >= length: + endpos = length return rsre_core.UnicodeMatchContext(self.code, unicodestr, pos, endpos, self.flags) elif space.isinstance_w(w_string, space.w_bytes): diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py --- a/pypy/module/_sre/test/test_app_sre.py +++ b/pypy/module/_sre/test/test_app_sre.py @@ -87,6 +87,13 @@ assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus") assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs") + def test_findall_unicode(self): + import re + assert [u"\u1234"] == re.findall(u"\u1234", u"\u1000\u1234\u2000") + assert ["a", "u"] == re.findall("b(.)", "abalbus") + assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus") + assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs") + def test_finditer(self): import re it = re.finditer("b(.)", "brabbel") From pypy.commits at gmail.com Fri Dec 8 06:10:56 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 08 Dec 2017 03:10:56 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: whack the slowpath too Message-ID: <5a2a7340.0c0d1c0a.5316b.859a@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93305:a50930e1db6b Date: 2017-12-07 18:07 +0200 http://bitbucket.org/pypy/pypy/changeset/a50930e1db6b/ Log: whack the slowpath too diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -460,10 +460,12 @@ # utf-8 functions are not regular, because we have to pass # "allow_surrogates=True" - at unwrap_spec(utf8='utf8', errors='text_or_none') -def utf_8_encode(space, utf8, errors="strict"): - length, _ = rutf8.check_utf8(utf8, allow_surrogates=True) - return space.newtuple([space.newbytes(utf8), space.newint(length)]) + at unwrap_spec(errors='text_or_none') +def utf_8_encode(space, w_obj, errors="strict"): + utf8, lgt = space.utf8_len_w(w_obj) + if rutf8.has_surrogates(utf8): + utf8 = rutf8.reencode_utf8_with_surrogates(utf8) + return space.newtuple([space.newbytes(utf8), space.newint(lgt)]) #@unwrap_spec(uni=unicode, errors='text_or_none') #def utf_8_encode(space, uni, errors="strict"): # if errors is None: diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -741,6 +741,8 @@ assert u'\u20ac'.encode('utf-8') == '\xe2\x82\xac' assert u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82' assert u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96' + assert u'\ud800\udc02'.encode('uTf-8') == '\xf0\x90\x80\x82' + assert u'\ud84d\udc56'.encode('Utf8') == '\xf0\xa3\x91\x96' assert u'\ud800'.encode('utf-8') == '\xed\xa0\x80' assert u'\udc00'.encode('utf-8') == '\xed\xb0\x80' assert (u'\ud800\udc02'*1000).encode('utf-8') == '\xf0\x90\x80\x82'*1000 From pypy.commits at gmail.com Fri Dec 8 06:10:58 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 08 Dec 2017 03:10:58 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: kill dead code Message-ID: <5a2a7342.51bbdf0a.a2876.0f13@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93306:eb61e553bfd4 Date: 2017-12-07 18:07 +0200 http://bitbucket.org/pypy/pypy/changeset/eb61e553bfd4/ Log: kill dead code diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ 
-466,18 +466,6 @@ if rutf8.has_surrogates(utf8): utf8 = rutf8.reencode_utf8_with_surrogates(utf8) return space.newtuple([space.newbytes(utf8), space.newint(lgt)]) -#@unwrap_spec(uni=unicode, errors='text_or_none') -#def utf_8_encode(space, uni, errors="strict"): -# if errors is None: -# errors = 'strict' -# state = space.fromcache(CodecState) -# # NB. can't call unicode_encode_utf_8() directly because that's -# # an @elidable function nowadays. Instead, we need the _impl(). -# # (The problem is the errorhandler, which calls arbitrary Python.) -# result = runicode.unicode_encode_utf_8_impl( -# uni, len(uni), errors, state.encode_error_handler, -# allow_surrogates=True) -# return space.newtuple([space.newbytes(result), space.newint(len(uni))]) @unwrap_spec(string='bufferstr', errors='text_or_none', w_final = WrappedDefault(False)) From pypy.commits at gmail.com Fri Dec 8 06:11:02 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 08 Dec 2017 03:11:02 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: whack at _io module Message-ID: <5a2a7346.4191df0a.397b4.19dc@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93308:7ffcfc6493e6 Date: 2017-12-08 10:38 +0200 http://bitbucket.org/pypy/pypy/changeset/7ffcfc6493e6/ Log: whack at _io module diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -1,3 +1,5 @@ +from rpython.rlib.rutf8 import get_utf8_length + from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.typedef import ( TypeDef, generic_new_descr, GetSetProperty) @@ -152,7 +154,7 @@ if self.readnl is None: w_readnl = space.w_None else: - w_readnl = space.str(space.new_from_utf8(self.readnl)) # YYY + w_readnl = space.str(space.newutf8(self.readnl, get_utf8_length(self.readnl))) # YYY return space.newtuple([ w_initialval, w_readnl, space.newint(self.buf.pos), w_dict ]) @@ -215,7 +217,8 @@ if self.writenl: w_decoded = space.call_method( w_decoded, "replace", - space.newtext("\n"), space.new_from_utf8(self.writenl)) + space.newtext("\n"), space.newutf8(self.writenl, + get_utf8_length(self.writenl))) string = space.utf8_w(w_decoded) if string: self.buf.write(string) @@ -225,7 +228,9 @@ def read_w(self, space, w_size=None): self._check_closed(space) size = convert_size(space, w_size) - return space.new_from_utf8(self.buf.read(size)) + v = self.buf.read(size) + lgt = get_utf8_length(v) + return space.newutf8(v, lgt) def readline_w(self, space, w_limit=None): self._check_closed(space) @@ -239,7 +244,8 @@ else: newline = self.readnl result = self.buf.readline(newline, limit) - return space.new_from_utf8(result) + resultlen = get_utf8_length(result) + return space.newutf8(result, resultlen) @unwrap_spec(pos=int, mode=int) @@ -276,7 +282,9 @@ def getvalue_w(self, space): self._check_closed(space) - return space.new_from_utf8(self.buf.getvalue()) + v = self.buf.getvalue() + lgt = get_utf8_length(v) + return space.newutf8(v, lgt) def readable_w(self, space): self._check_closed(space) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -12,7 +12,8 @@ from rpython.rlib.rbigint import rbigint from rpython.rlib.rstring import StringBuilder from rpython.rlib.rutf8 import (check_utf8, next_codepoint_pos, - codepoints_in_utf8) + codepoints_in_utf8, get_utf8_length, + Utf8StringBuilder) STATE_ZERO, STATE_OK, STATE_DETACHED = range(3) @@ -684,13 
+685,15 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.new_from_utf8(self.decoded.get_chars(-1)) + chars = self.decoded.get_chars(-1) + lgt = get_utf8_length(chars) + w_result = space.newutf8(chars, lgt) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final remaining = size - builder = StringBuilder(size) + builder = Utf8StringBuilder(size) # Keep reading chunks until we have n characters to return while remaining > 0: @@ -700,7 +703,7 @@ builder.append(data) remaining -= len(data) - return space.new_from_utf8(builder.build()) + return space.newutf8(builder.build(), builder.get_length()) def _scan_line_ending(self, limit): if self.readuniversal: @@ -725,6 +728,7 @@ limit = convert_size(space, w_limit) remnant = None builder = StringBuilder() + # XXX maybe use Utf8StringBuilder instead? while True: # First, get some data if necessary has_data = self._ensure_data(space) @@ -771,7 +775,8 @@ self.decoded.reset() result = builder.build() - return space.new_from_utf8(result) + lgt = get_utf8_length(result) + return space.newutf8(result, lgt) # _____________________________________________________________ # write methods @@ -794,8 +799,8 @@ if text.find('\n') >= 0: haslf = True if haslf and self.writetranslate and self.writenl: - w_text = space.call_method(w_text, "replace", space.new_from_utf8('\n'), - space.new_from_utf8(self.writenl)) + w_text = space.call_method(w_text, "replace", space.newutf8('\n', 1), + space.newutf8(self.writenl, get_utf8_length(self.writenl))) text = space.utf8_w(w_text) needflush = False diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -212,9 +212,6 @@ def newutf8(self, x, l): return w_some_obj() - def new_from_utf8(self, a): - return w_some_obj() - def newunicode(self, a): return w_some_obj() From pypy.commits at gmail.com Fri Dec 8 06:10:59 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 08 Dec 2017 03:10:59 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix _codecs Message-ID: <5a2a7343.6b88df0a.eef4b.5b04@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93307:bf4ecad403eb Date: 2017-12-08 10:19 +0200 http://bitbucket.org/pypy/pypy/changeset/bf4ecad403eb/ Log: fix _codecs diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -575,8 +575,8 @@ digits = 4 if s[pos] == 'u' else 8 message = "truncated \\uXXXX" pos += 1 - pos, _, _ = hexescape(result, s, pos, digits, - "rawunicodeescape", errorhandler, message, errors) + pos, _ = hexescape(result, s, pos, digits, + "rawunicodeescape", errorhandler, message, errors) r = result.build() lgt = rutf8.check_utf8(r, True) From pypy.commits at gmail.com Fri Dec 8 06:11:05 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 08 Dec 2017 03:11:05 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: more fixes Message-ID: <5a2a7349.05d31c0a.1c8af.8229@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93310:e4ed73204961 Date: 2017-12-08 10:50 +0200 http://bitbucket.org/pypy/pypy/changeset/e4ed73204961/ Log: more fixes diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -451,7 +451,7 @@ """ if self.typecode == 
'u': buf = rffi.cast(UNICODE_ARRAY, self._buffer_as_unsigned()) - return space.newutf8(rffi.wcharpsize2unicode(buf, self.len)) + return space.newutf8(rffi.wcharpsize2utf8(buf, self.len), self.len) else: raise oefmt(space.w_ValueError, "tounicode() may only be called on type 'u' arrays") From pypy.commits at gmail.com Fri Dec 8 06:11:04 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 08 Dec 2017 03:11:04 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix _multibytecodec Message-ID: <5a2a7348.5dbf1c0a.b2c74.7eb0@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93309:affb72fc7cf7 Date: 2017-12-08 10:40 +0200 http://bitbucket.org/pypy/pypy/changeset/affb72fc7cf7/ Log: fix _multibytecodec diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py --- a/pypy/module/_multibytecodec/c_codecs.py +++ b/pypy/module/_multibytecodec/c_codecs.py @@ -157,7 +157,7 @@ replace, end = errorcb(errors, namecb, reason, stringdata, start, end) # 'replace' is RPython unicode here - lgt, _ = rutf8.check_utf8(replace, True) + lgt = rutf8.get_utf8_length(replace) inbuf = rffi.utf82wcharp(replace, lgt) try: r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, lgt, end) @@ -268,7 +268,7 @@ rets, end = errorcb(errors, namecb, reason, unicodedata, start, end) codec = pypy_cjk_enc_getcodec(encodebuf) - lgt, _ = rutf8.get_utf8_length_flag(rets) + lgt = rutf8.get_utf8_length(rets) replace = encode(codec, rets, lgt, "strict", errorcb, namecb) with rffi.scoped_nonmovingbuffer(replace) as inbuf: r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end) diff --git a/pypy/module/_multibytecodec/interp_incremental.py b/pypy/module/_multibytecodec/interp_incremental.py --- a/pypy/module/_multibytecodec/interp_incremental.py +++ b/pypy/module/_multibytecodec/interp_incremental.py @@ -66,7 +66,7 @@ pos = c_codecs.pypy_cjk_dec_inbuf_consumed(self.decodebuf) assert 0 <= pos <= len(object) self.pending = object[pos:] - lgt = rutf8.get_utf8_length_flag(output) + lgt = rutf8.get_utf8_length(output) return space.newutf8(output, lgt) diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py --- a/pypy/module/_multibytecodec/interp_multibytecodec.py +++ b/pypy/module/_multibytecodec/interp_multibytecodec.py @@ -27,8 +27,8 @@ raise wrap_unicodedecodeerror(space, e, input, self.name) except RuntimeError: raise wrap_runtimeerror(space) - lgt, flag = rutf8.check_utf8(utf8_output, True) - return space.newtuple([space.newutf8(utf8_output, lgt, flag), + lgt = rutf8.get_utf8_length(utf8_output) + return space.newtuple([space.newutf8(utf8_output, lgt), space.newint(len(input))]) @unwrap_spec(errors="text_or_none") diff --git a/pypy/module/_multibytecodec/test/test_translation.py b/pypy/module/_multibytecodec/test/test_translation.py --- a/pypy/module/_multibytecodec/test/test_translation.py +++ b/pypy/module/_multibytecodec/test/test_translation.py @@ -14,7 +14,7 @@ codecname, string = argv[1], argv[2] c = c_codecs.getcodec(codecname) u = c_codecs.decode(c, string) - lgt, _ = rutf8.get_utf8_length_flag(u) + lgt = rutf8.get_utf8_length(u) r = c_codecs.encode(c, u, lgt) print r return 0 From pypy.commits at gmail.com Fri Dec 8 06:23:31 2017 From: pypy.commits at gmail.com (arigo) Date: Fri, 08 Dec 2017 03:23:31 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: in-progress Message-ID: <5a2a7633.d2a1df0a.81b99.501f@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93311:336fb075d139 Date: 
2017-12-08 12:22 +0100 http://bitbucket.org/pypy/pypy/changeset/336fb075d139/ Log: in-progress diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -13,7 +13,7 @@ # # Constants and exposed functions -from rpython.rlib.rsre import rsre_core +from rpython.rlib.rsre import rsre_core, rsre_utf8 from rpython.rlib.rsre.rsre_char import CODESIZE, MAXREPEAT, getlower, set_unicode_db @@ -40,7 +40,8 @@ end-start)) if isinstance(ctx, rsre_core.StrMatchContext): return space.newbytes(ctx._string[start:end]) - elif isinstance(ctx, rsre_core.UnicodeMatchContext): + elif isinstance(ctx, rsre_utf8.Utf8MatchContext): + XXXXXXX s = ctx._unicodestr[start:end] lgt = rutf8.check_utf8(s, True) return space.newutf8(s, lgt) @@ -103,7 +104,7 @@ raise oefmt(space.w_TypeError, "cannot copy this pattern object") def make_ctx(self, w_string, pos=0, endpos=sys.maxint): - """Make a StrMatchContext, BufMatchContext or a UnicodeMatchContext for + """Make a StrMatchContext, BufMatchContext or a Utf8MatchContext for searching in the given w_string object.""" space = self.space if pos < 0: @@ -111,17 +112,26 @@ if endpos < pos: endpos = pos if space.isinstance_w(w_string, space.w_unicode): - utf8str, length = space.utf8_len_w(w_string) - if pos >= length: + # xxx fish for the _index_storage + w_string = space.convert_arg_to_w_unicode(w_string) + utf8str = w_string._utf8 + length = w_string._len() + index_storage = w_string._get_index_storage() + # + if pos <= 0: + bytepos = 0 + elif pos >= length: bytepos = len(utf8str) else: - bytepos = rutf8.codepoint_at_index(..) - - pos = length + bytepos = rutf8.codepoint_at_index(utf8str, index_storage, pos) if endpos >= length: - endpos = length - return rsre_core.UnicodeMatchContext(self.code, unicodestr, - pos, endpos, self.flags) + endbytepos = len(utf8str) + else: + endbytepos = rutf8.codepoint_at_index(utf8str, index_storage, + endpos) + return rsre_utf8.Utf8MatchContext( + self.code, unicodestr, index_storage, + bytepos, endbytepos, self.flags) elif space.isinstance_w(w_string, space.w_bytes): str = space.bytes_w(w_string) if pos > len(str): @@ -372,7 +382,8 @@ if isinstance(ctx, rsre_core.StrMatchContext): assert strbuilder is not None return strbuilder.append_slice(ctx._string, start, end) - elif isinstance(ctx, rsre_core.UnicodeMatchContext): + elif isinstance(ctx, rsre_utf8.Utf8MatchContext): + XXXXXXX assert unicodebuilder is not None return unicodebuilder.append_slice(ctx._unicodestr, start, end) assert 0, "unreachable" @@ -578,7 +589,8 @@ return space.newbytes(ctx._buffer.as_str()) elif isinstance(ctx, rsre_core.StrMatchContext): return space.newbytes(ctx._string) - elif isinstance(ctx, rsre_core.UnicodeMatchContext): + elif isinstance(ctx, rsre_utf8.Utf8MatchContext): + XXXXXXXX lgt = rutf8.check_utf8(ctx._unicodestr, True) return space.newutf8(ctx._unicodestr, lgt) else: diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -55,6 +55,8 @@ specific subclass, calling 'func' is a direct call; if 'ctx' is only known to be of class AbstractMatchContext, calling 'func' is an indirect call. 
""" + from rpython.rlib.rsre.rsre_utf8 import Utf8MatchContext + assert func.func_code.co_varnames[0] == 'ctx' specname = '_spec_' + func.func_name while specname in _seen_specname: @@ -65,7 +67,8 @@ specialized_methods = [] for prefix, concreteclass in [('buf', BufMatchContext), ('str', StrMatchContext), - ('uni', UnicodeMatchContext)]: + ('uni', UnicodeMatchContext), + ('utf8', Utf8MatchContext)]: newfunc = func_with_new_name(func, prefix + specname) assert not hasattr(concreteclass, specname) setattr(concreteclass, specname, newfunc) diff --git a/rpython/rlib/rsre/rsre_jit.py b/rpython/rlib/rsre/rsre_jit.py --- a/rpython/rlib/rsre/rsre_jit.py +++ b/rpython/rlib/rsre/rsre_jit.py @@ -36,8 +36,10 @@ from rpython.rlib.rsre.rsre_core import BufMatchContext from rpython.rlib.rsre.rsre_core import StrMatchContext from rpython.rlib.rsre.rsre_core import UnicodeMatchContext + from rpython.rlib.rsre.rsre_utf8 import Utf8MatchContext for prefix, concreteclass in [('Buf', BufMatchContext), ('Str', StrMatchContext), - ('Uni', UnicodeMatchContext)]: + ('Uni', UnicodeMatchContext), + ('Utf8', Utf8MatchContext)]: jitdriver = RSreJitDriver(prefix + name, **kwds) setattr(concreteclass, 'jitdriver_' + name, jitdriver) diff --git a/rpython/rlib/rsre/rsre_utf8.py b/rpython/rlib/rsre/rsre_utf8.py --- a/rpython/rlib/rsre/rsre_utf8.py +++ b/rpython/rlib/rsre/rsre_utf8.py @@ -1,3 +1,4 @@ +import sys from rpython.rlib.debug import check_nonneg from rpython.rlib.rarithmetic import r_uint, intmask from rpython.rlib.rsre.rsre_core import AbstractMatchContext, EndOfString @@ -7,9 +8,11 @@ class Utf8MatchContext(AbstractMatchContext): - def __init__(self, pattern, utf8string, match_start, end, flags): + def __init__(self, pattern, utf8string, index_storage, + match_start, end, flags): AbstractMatchContext.__init__(self, pattern, match_start, end, flags) self._utf8 = utf8string + self._index_storage = index_storage def str(self, index): check_nonneg(index) @@ -56,4 +59,32 @@ return position def slowly_convert_byte_pos_to_index(self, position): - + return rutf8.codepoint_index_at_byte_position( + self._utf8, self._index_storage, position) + + def debug_check_pos(self, position): + assert not (0x80 <= self._utf8[position] < 0xC0) # continuation byte + + +def utf8search(pattern, utf8string, index_storage=None, bytestart=0, + byteend=sys.maxint, flags=0): + # bytestart and byteend must be valid byte positions inside the + # utf8string. 
+ from rpython.rlib.rsre.rsre_core import search_context + + assert 0 <= bytestart <= len(utf8string) + assert 0 <= byteend + if byteend > len(utf8string): + byteend = len(utf8string) + if index_storage is None: # should be restricted to tests only + length = rutf8.check_utf8(utf8string, allow_surrogates=True) + index_storage = rutf8.create_utf8_index_storage(utf8string, length) + ctx = Utf8MatchContext(pattern, utf8string, index_storage, + bytestart, byteend, flags) + if search_context(ctx): + return ctx + else: + return None + +def utf8match(*args, **kwds): + NOT_IMPLEMENTED From pypy.commits at gmail.com Fri Dec 8 06:45:36 2017 From: pypy.commits at gmail.com (arigo) Date: Fri, 08 Dec 2017 03:45:36 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: Remove slowly_convert_byte_pos_to_index Message-ID: <5a2a7b60.841a1c0a.ad030.7f77@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93312:b58a53172e21 Date: 2017-12-08 12:44 +0100 http://bitbucket.org/pypy/pypy/changeset/b58a53172e21/ Log: Remove slowly_convert_byte_pos_to_index diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -159,9 +159,6 @@ def prev_n(self, position, n, start_position): raise NotImplementedError @not_rpython - def slowly_convert_byte_pos_to_index(self, position): - raise NotImplementedError - @not_rpython def debug_check_pos(self, position): raise NotImplementedError @not_rpython @@ -178,15 +175,13 @@ raise NotImplementedError def get_mark(self, gid): - mark = find_mark(self.match_marks, gid) - return self.slowly_convert_byte_pos_to_index(mark) + return find_mark(self.match_marks, gid) def flatten_marks(self): # for testing if self.match_marks_flat is None: self._compute_flattened_marks() - return [self.slowly_convert_byte_pos_to_index(i) - for i in self.match_marks_flat] + return self.match_marks_flat def _compute_flattened_marks(self): self.match_marks_flat = [self.match_start, self.match_end] @@ -249,9 +244,6 @@ raise EndOfString return position - def slowly_convert_byte_pos_to_index(self, position): - return position - def debug_check_pos(self, position): pass diff --git a/rpython/rlib/rsre/rsre_utf8.py b/rpython/rlib/rsre/rsre_utf8.py --- a/rpython/rlib/rsre/rsre_utf8.py +++ b/rpython/rlib/rsre/rsre_utf8.py @@ -3,16 +3,19 @@ from rpython.rlib.rarithmetic import r_uint, intmask from rpython.rlib.rsre.rsre_core import AbstractMatchContext, EndOfString from rpython.rlib.rsre import rsre_char +from rpython.rlib.objectmodel import we_are_translated from rpython.rlib import rutf8 class Utf8MatchContext(AbstractMatchContext): + """A context that matches unicode, but encoded in a utf8 string. + Be careful because most positions taken by, handled in, and returned + by this class are expressed in *bytes*, not in characters. 
+ """ - def __init__(self, pattern, utf8string, index_storage, - match_start, end, flags): + def __init__(self, pattern, utf8string, match_start, end, flags): AbstractMatchContext.__init__(self, pattern, match_start, end, flags) self._utf8 = utf8string - self._index_storage = index_storage def str(self, index): check_nonneg(index) @@ -58,16 +61,15 @@ assert position >= 0 return position - def slowly_convert_byte_pos_to_index(self, position): - return rutf8.codepoint_index_at_byte_position( - self._utf8, self._index_storage, position) - def debug_check_pos(self, position): + if we_are_translated(): + return + if position == len(self._utf8): + return # end of string is fine assert not (0x80 <= self._utf8[position] < 0xC0) # continuation byte -def utf8search(pattern, utf8string, index_storage=None, bytestart=0, - byteend=sys.maxint, flags=0): +def utf8search(pattern, utf8string, bytestart=0, byteend=sys.maxint, flags=0): # bytestart and byteend must be valid byte positions inside the # utf8string. from rpython.rlib.rsre.rsre_core import search_context @@ -76,11 +78,9 @@ assert 0 <= byteend if byteend > len(utf8string): byteend = len(utf8string) - if index_storage is None: # should be restricted to tests only - length = rutf8.check_utf8(utf8string, allow_surrogates=True) - index_storage = rutf8.create_utf8_index_storage(utf8string, length) - ctx = Utf8MatchContext(pattern, utf8string, index_storage, - bytestart, byteend, flags) + ctx = Utf8MatchContext(pattern, utf8string, bytestart, byteend, flags) + ctx.debug_check_pos(bytestart) + ctx.debug_check_pos(byteend) if search_context(ctx): return ctx else: diff --git a/rpython/rlib/rsre/test/test_search.py b/rpython/rlib/rsre/test/test_search.py --- a/rpython/rlib/rsre/test/test_search.py +++ b/rpython/rlib/rsre/test/test_search.py @@ -12,19 +12,22 @@ assert res is None res = self.search(r_code1, "fooahcdixxx") assert res is not None - assert res.span() == (5, 8) + P = self.P + assert res.span() == (P(5), P(8)) def test_code2(self): r_code2 = get_code(r'\s*(.*?)') res = self.search(r_code2, "foo bar abcdef") assert res is not None - assert res.span() == (8, 34) + P = self.P + assert res.span() == (P(8), P(34)) def test_pure_literal(self): r_code3 = get_code(r'foobar') res = self.search(r_code3, "foo bar foobar baz") assert res is not None - assert res.span() == (8, 14) + P = self.P + assert res.span() == (P(8), P(14)) def test_code3(self): r_code1 = get_code(r'\s*(.*?)') @@ -79,34 +82,38 @@ r_code4 = get_code(r'(x.)') res = self.match(r_code4, 'xadef') assert res is not None - assert res.get_mark(0) == 5 - assert res.get_mark(1) == 7 + P = self.P + assert res.get_mark(0) == P(5) + assert res.get_mark(1) == P(7) def test_max_until_groups(self): r_code4 = get_code(r'(x.)*xy') res = self.match(r_code4, 'xaxbxydef') assert res is not None - assert res.get_mark(0) == 7 - assert res.get_mark(1) == 9 + P = self.P + assert res.get_mark(0) == P(7) + assert res.get_mark(1) == P(9) def test_group_branch(self): r_code5 = get_code(r'(ab|c)') res = self.match(r_code5, 'abdef') - assert (res.get_mark(0), res.get_mark(1)) == (5, 7) + P = self.P + assert (res.get_mark(0), res.get_mark(1)) == (P(5), P(7)) res = self.match(r_code5, 'cdef') - assert (res.get_mark(0), res.get_mark(1)) == (5, 6) + assert (res.get_mark(0), res.get_mark(1)) == (P(5), P(6)) res = self.match(r_code5, 'dedef') assert res is None def test_group_branch_max_until(self): r_code6 = get_code(r'(ab|c)*a') res = self.match(r_code6, 'ccabcccabadef') - assert (res.get_mark(0), res.get_mark(1)) == 
(12, 14) + P = self.P + assert (res.get_mark(0), res.get_mark(1)) == (P(12), P(14)) r_code7 = get_code(r'((ab)|(c))*a') res = self.match(r_code7, 'ccabcccabadef') - assert (res.get_mark(0), res.get_mark(1)) == (12, 14) - assert (res.get_mark(2), res.get_mark(3)) == (12, 14) - assert (res.get_mark(4), res.get_mark(5)) == (11, 12) + assert (res.get_mark(0), res.get_mark(1)) == (P(12), P(14)) + assert (res.get_mark(2), res.get_mark(3)) == (P(12), P(14)) + assert (res.get_mark(4), res.get_mark(5)) == (P(11), P(12)) def test_group_7(self): r_code7, r7 = get_code_and_re(r'((a)?(b))*') @@ -115,9 +122,10 @@ assert m.span(3) == (12, 13) assert m.span(2) == (8, 9) res = self.match(r_code7, 'bbbabbbb') - assert (res.get_mark(0), res.get_mark(1)) == (12, 13) - assert (res.get_mark(4), res.get_mark(5)) == (12, 13) - assert (res.get_mark(2), res.get_mark(3)) == (8, 9) + P = self.P + assert (res.get_mark(0), res.get_mark(1)) == (P(12), P(13)) + assert (res.get_mark(4), res.get_mark(5)) == (P(12), P(13)) + assert (res.get_mark(2), res.get_mark(3)) == (P(8), P(9)) def test_group_branch_repeat_complex_case(self): r_code8, r8 = get_code_and_re(r'((a)|(b))*') @@ -126,9 +134,10 @@ assert m.span(3) == (6, 7) assert m.span(2) == (5, 6) res = self.match(r_code8, 'ab') - assert (res.get_mark(0), res.get_mark(1)) == (6, 7) - assert (res.get_mark(4), res.get_mark(5)) == (6, 7) - assert (res.get_mark(2), res.get_mark(3)) == (5, 6) + P = self.P + assert (res.get_mark(0), res.get_mark(1)) == (P(6), P(7)) + assert (res.get_mark(4), res.get_mark(5)) == (P(6), P(7)) + assert (res.get_mark(2), res.get_mark(3)) == (P(5), P(6)) def test_minuntil_lastmark_restore(self): r_code9, r9 = get_code_and_re(r'(x|yz)+?(y)??c') @@ -136,7 +145,8 @@ assert m.span(1) == (3, 4) assert m.span(2) == (-1, -1) res = self.match(r_code9, 'xyzxc') - assert (res.get_mark(0), res.get_mark(1)) == (3, 4) + P = self.P + assert (res.get_mark(0), res.get_mark(1)) == (P(3), P(4)) assert (res.get_mark(2), res.get_mark(3)) == (-1, -1) def test_minuntil_bug(self): @@ -145,8 +155,9 @@ assert m.span(2) == (6, 7) #assert self.match.span(3) == (1, 2) --- bug of CPython res = self.match(r_code9, 'xycxyzxc') - assert (res.get_mark(2), res.get_mark(3)) == (6, 7) - assert (res.get_mark(4), res.get_mark(5)) == (1, 2) + P = self.P + assert (res.get_mark(2), res.get_mark(3)) == (P(6), P(7)) + assert (res.get_mark(4), res.get_mark(5)) == (P(1), P(2)) def test_empty_maxuntil(self): r_code, r = get_code_and_re(r'(a?)+y') @@ -155,7 +166,8 @@ res = self.match(r_code, 'y') assert res res = self.match(r_code, 'aaayaaay') - assert res and res.span() == (0, 4) + P = self.P + assert res and res.span() == (P(0), P(4)) # r_code, r = get_code_and_re(r'(a?){4,6}y') assert r.match('y') @@ -175,8 +187,9 @@ assert r.match('XfooXbarX').span() == (0, 5) assert r.match('XfooXbarX').span(1) == (4, 4) res = self.match(r_code, 'XfooXbarX') - assert res.span() == (0, 5) - assert res.span(1) == (4, 4) + P = self.P + assert res.span() == (P(0), P(5)) + assert res.span(1) == (P(4), P(4)) def test_empty_minuntil(self): r_code, r = get_code_and_re(r'(a?)+?y') @@ -206,8 +219,8 @@ assert match is not None assert match.span() == (ik, ik) assert res is not None - assert res.match_start == self.Position(ik) - assert res.match_end == self.Position(ik) + assert res.match_start == self.P(ik) + assert res.match_end == self.P(ik) else: assert match is None assert res is None @@ -216,14 +229,14 @@ class TestSearchCustom(BaseTestSearch): search = staticmethod(support.search) match = 
staticmethod(support.match) - Position = support.Position + P = support.Position class TestSearchStr(BaseTestSearch): search = staticmethod(rsre_core.search) match = staticmethod(rsre_core.match) - Position = staticmethod(lambda n: n) + P = staticmethod(lambda n: n) class TestSearchUtf8(BaseTestSearch): search = staticmethod(rsre_utf8.utf8search) match = staticmethod(rsre_utf8.utf8match) - Position = staticmethod(lambda n: n) # NB. only for plain ascii + P = staticmethod(lambda n: n) # NB. only for plain ascii From pypy.commits at gmail.com Fri Dec 8 06:52:55 2017 From: pypy.commits at gmail.com (arigo) Date: Fri, 08 Dec 2017 03:52:55 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: duh Message-ID: <5a2a7d17.05c41c0a.69432.8031@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93313:68c926785f51 Date: 2017-12-08 12:52 +0100 http://bitbucket.org/pypy/pypy/changeset/68c926785f51/ Log: duh diff --git a/rpython/rlib/rsre/rsre_utf8.py b/rpython/rlib/rsre/rsre_utf8.py --- a/rpython/rlib/rsre/rsre_utf8.py +++ b/rpython/rlib/rsre/rsre_utf8.py @@ -56,7 +56,7 @@ for i in range(n): if upos <= r_uint(start_position): raise EndOfString - upos = rutf8.next_codepoint_pos(self._utf8, upos) + upos = rutf8.prev_codepoint_pos(self._utf8, upos) position = intmask(upos) assert position >= 0 return position From pypy.commits at gmail.com Fri Dec 8 07:04:05 2017 From: pypy.commits at gmail.com (arigo) Date: Fri, 08 Dec 2017 04:04:05 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: Fix test_search Message-ID: <5a2a7fb5.068a1c0a.26f6b.8e21@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93314:80ff594175dc Date: 2017-12-08 12:57 +0100 http://bitbucket.org/pypy/pypy/changeset/80ff594175dc/ Log: Fix test_search diff --git a/rpython/rlib/rsre/rsre_utf8.py b/rpython/rlib/rsre/rsre_utf8.py --- a/rpython/rlib/rsre/rsre_utf8.py +++ b/rpython/rlib/rsre/rsre_utf8.py @@ -68,23 +68,41 @@ return # end of string is fine assert not (0x80 <= self._utf8[position] < 0xC0) # continuation byte + def maximum_distance(self, position_low, position_high): + # may overestimate if there are non-ascii chars + return position_high - position_low + + +def make_utf8_ctx(pattern, utf8string, bytestart, byteend, flags): + if bytestart < 0: bytestart = 0 + elif bytestart > len(utf8string): bytestart = len(utf8string) + if byteend < 0: byteend = 0 + elif byteend > len(utf8string): byteend = len(utf8string) + ctx = Utf8MatchContext(pattern, utf8string, bytestart, byteend, flags) + ctx.debug_check_pos(bytestart) + ctx.debug_check_pos(byteend) + return ctx def utf8search(pattern, utf8string, bytestart=0, byteend=sys.maxint, flags=0): # bytestart and byteend must be valid byte positions inside the # utf8string. from rpython.rlib.rsre.rsre_core import search_context - assert 0 <= bytestart <= len(utf8string) - assert 0 <= byteend - if byteend > len(utf8string): - byteend = len(utf8string) - ctx = Utf8MatchContext(pattern, utf8string, bytestart, byteend, flags) - ctx.debug_check_pos(bytestart) - ctx.debug_check_pos(byteend) + ctx = make_utf8_ctx(pattern, utf8string, bytestart, byteend, flags) if search_context(ctx): return ctx else: return None -def utf8match(*args, **kwds): - NOT_IMPLEMENTED +def utf8match(pattern, utf8string, bytestart=0, byteend=sys.maxint, flags=0, + fullmatch=False): + # bytestart and byteend must be valid byte positions inside the + # utf8string. 
+ from rpython.rlib.rsre.rsre_core import match_context + + ctx = make_utf8_ctx(pattern, utf8string, bytestart, byteend, flags) + ctx.fullmatch_only = fullmatch + if match_context(ctx): + return ctx + else: + return None From pypy.commits at gmail.com Fri Dec 8 07:04:07 2017 From: pypy.commits at gmail.com (arigo) Date: Fri, 08 Dec 2017 04:04:07 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: Fix test_match Message-ID: <5a2a7fb7.90a9df0a.50226.9f4c@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93315:e2017b23843a Date: 2017-12-08 13:03 +0100 http://bitbucket.org/pypy/pypy/changeset/e2017b23843a/ Log: Fix test_match diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -207,13 +207,6 @@ return (-1, -1) return (fmarks[groupnum], fmarks[groupnum+1]) - def group(self, groupnum=0): - frm, to = self.span(groupnum) - if 0 <= frm <= to: - return self._string[frm:to] - else: - return None - def fresh_copy(self, start): raise NotImplementedError diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py --- a/rpython/rlib/rsre/test/support.py +++ b/rpython/rlib/rsre/test/support.py @@ -54,12 +54,19 @@ raise EndOfString return Position(r) - def slowly_convert_byte_pos_to_index(self, position): + def _real_pos(self, position): if type(position) is int and position == -1: return -1 assert isinstance(position, Position) return position._p + def group(self, groupnum=0): + frm, to = self.span(groupnum) + if self.ZERO <= frm <= to: + return self._string[self._real_pos(frm):self._real_pos(to)] + else: + return None + def str(self, position): assert isinstance(position, Position) return ord(self._string[position._p]) diff --git a/rpython/rlib/rsre/test/test_match.py b/rpython/rlib/rsre/test/test_match.py --- a/rpython/rlib/rsre/test/test_match.py +++ b/rpython/rlib/rsre/test/test_match.py @@ -1,7 +1,7 @@ import re, random, py from rpython.rlib.rsre import rsre_char from rpython.rlib.rsre.rpy import get_code, VERSION -from rpython.rlib.rsre.test.support import match, fullmatch, Position +from rpython.rlib.rsre.test.support import match, fullmatch, Position as P def get_code_and_re(regexp): @@ -51,20 +51,20 @@ def test_assert(self): r = get_code(r"abc(?=def)(.)") res = match(r, "abcdefghi") - assert res is not None and res.get_mark(1) == 4 + assert res is not None and res.get_mark(1) == P(4) assert not match(r, "abcdeFghi") def test_assert_not(self): r = get_code(r"abc(?!def)(.)") res = match(r, "abcdeFghi") - assert res is not None and res.get_mark(1) == 4 + assert res is not None and res.get_mark(1) == P(4) assert not match(r, "abcdefghi") def test_lookbehind(self): r = get_code(r"([a-z]*)(?<=de)") assert match(r, "ade") res = match(r, "adefg") - assert res is not None and res.get_mark(1) == 3 + assert res is not None and res.get_mark(1) == P(3) assert not match(r, "abc") assert not match(r, "X") assert not match(r, "eX") @@ -75,13 +75,13 @@ assert res is not None return res.get_mark(1) r = get_code(r"([a-z]*)(?utf8 conversions in cpyext/unicodeobject.py Message-ID: <5a2a8eb2.41281c0a.391cd.948c@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93316:8cc0253e1ece Date: 2017-12-08 13:07 +0000 http://bitbucket.org/pypy/pypy/changeset/8cc0253e1ece/ Log: Some unicode>utf8 conversions in cpyext/unicodeobject.py diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ 
b/pypy/interpreter/unicodehelper.py @@ -1,10 +1,11 @@ import sys -from pypy.interpreter.error import OperationError +from pypy.interpreter.error import OperationError, oefmt from rpython.rlib.objectmodel import specialize from rpython.rlib import rutf8 from rpython.rlib.rarithmetic import r_uint, intmask from rpython.rlib.rstring import StringBuilder +from rpython.rtyper.lltypesystem import rffi from pypy.module._codecs import interp_codecs @specialize.memo() @@ -204,7 +205,7 @@ if c > 0x7F: errorhandler("strict", 'ascii', 'ordinal not in range(128)', utf8, - pos, pos + 1) + pos, pos + 1) j = rutf8.next_codepoint_pos(r, j) pos = newpos res.append(r) @@ -530,6 +531,19 @@ return builder.build(), pos, outsize +def wcharpsize2utf8(space, wcharp, size): + """Safe version of rffi.wcharpsize2utf8. + + Raises app-level ValueError if any wchar value is outside the valid + codepoint range. + """ + try: + return rffi.wcharpsize2utf8(wcharp, size) + except ValueError: + raise oefmt(space.w_ValueError, + "character is not in range [U+0000; U+10ffff]") + + # ____________________________________________________________ # Raw unicode escape diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -1,5 +1,9 @@ +from rpython.rtyper.lltypesystem import rffi, lltype +from rpython.rlib import rstring, runicode +from rpython.tool.sourcetools import func_renamer + from pypy.interpreter.error import OperationError, oefmt -from rpython.rtyper.lltypesystem import rffi, lltype +from pypy.interpreter.unicodehelper import wcharpsize2utf8 from pypy.module.unicodedata import unicodedb from pypy.module.cpyext.api import ( CANNOT_FAIL, Py_ssize_t, build_type_checkers_flags, cpython_api, @@ -13,8 +17,6 @@ from pypy.module.sys.interp_encoding import setdefaultencoding from pypy.module._codecs.interp_codecs import CodecState from pypy.objspace.std import unicodeobject -from rpython.rlib import rstring, runicode -from rpython.tool.sourcetools import func_renamer import sys ## See comment in bytesobject.py. @@ -61,10 +63,10 @@ def unicode_attach(space, py_obj, w_obj, w_userdata=None): "Fills a newly allocated PyUnicodeObject with a unicode string" py_unicode = rffi.cast(PyUnicodeObject, py_obj) - s = space.unicode_w(w_obj) - py_unicode.c_length = len(s) + s, length = space.utf8_len_w(w_obj) + py_unicode.c_length = length py_unicode.c_str = lltype.nullptr(rffi.CWCHARP.TO) - py_unicode.c_hash = space.hash_w(space.newunicode(s)) + py_unicode.c_hash = space.hash_w(space.newutf8(s, length)) py_unicode.c_defenc = lltype.nullptr(PyObject.TO) def unicode_realize(space, py_obj): @@ -73,11 +75,12 @@ be modified after this call. 
""" py_uni = rffi.cast(PyUnicodeObject, py_obj) - s = rffi.wcharpsize2unicode(py_uni.c_str, py_uni.c_length) + length = py_uni.c_length + s = wcharpsize2utf8(space, py_uni.c_str, length) w_type = from_ref(space, rffi.cast(PyObject, py_obj.c_ob_type)) w_obj = space.allocate_instance(unicodeobject.W_UnicodeObject, w_type) - w_obj.__init__(s) - py_uni.c_hash = space.hash_w(space.newunicode(s)) + w_obj.__init__(s, length) + py_uni.c_hash = space.hash_w(space.newutf8(s, length)) track_reference(space, py_obj, w_obj) return w_obj @@ -214,8 +217,8 @@ if not ref_unicode.c_str: # Copy unicode buffer w_unicode = from_ref(space, rffi.cast(PyObject, ref)) - u = space.unicode_w(w_unicode) - ref_unicode.c_str = rffi.unicode2wcharp(u) + u, length = space.utf8_len_w(w_unicode) + ref_unicode.c_str = rffi.utf82wcharp(u, length) return ref_unicode.c_str @cpython_api([PyObject], rffi.CWCHARP) @@ -335,8 +338,8 @@ Therefore, modification of the resulting Unicode object is only allowed when u is NULL.""" if wchar_p: - s = rffi.wcharpsize2unicode(wchar_p, length) - return make_ref(space, space.newunicode(s)) + s = wcharpsize2utf8(space, wchar_p, length) + return make_ref(space, space.newutf8(s, length)) else: return rffi.cast(PyObject, new_empty_unicode(space, length)) @@ -506,7 +509,8 @@ """Encode the Py_UNICODE buffer of the given size and return a Python string object. Return NULL if an exception was raised by the codec.""" - w_u = space.newunicode(rffi.wcharpsize2unicode(s, size)) + u = wcharpsize2utf8(space, s, size) + w_u = space.newutf8(u, size) if errors: w_errors = space.newtext(rffi.charp2str(errors)) else: @@ -706,12 +710,12 @@ """Return 1 if substr matches str[start:end] at the given tail end (direction == -1 means to do a prefix match, direction == 1 a suffix match), 0 otherwise. Return -1 if an error occurred.""" - str = space.unicode_w(w_str) - substr = space.unicode_w(w_substr) + w_start = space.newint(start) + w_end = space.newint(end) if rffi.cast(lltype.Signed, direction) <= 0: - return rstring.startswith(str, substr, start, end) + return space.call_method(w_str, "startswith", w_substr, w_start, w_end) else: - return rstring.endswith(str, substr, start, end) + return space.call_method(w_str, "endswith", w_substr, w_start, w_end) @cpython_api([PyObject, PyObject, Py_ssize_t, Py_ssize_t], Py_ssize_t, error=-1) def PyUnicode_Count(space, w_str, w_substr, start, end): From pypy.commits at gmail.com Fri Dec 8 09:45:34 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 08 Dec 2017 06:45:34 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fixes Message-ID: <5a2aa58e.dc361c0a.ac44c.aacf@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93317:5677dc1909e9 Date: 2017-12-08 14:45 +0000 http://bitbucket.org/pypy/pypy/changeset/5677dc1909e9/ Log: fixes diff --git a/pypy/module/cpyext/longobject.py b/pypy/module/cpyext/longobject.py --- a/pypy/module/cpyext/longobject.py +++ b/pypy/module/cpyext/longobject.py @@ -4,6 +4,7 @@ CONST_STRING, ADDR, CANNOT_FAIL) from pypy.objspace.std.longobject import W_LongObject from pypy.interpreter.error import OperationError +from pypy.interpreter.unicodehelper import wcharpsize2utf8 from pypy.module.cpyext.intobject import PyInt_AsUnsignedLongMask from rpython.rlib.rbigint import rbigint @@ -191,7 +192,7 @@ string, length gives the number of characters, and base is the radix for the conversion. 
The radix must be in the range [2, 36]; if it is out of range, ValueError will be raised.""" - w_value = space.newunicode(rffi.wcharpsize2unicode(u, length)) + w_value = space.newutf8(wcharpsize2utf8(space, u, length), length) w_base = space.newint(rffi.cast(lltype.Signed, base)) return space.call_function(space.w_long, w_value, w_base) diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py --- a/pypy/module/cpyext/object.py +++ b/pypy/module/cpyext/object.py @@ -246,7 +246,7 @@ the Python expression unicode(o). Called by the unicode() built-in function.""" if w_obj is None: - return space.newunicode(u"") + return space.newutf8("", 6) return space.call_function(space.w_unicode, w_obj) @cpython_api([PyObject, PyObject], rffi.INT_real, error=-1) @@ -302,7 +302,7 @@ if opid == Py_EQ: return 1 if opid == Py_NE: - return 0 + return 0 w_res = PyObject_RichCompare(space, w_o1, w_o2, opid_int) return int(space.is_true(w_res)) diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -710,12 +710,17 @@ """Return 1 if substr matches str[start:end] at the given tail end (direction == -1 means to do a prefix match, direction == 1 a suffix match), 0 otherwise. Return -1 if an error occurred.""" + space.utf8_w(w_str) # type check + space.utf8_w(w_substr) w_start = space.newint(start) w_end = space.newint(end) if rffi.cast(lltype.Signed, direction) <= 0: - return space.call_method(w_str, "startswith", w_substr, w_start, w_end) + w_result = space.call_method( + w_str, "startswith", w_substr, w_start, w_end) else: - return space.call_method(w_str, "endswith", w_substr, w_start, w_end) + w_result = space.call_method( + w_str, "endswith", w_substr, w_start, w_end) + return space.int_w(w_result) @cpython_api([PyObject, PyObject, Py_ssize_t, Py_ssize_t], Py_ssize_t, error=-1) def PyUnicode_Count(space, w_str, w_substr, start, end): From pypy.commits at gmail.com Fri Dec 8 11:55:25 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 08 Dec 2017 08:55:25 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Fix PyUnicode_DecodeUTF16/32 Message-ID: <5a2ac3fd.0485df0a.eb104.2058@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93318:d53d8f486841 Date: 2017-12-08 16:53 +0000 http://bitbucket.org/pypy/pypy/changeset/d53d8f486841/ Log: Fix PyUnicode_DecodeUTF16/32 diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -3,7 +3,8 @@ from rpython.tool.sourcetools import func_renamer from pypy.interpreter.error import OperationError, oefmt -from pypy.interpreter.unicodehelper import wcharpsize2utf8 +from pypy.interpreter.unicodehelper import ( + wcharpsize2utf8, str_decode_utf_16_helper, str_decode_utf_32_helper) from pypy.module.unicodedata import unicodedb from pypy.module.cpyext.api import ( CANNOT_FAIL, Py_ssize_t, build_type_checkers_flags, cpython_api, @@ -568,15 +569,11 @@ else: errors = None - result, length, byteorder = runicode.str_decode_utf_16_helper( - string, size, errors, - True, # final ? false for multiple passes? 
- None, # errorhandler - byteorder) + result, _, length, byteorder = str_decode_utf_16_helper( + string, errors, final=True, errorhandler=None, byteorder=byteorder) if pbyteorder is not None: pbyteorder[0] = rffi.cast(rffi.INT, byteorder) - - return space.newunicode(result) + return space.newutf8(result, length) @cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING, rffi.INTP], PyObject) def PyUnicode_DecodeUTF32(space, s, size, llerrors, pbyteorder): @@ -624,15 +621,11 @@ else: errors = None - result, length, byteorder = runicode.str_decode_utf_32_helper( - string, size, errors, - True, # final ? false for multiple passes? - None, # errorhandler - byteorder) + result, _, length, byteorder = str_decode_utf_32_helper( + string, errors, final=True, errorhandler=None, byteorder=byteorder) if pbyteorder is not None: pbyteorder[0] = rffi.cast(rffi.INT, byteorder) - - return space.newunicode(result) + return space.newutf8(result, length) @cpython_api([rffi.CWCHARP, Py_ssize_t, rffi.CCHARP, CONST_STRING], rffi.INT_real, error=-1) From pypy.commits at gmail.com Fri Dec 8 20:36:44 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 08 Dec 2017 17:36:44 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Add utf8-based replacement for runicode.unicode_encode_decimal() to unicodehelper and fix PyUnicode_EncodeDecimal() Message-ID: <5a2b3e2c.47b0df0a.ade9e.7f8a@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93319:ac75e33e51bb Date: 2017-12-09 01:36 +0000 http://bitbucket.org/pypy/pypy/changeset/ac75e33e51bb/ Log: Add utf8-based replacement for runicode.unicode_encode_decimal() to unicodehelper and fix PyUnicode_EncodeDecimal() diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -1,3 +1,4 @@ +import pytest from hypothesis import given, strategies from rpython.rlib import rutf8 @@ -5,6 +6,7 @@ from pypy.interpreter.unicodehelper import str_decode_utf8 from pypy.interpreter.unicodehelper import utf8_encode_ascii, str_decode_ascii from pypy.interpreter import unicodehelper as uh +from pypy.module._codecs.interp_codecs import CodecState def decode_utf8(u): return str_decode_utf8(u, True, "strict", None) @@ -68,3 +70,16 @@ def test_unicode_escape(u): r = uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict", None) assert r == u.encode("unicode-escape") + +def test_encode_decimal(space): + assert uh.unicode_encode_decimal(u' 12, 34 ', None) == ' 12, 34 ' + with pytest.raises(ValueError): + uh.unicode_encode_decimal(u' 12, \u1234 '.encode('utf8'), None) + state = space.fromcache(CodecState) + handler = state.encode_error_handler + assert uh.unicode_encode_decimal( + u'u\u1234\u1235v'.encode('utf8'), 'replace', handler) == 'u??v' + + result = uh.unicode_encode_decimal( + u'12\u1234'.encode('utf8'), 'xmlcharrefreplace', handler) + assert result == '12ሴ' diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -7,6 +7,7 @@ from rpython.rlib.rstring import StringBuilder from rpython.rtyper.lltypesystem import rffi from pypy.module._codecs import interp_codecs +from pypy.module.unicodedata import unicodedb @specialize.memo() def decode_error_handler(space): @@ -35,6 +36,16 @@ space.newtext(msg)])) return raise_unicode_exception_encode +def default_error_encode( + errors, encoding, msg, u, startingpos, endingpos): + """A default 
handler, for tests""" + assert endingpos >= 0 + if errors == 'replace': + return '?', endingpos + if errors == 'ignore': + return '', endingpos + raise ValueError + def convert_arg_to_w_unicode(space, w_arg, strict=None): return space.convert_arg_to_w_unicode(w_arg) @@ -1458,3 +1469,70 @@ pos = rutf8.next_codepoint_pos(s, pos) return result.build() +# ____________________________________________________________ +# Decimal Encoder +def unicode_encode_decimal(s, errors, errorhandler=None): + """Converts whitespace to ' ', decimal characters to their + corresponding ASCII digit and all other Latin-1 characters except + \0 as-is. Characters outside this range (Unicode ordinals 1-256) + are treated as errors. This includes embedded NULL bytes. + """ + if errorhandler is None: + errorhandler = default_error_encode + result = StringBuilder(len(s)) + pos = 0 + i = 0 + it = rutf8.Utf8StringIterator(s) + for ch in it: + if unicodedb.isspace(ch): + result.append(' ') + i += 1 + continue + try: + decimal = unicodedb.decimal(ch) + except KeyError: + pass + else: + result.append(chr(48 + decimal)) + i += 1 + continue + if 0 < ch < 256: + result.append(chr(ch)) + i += 1 + continue + # All other characters are considered unencodable + start_index = i + i += 1 + while not it.done(): + ch = rutf8.codepoint_at_pos(s, it.get_pos()) + try: + if (0 < ch < 256 or unicodedb.isspace(ch) or + unicodedb.decimal(ch) >= 0): + break + except KeyError: + # not a decimal + pass + if it.done(): + break + ch = next(it) + i += 1 + end_index = i + msg = "invalid decimal Unicode string" + r, pos = errorhandler( + errors, 'decimal', msg, s, start_index, end_index) + for ch in rutf8.Utf8StringIterator(r): + if unicodedb.isspace(ch): + result.append(' ') + continue + try: + decimal = unicodedb.decimal(ch) + except KeyError: + pass + else: + result.append(chr(48 + decimal)) + continue + if 0 < ch < 256: + result.append(chr(ch)) + continue + errorhandler('strict', 'decimal', msg, s, start_index, end_index) + return result.build() diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -4,7 +4,8 @@ from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.unicodehelper import ( - wcharpsize2utf8, str_decode_utf_16_helper, str_decode_utf_32_helper) + wcharpsize2utf8, str_decode_utf_16_helper, str_decode_utf_32_helper, + unicode_encode_decimal) from pypy.module.unicodedata import unicodedb from pypy.module.cpyext.api import ( CANNOT_FAIL, Py_ssize_t, build_type_checkers_flags, cpython_api, @@ -643,14 +644,13 @@ Returns 0 on success, -1 on failure. 
""" - u = rffi.wcharpsize2unicode(s, length) + u = rffi.wcharpsize2utf8(s, length) if llerrors: errors = rffi.charp2str(llerrors) else: errors = None state = space.fromcache(CodecState) - result = runicode.unicode_encode_decimal(u, length, errors, - state.encode_error_handler) + result = unicode_encode_decimal(u, errors, state.encode_error_handler) i = len(result) output[i] = '\0' i -= 1 From pypy.commits at gmail.com Fri Dec 8 21:15:56 2017 From: pypy.commits at gmail.com (mjacob) Date: Fri, 08 Dec 2017 18:15:56 -0800 (PST) Subject: [pypy-commit] pypy py3.6: hg merge py3.5 (+ fixes) Message-ID: <5a2b475c.8988df0a.3b9f5.7379@mx.google.com> Author: Manuel Jacob Branch: py3.6 Changeset: r93320:f04d4604c7e3 Date: 2017-12-09 03:14 +0100 http://bitbucket.org/pypy/pypy/changeset/f04d4604c7e3/ Log: hg merge py3.5 (+ fixes) I'm not 100% sure about the merge in test_dis.py, but most of the tests are failing anyway. diff too long, truncating to 2000 out of 12565 lines diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -59,6 +59,7 @@ ^rpython/rlib/rvmprof/src/shared/libbacktrace/config.h$ ^rpython/rlib/rvmprof/src/shared/libbacktrace/config.log$ ^rpython/rlib/rvmprof/src/shared/libbacktrace/config.status$ +^pypy/tool/dest$ ^pypy/goal/pypy-translation-snapshot$ ^pypy/goal/pypy-c ^pypy/goal/pypy3-c diff --git a/_pytest/terminal.py b/_pytest/terminal.py --- a/_pytest/terminal.py +++ b/_pytest/terminal.py @@ -366,11 +366,11 @@ EXIT_OK, EXIT_TESTSFAILED, EXIT_INTERRUPTED, EXIT_USAGEERROR, EXIT_NOTESTSCOLLECTED) if exitstatus in summary_exit_codes: - self.config.hook.pytest_terminal_summary(terminalreporter=self) self.summary_errors() self.summary_failures() self.summary_warnings() self.summary_passes() + self.config.hook.pytest_terminal_summary(terminalreporter=self) if exitstatus == EXIT_INTERRUPTED: self._report_keyboardinterrupt() del self._keyboardinterrupt_memo diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt new file mode 100644 --- /dev/null +++ b/extra_tests/requirements.txt @@ -0,0 +1,2 @@ +pytest +hypothesis diff --git a/extra_tests/test_bytes.py b/extra_tests/test_bytes.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_bytes.py @@ -0,0 +1,84 @@ +from hypothesis import strategies as st +from hypothesis import given, example + +st_bytestring = st.binary() | st.binary().map(bytearray) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st_bytestring, st_bytestring) +def test_startswith_basic(u, v): + assert 
u.startswith(v) is (u[:len(v)] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def test_startswith_start(u, v, start): + expected = u[start:].startswith(v) if v else (start <= len(u)) + assert u.startswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st_bytestring, st_bytestring) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def test_endswith_2(u, v, start): + expected = u[start:].endswith(v) if v else (start <= len(u)) + assert u.endswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_textio.py @@ -0,0 +1,48 @@ +from hypothesis import given, strategies as st + +from io import BytesIO, TextIOWrapper +import os + +def translate_newlines(text): + text = text.replace('\r\n', '\n') + text = text.replace('\r', '\n') + return text.replace('\n', os.linesep) + + at st.composite +def st_readline_universal( + draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + lines = draw(st.lists( + st.text(st.characters(blacklist_characters='\r\n')), + min_size=n_lines, max_size=n_lines)) + limits = [] + for line in lines: + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + endings = draw(st.lists( + st.sampled_from(['\n', '\r', '\r\n']), + min_size=n_lines, max_size=n_lines)) + return ( + ''.join(line + ending for line, ending in zip(lines, endings)), + limits) + + at given(data=st_readline_universal(), + mode=st.sampled_from(['\r', '\n', '\r\n', '', None])) +def test_readline(data, mode): + txt, limits = data + textio = TextIOWrapper( + BytesIO(txt.encode('utf-8', 'surrogatepass')), + encoding='utf-8', errors='surrogatepass', newline=mode) + lines = [] + for limit in limits: + line = textio.readline(limit) + if limit >= 0: + assert len(line) <= limit + if line: + lines.append(line) + elif limit: + break + if mode is None: + txt = translate_newlines(txt) + assert txt.startswith(u''.join(lines)) diff --git a/extra_tests/test_unicode.py b/extra_tests/test_unicode.py --- a/extra_tests/test_unicode.py +++ b/extra_tests/test_unicode.py @@ -1,3 +1,4 @@ +import sys import pytest from hypothesis import strategies as st from hypothesis import given, settings, example @@ -32,3 +33,89 @@ @given(s=st.text()) def test_composition(s, norm1, norm2, norm3): assert normalize(norm2, normalize(norm1, s)) == normalize(norm3, s) + + 
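# One concrete instance of the composition property exercised above, for
# illustration (standard library only; the sample string is made up, not taken
# from the test suite): normalizing with one form and then another gives the
# same result as normalizing once with the combined form.
#
#     from unicodedata import normalize
#     s = u"e\u0301"                     # 'e' followed by a combining acute
#     assert normalize('NFC', normalize('NFD', s)) == normalize('NFC', s)
#     assert normalize('NFC', s) == u"\u00e9"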
at given(st.text(), st.text(), st.text()) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st.text(), st.text()) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_startswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].startswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.startswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st.text(), st.text()) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_endswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].endswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.endswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -360,14 +360,15 @@ self._FuncPtr = _FuncPtr if handle is None: - if flags & _FUNCFLAG_CDECL: - pypy_dll = _ffi.CDLL(name, mode) - else: - pypy_dll = _ffi.WinDLL(name, mode) - self.__pypy_dll__ = pypy_dll - handle = int(pypy_dll) - if _sys.maxint > 2 ** 32: - handle = int(handle) # long -> int + handle = 0 + if flags & _FUNCFLAG_CDECL: + pypy_dll = _ffi.CDLL(name, mode, handle) + else: + pypy_dll = _ffi.WinDLL(name, mode, handle) + self.__pypy_dll__ = pypy_dll + 
handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int self._handle = handle def __repr__(self): diff --git a/lib-python/2.7/inspect.py b/lib-python/2.7/inspect.py --- a/lib-python/2.7/inspect.py +++ b/lib-python/2.7/inspect.py @@ -40,6 +40,10 @@ import linecache from operator import attrgetter from collections import namedtuple +try: + from cpyext import is_cpyext_function as _is_cpyext_function +except ImportError: + _is_cpyext_function = lambda obj: False # These constants are from Include/code.h. CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS = 0x1, 0x2, 0x4, 0x8 @@ -230,7 +234,7 @@ __doc__ documentation string __name__ original name of this function or method __self__ instance to which a method is bound, or None""" - return isinstance(object, types.BuiltinFunctionType) + return isinstance(object, types.BuiltinFunctionType) or _is_cpyext_function(object) def isroutine(object): """Return true if the object is any kind of function or method.""" diff --git a/lib-python/2.7/test/test_urllib2net.py b/lib-python/2.7/test/test_urllib2net.py --- a/lib-python/2.7/test/test_urllib2net.py +++ b/lib-python/2.7/test/test_urllib2net.py @@ -286,7 +286,7 @@ self.assertEqual(u.fp._sock.fp._sock.gettimeout(), 120) u.close() - FTP_HOST = 'ftp://ftp.debian.org/debian/' + FTP_HOST = 'ftp://www.pythontest.net/' def test_ftp_basic(self): self.assertIsNone(socket.getdefaulttimeout()) diff --git a/lib-python/2.7/warnings.py b/lib-python/2.7/warnings.py --- a/lib-python/2.7/warnings.py +++ b/lib-python/2.7/warnings.py @@ -43,11 +43,12 @@ unicodetype = unicode except NameError: unicodetype = () + template = "%s: %s: %s\n" try: message = str(message) except UnicodeEncodeError: - pass - s = "%s: %s: %s\n" % (lineno, category.__name__, message) + template = unicode(template) + s = template % (lineno, category.__name__, message) line = linecache.getline(filename, lineno) if line is None else line if line: line = line.strip() diff --git a/lib-python/3/doctest.py b/lib-python/3/doctest.py --- a/lib-python/3/doctest.py +++ b/lib-python/3/doctest.py @@ -948,6 +948,8 @@ elif inspect.getmodule(object) is not None: return module is inspect.getmodule(object) elif inspect.isfunction(object): + if isinstance(object.__code__, inspect._builtin_code_type): + return True # XXX: A PyPy builtin - no way to tell return module.__dict__ is object.__globals__ elif inspect.ismethoddescriptor(object): if hasattr(object, '__objclass__'): diff --git a/lib-python/3/idlelib/CallTips.py b/lib-python/3/idlelib/CallTips.py --- a/lib-python/3/idlelib/CallTips.py +++ b/lib-python/3/idlelib/CallTips.py @@ -123,6 +123,15 @@ _first_param = re.compile(r'(?<=\()\w*\,?\s*') _default_callable_argspec = "See source or doc" +def _is_user_method(ob): + """Detect user methods on PyPy""" + return (isinstance(ob, types.MethodType) and + isinstance(ob.__code__, types.CodeType)) + +def _is_user_function(ob): + """Detect user methods on PyPy""" + return (isinstance(ob, types.FunctionType) and + isinstance(ob.__code__, types.CodeType)) def get_argspec(ob): '''Return a string describing the signature of a callable object, or ''. 
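# A small sketch of the distinction the new _is_user_method/_is_user_function
# helpers rely on: only user-defined functions carry a real types.CodeType in
# __code__, while builtins (which on PyPy may still look like FunctionType)
# do not. Plain Python for illustration; 'f' is a made-up example, not part
# of the patch.
#
#     import types
#     def f():
#         pass
#     assert isinstance(f, types.FunctionType)
#     assert isinstance(f.__code__, types.CodeType)      # user-defined
#     assert not (isinstance(len, types.FunctionType) and
#                 isinstance(getattr(len, '__code__', None), types.CodeType))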
@@ -140,21 +149,21 @@ return argspec if isinstance(ob, type): fob = ob.__init__ - elif isinstance(ob_call, types.MethodType): + elif _is_user_method(ob_call): fob = ob_call else: fob = ob if (isinstance(fob, (types.FunctionType, types.MethodType)) and hasattr(fob.__code__, 'co_code')): # PyPy: not on argspec = inspect.formatargspec(*inspect.getfullargspec(fob)) - if (isinstance(ob, (type, types.MethodType)) or - isinstance(ob_call, types.MethodType)): + if (_is_user_method(ob) or _is_user_method(ob_call) or + (isinstance(ob, type) and _is_user_function(fob))): argspec = _first_param.sub("", argspec) lines = (textwrap.wrap(argspec, _MAX_COLS, subsequent_indent=_INDENT) if len(argspec) > _MAX_COLS else [argspec] if argspec else []) - if isinstance(ob_call, types.MethodType): + if _is_user_method(ob_call): doc = ob_call.__doc__ else: doc = getattr(ob, "__doc__", "") diff --git a/lib-python/3/idlelib/idle_test/test_calltips.py b/lib-python/3/idlelib/idle_test/test_calltips.py --- a/lib-python/3/idlelib/idle_test/test_calltips.py +++ b/lib-python/3/idlelib/idle_test/test_calltips.py @@ -63,7 +63,7 @@ gtest([].append, append_doc) gtest(List.append, append_doc) - gtest(types.MethodType, "method(function, instance)") + gtest(types.MethodType, "instancemethod(function, instance, class)") gtest(SB(), default_tip) def test_signature_wrap(self): diff --git a/lib-python/3/inspect.py b/lib-python/3/inspect.py --- a/lib-python/3/inspect.py +++ b/lib-python/3/inspect.py @@ -49,6 +49,10 @@ import builtins from operator import attrgetter from collections import namedtuple, OrderedDict +try: + from cpyext import is_cpyext_function as _is_cpyext_function +except ImportError: + _is_cpyext_function = lambda obj: False # Create constants for the compiler flags in Include/code.h # We try to get them from dis to avoid duplication @@ -274,7 +278,7 @@ __doc__ documentation string __name__ original name of this function or method __self__ instance to which a method is bound, or None""" - return isinstance(object, types.BuiltinFunctionType) + return isinstance(object, types.BuiltinFunctionType) or _is_cpyext_function(object) def isroutine(object): """Return true if the object is any kind of function or method.""" @@ -1823,7 +1827,7 @@ kwdefaults = getattr(obj, '__kwdefaults__', _void) # ... 
and not None here annotations = getattr(obj, '__annotations__', None) - return (isinstance(code, types.CodeType) and + return (isinstance(code, (types.CodeType, _builtin_code_type)) and isinstance(name, str) and (defaults is None or isinstance(defaults, tuple)) and (kwdefaults is None or isinstance(kwdefaults, dict)) and @@ -2077,8 +2081,6 @@ s = getattr(func, "__text_signature__", None) if not s: - if func is object: # XXX PyPy hack until we support __text_signature__ - return '()' # in the same cases as CPython raise ValueError("no signature found for builtin {!r}".format(func)) return _signature_fromstr(cls, func, s, skip_bound_arg) diff --git a/lib-python/3/test/test_capi.py b/lib-python/3/test/test_capi.py --- a/lib-python/3/test/test_capi.py +++ b/lib-python/3/test/test_capi.py @@ -31,8 +31,9 @@ skips = [] if support.check_impl_detail(pypy=True): skips += [ - 'test_widechar', - ] + 'test_lazy_hash_inheritance', + 'test_capsule', + ] def testfunction(self): """some doc""" @@ -55,6 +56,8 @@ self.assertEqual(testfunction.attribute, "test") self.assertRaises(AttributeError, setattr, inst.testfunction, "attribute", "test") + @unittest.skipIf(support.check_impl_detail(pypy=True), + "doesn't crash on PyPy") @unittest.skipUnless(threading, 'Threading required for this test.') def test_no_FatalError_infinite_loop(self): with support.SuppressCrashReport(): @@ -207,9 +210,9 @@ else: with self.assertRaises(SystemError) as cm: _testcapi.return_null_without_error() + # PyPy change: different message self.assertRegex(str(cm.exception), - 'return_null_without_error.* ' - 'returned NULL without setting an error') + 'Function returned a NULL result without setting an exception') def test_return_result_with_error(self): # Issue #23571: A function must not return a result with an error set @@ -239,9 +242,9 @@ else: with self.assertRaises(SystemError) as cm: _testcapi.return_result_with_error() + # PyPy change: different message self.assertRegex(str(cm.exception), - 'return_result_with_error.* ' - 'returned a result with an error set') + 'An exception was set, but function returned a value') def test_buildvalue_N(self): _testcapi.test_buildvalue_N() @@ -329,6 +332,8 @@ self.pendingcalls_wait(l, n) + at unittest.skipIf(support.check_impl_detail(pypy=True), + "subinterpreters not implemented on PyPy") class SubinterpreterTest(unittest.TestCase): def test_subinterps(self): diff --git a/lib-python/3/test/test_cmd_line_script.py b/lib-python/3/test/test_cmd_line_script.py --- a/lib-python/3/test/test_cmd_line_script.py +++ b/lib-python/3/test/test_cmd_line_script.py @@ -43,11 +43,7 @@ _loader = __loader__ if __loader__ is BuiltinImporter else type(__loader__) print('__loader__==%a' % _loader) print('__file__==%a' % __file__) -if __cached__ is not None: - # XXX: test_script_compiled on PyPy - assertEqual(__file__, __cached__) - if not __cached__.endswith(('pyc', 'pyo')): - raise AssertionError('has __cached__ but not compiled') +print('__cached__==%a' % __cached__) print('__package__==%r' % __package__) # Check PEP 451 details import os.path @@ -238,9 +234,8 @@ def test_basic_script(self): with support.temp_dir() as script_dir: script_name = _make_test_script(script_dir, 'script') - package = '' if support.check_impl_detail(pypy=True) else None self._check_script(script_name, script_name, script_name, - script_dir, package, + script_dir, None, importlib.machinery.SourceFileLoader) def test_script_compiled(self): @@ -249,9 +244,8 @@ py_compile.compile(script_name, doraise=True) os.remove(script_name) pyc_file = 
support.make_legacy_pyc(script_name) - package = '' if support.check_impl_detail(pypy=True) else None self._check_script(pyc_file, pyc_file, - pyc_file, script_dir, package, + pyc_file, script_dir, None, importlib.machinery.SourcelessFileLoader) def test_directory(self): diff --git a/lib-python/3/test/test_cprofile.py b/lib-python/3/test/test_cprofile.py --- a/lib-python/3/test/test_cprofile.py +++ b/lib-python/3/test/test_cprofile.py @@ -1,7 +1,7 @@ """Test suite for the cProfile module.""" import sys -from test.support import run_unittest, TESTFN, unlink +from test.support import run_unittest, TESTFN, unlink, cpython_only # rip off all interesting stuff from test_profile import cProfile @@ -17,6 +17,7 @@ return _ProfileOutput # Issue 3895. + @cpython_only def test_bad_counter_during_dealloc(self): import _lsprof # Must use a file as StringIO doesn't trigger the bug. diff --git a/lib-python/3/test/test_descr.py b/lib-python/3/test/test_descr.py --- a/lib-python/3/test/test_descr.py +++ b/lib-python/3/test/test_descr.py @@ -4278,7 +4278,10 @@ c = C() c.__dict__[Evil()] = 0 - self.assertEqual(c.attr, 1) + try: + self.assertEqual(c.attr, 1) + except AttributeError: # when Evil.__eq__ is called twice + pass # this makes a crash more likely: support.gc_collect() self.assertNotHasAttr(c, 'attr') diff --git a/lib-python/3/test/test_dis.py b/lib-python/3/test/test_dis.py --- a/lib-python/3/test/test_dis.py +++ b/lib-python/3/test/test_dis.py @@ -146,24 +146,26 @@ 1) pass +# PyPy change: JUMP_IF_NOT_DEBUG dis_bug1333982 = """\ -%3d 0 LOAD_CONST 1 (0) - 2 POP_JUMP_IF_TRUE 26 - 4 LOAD_GLOBAL 0 (AssertionError) - 6 LOAD_CONST 2 ( at 0x..., file "%s", line %d>) - 8 LOAD_CONST 3 ('bug1333982..') - 10 MAKE_FUNCTION 0 - 12 LOAD_FAST 0 (x) - 14 GET_ITER - 16 CALL_FUNCTION 1 +%3d 0 JUMP_IF_NOT_DEBUG 26 (to 28) + 2 LOAD_CONST 1 (0) + 4 POP_JUMP_IF_TRUE 28 + 6 LOAD_GLOBAL 0 (AssertionError) + 8 LOAD_CONST 2 ( at 0x..., file "%s", line %d>) + 10 LOAD_CONST 3 ('bug1333982..') + 12 MAKE_FUNCTION 0 + 14 LOAD_FAST 0 (x) + 16 GET_ITER + 18 CALL_FUNCTION 1 -%3d 18 LOAD_CONST 4 (1) - 20 BINARY_ADD - 22 CALL_FUNCTION 1 - 24 RAISE_VARARGS 1 +%3d 20 LOAD_CONST 4 (1) + 22 BINARY_ADD + 24 CALL_FUNCTION 1 + 26 RAISE_VARARGS 1 -%3d >> 26 LOAD_CONST 0 (None) - 28 RETURN_VALUE +%3d >> 28 LOAD_CONST 0 (None) + 30 RETURN_VALUE """ % (bug1333982.__code__.co_firstlineno + 1, __file__, bug1333982.__code__.co_firstlineno + 1, diff --git a/lib-python/3/test/test_doctest.py b/lib-python/3/test/test_doctest.py --- a/lib-python/3/test/test_doctest.py +++ b/lib-python/3/test/test_doctest.py @@ -660,7 +660,7 @@ >>> import builtins >>> tests = doctest.DocTestFinder().find(builtins) - >>> lo, hi = (120, 140) if is_pypy else (790, 810) + >>> lo, hi = (420, 440) if is_pypy else (790, 810) >>> lo < len(tests) < hi # approximate number of objects with docstrings True >>> real_tests = [t for t in tests if len(t.examples) > 0] diff --git a/lib-python/3/test/test_inspect.py b/lib-python/3/test/test_inspect.py --- a/lib-python/3/test/test_inspect.py +++ b/lib-python/3/test/test_inspect.py @@ -32,6 +32,8 @@ from test import support from test.test_import import _ready_to_import +if check_impl_detail(): + import _pickle # Functions tested in this suite: @@ -372,6 +374,7 @@ self.assertEqual(inspect.getdoc(mod.FesteringGob.contradiction), 'The automatic gainsaying.') + @cpython_only # XXX: _finddoc() is broken on PyPy, but getdoc() seems OK @unittest.skipIf(MISSING_C_DOCSTRINGS, "test requires docstrings") def test_finddoc(self): finddoc = 
inspect._finddoc @@ -765,21 +768,23 @@ @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_getfullargspec_builtin_methods(self): - import _pickle - self.assertFullArgSpecEquals(_pickle.Pickler.dump, - args_e=['self', 'obj'], formatted='(self, obj)') - - self.assertFullArgSpecEquals(_pickle.Pickler(io.BytesIO()).dump, - args_e=['self', 'obj'], formatted='(self, obj)') + if check_impl_detail(): + self.assertFullArgSpecEquals(_pickle.Pickler.dump, + args_e=['self', 'obj'], formatted='(self, obj)') + + self.assertFullArgSpecEquals(_pickle.Pickler(io.BytesIO()).dump, + args_e=['self', 'obj'], formatted='(self, obj)') + + # platform-dependent on PyPy + default_fd = os.stat.__kwdefaults__['dir_fd'] self.assertFullArgSpecEquals( os.stat, args_e=['path'], kwonlyargs_e=['dir_fd', 'follow_symlinks'], - kwonlydefaults_e={'dir_fd': None, 'follow_symlinks': True}, - formatted='(path, *, dir_fd=None, follow_symlinks=True)') - - @cpython_only + kwonlydefaults_e={'dir_fd': default_fd, 'follow_symlinks': True}, + formatted='(path, *, dir_fd={}, follow_symlinks=True)'.format(default_fd)) + @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_getfullagrspec_builtin_func(self): @@ -788,7 +793,6 @@ spec = inspect.getfullargspec(builtin) self.assertEqual(spec.defaults[0], 'avocado') - @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_getfullagrspec_builtin_func_no_signature(self): @@ -826,7 +830,9 @@ attrs = attrs_wo_objs(A) - self.assertIn(('__new__', 'method', object), attrs, 'missing __new__') + # changed in PyPy + self.assertIn(('__new__', 'static method', object), attrs, 'missing __new__') + self.assertIn(('__init__', 'method', object), attrs, 'missing __init__') self.assertIn(('s', 'static method', A), attrs, 'missing static method') @@ -1969,12 +1975,10 @@ ('kwargs', ..., int, "var_keyword")), ...)) - @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_signature_on_builtins(self): import _testcapi - import _pickle def test_unbound_method(o): """Use this to test unbound methods (things that should have a self)""" @@ -2008,9 +2012,10 @@ # normal method # (PyMethodDescr_Type, "method_descriptor") - test_unbound_method(_pickle.Pickler.dump) - d = _pickle.Pickler(io.StringIO()) - test_callable(d.dump) + if check_impl_detail(): + test_unbound_method(_pickle.Pickler.dump) + d = _pickle.Pickler(io.StringIO()) + test_callable(d.dump) # static method test_callable(str.maketrans) @@ -2031,7 +2036,7 @@ # This doesn't work now. 
# (We don't have a valid signature for "type" in 3.4) - with self.assertRaisesRegex(ValueError, "no signature found"): + with self.assertRaisesRegex(ValueError, "signature"): class ThisWorksNow: __call__ = type test_callable(ThisWorksNow()) @@ -2043,7 +2048,6 @@ # Regression test for issue #20586 test_callable(_testcapi.docstring_with_signature_but_no_doc) - @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_signature_on_decorated_builtins(self): @@ -2066,7 +2070,6 @@ follow_wrapped=False), inspect.signature(wrapper_like)) - @cpython_only def test_signature_on_builtins_no_signature(self): import _testcapi with self.assertRaisesRegex(ValueError, @@ -2642,10 +2645,10 @@ with self.assertRaisesRegex(ValueError, "callable.*is not supported"): self.assertEqual(inspect.signature(D), None) + @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_signature_on_builtin_class(self): - import _pickle self.assertEqual(str(inspect.signature(_pickle.Pickler)), '(file, protocol=None, fix_imports=True)') @@ -2891,10 +2894,10 @@ foo_sig = MySignature.from_callable(foo) self.assertTrue(isinstance(foo_sig, MySignature)) + @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_signature_from_callable_builtin_obj(self): - import _pickle class MySignature(inspect.Signature): pass sig = MySignature.from_callable(_pickle.Pickler) self.assertTrue(isinstance(sig, MySignature)) @@ -3453,7 +3456,6 @@ # This test case provides a home for checking that particular APIs # have signatures available for introspection - @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_builtins_have_signatures(self): diff --git a/lib-python/3/test/test_io.py b/lib-python/3/test/test_io.py --- a/lib-python/3/test/test_io.py +++ b/lib-python/3/test/test_io.py @@ -1202,12 +1202,7 @@ b = bytearray(2*buffer_size) self.assertEqual(bufio.peek(3), b'fgh') self.assertEqual(rawio._reads, 3) - self.assertEqual(bufio.readinto1(b), 6) # fails because of - # an apparent inconsistency in CPython: readinto1(), if the - # buffered amount is smaller, would always issue one raw read() - # call. This differs from read1(), which if the buffered amount - # if smaller (but more than zero), would just return it without - # any raw read() call. In PyPy both have the behavior of read1(). + self.assertEqual(bufio.readinto1(b), 6) self.assertEqual(b[:6], b"fghjkl") self.assertEqual(rawio._reads, 4) diff --git a/lib-python/3/test/test_pydoc.py b/lib-python/3/test/test_pydoc.py --- a/lib-python/3/test/test_pydoc.py +++ b/lib-python/3/test/test_pydoc.py @@ -143,7 +143,7 @@  
Modules - +\x20\x20\x20\x20        
builtins

@@ -883,7 +883,7 @@ @requires_docstrings def test_unbound_builtin_method(self): self.assertEqual(self._get_summary_line(pickle.Pickler.dump), - "dump(self, obj, /)") + "dump(self, obj)") # these no longer include "self" def test_bound_python_method(self): @@ -912,13 +912,13 @@ s = StringIO() p = pickle.Pickler(s) self.assertEqual(self._get_summary_line(p.dump), - "dump(obj, /) method of _pickle.Pickler instance") + "dump(obj) method of pickle._Pickler instance") # this should *never* include self! @requires_docstrings def test_module_level_callable(self): self.assertEqual(self._get_summary_line(os.stat), - "stat(path, *, dir_fd=None, follow_symlinks=True)") + "stat(path, *, dir_fd=-100, follow_symlinks=True)") @unittest.skipUnless(threading, 'Threading required for this test.') diff --git a/lib-python/3/test/test_tracemalloc.py b/lib-python/3/test/test_tracemalloc.py --- a/lib-python/3/test/test_tracemalloc.py +++ b/lib-python/3/test/test_tracemalloc.py @@ -1,7 +1,6 @@ import contextlib import os import sys -import tracemalloc import unittest from unittest.mock import patch from test.support.script_helper import (assert_python_ok, assert_python_failure, @@ -17,6 +16,11 @@ _testcapi = None +try: + import tracemalloc +except ImportError: + raise unittest.SkipTest("tracemalloc is required") + EMPTY_STRING_SIZE = sys.getsizeof(b'') diff --git a/lib-python/3/traceback.py b/lib-python/3/traceback.py --- a/lib-python/3/traceback.py +++ b/lib-python/3/traceback.py @@ -566,8 +566,8 @@ yield ' {}\n'.format(badline.strip()) if offset is not None: caretspace = badline.rstrip('\n') - offset = min(len(caretspace), offset) - 1 - caretspace = caretspace[:offset].lstrip() + # bug in CPython: the case offset==0 is mishandled + caretspace = caretspace[:offset].lstrip()[:-1] # non-space whitespace (likes tabs) must be kept for alignment caretspace = ((c.isspace() and c or ' ') for c in caretspace) yield ' {}^\n'.format(''.join(caretspace)) diff --git a/lib_pypy/_cffi_ssl/_cffi_src/openssl/cryptography.py b/lib_pypy/_cffi_ssl/_cffi_src/openssl/cryptography.py --- a/lib_pypy/_cffi_ssl/_cffi_src/openssl/cryptography.py +++ b/lib_pypy/_cffi_ssl/_cffi_src/openssl/cryptography.py @@ -48,6 +48,9 @@ #else #define CRYPTOGRAPHY_IS_LIBRESSL 0 #endif + +#define CRYPTOGRAPHY_LIBRESSL_251_OR_GREATER \ + (CRYPTOGRAPHY_IS_LIBRESSL && LIBRESSL_VERSION_NUMBER >= 0x20501000) """ TYPES = """ diff --git a/lib_pypy/_cffi_ssl/_cffi_src/openssl/x509_vfy.py b/lib_pypy/_cffi_ssl/_cffi_src/openssl/x509_vfy.py --- a/lib_pypy/_cffi_ssl/_cffi_src/openssl/x509_vfy.py +++ b/lib_pypy/_cffi_ssl/_cffi_src/openssl/x509_vfy.py @@ -244,6 +244,14 @@ static const long X509_V_FLAG_SUITEB_192_LOS = 0; static const long X509_V_FLAG_SUITEB_128_LOS = 0; +#if CRYPTOGRAPHY_LIBRESSL_251_OR_GREATER +int X509_VERIFY_PARAM_set1_host(X509_VERIFY_PARAM *, const char *, size_t); +int X509_VERIFY_PARAM_set1_email(X509_VERIFY_PARAM *, const char *, size_t); +int X509_VERIFY_PARAM_set1_ip(X509_VERIFY_PARAM *, const unsigned char *, + size_t); +int X509_VERIFY_PARAM_set1_ip_asc(X509_VERIFY_PARAM *, const char *); +void X509_VERIFY_PARAM_set_hostflags(X509_VERIFY_PARAM *, unsigned int); +#else int (*X509_VERIFY_PARAM_set1_host)(X509_VERIFY_PARAM *, const char *, size_t) = NULL; int (*X509_VERIFY_PARAM_set1_email)(X509_VERIFY_PARAM *, const char *, @@ -254,6 +262,7 @@ void (*X509_VERIFY_PARAM_set_hostflags)(X509_VERIFY_PARAM *, unsigned int) = NULL; #endif +#endif /* OpenSSL 1.0.2+ or Solaris's backport */ #ifdef X509_V_FLAG_PARTIAL_CHAIN diff --git 
a/lib_pypy/_cffi_ssl/osx-roots.diff b/lib_pypy/_cffi_ssl/osx-roots.diff new file mode 100644 --- /dev/null +++ b/lib_pypy/_cffi_ssl/osx-roots.diff @@ -0,0 +1,475 @@ +diff -Naur libressl-2.6.2.orig/crypto/Makefile.am libressl-2.6.2/crypto/Makefile.am +--- libressl-2.6.2.orig/crypto/Makefile.am 2017-09-02 01:49:55.000000000 +0200 ++++ libressl-2.6.2/crypto/Makefile.am 2017-10-07 14:05:16.000000000 +0200 +@@ -92,7 +92,7 @@ + -mv crypto_portable.sym.tmp crypto_portable.sym + endif + +-libcrypto_la_LDFLAGS = -version-info @LIBCRYPTO_VERSION@ -no-undefined -export-symbols crypto_portable.sym ++libcrypto_la_LDFLAGS = -version-info @LIBCRYPTO_VERSION@ -no-undefined -export-symbols crypto_portable.sym -framework Security -framework CoreFoundation + libcrypto_la_LIBADD = libcompat.la + if !HAVE_EXPLICIT_BZERO + libcrypto_la_LIBADD += libcompatnoopt.la +@@ -863,6 +863,7 @@ + libcrypto_la_SOURCES += x509/x509_txt.c + libcrypto_la_SOURCES += x509/x509_v3.c + libcrypto_la_SOURCES += x509/x509_vfy.c ++libcrypto_la_SOURCES += x509/x509_vfy_apple.c + libcrypto_la_SOURCES += x509/x509_vpm.c + libcrypto_la_SOURCES += x509/x509cset.c + libcrypto_la_SOURCES += x509/x509name.c +diff -Naur libressl-2.6.2.orig/crypto/Makefile.in libressl-2.6.2/crypto/Makefile.in +--- libressl-2.6.2.orig/crypto/Makefile.in 2017-09-26 06:07:03.000000000 +0200 ++++ libressl-2.6.2/crypto/Makefile.in 2017-10-07 14:05:24.000000000 +0200 +@@ -426,20 +426,20 @@ + x509/x509_err.c x509/x509_ext.c x509/x509_lu.c x509/x509_obj.c \ + x509/x509_r2x.c x509/x509_req.c x509/x509_set.c \ + x509/x509_trs.c x509/x509_txt.c x509/x509_v3.c x509/x509_vfy.c \ +- x509/x509_vpm.c x509/x509cset.c x509/x509name.c \ +- x509/x509rset.c x509/x509spki.c x509/x509type.c x509/x_all.c \ +- x509v3/pcy_cache.c x509v3/pcy_data.c x509v3/pcy_lib.c \ +- x509v3/pcy_map.c x509v3/pcy_node.c x509v3/pcy_tree.c \ +- x509v3/v3_akey.c x509v3/v3_akeya.c x509v3/v3_alt.c \ +- x509v3/v3_bcons.c x509v3/v3_bitst.c x509v3/v3_conf.c \ +- x509v3/v3_cpols.c x509v3/v3_crld.c x509v3/v3_enum.c \ +- x509v3/v3_extku.c x509v3/v3_genn.c x509v3/v3_ia5.c \ +- x509v3/v3_info.c x509v3/v3_int.c x509v3/v3_lib.c \ +- x509v3/v3_ncons.c x509v3/v3_ocsp.c x509v3/v3_pci.c \ +- x509v3/v3_pcia.c x509v3/v3_pcons.c x509v3/v3_pku.c \ +- x509v3/v3_pmaps.c x509v3/v3_prn.c x509v3/v3_purp.c \ +- x509v3/v3_skey.c x509v3/v3_sxnet.c x509v3/v3_utl.c \ +- x509v3/v3err.c ++ x509/x509_vfy_apple.c x509/x509_vpm.c x509/x509cset.c \ ++ x509/x509name.c x509/x509rset.c x509/x509spki.c \ ++ x509/x509type.c x509/x_all.c x509v3/pcy_cache.c \ ++ x509v3/pcy_data.c x509v3/pcy_lib.c x509v3/pcy_map.c \ ++ x509v3/pcy_node.c x509v3/pcy_tree.c x509v3/v3_akey.c \ ++ x509v3/v3_akeya.c x509v3/v3_alt.c x509v3/v3_bcons.c \ ++ x509v3/v3_bitst.c x509v3/v3_conf.c x509v3/v3_cpols.c \ ++ x509v3/v3_crld.c x509v3/v3_enum.c x509v3/v3_extku.c \ ++ x509v3/v3_genn.c x509v3/v3_ia5.c x509v3/v3_info.c \ ++ x509v3/v3_int.c x509v3/v3_lib.c x509v3/v3_ncons.c \ ++ x509v3/v3_ocsp.c x509v3/v3_pci.c x509v3/v3_pcia.c \ ++ x509v3/v3_pcons.c x509v3/v3_pku.c x509v3/v3_pmaps.c \ ++ x509v3/v3_prn.c x509v3/v3_purp.c x509v3/v3_skey.c \ ++ x509v3/v3_sxnet.c x509v3/v3_utl.c x509v3/v3err.c + am__objects_27 = aes/libcrypto_la-aes-elf-x86_64.lo \ + aes/libcrypto_la-bsaes-elf-x86_64.lo \ + aes/libcrypto_la-vpaes-elf-x86_64.lo \ +@@ -759,11 +759,12 @@ + x509/libcrypto_la-x509_r2x.lo x509/libcrypto_la-x509_req.lo \ + x509/libcrypto_la-x509_set.lo x509/libcrypto_la-x509_trs.lo \ + x509/libcrypto_la-x509_txt.lo x509/libcrypto_la-x509_v3.lo \ +- x509/libcrypto_la-x509_vfy.lo 
x509/libcrypto_la-x509_vpm.lo \ +- x509/libcrypto_la-x509cset.lo x509/libcrypto_la-x509name.lo \ +- x509/libcrypto_la-x509rset.lo x509/libcrypto_la-x509spki.lo \ +- x509/libcrypto_la-x509type.lo x509/libcrypto_la-x_all.lo \ +- x509v3/libcrypto_la-pcy_cache.lo \ ++ x509/libcrypto_la-x509_vfy.lo \ ++ x509/libcrypto_la-x509_vfy_apple.lo \ ++ x509/libcrypto_la-x509_vpm.lo x509/libcrypto_la-x509cset.lo \ ++ x509/libcrypto_la-x509name.lo x509/libcrypto_la-x509rset.lo \ ++ x509/libcrypto_la-x509spki.lo x509/libcrypto_la-x509type.lo \ ++ x509/libcrypto_la-x_all.lo x509v3/libcrypto_la-pcy_cache.lo \ + x509v3/libcrypto_la-pcy_data.lo x509v3/libcrypto_la-pcy_lib.lo \ + x509v3/libcrypto_la-pcy_map.lo x509v3/libcrypto_la-pcy_node.lo \ + x509v3/libcrypto_la-pcy_tree.lo x509v3/libcrypto_la-v3_akey.lo \ +@@ -1000,7 +1001,7 @@ + $(ASM_X86_64_ELF) $(ASM_X86_64_MACOSX) + BUILT_SOURCES = crypto_portable.sym + CLEANFILES = crypto_portable.sym +-libcrypto_la_LDFLAGS = -version-info @LIBCRYPTO_VERSION@ -no-undefined -export-symbols crypto_portable.sym ++libcrypto_la_LDFLAGS = -version-info @LIBCRYPTO_VERSION@ -no-undefined -export-symbols crypto_portable.sym -framework Security -framework CoreFoundation + libcrypto_la_LIBADD = libcompat.la $(am__append_1) + libcrypto_la_CPPFLAGS = $(AM_CPPFLAGS) -DLIBRESSL_INTERNAL \ + -DOPENSSL_NO_HW_PADLOCK $(am__append_2) $(am__append_3) \ +@@ -1272,20 +1273,20 @@ + x509/x509_err.c x509/x509_ext.c x509/x509_lu.c x509/x509_obj.c \ + x509/x509_r2x.c x509/x509_req.c x509/x509_set.c \ + x509/x509_trs.c x509/x509_txt.c x509/x509_v3.c x509/x509_vfy.c \ +- x509/x509_vpm.c x509/x509cset.c x509/x509name.c \ +- x509/x509rset.c x509/x509spki.c x509/x509type.c x509/x_all.c \ +- x509v3/pcy_cache.c x509v3/pcy_data.c x509v3/pcy_lib.c \ +- x509v3/pcy_map.c x509v3/pcy_node.c x509v3/pcy_tree.c \ +- x509v3/v3_akey.c x509v3/v3_akeya.c x509v3/v3_alt.c \ +- x509v3/v3_bcons.c x509v3/v3_bitst.c x509v3/v3_conf.c \ +- x509v3/v3_cpols.c x509v3/v3_crld.c x509v3/v3_enum.c \ +- x509v3/v3_extku.c x509v3/v3_genn.c x509v3/v3_ia5.c \ +- x509v3/v3_info.c x509v3/v3_int.c x509v3/v3_lib.c \ +- x509v3/v3_ncons.c x509v3/v3_ocsp.c x509v3/v3_pci.c \ +- x509v3/v3_pcia.c x509v3/v3_pcons.c x509v3/v3_pku.c \ +- x509v3/v3_pmaps.c x509v3/v3_prn.c x509v3/v3_purp.c \ +- x509v3/v3_skey.c x509v3/v3_sxnet.c x509v3/v3_utl.c \ +- x509v3/v3err.c ++ x509/x509_vfy_apple.c x509/x509_vpm.c x509/x509cset.c \ ++ x509/x509name.c x509/x509rset.c x509/x509spki.c \ ++ x509/x509type.c x509/x_all.c x509v3/pcy_cache.c \ ++ x509v3/pcy_data.c x509v3/pcy_lib.c x509v3/pcy_map.c \ ++ x509v3/pcy_node.c x509v3/pcy_tree.c x509v3/v3_akey.c \ ++ x509v3/v3_akeya.c x509v3/v3_alt.c x509v3/v3_bcons.c \ ++ x509v3/v3_bitst.c x509v3/v3_conf.c x509v3/v3_cpols.c \ ++ x509v3/v3_crld.c x509v3/v3_enum.c x509v3/v3_extku.c \ ++ x509v3/v3_genn.c x509v3/v3_ia5.c x509v3/v3_info.c \ ++ x509v3/v3_int.c x509v3/v3_lib.c x509v3/v3_ncons.c \ ++ x509v3/v3_ocsp.c x509v3/v3_pci.c x509v3/v3_pcia.c \ ++ x509v3/v3_pcons.c x509v3/v3_pku.c x509v3/v3_pmaps.c \ ++ x509v3/v3_prn.c x509v3/v3_purp.c x509v3/v3_skey.c \ ++ x509v3/v3_sxnet.c x509v3/v3_utl.c x509v3/v3err.c + + # chacha + +@@ -2808,6 +2809,8 @@ + x509/$(DEPDIR)/$(am__dirstamp) + x509/libcrypto_la-x509_vfy.lo: x509/$(am__dirstamp) \ + x509/$(DEPDIR)/$(am__dirstamp) ++x509/libcrypto_la-x509_vfy_apple.lo: x509/$(am__dirstamp) \ ++ x509/$(DEPDIR)/$(am__dirstamp) + x509/libcrypto_la-x509_vpm.lo: x509/$(am__dirstamp) \ + x509/$(DEPDIR)/$(am__dirstamp) + x509/libcrypto_la-x509cset.lo: x509/$(am__dirstamp) \ +@@ -3583,6 +3586,7 @@ + 
@AMDEP_TRUE@@am__include@ @am__quote at x509/$(DEPDIR)/libcrypto_la-x509_txt.Plo at am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote at x509/$(DEPDIR)/libcrypto_la-x509_v3.Plo at am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote at x509/$(DEPDIR)/libcrypto_la-x509_vfy.Plo at am__quote@ ++ at AMDEP_TRUE@@am__include@ @am__quote at x509/$(DEPDIR)/libcrypto_la-x509_vfy_apple.Plo at am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote at x509/$(DEPDIR)/libcrypto_la-x509_vpm.Plo at am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote at x509/$(DEPDIR)/libcrypto_la-x509cset.Plo at am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote at x509/$(DEPDIR)/libcrypto_la-x509name.Plo at am__quote@ +@@ -7460,6 +7464,13 @@ + @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + @am__fastdepCC_FALSE@ $(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libcrypto_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x509/libcrypto_la-x509_vfy.lo `test -f 'x509/x509_vfy.c' || echo '$(srcdir)/'`x509/x509_vfy.c + ++x509/libcrypto_la-x509_vfy_apple.lo: x509/x509_vfy_apple.c ++ at am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libcrypto_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT x509/libcrypto_la-x509_vfy_apple.lo -MD -MP -MF x509/$(DEPDIR)/libcrypto_la-x509_vfy_apple.Tpo -c -o x509/libcrypto_la-x509_vfy_apple.lo `test -f 'x509/x509_vfy_apple.c' || echo '$(srcdir)/'`x509/x509_vfy_apple.c ++ at am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) x509/$(DEPDIR)/libcrypto_la-x509_vfy_apple.Tpo x509/$(DEPDIR)/libcrypto_la-x509_vfy_apple.Plo ++ at AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='x509/x509_vfy_apple.c' object='x509/libcrypto_la-x509_vfy_apple.lo' libtool=yes @AMDEPBACKSLASH@ ++ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ ++ at am__fastdepCC_FALSE@ $(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libcrypto_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x509/libcrypto_la-x509_vfy_apple.lo `test -f 'x509/x509_vfy_apple.c' || echo '$(srcdir)/'`x509/x509_vfy_apple.c ++ + x509/libcrypto_la-x509_vpm.lo: x509/x509_vpm.c + @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libcrypto_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT x509/libcrypto_la-x509_vpm.lo -MD -MP -MF x509/$(DEPDIR)/libcrypto_la-x509_vpm.Tpo -c -o x509/libcrypto_la-x509_vpm.lo `test -f 'x509/x509_vpm.c' || echo '$(srcdir)/'`x509/x509_vpm.c + @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) x509/$(DEPDIR)/libcrypto_la-x509_vpm.Tpo x509/$(DEPDIR)/libcrypto_la-x509_vpm.Plo +diff -Naur libressl-2.6.2.orig/crypto/x509/x509_vfy.c libressl-2.6.2/crypto/x509/x509_vfy.c +--- libressl-2.6.2.orig/crypto/x509/x509_vfy.c 2017-09-02 14:01:08.000000000 +0200 ++++ libressl-2.6.2/crypto/x509/x509_vfy.c 2017-10-07 14:05:16.000000000 +0200 +@@ -115,6 +115,13 @@ + + #define CRL_SCORE_TIME_DELTA 0x002 + ++/* ++ * If we are using Trust Evaluation Agent, rename the original function ++ */ ++#ifdef __APPLE__ ++#define X509_verify_cert X509_verify_cert_orig ++#endif ++ + static int null_callback(int ok, X509_STORE_CTX *e); + static int check_issued(X509_STORE_CTX 
*ctx, X509 *x, X509 *issuer); + static X509 *find_issuer(X509_STORE_CTX *ctx, STACK_OF(X509) *sk, X509 *x); +diff -Naur libressl-2.6.2.orig/crypto/x509/x509_vfy_apple.c libressl-2.6.2/crypto/x509/x509_vfy_apple.c +--- libressl-2.6.2.orig/crypto/x509/x509_vfy_apple.c 1970-01-01 01:00:00.000000000 +0100 ++++ libressl-2.6.2/crypto/x509/x509_vfy_apple.c 2017-10-07 14:05:16.000000000 +0200 +@@ -0,0 +1,225 @@ ++/* ++ * Copyright (c) 2009 Apple Inc. All Rights Reserved. ++ * ++ * @APPLE_LICENSE_HEADER_START@ ++ * ++ * This file contains Original Code and/or Modifications of Original Code ++ * as defined in and that are subject to the Apple Public Source License ++ * Version 2.0 (the 'License'). You may not use this file except in ++ * compliance with the License. Please obtain a copy of the License at ++ * http://www.opensource.apple.com/apsl/ and read it before using this ++ * file. ++ * ++ * The Original Code and all software distributed under the License are ++ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER ++ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, ++ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. ++ * Please see the License for the specific language governing rights and ++ * limitations under the License. ++ * ++ * @APPLE_LICENSE_HEADER_END@ ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++ ++#include ++#include ++#include ++ ++#include "cryptlib.h" ++#include "vpm_int.h" ++#include "x509_vfy_apple.h" ++ ++#define TEA_might_correct_error(err) (err == X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY || err == X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT || err == X509_V_ERR_SELF_SIGNED_CERT_IN_CHAIN) ++ ++ ++static bool add_cert_to_array(CFMutableArrayRef array, X509 *x509) ++{ ++ unsigned char *asn1_cert_data = NULL; ++ int asn1_cert_len = i2d_X509(x509, &asn1_cert_data); ++ ++ CFDataRef data = CFDataCreate(kCFAllocatorDefault, asn1_cert_data, asn1_cert_len); ++ ++ if (data == NULL) { ++ return false; ++ } ++ ++ SecCertificateRef cert = SecCertificateCreateWithData(NULL, data); ++ ++ free(asn1_cert_data); ++ ++ if (cert == NULL) { ++ CFRelease(data); ++ return false; ++ } ++ ++ CFArrayAppendValue(array, cert); ++ CFRelease(data); ++ ++ return true; ++} ++ ++static CFStringRef to_string(const char *s) { ++ if (s == NULL) ++ return NULL; ++ return CFStringCreateWithCString(kCFAllocatorDefault, s, ++ kCFStringEncodingASCII); ++} ++ ++static SecPolicyRef get_policy(X509_VERIFY_PARAM *param) { ++ switch (param->purpose) { ++ case X509_PURPOSE_SSL_CLIENT: ++ case X509_PURPOSE_SSL_SERVER: { ++ ++ if (!param->id) { ++ fprintf(stderr, "got no ID!\n"); ++ return NULL; ++ } ++ ++ CFStringRef hostname; ++ int nhosts = sk_OPENSSL_STRING_num(param->id->hosts); ++ ++ if (nhosts != 1) { ++ hostname = NULL; ++ ++ } else { ++ hostname = to_string(sk_OPENSSL_STRING_value(param->id->hosts, 0)); ++ CFShow(hostname); ++ } ++ ++ return SecPolicyCreateSSL(param->purpose == X509_PURPOSE_SSL_SERVER, ++ hostname); ++ } ++ ++ case X509_PURPOSE_NS_SSL_SERVER: ++ case X509_PURPOSE_SMIME_SIGN: ++ case X509_PURPOSE_SMIME_ENCRYPT: ++ case X509_PURPOSE_CRL_SIGN: ++ case X509_PURPOSE_ANY: ++ case X509_PURPOSE_OCSP_HELPER: ++ case X509_PURPOSE_TIMESTAMP_SIGN: ++ default: ++ fprintf(stderr, "unsupported purpose %d", param->purpose); ++ return NULL; ++ } ++} ++ ++/* ++ * Please see comment in x509_vfy_apple.h ++ */ ++int ++X509_verify_cert(X509_STORE_CTX *ctx) ++{ 
++ uint64_t certLastIndex = 0; ++ uint64_t i = 0; ++ ++ /* Try OpenSSL, if we get a local certificate issue verify against trusted roots */ ++ int ret = X509_verify_cert_orig(ctx); ++ ++ /* Verify TEA is enabled and should be used. */ ++ if (0 == X509_TEA_is_enabled() || ++ ret == 1 || !TEA_might_correct_error(ctx->error)) { ++ return ret; ++ } ++ ++ /* Verify that the certificate chain exists, otherwise make it. */ ++ if (ctx->chain == NULL && (ctx->chain = sk_X509_new_null()) == NULL) { ++ fprintf(stderr, "Could not create the certificate chain"); ++ ctx->error = X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY; ++ return -1; ++ } ++ ++ /* Verify chain depth */ ++ certLastIndex = sk_X509_num(ctx->untrusted); ++ if (certLastIndex > ctx->param->depth) { ++ fprintf(stderr, "Pruning certificate chain to %" PRIu64, certLastIndex); ++ certLastIndex = ctx->param->depth; ++ } ++ ++ CFMutableArrayRef certArray = CFArrayCreateMutable(NULL, certLastIndex + 1, NULL); ++ CFRetain(certArray); ++ ++ if (!add_cert_to_array(certArray, ctx->cert)) { ++ fprintf(stderr, "Failed to add certificate to array"); ++ CFRelease(certArray); ++ ctx->error = X509_V_ERR_UNSPECIFIED; ++ return -1; ++ } ++ ++ for (i = 0; i < certLastIndex; ++i) { ++ X509 *t = sk_X509_value(ctx->untrusted, i); ++ if (!add_cert_to_array(certArray, t)) { ++ fprintf(stderr, "Failed to add chain certificate %lld to array", i); ++ CFRelease(certArray); ++ ctx->error = X509_V_ERR_UNSPECIFIED; ++ return 0; ++ } ++ } ++ ++ // We put ASN.1 encoded X509 on the CertificateChain, so we don't call TEACertificateChainSetEncodingHandler ++ SecPolicyRef policy = get_policy(ctx->param); ++ ++ if (policy == NULL) { ++ fprintf(stderr, "Failed to create policy!\n"); ++ CFRelease(certArray); ++ ctx->error = X509_V_ERR_UNSPECIFIED; ++ return -1; ++ } ++ ++ SecTrustRef trust = NULL; ++ ++ if (SecTrustCreateWithCertificates(certArray, policy, &trust) != errSecSuccess) { ++ fprintf(stderr, "Failed to create trust!\n"); ++ CFRelease(certArray); ++ ctx->error = X509_V_ERR_CERT_UNTRUSTED; ++ return -1; ++ } ++ ++ if (ctx->param->flags & X509_V_FLAG_USE_CHECK_TIME) { ++ fprintf(stderr, "Setting time not supported yet?\n"); ++ SecTrustSetVerifyDate(trust, CFDateCreate(NULL, ctx->param->check_time)); ++ } ++ ++ SecTrustResultType result = 0; ++ ++ if (SecTrustEvaluate(trust, &result) != errSecSuccess || result != kSecTrustResultUnspecified) { ++ CFRelease(certArray); ++ ctx->error = X509_V_ERR_CERT_UNTRUSTED; ++ return 0; ++ } ++ ++ CFRelease(certArray); ++ ctx->error = 0; ++ return 1; ++} ++ ++#pragma mark Trust Evaluation Agent ++ ++/* -1: not set ++ * 0: set to false ++ * 1: set to true ++ */ ++static int tea_enabled = -1; ++ ++void ++X509_TEA_set_state(int change) ++{ ++ tea_enabled = (change) ? 1 : 0; ++} ++ ++int ++X509_TEA_is_enabled() ++{ ++ if (tea_enabled < 0) ++ tea_enabled = (NULL == getenv(X509_TEA_ENV_DISABLE)); ++ ++ return tea_enabled != 0; ++} +diff -Naur libressl-2.6.2.orig/crypto/x509/x509_vfy_apple.h libressl-2.6.2/crypto/x509/x509_vfy_apple.h +--- libressl-2.6.2.orig/crypto/x509/x509_vfy_apple.h 1970-01-01 01:00:00.000000000 +0100 ++++ libressl-2.6.2/crypto/x509/x509_vfy_apple.h 2017-10-07 14:05:16.000000000 +0200 +@@ -0,0 +1,74 @@ ++/* ++ * Copyright (c) 2009 Apple Inc. All Rights Reserved. ++ * ++ * @APPLE_LICENSE_HEADER_START@ ++ * ++ * This file contains Original Code and/or Modifications of Original Code ++ * as defined in and that are subject to the Apple Public Source License ++ * Version 2.0 (the 'License'). 
You may not use this file except in ++ * compliance with the License. Please obtain a copy of the License at ++ * http://www.opensource.apple.com/apsl/ and read it before using this ++ * file. ++ * ++ * The Original Code and all software distributed under the License are ++ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER ++ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, ++ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. ++ * Please see the License for the specific language governing rights and ++ * limitations under the License. ++ * ++ * @APPLE_LICENSE_HEADER_END@ ++ * ++ */ ++ ++#ifndef HEADER_X509_H ++#include ++#endif ++ ++#ifndef HEADER_X509_VFY_APPLE_H ++#define HEADER_X509_VFY_APPLE_H ++ ++/* Environment variable name to disable TEA. */ ++#define X509_TEA_ENV_DISABLE "OPENSSL_X509_TEA_DISABLE" ++ ++/* ++ * X509_verify_cert ++ * ++ * Originally located in x509_vfy.c. ++ * ++ * Verify certificate with OpenSSL created X509_verify_cert. If and only if ++ * OpenSSL cannot get certificate issuer locally then OS X security API will ++ * verify the certificate, using Trust Evaluation Agent. ++ * ++ * Return values: ++ * -------------- ++ * -1: Null was passed for either ctx or ctx->cert. ++ * 0: Certificate is trusted. ++ * 1: Certificate is not trusted. ++ */ ++int X509_verify_cert(X509_STORE_CTX *ctx); ++ ++/* ++ * X509_TEA_is_enabled ++ * ++ * Is the Trust Evaluation Agent (TEA) used for certificate verification when ++ * the issuer cannot be verified. ++ * ++ * Returns 0 if TEA is disabled and 1 if TEA is enabled. ++ */ ++int X509_TEA_is_enabled(); ++ ++/* ++ * X509_TEA_set_state ++ * ++ * Enables/disables certificate verification with Trust Evaluation Agent (TEA) ++ * when the issuer cannot be verified. ++ * ++ * Pass 0 to disable TEA and non-zero to enable TEA. ++ */ ++void X509_TEA_set_state(int change); ++ ++int X509_verify_cert_orig(X509_STORE_CTX *ctx); ++ ++#endif /* HEADER_X509_VFY_APPLE_H */ diff --git a/lib_pypy/_ctypes_test.py b/lib_pypy/_ctypes_test.py --- a/lib_pypy/_ctypes_test.py +++ b/lib_pypy/_ctypes_test.py @@ -21,5 +21,11 @@ with fp: imp.load_module('_ctypes_test', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass print('could not find _ctypes_test in %s' % output_dir) _pypy_testcapi.compile_shared('_ctypes_test.c', '_ctypes_test', output_dir) diff --git a/lib_pypy/_pypy_testcapi.py b/lib_pypy/_pypy_testcapi.py --- a/lib_pypy/_pypy_testcapi.py +++ b/lib_pypy/_pypy_testcapi.py @@ -8,7 +8,8 @@ content = fid.read() # from cffi's Verifier() key = '\x00'.join([sys.version[:3], content]) - key += 'cpyext-gc-support-2' # this branch requires recompilation! 
+ # change the key to force recompilation + key += '2017-11-21' if sys.version_info >= (3,): key = key.encode('utf-8') k1 = hex(binascii.crc32(key[0::2]) & 0xffffffff) diff --git a/lib_pypy/_testcapi.py b/lib_pypy/_testcapi.py --- a/lib_pypy/_testcapi.py +++ b/lib_pypy/_testcapi.py @@ -17,6 +17,12 @@ with fp: imp.load_module('_testcapi', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass _pypy_testcapi.compile_shared(cfile, '_testcapi', output_dir) diff --git a/lib_pypy/_testcapimodule.c b/lib_pypy/_testcapimodule.c --- a/lib_pypy/_testcapimodule.c +++ b/lib_pypy/_testcapimodule.c @@ -915,12 +915,6 @@ return -1; } Py_DECREF(res); - if (Py_REFCNT(arg) != 1) { - PyErr_Format(TestError, "test_buildvalue_N: " - "arg was not decrefed in successful " - "Py_BuildValue(\"%s\")", fmt); - return -1; - } Py_INCREF(arg); res = Py_BuildValue(fmt, raise_error, NULL, arg); @@ -930,12 +924,6 @@ return -1; } PyErr_Clear(); - if (Py_REFCNT(arg) != 1) { - PyErr_Format(TestError, "test_buildvalue_N: " - "arg was not decrefed in failed " - "Py_BuildValue(\"%s\")", fmt); - return -1; - } Py_DECREF(arg); return 0; } @@ -958,10 +946,6 @@ return raiseTestError("test_buildvalue_N", "Py_BuildValue(\"N\") returned wrong result"); } - if (Py_REFCNT(arg) != 2) { - return raiseTestError("test_buildvalue_N", - "arg was not decrefed in Py_BuildValue(\"N\")"); - } Py_DECREF(res); Py_DECREF(arg); @@ -2834,8 +2818,6 @@ return PyMemoryView_FromBuffer(&info); } -#ifndef PYPY_VERSION - static PyObject * test_from_contiguous(PyObject* self, PyObject *noargs) { @@ -2885,7 +2867,6 @@ Py_RETURN_NONE; } -#endif /* PYPY_VERSION */ #if (defined(__linux__) || defined(__FreeBSD__)) && defined(__GNUC__) && !defined(PYPY_VERSION) extern PyTypeObject _PyBytesIOBuffer_Type; @@ -3923,9 +3904,7 @@ {"test_string_to_double", (PyCFunction)test_string_to_double, METH_NOARGS}, {"test_unicode_compare_with_ascii", (PyCFunction)test_unicode_compare_with_ascii, METH_NOARGS}, {"test_capsule", (PyCFunction)test_capsule, METH_NOARGS}, -#ifndef PYPY_VERSION {"test_from_contiguous", (PyCFunction)test_from_contiguous, METH_NOARGS}, -#endif #if (defined(__linux__) || defined(__FreeBSD__)) && defined(__GNUC__) && !defined(PYPY_VERSION) {"test_pep3118_obsolete_write_locks", (PyCFunction)test_pep3118_obsolete_write_locks, METH_NOARGS}, #endif diff --git a/lib_pypy/_tkinter/app.py b/lib_pypy/_tkinter/app.py --- a/lib_pypy/_tkinter/app.py +++ b/lib_pypy/_tkinter/app.py @@ -185,6 +185,9 @@ if err == tklib.TCL_ERROR: self.raiseTclError() + def interpaddr(self): + return int(tkffi.cast('size_t', self.interp)) + def _var_invoke(self, func, *args, **kwargs): if self.threaded and self.thread_id != tklib.Tcl_GetCurrentThread(): # The current thread is not the interpreter thread. diff --git a/lib_pypy/resource.py b/lib_pypy/resource.py --- a/lib_pypy/resource.py +++ b/lib_pypy/resource.py @@ -20,6 +20,7 @@ or via the attributes ru_utime, ru_stime, ru_maxrss, and so on.""" __metaclass__ = _structseq.structseqtype + name = "resource.struct_rusage" ru_utime = _structseq.structseqfield(0, "user time used") ru_stime = _structseq.structseqfield(1, "system time used") diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst --- a/pypy/doc/faq.rst +++ b/pypy/doc/faq.rst @@ -182,6 +182,57 @@ technical difficulties. +What about numpy, numpypy, micronumpy? +-------------------------------------- + +Way back in 2011, the PyPy team `started to reimplement`_ numpy in PyPy. 
It +has two pieces: + + * the builtin module :source:`pypy/module/micronumpy`: this is written in + RPython and roughly covers the content of the ``numpy.core.multiarray`` + module. Confusingly enough, this is available in PyPy under the name + ``_numpypy``. It is included by default in all the official releases of + PyPy (but it might be dropped in the future). + + * a fork_ of the official numpy repository maintained by us and informally + called ``numpypy``: even more confusing, the name of the repo on bitbucket + is ``numpy``. The main difference with the upstream numpy, is that it is + based on the micronumpy module written in RPython, instead of + ``numpy.core.multiarray`` which is written in C. + +Moreover, it is also possible to install the upstream version of ``numpy``: +its core is written in C and it runs on PyPy under the cpyext compatibility +layer. This is what you get if you do ``pypy -m pip install numpy``. + + +Should I install numpy or numpypy? +----------------------------------- + +TL;DR version: you should use numpy. You can install it by doing ``pypy -m pip +install numpy``. You might also be interested in using the experimental `PyPy +binary wheels`_ to save compilation time. + +The upstream ``numpy`` is written in C, and runs under the cpyext +compatibility layer. Nowadays, cpyext is mature enough that you can simply +use the upstream ``numpy``, since it passes 99.9% of the test suite. At the +moment of writing (October 2017) the main drawback of ``numpy`` is that cpyext +is infamously slow, and thus it has worse performance compared to +``numpypy``. However, we are actively working on improving it, as we expect to +reach the same speed, eventually. + +On the other hand, ``numpypy`` is more JIT-friendly and very fast to call, +since it is written in RPython: but it is a reimplementation, and it's hard to +be completely compatible: over the years the project slowly matured and +eventually it was able to call out to the LAPACK and BLAS libraries to speed +matrix calculations, and reached around an 80% parity with the upstream +numpy. However, 80% is far from 100%. Since cpyext/numpy compatibility is +progressing fast, we have discontinued support for ``numpypy``. + +.. _`started to reimplement`: https://morepypy.blogspot.co.il/2011/05/numpy-in-pypy-status-and-roadmap.html +.. _fork: https://bitbucket.org/pypy/numpy +.. _`PyPy binary wheels`: https://github.com/antocuni/pypy-wheels + + Is PyPy more clever than CPython about Tail Calls? -------------------------------------------------- diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -10,3 +10,22 @@ .. branch: docs-osx-brew-openssl +.. branch: keep-debug-symbols +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests +Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch +Fix a vmprof+continulets (i.e. greenlets, eventlet, gevent, ...) + +.. branch: win32-vcvars + diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst --- a/pypy/doc/whatsnew-pypy3-head.rst +++ b/pypy/doc/whatsnew-pypy3-head.rst @@ -11,3 +11,8 @@ .. 
branch: py3.5-appexec +Raise if space.is_true(space.appexec()) used in app level tests, fix tests +that did this + +.. branch: py3.5-mac-embedding +Download and patch dependencies when building cffi-based stdlib modules + +.. branch: os_lockf diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -25,8 +25,10 @@ This compiler, while the standard one for Python 2.7, is deprecated. Microsoft has made it available as the `Microsoft Visual C++ Compiler for Python 2.7`_ (the link -was checked in Nov 2016). Note that the compiler suite will be installed in -``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python``. +was checked in Nov 2016). Note that the compiler suite may be installed in +``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python`` +or in +``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python``. A current version of ``setuptools`` will be able to find it there. For Windows 10, you must right-click the download, and under ``Properties`` -> ``Compatibility`` mark it as ``Run this program in compatibility mode for`` @@ -41,7 +43,6 @@ ----------------------------------- We routinely test translation using v9, also known as Visual Studio 2008. -Our buildbot is still using the Express Edition, not the compiler noted above. Other configurations may work as well. The translation scripts will set up the appropriate environment variables @@ -81,6 +82,30 @@ .. _build instructions: http://pypy.org/download.html#building-from-source +Setting Up Visual Studio for building SSL in Python3 +---------------------------------------------------- + +On Python3, the ``ssl`` module is based on ``cffi``, and requires a build step after +translation. However ``distutils`` does not support the Microsoft-provided Visual C +compiler, and ``cffi`` depends on ``distutils`` to find the compiler. The +traditional solution to this problem is to install the ``setuptools`` module +via running ``-m ensurepip`` which installs ``pip`` and ``setuptools``. However +``pip`` requires ``ssl``. So we have a chicken-and-egg problem: ``ssl`` depends on +``cffi`` which depends on ``setuptools``, which depends on ``ensurepip``, which +depends on ``ssl``. + +In order to solve this, the buildbot sets an environment variable that helps +``distutils`` find the compiler without ``setuptools``:: + + set VS90COMNTOOLS=C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0\VC\bin + +or whatever is appropriate for your machine. 
Note that this is not enough, you +must also copy the ``vcvarsall.bat`` file from the ``...\9.0`` directory to the +``...\9.0\VC`` directory, and edit it, changing the lines that set +``VCINSTALLDIR`` and ``WindowsSdkDir``:: + set VCINSTALLDIR=%~dp0\ + set WindowsSdkDir=%~dp0\..\WinSDK\ + Preparing Windows for the large build ------------------------------------- diff --git a/pypy/goal/getnightly.py b/pypy/goal/getnightly.py --- a/pypy/goal/getnightly.py +++ b/pypy/goal/getnightly.py @@ -15,7 +15,7 @@ arch = 'linux' cmd = 'wget "%s"' TAR_OPTIONS += ' --wildcards' - binfiles = "'*/bin/pypy3' '*/bin/libpypy3-c.so'" + binfiles = "'*/bin/pypy3*' '*/bin/libpypy3-c.so*'" if os.uname()[-1].startswith('arm'): arch += '-armhf-raspbian' elif sys.platform.startswith('darwin'): diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py --- a/pypy/goal/targetpypystandalone.py +++ b/pypy/goal/targetpypystandalone.py @@ -352,8 +352,9 @@ def hack_for_cffi_modules(self, driver): # HACKHACKHACK - # ugly hack to modify target goal from compile_* to build_cffi_imports - # this should probably get cleaned up and merged with driver.create_exe + # ugly hack to modify target goal from compile_* to build_cffi_imports, + # as done in package.py + # this is needed by the benchmark buildbot run, maybe do it as a separate step there? from rpython.tool.runsubprocess import run_subprocess from rpython.translator.driver import taskdef import types @@ -363,11 +364,14 @@ def task_build_cffi_imports(self): ''' Use cffi to compile cffi interfaces to modules''' filename = os.path.join(pypydir, 'tool', 'build_cffi_imports.py') + if sys.platform == 'darwin': + argv = [filename, '--embed-dependencies'] + else: + argv = [filename,] status, out, err = run_subprocess(str(driver.compute_exe_name()), - [filename]) + argv) sys.stdout.write(out) sys.stderr.write(err) - # otherwise, ignore errors driver.task_build_cffi_imports = types.MethodType(task_build_cffi_imports, driver) driver.tasks['build_cffi_imports'] = driver.task_build_cffi_imports, [compile_goal] driver.default_goal = 'build_cffi_imports' diff --git a/pypy/interpreter/app_main.py b/pypy/interpreter/app_main.py --- a/pypy/interpreter/app_main.py +++ b/pypy/interpreter/app_main.py @@ -579,6 +579,8 @@ __pypy__.save_module_content_for_future_reload(sys) mainmodule = type(sys)('__main__') + mainmodule.__loader__ = sys.__loader__ + mainmodule.__builtins__ = os.__builtins__ sys.modules['__main__'] = mainmodule if not no_site: @@ -727,7 +729,7 @@ SourceFileLoader, SourcelessFileLoader) if IS_WINDOWS: filename = filename.lower() - if filename.endswith('.pyc') or filename.endswith('.pyo'): + if filename.endswith('.pyc'): # We don't actually load via SourcelessFileLoader # because '__main__' must not be listed inside # 'importlib._bootstrap._module_locks' (it deadlocks diff --git a/pypy/interpreter/mixedmodule.py b/pypy/interpreter/mixedmodule.py --- a/pypy/interpreter/mixedmodule.py +++ b/pypy/interpreter/mixedmodule.py @@ -254,7 +254,7 @@ assert typ == imp.PY_SOURCE source = file.read() file.close() - if fn.endswith('.pyc') or fn.endswith('.pyo'): + if fn.endswith('.pyc'): fn = fn[:-1] app = gateway.applevel(source, filename=fn, modname=appname) applevelcache[impbase] = app diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py --- a/pypy/interpreter/pyparser/error.py +++ b/pypy/interpreter/pyparser/error.py @@ -14,7 +14,20 @@ def wrap_info(self, space): w_text = w_filename = space.w_None offset = self.offset - if self.text is 
not None: + w_lineno = space.newint(self.lineno) + if self.filename is not None: + w_filename = space.newfilename(self.filename) + if self.text is None and self.filename is not None: + w_text = space.appexec([w_filename, w_lineno], + """(filename, lineno): + try: + with open(filename) as f: + for _ in range(lineno - 1): + f.readline() + return f.readline() + except: # we can't allow any exceptions here! + return None""") + elif self.text is not None: from rpython.rlib.runicode import str_decode_utf_8 # self.text may not be UTF-8 in case of decoding errors. # adjust the encoded text offset to a decoded offset @@ -29,20 +42,15 @@ text, _ = str_decode_utf_8(self.text, len(self.text), 'replace') w_text = space.newunicode(text) - if self.filename is not None: - w_filename = space.newfilename(self.filename) - return space.newtuple([space.newtext(self.msg), - space.newtuple([w_filename, - space.newint(self.lineno), - space.newint(offset), - w_text, - space.newint(self.lastlineno)])]) + return space.newtuple([ + space.newtext(self.msg), + space.newtuple([ + w_filename, w_lineno, space.newint(offset), + w_text, space.newint(self.lastlineno)])]) def __str__(self): - return "%s at pos (%d, %d) in %r" % (self.__class__.__name__, - self.lineno, - self.offset, - self.text) + return "%s at pos (%d, %d) in %r" % ( + self.__class__.__name__, self.lineno, self.offset, self.text) class IndentationError(SyntaxError): pass @@ -51,10 +59,11 @@ def __init__(self, lineno=0, offset=0, text=None, filename=None, lastlineno=0): msg = "inconsistent use of tabs and spaces in indentation" - IndentationError.__init__(self, msg, lineno, offset, text, filename, lastlineno) + IndentationError.__init__( + self, msg, lineno, offset, text, filename, lastlineno) class ASTError(Exception): - def __init__(self, msg, ast_node ): + def __init__(self, msg, ast_node): self.msg = msg self.ast_node = ast_node diff --git a/pypy/interpreter/test/test_gateway.py b/pypy/interpreter/test/test_gateway.py --- a/pypy/interpreter/test/test_gateway.py +++ b/pypy/interpreter/test/test_gateway.py @@ -18,7 +18,7 @@ class TestBuiltinCode: - def test_signature(self): + def test_signature(self, space): def c(space, w_x, w_y, hello_w): pass code = gateway.BuiltinCode(c, unwrap_spec=[gateway.ObjSpace, @@ -53,6 +53,8 @@ code = gateway.BuiltinCode(f, unwrap_spec=[gateway.ObjSpace, "kwonly", W_Root]) assert code.signature() == Signature([], kwonlyargnames=['x']) + assert space.int_w(space.getattr( + code, space.newtext('co_kwonlyargcount'))) == 1 def test_call(self): diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py --- a/pypy/interpreter/typedef.py +++ b/pypy/interpreter/typedef.py @@ -16,7 +16,7 @@ @not_rpython def __init__(self, __name, __base=None, __total_ordering__=None, __buffer=None, __confirm_applevel_del__=False, - variable_sized=False, **rawdict): + _text_signature_=None, variable_sized=False, **rawdict): "initialization-time only" self.name = __name if __base is None: @@ -36,6 +36,7 @@ assert '__del__' not in rawdict self.weakrefable = '__weakref__' in rawdict self.doc = rawdict.get('__doc__', None) + self.text_signature = _text_signature_ for base in bases: self.hasdict |= base.hasdict self.weakrefable |= base.weakrefable @@ -539,6 +540,9 @@ def fget_co_argcount(space, code): # unwrapping through unwrap_spec return space.newint(code.signature().num_argnames()) +def fget_co_kwonlyargcount(space, code): # unwrapping through unwrap_spec + return space.newint(code.signature().num_kwonlyargnames()) + def fget_zero(space, code): 
return space.newint(0) @@ -598,7 +602,7 @@ co_name = interp_attrproperty('co_name', cls=BuiltinCode, wrapfn="newtext_or_none"), co_varnames = GetSetProperty(fget_co_varnames, cls=BuiltinCode), co_argcount = GetSetProperty(fget_co_argcount, cls=BuiltinCode), - co_kwonlyargcount = GetSetProperty(fget_zero, cls=BuiltinCode), + co_kwonlyargcount = GetSetProperty(fget_co_kwonlyargcount, cls=BuiltinCode), co_flags = GetSetProperty(fget_co_flags, cls=BuiltinCode), co_consts = GetSetProperty(fget_co_consts, cls=BuiltinCode), ) diff --git a/pypy/module/_continuation/test/conftest.py b/pypy/module/_continuation/test/conftest.py new file mode 100644 --- /dev/null +++ b/pypy/module/_continuation/test/conftest.py @@ -0,0 +1,7 @@ +import pytest +import sys + +def pytest_configure(config): + if sys.platform.startswith('linux'): + from rpython.rlib.rvmprof.cintf import configure_libbacktrace_linux + configure_libbacktrace_linux() diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -8,6 +8,35 @@ cls.w_translated = cls.space.wrap( os.path.join(os.path.dirname(__file__), 'test_translated.py')) + cls.w_stack = cls.space.appexec([], """(): + import sys + def stack(f=None): + ''' + get the call-stack of the caller or the specified frame + ''' + if f is None: + f = sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame cycle + res.append('...') + break + if f.f_code.co_name == 'runtest': + # if we are running with -A, cut all the stack above + # the test function + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + #print res + return res + return stack + """) + if cls.runappdirect: + # make sure that "self.stack" does not pass the self + cls.w_stack = staticmethod(cls.w_stack.im_func) def test_new_empty(self): from _continuation import continulet @@ -339,17 +368,24 @@ def test_f_back(self): import sys from _continuation import continulet + stack = self.stack # def bar(c): + assert stack() == ['bar', 'foo', 'test_f_back'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) + # + assert stack() == ['bar', 'foo', 'main', 'test_f_back'] c.switch(sys._getframe(1).f_back) + # + assert stack() == ['bar', 'foo', 'main2', 'test_f_back'] assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) def foo(c): bar(c) # + assert stack() == ['test_f_back'] c = continulet(foo) f1_bar = c.switch() assert f1_bar.f_code.co_name == 'bar' @@ -358,14 +394,20 @@ f3_foo = c.switch() assert f3_foo is f2_foo assert f1_bar.f_back is f3_foo + # def main(): f4_main = c.switch() assert f4_main.f_code.co_name == 'main' assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack() == ['main', 'test_f_back'] + assert stack(f1_bar) == ['bar', 'foo', '...'] + # def main2(): From pypy.commits at gmail.com Fri Dec 8 21:44:43 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 08 Dec 2017 18:44:43 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: translation fixes Message-ID: <5a2b4e1b.02431c0a.11cb5.1d46@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93321:598f10607a50 Date: 2017-12-09 02:44 +0000 http://bitbucket.org/pypy/pypy/changeset/598f10607a50/ Log: translation fixes diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py 
@@ -3,6 +3,7 @@ from rpython.rlib.objectmodel import specialize, always_inline, r_dict from rpython.rlib import rfloat, runicode, rutf8 from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.rlib.rarithmetic import r_uint from pypy.interpreter.error import oefmt from pypy.interpreter import unicodehelper @@ -366,7 +367,7 @@ return # help the annotator to know that we'll never go beyond # this point # - utf8_ch = rutf8.unichr_as_utf8(val, allow_surrogates=True) + utf8_ch = rutf8.unichr_as_utf8(r_uint(val), allow_surrogates=True) builder.append(utf8_ch) return i @@ -400,7 +401,7 @@ break elif ch == '\\' or ch < '\x20': self.pos = i-1 - return self.space.unicode_w(self.decode_string_escaped(start)) + return self.decode_string_escaped(start) strhash = intmask((1000003 * strhash) ^ ord(ll_chars[i])) bits |= ord(ch) length = i - start - 1 diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py --- a/pypy/module/_rawffi/alt/type_converter.py +++ b/pypy/module/_rawffi/alt/type_converter.py @@ -128,7 +128,7 @@ intval: lltype.Signed """ self.error(w_ffitype, w_obj) - + def handle_unichar(self, w_ffitype, w_obj, intval): """ intval: lltype.Signed @@ -174,7 +174,7 @@ def handle_struct_rawffi(self, w_ffitype, w_structinstance): """ This method should be killed as soon as we remove support for _rawffi structures - + w_structinstance: W_StructureInstance """ self.error(w_ffitype, w_structinstance) @@ -228,7 +228,7 @@ return space.newbytes(chr(ucharval)) elif w_ffitype.is_unichar(): wcharval = self.get_unichar(w_ffitype) - return space.newutf8(rutf8.unichr_as_utf8(wcharval), 1) + return space.newutf8(rutf8.unichr_as_utf8(r_uint(wcharval)), 1) elif w_ffitype.is_double(): return self._float(w_ffitype) elif w_ffitype.is_singlefloat(): @@ -349,7 +349,7 @@ def get_struct_rawffi(self, w_ffitype, w_structdescr): """ This should be killed as soon as we kill support for _rawffi structures - + Return type: lltype.Unsigned (the address of the structure) """ diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -596,9 +596,9 @@ return space.w_None wcharp_addr = rffi.cast(rffi.CWCHARP, address) if maxlength == -1: - s = rffi.wcharp2utf8(wcharp_addr) + s = rffi.wcharp2unicode(wcharp_addr) else: - s = rffi.wcharpsize2utf8(wcharp_addr, maxlength) + s = rffi.wcharp2unicoden(wcharp_addr, maxlength) return space.newunicode(s) @unwrap_spec(address=r_uint, maxlength=int) diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -1,7 +1,7 @@ from rpython.rlib import jit, rgc, rutf8 from rpython.rlib.buffer import RawBuffer from rpython.rlib.objectmodel import keepalive_until_here -from rpython.rlib.rarithmetic import ovfcheck, widen +from rpython.rlib.rarithmetic import ovfcheck, widen, r_uint from rpython.rlib.unroll import unrolling_iterable from rpython.rtyper.annlowlevel import llstr from rpython.rtyper.lltypesystem import lltype, rffi @@ -1013,7 +1013,7 @@ elif mytype.typecode == 'c': return space.newbytes(item) elif mytype.typecode == 'u': - code = ord(item) + code = r_uint(ord(item)) return space.newutf8(rutf8.unichr_as_utf8(code), 1) assert 0, "unreachable" diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py --- a/pypy/module/pyexpat/interp_pyexpat.py +++ b/pypy/module/pyexpat/interp_pyexpat.py @@ 
-483,7 +483,7 @@ except rutf8.CheckError: from pypy.interpreter import unicodehelper # get the correct error msg - unicodehelper.str_decode_utf8(s, len(s), 'string', True, + unicodehelper.str_decode_utf8(s, 'string', True, unicodehelper.decode_error_handler(space)) assert False, "always raises" else: diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -3,7 +3,7 @@ from rpython.rlib import jit, rutf8 from rpython.rlib.objectmodel import specialize -from rpython.rlib.rarithmetic import INT_MAX +from rpython.rlib.rarithmetic import INT_MAX, r_uint from rpython.rlib.rfloat import DTSF_ALT, formatd, isnan, isinf from rpython.rlib.rstring import StringBuilder from rpython.rlib.unroll import unrolling_iterable @@ -330,7 +330,7 @@ space = self.space if do_unicode: cp = rutf8.codepoint_at_pos(self.fmt, self.fmtpos - 1) - w_s = space.newutf8(rutf8.unichr_as_utf8(cp), 1) + w_s = space.newutf8(rutf8.unichr_as_utf8(r_uint(cp)), 1) else: cp = ord(self.fmt[self.fmtpos - 1]) w_s = space.newbytes(chr(cp)) @@ -466,7 +466,7 @@ n = space.int_w(w_value) if do_unicode: try: - c = rutf8.unichr_as_utf8(n) + c = rutf8.unichr_as_utf8(r_uint(n)) except ValueError: raise oefmt(space.w_OverflowError, "unicode character code out of range") From pypy.commits at gmail.com Fri Dec 8 21:46:49 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 08 Dec 2017 18:46:49 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-test: hg merge unicode-utf8 Message-ID: <5a2b4e99.90aa1c0a.7ca66.4065@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8-test Changeset: r93322:33d09fc56c08 Date: 2017-12-08 13:28 +0000 http://bitbucket.org/pypy/pypy/changeset/33d09fc56c08/ Log: hg merge unicode-utf8 diff too long, truncating to 2000 out of 3186 lines diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -9,5 +9,6 @@ * remove assertions from W_UnicodeObject.__init__ if all the builders pass * what to do with error handlers that go backwards. 
There were tests in test_codecs that would check for that +* improve performance of splitlines * fix _pypyjson to not use a wrapped dict when decoding an object diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1087,8 +1087,11 @@ def newlist_utf8(self, list_u, is_ascii): l_w = [None] * len(list_u) for i, item in enumerate(list_u): - length, flag = rutf8.check_utf8(item, True) - l_w[i] = self.newutf8(item, length, flag) + if not is_ascii: + length = rutf8.check_utf8(item, True) + else: + length = len(item) + l_w[i] = self.newutf8(item, length) return self.newlist(l_w) def newlist_int(self, list_i): diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py --- a/pypy/interpreter/pyparser/parsestring.py +++ b/pypy/interpreter/pyparser/parsestring.py @@ -64,8 +64,8 @@ r = unicodehelper.decode_raw_unicode_escape(space, substr) else: r = unicodehelper.decode_unicode_escape(space, substr) - v, length, flag = r - return space.newutf8(v, length, flag) + v, length = r + return space.newutf8(v, length) need_encoding = (encoding is not None and encoding != "utf-8" and encoding != "utf8" and @@ -74,8 +74,8 @@ substr = s[ps : q] if rawmode or '\\' not in s[ps:]: if need_encoding: - lgt, flag = unicodehelper.check_utf8_or_raise(space, substr) - w_u = space.newutf8(substr, lgt, flag) + lgt = unicodehelper.check_utf8_or_raise(space, substr) + w_u = space.newutf8(substr, lgt) w_v = unicodehelper.encode(space, w_u, encoding) return w_v else: @@ -234,8 +234,8 @@ p = ps while p < end and ord(s[p]) & 0x80: p += 1 - lgt, flag = unicodehelper.check_utf8_or_raise(space, s, ps, p) - w_v = unicodehelper.encode(space, space.newutf8(s[ps:p], lgt, flag), + lgt = unicodehelper.check_utf8_or_raise(space, s, ps, p) + w_v = unicodehelper.encode(space, space.newutf8(s[ps:p], lgt), recode_encoding) v = space.bytes_w(w_v) return v, p diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -10,13 +10,13 @@ return str_decode_utf8(u, True, "strict", None) def test_decode_utf8(): - assert decode_utf8("abc") == ("abc", 3, 3, rutf8.FLAG_ASCII) - assert decode_utf8("\xe1\x88\xb4") == ("\xe1\x88\xb4", 3, 1, rutf8.FLAG_REGULAR) - assert decode_utf8("\xed\xa0\x80") == ("\xed\xa0\x80", 3, 1, rutf8.FLAG_HAS_SURROGATES) - assert decode_utf8("\xed\xb0\x80") == ("\xed\xb0\x80", 3, 1, rutf8.FLAG_HAS_SURROGATES) + assert decode_utf8("abc") == ("abc", 3, 3) + assert decode_utf8("\xe1\x88\xb4") == ("\xe1\x88\xb4", 3, 1) + assert decode_utf8("\xed\xa0\x80") == ("\xed\xa0\x80", 3, 1) + assert decode_utf8("\xed\xb0\x80") == ("\xed\xb0\x80", 3, 1) assert decode_utf8("\xed\xa0\x80\xed\xb0\x80") == ( - "\xed\xa0\x80\xed\xb0\x80", 6, 2, rutf8.FLAG_HAS_SURROGATES) - assert decode_utf8("\xf0\x90\x80\x80") == ("\xf0\x90\x80\x80", 4, 1, rutf8.FLAG_REGULAR) + "\xed\xa0\x80\xed\xb0\x80", 6, 2) + assert decode_utf8("\xf0\x90\x80\x80") == ("\xf0\x90\x80\x80", 4, 1) def test_utf8_encode_ascii(): assert utf8_encode_ascii("abc", "??", "??") == "abc" @@ -41,19 +41,19 @@ assert utf8_encode_ascii(u.encode("utf8"), "replace", eh) == u.encode("ascii", "replace") def test_str_decode_ascii(): - assert str_decode_ascii("abc", "??", True, "??") == ("abc", 3, 3, rutf8.FLAG_ASCII) + assert str_decode_ascii("abc", "??", True, "??") == ("abc", 3, 3) def eh(errors, encoding, 
reason, p, start, end): lst.append((errors, encoding, p, start, end)) return u"\u1234\u5678".encode("utf8"), end lst = [] input = "\xe8" exp = u"\u1234\u5678".encode("utf8") - assert str_decode_ascii(input, "??", True, eh) == (exp, 1, 2, rutf8.FLAG_REGULAR) + assert str_decode_ascii(input, "??", True, eh) == (exp, 1, 2) assert lst == [("??", "ascii", input, 0, 1)] lst = [] input = "\xe8\xe9abc\xea\xeb" assert str_decode_ascii(input, "??", True, eh) == ( - exp + exp + "abc" + exp + exp, 7, 11, rutf8.FLAG_REGULAR) + exp + exp + "abc" + exp + exp, 7, 11) assert lst == [("??", "ascii", input, 0, 1), ("??", "ascii", input, 1, 2), ("??", "ascii", input, 5, 6), diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1,11 +1,11 @@ import sys -from pypy.interpreter.error import OperationError +from pypy.interpreter.error import OperationError, oefmt from rpython.rlib.objectmodel import specialize from rpython.rlib import rutf8 -from rpython.rlib.rutf8 import combine_flags from rpython.rlib.rarithmetic import r_uint, intmask from rpython.rlib.rstring import StringBuilder +from rpython.rtyper.lltypesystem import rffi from pypy.module._codecs import interp_codecs @specialize.memo() @@ -26,10 +26,10 @@ # Fast version of the "strict" errors handler. def raise_unicode_exception_encode(errors, encoding, msg, utf8, startingpos, endingpos): - u_len, flag = rutf8.check_utf8(utf8, True) + u_len = rutf8.check_utf8(utf8, True) raise OperationError(space.w_UnicodeEncodeError, space.newtuple([space.newtext(encoding), - space.newutf8(utf8, u_len, flag), + space.newutf8(utf8, u_len), space.newint(startingpos), space.newint(endingpos), space.newtext(msg)])) @@ -55,18 +55,18 @@ def decode_unicode_escape(space, string): state = space.fromcache(interp_codecs.CodecState) unicodedata_handler = state.get_unicodedata_handler(space) - result_utf8, consumed, length, flag = str_decode_unicode_escape( + result_utf8, consumed, length = str_decode_unicode_escape( string, "strict", final=True, errorhandler=decode_error_handler(space), ud_handler=unicodedata_handler) - return result_utf8, length, flag + return result_utf8, length def decode_raw_unicode_escape(space, string): - result_utf8, consumed, lgt, flag = str_decode_raw_unicode_escape( + result_utf8, consumed, lgt = str_decode_raw_unicode_escape( string, "strict", final=True, errorhandler=decode_error_handler(space)) - return result_utf8, lgt, flag + return result_utf8, lgt def check_ascii_or_raise(space, string): try: @@ -83,19 +83,19 @@ # you still get two surrogate unicode characters in the result. # These are the Python2 rules; Python3 differs. 
try: - length, flag = rutf8.check_utf8(string, True, start, end) + length = rutf8.check_utf8(string, True, start, end) except rutf8.CheckError as e: # convert position into unicode position - lgt, flags = rutf8.check_utf8(string, True, start, stop=e.pos) + lgt = rutf8.check_utf8(string, True, start, stop=e.pos) decode_error_handler(space)('strict', 'utf8', 'invalid utf-8', string, start + lgt, start + lgt + 1) assert False, "unreachable" - return length, flag + return length def str_decode_ascii(s, errors, final, errorhandler): try: rutf8.check_ascii(s) - return s, len(s), len(s), rutf8.FLAG_ASCII + return s, len(s), len(s) except rutf8.CheckError: return _str_decode_ascii_slowpath(s, errors, final, errorhandler) @@ -112,13 +112,13 @@ res.append(ch) i += 1 ress = res.build() - lgt, flag = rutf8.check_utf8(ress, True) - return ress, len(s), lgt, flag + lgt = rutf8.check_utf8(ress, True) + return ress, len(s), lgt def str_decode_latin_1(s, errors, final, errorhandler): try: rutf8.check_ascii(s) - return s, len(s), len(s), rutf8.FLAG_ASCII + return s, len(s), len(s) except rutf8.CheckError: return _str_decode_latin_1_slowpath(s, errors, final, errorhandler) @@ -138,7 +138,7 @@ res.append_slice(s, start, end) i = end # cannot be ASCII, cannot have surrogates, I believe - return res.build(), len(s), len(s), rutf8.FLAG_REGULAR + return res.build(), len(s), len(s) def utf8_encode_latin_1(s, errors, errorhandler): try: @@ -149,37 +149,32 @@ def _utf8_encode_latin_1_slowpath(s, errors, errorhandler): res = StringBuilder(len(s)) - size = len(s) cur = 0 - i = 0 - while i < size: - if ord(s[i]) <= 0x7F: - res.append(s[i]) - i += 1 + iter = rutf8.Utf8StringIterator(s) + while True: + try: + ch = iter.next() + except StopIteration: + break + if ch <= 0xFF: + res.append(chr(ch)) cur += 1 else: - oc = rutf8.codepoint_at_pos(s, i) - if oc <= 0xFF: - res.append(chr(oc)) - cur += 1 - i = rutf8.next_codepoint_pos(s, i) - else: - r, pos = errorhandler(errors, 'latin1', - 'ordinal not in range(256)', s, cur, - cur + 1) - for j in range(pos - cur): - i = rutf8.next_codepoint_pos(s, i) + r, pos = errorhandler(errors, 'latin1', + 'ordinal not in range(256)', s, cur, + cur + 1) - j = 0 - while j < len(r): - c = rutf8.codepoint_at_pos(r, j) - if c > 0xFF: - errorhandler("strict", 'latin1', - 'ordinal not in range(256)', s, - cur, cur + 1) - j = rutf8.next_codepoint_pos(r, j) - res.append(chr(c)) - cur = pos + for c in rutf8.Utf8StringIterator(r): + if c > 0xFF: + errorhandler("strict", 'latin1', + 'ordinal not in range(256)', s, + cur, cur + 1) + res.append(chr(c)) + + for j in range(pos - cur - 1): + iter.next() + + cur = pos r = res.build() return r @@ -210,7 +205,7 @@ if c > 0x7F: errorhandler("strict", 'ascii', 'ordinal not in range(128)', utf8, - pos, pos + 1) + pos, pos + 1) j = rutf8.next_codepoint_pos(r, j) pos = newpos res.append(r) @@ -341,8 +336,7 @@ res.append(r) r = res.build() - lgt, flag = rutf8.check_utf8(r, True) - return r, pos, lgt, flag + return r, pos, rutf8.check_utf8(r, True) hexdigits = "0123456789ABCDEFabcdef" @@ -355,7 +349,7 @@ endinpos += 1 res, pos = errorhandler(errors, encoding, message, s, pos-2, endinpos) - size, flag = rutf8.check_utf8(res, True) + size = rutf8.check_utf8(res, True) builder.append(res) else: try: @@ -366,7 +360,7 @@ endinpos += 1 res, pos = errorhandler(errors, encoding, message, s, pos-2, endinpos) - size, flag = rutf8.check_utf8(res, True) + size = rutf8.check_utf8(res, True) builder.append(res) else: # when we get here, chr is a 32-bit unicode character @@ 
-376,21 +370,19 @@ message = "illegal Unicode character" res, pos = errorhandler(errors, encoding, message, s, pos-2, pos+digits) - size, flag = rutf8.check_utf8(res, True) + size = rutf8.check_utf8(res, True) builder.append(res) else: - flag = rutf8.get_flag_from_code(intmask(chr)) pos += digits size = 1 - return pos, size, flag + return pos, size def str_decode_unicode_escape(s, errors, final, errorhandler, ud_handler): size = len(s) if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII + return '', 0, 0 - flag = rutf8.FLAG_ASCII builder = StringBuilder(size) pos = 0 outsize = 0 @@ -401,7 +393,6 @@ if ch != '\\': if ord(ch) > 0x7F: rutf8.unichr_as_utf8_append(builder, ord(ch)) - flag = combine_flags(rutf8.FLAG_REGULAR, flag) else: builder.append(ch) pos += 1 @@ -414,9 +405,8 @@ message = "\\ at end of string" res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, size) - newsize, newflag = rutf8.check_utf8(res, True) + newsize = rutf8.check_utf8(res, True) outsize + newsize - flag = combine_flags(flag, newflag) builder.append(res) continue @@ -469,7 +459,6 @@ outsize += 1 if x > 0x7F: rutf8.unichr_as_utf8_append(builder, x) - flag = combine_flags(rutf8.FLAG_REGULAR, flag) else: builder.append(chr(x)) # hex escapes @@ -477,27 +466,24 @@ elif ch == 'x': digits = 2 message = "truncated \\xXX escape" - pos, newsize, newflag = hexescape(builder, s, pos, digits, + pos, newsize = hexescape(builder, s, pos, digits, "unicodeescape", errorhandler, message, errors) - flag = combine_flags(flag, newflag) outsize += newsize # \uXXXX elif ch == 'u': digits = 4 message = "truncated \\uXXXX escape" - pos, newsize, newflag = hexescape(builder, s, pos, digits, + pos, newsize = hexescape(builder, s, pos, digits, "unicodeescape", errorhandler, message, errors) - flag = combine_flags(flag, newflag) outsize += newsize # \UXXXXXXXX elif ch == 'U': digits = 8 message = "truncated \\UXXXXXXXX escape" - pos, newsize, newflag = hexescape(builder, s, pos, digits, + pos, newsize = hexescape(builder, s, pos, digits, "unicodeescape", errorhandler, message, errors) - flag = combine_flags(flag, newflag) outsize += newsize # \N{name} @@ -517,29 +503,25 @@ if code < 0: res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) - newsize, newflag = rutf8.check_utf8(res, True) - flag = combine_flags(flag, newflag) + newsize = rutf8.check_utf8(res, True) outsize += newsize builder.append(res) continue pos = look + 1 outsize += 1 - flag = combine_flags(flag, rutf8.get_flag_from_code(code)) rutf8.unichr_as_utf8_append(builder, code, allow_surrogates=True) # xxx 'code' is probably always within range here... else: res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) - newsize, newflag = rutf8.check_utf8(res, True) - flag = combine_flags(flag, newflag) + newsize = rutf8.check_utf8(res, True) outsize += newsize builder.append(res) else: res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) - newsize, newflag = rutf8.check_utf8(res, True) - flag = combine_flags(flag, newflag) + newsize = rutf8.check_utf8(res, True) outsize += newsize builder.append(res) else: @@ -547,7 +529,20 @@ builder.append(ch) outsize += 2 - return builder.build(), pos, outsize, flag + return builder.build(), pos, outsize + +def wcharpsize2utf8(space, wcharp, size): + """Safe version of rffi.wcharpsize2utf8. + + Raises app-level ValueError if any wchar value is outside the valid + codepoint range. 
+ """ + try: + return rffi.wcharpsize2utf8(wcharp, size) + except ValueError: + raise oefmt(space.w_ValueError, + "character is not in range [U+0000; U+10ffff]") + # ____________________________________________________________ # Raw unicode escape @@ -556,7 +551,7 @@ errorhandler=None): size = len(s) if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII + return '', 0, 0 result = StringBuilder(size) pos = 0 @@ -594,12 +589,12 @@ digits = 4 if s[pos] == 'u' else 8 message = "truncated \\uXXXX" pos += 1 - pos, _, _ = hexescape(result, s, pos, digits, - "rawunicodeescape", errorhandler, message, errors) + pos, _ = hexescape(result, s, pos, digits, + "rawunicodeescape", errorhandler, message, errors) r = result.build() - lgt, flag = rutf8.check_utf8(r, True) - return r, pos, lgt, flag + lgt = rutf8.check_utf8(r, True) + return r, pos, lgt _utf8_encode_unicode_escape = rutf8.make_utf8_escape_function() @@ -734,7 +729,7 @@ errorhandler=None): size = len(s) if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII + return '', 0, 0 inShift = False base64bits = 0 @@ -745,7 +740,6 @@ result = StringBuilder(size) pos = 0 shiftOutStartPos = 0 - flag = rutf8.FLAG_ASCII startinpos = 0 while pos < size: ch = s[pos] @@ -771,13 +765,11 @@ (outCh & 0x3FF)) + 0x10000 rutf8.unichr_as_utf8_append(result, code) outsize += 1 - flag = combine_flags(flag, rutf8.FLAG_REGULAR) surrogate = 0 continue else: rutf8.unichr_as_utf8_append(result, surrogate, allow_surrogates=True) - flag = rutf8.FLAG_HAS_SURROGATES outsize += 1 surrogate = 0 # Not done with outCh: falls back to next line @@ -785,8 +777,6 @@ # first surrogate surrogate = outCh else: - flag = combine_flags(flag, - rutf8.get_flag_from_code(outCh)) outsize += 1 assert outCh >= 0 rutf8.unichr_as_utf8_append(result, outCh, True) @@ -802,9 +792,8 @@ msg = "partial character in shift sequence" res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos) - reslen, resflags = rutf8.check_utf8(res, True) + reslen = rutf8.check_utf8(res, True) outsize += reslen - flag = combine_flags(flag, resflags) result.append(res) continue else: @@ -814,15 +803,13 @@ msg = "non-zero padding bits in shift sequence" res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos) - reslen, resflags = rutf8.check_utf8(res, True) + reslen = rutf8.check_utf8(res, True) outsize += reslen - flag = combine_flags(flag, resflags) result.append(res) continue if surrogate and _utf7_DECODE_DIRECT(ord(ch)): outsize += 1 - flag = rutf8.FLAG_HAS_SURROGATES rutf8.unichr_as_utf8_append(result, surrogate, True) surrogate = 0 @@ -854,9 +841,8 @@ pos += 1 msg = "unexpected special character" res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos) - reslen, resflags = rutf8.check_utf8(res, True) + reslen = rutf8.check_utf8(res, True) outsize += reslen - flag = combine_flags(flag, resflags) result.append(res) # end of string @@ -869,9 +855,8 @@ (base64bits > 0 and base64buffer != 0)): msg = "unterminated shift sequence" res, pos = errorhandler(errors, 'utf7', msg, s, shiftOutStartPos, pos) - reslen, resflags = rutf8.check_utf8(res, True) + reslen = rutf8.check_utf8(res, True) outsize += reslen - flag = combine_flags(flag, resflags) result.append(res) final_length = result.getlength() elif inShift: @@ -879,7 +864,7 @@ final_length = shiftOutStartPos # back off output assert final_length >= 0 - return result.build()[:final_length], pos, outsize, flag + return result.build()[:final_length], pos, outsize def utf8_encode_utf_7(s, errors, errorhandler): size = len(s) @@ -942,21 +927,21 @@ def str_decode_utf_16(s, errors, 
final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_16_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_16_helper(s, errors, final, errorhandler, "native") - return result, c, lgt, flag + return result, c, lgt def str_decode_utf_16_be(s, errors, final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_16_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_16_helper(s, errors, final, errorhandler, "big") - return result, c, lgt, flag + return result, c, lgt def str_decode_utf_16_le(s, errors, final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_16_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_16_helper(s, errors, final, errorhandler, "little") - return result, c, lgt, flag + return result, c, lgt def str_decode_utf_16_helper(s, errors, final=True, errorhandler=None, @@ -999,7 +984,7 @@ else: bo = 1 if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII, bo + return '', 0, 0, bo if bo == -1: # force little endian ihi = 1 @@ -1058,8 +1043,8 @@ s, pos - 2, pos) result.append(r) r = result.build() - lgt, flag = rutf8.check_utf8(r, True) - return result.build(), pos, lgt, flag, bo + lgt = rutf8.check_utf8(r, True) + return result.build(), pos, lgt, bo def _STORECHAR(result, CH, byteorder): hi = chr(((CH) >> 8) & 0xff) @@ -1148,21 +1133,21 @@ def str_decode_utf_32(s, errors, final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_32_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_32_helper(s, errors, final, errorhandler, "native") - return result, c, lgt, flag + return result, c, lgt def str_decode_utf_32_be(s, errors, final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_32_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_32_helper(s, errors, final, errorhandler, "big") - return result, c, lgt, flag + return result, c, lgt def str_decode_utf_32_le(s, errors, final=True, errorhandler=None): - result, c, lgt, flag, _ = str_decode_utf_32_helper(s, errors, final, + result, c, lgt, _ = str_decode_utf_32_helper(s, errors, final, errorhandler, "little") - return result, c, lgt, flag + return result, c, lgt BOM32_DIRECT = intmask(0x0000FEFF) BOM32_REVERSE = intmask(0xFFFE0000) @@ -1208,7 +1193,7 @@ else: bo = 1 if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII, bo + return '', 0, 0, bo if bo == -1: # force little endian iorder = [0, 1, 2, 3] @@ -1243,8 +1228,8 @@ rutf8.unichr_as_utf8_append(result, ch, allow_surrogates=True) pos += 4 r = result.build() - lgt, flag = rutf8.check_utf8(r, True) - return r, pos, lgt, flag, bo + lgt = rutf8.check_utf8(r, True) + return r, pos, lgt, bo def _STORECHAR32(result, CH, byteorder): c0 = chr(((CH) >> 24) & 0xff) @@ -1330,7 +1315,7 @@ errorhandler=None): size = len(s) if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII + return '', 0, 0 unicode_bytes = 4 if BYTEORDER == "little": @@ -1367,8 +1352,8 @@ rutf8.unichr_as_utf8_append(result, intmask(t), allow_surrogates=True) pos += unicode_bytes r = result.build() - lgt, flag = rutf8.check_utf8(r, True) - return r, pos, lgt, flag + lgt = rutf8.check_utf8(r, True) + return r, pos, lgt def utf8_encode_unicode_internal(s, errors, errorhandler): size = len(s) @@ -1409,7 +1394,7 @@ errorhandler=errorhandler) size = len(s) if size == 0: - return '', 0, 0, rutf8.FLAG_ASCII + return '', 0, 0 pos = 0 result = StringBuilder(size) @@ -1426,8 +1411,8 @@ result.append(c) pos += 1 r = result.build() - lgt, flag = rutf8.check_utf8(r, True) - return r, pos, lgt, flag + lgt = 
rutf8.check_utf8(r, True) + return r, pos, lgt def utf8_encode_charmap(s, errors, errorhandler=None, mapping=None): diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py --- a/pypy/module/__builtin__/operation.py +++ b/pypy/module/__builtin__/operation.py @@ -26,14 +26,8 @@ "Return a Unicode string of one character with the given ordinal." if code < 0 or code > 0x10FFFF: raise oefmt(space.w_ValueError, "unichr() arg out of range") - elif code < 0x80: - flag = rutf8.FLAG_ASCII - elif 0xD800 <= code <= 0xDFFF: - flag = rutf8.FLAG_HAS_SURROGATES - else: - flag = rutf8.FLAG_REGULAR s = rutf8.unichr_as_utf8(code, allow_surrogates=True) - return space.newutf8(s, 1, flag) + return space.newutf8(s, 1) def len(space, w_obj): "len(object) -> integer\n\nReturn the number of items of a sequence or mapping." diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -183,8 +183,7 @@ raise oefmt(self.space.w_ValueError, "%s out of range for conversion to unicode: %s", self.name, s) - flag = rutf8.get_flag_from_code(intmask(value)) - return self.space.newutf8(utf8, 1, flag) + return self.space.newutf8(utf8, 1) def string(self, cdataobj, maxlen): with cdataobj as ptr: @@ -215,15 +214,15 @@ def unpack_ptr(self, w_ctypeptr, ptr, length): if self.size == 2: - utf8, lgt, flag = wchar_helper.utf8_from_char16(ptr, length) + utf8, lgt = wchar_helper.utf8_from_char16(ptr, length) else: try: - utf8, lgt, flag = wchar_helper.utf8_from_char32(ptr, length) + utf8, lgt = wchar_helper.utf8_from_char32(ptr, length) except wchar_helper.OutOfRange as e: raise oefmt(self.space.w_ValueError, "%s out of range for conversion to unicode: %s", self.name, hex(e.ordinal)) - return self.space.newutf8(utf8, lgt, flag) + return self.space.newutf8(utf8, lgt) class W_CTypePrimitiveSigned(W_CTypePrimitive): diff --git a/pypy/module/_cffi_backend/wchar_helper.py b/pypy/module/_cffi_backend/wchar_helper.py --- a/pypy/module/_cffi_backend/wchar_helper.py +++ b/pypy/module/_cffi_backend/wchar_helper.py @@ -19,16 +19,14 @@ ptr = rffi.cast(rffi.UINTP, ptr) u = StringBuilder(length) j = 0 - flag = rutf8.FLAG_ASCII while j < length: ch = intmask(ptr[j]) j += 1 - flag = rutf8.combine_flags(flag, rutf8.get_flag_from_code(ch)) try: rutf8.unichr_as_utf8_append(u, ch, allow_surrogates=True) except ValueError: raise OutOfRange(ch) - return u.build(), length, flag + return u.build(), length def utf8_from_char16(ptr, length): # 'ptr' is a pointer to 'length' 16-bit integers @@ -36,7 +34,6 @@ u = StringBuilder(length) j = 0 result_length = length - flag = rutf8.FLAG_ASCII while j < length: ch = intmask(ptr[j]) j += 1 @@ -46,9 +43,8 @@ ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000 j += 1 result_length -= 1 - flag = rutf8.combine_flags(flag, rutf8.get_flag_from_code(ch)) rutf8.unichr_as_utf8_append(u, ch, allow_surrogates=True) - return u.build(), result_length, flag + return u.build(), result_length @specialize.ll() diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -43,8 +43,8 @@ length = len(input) else: w_cls = space.w_UnicodeEncodeError - length, flag = rutf8.check_utf8(input, allow_surrogates=True) - w_input = space.newutf8(input, length, flag) + length = rutf8.check_utf8(input, allow_surrogates=True) + w_input = space.newutf8(input, length) w_exc = 
space.call_function( w_cls, space.newtext(encoding), @@ -192,7 +192,7 @@ def ignore_errors(space, w_exc): check_exception(space, w_exc) w_end = space.getattr(w_exc, space.newtext('end')) - return space.newtuple([space.newutf8('', 0, rutf8.FLAG_ASCII), w_end]) + return space.newtuple([space.newutf8('', 0), w_end]) REPLACEMENT = u'\ufffd'.encode('utf8') @@ -203,13 +203,13 @@ size = space.int_w(w_end) - space.int_w(w_start) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): text = '?' * size - return space.newtuple([space.newutf8(text, size, rutf8.FLAG_ASCII), w_end]) + return space.newtuple([space.newutf8(text, size), w_end]) elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError): text = REPLACEMENT - return space.newtuple([space.newutf8(text, 1, rutf8.FLAG_REGULAR), w_end]) + return space.newtuple([space.newutf8(text, 1), w_end]) elif space.isinstance_w(w_exc, space.w_UnicodeTranslateError): text = REPLACEMENT * size - return space.newtuple([space.newutf8(text, size, rutf8.FLAG_REGULAR), w_end]) + return space.newtuple([space.newutf8(text, size), w_end]) else: raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) @@ -237,8 +237,8 @@ builder.append(";") pos = rutf8.next_codepoint_pos(obj, pos) r = builder.build() - lgt, flag = rutf8.check_utf8(r, True) - return space.newtuple([space.newutf8(r, lgt, flag), w_end]) + lgt = rutf8.check_utf8(r, True) + return space.newtuple([space.newutf8(r, lgt), w_end]) else: raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) @@ -278,8 +278,8 @@ builder.append_slice(num, 2, lnum) pos = rutf8.next_codepoint_pos(obj, pos) r = builder.build() - lgt, flag = rutf8.check_utf8(r, True) - return space.newtuple([space.newutf8(r, lgt, flag), w_end]) + lgt = rutf8.check_utf8(r, True) + return space.newtuple([space.newutf8(r, lgt), w_end]) else: raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) @@ -417,9 +417,9 @@ final = space.is_true(w_final) state = space.fromcache(CodecState) func = getattr(unicodehelper, rname) - result, consumed, length, flag = func(string, errors, + result, consumed, length = func(string, errors, final, state.decode_error_handler) - return space.newtuple([space.newutf8(result, length, flag), + return space.newtuple([space.newutf8(result, length), space.newint(consumed)]) wrap_decoder.func_name = rname globals()[name] = wrap_decoder @@ -460,22 +460,12 @@ # utf-8 functions are not regular, because we have to pass # "allow_surrogates=True" - at unwrap_spec(utf8='utf8', errors='text_or_none') -def utf_8_encode(space, utf8, errors="strict"): - length, _ = rutf8.check_utf8(utf8, allow_surrogates=True) - return space.newtuple([space.newbytes(utf8), space.newint(length)]) -#@unwrap_spec(uni=unicode, errors='text_or_none') -#def utf_8_encode(space, uni, errors="strict"): -# if errors is None: -# errors = 'strict' -# state = space.fromcache(CodecState) -# # NB. can't call unicode_encode_utf_8() directly because that's -# # an @elidable function nowadays. Instead, we need the _impl(). -# # (The problem is the errorhandler, which calls arbitrary Python.) 
-# result = runicode.unicode_encode_utf_8_impl( -# uni, len(uni), errors, state.encode_error_handler, -# allow_surrogates=True) -# return space.newtuple([space.newbytes(result), space.newint(len(uni))]) + at unwrap_spec(errors='text_or_none') +def utf_8_encode(space, w_obj, errors="strict"): + utf8, lgt = space.utf8_len_w(w_obj) + if rutf8.has_surrogates(utf8): + utf8 = rutf8.reencode_utf8_with_surrogates(utf8) + return space.newtuple([space.newbytes(utf8), space.newint(lgt)]) @unwrap_spec(string='bufferstr', errors='text_or_none', w_final = WrappedDefault(False)) @@ -488,14 +478,14 @@ state = space.fromcache(CodecState) # call the fast version for checking try: - lgt, flag = rutf8.check_utf8(string, allow_surrogates=True) + lgt = rutf8.check_utf8(string, allow_surrogates=True) except rutf8.CheckError: - res, consumed, lgt, flag = unicodehelper.str_decode_utf8(string, + res, consumed, lgt = unicodehelper.str_decode_utf8(string, errors, final, state.decode_error_handler) - return space.newtuple([space.newutf8(res, lgt, flag), + return space.newtuple([space.newutf8(res, lgt), space.newint(consumed)]) else: - return space.newtuple([space.newutf8(string, lgt, flag), + return space.newtuple([space.newutf8(string, lgt), space.newint(len(string))]) @unwrap_spec(data='bufferstr', errors='text_or_none', byteorder=int, @@ -516,10 +506,10 @@ consumed = len(data) if final: consumed = 0 - res, consumed, lgt, flag, byteorder = str_decode_utf_16_helper( + res, consumed, lgt, byteorder = str_decode_utf_16_helper( data, errors, final, state.decode_error_handler, byteorder) - return space.newtuple([space.newutf8(res, lgt, flag), + return space.newtuple([space.newutf8(res, lgt), space.newint(consumed), space.newint(byteorder)]) @@ -539,10 +529,10 @@ consumed = len(data) if final: consumed = 0 - res, consumed, lgt, flag, byteorder = str_decode_utf_32_helper( + res, consumed, lgt, byteorder = str_decode_utf_32_helper( data, errors, final, state.decode_error_handler, byteorder) - return space.newtuple([space.newutf8(res, lgt, flag), + return space.newtuple([space.newutf8(res, lgt), space.newint(consumed), space.newint(byteorder)]) @@ -632,7 +622,7 @@ if errors is None: errors = 'strict' if len(string) == 0: - return space.newtuple([space.newutf8('', 0, rutf8.FLAG_ASCII), + return space.newtuple([space.newutf8('', 0), space.newint(0)]) if space.is_none(w_mapping): @@ -642,9 +632,9 @@ final = True state = space.fromcache(CodecState) - result, consumed, lgt, flag = unicodehelper.str_decode_charmap( + result, consumed, lgt = unicodehelper.str_decode_charmap( string, errors, final, state.decode_error_handler, mapping) - return space.newtuple([space.newutf8(result, lgt, flag), + return space.newtuple([space.newutf8(result, lgt), space.newint(consumed)]) @unwrap_spec(errors='text_or_none') @@ -708,12 +698,12 @@ unicode_name_handler = state.get_unicodedata_handler(space) - result, consumed, lgt, flag = unicodehelper.str_decode_unicode_escape( + result, consumed, lgt = unicodehelper.str_decode_unicode_escape( string, errors, final, state.decode_error_handler, unicode_name_handler) - return space.newtuple([space.newutf8(result, lgt, flag), space.newint(consumed)]) + return space.newtuple([space.newutf8(result, lgt), space.newint(consumed)]) # ____________________________________________________________ # Unicode-internal @@ -731,15 +721,15 @@ string = space.readbuf_w(w_string).as_str() if len(string) == 0: - return space.newtuple([space.newutf8('', 0, rutf8.FLAG_ASCII), + return space.newtuple([space.newutf8('', 0), 
space.newint(0)]) final = True state = space.fromcache(CodecState) - result, consumed, lgt, flag = unicodehelper.str_decode_unicode_internal( + result, consumed, lgt = unicodehelper.str_decode_unicode_internal( string, errors, final, state.decode_error_handler) - return space.newtuple([space.newutf8(result, lgt, flag), + return space.newtuple([space.newutf8(result, lgt), space.newint(consumed)]) # ____________________________________________________________ diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -1,3 +1,5 @@ +from rpython.rlib.rutf8 import get_utf8_length + from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.typedef import ( TypeDef, generic_new_descr, GetSetProperty) @@ -152,7 +154,7 @@ if self.readnl is None: w_readnl = space.w_None else: - w_readnl = space.str(space.new_from_utf8(self.readnl)) # YYY + w_readnl = space.str(space.newutf8(self.readnl, get_utf8_length(self.readnl))) # YYY return space.newtuple([ w_initialval, w_readnl, space.newint(self.buf.pos), w_dict ]) @@ -215,7 +217,8 @@ if self.writenl: w_decoded = space.call_method( w_decoded, "replace", - space.newtext("\n"), space.new_from_utf8(self.writenl)) + space.newtext("\n"), space.newutf8(self.writenl, + get_utf8_length(self.writenl))) string = space.utf8_w(w_decoded) if string: self.buf.write(string) @@ -225,7 +228,9 @@ def read_w(self, space, w_size=None): self._check_closed(space) size = convert_size(space, w_size) - return space.new_from_utf8(self.buf.read(size)) + v = self.buf.read(size) + lgt = get_utf8_length(v) + return space.newutf8(v, lgt) def readline_w(self, space, w_limit=None): self._check_closed(space) @@ -239,7 +244,8 @@ else: newline = self.readnl result = self.buf.readline(newline, limit) - return space.new_from_utf8(result) + resultlen = get_utf8_length(result) + return space.newutf8(result, resultlen) @unwrap_spec(pos=int, mode=int) @@ -276,7 +282,9 @@ def getvalue_w(self, space): self._check_closed(space) - return space.new_from_utf8(self.buf.getvalue()) + v = self.buf.getvalue() + lgt = get_utf8_length(v) + return space.newutf8(v, lgt) def readable_w(self, space): self._check_closed(space) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -11,8 +11,9 @@ from rpython.rlib.rarithmetic import intmask, r_uint, r_ulonglong from rpython.rlib.rbigint import rbigint from rpython.rlib.rstring import StringBuilder -from rpython.rlib.rutf8 import ( - FLAG_ASCII, check_utf8, next_codepoint_pos, codepoints_in_utf8) +from rpython.rlib.rutf8 import (check_utf8, next_codepoint_pos, + codepoints_in_utf8, get_utf8_length, + Utf8StringBuilder) STATE_ZERO, STATE_OK, STATE_DETACHED = range(3) @@ -31,22 +32,22 @@ def __init__(self, space): self.w_newlines_dict = { - SEEN_CR: space.newutf8("\r", 1, FLAG_ASCII), - SEEN_LF: space.newutf8("\n", 1, FLAG_ASCII), - SEEN_CRLF: space.newutf8("\r\n", 2, FLAG_ASCII), + SEEN_CR: space.newutf8("\r", 1), + SEEN_LF: space.newutf8("\n", 1), + SEEN_CRLF: space.newutf8("\r\n", 2), SEEN_CR | SEEN_LF: space.newtuple( - [space.newutf8("\r", 1, FLAG_ASCII), - space.newutf8("\n", 1, FLAG_ASCII)]), + [space.newutf8("\r", 1), + space.newutf8("\n", 1)]), SEEN_CR | SEEN_CRLF: space.newtuple( - [space.newutf8("\r", 1, FLAG_ASCII), - space.newutf8("\r\n", 2, FLAG_ASCII)]), + [space.newutf8("\r", 1), + space.newutf8("\r\n", 2)]), SEEN_LF | 
SEEN_CRLF: space.newtuple( - [space.newutf8("\n", 1, FLAG_ASCII), - space.newutf8("\r\n", 2, FLAG_ASCII)]), + [space.newutf8("\n", 1), + space.newutf8("\r\n", 2)]), SEEN_CR | SEEN_LF | SEEN_CRLF: space.newtuple( - [space.newutf8("\r", 1, FLAG_ASCII), - space.newutf8("\n", 1, FLAG_ASCII), - space.newutf8("\r\n", 2, FLAG_ASCII)]), + [space.newutf8("\r", 1), + space.newutf8("\n", 1), + space.newutf8("\r\n", 2)]), } @unwrap_spec(translate=int) @@ -98,7 +99,7 @@ output_len -= 1 if output_len == 0: - return space.newutf8("", 0, FLAG_ASCII) + return space.newutf8("", 0) # Record which newlines are read and do newline translation if # desired, all in one pass. @@ -153,8 +154,8 @@ output = builder.build() self.seennl |= seennl - lgt, flag = check_utf8(output, True) - return space.newutf8(output, lgt, flag) + lgt = check_utf8(output, True) + return space.newutf8(output, lgt) def reset_w(self, space): self.seennl = 0 @@ -684,13 +685,15 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.new_from_utf8(self.decoded.get_chars(-1)) + chars = self.decoded.get_chars(-1) + lgt = get_utf8_length(chars) + w_result = space.newutf8(chars, lgt) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final remaining = size - builder = StringBuilder(size) + builder = Utf8StringBuilder(size) # Keep reading chunks until we have n characters to return while remaining > 0: @@ -700,7 +703,7 @@ builder.append(data) remaining -= len(data) - return space.new_from_utf8(builder.build()) + return space.newutf8(builder.build(), builder.get_length()) def _scan_line_ending(self, limit): if self.readuniversal: @@ -725,6 +728,7 @@ limit = convert_size(space, w_limit) remnant = None builder = StringBuilder() + # XXX maybe use Utf8StringBuilder instead? 
while True: # First, get some data if necessary has_data = self._ensure_data(space) @@ -771,7 +775,8 @@ self.decoded.reset() result = builder.build() - return space.new_from_utf8(result) + lgt = get_utf8_length(result) + return space.newutf8(result, lgt) # _____________________________________________________________ # write methods @@ -794,8 +799,8 @@ if text.find('\n') >= 0: haslf = True if haslf and self.writetranslate and self.writenl: - w_text = space.call_method(w_text, "replace", space.new_from_utf8('\n'), - space.new_from_utf8(self.writenl)) + w_text = space.call_method(w_text, "replace", space.newutf8('\n', 1), + space.newutf8(self.writenl, get_utf8_length(self.writenl))) text = space.utf8_w(w_text) needflush = False diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py --- a/pypy/module/_multibytecodec/c_codecs.py +++ b/pypy/module/_multibytecodec/c_codecs.py @@ -157,7 +157,7 @@ replace, end = errorcb(errors, namecb, reason, stringdata, start, end) # 'replace' is RPython unicode here - lgt, _ = rutf8.check_utf8(replace, True) + lgt = rutf8.get_utf8_length(replace) inbuf = rffi.utf82wcharp(replace, lgt) try: r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, lgt, end) @@ -268,7 +268,7 @@ rets, end = errorcb(errors, namecb, reason, unicodedata, start, end) codec = pypy_cjk_enc_getcodec(encodebuf) - lgt, _ = rutf8.get_utf8_length_flag(rets) + lgt = rutf8.get_utf8_length(rets) replace = encode(codec, rets, lgt, "strict", errorcb, namecb) with rffi.scoped_nonmovingbuffer(replace) as inbuf: r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end) diff --git a/pypy/module/_multibytecodec/interp_incremental.py b/pypy/module/_multibytecodec/interp_incremental.py --- a/pypy/module/_multibytecodec/interp_incremental.py +++ b/pypy/module/_multibytecodec/interp_incremental.py @@ -66,8 +66,8 @@ pos = c_codecs.pypy_cjk_dec_inbuf_consumed(self.decodebuf) assert 0 <= pos <= len(object) self.pending = object[pos:] - lgt, flag = rutf8.get_utf8_length_flag(output) - return space.newutf8(output, lgt, flag) + lgt = rutf8.get_utf8_length(output) + return space.newutf8(output, lgt) @unwrap_spec(errors="text_or_none") diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py --- a/pypy/module/_multibytecodec/interp_multibytecodec.py +++ b/pypy/module/_multibytecodec/interp_multibytecodec.py @@ -27,8 +27,8 @@ raise wrap_unicodedecodeerror(space, e, input, self.name) except RuntimeError: raise wrap_runtimeerror(space) - lgt, flag = rutf8.check_utf8(utf8_output, True) - return space.newtuple([space.newutf8(utf8_output, lgt, flag), + lgt = rutf8.get_utf8_length(utf8_output) + return space.newtuple([space.newutf8(utf8_output, lgt), space.newint(len(input))]) @unwrap_spec(errors="text_or_none") @@ -78,12 +78,11 @@ space.newtext(e.reason)])) def wrap_unicodeencodeerror(space, e, input, inputlen, name): - _, flag = rutf8.check_utf8(input, True) raise OperationError( space.w_UnicodeEncodeError, space.newtuple([ space.newtext(name), - space.newutf8(input, inputlen, flag), + space.newutf8(input, inputlen), space.newint(e.start), space.newint(e.end), space.newtext(e.reason)])) diff --git a/pypy/module/_multibytecodec/test/test_translation.py b/pypy/module/_multibytecodec/test/test_translation.py --- a/pypy/module/_multibytecodec/test/test_translation.py +++ b/pypy/module/_multibytecodec/test/test_translation.py @@ -14,7 +14,7 @@ codecname, string = argv[1], argv[2] c = c_codecs.getcodec(codecname) u 
= c_codecs.decode(c, string) - lgt, _ = rutf8.get_utf8_length_flag(u) + lgt = rutf8.get_utf8_length(u) r = c_codecs.encode(c, u, lgt) print r return 0 diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -295,15 +295,15 @@ if bits & 0x80: # the 8th bit is set, it's an utf8 string content_utf8 = self.getslice(start, end) - lgt, flag = unicodehelper.check_utf8_or_raise(self.space, + lgt = unicodehelper.check_utf8_or_raise(self.space, content_utf8) - return self.space.newutf8(content_utf8, lgt, flag) + return self.space.newutf8(content_utf8, lgt) else: # ascii only, fast path (ascii is a strict subset of # latin1, and we already checked that all the chars are < # 128) return self.space.newutf8(self.getslice(start, end), - end - start, rutf8.FLAG_ASCII) + end - start) def decode_string_escaped(self, start): i = self.pos @@ -316,10 +316,10 @@ i += 1 if ch == '"': content_utf8 = builder.build() - lgt, f = unicodehelper.check_utf8_or_raise(self.space, + lgt = unicodehelper.check_utf8_or_raise(self.space, content_utf8) self.pos = i - return self.space.newutf8(content_utf8, lgt, f) + return self.space.newutf8(content_utf8, lgt) elif ch == '\\': i = self.decode_escape_sequence(i, builder) elif ch < '\x20': diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py --- a/pypy/module/_pypyjson/test/test__pypyjson.py +++ b/pypy/module/_pypyjson/test/test__pypyjson.py @@ -11,7 +11,7 @@ dec.close() class FakeSpace(object): - def newutf8(self, s, l, f): + def newutf8(self, s, l): return s def test_decode_key(): diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py --- a/pypy/module/_rawffi/alt/type_converter.py +++ b/pypy/module/_rawffi/alt/type_converter.py @@ -228,8 +228,7 @@ return space.newbytes(chr(ucharval)) elif w_ffitype.is_unichar(): wcharval = self.get_unichar(w_ffitype) - return space.newutf8(rutf8.unichr_as_utf8(wcharval), 1, - rutf8.get_flag_from_code(intmask(wcharval))) + return space.newutf8(rutf8.unichr_as_utf8(wcharval), 1) elif w_ffitype.is_double(): return self._float(w_ffitype) elif w_ffitype.is_singlefloat(): diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -596,9 +596,9 @@ return space.w_None wcharp_addr = rffi.cast(rffi.CWCHARP, address) if maxlength == -1: - s = rffi.wcharp2unicode(wcharp_addr) + s = rffi.wcharp2utf8(wcharp_addr) else: - s = rffi.wcharp2unicoden(wcharp_addr, maxlength) + s = rffi.wcharpsize2utf8(wcharp_addr, maxlength) return space.newunicode(s) @unwrap_spec(address=r_uint, maxlength=int) diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -6,7 +6,7 @@ from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault from pypy.interpreter.error import OperationError, oefmt from rpython.rlib.rarithmetic import intmask -from rpython.rlib import jit +from rpython.rlib import jit, rutf8 from rpython.rlib.rstring import StringBuilder from rpython.rlib.rutf8 import Utf8StringBuilder @@ -42,7 +42,9 @@ if isinstance(ctx, rsre_core.StrMatchContext): return space.newbytes(ctx._string[start:end]) elif isinstance(ctx, rsre_core.UnicodeMatchContext): - return space.newunicode(ctx._unicodestr[start:end]) + s = 
ctx._unicodestr[start:end] + lgt = rutf8.check_utf8(s, True) + return space.newutf8(s, lgt) else: # unreachable raise SystemError @@ -110,7 +112,9 @@ if endpos < pos: endpos = pos if space.isinstance_w(w_string, space.w_unicode): - unicodestr = space.unicode_w(w_string) + unicodestr = space.utf8_w(w_string) + # XXX will fail some tests, the length need to be adjusted for + # real char len etc if pos > len(unicodestr): pos = len(unicodestr) if endpos > len(unicodestr): @@ -337,11 +341,10 @@ else: assert unicodebuilder is not None return space.newutf8(unicodebuilder.build(), - unicodebuilder.get_length(), - unicodebuilder.get_flag()), n + unicodebuilder.get_length()), n else: if space.isinstance_w(w_string, space.w_unicode): - w_emptystr = space.newunicode(u'') + w_emptystr = space.newutf8('', 0) else: w_emptystr = space.newbytes('') w_item = space.call_method(w_emptystr, 'join', @@ -575,7 +578,8 @@ elif isinstance(ctx, rsre_core.StrMatchContext): return space.newbytes(ctx._string) elif isinstance(ctx, rsre_core.UnicodeMatchContext): - return space.newunicode(ctx._unicodestr) + lgt = rutf8.check_utf8(ctx._unicodestr, True) + return space.newutf8(ctx._unicodestr, lgt) else: raise SystemError diff --git a/pypy/module/_warnings/interp_warnings.py b/pypy/module/_warnings/interp_warnings.py --- a/pypy/module/_warnings/interp_warnings.py +++ b/pypy/module/_warnings/interp_warnings.py @@ -1,3 +1,6 @@ + +from rpython.rlib import rutf8 + from pypy.interpreter.gateway import unwrap_spec, WrappedDefault from pypy.interpreter.error import OperationError, oefmt @@ -208,10 +211,11 @@ except OperationError as e: if e.async(space): raise - message = u"%s:%d: %s: %s\n" % (space.unicode_w(w_filename), lineno, - space.unicode_w(w_name), - space.unicode_w(w_text)) - w_message = space.newunicode(message) + message = "%s:%d: %s: %s\n" % (space.utf8_w(w_filename), lineno, + space.utf8_w(w_name), + space.utf8_w(w_text)) + lgt = rutf8.check_utf8(message, True) + w_message = space.newutf8(message, lgt) else: w_message = space.newtext(message) space.call_method(w_stderr, "write", w_message) diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -1,4 +1,4 @@ -from rpython.rlib import jit, rgc +from rpython.rlib import jit, rgc, rutf8 from rpython.rlib.buffer import RawBuffer from rpython.rlib.objectmodel import keepalive_until_here from rpython.rlib.rarithmetic import ovfcheck, widen @@ -451,7 +451,7 @@ """ if self.typecode == 'u': buf = rffi.cast(UNICODE_ARRAY, self._buffer_as_unsigned()) - return space.newunicode(rffi.wcharpsize2unicode(buf, self.len)) + return space.newutf8(rffi.wcharpsize2utf8(buf, self.len), self.len) else: raise oefmt(space.w_ValueError, "tounicode() may only be called on type 'u' arrays") @@ -797,7 +797,7 @@ TypeCode(rffi.UINT, 'int_w', True) types = { 'c': TypeCode(lltype.Char, 'bytes_w', method=''), - 'u': TypeCode(lltype.UniChar, 'unicode_w', method=''), + 'u': TypeCode(lltype.UniChar, 'utf8_len_w', method=''), 'b': TypeCode(rffi.SIGNEDCHAR, 'int_w', True, True), 'B': TypeCode(rffi.UCHAR, 'int_w', True), 'h': TypeCode(rffi.SHORT, 'int_w', True, True), @@ -895,11 +895,17 @@ "unsigned %d-byte integer out of range", mytype.bytes) return rffi.cast(mytype.itemtype, item) - if mytype.unwrap == 'bytes_w' or mytype.unwrap == 'unicode_w': + if mytype.unwrap == 'bytes_w': if len(item) != 1: raise oefmt(space.w_TypeError, "array item must be char") item = item[0] return 
rffi.cast(mytype.itemtype, item) + if mytype.unwrap == 'utf8_len_w': + utf8, lgt = item + if lgt != 1: + raise oefmt(space.w_TypeError, "array item must be char") + uchar = rutf8.codepoint_at_pos(utf8, 0) + return rffi.cast(mytype.itemtype, uchar) # # "regular" case: it fits in an rpython integer (lltype.Signed) # or it is a float @@ -1007,7 +1013,8 @@ elif mytype.typecode == 'c': return space.newbytes(item) elif mytype.typecode == 'u': - return space.newunicode(item) + code = ord(item) + return space.newutf8(rutf8.unichr_as_utf8(code), 1) assert 0, "unreachable" # interface diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -1,5 +1,9 @@ +from rpython.rtyper.lltypesystem import rffi, lltype +from rpython.rlib import rstring, runicode +from rpython.tool.sourcetools import func_renamer + from pypy.interpreter.error import OperationError, oefmt -from rpython.rtyper.lltypesystem import rffi, lltype +from pypy.interpreter.unicodehelper import wcharpsize2utf8 from pypy.module.unicodedata import unicodedb from pypy.module.cpyext.api import ( CANNOT_FAIL, Py_ssize_t, build_type_checkers_flags, cpython_api, @@ -13,8 +17,6 @@ from pypy.module.sys.interp_encoding import setdefaultencoding from pypy.module._codecs.interp_codecs import CodecState from pypy.objspace.std import unicodeobject -from rpython.rlib import rstring, runicode -from rpython.tool.sourcetools import func_renamer import sys ## See comment in bytesobject.py. @@ -61,10 +63,10 @@ def unicode_attach(space, py_obj, w_obj, w_userdata=None): "Fills a newly allocated PyUnicodeObject with a unicode string" py_unicode = rffi.cast(PyUnicodeObject, py_obj) - s = space.unicode_w(w_obj) - py_unicode.c_length = len(s) + s, length = space.utf8_len_w(w_obj) + py_unicode.c_length = length py_unicode.c_str = lltype.nullptr(rffi.CWCHARP.TO) - py_unicode.c_hash = space.hash_w(space.newunicode(s)) + py_unicode.c_hash = space.hash_w(space.newutf8(s, length)) py_unicode.c_defenc = lltype.nullptr(PyObject.TO) def unicode_realize(space, py_obj): @@ -73,11 +75,12 @@ be modified after this call. """ py_uni = rffi.cast(PyUnicodeObject, py_obj) - s = rffi.wcharpsize2unicode(py_uni.c_str, py_uni.c_length) + length = py_uni.c_length + s = wcharpsize2utf8(space, py_uni.c_str, length) w_type = from_ref(space, rffi.cast(PyObject, py_obj.c_ob_type)) w_obj = space.allocate_instance(unicodeobject.W_UnicodeObject, w_type) - w_obj.__init__(s) - py_uni.c_hash = space.hash_w(space.newunicode(s)) + w_obj.__init__(s, length) + py_uni.c_hash = space.hash_w(space.newutf8(s, length)) track_reference(space, py_obj, w_obj) return w_obj @@ -214,8 +217,8 @@ if not ref_unicode.c_str: # Copy unicode buffer w_unicode = from_ref(space, rffi.cast(PyObject, ref)) - u = space.unicode_w(w_unicode) - ref_unicode.c_str = rffi.unicode2wcharp(u) + u, length = space.utf8_len_w(w_unicode) + ref_unicode.c_str = rffi.utf82wcharp(u, length) return ref_unicode.c_str @cpython_api([PyObject], rffi.CWCHARP) @@ -335,8 +338,8 @@ Therefore, modification of the resulting Unicode object is only allowed when u is NULL.""" if wchar_p: - s = rffi.wcharpsize2unicode(wchar_p, length) - return make_ref(space, space.newunicode(s)) + s = wcharpsize2utf8(space, wchar_p, length) + return make_ref(space, space.newutf8(s, length)) else: return rffi.cast(PyObject, new_empty_unicode(space, length)) @@ -506,7 +509,8 @@ """Encode the Py_UNICODE buffer of the given size and return a Python string object. 
Return NULL if an exception was raised by the codec.""" - w_u = space.newunicode(rffi.wcharpsize2unicode(s, size)) + u = wcharpsize2utf8(space, s, size) + w_u = space.newutf8(u, size) if errors: w_errors = space.newtext(rffi.charp2str(errors)) else: @@ -706,12 +710,12 @@ """Return 1 if substr matches str[start:end] at the given tail end (direction == -1 means to do a prefix match, direction == 1 a suffix match), 0 otherwise. Return -1 if an error occurred.""" - str = space.unicode_w(w_str) - substr = space.unicode_w(w_substr) + w_start = space.newint(start) + w_end = space.newint(end) if rffi.cast(lltype.Signed, direction) <= 0: - return rstring.startswith(str, substr, start, end) + return space.call_method(w_str, "startswith", w_substr, w_start, w_end) else: - return rstring.endswith(str, substr, start, end) + return space.call_method(w_str, "endswith", w_substr, w_start, w_end) @cpython_api([PyObject, PyObject, Py_ssize_t, Py_ssize_t], Py_ssize_t, error=-1) def PyUnicode_Count(space, w_str, w_substr, start, end): diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py --- a/pypy/module/exceptions/interp_exceptions.py +++ b/pypy/module/exceptions/interp_exceptions.py @@ -126,7 +126,7 @@ return space.call_function(space.w_unicode, w_as_str) lgt = len(self.args_w) if lgt == 0: - return space.newunicode(u"") + return space.newutf8("", 0) if lgt == 1: return space.call_function(space.w_unicode, self.args_w[0]) else: @@ -719,7 +719,7 @@ def descr_init(self, space, w_encoding, w_object, w_start, w_end, w_reason): # typechecking space.realtext_w(w_encoding) - space.utf8_w(w_object) + space.realutf8_w(w_object) space.int_w(w_start) space.int_w(w_end) space.realtext_w(w_reason) diff --git a/pypy/module/operator/tscmp.py b/pypy/module/operator/tscmp.py --- a/pypy/module/operator/tscmp.py +++ b/pypy/module/operator/tscmp.py @@ -45,15 +45,15 @@ Note: If a and b are of different lengths, or if an error occurs, a timing attack could theoretically reveal information about the types and lengths of a and b--but not their values. + + XXX note that here the strings have to have the same length as UTF8, + not only as unicode. 
Not sure how to do better """ if (space.isinstance_w(w_a, space.w_unicode) and space.isinstance_w(w_b, space.w_unicode)): - a = space.unicode_w(w_a) - b = space.unicode_w(w_b) - with rffi.scoped_nonmoving_unicodebuffer(a) as a_buf: - with rffi.scoped_nonmoving_unicodebuffer(b) as b_buf: - result = pypy_tscmp_wide(a_buf, b_buf, len(a), len(b)) - return space.newbool(rffi.cast(lltype.Bool, result)) + a = space.utf8_w(w_a) + b = space.utf8_w(w_b) + return space.newbool(_compare_two_strings(a, b)) return compare_digest_buffer(space, w_a, w_b) @@ -68,7 +68,10 @@ a = a_buf.as_str() b = b_buf.as_str() + return space.newbool(_compare_two_strings(a, b)) + +def _compare_two_strings(a, b): with rffi.scoped_nonmovingbuffer(a) as a_buf: with rffi.scoped_nonmovingbuffer(b) as b_buf: result = pypy_tscmp(a_buf, b_buf, len(a), len(b)) - return space.newbool(rffi.cast(lltype.Bool, result)) + return rffi.cast(lltype.Bool, result) diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py --- a/pypy/module/pyexpat/interp_pyexpat.py +++ b/pypy/module/pyexpat/interp_pyexpat.py @@ -478,8 +478,8 @@ # I suppose this is a valid utf8, but there is noone to check # and noone to catch an error either try: - lgt, flag = rutf8.check_utf8(s, True) - return space.newutf8(s, lgt, flag) + lgt = rutf8.check_utf8(s, True) + return space.newutf8(s, lgt) except rutf8.CheckError: from pypy.interpreter import unicodehelper # get the correct error msg diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -209,10 +209,7 @@ def newbytes(self, x): return w_some_obj() - def newutf8(self, x, l, f): - return w_some_obj() - - def new_from_utf8(self, a): + def newutf8(self, x, l): return w_some_obj() def newunicode(self, a): diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -195,11 +195,11 @@ w_dict = self.getdict(space) if w_dict is None: w_dict = space.w_None - s, _, lgt, flag = str_decode_latin_1(''.join(self.getdata()), 'strict', + s, _, lgt = str_decode_latin_1(''.join(self.getdata()), 'strict', True, None) return space.newtuple([ space.type(self), space.newtuple([ - space.newutf8(s, lgt, flag), space.newtext('latin-1')]), + space.newutf8(s, lgt), space.newtext('latin-1')]), w_dict]) @staticmethod diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -1197,7 +1197,7 @@ unerase = staticmethod(unerase) def wrap(self, unwrapped): - return self.space.newutf8(unwrapped, len(unwrapped), rutf8.FLAG_ASCII) + return self.space.newutf8(unwrapped, len(unwrapped)) def unwrap(self, wrapped): return self.space.utf8_w(wrapped) @@ -1239,7 +1239,7 @@ ## return self.space.newlist_bytes(self.listview_bytes(w_dict)) def wrapkey(space, key): - return space.newutf8(key, len(key), rutf8.FLAG_ASCII) + return space.newutf8(key, len(key)) ## @jit.look_inside_iff(lambda self, w_dict: ## w_dict_unrolling_heuristic(w_dict)) diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -198,8 +198,8 @@ if self.w_valuedict is None: raise oefmt(space.w_TypeError, "format requires a mapping") if do_unicode: - lgt, flag = rutf8.check_utf8(key, True) - w_key = space.newutf8(key, lgt, flag) + lgt = 
rutf8.check_utf8(key, True) + w_key = space.newutf8(key, lgt) else: w_key = space.newbytes(key) return space.getitem(self.w_valuedict, w_key) @@ -330,8 +330,7 @@ space = self.space if do_unicode: cp = rutf8.codepoint_at_pos(self.fmt, self.fmtpos - 1) - flag = rutf8.get_flag_from_code(cp) - w_s = space.newutf8(rutf8.unichr_as_utf8(cp), 1, flag) + w_s = space.newutf8(rutf8.unichr_as_utf8(cp), 1) else: cp = ord(self.fmt[self.fmtpos - 1]) w_s = space.newbytes(chr(cp)) @@ -513,8 +512,8 @@ formatter = UnicodeFormatter(space, fmt, values_w, w_valuedict) result = formatter.format() # this can force strings, not sure if it's a problem or not - lgt, flag = rutf8.check_utf8(result, True) - return space.newutf8(result, lgt, flag) + lgt = rutf8.check_utf8(result, True) + return space.newutf8(result, lgt) def mod_format(space, w_format, w_values, do_unicode=False): if space.isinstance_w(w_values, space.w_tuple): diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py --- a/pypy/objspace/std/listobject.py +++ b/pypy/objspace/std/listobject.py @@ -1998,7 +1998,7 @@ def wrap(self, stringval): assert stringval is not None - return self.space.newutf8(stringval, len(stringval), rutf8.FLAG_ASCII) + return self.space.newutf8(stringval, len(stringval)) def unwrap(self, w_string): return self.space.utf8_w(w_string) diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py --- a/pypy/objspace/std/marshal_impl.py +++ b/pypy/objspace/std/marshal_impl.py @@ -403,8 +403,8 @@ @unmarshaller(TYPE_UNICODE) def unmarshal_unicode(space, u, tc): arg = u.get_str() - length, flag = unicodehelper.check_utf8_or_raise(space, arg) - return space.newutf8(arg, length, flag) + length = unicodehelper.check_utf8_or_raise(space, arg) + return space.newutf8(arg, length) @marshaller(W_SetObject) def marshal_set(space, w_set, m): diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py --- a/pypy/objspace/std/newformat.py +++ b/pypy/objspace/std/newformat.py @@ -51,8 +51,8 @@ if for_unicode: def wrap(self, u): - lgt, flag = rutf8.check_utf8(u, True) - return self.space.newutf8(u, lgt, flag) + lgt = rutf8.check_utf8(u, True) + return self.space.newutf8(u, lgt) else: def wrap(self, s): return self.space.newbytes(s) @@ -379,8 +379,8 @@ template = unicode_template_formatter(space, space.utf8_w(w_string)) r = template.build(args) - lgt, flag = rutf8.check_utf8(r, True) - return space.newutf8(r, lgt, flag) + lgt = rutf8.check_utf8(r, True) + return space.newutf8(r, lgt) else: template = str_template_formatter(space, space.bytes_w(w_string)) return space.newbytes(template.build(args)) @@ -416,8 +416,8 @@ if for_unicode: def wrap(self, u): - lgt, flag = rutf8.check_utf8(u, True) - return self.space.newutf8(u, lgt, flag) + lgt = rutf8.check_utf8(u, True) + return self.space.newutf8(u, lgt) else: def wrap(self, s): return self.space.newbytes(s) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -165,8 +165,8 @@ return self.newtext(x) if isinstance(x, unicode): x = x.encode('utf8') - lgt, flag = rutf8.check_utf8(x, True) - return self.newutf8(x, lgt, flag) + lgt = rutf8.check_utf8(x, True) + return self.newutf8(x, lgt) if isinstance(x, float): return W_FloatObject(x) if isinstance(x, W_Root): @@ -362,16 +362,10 @@ return self.w_None return self.newtext(s) - def newutf8(self, utf8s, length, flag): + def newutf8(self, utf8s, length): assert utf8s is not None assert isinstance(utf8s, str) 
- return W_UnicodeObject(utf8s, length, flag) - - def new_from_utf8(self, utf8s): - # XXX: kill me! - assert isinstance(utf8s, str) - length, flag = rutf8.check_utf8(utf8s, True) - return W_UnicodeObject(utf8s, length, flag) + return W_UnicodeObject(utf8s, length) def newfilename(self, s): assert isinstance(s, str) # on pypy3, this decodes the byte string diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py --- a/pypy/objspace/std/setobject.py +++ b/pypy/objspace/std/setobject.py @@ -1291,7 +1291,7 @@ return self.space.utf8_w(w_item) def wrap(self, item): - return self.space.newutf8(item, len(item), rutf8.FLAG_ASCII) + return self.space.newutf8(item, len(item)) def iter(self, w_set): return UnicodeIteratorImplementation(self.space, self, w_set) @@ -1495,7 +1495,7 @@ def next_entry(self): for key in self.iterator: - return self.space.newutf8(key, len(key), rutf8.FLAG_ASCII) + return self.space.newutf8(key, len(key)) else: return None diff --git a/pypy/objspace/std/test/test_index.py b/pypy/objspace/std/test/test_index.py --- a/pypy/objspace/std/test/test_index.py +++ b/pypy/objspace/std/test/test_index.py @@ -265,8 +265,7 @@ class AppTest_UnicodeTestCase(SeqTestCase, StringTestCase): def setup_method(self, method): SeqTestCase.setup_method(self, method) - self.w_seq = self.space.newutf8("this is a test", len("this is a test"), - rutf8.FLAG_ASCII) + self.w_seq = self.space.newutf8("this is a test", len("this is a test")) self.w_const = self.space.appexec([], """(): return unicode""") diff --git a/pypy/objspace/std/test/test_lengthhint.py b/pypy/objspace/std/test/test_lengthhint.py --- a/pypy/objspace/std/test/test_lengthhint.py +++ b/pypy/objspace/std/test/test_lengthhint.py @@ -74,8 +74,7 @@ self._test_length_hint(self.space.wrap('P' * self.SIZE)) def test_unicode(self): - self._test_length_hint(self.space.newutf8('Y' * self.SIZE, self.SIZE, - rutf8.FLAG_ASCII)) + self._test_length_hint(self.space.newutf8('Y' * self.SIZE, self.SIZE)) def test_tuple(self): self._test_length_hint(self.space.wrap(tuple(self.ITEMS))) diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py --- a/pypy/objspace/std/test/test_liststrategies.py +++ b/pypy/objspace/std/test/test_liststrategies.py @@ -600,9 +600,9 @@ def test_unicode(self): l1 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newbytes("zwei")]) assert isinstance(l1.strategy, BytesListStrategy) - l2 = W_ListObject(self.space, [self.space.newutf8("eins", 4, 2), self.space.newutf8("zwei", 4, 2)]) + l2 = W_ListObject(self.space, [self.space.newutf8("eins", 4), self.space.newutf8("zwei", 4)]) assert isinstance(l2.strategy, UnicodeListStrategy) - l3 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newutf8("zwei", 4, 2)]) + l3 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newutf8("zwei", 4)]) assert isinstance(l3.strategy, ObjectListStrategy) def test_listview_bytes(self): diff --git a/pypy/objspace/std/test/test_obj.py b/pypy/objspace/std/test/test_obj.py --- a/pypy/objspace/std/test/test_obj.py +++ b/pypy/objspace/std/test/test_obj.py @@ -17,7 +17,7 @@ cls.w_cpython_apptest = space.wrap(option.runappdirect and not hasattr(sys, 'pypy_translation_info')) def w_unwrap_wrap_unicode(space, w_obj): - return space.newutf8(space.utf8_w(w_obj), w_obj._length, w_obj._get_flag()) + return space.newutf8(space.utf8_w(w_obj), w_obj._length) cls.w_unwrap_wrap_unicode = space.wrap(gateway.interp2app(w_unwrap_wrap_unicode)) def 
w_unwrap_wrap_str(space, w_obj): return space.wrap(space.str_w(w_obj)) diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -27,12 +27,12 @@ assert len(warnings) == 2 def test_listview_unicode(self): - w_str = self.space.newutf8('abcd', 4, rutf8.FLAG_ASCII) + w_str = self.space.newutf8('abcd', 4) assert self.space.listview_utf8(w_str) == list("abcd") def test_new_shortcut(self): space = self.space - w_uni = self.space.newutf8('abcd', 4, rutf8.FLAG_ASCII) + w_uni = self.space.newutf8('abcd', 4) w_new = space.call_method( space.w_unicode, "__new__", space.w_unicode, w_uni) assert w_new is w_uni @@ -44,8 +44,8 @@ return # skip this case v = u[start : start + len1] space = self.space - w_u = space.newutf8(u.encode('utf8'), len(u), rutf8.FLAG_REGULAR) - w_v = space.newutf8(v.encode('utf8'), len(v), rutf8.FLAG_REGULAR) + w_u = space.newutf8(u.encode('utf8'), len(u)) + w_v = space.newutf8(v.encode('utf8'), len(v)) expected = u.find(v, start, start + len1) try: w_index = space.call_method(w_u, 'index', w_v, @@ -741,6 +741,8 @@ assert u'\u20ac'.encode('utf-8') == '\xe2\x82\xac' assert u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82' assert u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96' + assert u'\ud800\udc02'.encode('uTf-8') == '\xf0\x90\x80\x82' + assert u'\ud84d\udc56'.encode('Utf8') == '\xf0\xa3\x91\x96' assert u'\ud800'.encode('utf-8') == '\xed\xa0\x80' assert u'\udc00'.encode('utf-8') == '\xed\xb0\x80' assert (u'\ud800\udc02'*1000).encode('utf-8') == '\xf0\x90\x80\x82'*1000 diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -35,39 +35,22 @@ _immutable_fields_ = ['_utf8'] @enforceargs(utf8str=str) - def __init__(self, utf8str, length, flag): + def __init__(self, utf8str, length): assert isinstance(utf8str, str) assert length >= 0 self._utf8 = utf8str self._length = length - if flag == rutf8.FLAG_ASCII: - self._index_storage = rutf8.UTF8_IS_ASCII - elif flag == rutf8.FLAG_HAS_SURROGATES: - self._index_storage = rutf8.UTF8_HAS_SURROGATES - else: - assert flag == rutf8.FLAG_REGULAR - self._index_storage = rutf8.null_storage() + self._index_storage = rutf8.null_storage() # XXX checking, remove before any performance measurments # ifdef not_running_in_benchmark - lgt, flag_check = rutf8.check_utf8(utf8str, True) - assert lgt == length - if flag_check == rutf8.FLAG_ASCII: - # there are cases where we copy part of REULAR that happens - # to be ascii - assert flag in (rutf8.FLAG_ASCII, rutf8.FLAG_REGULAR) - else: - assert flag == flag_check - # the storage can be one of: - # - null, unicode with no surrogates - # - rutf8.UTF8_HAS_SURROGATES - # - rutf8.UTF8_IS_ASCII - # - malloced object, which means it has index, then - # _index_storage.flags determines the kind + if not we_are_translated(): + lgt = rutf8.check_utf8(utf8str, True) + assert lgt == length @staticmethod def from_utf8builder(builder): return W_UnicodeObject( - builder.build(), builder.get_length(), builder.get_flag()) + builder.build(), builder.get_length()) def __repr__(self): """representation for debugging purposes""" @@ -107,8 +90,6 @@ return space.text_w(space.str(self)) def utf8_w(self, space): From pypy.commits at gmail.com Fri Dec 8 21:46:52 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 08 Dec 2017 18:46:52 -0800 (PST) Subject: 
[pypy-commit] pypy unicode-utf8-test: fix space.newunicode Message-ID: <5a2b4e9c.919bdf0a.da6fd.80c3@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8-test Changeset: r93323:9fe5f582087d Date: 2017-12-08 13:37 +0000 http://bitbucket.org/pypy/pypy/changeset/9fe5f582087d/ Log: fix space.newunicode diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -375,8 +375,8 @@ # XXX: kill me! assert isinstance(unistr, unicode) utf8s = unistr.encode("utf-8") - length, flag = rutf8.check_utf8(utf8s, True) - return self.newutf8(utf8s, length, flag) + length = rutf8.check_utf8(utf8s, True) + return self.newutf8(utf8s, length) def type(self, w_obj): jit.promote(w_obj.__class__) From pypy.commits at gmail.com Fri Dec 8 21:46:57 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 08 Dec 2017 18:46:57 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-test: hg merge unicode-utf8 Message-ID: <5a2b4ea1.43b6df0a.b6245.6b75@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8-test Changeset: r93324:e6db8eec731a Date: 2017-12-09 02:46 +0000 http://bitbucket.org/pypy/pypy/changeset/e6db8eec731a/ Log: hg merge unicode-utf8 diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -1,3 +1,4 @@ +import pytest from hypothesis import given, strategies from rpython.rlib import rutf8 @@ -5,6 +6,7 @@ from pypy.interpreter.unicodehelper import str_decode_utf8 from pypy.interpreter.unicodehelper import utf8_encode_ascii, str_decode_ascii from pypy.interpreter import unicodehelper as uh +from pypy.module._codecs.interp_codecs import CodecState def decode_utf8(u): return str_decode_utf8(u, True, "strict", None) @@ -68,3 +70,16 @@ def test_unicode_escape(u): r = uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict", None) assert r == u.encode("unicode-escape") + +def test_encode_decimal(space): + assert uh.unicode_encode_decimal(u' 12, 34 ', None) == ' 12, 34 ' + with pytest.raises(ValueError): + uh.unicode_encode_decimal(u' 12, \u1234 '.encode('utf8'), None) + state = space.fromcache(CodecState) + handler = state.encode_error_handler + assert uh.unicode_encode_decimal( + u'u\u1234\u1235v'.encode('utf8'), 'replace', handler) == 'u??v' + + result = uh.unicode_encode_decimal( + u'12\u1234'.encode('utf8'), 'xmlcharrefreplace', handler) + assert result == '12ሴ' diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -7,6 +7,7 @@ from rpython.rlib.rstring import StringBuilder from rpython.rtyper.lltypesystem import rffi from pypy.module._codecs import interp_codecs +from pypy.module.unicodedata import unicodedb @specialize.memo() def decode_error_handler(space): @@ -35,6 +36,16 @@ space.newtext(msg)])) return raise_unicode_exception_encode +def default_error_encode( + errors, encoding, msg, u, startingpos, endingpos): + """A default handler, for tests""" + assert endingpos >= 0 + if errors == 'replace': + return '?', endingpos + if errors == 'ignore': + return '', endingpos + raise ValueError + def convert_arg_to_w_unicode(space, w_arg, strict=None): return space.convert_arg_to_w_unicode(w_arg) @@ -1458,3 +1469,70 @@ pos = rutf8.next_codepoint_pos(s, pos) return result.build() +# ____________________________________________________________ +# Decimal Encoder +def 
unicode_encode_decimal(s, errors, errorhandler=None): + """Converts whitespace to ' ', decimal characters to their + corresponding ASCII digit and all other Latin-1 characters except + \0 as-is. Characters outside this range (Unicode ordinals 1-256) + are treated as errors. This includes embedded NULL bytes. + """ + if errorhandler is None: + errorhandler = default_error_encode + result = StringBuilder(len(s)) + pos = 0 + i = 0 + it = rutf8.Utf8StringIterator(s) + for ch in it: + if unicodedb.isspace(ch): + result.append(' ') + i += 1 + continue + try: + decimal = unicodedb.decimal(ch) + except KeyError: + pass + else: + result.append(chr(48 + decimal)) + i += 1 + continue + if 0 < ch < 256: + result.append(chr(ch)) + i += 1 + continue + # All other characters are considered unencodable + start_index = i + i += 1 + while not it.done(): + ch = rutf8.codepoint_at_pos(s, it.get_pos()) + try: + if (0 < ch < 256 or unicodedb.isspace(ch) or + unicodedb.decimal(ch) >= 0): + break + except KeyError: + # not a decimal + pass + if it.done(): + break + ch = next(it) + i += 1 + end_index = i + msg = "invalid decimal Unicode string" + r, pos = errorhandler( + errors, 'decimal', msg, s, start_index, end_index) + for ch in rutf8.Utf8StringIterator(r): + if unicodedb.isspace(ch): + result.append(' ') + continue + try: + decimal = unicodedb.decimal(ch) + except KeyError: + pass + else: + result.append(chr(48 + decimal)) + continue + if 0 < ch < 256: + result.append(chr(ch)) + continue + errorhandler('strict', 'decimal', msg, s, start_index, end_index) + return result.build() diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -3,6 +3,7 @@ from rpython.rlib.objectmodel import specialize, always_inline, r_dict from rpython.rlib import rfloat, runicode, rutf8 from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.rlib.rarithmetic import r_uint from pypy.interpreter.error import oefmt from pypy.interpreter import unicodehelper @@ -366,7 +367,7 @@ return # help the annotator to know that we'll never go beyond # this point # - utf8_ch = rutf8.unichr_as_utf8(val, allow_surrogates=True) + utf8_ch = rutf8.unichr_as_utf8(r_uint(val), allow_surrogates=True) builder.append(utf8_ch) return i @@ -400,7 +401,7 @@ break elif ch == '\\' or ch < '\x20': self.pos = i-1 - return self.space.unicode_w(self.decode_string_escaped(start)) + return self.decode_string_escaped(start) strhash = intmask((1000003 * strhash) ^ ord(ll_chars[i])) bits |= ord(ch) length = i - start - 1 diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py --- a/pypy/module/_rawffi/alt/type_converter.py +++ b/pypy/module/_rawffi/alt/type_converter.py @@ -128,7 +128,7 @@ intval: lltype.Signed """ self.error(w_ffitype, w_obj) - + def handle_unichar(self, w_ffitype, w_obj, intval): """ intval: lltype.Signed @@ -174,7 +174,7 @@ def handle_struct_rawffi(self, w_ffitype, w_structinstance): """ This method should be killed as soon as we remove support for _rawffi structures - + w_structinstance: W_StructureInstance """ self.error(w_ffitype, w_structinstance) @@ -228,7 +228,7 @@ return space.newbytes(chr(ucharval)) elif w_ffitype.is_unichar(): wcharval = self.get_unichar(w_ffitype) - return space.newutf8(rutf8.unichr_as_utf8(wcharval), 1) + return space.newutf8(rutf8.unichr_as_utf8(r_uint(wcharval)), 1) elif w_ffitype.is_double(): return self._float(w_ffitype) elif 
w_ffitype.is_singlefloat(): @@ -349,7 +349,7 @@ def get_struct_rawffi(self, w_ffitype, w_structdescr): """ This should be killed as soon as we kill support for _rawffi structures - + Return type: lltype.Unsigned (the address of the structure) """ diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -596,9 +596,9 @@ return space.w_None wcharp_addr = rffi.cast(rffi.CWCHARP, address) if maxlength == -1: - s = rffi.wcharp2utf8(wcharp_addr) + s = rffi.wcharp2unicode(wcharp_addr) else: - s = rffi.wcharpsize2utf8(wcharp_addr, maxlength) + s = rffi.wcharp2unicoden(wcharp_addr, maxlength) return space.newunicode(s) @unwrap_spec(address=r_uint, maxlength=int) diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -1,7 +1,7 @@ from rpython.rlib import jit, rgc, rutf8 from rpython.rlib.buffer import RawBuffer from rpython.rlib.objectmodel import keepalive_until_here -from rpython.rlib.rarithmetic import ovfcheck, widen +from rpython.rlib.rarithmetic import ovfcheck, widen, r_uint from rpython.rlib.unroll import unrolling_iterable from rpython.rtyper.annlowlevel import llstr from rpython.rtyper.lltypesystem import lltype, rffi @@ -1013,7 +1013,7 @@ elif mytype.typecode == 'c': return space.newbytes(item) elif mytype.typecode == 'u': - code = ord(item) + code = r_uint(ord(item)) return space.newutf8(rutf8.unichr_as_utf8(code), 1) assert 0, "unreachable" diff --git a/pypy/module/cpyext/longobject.py b/pypy/module/cpyext/longobject.py --- a/pypy/module/cpyext/longobject.py +++ b/pypy/module/cpyext/longobject.py @@ -4,6 +4,7 @@ CONST_STRING, ADDR, CANNOT_FAIL) from pypy.objspace.std.longobject import W_LongObject from pypy.interpreter.error import OperationError +from pypy.interpreter.unicodehelper import wcharpsize2utf8 from pypy.module.cpyext.intobject import PyInt_AsUnsignedLongMask from rpython.rlib.rbigint import rbigint @@ -191,7 +192,7 @@ string, length gives the number of characters, and base is the radix for the conversion. The radix must be in the range [2, 36]; if it is out of range, ValueError will be raised.""" - w_value = space.newunicode(rffi.wcharpsize2unicode(u, length)) + w_value = space.newutf8(wcharpsize2utf8(space, u, length), length) w_base = space.newint(rffi.cast(lltype.Signed, base)) return space.call_function(space.w_long, w_value, w_base) diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py --- a/pypy/module/cpyext/object.py +++ b/pypy/module/cpyext/object.py @@ -246,7 +246,7 @@ the Python expression unicode(o). 
Called by the unicode() built-in function.""" if w_obj is None: - return space.newunicode(u"") + return space.newutf8("", 6) return space.call_function(space.w_unicode, w_obj) @cpython_api([PyObject, PyObject], rffi.INT_real, error=-1) @@ -302,7 +302,7 @@ if opid == Py_EQ: return 1 if opid == Py_NE: - return 0 + return 0 w_res = PyObject_RichCompare(space, w_o1, w_o2, opid_int) return int(space.is_true(w_res)) diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -3,7 +3,9 @@ from rpython.tool.sourcetools import func_renamer from pypy.interpreter.error import OperationError, oefmt -from pypy.interpreter.unicodehelper import wcharpsize2utf8 +from pypy.interpreter.unicodehelper import ( + wcharpsize2utf8, str_decode_utf_16_helper, str_decode_utf_32_helper, + unicode_encode_decimal) from pypy.module.unicodedata import unicodedb from pypy.module.cpyext.api import ( CANNOT_FAIL, Py_ssize_t, build_type_checkers_flags, cpython_api, @@ -568,15 +570,11 @@ else: errors = None - result, length, byteorder = runicode.str_decode_utf_16_helper( - string, size, errors, - True, # final ? false for multiple passes? - None, # errorhandler - byteorder) + result, _, length, byteorder = str_decode_utf_16_helper( + string, errors, final=True, errorhandler=None, byteorder=byteorder) if pbyteorder is not None: pbyteorder[0] = rffi.cast(rffi.INT, byteorder) - - return space.newunicode(result) + return space.newutf8(result, length) @cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING, rffi.INTP], PyObject) def PyUnicode_DecodeUTF32(space, s, size, llerrors, pbyteorder): @@ -624,15 +622,11 @@ else: errors = None - result, length, byteorder = runicode.str_decode_utf_32_helper( - string, size, errors, - True, # final ? false for multiple passes? - None, # errorhandler - byteorder) + result, _, length, byteorder = str_decode_utf_32_helper( + string, errors, final=True, errorhandler=None, byteorder=byteorder) if pbyteorder is not None: pbyteorder[0] = rffi.cast(rffi.INT, byteorder) - - return space.newunicode(result) + return space.newutf8(result, length) @cpython_api([rffi.CWCHARP, Py_ssize_t, rffi.CCHARP, CONST_STRING], rffi.INT_real, error=-1) @@ -650,14 +644,13 @@ Returns 0 on success, -1 on failure. """ - u = rffi.wcharpsize2unicode(s, length) + u = rffi.wcharpsize2utf8(s, length) if llerrors: errors = rffi.charp2str(llerrors) else: errors = None state = space.fromcache(CodecState) - result = runicode.unicode_encode_decimal(u, length, errors, - state.encode_error_handler) + result = unicode_encode_decimal(u, errors, state.encode_error_handler) i = len(result) output[i] = '\0' i -= 1 @@ -710,12 +703,17 @@ """Return 1 if substr matches str[start:end] at the given tail end (direction == -1 means to do a prefix match, direction == 1 a suffix match), 0 otherwise. 
Return -1 if an error occurred.""" + space.utf8_w(w_str) # type check + space.utf8_w(w_substr) w_start = space.newint(start) w_end = space.newint(end) if rffi.cast(lltype.Signed, direction) <= 0: - return space.call_method(w_str, "startswith", w_substr, w_start, w_end) + w_result = space.call_method( + w_str, "startswith", w_substr, w_start, w_end) else: - return space.call_method(w_str, "endswith", w_substr, w_start, w_end) + w_result = space.call_method( + w_str, "endswith", w_substr, w_start, w_end) + return space.int_w(w_result) @cpython_api([PyObject, PyObject, Py_ssize_t, Py_ssize_t], Py_ssize_t, error=-1) def PyUnicode_Count(space, w_str, w_substr, start, end): diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py --- a/pypy/module/pyexpat/interp_pyexpat.py +++ b/pypy/module/pyexpat/interp_pyexpat.py @@ -483,7 +483,7 @@ except rutf8.CheckError: from pypy.interpreter import unicodehelper # get the correct error msg - unicodehelper.str_decode_utf8(s, len(s), 'string', True, + unicodehelper.str_decode_utf8(s, 'string', True, unicodehelper.decode_error_handler(space)) assert False, "always raises" else: diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -3,7 +3,7 @@ from rpython.rlib import jit, rutf8 from rpython.rlib.objectmodel import specialize -from rpython.rlib.rarithmetic import INT_MAX +from rpython.rlib.rarithmetic import INT_MAX, r_uint from rpython.rlib.rfloat import DTSF_ALT, formatd, isnan, isinf from rpython.rlib.rstring import StringBuilder from rpython.rlib.unroll import unrolling_iterable @@ -330,7 +330,7 @@ space = self.space if do_unicode: cp = rutf8.codepoint_at_pos(self.fmt, self.fmtpos - 1) - w_s = space.newutf8(rutf8.unichr_as_utf8(cp), 1) + w_s = space.newutf8(rutf8.unichr_as_utf8(r_uint(cp)), 1) else: cp = ord(self.fmt[self.fmtpos - 1]) w_s = space.newbytes(chr(cp)) @@ -466,7 +466,7 @@ n = space.int_w(w_value) if do_unicode: try: - c = rutf8.unichr_as_utf8(n) + c = rutf8.unichr_as_utf8(r_uint(n)) except ValueError: raise oefmt(space.w_OverflowError, "unicode character code out of range") From pypy.commits at gmail.com Sat Dec 9 07:46:38 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 09 Dec 2017 04:46:38 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: disallow invalid unicode from array Message-ID: <5a2bdb2e.1098df0a.542b3.4b7e@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93325:fe927a5758d2 Date: 2017-12-09 14:45 +0200 http://bitbucket.org/pypy/pypy/changeset/fe927a5758d2/ Log: disallow invalid unicode from array diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -380,6 +380,7 @@ if len(s) % self.itemsize != 0: raise oefmt(self.space.w_ValueError, "string length not a multiple of item size") + self.check_valid_unicode(space, s) # empty for non-u arrays oldlen = self.len new = len(s) / self.itemsize if not new: @@ -710,6 +711,9 @@ s = "array('%s', %s)" % (self.typecode, space.text_w(r)) return space.newtext(s) + def check_valid_unicode(self, space, s): + pass # overwritten by u + W_ArrayBase.typedef = TypeDef( 'array.array', __new__ = interp2app(w_array), @@ -870,6 +874,18 @@ def get_buffer(self): return rffi.cast(mytype.arrayptrtype, self._buffer) + if mytype.unwrap == 'utf8_len_w': + def check_valid_unicode(self, space, s): + i = 0 + while i < len(s): + if s[i] != '\x00' or 
ord(s[i + 1]) > 0x10: + v = ((ord(s[i]) << 24) + (ord(s[i + 1]) << 16) + + (ord(s[i + 2]) << 8) + ord(s[i + 3])) + raise oefmt(space.w_ValueError, + "Character U+%s is not in range [U+0000, U+10ffff]", + hex(v)[2:]) + i += 4 + def item_w(self, w_item): space = self.space unwrap = getattr(space, mytype.unwrap) diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py --- a/pypy/module/array/test/test_array.py +++ b/pypy/module/array/test/test_array.py @@ -844,13 +844,7 @@ import sys if sys.maxunicode == 0xffff: skip("test for 32-bit unicodes") - a = self.array('u', b'\xff\xff\xff\xff') - assert len(a) == 1 - assert repr(a[0]) == "u'\Uffffffff'" - if sys.maxint == 2147483647: - assert ord(a[0]) == -1 - else: - assert ord(a[0]) == 4294967295 + raises(ValueError, self.array, 'u', b'\xff\xff\xff\xff') def test_weakref(self): import weakref From pypy.commits at gmail.com Sat Dec 9 08:11:35 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 09 Dec 2017 05:11:35 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix unicodedata module Message-ID: <5a2be107.7996df0a.384d8.c7bc@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93326:b4456e64ff3b Date: 2017-12-09 15:10 +0200 http://bitbucket.org/pypy/pypy/changeset/b4456e64ff3b/ Log: fix unicodedata module diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -7,11 +7,8 @@ from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.typedef import TypeDef, interp_attrproperty from rpython.rlib.rarithmetic import r_longlong -from rpython.rlib.objectmodel import we_are_translated -from rpython.rlib.runicode import MAXUNICODE from rpython.rlib.unicodedata import unicodedb_5_2_0, unicodedb_3_2_0 -from rpython.rlib.runicode import code_to_unichr, ord_accepts_surrogate -import sys +from rpython.rlib.rutf8 import Utf8StringBuilder, unichr_as_utf8 # Contants for Hangul characters @@ -30,49 +27,17 @@ # unicode code point. 
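# --- illustration only, not part of the patch above: the Hangul constants
# mentioned in the surrounding context exist to support the Unicode
# standard's algorithmic L/V/T syllable composition.  The numeric values
# below are the ones fixed by the standard; the exact constant names used
# in interp_ucd.py may differ from the ones assumed here.
SBase, LBase, VBase, TBase = 0xAC00, 0x1100, 0x1161, 0x11A7
VCount, TCount = 21, 28
# compose U+1100 (choseong kiyeok) + U+1161 (jungseong a) + U+11A8
# (jongseong kiyeok) into a single precomposed syllable:
L_index = 0x1100 - LBase
V_index = 0x1161 - VBase
T_index = 0x11A8 - TBase
syllable = SBase + (L_index * VCount + V_index) * TCount + T_index
assert syllable == 0xAC01    # U+AC01 HANGUL SYLLABLE GAG
# ---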
-if MAXUNICODE > 0xFFFF: - # Target is wide build - def unichr_to_code_w(space, w_unichr): - if not space.isinstance_w(w_unichr, space.w_unicode): - raise oefmt( - space.w_TypeError, 'argument 1 must be unicode, not %T', - w_unichr) +# Target is wide build +def unichr_to_code_w(space, w_unichr): + if not space.isinstance_w(w_unichr, space.w_unicode): + raise oefmt( + space.w_TypeError, 'argument 1 must be unicode, not %T', + w_unichr) - if not we_are_translated() and sys.maxunicode == 0xFFFF: - # Host CPython is narrow build, accept surrogates - try: - return ord_accepts_surrogate(space.unicode_w(w_unichr)) - except TypeError: - raise oefmt(space.w_TypeError, - "need a single Unicode character as parameter") - else: - if not space.len_w(w_unichr) == 1: - raise oefmt(space.w_TypeError, - "need a single Unicode character as parameter") - return space.int_w(space.ord(w_unichr)) - -else: - # Target is narrow build - def unichr_to_code_w(space, w_unichr): - if not space.isinstance_w(w_unichr, space.w_unicode): - raise oefmt( - space.w_TypeError, 'argument 1 must be unicode, not %T', - w_unichr) - - if not we_are_translated() and sys.maxunicode > 0xFFFF: - # Host CPython is wide build, forbid surrogates - if not space.len_w(w_unichr) == 1: - raise oefmt(space.w_TypeError, - "need a single Unicode character as parameter") - return space.int_w(space.ord(w_unichr)) - - else: - # Accept surrogates - try: - return ord_accepts_surrogate(space.unicode_w(w_unichr)) - except TypeError: - raise oefmt(space.w_TypeError, - "need a single Unicode character as parameter") + if not space.len_w(w_unichr) == 1: + raise oefmt(space.w_TypeError, + "need a single Unicode character as parameter") + return space.int_w(space.ord(w_unichr)) class UCD(W_Root): @@ -110,7 +75,7 @@ except KeyError: msg = space.mod(space.newtext("undefined character name '%s'"), space.newtext(name)) raise OperationError(space.w_KeyError, msg) - return space.newunicode(code_to_unichr(code)) + return space.newutf8(unichr_as_utf8(code), 1) def name(self, space, w_unichr, w_default=None): code = unichr_to_code_w(space, w_unichr) @@ -259,10 +224,10 @@ result[0] = ch if not composed: # If decomposed normalization we are done - return space.newunicode(u''.join([unichr(i) for i in result[:j]])) + return self.build(space, result, stop=j) if j <= 1: - return space.newunicode(u''.join([unichr(i) for i in result[:j]])) + return self.build(space, result, stop=j) current = result[0] starter_pos = 0 @@ -310,7 +275,13 @@ result[starter_pos] = current - return space.newunicode(u''.join([unichr(i) for i in result[:next_insert]])) + return self.build(space, result, stop=next_insert) + + def build(self, space, r, stop): + builder = Utf8StringBuilder(stop * 3) + for i in range(stop): + builder.append_code(r[i]) + return space.newutf8(builder.build(), stop) methods = {} diff --git a/pypy/module/unicodedata/test/test_hyp.py b/pypy/module/unicodedata/test/test_hyp.py --- a/pypy/module/unicodedata/test/test_hyp.py +++ b/pypy/module/unicodedata/test/test_hyp.py @@ -1,3 +1,4 @@ + import pytest try: from hypothesis import given, strategies as st, example, settings @@ -5,12 +6,14 @@ pytest.skip("hypothesis required") from pypy.module.unicodedata.interp_ucd import ucd +from rpython.rlib.rutf8 import get_utf8_length def make_normalization(space, NF_code): def normalize(s): - w_s = space.newunicode(s) + u = s.encode('utf8') + w_s = space.newutf8(u, get_utf8_length(u)) w_res = ucd.normalize(space, NF_code, w_s) - return space.unicode_w(w_res) + return 
space.utf8_w(w_res).decode('utf8') return normalize all_forms = ['NFC', 'NFD', 'NFKC', 'NFKD'] From pypy.commits at gmail.com Sat Dec 9 08:42:41 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 09 Dec 2017 05:42:41 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix pyexpat Message-ID: <5a2be851.8bdf1c0a.6767.6d15@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93327:7cd0df437105 Date: 2017-12-09 15:42 +0200 http://bitbucket.org/pypy/pypy/changeset/7cd0df437105/ Log: fix pyexpat diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py --- a/pypy/module/pyexpat/interp_pyexpat.py +++ b/pypy/module/pyexpat/interp_pyexpat.py @@ -587,21 +587,22 @@ def UnknownEncodingHandler(self, space, name, info): # Yes, supports only 8bit encodings - translationmap = space.unicode_w( + translationmap, lgt = space.utf8_len_w( space.call_method( space.newbytes(self.all_chars), "decode", space.newtext(name), space.newtext("replace"))) - if len(translationmap) != 256: + if lgt != 256: raise oefmt(space.w_ValueError, "multi-byte encodings are not supported") - for i in range(256): - c = translationmap[i] - if c == u'\ufffd': + i = 0 + for c in rutf8.Utf8StringIterator(translationmap): + if c == 0xfffd: info.c_map[i] = rffi.cast(rffi.INT, -1) else: info.c_map[i] = rffi.cast(rffi.INT, c) + i += 1 info.c_data = lltype.nullptr(rffi.VOIDP.TO) info.c_convert = lltype.nullptr(rffi.VOIDP.TO) info.c_release = lltype.nullptr(rffi.VOIDP.TO) From pypy.commits at gmail.com Sat Dec 9 08:45:13 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 09 Dec 2017 05:45:13 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix call_errorhandler() Message-ID: <5a2be8e9.43b6df0a.b6245.c361@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93328:08976d2691b2 Date: 2017-12-09 13:44 +0000 http://bitbucket.org/pypy/pypy/changeset/08976d2691b2/ Log: fix call_errorhandler() diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -70,9 +70,6 @@ raise oefmt(space.w_IndexError, "position %d from error handler out of bounds", newpos) - if newpos < startpos: - raise oefmt(space.w_IndexError, - "position %d from error handler did not progress", newpos) w_replace = space.convert_to_w_unicode(w_replace) return w_replace._utf8, newpos return call_errorhandler @@ -226,7 +223,7 @@ w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) start = w_obj._index_to_byte(start) - end = w_obj._index_to_byte(end) + end = w_obj._index_to_byte(end) builder = StringBuilder() pos = start obj = w_obj._utf8 From pypy.commits at gmail.com Sat Dec 9 09:01:45 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 09 Dec 2017 06:01:45 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix struct module Message-ID: <5a2becc9.968ddf0a.c12c8.e9ef@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93329:0d84c39f767e Date: 2017-12-09 16:01 +0200 http://bitbucket.org/pypy/pypy/changeset/0d84c39f767e/ Log: fix struct module diff --git a/pypy/module/struct/formatiterator.py b/pypy/module/struct/formatiterator.py --- a/pypy/module/struct/formatiterator.py +++ b/pypy/module/struct/formatiterator.py @@ -1,6 +1,6 @@ from rpython.rlib.rarithmetic import (r_uint, r_ulonglong, r_longlong, maxint, intmask) -from rpython.rlib import jit +from rpython.rlib import jit, rutf8 from rpython.rlib.objectmodel import specialize from rpython.rlib.rstruct.error 
import StructError from rpython.rlib.rstruct.formatiterator import FormatIterator @@ -107,7 +107,7 @@ def accept_unicode_arg(self): w_obj = self.accept_obj_arg() - return self.space.unicode_w(w_obj) + return self.space.utf8_len_w(w_obj) def accept_float_arg(self): w_obj = self.accept_obj_arg() @@ -191,6 +191,9 @@ assert 0, "unreachable" self.result_w.append(w_value) + def append_utf8(self, value): + self.result_w.append(self.space.newutf8(rutf8.unichr_as_utf8(value), 1)) + def get_pos(self): return self.pos diff --git a/rpython/rlib/rstruct/nativefmttable.py b/rpython/rlib/rstruct/nativefmttable.py --- a/rpython/rlib/rstruct/nativefmttable.py +++ b/rpython/rlib/rstruct/nativefmttable.py @@ -4,7 +4,7 @@ """ import struct -from rpython.rlib import jit, longlong2float +from rpython.rlib import rutf8, longlong2float from rpython.rlib.objectmodel import specialize from rpython.rlib.rarithmetic import r_singlefloat, widen, intmask from rpython.rlib.rstruct import standardfmttable as std @@ -139,17 +139,17 @@ from rpython.rlib.rstruct import unichar def pack_unichar(fmtiter): - unistr = fmtiter.accept_unicode_arg() - if len(unistr) != 1: + utf8, lgt = fmtiter.accept_unicode_arg() + if lgt != 1: raise StructError("expected a unicode string of length 1") - c = unistr[0] # string->char conversion for the annotator - unichar.pack_unichar(c, fmtiter.wbuf, fmtiter.pos) + uchr = rutf8.codepoint_at_pos(utf8, 0) + unichar.pack_codepoint(uchr, fmtiter.wbuf, fmtiter.pos) fmtiter.advance(unichar.UNICODE_SIZE) @specialize.argtype(0) def unpack_unichar(fmtiter): data = fmtiter.read(unichar.UNICODE_SIZE) - fmtiter.appendobj(unichar.unpack_unichar(data)) + fmtiter.append_utf8(unichar.unpack_codepoint(data)) native_fmttable['u'] = {'size': unichar.UNICODE_SIZE, 'alignment': unichar.UNICODE_SIZE, diff --git a/rpython/rlib/rstruct/unichar.py b/rpython/rlib/rstruct/unichar.py --- a/rpython/rlib/rstruct/unichar.py +++ b/rpython/rlib/rstruct/unichar.py @@ -3,12 +3,8 @@ """ import sys -from rpython.rlib.runicode import MAXUNICODE -if MAXUNICODE <= 65535: - UNICODE_SIZE = 2 -else: - UNICODE_SIZE = 4 +UNICODE_SIZE = 4 BIGENDIAN = sys.byteorder == "big" def pack_unichar(unich, buf, pos): @@ -34,7 +30,7 @@ buf.setitem(pos+2, chr((unich >> 16) & 0xFF)) buf.setitem(pos+3, chr(unich >> 24)) -def unpack_unichar(rawstring): +def unpack_codepoint(rawstring): assert len(rawstring) == UNICODE_SIZE if UNICODE_SIZE == 2: if BIGENDIAN: @@ -54,4 +50,7 @@ ord(rawstring[1]) << 8 | ord(rawstring[2]) << 16 | ord(rawstring[3]) << 24) - return unichr(n) + return n + +def unpack_unichar(rawstring): + return unichr(unpack_codepoint(rawstring)) From pypy.commits at gmail.com Sat Dec 9 09:05:21 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 09 Dec 2017 06:05:21 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-test: hg merge unicode-utf8 Message-ID: <5a2beda1.55a81c0a.b9422.7e2b@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8-test Changeset: r93330:a31f4ea5722a Date: 2017-12-09 14:04 +0000 http://bitbucket.org/pypy/pypy/changeset/a31f4ea5722a/ Log: hg merge unicode-utf8 diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -70,9 +70,6 @@ raise oefmt(space.w_IndexError, "position %d from error handler out of bounds", newpos) - if newpos < startpos: - raise oefmt(space.w_IndexError, - "position %d from error handler did not progress", newpos) w_replace = space.convert_to_w_unicode(w_replace) return 
w_replace._utf8, newpos return call_errorhandler @@ -226,7 +223,7 @@ w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) start = w_obj._index_to_byte(start) - end = w_obj._index_to_byte(end) + end = w_obj._index_to_byte(end) builder = StringBuilder() pos = start obj = w_obj._utf8 diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -380,6 +380,7 @@ if len(s) % self.itemsize != 0: raise oefmt(self.space.w_ValueError, "string length not a multiple of item size") + self.check_valid_unicode(space, s) # empty for non-u arrays oldlen = self.len new = len(s) / self.itemsize if not new: @@ -710,6 +711,9 @@ s = "array('%s', %s)" % (self.typecode, space.text_w(r)) return space.newtext(s) + def check_valid_unicode(self, space, s): + pass # overwritten by u + W_ArrayBase.typedef = TypeDef( 'array.array', __new__ = interp2app(w_array), @@ -870,6 +874,18 @@ def get_buffer(self): return rffi.cast(mytype.arrayptrtype, self._buffer) + if mytype.unwrap == 'utf8_len_w': + def check_valid_unicode(self, space, s): + i = 0 + while i < len(s): + if s[i] != '\x00' or ord(s[i + 1]) > 0x10: + v = ((ord(s[i]) << 24) + (ord(s[i + 1]) << 16) + + (ord(s[i + 2]) << 8) + ord(s[i + 3])) + raise oefmt(space.w_ValueError, + "Character U+%s is not in range [U+0000, U+10ffff]", + hex(v)[2:]) + i += 4 + def item_w(self, w_item): space = self.space unwrap = getattr(space, mytype.unwrap) diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py --- a/pypy/module/array/test/test_array.py +++ b/pypy/module/array/test/test_array.py @@ -844,13 +844,7 @@ import sys if sys.maxunicode == 0xffff: skip("test for 32-bit unicodes") - a = self.array('u', b'\xff\xff\xff\xff') - assert len(a) == 1 - assert repr(a[0]) == "u'\Uffffffff'" - if sys.maxint == 2147483647: - assert ord(a[0]) == -1 - else: - assert ord(a[0]) == 4294967295 + raises(ValueError, self.array, 'u', b'\xff\xff\xff\xff') def test_weakref(self): import weakref diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py --- a/pypy/module/pyexpat/interp_pyexpat.py +++ b/pypy/module/pyexpat/interp_pyexpat.py @@ -587,21 +587,22 @@ def UnknownEncodingHandler(self, space, name, info): # Yes, supports only 8bit encodings - translationmap = space.unicode_w( + translationmap, lgt = space.utf8_len_w( space.call_method( space.newbytes(self.all_chars), "decode", space.newtext(name), space.newtext("replace"))) - if len(translationmap) != 256: + if lgt != 256: raise oefmt(space.w_ValueError, "multi-byte encodings are not supported") - for i in range(256): - c = translationmap[i] - if c == u'\ufffd': + i = 0 + for c in rutf8.Utf8StringIterator(translationmap): + if c == 0xfffd: info.c_map[i] = rffi.cast(rffi.INT, -1) else: info.c_map[i] = rffi.cast(rffi.INT, c) + i += 1 info.c_data = lltype.nullptr(rffi.VOIDP.TO) info.c_convert = lltype.nullptr(rffi.VOIDP.TO) info.c_release = lltype.nullptr(rffi.VOIDP.TO) diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -7,11 +7,8 @@ from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.typedef import TypeDef, interp_attrproperty from rpython.rlib.rarithmetic import r_longlong -from rpython.rlib.objectmodel import we_are_translated -from rpython.rlib.runicode import MAXUNICODE from rpython.rlib.unicodedata 
import unicodedb_5_2_0, unicodedb_3_2_0 -from rpython.rlib.runicode import code_to_unichr, ord_accepts_surrogate -import sys +from rpython.rlib.rutf8 import Utf8StringBuilder, unichr_as_utf8 # Contants for Hangul characters @@ -30,49 +27,17 @@ # unicode code point. -if MAXUNICODE > 0xFFFF: - # Target is wide build - def unichr_to_code_w(space, w_unichr): - if not space.isinstance_w(w_unichr, space.w_unicode): - raise oefmt( - space.w_TypeError, 'argument 1 must be unicode, not %T', - w_unichr) +# Target is wide build +def unichr_to_code_w(space, w_unichr): + if not space.isinstance_w(w_unichr, space.w_unicode): + raise oefmt( + space.w_TypeError, 'argument 1 must be unicode, not %T', + w_unichr) - if not we_are_translated() and sys.maxunicode == 0xFFFF: - # Host CPython is narrow build, accept surrogates - try: - return ord_accepts_surrogate(space.unicode_w(w_unichr)) - except TypeError: - raise oefmt(space.w_TypeError, - "need a single Unicode character as parameter") - else: - if not space.len_w(w_unichr) == 1: - raise oefmt(space.w_TypeError, - "need a single Unicode character as parameter") - return space.int_w(space.ord(w_unichr)) - -else: - # Target is narrow build - def unichr_to_code_w(space, w_unichr): - if not space.isinstance_w(w_unichr, space.w_unicode): - raise oefmt( - space.w_TypeError, 'argument 1 must be unicode, not %T', - w_unichr) - - if not we_are_translated() and sys.maxunicode > 0xFFFF: - # Host CPython is wide build, forbid surrogates - if not space.len_w(w_unichr) == 1: - raise oefmt(space.w_TypeError, - "need a single Unicode character as parameter") - return space.int_w(space.ord(w_unichr)) - - else: - # Accept surrogates - try: - return ord_accepts_surrogate(space.unicode_w(w_unichr)) - except TypeError: - raise oefmt(space.w_TypeError, - "need a single Unicode character as parameter") + if not space.len_w(w_unichr) == 1: + raise oefmt(space.w_TypeError, + "need a single Unicode character as parameter") + return space.int_w(space.ord(w_unichr)) class UCD(W_Root): @@ -110,7 +75,7 @@ except KeyError: msg = space.mod(space.newtext("undefined character name '%s'"), space.newtext(name)) raise OperationError(space.w_KeyError, msg) - return space.newunicode(code_to_unichr(code)) + return space.newutf8(unichr_as_utf8(code), 1) def name(self, space, w_unichr, w_default=None): code = unichr_to_code_w(space, w_unichr) @@ -259,10 +224,10 @@ result[0] = ch if not composed: # If decomposed normalization we are done - return space.newunicode(u''.join([unichr(i) for i in result[:j]])) + return self.build(space, result, stop=j) if j <= 1: - return space.newunicode(u''.join([unichr(i) for i in result[:j]])) + return self.build(space, result, stop=j) current = result[0] starter_pos = 0 @@ -310,7 +275,13 @@ result[starter_pos] = current - return space.newunicode(u''.join([unichr(i) for i in result[:next_insert]])) + return self.build(space, result, stop=next_insert) + + def build(self, space, r, stop): + builder = Utf8StringBuilder(stop * 3) + for i in range(stop): + builder.append_code(r[i]) + return space.newutf8(builder.build(), stop) methods = {} diff --git a/pypy/module/unicodedata/test/test_hyp.py b/pypy/module/unicodedata/test/test_hyp.py --- a/pypy/module/unicodedata/test/test_hyp.py +++ b/pypy/module/unicodedata/test/test_hyp.py @@ -1,3 +1,4 @@ + import pytest try: from hypothesis import given, strategies as st, example, settings @@ -5,12 +6,14 @@ pytest.skip("hypothesis required") from pypy.module.unicodedata.interp_ucd import ucd +from rpython.rlib.rutf8 import 
get_utf8_length def make_normalization(space, NF_code): def normalize(s): - w_s = space.newunicode(s) + u = s.encode('utf8') + w_s = space.newutf8(u, get_utf8_length(u)) w_res = ucd.normalize(space, NF_code, w_s) - return space.unicode_w(w_res) + return space.utf8_w(w_res).decode('utf8') return normalize all_forms = ['NFC', 'NFD', 'NFKC', 'NFKD'] From pypy.commits at gmail.com Sat Dec 9 09:10:16 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 09 Dec 2017 06:10:16 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: random assert Message-ID: <5a2beec8.90a9df0a.50226.d96a@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93331:86769d294fd1 Date: 2017-12-09 16:09 +0200 http://bitbucket.org/pypy/pypy/changeset/86769d294fd1/ Log: random assert diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -75,6 +75,7 @@ except KeyError: msg = space.mod(space.newtext("undefined character name '%s'"), space.newtext(name)) raise OperationError(space.w_KeyError, msg) + assert code >= 0 return space.newutf8(unichr_as_utf8(code), 1) def name(self, space, w_unichr, w_default=None): From pypy.commits at gmail.com Sat Dec 9 09:35:55 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 09 Dec 2017 06:35:55 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix translation Message-ID: <5a2bf4cb.4fabdf0a.716d7.0046@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93332:5e36b35d5716 Date: 2017-12-09 14:35 +0000 http://bitbucket.org/pypy/pypy/changeset/5e36b35d5716/ Log: fix translation diff --git a/pypy/module/struct/formatiterator.py b/pypy/module/struct/formatiterator.py --- a/pypy/module/struct/formatiterator.py +++ b/pypy/module/struct/formatiterator.py @@ -192,7 +192,8 @@ self.result_w.append(w_value) def append_utf8(self, value): - self.result_w.append(self.space.newutf8(rutf8.unichr_as_utf8(value), 1)) + w_ch = self.space.newutf8(rutf8.unichr_as_utf8(r_uint(value), 1)) + self.result_w.append(w_ch) def get_pos(self): return self.pos From pypy.commits at gmail.com Sat Dec 9 09:42:36 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 09 Dec 2017 06:42:36 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: oops Message-ID: <5a2bf65c.0ab8df0a.31c81.dd4c@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93333:5ede24e505ee Date: 2017-12-09 14:42 +0000 http://bitbucket.org/pypy/pypy/changeset/5ede24e505ee/ Log: oops diff --git a/pypy/module/struct/formatiterator.py b/pypy/module/struct/formatiterator.py --- a/pypy/module/struct/formatiterator.py +++ b/pypy/module/struct/formatiterator.py @@ -192,7 +192,7 @@ self.result_w.append(w_value) def append_utf8(self, value): - w_ch = self.space.newutf8(rutf8.unichr_as_utf8(r_uint(value), 1)) + w_ch = self.space.newutf8(rutf8.unichr_as_utf8(r_uint(value)), 1) self.result_w.append(w_ch) def get_pos(self): From pypy.commits at gmail.com Sat Dec 9 09:51:40 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 09 Dec 2017 06:51:40 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-test: hg merge unicode-utf8 Message-ID: <5a2bf87c.2281df0a.b5da0.0697@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8-test Changeset: r93334:1bb5950b8ff5 Date: 2017-12-09 14:51 +0000 http://bitbucket.org/pypy/pypy/changeset/1bb5950b8ff5/ Log: hg merge unicode-utf8 diff --git a/pypy/module/struct/formatiterator.py b/pypy/module/struct/formatiterator.py --- a/pypy/module/struct/formatiterator.py +++ 
b/pypy/module/struct/formatiterator.py @@ -1,6 +1,6 @@ from rpython.rlib.rarithmetic import (r_uint, r_ulonglong, r_longlong, maxint, intmask) -from rpython.rlib import jit +from rpython.rlib import jit, rutf8 from rpython.rlib.objectmodel import specialize from rpython.rlib.rstruct.error import StructError from rpython.rlib.rstruct.formatiterator import FormatIterator @@ -107,7 +107,7 @@ def accept_unicode_arg(self): w_obj = self.accept_obj_arg() - return self.space.unicode_w(w_obj) + return self.space.utf8_len_w(w_obj) def accept_float_arg(self): w_obj = self.accept_obj_arg() @@ -191,6 +191,10 @@ assert 0, "unreachable" self.result_w.append(w_value) + def append_utf8(self, value): + w_ch = self.space.newutf8(rutf8.unichr_as_utf8(r_uint(value)), 1) + self.result_w.append(w_ch) + def get_pos(self): return self.pos diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -75,6 +75,7 @@ except KeyError: msg = space.mod(space.newtext("undefined character name '%s'"), space.newtext(name)) raise OperationError(space.w_KeyError, msg) + assert code >= 0 return space.newutf8(unichr_as_utf8(code), 1) def name(self, space, w_unichr, w_default=None): diff --git a/rpython/rlib/rstruct/nativefmttable.py b/rpython/rlib/rstruct/nativefmttable.py --- a/rpython/rlib/rstruct/nativefmttable.py +++ b/rpython/rlib/rstruct/nativefmttable.py @@ -4,7 +4,7 @@ """ import struct -from rpython.rlib import jit, longlong2float +from rpython.rlib import rutf8, longlong2float from rpython.rlib.objectmodel import specialize from rpython.rlib.rarithmetic import r_singlefloat, widen, intmask from rpython.rlib.rstruct import standardfmttable as std @@ -139,17 +139,17 @@ from rpython.rlib.rstruct import unichar def pack_unichar(fmtiter): - unistr = fmtiter.accept_unicode_arg() - if len(unistr) != 1: + utf8, lgt = fmtiter.accept_unicode_arg() + if lgt != 1: raise StructError("expected a unicode string of length 1") - c = unistr[0] # string->char conversion for the annotator - unichar.pack_unichar(c, fmtiter.wbuf, fmtiter.pos) + uchr = rutf8.codepoint_at_pos(utf8, 0) + unichar.pack_codepoint(uchr, fmtiter.wbuf, fmtiter.pos) fmtiter.advance(unichar.UNICODE_SIZE) @specialize.argtype(0) def unpack_unichar(fmtiter): data = fmtiter.read(unichar.UNICODE_SIZE) - fmtiter.appendobj(unichar.unpack_unichar(data)) + fmtiter.append_utf8(unichar.unpack_codepoint(data)) native_fmttable['u'] = {'size': unichar.UNICODE_SIZE, 'alignment': unichar.UNICODE_SIZE, diff --git a/rpython/rlib/rstruct/unichar.py b/rpython/rlib/rstruct/unichar.py --- a/rpython/rlib/rstruct/unichar.py +++ b/rpython/rlib/rstruct/unichar.py @@ -3,12 +3,8 @@ """ import sys -from rpython.rlib.runicode import MAXUNICODE -if MAXUNICODE <= 65535: - UNICODE_SIZE = 2 -else: - UNICODE_SIZE = 4 +UNICODE_SIZE = 4 BIGENDIAN = sys.byteorder == "big" def pack_unichar(unich, buf, pos): @@ -34,7 +30,7 @@ buf.setitem(pos+2, chr((unich >> 16) & 0xFF)) buf.setitem(pos+3, chr(unich >> 24)) -def unpack_unichar(rawstring): +def unpack_codepoint(rawstring): assert len(rawstring) == UNICODE_SIZE if UNICODE_SIZE == 2: if BIGENDIAN: @@ -54,4 +50,7 @@ ord(rawstring[1]) << 8 | ord(rawstring[2]) << 16 | ord(rawstring[3]) << 24) - return unichr(n) + return n + +def unpack_unichar(rawstring): + return unichr(unpack_codepoint(rawstring)) From pypy.commits at gmail.com Sat Dec 9 13:38:37 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 09 Dec 2017 10:38:37 -0800 (PST) 
Subject: [pypy-commit] pypy unicode-utf8-re: in-progress Message-ID: <5a2c2dad.94571c0a.500a7.b745@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93335:2114fde9ada8 Date: 2017-12-09 19:38 +0100 http://bitbucket.org/pypy/pypy/changeset/2114fde9ada8/ Log: in-progress diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -34,11 +34,14 @@ def slice_w(space, ctx, start, end, w_default): - if 0 <= start <= end: + # 'start' and 'end' are byte positions + if ctx.ZERO <= start <= end: if isinstance(ctx, rsre_core.BufMatchContext): return space.newbytes(ctx._buffer.getslice(start, end, 1, end-start)) if isinstance(ctx, rsre_core.StrMatchContext): + start = ctx._real_pos(start) + end = ctx._real_pos(end) return space.newbytes(ctx._string[start:end]) elif isinstance(ctx, rsre_utf8.Utf8MatchContext): XXXXXXX @@ -60,6 +63,7 @@ return None result = [-1] * (2 * num_groups) mark = ctx.match_marks + XXX while mark is not None: index = jit.promote(mark.gid) if result[index] == -1: @@ -70,6 +74,7 @@ @jit.look_inside_iff(lambda space, ctx, fmarks, num_groups, w_default: jit.isconstant(num_groups)) def allgroups_w(space, ctx, fmarks, num_groups, w_default): + XXX grps = [slice_w(space, ctx, fmarks[i * 2], fmarks[i * 2 + 1], w_default) for i in range(num_groups)] return space.newtuple(grps) @@ -138,8 +143,7 @@ pos = len(str) if endpos > len(str): endpos = len(str) - return rsre_core.StrMatchContext(self.code, str, - pos, endpos, self.flags) + return self._make_str_match_context(str, pos, endpos) else: buf = space.readbuf_w(w_string) size = buf.getlength() @@ -151,6 +155,11 @@ return rsre_core.BufMatchContext(self.code, buf, pos, endpos, self.flags) + def _make_str_match_context(self, str, pos, endpos): + # for tests to override + return rsre_core.StrMatchContext(self.code, str, + pos, endpos, self.flags) + def getmatch(self, ctx, found): if found: return W_SRE_Match(self, ctx) @@ -191,6 +200,7 @@ matchlist_w.append(w_item) no_progress = (ctx.match_start == ctx.match_end) ctx.reset(ctx.match_end + no_progress) + XXX # ^^^ return space.newlist(matchlist_w) @unwrap_spec(pos=int, endpos=int) @@ -215,6 +225,7 @@ if ctx.match_start == ctx.end: # or end of string break ctx.reset(ctx.match_end + 1) + XXX # ^^^ continue splitlist.append(slice_w(space, ctx, last, ctx.match_start, space.w_None)) @@ -283,7 +294,7 @@ sublist_w = strbuilder = unicodebuilder = None if use_builder: if filter_as_unicode is not None: - unicodebuilder = Utf8StringBuilder(ctx.end) + unicodebuilder = XXX #Utf8StringBuilder(ctx.end) else: assert filter_as_string is not None strbuilder = StringBuilder(ctx.end) @@ -499,18 +510,30 @@ @unwrap_spec(w_groupnum=WrappedDefault(0)) def start_w(self, w_groupnum): - return self.space.newint(self.do_span(w_groupnum)[0]) + start, end = self.do_span(w_groupnum) + start = self.bytepos_to_charindex(start) + return self.space.newint(start) @unwrap_spec(w_groupnum=WrappedDefault(0)) def end_w(self, w_groupnum): - return self.space.newint(self.do_span(w_groupnum)[1]) + start, end = self.do_span(w_groupnum) + end = self.bytepos_to_charindex(end) + return self.space.newint(end) @unwrap_spec(w_groupnum=WrappedDefault(0)) def span_w(self, w_groupnum): start, end = self.do_span(w_groupnum) + start = self.bytepos_to_charindex(start) + end = self.bytepos_to_charindex(end) return self.space.newtuple([self.space.newint(start), self.space.newint(end)]) + def bytepos_to_charindex(self, bytepos): + # Transform 
a 'byte position', as returned by all methods from + # rsre_core, back into a 'character index'. This is for UTF8 + # handling. + XXXX + def flatten_marks(self): if self.flatten_cache is None: num_groups = self.srepat.num_groups diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py --- a/pypy/module/_sre/test/test_app_sre.py +++ b/pypy/module/_sre/test/test_app_sre.py @@ -4,6 +4,8 @@ import py from py.test import raises, skip from pypy.interpreter.gateway import app2interp_temp +from pypy.module._sre import interp_sre +from rpython.rlib.rsre.test import support def init_app_test(cls, space): @@ -20,6 +22,33 @@ sys.path.pop(0) """) +def _test_sre_ctx_(self, str, start, end): + # Use the MatchContextForTests class, which handles Position + # instances instead of plain integers. This is used to detect when + # we're accepting or escaping a Position to app-level, which we + # should not: Positions are meant to be byte indexes inside a + # possibly UTF8 string, not character indexes. + start = support.Position(start) + end = support.Position(end) + return support.MatchContextForTests(self.code, str, start, end, self.flags) + +def _bytepos_to_charindex(self, bytepos): + return self.ctx._real_pos(bytepos) + +def setup_module(mod): + mod._org_maker = ( + interp_sre.W_SRE_Pattern._make_str_match_context, + interp_sre.W_SRE_Match.bytepos_to_charindex, + ) + interp_sre.W_SRE_Pattern._make_str_match_context = _test_sre_ctx_ + interp_sre.W_SRE_Match.bytepos_to_charindex = _bytepos_to_charindex + +def teardown_module(mod): + ( + interp_sre.W_SRE_Pattern._make_str_match_context, + interp_sre.W_SRE_Match.bytepos_to_charindex, + ) = mod._org_maker + class AppTestSrePy: def test_magic(self): diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -296,6 +296,9 @@ def get_single_byte(self, base_position, index): return self.str(base_position + index) + def _real_pos(self, index): + return index # overridden by tests + def fresh_copy(self, start): return StrMatchContext(self.pattern, self._string, start, self.end, self.flags) From pypy.commits at gmail.com Sat Dec 9 14:12:32 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 09 Dec 2017 11:12:32 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: Getting there Message-ID: <5a2c35a0.f3c4df0a.dbd90.a85e@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93336:170afb57631b Date: 2017-12-09 20:11 +0100 http://bitbucket.org/pypy/pypy/changeset/170afb57631b/ Log: Getting there diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -44,9 +44,8 @@ end = ctx._real_pos(end) return space.newbytes(ctx._string[start:end]) elif isinstance(ctx, rsre_utf8.Utf8MatchContext): - XXXXXXX - s = ctx._unicodestr[start:end] - lgt = rutf8.check_utf8(s, True) + s = ctx._utf8[start:end] + lgt = rutf8.get_utf8_length(s) return space.newutf8(s, lgt) else: # unreachable @@ -59,11 +58,11 @@ # Returns a list of RPython-level integers. # Unlike the app-level groups() method, groups are numbered from 0 # and the returned list does not start with the whole match range. + # The integers are byte positions, not character indexes (for utf8). 
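# (Illustration, not part of the patch: the byte-position/character-index
# distinction stated above, in plain Python, using the same sample string
# as the AppTestUnicodeExtra tests added later in this series.)
sample = u"\u1233\u1234\u1235".encode("utf-8")       # 9 bytes, 3 characters
byte_pos = sample.find(u"\u1234".encode("utf-8"))    # -> 3, a byte position
char_index = len(sample[:byte_pos].decode("utf-8"))  # -> 1, what re users see
assert (byte_pos, char_index) == (3, 1)
# rsre keeps working in the byte domain throughout; converting back to
# character indexes happens once at the boundary, e.g. in
# bytepos_to_charindex().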
if num_groups == 0: return None result = [-1] * (2 * num_groups) mark = ctx.match_marks - XXX while mark is not None: index = jit.promote(mark.gid) if result[index] == -1: @@ -74,7 +73,6 @@ @jit.look_inside_iff(lambda space, ctx, fmarks, num_groups, w_default: jit.isconstant(num_groups)) def allgroups_w(space, ctx, fmarks, num_groups, w_default): - XXX grps = [slice_w(space, ctx, fmarks[i * 2], fmarks[i * 2 + 1], w_default) for i in range(num_groups)] return space.newtuple(grps) @@ -117,12 +115,7 @@ if endpos < pos: endpos = pos if space.isinstance_w(w_string, space.w_unicode): - # xxx fish for the _index_storage - w_string = space.convert_arg_to_w_unicode(w_string) - utf8str = w_string._utf8 - length = w_string._len() - index_storage = w_string._get_index_storage() - # + utf8str, length = space.utf8_len_w(w_string) if pos <= 0: bytepos = 0 elif pos >= length: @@ -135,8 +128,7 @@ endbytepos = rutf8.codepoint_at_index(utf8str, index_storage, endpos) return rsre_utf8.Utf8MatchContext( - self.code, unicodestr, index_storage, - bytepos, endbytepos, self.flags) + self.code, utf8str, bytepos, endbytepos, self.flags) elif space.isinstance_w(w_string, space.w_bytes): str = space.bytes_w(w_string) if pos > len(str): @@ -198,9 +190,10 @@ w_item = allgroups_w(space, ctx, fmarks, num_groups, w_emptystr) matchlist_w.append(w_item) - no_progress = (ctx.match_start == ctx.match_end) - ctx.reset(ctx.match_end + no_progress) - XXX # ^^^ + reset_at = ctx.match_end + if ctx.match_start == ctx.match_end: + reset_at = ctx.next(reset_at) + ctx.reset(reset_at) return space.newlist(matchlist_w) @unwrap_spec(pos=int, endpos=int) @@ -216,16 +209,15 @@ space = self.space splitlist = [] n = 0 - last = 0 ctx = self.make_ctx(w_string) + last = ctx.ZERO while not maxsplit or n < maxsplit: if not searchcontext(space, ctx): break if ctx.match_start == ctx.match_end: # zero-width match if ctx.match_start == ctx.end: # or end of string break - ctx.reset(ctx.match_end + 1) - XXX # ^^^ + ctx.reset(ctx.next(ctx.match_end)) continue splitlist.append(slice_w(space, ctx, last, ctx.match_start, space.w_None)) @@ -254,20 +246,20 @@ def subx(self, w_ptemplate, w_string, count): space = self.space - # use a (much faster) string/unicode builder if w_ptemplate and + # use a (much faster) string builder (possibly utf8) if w_ptemplate and # w_string are both string or both unicode objects, and if w_ptemplate # is a literal - use_builder = False - filter_as_unicode = filter_as_string = None + use_builder = '\x00' # or 'S'tring or 'U'nicode/UTF8 + filter_as_string = None if space.is_true(space.callable(w_ptemplate)): w_filter = w_ptemplate filter_is_callable = True else: if space.isinstance_w(w_ptemplate, space.w_unicode): - filter_as_unicode = space.utf8_w(w_ptemplate) - literal = '\\' not in filter_as_unicode - use_builder = ( - space.isinstance_w(w_string, space.w_unicode) and literal) + filter_as_string = space.utf8_w(w_ptemplate) + literal = '\\' not in filter_as_string + if space.isinstance_w(w_string, space.w_unicode) and literal: + use_builder = 'U' else: try: filter_as_string = space.bytes_w(w_ptemplate) @@ -277,8 +269,8 @@ literal = False else: literal = '\\' not in filter_as_string - use_builder = ( - space.isinstance_w(w_string, space.w_bytes) and literal) + if space.isinstance_w(w_string, space.w_bytes) and literal: + use_builder = 'S' if literal: w_filter = w_ptemplate filter_is_callable = False @@ -291,16 +283,14 @@ # # XXX this is a bit of a mess, but it improves performance a lot ctx = self.make_ctx(w_string) - sublist_w = 
strbuilder = unicodebuilder = None - if use_builder: - if filter_as_unicode is not None: - unicodebuilder = XXX #Utf8StringBuilder(ctx.end) - else: - assert filter_as_string is not None - strbuilder = StringBuilder(ctx.end) + sublist_w = strbuilder = None + if use_builder != '\x00': + assert filter_as_string is not None + strbuilder = StringBuilder(ctx.end) else: sublist_w = [] - n = last_pos = 0 + n = 0 + last_pos = ctx.ZERO while not count or n < count: sub_jitdriver.jit_merge_point( self=self, @@ -310,9 +300,7 @@ ctx=ctx, w_filter=w_filter, strbuilder=strbuilder, - unicodebuilder=unicodebuilder, filter_as_string=filter_as_string, - filter_as_unicode=filter_as_unicode, count=count, w_string=w_string, n=n, last_pos=last_pos, sublist_w=sublist_w @@ -323,10 +311,10 @@ if last_pos < ctx.match_start: _sub_append_slice( ctx, space, use_builder, sublist_w, - strbuilder, unicodebuilder, last_pos, ctx.match_start) + strbuilder, last_pos, ctx.match_start) start = ctx.match_end if start == ctx.match_start: - start += 1 + start = ctx.next(start) if not (last_pos == ctx.match_start == ctx.match_end and n > 0): # the above ignores empty matches on latest position @@ -334,18 +322,13 @@ w_match = self.getmatch(ctx, True) w_piece = space.call_function(w_filter, w_match) if not space.is_w(w_piece, space.w_None): - assert strbuilder is None and unicodebuilder is None - assert not use_builder + assert strbuilder is None + assert use_builder == '\x00' sublist_w.append(w_piece) else: - if use_builder: - if strbuilder is not None: - assert filter_as_string is not None - strbuilder.append(filter_as_string) - else: - assert unicodebuilder is not None - assert filter_as_unicode is not None - unicodebuilder.append(filter_as_unicode) + if use_builder != '\x00': + assert filter_as_string is not None + strbuilder.append(filter_as_string) else: sublist_w.append(w_filter) last_pos = ctx.match_end @@ -356,14 +339,16 @@ if last_pos < ctx.end: _sub_append_slice(ctx, space, use_builder, sublist_w, - strbuilder, unicodebuilder, last_pos, ctx.end) - if use_builder: - if strbuilder is not None: - return space.newbytes(strbuilder.build()), n + strbuilder, last_pos, ctx.end) + if use_builder != '\x00': + result_bytes = strbuilder.build() + if use_builder == 'S': + return space.newbytes(result_bytes), n + elif use_builder == 'U': + return space.newutf8(result_bytes, + rutf8.get_utf8_length(result_bytes)), n else: - assert unicodebuilder is not None - return space.newutf8(unicodebuilder.build(), - unicodebuilder.get_length()), n + raise AssertionError(use_builder) else: if space.isinstance_w(w_string, space.w_unicode): w_emptystr = space.newutf8('', 0) @@ -376,27 +361,27 @@ sub_jitdriver = jit.JitDriver( reds="""count n last_pos ctx w_filter - strbuilder unicodebuilder + strbuilder filter_as_string - filter_as_unicode w_string sublist_w self""".split(), greens=["filter_is_callable", "use_builder", "filter_type", "ctx.pattern"]) def _sub_append_slice(ctx, space, use_builder, sublist_w, - strbuilder, unicodebuilder, start, end): - if use_builder: + strbuilder, start, end): + if use_builder != '\x00': if isinstance(ctx, rsre_core.BufMatchContext): - assert strbuilder is not None + assert use_builder == 'S' return strbuilder.append(ctx._buffer.getslice(start, end, 1, end-start)) if isinstance(ctx, rsre_core.StrMatchContext): - assert strbuilder is not None + assert use_builder == 'S' + start = ctx._real_pos(start) + end = ctx._real_pos(end) return strbuilder.append_slice(ctx._string, start, end) elif isinstance(ctx, 
rsre_utf8.Utf8MatchContext): - XXXXXXX - assert unicodebuilder is not None - return unicodebuilder.append_slice(ctx._unicodestr, start, end) + assert use_builder == 'U' + return strbuilder.append_slice(ctx._utf8, start, end) assert 0, "unreachable" else: sublist_w.append(slice_w(space, ctx, start, end, space.w_None)) @@ -523,6 +508,9 @@ @unwrap_spec(w_groupnum=WrappedDefault(0)) def span_w(self, w_groupnum): start, end = self.do_span(w_groupnum) + return self.new_charindex_tuple(start, end) + + def new_charindex_tuple(self, start, end): start = self.bytepos_to_charindex(start) end = self.bytepos_to_charindex(end) return self.space.newtuple([self.space.newint(start), @@ -541,6 +529,8 @@ return self.flatten_cache def do_span(self, w_arg): + # return a pair of integers, which are byte positions, not + # character indexes (for utf8) space = self.space try: groupnum = space.int_w(w_arg) @@ -588,10 +578,10 @@ return space.w_None def fget_pos(self, space): - return space.newint(self.ctx.original_pos) + return space.newint(self.bytepos_to_charindex(self.ctx.original_pos)) def fget_endpos(self, space): - return space.newint(self.ctx.end) + return space.newint(self.bytepos_to_charindex(self.ctx.end)) def fget_regs(self, space): space = self.space @@ -599,11 +589,11 @@ num_groups = self.srepat.num_groups result_w = [None] * (num_groups + 1) ctx = self.ctx - result_w[0] = space.newtuple([space.newint(ctx.match_start), - space.newint(ctx.match_end)]) + result_w[0] = self.new_charindex_tuple(ctx.match_start, + ctx.match_end) for i in range(num_groups): - result_w[i + 1] = space.newtuple([space.newint(fmarks[i*2]), - space.newint(fmarks[i*2+1])]) + result_w[i + 1] = self.new_charindex_tuple(fmarks[i*2], + fmarks[i*2+1]) return space.newtuple(result_w) def fget_string(self, space): @@ -680,12 +670,14 @@ if found: ctx = self.ctx nextstart = ctx.match_end - nextstart += (ctx.match_start == nextstart) + if ctx.match_start == nextstart: + nextstart = ctx.next(nextstart) self.ctx = ctx.fresh_copy(nextstart) match = W_SRE_Match(self.srepat, ctx) return match else: - self.ctx.match_start += 1 # obscure corner case + # obscure corner case + self.ctx.match_start = self.ctx.next(self.ctx.match_start) return None W_SRE_Scanner.typedef = TypeDef( diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py --- a/pypy/module/_sre/test/test_app_sre.py +++ b/pypy/module/_sre/test/test_app_sre.py @@ -33,7 +33,9 @@ return support.MatchContextForTests(self.code, str, start, end, self.flags) def _bytepos_to_charindex(self, bytepos): - return self.ctx._real_pos(bytepos) + if isinstance(self.ctx, support.MatchContextForTests): + return self.ctx._real_pos(bytepos) + return bytepos def setup_module(mod): mod._org_maker = ( diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -165,14 +165,13 @@ def maximum_distance(self, position_low, position_high): raise NotImplementedError @not_rpython - def bytes_difference(self, position1, position2): - raise NotImplementedError - @not_rpython def get_single_byte(self, base_position, index): raise NotImplementedError - @not_rpython + + def bytes_difference(self, position1, position2): + return position1 - position2 def go_forward_by_bytes(self, base_position, index): - raise NotImplementedError + return base_position + index def get_mark(self, gid): return find_mark(self.match_marks, gid) @@ -243,12 +242,6 @@ def maximum_distance(self, position_low, 
position_high): return position_high - position_low - def bytes_difference(self, position1, position2): - return position1 - position2 - - def go_forward_by_bytes(self, base_position, index): - return base_position + index - class BufMatchContext(FixedMatchContext): """Concrete subclass for matching in a buffer.""" diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py --- a/rpython/rlib/rsre/test/support.py +++ b/rpython/rlib/rsre/test/support.py @@ -104,6 +104,10 @@ assert isinstance(index, int) return Position(base_position._p + index) + def fresh_copy(self, start): + return MatchContextForTests(self.pattern, self._string, start, + self.end, self.flags) + def match(pattern, string, start=0, end=sys.maxint, flags=0, fullmatch=False): start, end = _adjust(start, end, len(string)) From pypy.commits at gmail.com Sat Dec 9 14:30:40 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 09 Dec 2017 11:30:40 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: Tests and fixes Message-ID: <5a2c39e0.82ce1c0a.e7949.a009@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93337:6b113f6d5350 Date: 2017-12-09 20:30 +0100 http://bitbucket.org/pypy/pypy/changeset/6b113f6d5350/ Log: Tests and fixes diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -115,7 +115,9 @@ if endpos < pos: endpos = pos if space.isinstance_w(w_string, space.w_unicode): - utf8str, length = space.utf8_len_w(w_string) + w_unicode_obj = space.convert_arg_to_w_unicode(w_string) + utf8str = w_unicode_obj._utf8 + length = w_unicode_obj._len() if pos <= 0: bytepos = 0 elif pos >= length: @@ -127,8 +129,12 @@ else: endbytepos = rutf8.codepoint_at_index(utf8str, index_storage, endpos) - return rsre_utf8.Utf8MatchContext( + ctx = rsre_utf8.Utf8MatchContext( self.code, utf8str, bytepos, endbytepos, self.flags) + # xxx we store the w_string on the ctx too, for + # W_SRE_Match.bytepos_to_charindex() + ctx.w_unicode_obj = w_unicode_obj + return ctx elif space.isinstance_w(w_string, space.w_bytes): str = space.bytes_w(w_string) if pos > len(str): @@ -520,7 +526,13 @@ # Transform a 'byte position', as returned by all methods from # rsre_core, back into a 'character index'. This is for UTF8 # handling. 
- XXXX + ctx = self.ctx + if isinstance(ctx, rsre_utf8.Utf8MatchContext): + index_storage = ctx.w_unicode_obj._get_index_storage() + return rutf8.codepoint_index_at_byte_position( + ctx.w_unicode_obj._utf8, index_storage, bytepos) + else: + return bytepos def flatten_marks(self): if self.flatten_cache is None: @@ -603,9 +615,8 @@ elif isinstance(ctx, rsre_core.StrMatchContext): return space.newbytes(ctx._string) elif isinstance(ctx, rsre_utf8.Utf8MatchContext): - XXXXXXXX - lgt = rutf8.check_utf8(ctx._unicodestr, True) - return space.newutf8(ctx._unicodestr, lgt) + lgt = rutf8.get_utf8_length(ctx._utf8) + return space.newutf8(ctx._utf8, lgt) else: raise SystemError diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py --- a/pypy/module/_sre/test/test_app_sre.py +++ b/pypy/module/_sre/test/test_app_sre.py @@ -35,7 +35,7 @@ def _bytepos_to_charindex(self, bytepos): if isinstance(self.ctx, support.MatchContextForTests): return self.ctx._real_pos(bytepos) - return bytepos + return _org_maker[1](self, bytepos) def setup_module(mod): mod._org_maker = ( @@ -1037,3 +1037,15 @@ import re assert re.search(".+ab", "wowowowawoabwowo") assert None == re.search(".+ab", "wowowaowowo") + + +class AppTestUnicodeExtra: + def test_string_attribute(self): + import re + match = re.search(u"\u1234", u"\u1233\u1234\u1235") + assert match.string == u"\u1233\u1234\u1235" + + def test_match_start(self): + import re + match = re.search(u"\u1234", u"\u1233\u1234\u1235") + assert match.start() == 1 From pypy.commits at gmail.com Sat Dec 9 14:36:24 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 09 Dec 2017 11:36:24 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix _rawffi and add a todo item Message-ID: <5a2c3b38.b198df0a.6e23d.2b86@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93338:93560a4f1a42 Date: 2017-12-09 21:35 +0200 http://bitbucket.org/pypy/pypy/changeset/93560a4f1a42/ Log: fix _rawffi and add a todo item diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -12,3 +12,4 @@ * improve performance of splitlines * fix _pypyjson to not use a wrapped dict when decoding an object +* make sure we review all the places that call ord(unichr) to check for ValueErrors \ No newline at end of file diff --git a/pypy/module/_locale/interp_locale.py b/pypy/module/_locale/interp_locale.py --- a/pypy/module/_locale/interp_locale.py +++ b/pypy/module/_locale/interp_locale.py @@ -133,10 +133,11 @@ rffi.free_charp(s1_c) rffi.free_charp(s2_c) - s1, s2 = space.unicode_w(w_s1), space.unicode_w(w_s2) + s1, l1 = space.utf8_len_w(w_s1) + s2, l2 = space.utf8_len_w(w_s2) - s1_c = rffi.unicode2wcharp(s1) - s2_c = rffi.unicode2wcharp(s2) + s1_c = rffi.utf82wcharp(s1, l1) + s2_c = rffi.utf82wcharp(s2, l2) try: result = _wcscoll(s1_c, s2_c) finally: diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py --- a/pypy/module/_rawffi/alt/type_converter.py +++ b/pypy/module/_rawffi/alt/type_converter.py @@ -227,8 +227,8 @@ ucharval = self.get_char(w_ffitype) return space.newbytes(chr(ucharval)) elif w_ffitype.is_unichar(): - wcharval = self.get_unichar(w_ffitype) - return space.newutf8(rutf8.unichr_as_utf8(r_uint(wcharval)), 1) + wcharval = r_uint(self.get_unichar(w_ffitype)) + return space.newutf8(rutf8.unichr_as_utf8(wcharval), 1) elif w_ffitype.is_double(): return self._float(w_ffitype) elif w_ffitype.is_singlefloat(): diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- 
a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -448,7 +448,8 @@ elif c == 'c': return space.newbytes(func(add_arg, argdesc, ll_type)) elif c == 'u': - return space.newunicode(func(add_arg, argdesc, ll_type)) + return space.newutf8(rutf8.unichr_as_utf8( + ord(func(add_arg, argdesc, ll_type))), 1) elif c == 'f' or c == 'd' or c == 'g': return space.newfloat(float(func(add_arg, argdesc, ll_type))) else: @@ -596,10 +597,10 @@ return space.w_None wcharp_addr = rffi.cast(rffi.CWCHARP, address) if maxlength == -1: - s = rffi.wcharp2unicode(wcharp_addr) + s, lgt = rffi.wcharp2utf8(wcharp_addr) else: - s = rffi.wcharp2unicoden(wcharp_addr, maxlength) - return space.newunicode(s) + s, lgt = rffi.wcharp2utf8n(wcharp_addr, maxlength) + return space.newutf8(s, lgt) @unwrap_spec(address=r_uint, maxlength=int) def charp2rawstring(space, address, maxlength=-1): @@ -612,8 +613,8 @@ def wcharp2rawunicode(space, address, maxlength=-1): if maxlength == -1: return wcharp2unicode(space, address) - s = rffi.wcharpsize2unicode(rffi.cast(rffi.CWCHARP, address), maxlength) - return space.newunicode(s) + s = rffi.wcharpsize2utf8(rffi.cast(rffi.CWCHARP, address), maxlength) + return space.newutf8(s, maxlength) @unwrap_spec(address=r_uint, newcontent='bufferstr') def rawstring2charp(space, address, newcontent): diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py --- a/rpython/annotator/unaryop.py +++ b/rpython/annotator/unaryop.py @@ -792,7 +792,7 @@ def ord(self): # warning, on 32-bit with 32-bit unichars, this might return # negative numbers - return SomeInteger() + return SomeInteger(nonneg=True) class __extend__(SomeIterator): diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -19,7 +19,7 @@ from rpython.rlib.objectmodel import enforceargs, we_are_translated, specialize from rpython.rlib.objectmodel import always_inline, dont_inline, try_inline from rpython.rlib.rstring import StringBuilder -from rpython.rlib import jit +from rpython.rlib import jit, types from rpython.rlib.signature import signature from rpython.rlib.types import char, none from rpython.rlib.rarithmetic import r_uint @@ -27,6 +27,8 @@ from rpython.rtyper.lltypesystem import lltype, rffi +# we need a way to accept both r_uint and int(nonneg=True) +#@signature(types.int_nonneg(), types.bool(), returns=types.str()) def unichr_as_utf8(code, allow_surrogates=False): """Encode code (numeric value) as utf8 encoded string """ diff --git a/rpython/rlib/types.py b/rpython/rlib/types.py --- a/rpython/rlib/types.py +++ b/rpython/rlib/types.py @@ -26,6 +26,8 @@ def int(): return model.SomeInteger() +def int_nonneg(): + return model.SomeInteger(nonneg=True) def bool(): return model.SomeBool() diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1019,7 +1019,27 @@ s = StringBuilder(size) for i in range(size): rutf8.unichr_as_utf8_append(s, ord(w[i])) - return s.build() + return s.build() + +def wcharp2utf8(w): + from rpython.rlib import rutf8 + + s = rutf8.Utf8StringBuilder() + i = 0 + while ord(w[i]): + s.append_code(ord(w[i])) + i += 1 + return s.build(), i + +def wcharp2utf8n(w, maxlen): + from rpython.rlib import rutf8 + + s = rutf8.Utf8StringBuilder(maxlen) + i = 0 + while i < maxlen and w[i]: + s.append_code(ord(w[i])) + i += 1 + return s.build(), i def utf82wcharp(utf8, utf8len): from rpython.rlib import rutf8 
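The new rffi.wcharp2utf8n() above boils down to the following plain-Python
sketch (illustration only: it assumes the wchar_t buffer is handed in as a
list of code-point integers, and uses unichr()/encode() where the RPython
helper uses rutf8.Utf8StringBuilder.append_code()):

    def wcharp2utf8n_model(wchars, maxlen):
        # stop after maxlen code points or at the first NUL, whichever
        # comes first, exactly like the loop in the real helper
        chunks = []
        i = 0
        while i < maxlen and wchars[i] != 0:
            chunks.append(unichr(wchars[i]).encode("utf-8"))
            i += 1
        # same result shape as the real helper: (utf8 string, codepoints read)
        return "".join(chunks), i

    assert wcharp2utf8n_model([0x41, 0x444, 0x0, 0x42], 16) == ("A\xd1\x84", 2)
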
From pypy.commits at gmail.com Sat Dec 9 14:44:42 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 09 Dec 2017 11:44:42 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: Must not call ctx.next() when the type of ctx is not exactly known. Message-ID: <5a2c3d2a.17361c0a.54d38.beda@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93339:ebe0641d78a5 Date: 2017-12-09 20:44 +0100 http://bitbucket.org/pypy/pypy/changeset/ebe0641d78a5/ Log: Must not call ctx.next() when the type of ctx is not exactly known. Workaround for interp_sre, where the few calls are not performance- sensitive. diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -198,7 +198,7 @@ matchlist_w.append(w_item) reset_at = ctx.match_end if ctx.match_start == ctx.match_end: - reset_at = ctx.next(reset_at) + reset_at = ctx.next_indirect(reset_at) ctx.reset(reset_at) return space.newlist(matchlist_w) @@ -223,7 +223,7 @@ if ctx.match_start == ctx.match_end: # zero-width match if ctx.match_start == ctx.end: # or end of string break - ctx.reset(ctx.next(ctx.match_end)) + ctx.reset(ctx.next_indirect(ctx.match_end)) continue splitlist.append(slice_w(space, ctx, last, ctx.match_start, space.w_None)) @@ -320,7 +320,7 @@ strbuilder, last_pos, ctx.match_start) start = ctx.match_end if start == ctx.match_start: - start = ctx.next(start) + start = ctx.next_indirect(start) if not (last_pos == ctx.match_start == ctx.match_end and n > 0): # the above ignores empty matches on latest position @@ -682,13 +682,13 @@ ctx = self.ctx nextstart = ctx.match_end if ctx.match_start == nextstart: - nextstart = ctx.next(nextstart) + nextstart = ctx.next_indirect(nextstart) self.ctx = ctx.fresh_copy(nextstart) match = W_SRE_Match(self.srepat, ctx) return match else: # obscure corner case - self.ctx.match_start = self.ctx.next(self.ctx.match_start) + self.ctx.match_start = self.ctx.next_indirect(self.ctx.match_start) return None W_SRE_Scanner.typedef = TypeDef( diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -172,6 +172,8 @@ return position1 - position2 def go_forward_by_bytes(self, base_position, index): return base_position + index + def next_indirect(self, position): + return position + 1 # like next(), but can be called indirectly def get_mark(self, gid): return find_mark(self.match_marks, gid) diff --git a/rpython/rlib/rsre/rsre_utf8.py b/rpython/rlib/rsre/rsre_utf8.py --- a/rpython/rlib/rsre/rsre_utf8.py +++ b/rpython/rlib/rsre/rsre_utf8.py @@ -34,6 +34,7 @@ def next(self, position): return rutf8.next_codepoint_pos(self._utf8, position) + next_indirect = next def prev(self, position): if position <= 0: diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py --- a/rpython/rlib/rsre/test/support.py +++ b/rpython/rlib/rsre/test/support.py @@ -29,6 +29,7 @@ def next(self, position): assert isinstance(position, Position) return Position(position._p + 1) + next_indirect = next def prev(self, position): assert isinstance(position, Position) From pypy.commits at gmail.com Sat Dec 9 14:46:49 2017 From: pypy.commits at gmail.com (mattip) Date: Sat, 09 Dec 2017 11:46:49 -0800 (PST) Subject: [pypy-commit] buildbot default: kill raring build, activate py3.5 on ARM, add more onlyIfChanged Message-ID: <5a2c3da9.02be1c0a.199a7.ae01@mx.google.com> Author: Matti Picus Branch: Changeset: r1046:9c8b26858cfa 
Date: 2017-12-09 21:46 +0200 http://bitbucket.org/pypy/buildbot/changeset/9c8b26858cfa/ Log: kill raring build, activate py3.5 on ARM, add more onlyIfChanged diff --git a/bot2/pypybuildbot/arm_master.py b/bot2/pypybuildbot/arm_master.py --- a/bot2/pypybuildbot/arm_master.py +++ b/bot2/pypybuildbot/arm_master.py @@ -1,5 +1,6 @@ from buildbot.scheduler import Nightly, Triggerable from pypybuildbot.util import load, isRPython +from buildbot.changes import filter pypybuilds = load('pypybuildbot.builds') ARMCrossLock = pypybuilds.ARMCrossLock @@ -51,15 +52,6 @@ prefix=['schroot', '-c', 'raspbian', '--'], trigger='JITLINUXARMHF_RASPBIAN_scheduler') -pypyJITCrossTranslationFactoryRaringHF = pypybuilds.NightlyBuild( - translationArgs=(crosstranslationargs - + jit_translation_args - + crosstranslationjitargs), - platform='linux-armhf-raring', - interpreter='pypy', - prefix=['schroot', '-c', 'raring', '--'], - trigger='JITLINUXARMHF_RARING_scheduler') - pypyARMJITTranslatedTestFactory = pypybuilds.TranslatedTests( translationArgs=(crosstranslationargs + jit_translation_args @@ -91,15 +83,7 @@ app_tests=True, platform='linux-armhf-raspbian', ) -pypyARMHF_RARING_JITTranslatedTestFactory = pypybuilds.TranslatedTests( - translationArgs=(crosstranslationargs - + jit_translation_args - + crosstranslationjitargs), - lib_python=True, - pypyjit=True, - app_tests=True, - platform='linux-armhf-raring', - ) + # LINUXARMHFOWN = "own-linux-armhf" LINUXARMHFRPYTHON = "rpython-linux-armhf" @@ -110,7 +94,6 @@ JITLINUXARM = "pypy-c-jit-linux-armel" JITLINUXARMHF_v7 = "pypy-c-jit-linux-armhf-v7" JITLINUXARMHF_RASPBIAN = "pypy-c-jit-linux-armhf-raspbian" -JITLINUXARMHF_RARING = "pypy-c-jit-linux-armhf-raring" JITBACKENDONLYLINUXARMEL = "jitbackendonly-own-linux-armel" JITBACKENDONLYLINUXARMHF = "jitbackendonly-own-linux-armhf" @@ -121,7 +104,6 @@ BUILDJITLINUXARM = "build-pypy-c-jit-linux-armel" BUILDLINUXARMHF_RASPBIAN = "build-pypy-c-linux-armhf-raspbian" BUILDJITLINUXARMHF_RASPBIAN = "build-pypy-c-jit-linux-armhf-raspbian" -BUILDJITLINUXARMHF_RARING = "build-pypy-c-jit-linux-armhf-raring" builderNames = [ APPLVLLINUXARM, @@ -137,19 +119,25 @@ BUILDJITLINUXARM, BUILDLINUXARMHF_RASPBIAN, BUILDJITLINUXARMHF_RASPBIAN, - BUILDJITLINUXARMHF_RARING, ] schedulers = [ Nightly("nighly-arm-0-00", [ BUILDJITLINUXARM, # on hhu-cross-armel, uses 1 core BUILDJITLINUXARMHF_RASPBIAN, # on hhu-cross-raspbianhf, uses 1 core - BUILDJITLINUXARMHF_RARING, # on hhu-cross-raring-armhf, uses 1 core #BUILDLINUXARM, # on hhu-cross-armel, uses 1 core #BUILDLINUXARMHF_RASPBIAN, # on hhu-cross-raspbianhf, uses 1 core - ], branch=None, hour=0, minute=0, + ], branch='default', hour=0, minute=0, + onlyIfChanged=True, + ), + + Nightly("nightly-arm-3-00-py3.5", [ + BUILDJITLINUXARM, # on hhu-cross-armel, uses 1 core + BUILDJITLINUXARMHF_RASPBIAN, # on hhu-cross-raspbianhf, uses 1 core + ], branch="py3.5", hour=3, minute=0, + onlyIfChanged=True, ), Nightly("nightly-arm-0-01", [ @@ -178,8 +166,6 @@ JITLINUXARMHF_v7, # triggered by BUILDJITLINUXARMHF_RASPBIAN, on cubieboard-bob ]), - Triggerable("JITLINUXARMHF_RARING_scheduler", [ # triggered by BUILDJITLINUXARMHF_RARING - ]) ] builders = [ @@ -285,11 +271,4 @@ "category": 'linux-armhf', "locks": [ARMCrossLock.access('counting')], }, - {"name": BUILDJITLINUXARMHF_RARING, - "slavenames": ['hhu-cross-raring'], - "builddir": BUILDJITLINUXARMHF_RARING, - "factory": pypyJITCrossTranslationFactoryRaringHF, - "category": 'linux-armhf', - "locks": [ARMCrossLock.access('counting')], - }, ] diff --git 
a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -728,14 +728,14 @@ # copy pypy-c to the expected location within the pypy source checkout command = ('PYPY_C="pypy-c/bin/pypy";' 'if [ -e pypy-c/bin/pypy3 ]; then PYPY_C="pypy-c/bin/pypy3"; fi;' - 'cp -v $PYPY_C build/pypy/goal/pypy-c;') + 'cp -v $PYPY_C build/pypy/goal;') self.addStep(ShellCmd( description="move pypy-c", command=command, haltOnFailure=True, workdir='.')) # copy libpypy-c.so to the expected location within the pypy source checkout, if available - command = 'if [ -e pypy-c/bin/libpypy-c.so ]; then cp -v pypy-c/bin/libpypy-c.so build/pypy/goal/; fi;' + command = 'if [ -e pypy-c/bin/libpypy-c.so ]; then cp -v pypy-c/bin/libpypy-c.so build/pypy/goal; fi;' self.addStep(ShellCmd( description="move libpypy-c.so", command=command, diff --git a/bot2/pypybuildbot/master.py b/bot2/pypybuildbot/master.py --- a/bot2/pypybuildbot/master.py +++ b/bot2/pypybuildbot/master.py @@ -300,7 +300,9 @@ JITMACOSX64, # on xerxes # buildbot selftest #PYPYBUILDBOT # on cobra - ], branch='default', hour=0, minute=0), + ], branch='default', hour=0, minute=0, + onlyIfChanged=True, + ), Nightly("nightly-0-01", [ LINUX32RPYTHON, # on tannit32, uses all cores @@ -317,7 +319,9 @@ JITBENCH64, # on tannit64, uses 1 core (in part exclusively) #JITBENCH64_NEW, # on speed64, uses 1 core (in part exclusively) - ], branch=None, hour=5, minute=0), + ], branch='default', hour=5, minute=0, + onlyIfChanged=True, + ), Triggerable("NUMPY64_scheduler", [ #NUMPY_64, # on tannit64, uses 1 core, takes about 5min. @@ -337,13 +341,17 @@ JITLINUX64, # on bencher4, uses 1 core JITMACOSX64, # on xerxes JITWIN32, # on allegro_win32, SalsaSalsa - ], branch="py3.5", hour=3, minute=0), + ], branch="py3.5", hour=3, minute=0, + onlyIfChanged=True, + ), # S390X vm (ibm-research) Nightly("nightly-4-00", [ LINUX_S390XOWN, ], branch='default', hour=0, minute=0), - Nightly("nightly-4-01", [JITLINUX_S390X], branch='default', hour=2, minute=0), + Nightly("nightly-4-01", [JITLINUX_S390X], branch='default', hour=2, minute=0, + onlyIfChanged=True, + ), # this one has faithfully run every night even though the latest # change to that branch was in January 2013. Re-enable one day. From pypy.commits at gmail.com Sat Dec 9 22:47:16 2017 From: pypy.commits at gmail.com (mjacob) Date: Sat, 09 Dec 2017 19:47:16 -0800 (PST) Subject: [pypy-commit] pypy default: Fix Sphinx build warnings. Message-ID: <5a2cae44.05c41c0a.69432.e651@mx.google.com> Author: Manuel Jacob Branch: Changeset: r93340:e9597229c18d Date: 2017-12-10 04:46 +0100 http://bitbucket.org/pypy/pypy/changeset/e9597229c18d/ Log: Fix Sphinx build warnings. diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -149,7 +149,7 @@ xz-devel # For lzma on PyPy3. (XXX plus the SLES11 version of libgdbm-dev and tk-dev) -On Mac OS X:: +On Mac OS X: Most of these build-time dependencies are installed alongside the Developer Tools. However, note that in order for the installation to diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -1,31 +1,38 @@ -=========================== -What's new in PyPy2.7 5.10+ -=========================== - -.. this is a revision shortly after release-pypy2.7-v5.9.0 -.. startrev:d56dadcef996 - -.. branch: cppyy-packaging -Cleanup and improve cppyy packaging - -.. branch: docs-osx-brew-openssl - -.. 
branch: keep-debug-symbols -Add a smartstrip tool, which can optionally keep the debug symbols in a -separate file, instead of just stripping them away. Use it in packaging - -.. branch: bsd-patches -Fix failures on FreeBSD, contributed by David Naylor as patches on the issue -tracker (issues 2694, 2695, 2696, 2697) - -.. branch: run-extra-tests -Run extra_tests/ in buildbot - -.. branch: vmprof-0.4.10 -Upgrade the _vmprof backend to vmprof 0.4.10 - -.. branch: fix-vmprof-stacklet-switch -Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) - -.. branch: win32-vcvars - +=========================== +What's new in PyPy2.7 5.10+ +=========================== + +.. this is a revision shortly after release-pypy2.7-v5.9.0 +.. startrev:d56dadcef996 + + +.. branch: cppyy-packaging + +Cleanup and improve cppyy packaging + +.. branch: docs-osx-brew-openssl + +.. branch: keep-debug-symbols + +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches + +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests + +Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 + +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch + +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + diff --git a/pypy/doc/whatsnew-pypy2-5.6.0.rst b/pypy/doc/whatsnew-pypy2-5.6.0.rst --- a/pypy/doc/whatsnew-pypy2-5.6.0.rst +++ b/pypy/doc/whatsnew-pypy2-5.6.0.rst @@ -101,7 +101,7 @@ .. branch: newinitwarn -Match CPython's stricter handling of __new/init__ arguments +Match CPython's stricter handling of ``__new__``/``__init__`` arguments .. branch: openssl-1.1 diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -11,7 +11,7 @@ To build pypy-c you need a working python environment, and a C compiler. It is possible to translate with a CPython 2.6 or later, but this is not -the preferred way, because it will take a lot longer to run � depending +the preferred way, because it will take a lot longer to run – depending on your architecture, between two and three times as long. So head to `our downloads`_ and get the latest stable version. 
@@ -103,6 +103,7 @@ must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the ``...\9.0\VC`` directory, and edit it, changing the lines that set ``VCINSTALLDIR`` and ``WindowsSdkDir``:: + set VCINSTALLDIR=%~dp0\ set WindowsSdkDir=%~dp0\..\WinSDK\ From pypy.commits at gmail.com Sat Dec 9 23:48:23 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 09 Dec 2017 20:48:23 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Fix errorhandler use in utf8_encode_charmap() Message-ID: <5a2cbc97.4191df0a.397b4.c155@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93341:3e5aa507f585 Date: 2017-12-10 04:47 +0000 http://bitbucket.org/pypy/pypy/changeset/3e5aa507f585/ Log: Fix errorhandler use in utf8_encode_charmap() diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1425,8 +1425,7 @@ lgt = rutf8.check_utf8(r, True) return r, pos, lgt -def utf8_encode_charmap(s, errors, errorhandler=None, - mapping=None): +def utf8_encode_charmap(s, errors, errorhandler=None, mapping=None): size = len(s) if mapping is None: return utf8_encode_latin_1(s, errors, errorhandler=errorhandler) @@ -1438,31 +1437,31 @@ index = 0 while pos < size: ch = rutf8.codepoint_at_pos(s, pos) - c = mapping.get(ch, '') if len(c) == 0: - # collect all unencodable chars. Important for narrow builds. - collend = rutf8.next_codepoint_pos(s, pos) - endindex = index + 1 - while collend < size and mapping.get(rutf8.codepoint_at_pos(s, collend), '') == '': - collend = rutf8.next_codepoint_pos(s, collend) - endindex += 1 - rs, endindex = errorhandler(errors, "charmap", + # collect all unencodable chars. + startindex = index + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + while (pos < size and + mapping.get(rutf8.codepoint_at_pos(s, pos), '') == ''): + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + res_8, newindex = errorhandler(errors, "charmap", "character maps to ", - s, index, endindex) - j = 0 - for _ in range(endindex - index): - ch2 = rutf8.codepoint_at_pos(rs, j) - ch2 = mapping.get(ch2, '') + s, startindex, index) + for cp2 in rutf8.Utf8StringIterator(res_8): + ch2 = mapping.get(cp2, '') if not ch2: errorhandler( - "strict", "charmap", - "character maps to ", - s, index, index + 1) + "strict", "charmap", "character maps to ", + s, startindex, index) result.append(ch2) - index += 1 - j = rutf8.next_codepoint_pos(rs, j) - pos = rutf8.next_codepoint_pos(s, pos) + if index != newindex: # Should be uncommon + index = newindex + pos = 0 + for _ in range(newindex): + pos = rutf8.next_codepoint_pos(s, pos) continue result.append(c) index += 1 diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -537,8 +537,12 @@ assert '\xff'.decode('utf-7', 'ignore') == '' assert '\x00'.decode('unicode-internal', 'ignore') == '' - def test_backslahreplace(self): - assert u'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') == 'a\\xac\u1234\u20ac\u8000' + def test_backslashreplace(self): + sin = u"a\xac\u1234\u20ac\u8000\U0010ffff" + expected = "a\\xac\\u1234\\u20ac\\u8000\\U0010ffff" + assert sin.encode('ascii', 'backslashreplace') == expected + expected = "a\xac\\u1234\xa4\\u8000\\U0010ffff" + assert sin.encode("iso-8859-15", "backslashreplace") == expected def test_badhandler(self): import codecs From pypy.commits at gmail.com Sun Dec 10 
00:17:32 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 09 Dec 2017 21:17:32 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Handle errorhandlers that go backward Message-ID: <5a2cc36c.5dbf1c0a.b2c74.0aea@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93342:a4d68881a89d Date: 2017-12-10 05:16 +0000 http://bitbucket.org/pypy/pypy/changeset/a4d68881a89d/ Log: Handle errorhandlers that go backward diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1101,19 +1101,16 @@ ru, newindex = errorhandler(errors, public_encoding_name, 'surrogates not allowed', s, pos-1, pos) - for j in range(newindex - index): - pos = rutf8.next_codepoint_pos(s, pos) - j = 0 - while j < len(ru): - ch = rutf8.codepoint_at_pos(ru, j) - if ord(ch) < 0xD800: - _STORECHAR(result, ord(ch), byteorder) + for cp in rutf8.Utf8StringIterator(res_8): + if cp < 0xD800: + _STORECHAR(result, cp, byteorder) else: errorhandler('strict', public_encoding_name, 'surrogates not allowed', s, pos-1, pos) - j = rutf8.next_codepoint_pos(ru, j) - index = newindex + if index != newindex: # Should be uncommon + index = newindex + pos = rutf8._pos_at_index(s, newindex) continue pos = rutf8.next_codepoint_pos(s, pos) @@ -1282,22 +1279,19 @@ ch = rutf8.codepoint_at_pos(s, pos) pos = rutf8.next_codepoint_pos(s, pos) if not allow_surrogates and 0xD800 <= ch < 0xE000: - ru, newindex = errorhandler(errors, public_encoding_name, + res_8, newindex = errorhandler(errors, public_encoding_name, 'surrogates not allowed', s, pos-1, pos) - for j in range(newindex - index): - pos = rutf8.next_codepoint_pos(s, pos) - j = 0 - while j < len(ru): - ch = rutf8.codepoint_at_pos(ru, j) - if ord(ch) < 0xD800: - _STORECHAR32(result, ord(ch), byteorder) + for ch in rutf8.Utf8StringIterator(res_8): + if ch < 0xD800: + _STORECHAR32(result, ch, byteorder) else: errorhandler('strict', public_encoding_name, 'surrogates not allowed', s, pos-1, pos) - j = rutf8.next_codepoint_pos(ru, j) - index = newindex + if index != newindex: # Should be uncommon + index = newindex + pos = rutf8._pos_at_index(s, newindex) continue _STORECHAR32(result, ch, byteorder) index += 1 @@ -1459,9 +1453,7 @@ result.append(ch2) if index != newindex: # Should be uncommon index = newindex - pos = 0 - for _ in range(newindex): - pos = rutf8.next_codepoint_pos(s, pos) + pos = rutf8._pos_at_index(s, newindex) continue result.append(c) index += 1 diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -439,7 +439,7 @@ low = codepoint_at_pos(utf8, i) if 0xDC00 <= low <= 0xDFFF: uchr = 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00) - i = next_codepoint_pos(utf8, i) + i = next_codepoint_pos(utf8, i) # else not really a surrogate pair, just append high else: i = next_codepoint_pos(utf8, i) @@ -537,6 +537,13 @@ else: return next_codepoint_pos(utf8, next_codepoint_pos(utf8, bytepos)) +def _pos_at_index(utf8, index): + # Slow! 
+ pos = 0 + for _ in range(index): + pos = next_codepoint_pos(utf8, pos) + return pos + @jit.dont_look_inside def codepoint_at_index(utf8, storage, index): """ Return codepoint of a character inside utf8 encoded string, given From pypy.commits at gmail.com Sun Dec 10 00:25:36 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 09 Dec 2017 21:25:36 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix Message-ID: <5a2cc550.0eef1c0a.4461f.fd09@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93343:0accab6d493c Date: 2017-12-10 05:25 +0000 http://bitbucket.org/pypy/pypy/changeset/0accab6d493c/ Log: fix diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1098,9 +1098,9 @@ elif ch >= 0xE000 or allow_surrogates: _STORECHAR(result, ch, byteorder) else: - ru, newindex = errorhandler(errors, public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) + res_8, newindex = errorhandler( + errors, public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) for cp in rutf8.Utf8StringIterator(res_8): if cp < 0xD800: _STORECHAR(result, cp, byteorder) @@ -1279,16 +1279,16 @@ ch = rutf8.codepoint_at_pos(s, pos) pos = rutf8.next_codepoint_pos(s, pos) if not allow_surrogates and 0xD800 <= ch < 0xE000: - res_8, newindex = errorhandler(errors, public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) + res_8, newindex = errorhandler( + errors, public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) for ch in rutf8.Utf8StringIterator(res_8): if ch < 0xD800: _STORECHAR32(result, ch, byteorder) else: - errorhandler('strict', public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) + errorhandler( + 'strict', public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) if index != newindex: # Should be uncommon index = newindex pos = rutf8._pos_at_index(s, newindex) From pypy.commits at gmail.com Sun Dec 10 00:36:32 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 09 Dec 2017 21:36:32 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-test: hg merge unicode-utf8 Message-ID: <5a2cc7e0.8f9ddf0a.506d5.2dbc@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8-test Changeset: r93344:1665df77270e Date: 2017-12-10 05:27 +0000 http://bitbucket.org/pypy/pypy/changeset/1665df77270e/ Log: hg merge unicode-utf8 diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -12,3 +12,4 @@ * improve performance of splitlines * fix _pypyjson to not use a wrapped dict when decoding an object +* make sure we review all the places that call ord(unichr) to check for ValueErrors \ No newline at end of file diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1098,22 +1098,19 @@ elif ch >= 0xE000 or allow_surrogates: _STORECHAR(result, ch, byteorder) else: - ru, newindex = errorhandler(errors, public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) - for j in range(newindex - index): - pos = rutf8.next_codepoint_pos(s, pos) - j = 0 - while j < len(ru): - ch = rutf8.codepoint_at_pos(ru, j) - if ord(ch) < 0xD800: - _STORECHAR(result, ord(ch), byteorder) + res_8, newindex = errorhandler( + errors, public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + for cp in rutf8.Utf8StringIterator(res_8): + if cp < 0xD800: + _STORECHAR(result, cp, byteorder) else: errorhandler('strict', public_encoding_name, 'surrogates not 
allowed', s, pos-1, pos) - j = rutf8.next_codepoint_pos(ru, j) - index = newindex + if index != newindex: # Should be uncommon + index = newindex + pos = rutf8._pos_at_index(s, newindex) continue pos = rutf8.next_codepoint_pos(s, pos) @@ -1282,22 +1279,19 @@ ch = rutf8.codepoint_at_pos(s, pos) pos = rutf8.next_codepoint_pos(s, pos) if not allow_surrogates and 0xD800 <= ch < 0xE000: - ru, newindex = errorhandler(errors, public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) - for j in range(newindex - index): - pos = rutf8.next_codepoint_pos(s, pos) - j = 0 - while j < len(ru): - ch = rutf8.codepoint_at_pos(ru, j) - if ord(ch) < 0xD800: - _STORECHAR32(result, ord(ch), byteorder) + res_8, newindex = errorhandler( + errors, public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + for ch in rutf8.Utf8StringIterator(res_8): + if ch < 0xD800: + _STORECHAR32(result, ch, byteorder) else: - errorhandler('strict', public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) - j = rutf8.next_codepoint_pos(ru, j) - index = newindex + errorhandler( + 'strict', public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + if index != newindex: # Should be uncommon + index = newindex + pos = rutf8._pos_at_index(s, newindex) continue _STORECHAR32(result, ch, byteorder) index += 1 @@ -1425,8 +1419,7 @@ lgt = rutf8.check_utf8(r, True) return r, pos, lgt -def utf8_encode_charmap(s, errors, errorhandler=None, - mapping=None): +def utf8_encode_charmap(s, errors, errorhandler=None, mapping=None): size = len(s) if mapping is None: return utf8_encode_latin_1(s, errors, errorhandler=errorhandler) @@ -1438,31 +1431,29 @@ index = 0 while pos < size: ch = rutf8.codepoint_at_pos(s, pos) - c = mapping.get(ch, '') if len(c) == 0: - # collect all unencodable chars. Important for narrow builds. - collend = rutf8.next_codepoint_pos(s, pos) - endindex = index + 1 - while collend < size and mapping.get(rutf8.codepoint_at_pos(s, collend), '') == '': - collend = rutf8.next_codepoint_pos(s, collend) - endindex += 1 - rs, endindex = errorhandler(errors, "charmap", + # collect all unencodable chars. 
+ startindex = index + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + while (pos < size and + mapping.get(rutf8.codepoint_at_pos(s, pos), '') == ''): + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + res_8, newindex = errorhandler(errors, "charmap", "character maps to ", - s, index, endindex) - j = 0 - for _ in range(endindex - index): - ch2 = rutf8.codepoint_at_pos(rs, j) - ch2 = mapping.get(ch2, '') + s, startindex, index) + for cp2 in rutf8.Utf8StringIterator(res_8): + ch2 = mapping.get(cp2, '') if not ch2: errorhandler( - "strict", "charmap", - "character maps to ", - s, index, index + 1) + "strict", "charmap", "character maps to ", + s, startindex, index) result.append(ch2) - index += 1 - j = rutf8.next_codepoint_pos(rs, j) - pos = rutf8.next_codepoint_pos(s, pos) + if index != newindex: # Should be uncommon + index = newindex + pos = rutf8._pos_at_index(s, newindex) continue result.append(c) index += 1 diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -537,8 +537,12 @@ assert '\xff'.decode('utf-7', 'ignore') == '' assert '\x00'.decode('unicode-internal', 'ignore') == '' - def test_backslahreplace(self): - assert u'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') == 'a\\xac\u1234\u20ac\u8000' + def test_backslashreplace(self): + sin = u"a\xac\u1234\u20ac\u8000\U0010ffff" + expected = "a\\xac\\u1234\\u20ac\\u8000\\U0010ffff" + assert sin.encode('ascii', 'backslashreplace') == expected + expected = "a\xac\\u1234\xa4\\u8000\\U0010ffff" + assert sin.encode("iso-8859-15", "backslashreplace") == expected def test_badhandler(self): import codecs diff --git a/pypy/module/_locale/interp_locale.py b/pypy/module/_locale/interp_locale.py --- a/pypy/module/_locale/interp_locale.py +++ b/pypy/module/_locale/interp_locale.py @@ -133,10 +133,11 @@ rffi.free_charp(s1_c) rffi.free_charp(s2_c) - s1, s2 = space.unicode_w(w_s1), space.unicode_w(w_s2) + s1, l1 = space.utf8_len_w(w_s1) + s2, l2 = space.utf8_len_w(w_s2) - s1_c = rffi.unicode2wcharp(s1) - s2_c = rffi.unicode2wcharp(s2) + s1_c = rffi.utf82wcharp(s1, l1) + s2_c = rffi.utf82wcharp(s2, l2) try: result = _wcscoll(s1_c, s2_c) finally: diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py --- a/pypy/module/_rawffi/alt/type_converter.py +++ b/pypy/module/_rawffi/alt/type_converter.py @@ -227,8 +227,8 @@ ucharval = self.get_char(w_ffitype) return space.newbytes(chr(ucharval)) elif w_ffitype.is_unichar(): - wcharval = self.get_unichar(w_ffitype) - return space.newutf8(rutf8.unichr_as_utf8(r_uint(wcharval)), 1) + wcharval = r_uint(self.get_unichar(w_ffitype)) + return space.newutf8(rutf8.unichr_as_utf8(wcharval), 1) elif w_ffitype.is_double(): return self._float(w_ffitype) elif w_ffitype.is_singlefloat(): diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -448,7 +448,8 @@ elif c == 'c': return space.newbytes(func(add_arg, argdesc, ll_type)) elif c == 'u': - return space.newunicode(func(add_arg, argdesc, ll_type)) + return space.newutf8(rutf8.unichr_as_utf8( + ord(func(add_arg, argdesc, ll_type))), 1) elif c == 'f' or c == 'd' or c == 'g': return space.newfloat(float(func(add_arg, argdesc, ll_type))) else: @@ -596,10 +597,10 @@ return space.w_None wcharp_addr = rffi.cast(rffi.CWCHARP, address) if maxlength == -1: - s = 
rffi.wcharp2unicode(wcharp_addr) + s, lgt = rffi.wcharp2utf8(wcharp_addr) else: - s = rffi.wcharp2unicoden(wcharp_addr, maxlength) - return space.newunicode(s) + s, lgt = rffi.wcharp2utf8n(wcharp_addr, maxlength) + return space.newutf8(s, lgt) @unwrap_spec(address=r_uint, maxlength=int) def charp2rawstring(space, address, maxlength=-1): @@ -612,8 +613,8 @@ def wcharp2rawunicode(space, address, maxlength=-1): if maxlength == -1: return wcharp2unicode(space, address) - s = rffi.wcharpsize2unicode(rffi.cast(rffi.CWCHARP, address), maxlength) - return space.newunicode(s) + s = rffi.wcharpsize2utf8(rffi.cast(rffi.CWCHARP, address), maxlength) + return space.newutf8(s, maxlength) @unwrap_spec(address=r_uint, newcontent='bufferstr') def rawstring2charp(space, address, newcontent): diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py --- a/rpython/annotator/unaryop.py +++ b/rpython/annotator/unaryop.py @@ -792,7 +792,7 @@ def ord(self): # warning, on 32-bit with 32-bit unichars, this might return # negative numbers - return SomeInteger() + return SomeInteger(nonneg=True) class __extend__(SomeIterator): diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -19,7 +19,7 @@ from rpython.rlib.objectmodel import enforceargs, we_are_translated, specialize from rpython.rlib.objectmodel import always_inline, dont_inline, try_inline from rpython.rlib.rstring import StringBuilder -from rpython.rlib import jit +from rpython.rlib import jit, types from rpython.rlib.signature import signature from rpython.rlib.types import char, none from rpython.rlib.rarithmetic import r_uint @@ -27,6 +27,8 @@ from rpython.rtyper.lltypesystem import lltype, rffi +# we need a way to accept both r_uint and int(nonneg=True) +#@signature(types.int_nonneg(), types.bool(), returns=types.str()) def unichr_as_utf8(code, allow_surrogates=False): """Encode code (numeric value) as utf8 encoded string """ @@ -437,7 +439,7 @@ low = codepoint_at_pos(utf8, i) if 0xDC00 <= low <= 0xDFFF: uchr = 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00) - i = next_codepoint_pos(utf8, i) + i = next_codepoint_pos(utf8, i) # else not really a surrogate pair, just append high else: i = next_codepoint_pos(utf8, i) @@ -535,6 +537,13 @@ else: return next_codepoint_pos(utf8, next_codepoint_pos(utf8, bytepos)) +def _pos_at_index(utf8, index): + # Slow! 
+ pos = 0 + for _ in range(index): + pos = next_codepoint_pos(utf8, pos) + return pos + @jit.dont_look_inside def codepoint_at_index(utf8, storage, index): """ Return codepoint of a character inside utf8 encoded string, given diff --git a/rpython/rlib/types.py b/rpython/rlib/types.py --- a/rpython/rlib/types.py +++ b/rpython/rlib/types.py @@ -26,6 +26,8 @@ def int(): return model.SomeInteger() +def int_nonneg(): + return model.SomeInteger(nonneg=True) def bool(): return model.SomeBool() diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1019,7 +1019,27 @@ s = StringBuilder(size) for i in range(size): rutf8.unichr_as_utf8_append(s, ord(w[i])) - return s.build() + return s.build() + +def wcharp2utf8(w): + from rpython.rlib import rutf8 + + s = rutf8.Utf8StringBuilder() + i = 0 + while ord(w[i]): + s.append_code(ord(w[i])) + i += 1 + return s.build(), i + +def wcharp2utf8n(w, maxlen): + from rpython.rlib import rutf8 + + s = rutf8.Utf8StringBuilder(maxlen) + i = 0 + while i < maxlen and w[i]: + s.append_code(ord(w[i])) + i += 1 + return s.build(), i def utf82wcharp(utf8, utf8len): from rpython.rlib import rutf8 From pypy.commits at gmail.com Sun Dec 10 02:30:31 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 09 Dec 2017 23:30:31 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: Add sanity-checks Message-ID: <5a2ce297.6184df0a.2f176.8bd9@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93345:78d73593a136 Date: 2017-12-10 07:36 +0100 http://bitbucket.org/pypy/pypy/changeset/78d73593a136/ Log: Add sanity-checks diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -349,8 +349,10 @@ if use_builder != '\x00': result_bytes = strbuilder.build() if use_builder == 'S': + assert not isinstance(ctx, rsre_utf8.Utf8MatchContext) return space.newbytes(result_bytes), n elif use_builder == 'U': + assert isinstance(ctx, rsre_utf8.Utf8MatchContext) return space.newutf8(result_bytes, rutf8.get_utf8_length(result_bytes)), n else: From pypy.commits at gmail.com Sun Dec 10 02:30:33 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 09 Dec 2017 23:30:33 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: in-progress: translation fixes, but I don't understand why I need that Message-ID: <5a2ce299.11101c0a.1b8b0.20ff@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93346:80cb52b67f60 Date: 2017-12-10 08:27 +0100 http://bitbucket.org/pypy/pypy/changeset/80cb52b67f60/ Log: in-progress: translation fixes, but I don't understand why I need that diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -334,6 +334,7 @@ else: if use_builder != '\x00': assert filter_as_string is not None + assert strbuilder is not None strbuilder.append(filter_as_string) else: sublist_w.append(w_filter) @@ -347,6 +348,7 @@ _sub_append_slice(ctx, space, use_builder, sublist_w, strbuilder, last_pos, ctx.end) if use_builder != '\x00': + assert strbuilder is not None result_bytes = strbuilder.build() if use_builder == 'S': assert not isinstance(ctx, rsre_utf8.Utf8MatchContext) @@ -379,6 +381,7 @@ def _sub_append_slice(ctx, space, use_builder, sublist_w, strbuilder, start, end): if use_builder != '\x00': + assert strbuilder is not None if isinstance(ctx, 
rsre_core.BufMatchContext): assert use_builder == 'S' return strbuilder.append(ctx._buffer.getslice(start, end, 1, end-start)) From pypy.commits at gmail.com Sun Dec 10 02:30:36 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 09 Dec 2017 23:30:36 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: hg merge unicode-utf8 Message-ID: <5a2ce29c.4fabdf0a.716d7.9bfe@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93347:58b6fedc39bc Date: 2017-12-10 08:27 +0100 http://bitbucket.org/pypy/pypy/changeset/58b6fedc39bc/ Log: hg merge unicode-utf8 diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -12,3 +12,4 @@ * improve performance of splitlines * fix _pypyjson to not use a wrapped dict when decoding an object +* make sure we review all the places that call ord(unichr) to check for ValueErrors \ No newline at end of file diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -1,3 +1,4 @@ +import pytest from hypothesis import given, strategies from rpython.rlib import rutf8 @@ -5,6 +6,7 @@ from pypy.interpreter.unicodehelper import str_decode_utf8 from pypy.interpreter.unicodehelper import utf8_encode_ascii, str_decode_ascii from pypy.interpreter import unicodehelper as uh +from pypy.module._codecs.interp_codecs import CodecState def decode_utf8(u): return str_decode_utf8(u, True, "strict", None) @@ -68,3 +70,16 @@ def test_unicode_escape(u): r = uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict", None) assert r == u.encode("unicode-escape") + +def test_encode_decimal(space): + assert uh.unicode_encode_decimal(u' 12, 34 ', None) == ' 12, 34 ' + with pytest.raises(ValueError): + uh.unicode_encode_decimal(u' 12, \u1234 '.encode('utf8'), None) + state = space.fromcache(CodecState) + handler = state.encode_error_handler + assert uh.unicode_encode_decimal( + u'u\u1234\u1235v'.encode('utf8'), 'replace', handler) == 'u??v' + + result = uh.unicode_encode_decimal( + u'12\u1234'.encode('utf8'), 'xmlcharrefreplace', handler) + assert result == '12ሴ' diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1,11 +1,13 @@ import sys -from pypy.interpreter.error import OperationError +from pypy.interpreter.error import OperationError, oefmt from rpython.rlib.objectmodel import specialize from rpython.rlib import rutf8 from rpython.rlib.rarithmetic import r_uint, intmask from rpython.rlib.rstring import StringBuilder +from rpython.rtyper.lltypesystem import rffi from pypy.module._codecs import interp_codecs +from pypy.module.unicodedata import unicodedb @specialize.memo() def decode_error_handler(space): @@ -34,6 +36,16 @@ space.newtext(msg)])) return raise_unicode_exception_encode +def default_error_encode( + errors, encoding, msg, u, startingpos, endingpos): + """A default handler, for tests""" + assert endingpos >= 0 + if errors == 'replace': + return '?', endingpos + if errors == 'ignore': + return '', endingpos + raise ValueError + def convert_arg_to_w_unicode(space, w_arg, strict=None): return space.convert_arg_to_w_unicode(w_arg) @@ -204,7 +216,7 @@ if c > 0x7F: errorhandler("strict", 'ascii', 'ordinal not in range(128)', utf8, - pos, pos + 1) + pos, pos + 1) j = rutf8.next_codepoint_pos(r, j) pos = newpos res.append(r) @@ -530,6 +542,19 @@ return builder.build(), pos, outsize +def wcharpsize2utf8(space, wcharp, 
size): + """Safe version of rffi.wcharpsize2utf8. + + Raises app-level ValueError if any wchar value is outside the valid + codepoint range. + """ + try: + return rffi.wcharpsize2utf8(wcharp, size) + except ValueError: + raise oefmt(space.w_ValueError, + "character is not in range [U+0000; U+10ffff]") + + # ____________________________________________________________ # Raw unicode escape @@ -575,8 +600,8 @@ digits = 4 if s[pos] == 'u' else 8 message = "truncated \\uXXXX" pos += 1 - pos, _, _ = hexescape(result, s, pos, digits, - "rawunicodeescape", errorhandler, message, errors) + pos, _ = hexescape(result, s, pos, digits, + "rawunicodeescape", errorhandler, message, errors) r = result.build() lgt = rutf8.check_utf8(r, True) @@ -1073,22 +1098,19 @@ elif ch >= 0xE000 or allow_surrogates: _STORECHAR(result, ch, byteorder) else: - ru, newindex = errorhandler(errors, public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) - for j in range(newindex - index): - pos = rutf8.next_codepoint_pos(s, pos) - j = 0 - while j < len(ru): - ch = rutf8.codepoint_at_pos(ru, j) - if ord(ch) < 0xD800: - _STORECHAR(result, ord(ch), byteorder) + res_8, newindex = errorhandler( + errors, public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + for cp in rutf8.Utf8StringIterator(res_8): + if cp < 0xD800: + _STORECHAR(result, cp, byteorder) else: errorhandler('strict', public_encoding_name, 'surrogates not allowed', s, pos-1, pos) - j = rutf8.next_codepoint_pos(ru, j) - index = newindex + if index != newindex: # Should be uncommon + index = newindex + pos = rutf8._pos_at_index(s, newindex) continue pos = rutf8.next_codepoint_pos(s, pos) @@ -1257,22 +1279,19 @@ ch = rutf8.codepoint_at_pos(s, pos) pos = rutf8.next_codepoint_pos(s, pos) if not allow_surrogates and 0xD800 <= ch < 0xE000: - ru, newindex = errorhandler(errors, public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) - for j in range(newindex - index): - pos = rutf8.next_codepoint_pos(s, pos) - j = 0 - while j < len(ru): - ch = rutf8.codepoint_at_pos(ru, j) - if ord(ch) < 0xD800: - _STORECHAR32(result, ord(ch), byteorder) + res_8, newindex = errorhandler( + errors, public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + for ch in rutf8.Utf8StringIterator(res_8): + if ch < 0xD800: + _STORECHAR32(result, ch, byteorder) else: - errorhandler('strict', public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) - j = rutf8.next_codepoint_pos(ru, j) - index = newindex + errorhandler( + 'strict', public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + if index != newindex: # Should be uncommon + index = newindex + pos = rutf8._pos_at_index(s, newindex) continue _STORECHAR32(result, ch, byteorder) index += 1 @@ -1400,8 +1419,7 @@ lgt = rutf8.check_utf8(r, True) return r, pos, lgt -def utf8_encode_charmap(s, errors, errorhandler=None, - mapping=None): +def utf8_encode_charmap(s, errors, errorhandler=None, mapping=None): size = len(s) if mapping is None: return utf8_encode_latin_1(s, errors, errorhandler=errorhandler) @@ -1413,34 +1431,99 @@ index = 0 while pos < size: ch = rutf8.codepoint_at_pos(s, pos) - c = mapping.get(ch, '') if len(c) == 0: - # collect all unencodable chars. Important for narrow builds. 
- collend = rutf8.next_codepoint_pos(s, pos) - endindex = index + 1 - while collend < size and mapping.get(rutf8.codepoint_at_pos(s, collend), '') == '': - collend = rutf8.next_codepoint_pos(s, collend) - endindex += 1 - rs, endindex = errorhandler(errors, "charmap", + # collect all unencodable chars. + startindex = index + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + while (pos < size and + mapping.get(rutf8.codepoint_at_pos(s, pos), '') == ''): + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + res_8, newindex = errorhandler(errors, "charmap", "character maps to ", - s, index, endindex) - j = 0 - for _ in range(endindex - index): - ch2 = rutf8.codepoint_at_pos(rs, j) - ch2 = mapping.get(ch2, '') + s, startindex, index) + for cp2 in rutf8.Utf8StringIterator(res_8): + ch2 = mapping.get(cp2, '') if not ch2: errorhandler( - "strict", "charmap", - "character maps to ", - s, index, index + 1) + "strict", "charmap", "character maps to ", + s, startindex, index) result.append(ch2) - index += 1 - j = rutf8.next_codepoint_pos(rs, j) - pos = rutf8.next_codepoint_pos(s, pos) + if index != newindex: # Should be uncommon + index = newindex + pos = rutf8._pos_at_index(s, newindex) continue result.append(c) index += 1 pos = rutf8.next_codepoint_pos(s, pos) return result.build() +# ____________________________________________________________ +# Decimal Encoder +def unicode_encode_decimal(s, errors, errorhandler=None): + """Converts whitespace to ' ', decimal characters to their + corresponding ASCII digit and all other Latin-1 characters except + \0 as-is. Characters outside this range (Unicode ordinals 1-256) + are treated as errors. This includes embedded NULL bytes. + """ + if errorhandler is None: + errorhandler = default_error_encode + result = StringBuilder(len(s)) + pos = 0 + i = 0 + it = rutf8.Utf8StringIterator(s) + for ch in it: + if unicodedb.isspace(ch): + result.append(' ') + i += 1 + continue + try: + decimal = unicodedb.decimal(ch) + except KeyError: + pass + else: + result.append(chr(48 + decimal)) + i += 1 + continue + if 0 < ch < 256: + result.append(chr(ch)) + i += 1 + continue + # All other characters are considered unencodable + start_index = i + i += 1 + while not it.done(): + ch = rutf8.codepoint_at_pos(s, it.get_pos()) + try: + if (0 < ch < 256 or unicodedb.isspace(ch) or + unicodedb.decimal(ch) >= 0): + break + except KeyError: + # not a decimal + pass + if it.done(): + break + ch = next(it) + i += 1 + end_index = i + msg = "invalid decimal Unicode string" + r, pos = errorhandler( + errors, 'decimal', msg, s, start_index, end_index) + for ch in rutf8.Utf8StringIterator(r): + if unicodedb.isspace(ch): + result.append(' ') + continue + try: + decimal = unicodedb.decimal(ch) + except KeyError: + pass + else: + result.append(chr(48 + decimal)) + continue + if 0 < ch < 256: + result.append(chr(ch)) + continue + errorhandler('strict', 'decimal', msg, s, start_index, end_index) + return result.build() diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -70,9 +70,6 @@ raise oefmt(space.w_IndexError, "position %d from error handler out of bounds", newpos) - if newpos < startpos: - raise oefmt(space.w_IndexError, - "position %d from error handler did not progress", newpos) w_replace = space.convert_to_w_unicode(w_replace) return w_replace._utf8, newpos return call_errorhandler @@ -226,7 +223,7 @@ w_end = space.getattr(w_exc, space.newtext('end')) end = 
space.int_w(w_end) start = w_obj._index_to_byte(start) - end = w_obj._index_to_byte(end) + end = w_obj._index_to_byte(end) builder = StringBuilder() pos = start obj = w_obj._utf8 @@ -460,22 +457,12 @@ # utf-8 functions are not regular, because we have to pass # "allow_surrogates=True" - at unwrap_spec(utf8='utf8', errors='text_or_none') -def utf_8_encode(space, utf8, errors="strict"): - length, _ = rutf8.check_utf8(utf8, allow_surrogates=True) - return space.newtuple([space.newbytes(utf8), space.newint(length)]) -#@unwrap_spec(uni=unicode, errors='text_or_none') -#def utf_8_encode(space, uni, errors="strict"): -# if errors is None: -# errors = 'strict' -# state = space.fromcache(CodecState) -# # NB. can't call unicode_encode_utf_8() directly because that's -# # an @elidable function nowadays. Instead, we need the _impl(). -# # (The problem is the errorhandler, which calls arbitrary Python.) -# result = runicode.unicode_encode_utf_8_impl( -# uni, len(uni), errors, state.encode_error_handler, -# allow_surrogates=True) -# return space.newtuple([space.newbytes(result), space.newint(len(uni))]) + at unwrap_spec(errors='text_or_none') +def utf_8_encode(space, w_obj, errors="strict"): + utf8, lgt = space.utf8_len_w(w_obj) + if rutf8.has_surrogates(utf8): + utf8 = rutf8.reencode_utf8_with_surrogates(utf8) + return space.newtuple([space.newbytes(utf8), space.newint(lgt)]) @unwrap_spec(string='bufferstr', errors='text_or_none', w_final = WrappedDefault(False)) diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -537,8 +537,12 @@ assert '\xff'.decode('utf-7', 'ignore') == '' assert '\x00'.decode('unicode-internal', 'ignore') == '' - def test_backslahreplace(self): - assert u'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') == 'a\\xac\u1234\u20ac\u8000' + def test_backslashreplace(self): + sin = u"a\xac\u1234\u20ac\u8000\U0010ffff" + expected = "a\\xac\\u1234\\u20ac\\u8000\\U0010ffff" + assert sin.encode('ascii', 'backslashreplace') == expected + expected = "a\xac\\u1234\xa4\\u8000\\U0010ffff" + assert sin.encode("iso-8859-15", "backslashreplace") == expected def test_badhandler(self): import codecs diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -1,3 +1,5 @@ +from rpython.rlib.rutf8 import get_utf8_length + from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.typedef import ( TypeDef, generic_new_descr, GetSetProperty) @@ -152,7 +154,7 @@ if self.readnl is None: w_readnl = space.w_None else: - w_readnl = space.str(space.new_from_utf8(self.readnl)) # YYY + w_readnl = space.str(space.newutf8(self.readnl, get_utf8_length(self.readnl))) # YYY return space.newtuple([ w_initialval, w_readnl, space.newint(self.buf.pos), w_dict ]) @@ -215,7 +217,8 @@ if self.writenl: w_decoded = space.call_method( w_decoded, "replace", - space.newtext("\n"), space.new_from_utf8(self.writenl)) + space.newtext("\n"), space.newutf8(self.writenl, + get_utf8_length(self.writenl))) string = space.utf8_w(w_decoded) if string: self.buf.write(string) @@ -225,7 +228,9 @@ def read_w(self, space, w_size=None): self._check_closed(space) size = convert_size(space, w_size) - return space.new_from_utf8(self.buf.read(size)) + v = self.buf.read(size) + lgt = get_utf8_length(v) + return space.newutf8(v, lgt) def readline_w(self, space, w_limit=None): 
self._check_closed(space) @@ -239,7 +244,8 @@ else: newline = self.readnl result = self.buf.readline(newline, limit) - return space.new_from_utf8(result) + resultlen = get_utf8_length(result) + return space.newutf8(result, resultlen) @unwrap_spec(pos=int, mode=int) @@ -276,7 +282,9 @@ def getvalue_w(self, space): self._check_closed(space) - return space.new_from_utf8(self.buf.getvalue()) + v = self.buf.getvalue() + lgt = get_utf8_length(v) + return space.newutf8(v, lgt) def readable_w(self, space): self._check_closed(space) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -12,7 +12,8 @@ from rpython.rlib.rbigint import rbigint from rpython.rlib.rstring import StringBuilder from rpython.rlib.rutf8 import (check_utf8, next_codepoint_pos, - codepoints_in_utf8) + codepoints_in_utf8, get_utf8_length, + Utf8StringBuilder) STATE_ZERO, STATE_OK, STATE_DETACHED = range(3) @@ -684,13 +685,15 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.new_from_utf8(self.decoded.get_chars(-1)) + chars = self.decoded.get_chars(-1) + lgt = get_utf8_length(chars) + w_result = space.newutf8(chars, lgt) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final remaining = size - builder = StringBuilder(size) + builder = Utf8StringBuilder(size) # Keep reading chunks until we have n characters to return while remaining > 0: @@ -700,7 +703,7 @@ builder.append(data) remaining -= len(data) - return space.new_from_utf8(builder.build()) + return space.newutf8(builder.build(), builder.get_length()) def _scan_line_ending(self, limit): if self.readuniversal: @@ -725,6 +728,7 @@ limit = convert_size(space, w_limit) remnant = None builder = StringBuilder() + # XXX maybe use Utf8StringBuilder instead? 
while True: # First, get some data if necessary has_data = self._ensure_data(space) @@ -771,7 +775,8 @@ self.decoded.reset() result = builder.build() - return space.new_from_utf8(result) + lgt = get_utf8_length(result) + return space.newutf8(result, lgt) # _____________________________________________________________ # write methods @@ -794,8 +799,8 @@ if text.find('\n') >= 0: haslf = True if haslf and self.writetranslate and self.writenl: - w_text = space.call_method(w_text, "replace", space.new_from_utf8('\n'), - space.new_from_utf8(self.writenl)) + w_text = space.call_method(w_text, "replace", space.newutf8('\n', 1), + space.newutf8(self.writenl, get_utf8_length(self.writenl))) text = space.utf8_w(w_text) needflush = False diff --git a/pypy/module/_locale/interp_locale.py b/pypy/module/_locale/interp_locale.py --- a/pypy/module/_locale/interp_locale.py +++ b/pypy/module/_locale/interp_locale.py @@ -133,10 +133,11 @@ rffi.free_charp(s1_c) rffi.free_charp(s2_c) - s1, s2 = space.unicode_w(w_s1), space.unicode_w(w_s2) + s1, l1 = space.utf8_len_w(w_s1) + s2, l2 = space.utf8_len_w(w_s2) - s1_c = rffi.unicode2wcharp(s1) - s2_c = rffi.unicode2wcharp(s2) + s1_c = rffi.utf82wcharp(s1, l1) + s2_c = rffi.utf82wcharp(s2, l2) try: result = _wcscoll(s1_c, s2_c) finally: diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py --- a/pypy/module/_multibytecodec/c_codecs.py +++ b/pypy/module/_multibytecodec/c_codecs.py @@ -157,7 +157,7 @@ replace, end = errorcb(errors, namecb, reason, stringdata, start, end) # 'replace' is RPython unicode here - lgt, _ = rutf8.check_utf8(replace, True) + lgt = rutf8.get_utf8_length(replace) inbuf = rffi.utf82wcharp(replace, lgt) try: r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, lgt, end) @@ -268,7 +268,7 @@ rets, end = errorcb(errors, namecb, reason, unicodedata, start, end) codec = pypy_cjk_enc_getcodec(encodebuf) - lgt, _ = rutf8.get_utf8_length_flag(rets) + lgt = rutf8.get_utf8_length(rets) replace = encode(codec, rets, lgt, "strict", errorcb, namecb) with rffi.scoped_nonmovingbuffer(replace) as inbuf: r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end) diff --git a/pypy/module/_multibytecodec/interp_incremental.py b/pypy/module/_multibytecodec/interp_incremental.py --- a/pypy/module/_multibytecodec/interp_incremental.py +++ b/pypy/module/_multibytecodec/interp_incremental.py @@ -66,7 +66,7 @@ pos = c_codecs.pypy_cjk_dec_inbuf_consumed(self.decodebuf) assert 0 <= pos <= len(object) self.pending = object[pos:] - lgt = rutf8.get_utf8_length_flag(output) + lgt = rutf8.get_utf8_length(output) return space.newutf8(output, lgt) diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py --- a/pypy/module/_multibytecodec/interp_multibytecodec.py +++ b/pypy/module/_multibytecodec/interp_multibytecodec.py @@ -27,8 +27,8 @@ raise wrap_unicodedecodeerror(space, e, input, self.name) except RuntimeError: raise wrap_runtimeerror(space) - lgt, flag = rutf8.check_utf8(utf8_output, True) - return space.newtuple([space.newutf8(utf8_output, lgt, flag), + lgt = rutf8.get_utf8_length(utf8_output) + return space.newtuple([space.newutf8(utf8_output, lgt), space.newint(len(input))]) @unwrap_spec(errors="text_or_none") diff --git a/pypy/module/_multibytecodec/test/test_translation.py b/pypy/module/_multibytecodec/test/test_translation.py --- a/pypy/module/_multibytecodec/test/test_translation.py +++ b/pypy/module/_multibytecodec/test/test_translation.py @@ -14,7 +14,7 
@@ codecname, string = argv[1], argv[2] c = c_codecs.getcodec(codecname) u = c_codecs.decode(c, string) - lgt, _ = rutf8.get_utf8_length_flag(u) + lgt = rutf8.get_utf8_length(u) r = c_codecs.encode(c, u, lgt) print r return 0 diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -3,6 +3,7 @@ from rpython.rlib.objectmodel import specialize, always_inline, r_dict from rpython.rlib import rfloat, runicode, rutf8 from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.rlib.rarithmetic import r_uint from pypy.interpreter.error import oefmt from pypy.interpreter import unicodehelper @@ -366,7 +367,7 @@ return # help the annotator to know that we'll never go beyond # this point # - utf8_ch = rutf8.unichr_as_utf8(val, allow_surrogates=True) + utf8_ch = rutf8.unichr_as_utf8(r_uint(val), allow_surrogates=True) builder.append(utf8_ch) return i @@ -400,7 +401,7 @@ break elif ch == '\\' or ch < '\x20': self.pos = i-1 - return self.space.unicode_w(self.decode_string_escaped(start)) + return self.decode_string_escaped(start) strhash = intmask((1000003 * strhash) ^ ord(ll_chars[i])) bits |= ord(ch) length = i - start - 1 diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py --- a/pypy/module/_rawffi/alt/type_converter.py +++ b/pypy/module/_rawffi/alt/type_converter.py @@ -128,7 +128,7 @@ intval: lltype.Signed """ self.error(w_ffitype, w_obj) - + def handle_unichar(self, w_ffitype, w_obj, intval): """ intval: lltype.Signed @@ -174,7 +174,7 @@ def handle_struct_rawffi(self, w_ffitype, w_structinstance): """ This method should be killed as soon as we remove support for _rawffi structures - + w_structinstance: W_StructureInstance """ self.error(w_ffitype, w_structinstance) @@ -227,7 +227,7 @@ ucharval = self.get_char(w_ffitype) return space.newbytes(chr(ucharval)) elif w_ffitype.is_unichar(): - wcharval = self.get_unichar(w_ffitype) + wcharval = r_uint(self.get_unichar(w_ffitype)) return space.newutf8(rutf8.unichr_as_utf8(wcharval), 1) elif w_ffitype.is_double(): return self._float(w_ffitype) @@ -349,7 +349,7 @@ def get_struct_rawffi(self, w_ffitype, w_structdescr): """ This should be killed as soon as we kill support for _rawffi structures - + Return type: lltype.Unsigned (the address of the structure) """ diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -448,7 +448,8 @@ elif c == 'c': return space.newbytes(func(add_arg, argdesc, ll_type)) elif c == 'u': - return space.newunicode(func(add_arg, argdesc, ll_type)) + return space.newutf8(rutf8.unichr_as_utf8( + ord(func(add_arg, argdesc, ll_type))), 1) elif c == 'f' or c == 'd' or c == 'g': return space.newfloat(float(func(add_arg, argdesc, ll_type))) else: @@ -596,10 +597,10 @@ return space.w_None wcharp_addr = rffi.cast(rffi.CWCHARP, address) if maxlength == -1: - s = rffi.wcharp2utf8(wcharp_addr) + s, lgt = rffi.wcharp2utf8(wcharp_addr) else: - s = rffi.wcharpsize2utf8(wcharp_addr, maxlength) - return space.newunicode(s) + s, lgt = rffi.wcharp2utf8n(wcharp_addr, maxlength) + return space.newutf8(s, lgt) @unwrap_spec(address=r_uint, maxlength=int) def charp2rawstring(space, address, maxlength=-1): @@ -612,8 +613,8 @@ def wcharp2rawunicode(space, address, maxlength=-1): if maxlength == -1: return wcharp2unicode(space, address) - s = 
rffi.wcharpsize2unicode(rffi.cast(rffi.CWCHARP, address), maxlength) - return space.newunicode(s) + s = rffi.wcharpsize2utf8(rffi.cast(rffi.CWCHARP, address), maxlength) + return space.newutf8(s, maxlength) @unwrap_spec(address=r_uint, newcontent='bufferstr') def rawstring2charp(space, address, newcontent): diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -1,7 +1,7 @@ from rpython.rlib import jit, rgc, rutf8 from rpython.rlib.buffer import RawBuffer from rpython.rlib.objectmodel import keepalive_until_here -from rpython.rlib.rarithmetic import ovfcheck, widen +from rpython.rlib.rarithmetic import ovfcheck, widen, r_uint from rpython.rlib.unroll import unrolling_iterable from rpython.rtyper.annlowlevel import llstr from rpython.rtyper.lltypesystem import lltype, rffi @@ -380,6 +380,7 @@ if len(s) % self.itemsize != 0: raise oefmt(self.space.w_ValueError, "string length not a multiple of item size") + self.check_valid_unicode(space, s) # empty for non-u arrays oldlen = self.len new = len(s) / self.itemsize if not new: @@ -451,7 +452,7 @@ """ if self.typecode == 'u': buf = rffi.cast(UNICODE_ARRAY, self._buffer_as_unsigned()) - return space.newutf8(rffi.wcharpsize2unicode(buf, self.len)) + return space.newutf8(rffi.wcharpsize2utf8(buf, self.len), self.len) else: raise oefmt(space.w_ValueError, "tounicode() may only be called on type 'u' arrays") @@ -710,6 +711,9 @@ s = "array('%s', %s)" % (self.typecode, space.text_w(r)) return space.newtext(s) + def check_valid_unicode(self, space, s): + pass # overwritten by u + W_ArrayBase.typedef = TypeDef( 'array.array', __new__ = interp2app(w_array), @@ -870,6 +874,18 @@ def get_buffer(self): return rffi.cast(mytype.arrayptrtype, self._buffer) + if mytype.unwrap == 'utf8_len_w': + def check_valid_unicode(self, space, s): + i = 0 + while i < len(s): + if s[i] != '\x00' or ord(s[i + 1]) > 0x10: + v = ((ord(s[i]) << 24) + (ord(s[i + 1]) << 16) + + (ord(s[i + 2]) << 8) + ord(s[i + 3])) + raise oefmt(space.w_ValueError, + "Character U+%s is not in range [U+0000, U+10ffff]", + hex(v)[2:]) + i += 4 + def item_w(self, w_item): space = self.space unwrap = getattr(space, mytype.unwrap) @@ -1013,7 +1029,7 @@ elif mytype.typecode == 'c': return space.newbytes(item) elif mytype.typecode == 'u': - code = ord(item) + code = r_uint(ord(item)) return space.newutf8(rutf8.unichr_as_utf8(code), 1) assert 0, "unreachable" diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py --- a/pypy/module/array/test/test_array.py +++ b/pypy/module/array/test/test_array.py @@ -844,13 +844,7 @@ import sys if sys.maxunicode == 0xffff: skip("test for 32-bit unicodes") - a = self.array('u', b'\xff\xff\xff\xff') - assert len(a) == 1 - assert repr(a[0]) == "u'\Uffffffff'" - if sys.maxint == 2147483647: - assert ord(a[0]) == -1 - else: - assert ord(a[0]) == 4294967295 + raises(ValueError, self.array, 'u', b'\xff\xff\xff\xff') def test_weakref(self): import weakref diff --git a/pypy/module/cpyext/longobject.py b/pypy/module/cpyext/longobject.py --- a/pypy/module/cpyext/longobject.py +++ b/pypy/module/cpyext/longobject.py @@ -4,6 +4,7 @@ CONST_STRING, ADDR, CANNOT_FAIL) from pypy.objspace.std.longobject import W_LongObject from pypy.interpreter.error import OperationError +from pypy.interpreter.unicodehelper import wcharpsize2utf8 from pypy.module.cpyext.intobject import PyInt_AsUnsignedLongMask from rpython.rlib.rbigint import rbigint @@ -191,7 
+192,7 @@ string, length gives the number of characters, and base is the radix for the conversion. The radix must be in the range [2, 36]; if it is out of range, ValueError will be raised.""" - w_value = space.newunicode(rffi.wcharpsize2unicode(u, length)) + w_value = space.newutf8(wcharpsize2utf8(space, u, length), length) w_base = space.newint(rffi.cast(lltype.Signed, base)) return space.call_function(space.w_long, w_value, w_base) diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py --- a/pypy/module/cpyext/object.py +++ b/pypy/module/cpyext/object.py @@ -246,7 +246,7 @@ the Python expression unicode(o). Called by the unicode() built-in function.""" if w_obj is None: - return space.newunicode(u"") + return space.newutf8("", 6) return space.call_function(space.w_unicode, w_obj) @cpython_api([PyObject, PyObject], rffi.INT_real, error=-1) @@ -302,7 +302,7 @@ if opid == Py_EQ: return 1 if opid == Py_NE: - return 0 + return 0 w_res = PyObject_RichCompare(space, w_o1, w_o2, opid_int) return int(space.is_true(w_res)) diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -1,5 +1,11 @@ +from rpython.rtyper.lltypesystem import rffi, lltype +from rpython.rlib import rstring, runicode +from rpython.tool.sourcetools import func_renamer + from pypy.interpreter.error import OperationError, oefmt -from rpython.rtyper.lltypesystem import rffi, lltype +from pypy.interpreter.unicodehelper import ( + wcharpsize2utf8, str_decode_utf_16_helper, str_decode_utf_32_helper, + unicode_encode_decimal) from pypy.module.unicodedata import unicodedb from pypy.module.cpyext.api import ( CANNOT_FAIL, Py_ssize_t, build_type_checkers_flags, cpython_api, @@ -13,8 +19,6 @@ from pypy.module.sys.interp_encoding import setdefaultencoding from pypy.module._codecs.interp_codecs import CodecState from pypy.objspace.std import unicodeobject -from rpython.rlib import rstring, runicode -from rpython.tool.sourcetools import func_renamer import sys ## See comment in bytesobject.py. @@ -61,10 +65,10 @@ def unicode_attach(space, py_obj, w_obj, w_userdata=None): "Fills a newly allocated PyUnicodeObject with a unicode string" py_unicode = rffi.cast(PyUnicodeObject, py_obj) - s = space.unicode_w(w_obj) - py_unicode.c_length = len(s) + s, length = space.utf8_len_w(w_obj) + py_unicode.c_length = length py_unicode.c_str = lltype.nullptr(rffi.CWCHARP.TO) - py_unicode.c_hash = space.hash_w(space.newunicode(s)) + py_unicode.c_hash = space.hash_w(space.newutf8(s, length)) py_unicode.c_defenc = lltype.nullptr(PyObject.TO) def unicode_realize(space, py_obj): @@ -73,11 +77,12 @@ be modified after this call. 
""" py_uni = rffi.cast(PyUnicodeObject, py_obj) - s = rffi.wcharpsize2unicode(py_uni.c_str, py_uni.c_length) + length = py_uni.c_length + s = wcharpsize2utf8(space, py_uni.c_str, length) w_type = from_ref(space, rffi.cast(PyObject, py_obj.c_ob_type)) w_obj = space.allocate_instance(unicodeobject.W_UnicodeObject, w_type) - w_obj.__init__(s) - py_uni.c_hash = space.hash_w(space.newunicode(s)) + w_obj.__init__(s, length) + py_uni.c_hash = space.hash_w(space.newutf8(s, length)) track_reference(space, py_obj, w_obj) return w_obj @@ -214,8 +219,8 @@ if not ref_unicode.c_str: # Copy unicode buffer w_unicode = from_ref(space, rffi.cast(PyObject, ref)) - u = space.unicode_w(w_unicode) - ref_unicode.c_str = rffi.unicode2wcharp(u) + u, length = space.utf8_len_w(w_unicode) + ref_unicode.c_str = rffi.utf82wcharp(u, length) return ref_unicode.c_str @cpython_api([PyObject], rffi.CWCHARP) @@ -335,8 +340,8 @@ Therefore, modification of the resulting Unicode object is only allowed when u is NULL.""" if wchar_p: - s = rffi.wcharpsize2unicode(wchar_p, length) - return make_ref(space, space.newunicode(s)) + s = wcharpsize2utf8(space, wchar_p, length) + return make_ref(space, space.newutf8(s, length)) else: return rffi.cast(PyObject, new_empty_unicode(space, length)) @@ -506,7 +511,8 @@ """Encode the Py_UNICODE buffer of the given size and return a Python string object. Return NULL if an exception was raised by the codec.""" - w_u = space.newunicode(rffi.wcharpsize2unicode(s, size)) + u = wcharpsize2utf8(space, s, size) + w_u = space.newutf8(u, size) if errors: w_errors = space.newtext(rffi.charp2str(errors)) else: @@ -564,15 +570,11 @@ else: errors = None - result, length, byteorder = runicode.str_decode_utf_16_helper( - string, size, errors, - True, # final ? false for multiple passes? - None, # errorhandler - byteorder) + result, _, length, byteorder = str_decode_utf_16_helper( + string, errors, final=True, errorhandler=None, byteorder=byteorder) if pbyteorder is not None: pbyteorder[0] = rffi.cast(rffi.INT, byteorder) - - return space.newunicode(result) + return space.newutf8(result, length) @cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING, rffi.INTP], PyObject) def PyUnicode_DecodeUTF32(space, s, size, llerrors, pbyteorder): @@ -620,15 +622,11 @@ else: errors = None - result, length, byteorder = runicode.str_decode_utf_32_helper( - string, size, errors, - True, # final ? false for multiple passes? - None, # errorhandler - byteorder) + result, _, length, byteorder = str_decode_utf_32_helper( + string, errors, final=True, errorhandler=None, byteorder=byteorder) if pbyteorder is not None: pbyteorder[0] = rffi.cast(rffi.INT, byteorder) - - return space.newunicode(result) + return space.newutf8(result, length) @cpython_api([rffi.CWCHARP, Py_ssize_t, rffi.CCHARP, CONST_STRING], rffi.INT_real, error=-1) @@ -646,14 +644,13 @@ Returns 0 on success, -1 on failure. """ - u = rffi.wcharpsize2unicode(s, length) + u = rffi.wcharpsize2utf8(s, length) if llerrors: errors = rffi.charp2str(llerrors) else: errors = None state = space.fromcache(CodecState) - result = runicode.unicode_encode_decimal(u, length, errors, - state.encode_error_handler) + result = unicode_encode_decimal(u, errors, state.encode_error_handler) i = len(result) output[i] = '\0' i -= 1 @@ -706,12 +703,17 @@ """Return 1 if substr matches str[start:end] at the given tail end (direction == -1 means to do a prefix match, direction == 1 a suffix match), 0 otherwise. 
Return -1 if an error occurred.""" - str = space.unicode_w(w_str) - substr = space.unicode_w(w_substr) + space.utf8_w(w_str) # type check + space.utf8_w(w_substr) + w_start = space.newint(start) + w_end = space.newint(end) if rffi.cast(lltype.Signed, direction) <= 0: - return rstring.startswith(str, substr, start, end) + w_result = space.call_method( + w_str, "startswith", w_substr, w_start, w_end) else: - return rstring.endswith(str, substr, start, end) + w_result = space.call_method( + w_str, "endswith", w_substr, w_start, w_end) + return space.int_w(w_result) @cpython_api([PyObject, PyObject, Py_ssize_t, Py_ssize_t], Py_ssize_t, error=-1) def PyUnicode_Count(space, w_str, w_substr, start, end): diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py --- a/pypy/module/pyexpat/interp_pyexpat.py +++ b/pypy/module/pyexpat/interp_pyexpat.py @@ -483,7 +483,7 @@ except rutf8.CheckError: from pypy.interpreter import unicodehelper # get the correct error msg - unicodehelper.str_decode_utf8(s, len(s), 'string', True, + unicodehelper.str_decode_utf8(s, 'string', True, unicodehelper.decode_error_handler(space)) assert False, "always raises" else: @@ -587,21 +587,22 @@ def UnknownEncodingHandler(self, space, name, info): # Yes, supports only 8bit encodings - translationmap = space.unicode_w( + translationmap, lgt = space.utf8_len_w( space.call_method( space.newbytes(self.all_chars), "decode", space.newtext(name), space.newtext("replace"))) - if len(translationmap) != 256: + if lgt != 256: raise oefmt(space.w_ValueError, "multi-byte encodings are not supported") - for i in range(256): - c = translationmap[i] - if c == u'\ufffd': + i = 0 + for c in rutf8.Utf8StringIterator(translationmap): + if c == 0xfffd: info.c_map[i] = rffi.cast(rffi.INT, -1) else: info.c_map[i] = rffi.cast(rffi.INT, c) + i += 1 info.c_data = lltype.nullptr(rffi.VOIDP.TO) info.c_convert = lltype.nullptr(rffi.VOIDP.TO) info.c_release = lltype.nullptr(rffi.VOIDP.TO) diff --git a/pypy/module/struct/formatiterator.py b/pypy/module/struct/formatiterator.py --- a/pypy/module/struct/formatiterator.py +++ b/pypy/module/struct/formatiterator.py @@ -1,6 +1,6 @@ from rpython.rlib.rarithmetic import (r_uint, r_ulonglong, r_longlong, maxint, intmask) -from rpython.rlib import jit +from rpython.rlib import jit, rutf8 from rpython.rlib.objectmodel import specialize from rpython.rlib.rstruct.error import StructError from rpython.rlib.rstruct.formatiterator import FormatIterator @@ -107,7 +107,7 @@ def accept_unicode_arg(self): w_obj = self.accept_obj_arg() - return self.space.unicode_w(w_obj) + return self.space.utf8_len_w(w_obj) def accept_float_arg(self): w_obj = self.accept_obj_arg() @@ -191,6 +191,10 @@ assert 0, "unreachable" self.result_w.append(w_value) + def append_utf8(self, value): + w_ch = self.space.newutf8(rutf8.unichr_as_utf8(r_uint(value)), 1) + self.result_w.append(w_ch) + def get_pos(self): return self.pos diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -7,11 +7,8 @@ from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.typedef import TypeDef, interp_attrproperty from rpython.rlib.rarithmetic import r_longlong -from rpython.rlib.objectmodel import we_are_translated -from rpython.rlib.runicode import MAXUNICODE from rpython.rlib.unicodedata import unicodedb_5_2_0, unicodedb_3_2_0 -from rpython.rlib.runicode import code_to_unichr, 
ord_accepts_surrogate -import sys +from rpython.rlib.rutf8 import Utf8StringBuilder, unichr_as_utf8 # Contants for Hangul characters @@ -30,49 +27,17 @@ # unicode code point. -if MAXUNICODE > 0xFFFF: - # Target is wide build - def unichr_to_code_w(space, w_unichr): - if not space.isinstance_w(w_unichr, space.w_unicode): - raise oefmt( - space.w_TypeError, 'argument 1 must be unicode, not %T', - w_unichr) +# Target is wide build +def unichr_to_code_w(space, w_unichr): + if not space.isinstance_w(w_unichr, space.w_unicode): + raise oefmt( + space.w_TypeError, 'argument 1 must be unicode, not %T', + w_unichr) - if not we_are_translated() and sys.maxunicode == 0xFFFF: - # Host CPython is narrow build, accept surrogates - try: - return ord_accepts_surrogate(space.unicode_w(w_unichr)) - except TypeError: - raise oefmt(space.w_TypeError, - "need a single Unicode character as parameter") - else: - if not space.len_w(w_unichr) == 1: - raise oefmt(space.w_TypeError, - "need a single Unicode character as parameter") - return space.int_w(space.ord(w_unichr)) - -else: - # Target is narrow build - def unichr_to_code_w(space, w_unichr): - if not space.isinstance_w(w_unichr, space.w_unicode): - raise oefmt( - space.w_TypeError, 'argument 1 must be unicode, not %T', - w_unichr) - - if not we_are_translated() and sys.maxunicode > 0xFFFF: - # Host CPython is wide build, forbid surrogates - if not space.len_w(w_unichr) == 1: - raise oefmt(space.w_TypeError, - "need a single Unicode character as parameter") - return space.int_w(space.ord(w_unichr)) - - else: - # Accept surrogates - try: - return ord_accepts_surrogate(space.unicode_w(w_unichr)) - except TypeError: - raise oefmt(space.w_TypeError, - "need a single Unicode character as parameter") + if not space.len_w(w_unichr) == 1: + raise oefmt(space.w_TypeError, + "need a single Unicode character as parameter") + return space.int_w(space.ord(w_unichr)) class UCD(W_Root): @@ -110,7 +75,8 @@ except KeyError: msg = space.mod(space.newtext("undefined character name '%s'"), space.newtext(name)) raise OperationError(space.w_KeyError, msg) - return space.newunicode(code_to_unichr(code)) + assert code >= 0 + return space.newutf8(unichr_as_utf8(code), 1) def name(self, space, w_unichr, w_default=None): code = unichr_to_code_w(space, w_unichr) @@ -259,10 +225,10 @@ result[0] = ch if not composed: # If decomposed normalization we are done - return space.newunicode(u''.join([unichr(i) for i in result[:j]])) + return self.build(space, result, stop=j) if j <= 1: - return space.newunicode(u''.join([unichr(i) for i in result[:j]])) + return self.build(space, result, stop=j) current = result[0] starter_pos = 0 @@ -310,7 +276,13 @@ result[starter_pos] = current - return space.newunicode(u''.join([unichr(i) for i in result[:next_insert]])) + return self.build(space, result, stop=next_insert) + + def build(self, space, r, stop): + builder = Utf8StringBuilder(stop * 3) + for i in range(stop): + builder.append_code(r[i]) + return space.newutf8(builder.build(), stop) methods = {} diff --git a/pypy/module/unicodedata/test/test_hyp.py b/pypy/module/unicodedata/test/test_hyp.py --- a/pypy/module/unicodedata/test/test_hyp.py +++ b/pypy/module/unicodedata/test/test_hyp.py @@ -1,3 +1,4 @@ + import pytest try: from hypothesis import given, strategies as st, example, settings @@ -5,12 +6,14 @@ pytest.skip("hypothesis required") from pypy.module.unicodedata.interp_ucd import ucd +from rpython.rlib.rutf8 import get_utf8_length def make_normalization(space, NF_code): def normalize(s): - w_s = 
space.newunicode(s) + u = s.encode('utf8') + w_s = space.newutf8(u, get_utf8_length(u)) w_res = ucd.normalize(space, NF_code, w_s) - return space.unicode_w(w_res) + return space.utf8_w(w_res).decode('utf8') return normalize all_forms = ['NFC', 'NFD', 'NFKC', 'NFKD'] diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -212,9 +212,6 @@ def newutf8(self, x, l): return w_some_obj() - def new_from_utf8(self, a): - return w_some_obj() - def newunicode(self, a): return w_some_obj() diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -3,7 +3,7 @@ from rpython.rlib import jit, rutf8 from rpython.rlib.objectmodel import specialize -from rpython.rlib.rarithmetic import INT_MAX +from rpython.rlib.rarithmetic import INT_MAX, r_uint from rpython.rlib.rfloat import DTSF_ALT, formatd, isnan, isinf from rpython.rlib.rstring import StringBuilder from rpython.rlib.unroll import unrolling_iterable @@ -330,7 +330,7 @@ space = self.space if do_unicode: cp = rutf8.codepoint_at_pos(self.fmt, self.fmtpos - 1) - w_s = space.newutf8(rutf8.unichr_as_utf8(cp), 1) + w_s = space.newutf8(rutf8.unichr_as_utf8(r_uint(cp)), 1) else: cp = ord(self.fmt[self.fmtpos - 1]) w_s = space.newbytes(chr(cp)) @@ -466,7 +466,7 @@ n = space.int_w(w_value) if do_unicode: try: - c = rutf8.unichr_as_utf8(n) + c = rutf8.unichr_as_utf8(r_uint(n)) except ValueError: raise oefmt(space.w_OverflowError, "unicode character code out of range") diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -741,6 +741,8 @@ assert u'\u20ac'.encode('utf-8') == '\xe2\x82\xac' assert u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82' assert u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96' + assert u'\ud800\udc02'.encode('uTf-8') == '\xf0\x90\x80\x82' + assert u'\ud84d\udc56'.encode('Utf8') == '\xf0\xa3\x91\x96' assert u'\ud800'.encode('utf-8') == '\xed\xa0\x80' assert u'\udc00'.encode('utf-8') == '\xed\xb0\x80' assert (u'\ud800\udc02'*1000).encode('utf-8') == '\xf0\x90\x80\x82'*1000 diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py --- a/rpython/annotator/unaryop.py +++ b/rpython/annotator/unaryop.py @@ -792,7 +792,7 @@ def ord(self): # warning, on 32-bit with 32-bit unichars, this might return # negative numbers - return SomeInteger() + return SomeInteger(nonneg=True) class __extend__(SomeIterator): diff --git a/rpython/rlib/rstruct/nativefmttable.py b/rpython/rlib/rstruct/nativefmttable.py --- a/rpython/rlib/rstruct/nativefmttable.py +++ b/rpython/rlib/rstruct/nativefmttable.py @@ -4,7 +4,7 @@ """ import struct -from rpython.rlib import jit, longlong2float +from rpython.rlib import rutf8, longlong2float from rpython.rlib.objectmodel import specialize from rpython.rlib.rarithmetic import r_singlefloat, widen, intmask from rpython.rlib.rstruct import standardfmttable as std @@ -139,17 +139,17 @@ from rpython.rlib.rstruct import unichar def pack_unichar(fmtiter): - unistr = fmtiter.accept_unicode_arg() - if len(unistr) != 1: + utf8, lgt = fmtiter.accept_unicode_arg() + if lgt != 1: raise StructError("expected a unicode string of length 1") - c = unistr[0] # string->char conversion for the annotator - unichar.pack_unichar(c, fmtiter.wbuf, fmtiter.pos) + uchr = 
rutf8.codepoint_at_pos(utf8, 0) + unichar.pack_codepoint(uchr, fmtiter.wbuf, fmtiter.pos) fmtiter.advance(unichar.UNICODE_SIZE) @specialize.argtype(0) def unpack_unichar(fmtiter): data = fmtiter.read(unichar.UNICODE_SIZE) - fmtiter.appendobj(unichar.unpack_unichar(data)) + fmtiter.append_utf8(unichar.unpack_codepoint(data)) native_fmttable['u'] = {'size': unichar.UNICODE_SIZE, 'alignment': unichar.UNICODE_SIZE, diff --git a/rpython/rlib/rstruct/unichar.py b/rpython/rlib/rstruct/unichar.py --- a/rpython/rlib/rstruct/unichar.py +++ b/rpython/rlib/rstruct/unichar.py @@ -3,12 +3,8 @@ """ import sys -from rpython.rlib.runicode import MAXUNICODE -if MAXUNICODE <= 65535: - UNICODE_SIZE = 2 -else: - UNICODE_SIZE = 4 +UNICODE_SIZE = 4 BIGENDIAN = sys.byteorder == "big" def pack_unichar(unich, buf, pos): @@ -34,7 +30,7 @@ buf.setitem(pos+2, chr((unich >> 16) & 0xFF)) buf.setitem(pos+3, chr(unich >> 24)) -def unpack_unichar(rawstring): +def unpack_codepoint(rawstring): assert len(rawstring) == UNICODE_SIZE if UNICODE_SIZE == 2: if BIGENDIAN: @@ -54,4 +50,7 @@ ord(rawstring[1]) << 8 | ord(rawstring[2]) << 16 | ord(rawstring[3]) << 24) - return unichr(n) + return n + +def unpack_unichar(rawstring): + return unichr(unpack_codepoint(rawstring)) diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -19,7 +19,7 @@ from rpython.rlib.objectmodel import enforceargs, we_are_translated, specialize from rpython.rlib.objectmodel import always_inline, dont_inline, try_inline from rpython.rlib.rstring import StringBuilder -from rpython.rlib import jit +from rpython.rlib import jit, types from rpython.rlib.signature import signature from rpython.rlib.types import char, none from rpython.rlib.rarithmetic import r_uint @@ -27,6 +27,8 @@ from rpython.rtyper.lltypesystem import lltype, rffi +# we need a way to accept both r_uint and int(nonneg=True) +#@signature(types.int_nonneg(), types.bool(), returns=types.str()) def unichr_as_utf8(code, allow_surrogates=False): """Encode code (numeric value) as utf8 encoded string """ @@ -437,7 +439,7 @@ low = codepoint_at_pos(utf8, i) if 0xDC00 <= low <= 0xDFFF: uchr = 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00) - i = next_codepoint_pos(utf8, i) + i = next_codepoint_pos(utf8, i) # else not really a surrogate pair, just append high else: i = next_codepoint_pos(utf8, i) @@ -535,6 +537,13 @@ else: return next_codepoint_pos(utf8, next_codepoint_pos(utf8, bytepos)) +def _pos_at_index(utf8, index): + # Slow! 
+ pos = 0 + for _ in range(index): + pos = next_codepoint_pos(utf8, pos) + return pos + @jit.dont_look_inside def codepoint_at_index(utf8, storage, index): """ Return codepoint of a character inside utf8 encoded string, given diff --git a/rpython/rlib/types.py b/rpython/rlib/types.py --- a/rpython/rlib/types.py +++ b/rpython/rlib/types.py @@ -26,6 +26,8 @@ def int(): return model.SomeInteger() +def int_nonneg(): + return model.SomeInteger(nonneg=True) def bool(): return model.SomeBool() diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1019,7 +1019,27 @@ s = StringBuilder(size) for i in range(size): rutf8.unichr_as_utf8_append(s, ord(w[i])) - return s.build() + return s.build() + +def wcharp2utf8(w): + from rpython.rlib import rutf8 + + s = rutf8.Utf8StringBuilder() + i = 0 + while ord(w[i]): + s.append_code(ord(w[i])) + i += 1 + return s.build(), i + +def wcharp2utf8n(w, maxlen): + from rpython.rlib import rutf8 + + s = rutf8.Utf8StringBuilder(maxlen) + i = 0 + while i < maxlen and w[i]: + s.append_code(ord(w[i])) + i += 1 + return s.build(), i def utf82wcharp(utf8, utf8len): from rpython.rlib import rutf8 From pypy.commits at gmail.com Sun Dec 10 02:39:34 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 09 Dec 2017 23:39:34 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: translation fix Message-ID: <5a2ce4b6.90a9df0a.50226.910a@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93348:698fcd50ff42 Date: 2017-12-10 08:38 +0100 http://bitbucket.org/pypy/pypy/changeset/698fcd50ff42/ Log: translation fix diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -123,12 +123,15 @@ elif pos >= length: bytepos = len(utf8str) else: - bytepos = rutf8.codepoint_at_index(utf8str, index_storage, pos) + index_storage = w_unicode_obj._get_index_storage() + bytepos = rutf8.codepoint_position_at_index(utf8str, + index_storage, pos) if endpos >= length: endbytepos = len(utf8str) else: - endbytepos = rutf8.codepoint_at_index(utf8str, index_storage, - endpos) + index_storage = w_unicode_obj._get_index_storage() + endbytepos = rutf8.codepoint_position_at_index(utf8str, + index_storage, endpos) ctx = rsre_utf8.Utf8MatchContext( self.code, utf8str, bytepos, endbytepos, self.flags) # xxx we store the w_string on the ctx too, for From pypy.commits at gmail.com Sun Dec 10 02:39:36 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 09 Dec 2017 23:39:36 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: Kill the wrong comment about r_uint in prev_codepoint_pos Message-ID: <5a2ce4b8.6b88df0a.eef4b.3308@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93349:0d4b7b9d28b5 Date: 2017-12-10 08:39 +0100 http://bitbucket.org/pypy/pypy/changeset/0d4b7b9d28b5/ Log: Kill the wrong comment about r_uint in prev_codepoint_pos diff --git a/rpython/rlib/rsre/rsre_utf8.py b/rpython/rlib/rsre/rsre_utf8.py --- a/rpython/rlib/rsre/rsre_utf8.py +++ b/rpython/rlib/rsre/rsre_utf8.py @@ -1,6 +1,5 @@ import sys from rpython.rlib.debug import check_nonneg -from rpython.rlib.rarithmetic import r_uint, intmask from rpython.rlib.rsre.rsre_core import AbstractMatchContext, EndOfString from rpython.rlib.rsre import rsre_char from rpython.rlib.objectmodel import we_are_translated @@ -39,9 +38,7 @@ def prev(self, position): if position <= 0: raise EndOfString - 
upos = r_uint(position) - upos = rutf8.prev_codepoint_pos(self._utf8, upos) - position = intmask(upos) + position = rutf8.prev_codepoint_pos(self._utf8, position) assert position >= 0 return position @@ -53,12 +50,10 @@ return position def prev_n(self, position, n, start_position): - upos = r_uint(position) for i in range(n): - if upos <= r_uint(start_position): + if position <= start_position: raise EndOfString - upos = rutf8.prev_codepoint_pos(self._utf8, upos) - position = intmask(upos) + position = rutf8.prev_codepoint_pos(self._utf8, position) assert position >= 0 return position diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -132,7 +132,7 @@ """Gives the position of the previous codepoint. 'pos' must not be zero. """ - pos -= 1 # ruint + pos -= 1 if pos >= len(code): # for the case where pos - 1 == len(code): assert pos >= 0 return pos # assume there is an extra '\x00' character From pypy.commits at gmail.com Sun Dec 10 02:45:24 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 09 Dec 2017 23:45:24 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: More translation fixes Message-ID: <5a2ce614.e4a6df0a.df20b.01d2@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93350:ec4522b42070 Date: 2017-12-10 08:44 +0100 http://bitbucket.org/pypy/pypy/changeset/ec4522b42070/ Log: More translation fixes diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -410,7 +410,7 @@ class MinRepeatOneMatchResult(MatchResult): install_jitdriver('MinRepeatOne', greens=['nextppos', 'ppos3', 'ctx.pattern'], - reds=['ptr', 'self', 'ctx'], + reds=['max_count', 'ptr', 'self', 'ctx'], debugprint=(2, 0)) # indices in 'greens' def __init__(self, nextppos, ppos3, max_count, ptr, marks): @@ -427,7 +427,8 @@ ppos3 = self.ppos3 while max_count >= 0: ctx.jitdriver_MinRepeatOne.jit_merge_point( - self=self, ptr=ptr, ctx=ctx, nextppos=nextppos, ppos3=ppos3) + self=self, ptr=ptr, ctx=ctx, nextppos=nextppos, ppos3=ppos3, + max_count=max_count) result = sre_match(ctx, nextppos, ptr, self.start_marks) if result is not None: self.subresult = result @@ -436,14 +437,14 @@ return self if not self.next_char_ok(ctx, ptr, ppos3): break - ptr = ctx.next(ptr) + ptr = ctx.next_indirect(ptr) max_count -= 1 def find_next_result(self, ctx): ptr = self.start_ptr if not self.next_char_ok(ctx, ptr, self.ppos3): return - self.start_ptr = ctx.next(ptr) + self.start_ptr = ctx.next_indirect(ptr) return self.find_first_result(ctx) def next_char_ok(self, ctx, ptr, ppos): @@ -1208,7 +1209,7 @@ if sre_match(ctx, base, start, None) is not None: ctx.match_start = start return True - start = ctx.next(start) + start = ctx.next_indirect(start) return False install_jitdriver_spec("LiteralSearch", From pypy.commits at gmail.com Sun Dec 10 02:50:12 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 09 Dec 2017 23:50:12 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: More Message-ID: <5a2ce734.928e1c0a.adcae.2802@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-re Changeset: r93351:363f54e90b19 Date: 2017-12-10 08:49 +0100 http://bitbucket.org/pypy/pypy/changeset/363f54e90b19/ Log: More diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -174,6 +174,11 @@ return base_position + index def next_indirect(self, position): return position + 1 # 
like next(), but can be called indirectly + def prev_indirect(self, position): + position -= 1 # like prev(), but can be called indirectly + if position < 0: + raise EndOfString + return position def get_mark(self, gid): return find_mark(self.match_marks, gid) @@ -217,14 +222,8 @@ these position methods. The Utf8MatchContext subclass doesn't inherit from here.""" - def next(self, position): - return position + 1 - - def prev(self, position): - position -= 1 - if position < 0: - raise EndOfString - return position + next = AbstractMatchContext.next_indirect + prev = AbstractMatchContext.prev_indirect def next_n(self, position, n, end_position): position += n @@ -397,7 +396,7 @@ self=self, ptr=ptr, ctx=ctx, nextppos=nextppos) result = sre_match(ctx, nextppos, ptr, self.start_marks) try: - ptr = ctx.prev(ptr) + ptr = ctx.prev_indirect(ptr) except EndOfString: ptr = -1 if result is not None: diff --git a/rpython/rlib/rsre/rsre_utf8.py b/rpython/rlib/rsre/rsre_utf8.py --- a/rpython/rlib/rsre/rsre_utf8.py +++ b/rpython/rlib/rsre/rsre_utf8.py @@ -41,6 +41,7 @@ position = rutf8.prev_codepoint_pos(self._utf8, position) assert position >= 0 return position + prev_indirect = prev def next_n(self, position, n, end_position): for i in range(n): diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py --- a/rpython/rlib/rsre/test/support.py +++ b/rpython/rlib/rsre/test/support.py @@ -36,6 +36,7 @@ if position._p == 0: raise EndOfString return Position(position._p - 1) + prev_indirect = prev def next_n(self, position, n, end_position): assert isinstance(position, Position) From pypy.commits at gmail.com Sun Dec 10 15:05:12 2017 From: pypy.commits at gmail.com (mattip) Date: Sun, 10 Dec 2017 12:05:12 -0800 (PST) Subject: [pypy-commit] buildbot default: need to think more about running benchmarks with onlyIfChanged Message-ID: <5a2d9378.99451c0a.790e5.d75d@mx.google.com> Author: Matti Picus Branch: Changeset: r1047:afea1ba1dd41 Date: 2017-12-10 22:04 +0200 http://bitbucket.org/pypy/buildbot/changeset/afea1ba1dd41/ Log: need to think more about running benchmarks with onlyIfChanged diff --git a/bot2/pypybuildbot/master.py b/bot2/pypybuildbot/master.py --- a/bot2/pypybuildbot/master.py +++ b/bot2/pypybuildbot/master.py @@ -319,8 +319,9 @@ JITBENCH64, # on tannit64, uses 1 core (in part exclusively) #JITBENCH64_NEW, # on speed64, uses 1 core (in part exclusively) - ], branch='default', hour=5, minute=0, - onlyIfChanged=True, + ], branch=None, hour=5, minute=0, + # XXX causes hg updatee -r nnn from pypy/pypy instead of pypy/benchmarks + #onlyIfChanged=True, ), Triggerable("NUMPY64_scheduler", [ From pypy.commits at gmail.com Sun Dec 10 16:15:58 2017 From: pypy.commits at gmail.com (cfbolz) Date: Sun, 10 Dec 2017 13:15:58 -0800 (PST) Subject: [pypy-commit] pypy default: don't crash when calling sleep with inf or nan Message-ID: <5a2da40e.238edf0a.80921.6619@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: Changeset: r93352:543c4e0c83b1 Date: 2017-12-10 22:11 +0100 http://bitbucket.org/pypy/pypy/changeset/543c4e0c83b1/ Log: don't crash when calling sleep with inf or nan (see https://www.blackhat.com/docs/eu-17/materials/eu-17-Arnaboldi- Exposing-Hidden-Exploitable-Behaviors-In-Programming-Languages- Using-Differential-Fuzzing-wp.pdf even though I am sure it is not really exploitable) diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py --- a/pypy/module/time/interp_time.py +++ b/pypy/module/time/interp_time.py @@ -310,12 +310,19 @@ errno = 
rposix.get_saved_errno() return os.strerror(errno) +def _check_sleep_arg(space, secs): + from rpython.rlib.rfloat import isinf, isnan + if secs < 0: + raise oefmt(space.w_IOError, + "Invalid argument: negative time in sleep") + if isinf(secs) or isnan(secs): + raise oefmt(space.w_IOError, + "Invalid argument: inf or nan") + if sys.platform != 'win32': @unwrap_spec(secs=float) def sleep(space, secs): - if secs < 0: - raise oefmt(space.w_IOError, - "Invalid argument: negative time in sleep") + _check_sleep_arg(space, secs) rtime.sleep(secs) else: from rpython.rlib import rwin32 @@ -336,9 +343,7 @@ OSError(EINTR, "sleep() interrupted")) @unwrap_spec(secs=float) def sleep(space, secs): - if secs < 0: - raise oefmt(space.w_IOError, - "Invalid argument: negative time in sleep") + _check_sleep_arg(space, secs) # as decreed by Guido, only the main thread can be # interrupted. main_thread = space.fromcache(State).main_thread diff --git a/pypy/module/time/test/test_time.py b/pypy/module/time/test/test_time.py --- a/pypy/module/time/test/test_time.py +++ b/pypy/module/time/test/test_time.py @@ -19,6 +19,8 @@ raises(TypeError, time.sleep, "foo") time.sleep(0.12345) raises(IOError, time.sleep, -1.0) + raises(IOError, time.sleep, float('nan')) + raises(IOError, time.sleep, float('inf')) def test_clock(self): import time From pypy.commits at gmail.com Sun Dec 10 16:35:28 2017 From: pypy.commits at gmail.com (amauryfa) Date: Sun, 10 Dec 2017 13:35:28 -0800 (PST) Subject: [pypy-commit] pypy py3.6: The ast module now has a "Constant" node that AST optimizers can use. Message-ID: <5a2da8a0.3799df0a.b08f4.5ed9@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93353:cadf41717a3f Date: 2017-12-10 17:47 +0100 http://bitbucket.org/pypy/pypy/changeset/cadf41717a3f/ Log: The ast module now has a "Constant" node that AST optimizers can use. Use it in place of PyPy's specific "Const". 
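
For illustration only (this snippet is not part of the changeset): CPython 3.6's
app-level ast module exposes the same "Constant" node, so an external optimizer
can emit it roughly as sketched below, assuming Python 3.6 or newer; the FoldAdd
class and the folded source line are invented for the example.

    import ast

    class FoldAdd(ast.NodeTransformer):
        # Rewrite "<number> + <number>" into a single Constant node.
        def visit_BinOp(self, node):
            self.generic_visit(node)
            if (isinstance(node.op, ast.Add) and
                    isinstance(node.left, ast.Num) and
                    isinstance(node.right, ast.Num)):
                folded = ast.Constant(node.left.n + node.right.n)
                return ast.copy_location(folded, node)
            return node

    tree = ast.parse("x = 1 + 2")
    tree = ast.fix_missing_locations(FoldAdd().visit(tree))
    exec(compile(tree, "<folded>", "exec"))  # the compiler accepts Constant nodes
    print(x)                                 # -> 3
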
diff --git a/pypy/interpreter/astcompiler/ast.py b/pypy/interpreter/astcompiler/ast.py --- a/pypy/interpreter/astcompiler/ast.py +++ b/pypy/interpreter/astcompiler/ast.py @@ -1743,6 +1743,8 @@ return NameConstant.from_object(space, w_node) if space.isinstance_w(w_node, get(space).w_Ellipsis): return Ellipsis.from_object(space, w_node) + if space.isinstance_w(w_node, get(space).w_Constant): + return Constant.from_object(space, w_node) if space.isinstance_w(w_node, get(space).w_Attribute): return Attribute.from_object(space, w_node) if space.isinstance_w(w_node, get(space).w_Subscript): @@ -1755,8 +1757,6 @@ return List.from_object(space, w_node) if space.isinstance_w(w_node, get(space).w_Tuple): return Tuple.from_object(space, w_node) - if space.isinstance_w(w_node, get(space).w_Const): - return Const.from_object(space, w_node) raise oefmt(space.w_TypeError, "Expected expr node, got %T", w_node) State.ast_type('expr', 'AST', None, ['lineno', 'col_offset']) @@ -2841,6 +2841,43 @@ State.ast_type('Ellipsis', 'expr', []) +class Constant(expr): + + def __init__(self, value, lineno, col_offset): + self.value = value + expr.__init__(self, lineno, col_offset) + + def walkabout(self, visitor): + visitor.visit_Constant(self) + + def mutate_over(self, visitor): + return visitor.visit_Constant(self) + + def to_object(self, space): + w_node = space.call_function(get(space).w_Constant) + w_value = self.value # constant + space.setattr(w_node, space.newtext('value'), w_value) + w_lineno = space.newint(self.lineno) # int + space.setattr(w_node, space.newtext('lineno'), w_lineno) + w_col_offset = space.newint(self.col_offset) # int + space.setattr(w_node, space.newtext('col_offset'), w_col_offset) + return w_node + + @staticmethod + def from_object(space, w_node): + w_value = get_field(space, w_node, 'value', False) + w_lineno = get_field(space, w_node, 'lineno', False) + w_col_offset = get_field(space, w_node, 'col_offset', False) + _value = w_value + if _value is None: + raise_required_value(space, w_node, 'value') + _lineno = space.int_w(w_lineno) + _col_offset = space.int_w(w_col_offset) + return Constant(_value, _lineno, _col_offset) + +State.ast_type('Constant', 'expr', ['value']) + + class Attribute(expr): def __init__(self, value, attr, ctx, lineno, col_offset): @@ -3137,43 +3174,6 @@ State.ast_type('Tuple', 'expr', ['elts', 'ctx']) -class Const(expr): - - def __init__(self, obj, lineno, col_offset): - self.obj = obj - expr.__init__(self, lineno, col_offset) - - def walkabout(self, visitor): - visitor.visit_Const(self) - - def mutate_over(self, visitor): - return visitor.visit_Const(self) - - def to_object(self, space): - w_node = space.call_function(get(space).w_Const) - w_obj = self.obj # object - space.setattr(w_node, space.newtext('obj'), w_obj) - w_lineno = space.newint(self.lineno) # int - space.setattr(w_node, space.newtext('lineno'), w_lineno) - w_col_offset = space.newint(self.col_offset) # int - space.setattr(w_node, space.newtext('col_offset'), w_col_offset) - return w_node - - @staticmethod - def from_object(space, w_node): - w_obj = get_field(space, w_node, 'obj', False) - w_lineno = get_field(space, w_node, 'lineno', False) - w_col_offset = get_field(space, w_node, 'col_offset', False) - _obj = w_obj - if _obj is None: - raise_required_value(space, w_node, 'obj') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) - return Const(_obj, _lineno, _col_offset) - -State.ast_type('Const', 'expr', ['obj']) - - class expr_context(AST): @staticmethod def 
from_object(space, w_node): @@ -4140,6 +4140,8 @@ return self.default_visitor(node) def visit_Ellipsis(self, node): return self.default_visitor(node) + def visit_Constant(self, node): + return self.default_visitor(node) def visit_Attribute(self, node): return self.default_visitor(node) def visit_Subscript(self, node): @@ -4152,8 +4154,6 @@ return self.default_visitor(node) def visit_Tuple(self, node): return self.default_visitor(node) - def visit_Const(self, node): - return self.default_visitor(node) def visit_Slice(self, node): return self.default_visitor(node) def visit_ExtSlice(self, node): @@ -4380,6 +4380,9 @@ def visit_Ellipsis(self, node): pass + def visit_Constant(self, node): + pass + def visit_Attribute(self, node): node.value.walkabout(self) @@ -4399,9 +4402,6 @@ def visit_Tuple(self, node): self.visit_sequence(node.elts) - def visit_Const(self, node): - pass - def visit_Slice(self, node): if node.lower: node.lower.walkabout(self) diff --git a/pypy/interpreter/astcompiler/asthelpers.py b/pypy/interpreter/astcompiler/asthelpers.py --- a/pypy/interpreter/astcompiler/asthelpers.py +++ b/pypy/interpreter/astcompiler/asthelpers.py @@ -128,7 +128,7 @@ _description = "dict comprehension" -class __extend__(ast.Dict, ast.Set, ast.Str, ast.Bytes, ast.Num, ast.Const): +class __extend__(ast.Dict, ast.Set, ast.Str, ast.Bytes, ast.Num, ast.Constant): _description = "literal" @@ -150,18 +150,18 @@ _description = "conditional expression" -class __extend__(ast.Const): +class __extend__(ast.Constant): constant = True def as_node_list(self, space): try: - values_w = space.unpackiterable(self.obj) + values_w = space.unpackiterable(self.value) except OperationError: return None line = self.lineno column = self.col_offset - return [ast.Const(w_obj, line, column) for w_obj in values_w] + return [ast.Constant(w_obj, line, column) for w_obj in values_w] class __extend__(ast.Str): diff --git a/pypy/interpreter/astcompiler/codegen.py b/pypy/interpreter/astcompiler/codegen.py --- a/pypy/interpreter/astcompiler/codegen.py +++ b/pypy/interpreter/astcompiler/codegen.py @@ -1039,9 +1039,9 @@ self.update_position(b.lineno) self.load_const(b.s) - def visit_Const(self, const): + def visit_Constant(self, const): self.update_position(const.lineno) - self.load_const(const.obj) + self.load_const(const.value) def visit_Ellipsis(self, e): self.load_const(self.space.w_Ellipsis) diff --git a/pypy/interpreter/astcompiler/optimize.py b/pypy/interpreter/astcompiler/optimize.py --- a/pypy/interpreter/astcompiler/optimize.py +++ b/pypy/interpreter/astcompiler/optimize.py @@ -63,10 +63,10 @@ return True -class __extend__(ast.Const): +class __extend__(ast.Constant): def as_constant(self): - return self.obj + return self.value class __extend__(ast.NameConstant): @@ -208,7 +208,7 @@ else: if self.space.int_w(w_len) > 20: return binop - return ast.Const(w_const, binop.lineno, binop.col_offset) + return ast.Constant(w_const, binop.lineno, binop.col_offset) return binop def visit_UnaryOp(self, unary): @@ -229,7 +229,7 @@ except OperationError: pass else: - return ast.Const(w_const, unary.lineno, unary.col_offset) + return ast.Constant(w_const, unary.lineno, unary.col_offset) elif op == ast.Not: compare = unary.operand if isinstance(compare, ast.Compare) and len(compare.ops) == 1: @@ -265,7 +265,7 @@ w_const = rep.value.as_constant() if w_const is not None: w_repr = self.space.repr(w_const) - return ast.Const(w_repr, rep.lineno, rep.col_offset) + return ast.Constant(w_repr, rep.lineno, rep.col_offset) return rep def visit_Name(self, 
name): @@ -282,7 +282,7 @@ elif iden == "False": w_const = space.w_False if w_const is not None: - return ast.Const(w_const, name.lineno, name.col_offset) + return ast.NameConstant(w_const, name.lineno, name.col_offset) return name def visit_Tuple(self, tup): @@ -303,7 +303,7 @@ else: consts_w = [] w_consts = self.space.newtuple(consts_w) - return ast.Const(w_consts, tup.lineno, tup.col_offset) + return ast.Constant(w_consts, tup.lineno, tup.col_offset) def visit_Subscript(self, subs): if subs.ctx == ast.Load: @@ -340,6 +340,6 @@ # See test_const_fold_unicode_subscr return subs - return ast.Const(w_const, subs.lineno, subs.col_offset) + return ast.Constant(w_const, subs.lineno, subs.col_offset) return subs diff --git a/pypy/interpreter/astcompiler/tools/Python.asdl b/pypy/interpreter/astcompiler/tools/Python.asdl --- a/pypy/interpreter/astcompiler/tools/Python.asdl +++ b/pypy/interpreter/astcompiler/tools/Python.asdl @@ -1,4 +1,8 @@ --- ASDL's six builtin types are identifier, int, string, bytes, object, singleton +-- ASDL's 7 builtin types are: +-- identifier, int, string, bytes, object, singleton, constant +-- +-- singleton: None, True or False +-- constant can be None, whereas None means "no value" for object. module Python { @@ -75,6 +79,7 @@ | Bytes(bytes s) | NameConstant(singleton value) | Ellipsis + | Constant(constant value) -- the following expression can appear in assignment context | Attribute(expr value, identifier attr, expr_context ctx) @@ -84,9 +89,6 @@ | List(expr* elts, expr_context ctx) | Tuple(expr* elts, expr_context ctx) - -- PyPy modification - | Const(object obj) - -- col_offset is the byte offset in the utf8 string the parser uses attributes (int lineno, int col_offset) diff --git a/pypy/interpreter/astcompiler/tools/asdl.py b/pypy/interpreter/astcompiler/tools/asdl.py --- a/pypy/interpreter/astcompiler/tools/asdl.py +++ b/pypy/interpreter/astcompiler/tools/asdl.py @@ -34,7 +34,7 @@ # between the various node types. 
 builtin_types = {'identifier', 'string', 'bytes', 'int', 'bool', 'object',
-                 'singleton'}
+                 'singleton', 'constant'}
 
 class AST:
     def __repr__(self):
diff --git a/pypy/interpreter/astcompiler/tools/asdl_py.py b/pypy/interpreter/astcompiler/tools/asdl_py.py
--- a/pypy/interpreter/astcompiler/tools/asdl_py.py
+++ b/pypy/interpreter/astcompiler/tools/asdl_py.py
@@ -130,7 +130,8 @@
     def get_value_converter(self, field, value):
         if field.type in self.data.simple_types:
             return "%s_to_class[%s - 1]().to_object(space)" % (field.type, value)
-        elif field.type in ("object", "singleton", "string", "bytes"):
+        elif field.type in ("object", "singleton", "constant",
+                            "string", "bytes"):
             return value
         elif field.type == "bool":
             return "space.newbool(%s)" % (value,)
@@ -155,7 +156,7 @@
     def get_value_extractor(self, field, value):
         if field.type in self.data.simple_types:
             return "%s.from_object(space, %s)" % (field.type, value)
-        elif field.type in ("object","singleton"):
+        elif field.type in ("object", "singleton", "constant"):
             return value
         elif field.type in ("string","bytes"):
             return "check_string(space, %s)" % (value,)
diff --git a/pypy/interpreter/astcompiler/validate.py b/pypy/interpreter/astcompiler/validate.py
--- a/pypy/interpreter/astcompiler/validate.py
+++ b/pypy/interpreter/astcompiler/validate.py
@@ -318,6 +318,9 @@
     def visit_Ellipsis(self, node):
         pass
 
+    def visit_Constant(self, node):
+        pass
+
     def visit_BoolOp(self, node):
         if self._len(node.values) < 2:
             raise ValidationError("BoolOp with less than 2 values")

From pypy.commits at gmail.com Sun Dec 10 16:35:30 2017
From: pypy.commits at gmail.com (amauryfa)
Date: Sun, 10 Dec 2017 13:35:30 -0800 (PST)
Subject: [pypy-commit] pypy py3.6: Scandir: Add a context manager,
 and a ResourceWarning when it is not explicitly closed.
Message-ID: <5a2da8a2.16981c0a.94759.bad6@mx.google.com>

Author: Amaury Forgeot d'Arc
Branch: py3.6
Changeset: r93354:2d7ef69f48bd
Date: 2017-12-10 21:33 +0100
http://bitbucket.org/pypy/pypy/changeset/2d7ef69f48bd/

Log:	Scandir: Add a context manager, and a ResourceWarning when it is
	not explicitly closed.
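
For illustration only (not part of the changeset): with this change os.scandir()
at app level is meant to behave as in CPython 3.6, where the iterator can be
closed explicitly or used as a context manager so that no ResourceWarning is
emitted; a minimal usage sketch, assuming Python 3.6 or a PyPy3 with this patch:

    import os

    # Preferred: the iterator is closed automatically when the block exits.
    with os.scandir('.') as it:
        for entry in it:
            print(entry.name, entry.is_dir())

    # Also fine when abandoning iteration early: close it explicitly.
    it = os.scandir('.')
    first = next(it, None)
    it.close()
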
diff --git a/pypy/module/posix/interp_scandir.py b/pypy/module/posix/interp_scandir.py --- a/pypy/module/posix/interp_scandir.py +++ b/pypy/module/posix/interp_scandir.py @@ -51,11 +51,25 @@ self.dirfd = dirfd self.w_path_prefix = w_path_prefix self.result_is_bytes = result_is_bytes + self.register_finalizer(space) - @rgc.must_be_light_finalizer - def __del__(self): - if self.dirp: - rposix_scandir.closedir(self.dirp) + def _finalize_(self): + if not self.dirp: + return + space = self.space + try: + msg = ("unclosed scandir iterator %s" % + space.text_w(space.repr(self))) + space.warn(space.newtext(msg), space.w_ResourceWarning) + except OperationError as e: + # Spurious errors can appear at shutdown + if e.match(space, space.w_Warning): + e.write_unraisable(space, '', self) + self._close() + + def _close(self): + rposix_scandir.closedir(self.dirp) + self.dirp = rposix_scandir.NULL_DIRP def iter_w(self): return self @@ -96,16 +110,31 @@ # known_type = rposix_scandir.get_known_type(entry) inode = rposix_scandir.get_inode(entry) + except: + self._close() + raise finally: self._in_next = False direntry = W_DirEntry(self, name, known_type, inode) return direntry + def close_w(self): + self._close() + + def enter_w(self): + return self + + def exit_w(self, space, __args__): + self._close() + W_ScandirIterator.typedef = TypeDef( 'posix.ScandirIterator', __iter__ = interp2app(W_ScandirIterator.iter_w), __next__ = interp2app(W_ScandirIterator.next_w), + __enter__ = interp2app(W_ScandirIterator.enter_w), + __exit__ = interp2app(W_ScandirIterator.exit_w), + close = interp2app(W_ScandirIterator.close_w), ) W_ScandirIterator.typedef.acceptable_as_base_class = False diff --git a/pypy/module/posix/test/test_scandir.py b/pypy/module/posix/test/test_scandir.py --- a/pypy/module/posix/test/test_scandir.py +++ b/pypy/module/posix/test/test_scandir.py @@ -170,3 +170,34 @@ posix = self.posix d = next(posix.scandir(self.dir1)) assert repr(d) == "" + + def test_resource_warning(self): + posix = self.posix + import warnings, gc + iterator = posix.scandir(self.dir1) + next(iterator) + with warnings.catch_warnings(record=True) as l: + warnings.simplefilter("always") + del iterator + gc.collect() + assert isinstance(l[0].message, ResourceWarning) + # + iterator = posix.scandir(self.dir1) + next(iterator) + with warnings.catch_warnings(record=True) as l: + warnings.simplefilter("always") + iterator.close() + del iterator + gc.collect() + assert len(l) == 0 + + def test_context_manager(self): + posix = self.posix + import warnings, gc + with warnings.catch_warnings(record=True) as l: + warnings.simplefilter("always") + with posix.scandir(self.dir1) as iterator: + next(iterator) + del iterator + gc.collect() + assert not l From pypy.commits at gmail.com Mon Dec 11 00:36:10 2017 From: pypy.commits at gmail.com (mattip) Date: Sun, 10 Dec 2017 21:36:10 -0800 (PST) Subject: [pypy-commit] buildbot default: tweak pypy runtime name when copying Message-ID: <5a2e194a.64b8df0a.8024d.f6b6@mx.google.com> Author: Matti Picus Branch: Changeset: r1048:72eb613d4c3a Date: 2017-12-11 07:35 +0200 http://bitbucket.org/pypy/buildbot/changeset/72eb613d4c3a/ Log: tweak pypy runtime name when copying diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -726,24 +726,24 @@ haltOnFailure=True, workdir='.')) # copy pypy-c to the expected location within the pypy source checkout - command = ('PYPY_C="pypy-c/bin/pypy";' - 'if [ -e pypy-c/bin/pypy3 ]; then 
PYPY_C="pypy-c/bin/pypy3"; fi;' - 'cp -v $PYPY_C build/pypy/goal;') + command = ('PYPY_C="pypy";' + 'if [ -e pypy-c/bin/pypy3 ]; then PYPY_C="pypy3"; fi;' + 'cp -v pypy-c/bin/$PYPY_C build/pypy/goal/$PYPY_C-c;') self.addStep(ShellCmd( - description="move pypy-c", + description="copy pypy-c", command=command, haltOnFailure=True, workdir='.')) # copy libpypy-c.so to the expected location within the pypy source checkout, if available command = 'if [ -e pypy-c/bin/libpypy-c.so ]; then cp -v pypy-c/bin/libpypy-c.so build/pypy/goal; fi;' self.addStep(ShellCmd( - description="move libpypy-c.so", + description="copy libpypy-c.so", command=command, haltOnFailure=True, workdir='.')) # copy generated and copied header files to build/include self.addStep(ShellCmd( - description="move header files", + description="copy header files", command=['cp', '-vr', 'pypy-c/include', 'build'], haltOnFailure=True, workdir='.')) From pypy.commits at gmail.com Mon Dec 11 01:06:45 2017 From: pypy.commits at gmail.com (fijal) Date: Sun, 10 Dec 2017 22:06:45 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge unicode-utf8-re Message-ID: <5a2e2075.8fb1df0a.c5411.e6dd@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93355:43e73aa47541 Date: 2017-12-11 08:05 +0200 http://bitbucket.org/pypy/pypy/changeset/43e73aa47541/ Log: merge unicode-utf8-re diff too long, truncating to 2000 out of 2355 lines diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -8,13 +8,12 @@ from rpython.rlib.rarithmetic import intmask from rpython.rlib import jit, rutf8 from rpython.rlib.rstring import StringBuilder -from rpython.rlib.rutf8 import Utf8StringBuilder # ____________________________________________________________ # # Constants and exposed functions -from rpython.rlib.rsre import rsre_core +from rpython.rlib.rsre import rsre_core, rsre_utf8 from rpython.rlib.rsre.rsre_char import CODESIZE, MAXREPEAT, getlower, set_unicode_db @@ -35,15 +34,18 @@ def slice_w(space, ctx, start, end, w_default): - if 0 <= start <= end: + # 'start' and 'end' are byte positions + if ctx.ZERO <= start <= end: if isinstance(ctx, rsre_core.BufMatchContext): return space.newbytes(ctx._buffer.getslice(start, end, 1, end-start)) if isinstance(ctx, rsre_core.StrMatchContext): + start = ctx._real_pos(start) + end = ctx._real_pos(end) return space.newbytes(ctx._string[start:end]) - elif isinstance(ctx, rsre_core.UnicodeMatchContext): - s = ctx._unicodestr[start:end] - lgt = rutf8.check_utf8(s, True) + elif isinstance(ctx, rsre_utf8.Utf8MatchContext): + s = ctx._utf8[start:end] + lgt = rutf8.get_utf8_length(s) return space.newutf8(s, lgt) else: # unreachable @@ -56,6 +58,7 @@ # Returns a list of RPython-level integers. # Unlike the app-level groups() method, groups are numbered from 0 # and the returned list does not start with the whole match range. + # The integers are byte positions, not character indexes (for utf8). 
if num_groups == 0: return None result = [-1] * (2 * num_groups) @@ -104,7 +107,7 @@ raise oefmt(space.w_TypeError, "cannot copy this pattern object") def make_ctx(self, w_string, pos=0, endpos=sys.maxint): - """Make a StrMatchContext, BufMatchContext or a UnicodeMatchContext for + """Make a StrMatchContext, BufMatchContext or a Utf8MatchContext for searching in the given w_string object.""" space = self.space if pos < 0: @@ -112,23 +115,36 @@ if endpos < pos: endpos = pos if space.isinstance_w(w_string, space.w_unicode): - unicodestr = space.utf8_w(w_string) - # XXX will fail some tests, the length need to be adjusted for - # real char len etc - if pos > len(unicodestr): - pos = len(unicodestr) - if endpos > len(unicodestr): - endpos = len(unicodestr) - return rsre_core.UnicodeMatchContext(self.code, unicodestr, - pos, endpos, self.flags) + w_unicode_obj = space.convert_arg_to_w_unicode(w_string) + utf8str = w_unicode_obj._utf8 + length = w_unicode_obj._len() + if pos <= 0: + bytepos = 0 + elif pos >= length: + bytepos = len(utf8str) + else: + index_storage = w_unicode_obj._get_index_storage() + bytepos = rutf8.codepoint_position_at_index(utf8str, + index_storage, pos) + if endpos >= length: + endbytepos = len(utf8str) + else: + index_storage = w_unicode_obj._get_index_storage() + endbytepos = rutf8.codepoint_position_at_index(utf8str, + index_storage, endpos) + ctx = rsre_utf8.Utf8MatchContext( + self.code, utf8str, bytepos, endbytepos, self.flags) + # xxx we store the w_string on the ctx too, for + # W_SRE_Match.bytepos_to_charindex() + ctx.w_unicode_obj = w_unicode_obj + return ctx elif space.isinstance_w(w_string, space.w_bytes): str = space.bytes_w(w_string) if pos > len(str): pos = len(str) if endpos > len(str): endpos = len(str) - return rsre_core.StrMatchContext(self.code, str, - pos, endpos, self.flags) + return self._make_str_match_context(str, pos, endpos) else: buf = space.readbuf_w(w_string) size = buf.getlength() @@ -140,6 +156,11 @@ return rsre_core.BufMatchContext(self.code, buf, pos, endpos, self.flags) + def _make_str_match_context(self, str, pos, endpos): + # for tests to override + return rsre_core.StrMatchContext(self.code, str, + pos, endpos, self.flags) + def getmatch(self, ctx, found): if found: return W_SRE_Match(self, ctx) @@ -178,8 +199,10 @@ w_item = allgroups_w(space, ctx, fmarks, num_groups, w_emptystr) matchlist_w.append(w_item) - no_progress = (ctx.match_start == ctx.match_end) - ctx.reset(ctx.match_end + no_progress) + reset_at = ctx.match_end + if ctx.match_start == ctx.match_end: + reset_at = ctx.next_indirect(reset_at) + ctx.reset(reset_at) return space.newlist(matchlist_w) @unwrap_spec(pos=int, endpos=int) @@ -195,15 +218,15 @@ space = self.space splitlist = [] n = 0 - last = 0 ctx = self.make_ctx(w_string) + last = ctx.ZERO while not maxsplit or n < maxsplit: if not searchcontext(space, ctx): break if ctx.match_start == ctx.match_end: # zero-width match if ctx.match_start == ctx.end: # or end of string break - ctx.reset(ctx.match_end + 1) + ctx.reset(ctx.next_indirect(ctx.match_end)) continue splitlist.append(slice_w(space, ctx, last, ctx.match_start, space.w_None)) @@ -232,20 +255,20 @@ def subx(self, w_ptemplate, w_string, count): space = self.space - # use a (much faster) string/unicode builder if w_ptemplate and + # use a (much faster) string builder (possibly utf8) if w_ptemplate and # w_string are both string or both unicode objects, and if w_ptemplate # is a literal - use_builder = False - filter_as_unicode = filter_as_string = None + 
use_builder = '\x00' # or 'S'tring or 'U'nicode/UTF8 + filter_as_string = None if space.is_true(space.callable(w_ptemplate)): w_filter = w_ptemplate filter_is_callable = True else: if space.isinstance_w(w_ptemplate, space.w_unicode): - filter_as_unicode = space.utf8_w(w_ptemplate) - literal = '\\' not in filter_as_unicode - use_builder = ( - space.isinstance_w(w_string, space.w_unicode) and literal) + filter_as_string = space.utf8_w(w_ptemplate) + literal = '\\' not in filter_as_string + if space.isinstance_w(w_string, space.w_unicode) and literal: + use_builder = 'U' else: try: filter_as_string = space.bytes_w(w_ptemplate) @@ -255,8 +278,8 @@ literal = False else: literal = '\\' not in filter_as_string - use_builder = ( - space.isinstance_w(w_string, space.w_bytes) and literal) + if space.isinstance_w(w_string, space.w_bytes) and literal: + use_builder = 'S' if literal: w_filter = w_ptemplate filter_is_callable = False @@ -269,16 +292,14 @@ # # XXX this is a bit of a mess, but it improves performance a lot ctx = self.make_ctx(w_string) - sublist_w = strbuilder = unicodebuilder = None - if use_builder: - if filter_as_unicode is not None: - unicodebuilder = Utf8StringBuilder(ctx.end) - else: - assert filter_as_string is not None - strbuilder = StringBuilder(ctx.end) + sublist_w = strbuilder = None + if use_builder != '\x00': + assert filter_as_string is not None + strbuilder = StringBuilder(ctx.end) else: sublist_w = [] - n = last_pos = 0 + n = 0 + last_pos = ctx.ZERO while not count or n < count: sub_jitdriver.jit_merge_point( self=self, @@ -288,9 +309,7 @@ ctx=ctx, w_filter=w_filter, strbuilder=strbuilder, - unicodebuilder=unicodebuilder, filter_as_string=filter_as_string, - filter_as_unicode=filter_as_unicode, count=count, w_string=w_string, n=n, last_pos=last_pos, sublist_w=sublist_w @@ -301,10 +320,10 @@ if last_pos < ctx.match_start: _sub_append_slice( ctx, space, use_builder, sublist_w, - strbuilder, unicodebuilder, last_pos, ctx.match_start) + strbuilder, last_pos, ctx.match_start) start = ctx.match_end if start == ctx.match_start: - start += 1 + start = ctx.next_indirect(start) if not (last_pos == ctx.match_start == ctx.match_end and n > 0): # the above ignores empty matches on latest position @@ -312,18 +331,14 @@ w_match = self.getmatch(ctx, True) w_piece = space.call_function(w_filter, w_match) if not space.is_w(w_piece, space.w_None): - assert strbuilder is None and unicodebuilder is None - assert not use_builder + assert strbuilder is None + assert use_builder == '\x00' sublist_w.append(w_piece) else: - if use_builder: - if strbuilder is not None: - assert filter_as_string is not None - strbuilder.append(filter_as_string) - else: - assert unicodebuilder is not None - assert filter_as_unicode is not None - unicodebuilder.append(filter_as_unicode) + if use_builder != '\x00': + assert filter_as_string is not None + assert strbuilder is not None + strbuilder.append(filter_as_string) else: sublist_w.append(w_filter) last_pos = ctx.match_end @@ -334,14 +349,19 @@ if last_pos < ctx.end: _sub_append_slice(ctx, space, use_builder, sublist_w, - strbuilder, unicodebuilder, last_pos, ctx.end) - if use_builder: - if strbuilder is not None: - return space.newbytes(strbuilder.build()), n + strbuilder, last_pos, ctx.end) + if use_builder != '\x00': + assert strbuilder is not None + result_bytes = strbuilder.build() + if use_builder == 'S': + assert not isinstance(ctx, rsre_utf8.Utf8MatchContext) + return space.newbytes(result_bytes), n + elif use_builder == 'U': + assert isinstance(ctx, 
rsre_utf8.Utf8MatchContext) + return space.newutf8(result_bytes, + rutf8.get_utf8_length(result_bytes)), n else: - assert unicodebuilder is not None - return space.newutf8(unicodebuilder.build(), - unicodebuilder.get_length()), n + raise AssertionError(use_builder) else: if space.isinstance_w(w_string, space.w_unicode): w_emptystr = space.newutf8('', 0) @@ -354,26 +374,28 @@ sub_jitdriver = jit.JitDriver( reds="""count n last_pos ctx w_filter - strbuilder unicodebuilder + strbuilder filter_as_string - filter_as_unicode w_string sublist_w self""".split(), greens=["filter_is_callable", "use_builder", "filter_type", "ctx.pattern"]) def _sub_append_slice(ctx, space, use_builder, sublist_w, - strbuilder, unicodebuilder, start, end): - if use_builder: + strbuilder, start, end): + if use_builder != '\x00': + assert strbuilder is not None if isinstance(ctx, rsre_core.BufMatchContext): - assert strbuilder is not None + assert use_builder == 'S' return strbuilder.append(ctx._buffer.getslice(start, end, 1, end-start)) if isinstance(ctx, rsre_core.StrMatchContext): - assert strbuilder is not None + assert use_builder == 'S' + start = ctx._real_pos(start) + end = ctx._real_pos(end) return strbuilder.append_slice(ctx._string, start, end) - elif isinstance(ctx, rsre_core.UnicodeMatchContext): - assert unicodebuilder is not None - return unicodebuilder.append_slice(ctx._unicodestr, start, end) + elif isinstance(ctx, rsre_utf8.Utf8MatchContext): + assert use_builder == 'U' + return strbuilder.append_slice(ctx._utf8, start, end) assert 0, "unreachable" else: sublist_w.append(slice_w(space, ctx, start, end, space.w_None)) @@ -487,18 +509,39 @@ @unwrap_spec(w_groupnum=WrappedDefault(0)) def start_w(self, w_groupnum): - return self.space.newint(self.do_span(w_groupnum)[0]) + start, end = self.do_span(w_groupnum) + start = self.bytepos_to_charindex(start) + return self.space.newint(start) @unwrap_spec(w_groupnum=WrappedDefault(0)) def end_w(self, w_groupnum): - return self.space.newint(self.do_span(w_groupnum)[1]) + start, end = self.do_span(w_groupnum) + end = self.bytepos_to_charindex(end) + return self.space.newint(end) @unwrap_spec(w_groupnum=WrappedDefault(0)) def span_w(self, w_groupnum): start, end = self.do_span(w_groupnum) + return self.new_charindex_tuple(start, end) + + def new_charindex_tuple(self, start, end): + start = self.bytepos_to_charindex(start) + end = self.bytepos_to_charindex(end) return self.space.newtuple([self.space.newint(start), self.space.newint(end)]) + def bytepos_to_charindex(self, bytepos): + # Transform a 'byte position', as returned by all methods from + # rsre_core, back into a 'character index'. This is for UTF8 + # handling. 
+ ctx = self.ctx + if isinstance(ctx, rsre_utf8.Utf8MatchContext): + index_storage = ctx.w_unicode_obj._get_index_storage() + return rutf8.codepoint_index_at_byte_position( + ctx.w_unicode_obj._utf8, index_storage, bytepos) + else: + return bytepos + def flatten_marks(self): if self.flatten_cache is None: num_groups = self.srepat.num_groups @@ -506,6 +549,8 @@ return self.flatten_cache def do_span(self, w_arg): + # return a pair of integers, which are byte positions, not + # character indexes (for utf8) space = self.space try: groupnum = space.int_w(w_arg) @@ -553,10 +598,10 @@ return space.w_None def fget_pos(self, space): - return space.newint(self.ctx.original_pos) + return space.newint(self.bytepos_to_charindex(self.ctx.original_pos)) def fget_endpos(self, space): - return space.newint(self.ctx.end) + return space.newint(self.bytepos_to_charindex(self.ctx.end)) def fget_regs(self, space): space = self.space @@ -564,11 +609,11 @@ num_groups = self.srepat.num_groups result_w = [None] * (num_groups + 1) ctx = self.ctx - result_w[0] = space.newtuple([space.newint(ctx.match_start), - space.newint(ctx.match_end)]) + result_w[0] = self.new_charindex_tuple(ctx.match_start, + ctx.match_end) for i in range(num_groups): - result_w[i + 1] = space.newtuple([space.newint(fmarks[i*2]), - space.newint(fmarks[i*2+1])]) + result_w[i + 1] = self.new_charindex_tuple(fmarks[i*2], + fmarks[i*2+1]) return space.newtuple(result_w) def fget_string(self, space): @@ -577,9 +622,9 @@ return space.newbytes(ctx._buffer.as_str()) elif isinstance(ctx, rsre_core.StrMatchContext): return space.newbytes(ctx._string) - elif isinstance(ctx, rsre_core.UnicodeMatchContext): - lgt = rutf8.check_utf8(ctx._unicodestr, True) - return space.newutf8(ctx._unicodestr, lgt) + elif isinstance(ctx, rsre_utf8.Utf8MatchContext): + lgt = rutf8.get_utf8_length(ctx._utf8) + return space.newutf8(ctx._utf8, lgt) else: raise SystemError @@ -644,12 +689,14 @@ if found: ctx = self.ctx nextstart = ctx.match_end - nextstart += (ctx.match_start == nextstart) + if ctx.match_start == nextstart: + nextstart = ctx.next_indirect(nextstart) self.ctx = ctx.fresh_copy(nextstart) match = W_SRE_Match(self.srepat, ctx) return match else: - self.ctx.match_start += 1 # obscure corner case + # obscure corner case + self.ctx.match_start = self.ctx.next_indirect(self.ctx.match_start) return None W_SRE_Scanner.typedef = TypeDef( diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py --- a/pypy/module/_sre/test/test_app_sre.py +++ b/pypy/module/_sre/test/test_app_sre.py @@ -4,6 +4,8 @@ import py from py.test import raises, skip from pypy.interpreter.gateway import app2interp_temp +from pypy.module._sre import interp_sre +from rpython.rlib.rsre.test import support def init_app_test(cls, space): @@ -20,6 +22,35 @@ sys.path.pop(0) """) +def _test_sre_ctx_(self, str, start, end): + # Use the MatchContextForTests class, which handles Position + # instances instead of plain integers. This is used to detect when + # we're accepting or escaping a Position to app-level, which we + # should not: Positions are meant to be byte indexes inside a + # possibly UTF8 string, not character indexes. 
+ start = support.Position(start) + end = support.Position(end) + return support.MatchContextForTests(self.code, str, start, end, self.flags) + +def _bytepos_to_charindex(self, bytepos): + if isinstance(self.ctx, support.MatchContextForTests): + return self.ctx._real_pos(bytepos) + return _org_maker[1](self, bytepos) + +def setup_module(mod): + mod._org_maker = ( + interp_sre.W_SRE_Pattern._make_str_match_context, + interp_sre.W_SRE_Match.bytepos_to_charindex, + ) + interp_sre.W_SRE_Pattern._make_str_match_context = _test_sre_ctx_ + interp_sre.W_SRE_Match.bytepos_to_charindex = _bytepos_to_charindex + +def teardown_module(mod): + ( + interp_sre.W_SRE_Pattern._make_str_match_context, + interp_sre.W_SRE_Match.bytepos_to_charindex, + ) = mod._org_maker + class AppTestSrePy: def test_magic(self): @@ -87,6 +118,13 @@ assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus") assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs") + def test_findall_unicode(self): + import re + assert [u"\u1234"] == re.findall(u"\u1234", u"\u1000\u1234\u2000") + assert ["a", "u"] == re.findall("b(.)", "abalbus") + assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus") + assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs") + def test_finditer(self): import re it = re.finditer("b(.)", "brabbel") @@ -999,3 +1037,15 @@ import re assert re.search(".+ab", "wowowowawoabwowo") assert None == re.search(".+ab", "wowowaowowo") + + +class AppTestUnicodeExtra: + def test_string_attribute(self): + import re + match = re.search(u"\u1234", u"\u1233\u1234\u1235") + assert match.string == u"\u1233\u1234\u1235" + + def test_match_start(self): + import re + match = re.search(u"\u1234", u"\u1233\u1234\u1235") + assert match.start() == 1 diff --git a/rpython/rlib/debug.py b/rpython/rlib/debug.py --- a/rpython/rlib/debug.py +++ b/rpython/rlib/debug.py @@ -316,14 +316,21 @@ class ExpectedRegularInt(Exception): pass +class NegativeArgumentNotAllowed(Exception): + pass + def check_nonneg(x): """Give a translation-time error if 'x' is not known to be non-negative. To help debugging, this also gives a translation-time error if 'x' is actually typed as an r_uint (in which case the call to check_nonneg() is a bit strange and probably unexpected). """ - assert type(x)(-1) < 0 # otherwise, 'x' is a r_uint or similar - assert x >= 0 + try: + assert type(x)(-1) < 0 # otherwise, 'x' is a r_uint or similar + except NegativeArgumentNotAllowed: + pass + else: + assert x >= 0 return x class Entry(ExtRegistryEntry): diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -55,6 +55,8 @@ specific subclass, calling 'func' is a direct call; if 'ctx' is only known to be of class AbstractMatchContext, calling 'func' is an indirect call. 
""" + from rpython.rlib.rsre.rsre_utf8 import Utf8MatchContext + assert func.func_code.co_varnames[0] == 'ctx' specname = '_spec_' + func.func_name while specname in _seen_specname: @@ -65,7 +67,8 @@ specialized_methods = [] for prefix, concreteclass in [('buf', BufMatchContext), ('str', StrMatchContext), - ('uni', UnicodeMatchContext)]: + ('uni', UnicodeMatchContext), + ('utf8', Utf8MatchContext)]: newfunc = func_with_new_name(func, prefix + specname) assert not hasattr(concreteclass, specname) setattr(concreteclass, specname, newfunc) @@ -83,6 +86,9 @@ def __init__(self, msg): self.msg = msg +class EndOfString(Exception): + pass + class AbstractMatchContext(object): """Abstract base class""" _immutable_fields_ = ['pattern[*]', 'flags', 'end'] @@ -135,6 +141,45 @@ """Similar to str().""" raise NotImplementedError + # The following methods are provided to be overriden in + # Utf8MatchContext. The non-utf8 implementation is provided + # by the FixedMatchContext abstract subclass, in order to use + # the same @not_rpython safety trick as above. + ZERO = 0 + @not_rpython + def next(self, position): + raise NotImplementedError + @not_rpython + def prev(self, position): + raise NotImplementedError + @not_rpython + def next_n(self, position, n): + raise NotImplementedError + @not_rpython + def prev_n(self, position, n, start_position): + raise NotImplementedError + @not_rpython + def debug_check_pos(self, position): + raise NotImplementedError + @not_rpython + def maximum_distance(self, position_low, position_high): + raise NotImplementedError + @not_rpython + def get_single_byte(self, base_position, index): + raise NotImplementedError + + def bytes_difference(self, position1, position2): + return position1 - position2 + def go_forward_by_bytes(self, base_position, index): + return base_position + index + def next_indirect(self, position): + return position + 1 # like next(), but can be called indirectly + def prev_indirect(self, position): + position -= 1 # like prev(), but can be called indirectly + if position < 0: + raise EndOfString + return position + def get_mark(self, gid): return find_mark(self.match_marks, gid) @@ -168,23 +213,44 @@ return (-1, -1) return (fmarks[groupnum], fmarks[groupnum+1]) - def group(self, groupnum=0): - frm, to = self.span(groupnum) - if 0 <= frm <= to: - return self._string[frm:to] - else: - return None - def fresh_copy(self, start): raise NotImplementedError -class BufMatchContext(AbstractMatchContext): + +class FixedMatchContext(AbstractMatchContext): + """Abstract subclass to introduce the default implementation for + these position methods. 
The Utf8MatchContext subclass doesn't + inherit from here.""" + + next = AbstractMatchContext.next_indirect + prev = AbstractMatchContext.prev_indirect + + def next_n(self, position, n, end_position): + position += n + if position > end_position: + raise EndOfString + return position + + def prev_n(self, position, n, start_position): + position -= n + if position < start_position: + raise EndOfString + return position + + def debug_check_pos(self, position): + pass + + def maximum_distance(self, position_low, position_high): + return position_high - position_low + + +class BufMatchContext(FixedMatchContext): """Concrete subclass for matching in a buffer.""" _immutable_fields_ = ["_buffer"] def __init__(self, pattern, buf, match_start, end, flags): - AbstractMatchContext.__init__(self, pattern, match_start, end, flags) + FixedMatchContext.__init__(self, pattern, match_start, end, flags) self._buffer = buf def str(self, index): @@ -195,17 +261,20 @@ c = self.str(index) return rsre_char.getlower(c, self.flags) + def get_single_byte(self, base_position, index): + return self.str(base_position + index) + def fresh_copy(self, start): return BufMatchContext(self.pattern, self._buffer, start, self.end, self.flags) -class StrMatchContext(AbstractMatchContext): +class StrMatchContext(FixedMatchContext): """Concrete subclass for matching in a plain string.""" _immutable_fields_ = ["_string"] def __init__(self, pattern, string, match_start, end, flags): - AbstractMatchContext.__init__(self, pattern, match_start, end, flags) + FixedMatchContext.__init__(self, pattern, match_start, end, flags) self._string = string if not we_are_translated() and isinstance(string, unicode): self.flags |= rsre_char.SRE_FLAG_UNICODE # for rsre_re.py @@ -218,17 +287,23 @@ c = self.str(index) return rsre_char.getlower(c, self.flags) + def get_single_byte(self, base_position, index): + return self.str(base_position + index) + + def _real_pos(self, index): + return index # overridden by tests + def fresh_copy(self, start): return StrMatchContext(self.pattern, self._string, start, self.end, self.flags) -class UnicodeMatchContext(AbstractMatchContext): +class UnicodeMatchContext(FixedMatchContext): """Concrete subclass for matching in a unicode string.""" _immutable_fields_ = ["_unicodestr"] def __init__(self, pattern, unicodestr, match_start, end, flags): - AbstractMatchContext.__init__(self, pattern, match_start, end, flags) + FixedMatchContext.__init__(self, pattern, match_start, end, flags) self._unicodestr = unicodestr def str(self, index): @@ -239,6 +314,9 @@ c = self.str(index) return rsre_char.getlower(c, self.flags) + def get_single_byte(self, base_position, index): + return self.str(base_position + index) + def fresh_copy(self, start): return UnicodeMatchContext(self.pattern, self._unicodestr, start, self.end, self.flags) @@ -317,7 +395,10 @@ ctx.jitdriver_RepeatOne.jit_merge_point( self=self, ptr=ptr, ctx=ctx, nextppos=nextppos) result = sre_match(ctx, nextppos, ptr, self.start_marks) - ptr -= 1 + try: + ptr = ctx.prev_indirect(ptr) + except EndOfString: + ptr = -1 if result is not None: self.subresult = result self.start_ptr = ptr @@ -328,37 +409,41 @@ class MinRepeatOneMatchResult(MatchResult): install_jitdriver('MinRepeatOne', greens=['nextppos', 'ppos3', 'ctx.pattern'], - reds=['ptr', 'self', 'ctx'], + reds=['max_count', 'ptr', 'self', 'ctx'], debugprint=(2, 0)) # indices in 'greens' - def __init__(self, nextppos, ppos3, maxptr, ptr, marks): + def __init__(self, nextppos, ppos3, max_count, ptr, marks): 
self.nextppos = nextppos self.ppos3 = ppos3 - self.maxptr = maxptr + self.max_count = max_count self.start_ptr = ptr self.start_marks = marks def find_first_result(self, ctx): ptr = self.start_ptr nextppos = self.nextppos + max_count = self.max_count ppos3 = self.ppos3 - while ptr <= self.maxptr: + while max_count >= 0: ctx.jitdriver_MinRepeatOne.jit_merge_point( - self=self, ptr=ptr, ctx=ctx, nextppos=nextppos, ppos3=ppos3) + self=self, ptr=ptr, ctx=ctx, nextppos=nextppos, ppos3=ppos3, + max_count=max_count) result = sre_match(ctx, nextppos, ptr, self.start_marks) if result is not None: self.subresult = result self.start_ptr = ptr + self.max_count = max_count return self if not self.next_char_ok(ctx, ptr, ppos3): break - ptr += 1 + ptr = ctx.next_indirect(ptr) + max_count -= 1 def find_next_result(self, ctx): ptr = self.start_ptr if not self.next_char_ok(ctx, ptr, self.ppos3): return - self.start_ptr = ptr + 1 + self.start_ptr = ctx.next_indirect(ptr) return self.find_first_result(ctx) def next_char_ok(self, ctx, ptr, ppos): @@ -430,12 +515,12 @@ min = ctx.pat(ppos+1) if enum is not None: # matched one more 'item'. record it and continue. - last_match_length = ctx.match_end - ptr + last_match_zero_length = (ctx.match_end == ptr) self.pending = Pending(ptr, marks, enum, self.pending) self.num_pending += 1 ptr = ctx.match_end marks = ctx.match_marks - if last_match_length == 0 and self.num_pending >= min: + if last_match_zero_length and self.num_pending >= min: # zero-width protection: after an empty match, if there # are enough matches, don't try to match more. Instead, # fall through to trying to match 'tail'. @@ -520,6 +605,7 @@ need all results; in that case we use the method move_to_next_result() of the MatchResult.""" while True: + ctx.debug_check_pos(ptr) op = ctx.pat(ppos) ppos += 1 @@ -551,22 +637,25 @@ # if ptr >= ctx.end or rsre_char.is_linebreak(ctx.str(ptr)): return - ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_ANY_ALL: # match anything # if ptr >= ctx.end: return - ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_ASSERT: # assert subpattern # <0=skip> <1=back> - ptr1 = ptr - ctx.pat(ppos+1) + try: + ptr1 = ctx.prev_n(ptr, ctx.pat(ppos+1), ctx.ZERO) + except EndOfString: + return saved = ctx.fullmatch_only ctx.fullmatch_only = False - stop = ptr1 < 0 or sre_match(ctx, ppos + 2, ptr1, marks) is None + stop = sre_match(ctx, ppos + 2, ptr1, marks) is None ctx.fullmatch_only = saved if stop: return @@ -576,14 +665,17 @@ elif op == OPCODE_ASSERT_NOT: # assert not subpattern # <0=skip> <1=back> - ptr1 = ptr - ctx.pat(ppos+1) - saved = ctx.fullmatch_only - ctx.fullmatch_only = False - stop = (ptr1 >= 0 and sre_match(ctx, ppos + 2, ptr1, marks) - is not None) - ctx.fullmatch_only = saved - if stop: - return + try: + ptr1 = ctx.prev_n(ptr, ctx.pat(ppos+1), ctx.ZERO) + except EndOfString: + pass + else: + saved = ctx.fullmatch_only + ctx.fullmatch_only = False + stop = sre_match(ctx, ppos + 2, ptr1, marks) is not None + ctx.fullmatch_only = saved + if stop: + return ppos += ctx.pat(ppos) elif op == OPCODE_AT: @@ -606,36 +698,36 @@ if (ptr == ctx.end or not rsre_char.category_dispatch(ctx.pat(ppos), ctx.str(ptr))): return - ptr += 1 + ptr = ctx.next(ptr) ppos += 1 elif op == OPCODE_GROUPREF: # match backreference # - startptr, length = get_group_ref(marks, ctx.pat(ppos)) - if length < 0: + startptr, length_bytes = get_group_ref(ctx, marks, ctx.pat(ppos)) + if length_bytes < 0: return # group was not previously defined - if not match_repeated(ctx, ptr, startptr, length): + if not 
match_repeated(ctx, ptr, startptr, length_bytes): return # no match - ptr += length + ptr = ctx.go_forward_by_bytes(ptr, length_bytes) ppos += 1 elif op == OPCODE_GROUPREF_IGNORE: # match backreference # - startptr, length = get_group_ref(marks, ctx.pat(ppos)) - if length < 0: + startptr, length_bytes = get_group_ref(ctx, marks, ctx.pat(ppos)) + if length_bytes < 0: return # group was not previously defined - if not match_repeated_ignore(ctx, ptr, startptr, length): + ptr = match_repeated_ignore(ctx, ptr, startptr, length_bytes) + if ptr < ctx.ZERO: return # no match - ptr += length ppos += 1 elif op == OPCODE_GROUPREF_EXISTS: # conditional match depending on the existence of a group # codeyes codeno ... - _, length = get_group_ref(marks, ctx.pat(ppos)) - if length >= 0: + _, length_bytes = get_group_ref(ctx, marks, ctx.pat(ppos)) + if length_bytes >= 0: ppos += 2 # jump to 'codeyes' else: ppos += ctx.pat(ppos+1) # jump to 'codeno' @@ -647,7 +739,7 @@ ctx.str(ptr)): return ppos += ctx.pat(ppos) - ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_IN_IGNORE: # match set member (or non_member), ignoring case @@ -656,12 +748,12 @@ ctx.lowstr(ptr)): return ppos += ctx.pat(ppos) - ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_INFO: # optimization info block # <0=skip> <1=flags> <2=min> ... - if (ctx.end - ptr) < ctx.pat(ppos+2): + if ctx.maximum_distance(ptr, ctx.end) < ctx.pat(ppos+2): return ppos += ctx.pat(ppos) @@ -674,7 +766,7 @@ if ptr >= ctx.end or ctx.str(ptr) != ctx.pat(ppos): return ppos += 1 - ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_LITERAL_IGNORE: # match literal string, ignoring case @@ -682,7 +774,7 @@ if ptr >= ctx.end or ctx.lowstr(ptr) != ctx.pat(ppos): return ppos += 1 - ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_MARK: # set mark @@ -697,7 +789,7 @@ if ptr >= ctx.end or ctx.str(ptr) == ctx.pat(ppos): return ppos += 1 - ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_NOT_LITERAL_IGNORE: # match if it's not a literal string, ignoring case @@ -705,7 +797,7 @@ if ptr >= ctx.end or ctx.lowstr(ptr) == ctx.pat(ppos): return ppos += 1 - ptr += 1 + ptr = ctx.next(ptr) elif op == OPCODE_REPEAT: # general repeat. in this version of the re module, all the work @@ -743,8 +835,9 @@ # use the MAX_REPEAT operator. 
# <1=min> <2=max> item tail start = ptr - minptr = start + ctx.pat(ppos+1) - if minptr > ctx.end: + try: + minptr = ctx.next_n(start, ctx.pat(ppos+1), ctx.end) + except EndOfString: return # cannot match ptr = find_repetition_end(ctx, ppos+3, start, ctx.pat(ppos+2), marks) @@ -765,22 +858,22 @@ start = ptr min = ctx.pat(ppos+1) if min > 0: - minptr = ptr + min - if minptr > ctx.end: - return # cannot match + try: + minptr = ctx.next_n(ptr, min, ctx.end) + except EndOfString: + return # cannot match # count using pattern min as the maximum ptr = find_repetition_end(ctx, ppos+3, ptr, min, marks) if ptr < minptr: return # did not match minimum number of times - maxptr = ctx.end + max_count = sys.maxint max = ctx.pat(ppos+2) if max != rsre_char.MAXREPEAT: - maxptr1 = start + max - if maxptr1 <= maxptr: - maxptr = maxptr1 + max_count = max - min + assert max_count >= 0 nextppos = ppos + ctx.pat(ppos) - result = MinRepeatOneMatchResult(nextppos, ppos+3, maxptr, + result = MinRepeatOneMatchResult(nextppos, ppos+3, max_count, ptr, marks) return result.find_first_result(ctx) @@ -788,37 +881,41 @@ raise Error("bad pattern code %d" % op) -def get_group_ref(marks, groupnum): +def get_group_ref(ctx, marks, groupnum): gid = groupnum * 2 startptr = find_mark(marks, gid) - if startptr < 0: + if startptr < ctx.ZERO: return 0, -1 endptr = find_mark(marks, gid + 1) - length = endptr - startptr # < 0 if endptr < startptr (or if endptr=-1) - return startptr, length + length_bytes = ctx.bytes_difference(endptr, startptr) + # < 0 if endptr < startptr (or if endptr=-1) + return startptr, length_bytes @specializectx -def match_repeated(ctx, ptr, oldptr, length): - if ptr + length > ctx.end: +def match_repeated(ctx, ptr, oldptr, length_bytes): + if ctx.bytes_difference(ctx.end, ptr) < length_bytes: return False - for i in range(length): - if ctx.str(ptr + i) != ctx.str(oldptr + i): + for i in range(length_bytes): + if ctx.get_single_byte(ptr, i) != ctx.get_single_byte(oldptr, i): return False return True @specializectx -def match_repeated_ignore(ctx, ptr, oldptr, length): - if ptr + length > ctx.end: - return False - for i in range(length): - if ctx.lowstr(ptr + i) != ctx.lowstr(oldptr + i): - return False - return True +def match_repeated_ignore(ctx, ptr, oldptr, length_bytes): + oldend = ctx.go_forward_by_bytes(oldptr, length_bytes) + while oldptr < oldend: + if ptr >= ctx.end: + return -1 + if ctx.lowstr(ptr) != ctx.lowstr(oldptr): + return -1 + ptr = ctx.next(ptr) + oldptr = ctx.next(oldptr) + return ptr @specializectx def find_repetition_end(ctx, ppos, ptr, maxcount, marks): end = ctx.end - ptrp1 = ptr + 1 + ptrp1 = ctx.next(ptr) # First get rid of the cases where we don't have room for any match. if maxcount <= 0 or ptrp1 > end: return ptr @@ -843,9 +940,10 @@ # Else we really need to count how many times it matches. 
if maxcount != rsre_char.MAXREPEAT: # adjust end - end1 = ptr + maxcount - if end1 <= end: - end = end1 + try: + end = ctx.next_n(ptr, maxcount, end) + except EndOfString: + pass op = ctx.pat(ppos) for op1, fre in unroll_fre_checker: if op1 == op: @@ -862,7 +960,7 @@ if end1 <= end: end = end1 while ptr < end and sre_match(ctx, ppos, ptr, marks) is not None: - ptr += 1 + ptr = ctx.next(ptr) return ptr @specializectx @@ -904,7 +1002,7 @@ ctx.jitdriver_MatchIn.jit_merge_point(ctx=ctx, ptr=ptr, end=end, ppos=ppos) if ptr < end and checkerfn(ctx, ptr, ppos): - ptr += 1 + ptr = ctx.next(ptr) else: return ptr elif checkerfn == match_IN_IGNORE: @@ -918,7 +1016,7 @@ ctx.jitdriver_MatchInIgnore.jit_merge_point(ctx=ctx, ptr=ptr, end=end, ppos=ppos) if ptr < end and checkerfn(ctx, ptr, ppos): - ptr += 1 + ptr = ctx.next(ptr) else: return ptr else: @@ -927,7 +1025,7 @@ @specializectx def fre(ctx, ptr, end, ppos): while ptr < end and checkerfn(ctx, ptr, ppos): - ptr += 1 + ptr = ctx.next(ptr) return ptr fre = func_with_new_name(fre, 'fre_' + checkerfn.__name__) return fre @@ -967,11 +1065,14 @@ def sre_at(ctx, atcode, ptr): if (atcode == AT_BEGINNING or atcode == AT_BEGINNING_STRING): - return ptr == 0 + return ptr == ctx.ZERO elif atcode == AT_BEGINNING_LINE: - prevptr = ptr - 1 - return prevptr < 0 or rsre_char.is_linebreak(ctx.str(prevptr)) + try: + prevptr = ctx.prev(ptr) + except EndOfString: + return True + return rsre_char.is_linebreak(ctx.str(prevptr)) elif atcode == AT_BOUNDARY: return at_boundary(ctx, ptr) @@ -980,9 +1081,8 @@ return at_non_boundary(ctx, ptr) elif atcode == AT_END: - remaining_chars = ctx.end - ptr - return remaining_chars <= 0 or ( - remaining_chars == 1 and rsre_char.is_linebreak(ctx.str(ptr))) + return (ptr == ctx.end or + (ctx.next(ptr) == ctx.end and rsre_char.is_linebreak(ctx.str(ptr)))) elif atcode == AT_END_LINE: return ptr == ctx.end or rsre_char.is_linebreak(ctx.str(ptr)) @@ -1007,18 +1107,26 @@ def _make_boundary(word_checker): @specializectx def at_boundary(ctx, ptr): - if ctx.end == 0: + if ctx.end == ctx.ZERO: return False - prevptr = ptr - 1 - that = prevptr >= 0 and word_checker(ctx.str(prevptr)) + try: + prevptr = ctx.prev(ptr) + except EndOfString: + that = False + else: + that = word_checker(ctx.str(prevptr)) this = ptr < ctx.end and word_checker(ctx.str(ptr)) return this != that @specializectx def at_non_boundary(ctx, ptr): - if ctx.end == 0: + if ctx.end == ctx.ZERO: return False - prevptr = ptr - 1 - that = prevptr >= 0 and word_checker(ctx.str(prevptr)) + try: + prevptr = ctx.prev(ptr) + except EndOfString: + that = False + else: + that = word_checker(ctx.str(prevptr)) this = ptr < ctx.end and word_checker(ctx.str(ptr)) return this == that return at_boundary, at_non_boundary @@ -1100,7 +1208,7 @@ if sre_match(ctx, base, start, None) is not None: ctx.match_start = start return True - start += 1 + start = ctx.next_indirect(start) return False install_jitdriver_spec("LiteralSearch", @@ -1117,11 +1225,12 @@ while start < ctx.end: ctx.jitdriver_LiteralSearch.jit_merge_point(ctx=ctx, start=start, base=base, character=character) + start1 = ctx.next(start) if ctx.str(start) == character: - if sre_match(ctx, base, start + 1, None) is not None: + if sre_match(ctx, base, start1, None) is not None: ctx.match_start = start return True - start += 1 + start = start1 return False install_jitdriver_spec("CharsetSearch", @@ -1139,7 +1248,7 @@ if sre_match(ctx, base, start, None) is not None: ctx.match_start = start return True - start += 1 + start = ctx.next(start) 
return False install_jitdriver_spec('FastSearch', @@ -1156,7 +1265,7 @@ if string_position >= ctx.end: return False prefix_len = ctx.pat(5) - assert prefix_len >= 0 + assert prefix_len > 0 i = 0 while True: ctx.jitdriver_FastSearch.jit_merge_point(ctx=ctx, @@ -1171,10 +1280,14 @@ i += 1 if i == prefix_len: # found a potential match - start = string_position + 1 - prefix_len - assert start >= 0 + # start = string_position + 1 - prefix_len: computed later + ptr = string_position prefix_skip = ctx.pat(6) - ptr = start + prefix_skip + if prefix_skip == prefix_len: + ptr = ctx.next(ptr) + else: + assert prefix_skip < prefix_len + ptr = ctx.prev_n(ptr, prefix_len-1 - prefix_skip, ctx.ZERO) #flags = ctx.pat(2) #if flags & rsre_char.SRE_INFO_LITERAL: # # matched all of pure literal pattern @@ -1185,10 +1298,11 @@ pattern_offset = ctx.pat(1) + 1 ppos_start = pattern_offset + 2 * prefix_skip if sre_match(ctx, ppos_start, ptr, None) is not None: + start = ctx.prev_n(ptr, prefix_skip, ctx.ZERO) ctx.match_start = start return True overlap_offset = prefix_len + (7 - 1) i = ctx.pat(overlap_offset + i) - string_position += 1 + string_position = ctx.next(string_position) if string_position >= ctx.end: return False diff --git a/rpython/rlib/rsre/rsre_jit.py b/rpython/rlib/rsre/rsre_jit.py --- a/rpython/rlib/rsre/rsre_jit.py +++ b/rpython/rlib/rsre/rsre_jit.py @@ -36,8 +36,10 @@ from rpython.rlib.rsre.rsre_core import BufMatchContext from rpython.rlib.rsre.rsre_core import StrMatchContext from rpython.rlib.rsre.rsre_core import UnicodeMatchContext + from rpython.rlib.rsre.rsre_utf8 import Utf8MatchContext for prefix, concreteclass in [('Buf', BufMatchContext), ('Str', StrMatchContext), - ('Uni', UnicodeMatchContext)]: + ('Uni', UnicodeMatchContext), + ('Utf8', Utf8MatchContext)]: jitdriver = RSreJitDriver(prefix + name, **kwds) setattr(concreteclass, 'jitdriver_' + name, jitdriver) diff --git a/rpython/rlib/rsre/rsre_utf8.py b/rpython/rlib/rsre/rsre_utf8.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rsre/rsre_utf8.py @@ -0,0 +1,105 @@ +import sys +from rpython.rlib.debug import check_nonneg +from rpython.rlib.rsre.rsre_core import AbstractMatchContext, EndOfString +from rpython.rlib.rsre import rsre_char +from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib import rutf8 + + +class Utf8MatchContext(AbstractMatchContext): + """A context that matches unicode, but encoded in a utf8 string. + Be careful because most positions taken by, handled in, and returned + by this class are expressed in *bytes*, not in characters. 
+ """ + + def __init__(self, pattern, utf8string, match_start, end, flags): + AbstractMatchContext.__init__(self, pattern, match_start, end, flags) + self._utf8 = utf8string + + def str(self, index): + check_nonneg(index) + return rutf8.codepoint_at_pos(self._utf8, index) + + def lowstr(self, index): + c = self.str(index) + return rsre_char.getlower(c, self.flags) + + def get_single_byte(self, base_position, index): + return self.str(base_position + index) + + def fresh_copy(self, start): + return Utf8MatchContext(self.pattern, self._utf8, start, + self.end, self.flags) + + def next(self, position): + return rutf8.next_codepoint_pos(self._utf8, position) + next_indirect = next + + def prev(self, position): + if position <= 0: + raise EndOfString + position = rutf8.prev_codepoint_pos(self._utf8, position) + assert position >= 0 + return position + prev_indirect = prev + + def next_n(self, position, n, end_position): + for i in range(n): + if position >= end_position: + raise EndOfString + position = rutf8.next_codepoint_pos(self._utf8, position) + return position + + def prev_n(self, position, n, start_position): + for i in range(n): + if position <= start_position: + raise EndOfString + position = rutf8.prev_codepoint_pos(self._utf8, position) + assert position >= 0 + return position + + def debug_check_pos(self, position): + if we_are_translated(): + return + if position == len(self._utf8): + return # end of string is fine + assert not (0x80 <= self._utf8[position] < 0xC0) # continuation byte + + def maximum_distance(self, position_low, position_high): + # may overestimate if there are non-ascii chars + return position_high - position_low + + +def make_utf8_ctx(pattern, utf8string, bytestart, byteend, flags): + if bytestart < 0: bytestart = 0 + elif bytestart > len(utf8string): bytestart = len(utf8string) + if byteend < 0: byteend = 0 + elif byteend > len(utf8string): byteend = len(utf8string) + ctx = Utf8MatchContext(pattern, utf8string, bytestart, byteend, flags) + ctx.debug_check_pos(bytestart) + ctx.debug_check_pos(byteend) + return ctx + +def utf8search(pattern, utf8string, bytestart=0, byteend=sys.maxint, flags=0): + # bytestart and byteend must be valid byte positions inside the + # utf8string. + from rpython.rlib.rsre.rsre_core import search_context + + ctx = make_utf8_ctx(pattern, utf8string, bytestart, byteend, flags) + if search_context(ctx): + return ctx + else: + return None + +def utf8match(pattern, utf8string, bytestart=0, byteend=sys.maxint, flags=0, + fullmatch=False): + # bytestart and byteend must be valid byte positions inside the + # utf8string. 
+ from rpython.rlib.rsre.rsre_core import match_context + + ctx = make_utf8_ctx(pattern, utf8string, bytestart, byteend, flags) + ctx.fullmatch_only = fullmatch + if match_context(ctx): + return ctx + else: + return None diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rsre/test/support.py @@ -0,0 +1,136 @@ +import sys, random +from rpython.rlib import debug +from rpython.rlib.rsre.rsre_core import _adjust, match_context, search_context +from rpython.rlib.rsre.rsre_core import StrMatchContext, EndOfString + + +class Position(object): + def __init__(self, p): + assert isinstance(p, int) + if p < 0: + raise debug.NegativeArgumentNotAllowed( + "making a Position with byte index %r" % p) + self._p = p + def __repr__(self): + return '' % (self._p) + def __cmp__(self, other): + if isinstance(other, Position): + return cmp(self._p, other._p) + if type(other) is int and other == -1: + return cmp(self._p, -1) + raise TypeError("cannot compare %r with %r" % (self, other)) + + +class MatchContextForTests(StrMatchContext): + """Concrete subclass for matching in a plain string, tweaked for tests""" + + ZERO = Position(0) + + def next(self, position): + assert isinstance(position, Position) + return Position(position._p + 1) + next_indirect = next + + def prev(self, position): + assert isinstance(position, Position) + if position._p == 0: + raise EndOfString + return Position(position._p - 1) + prev_indirect = prev + + def next_n(self, position, n, end_position): + assert isinstance(position, Position) + assert isinstance(end_position, Position) + assert position._p <= end_position._p + r = position._p + n + if r > end_position._p: + raise EndOfString + return Position(r) + + def prev_n(self, position, n, start_position): + assert isinstance(position, Position) + assert isinstance(start_position, Position) + assert position._p >= start_position._p + r = position._p - n + if r < start_position._p: + raise EndOfString + return Position(r) + + def _real_pos(self, position): + if type(position) is int and position == -1: + return -1 + assert isinstance(position, Position) + return position._p + + def group(self, groupnum=0): + frm, to = self.span(groupnum) + if self.ZERO <= frm <= to: + return self._string[self._real_pos(frm):self._real_pos(to)] + else: + return None + + def str(self, position): + assert isinstance(position, Position) + return ord(self._string[position._p]) + + def debug_check_pos(self, position): + assert isinstance(position, Position) + + #def minimum_distance(self, position_low, position_high): + # """Return an estimate. The real value may be higher.""" + # assert isinstance(position_low, Position) + # assert isinstance(position_high, Position) + # dist = position_high._p - position_low._p + # if dist == 0: + # return 0 + # return random.randrange(1, dist + 1) + + def maximum_distance(self, position_low, position_high): + """Return an estimate. 
The real value may be lower.""" + assert isinstance(position_low, Position) + assert isinstance(position_high, Position) + return position_high._p - position_low._p + random.randrange(0, 10) + + def bytes_difference(self, position1, position2): + assert isinstance(position1, Position) + assert isinstance(position2, Position) + return position1._p - position2._p + + def get_single_byte(self, base_position, index): + assert isinstance(base_position, Position) + assert isinstance(index, int) + return ord(self._string[base_position._p + index]) + + def go_forward_by_bytes(self, base_position, index): + assert isinstance(base_position, Position) + assert isinstance(index, int) + return Position(base_position._p + index) + + def fresh_copy(self, start): + return MatchContextForTests(self.pattern, self._string, start, + self.end, self.flags) + + +def match(pattern, string, start=0, end=sys.maxint, flags=0, fullmatch=False): + start, end = _adjust(start, end, len(string)) + start = Position(start) + end = Position(end) + ctx = MatchContextForTests(pattern, string, start, end, flags) + ctx.fullmatch_only = fullmatch + if match_context(ctx): + return ctx + else: + return None + +def fullmatch(pattern, string, start=0, end=sys.maxint, flags=0): + return match(pattern, string, start, end, flags, fullmatch=True) + +def search(pattern, string, start=0, end=sys.maxint, flags=0): + start, end = _adjust(start, end, len(string)) + start = Position(start) + end = Position(end) + ctx = MatchContextForTests(pattern, string, start, end, flags) + if search_context(ctx): + return ctx + else: + return None diff --git a/rpython/rlib/rsre/test/test_ext_opcode.py b/rpython/rlib/rsre/test/test_ext_opcode.py --- a/rpython/rlib/rsre/test/test_ext_opcode.py +++ b/rpython/rlib/rsre/test/test_ext_opcode.py @@ -5,6 +5,7 @@ from rpython.rlib.rsre import rsre_core from rpython.rlib.rsre.rsre_char import MAXREPEAT +from rpython.rlib.rsre.test.support import match, Position # import OPCODE_XX as XX for name, value in rsre_core.__dict__.items(): @@ -17,10 +18,10 @@ # it's a valid optimization because \1 is always one character long r = [MARK, 0, ANY, MARK, 1, REPEAT_ONE, 6, 0, MAXREPEAT, GROUPREF, 0, SUCCESS, SUCCESS] - assert rsre_core.match(r, "aaa").match_end == 3 + assert match(r, "aaa").match_end == Position(3) def test_min_repeat_one_with_backref(): # Python 3.5 compiles "(.)\1*?b" using MIN_REPEAT_ONE r = [MARK, 0, ANY, MARK, 1, MIN_REPEAT_ONE, 6, 0, MAXREPEAT, GROUPREF, 0, SUCCESS, LITERAL, 98, SUCCESS] - assert rsre_core.match(r, "aaab").match_end == 4 + assert match(r, "aaab").match_end == Position(4) diff --git a/rpython/rlib/rsre/test/test_match.py b/rpython/rlib/rsre/test/test_match.py --- a/rpython/rlib/rsre/test/test_match.py +++ b/rpython/rlib/rsre/test/test_match.py @@ -1,6 +1,7 @@ import re, random, py -from rpython.rlib.rsre import rsre_core, rsre_char +from rpython.rlib.rsre import rsre_char from rpython.rlib.rsre.rpy import get_code, VERSION +from rpython.rlib.rsre.test.support import match, fullmatch, Position as P def get_code_and_re(regexp): @@ -16,234 +17,234 @@ def test_or(self): r = get_code(r"a|bc|def") - assert rsre_core.match(r, "a") - assert rsre_core.match(r, "bc") - assert rsre_core.match(r, "def") - assert not rsre_core.match(r, "ghij") + assert match(r, "a") + assert match(r, "bc") + assert match(r, "def") + assert not match(r, "ghij") def test_any(self): r = get_code(r"ab.cd") - assert rsre_core.match(r, "abXcdef") - assert not rsre_core.match(r, "ab\ncdef") - assert not rsre_core.match(r, 
"abXcDef") + assert match(r, "abXcdef") + assert not match(r, "ab\ncdef") + assert not match(r, "abXcDef") def test_any_repetition(self): r = get_code(r"ab.*cd") - assert rsre_core.match(r, "abXXXXcdef") - assert rsre_core.match(r, "abcdef") - assert not rsre_core.match(r, "abX\nXcdef") - assert not rsre_core.match(r, "abXXXXcDef") + assert match(r, "abXXXXcdef") + assert match(r, "abcdef") + assert not match(r, "abX\nXcdef") + assert not match(r, "abXXXXcDef") def test_any_all(self): r = get_code(r"(?s)ab.cd") - assert rsre_core.match(r, "abXcdef") - assert rsre_core.match(r, "ab\ncdef") - assert not rsre_core.match(r, "ab\ncDef") + assert match(r, "abXcdef") + assert match(r, "ab\ncdef") + assert not match(r, "ab\ncDef") def test_any_all_repetition(self): r = get_code(r"(?s)ab.*cd") - assert rsre_core.match(r, "abXXXXcdef") - assert rsre_core.match(r, "abcdef") - assert rsre_core.match(r, "abX\nXcdef") - assert not rsre_core.match(r, "abX\nXcDef") + assert match(r, "abXXXXcdef") + assert match(r, "abcdef") + assert match(r, "abX\nXcdef") + assert not match(r, "abX\nXcDef") def test_assert(self): r = get_code(r"abc(?=def)(.)") - res = rsre_core.match(r, "abcdefghi") - assert res is not None and res.get_mark(1) == 4 - assert not rsre_core.match(r, "abcdeFghi") + res = match(r, "abcdefghi") + assert res is not None and res.get_mark(1) == P(4) + assert not match(r, "abcdeFghi") def test_assert_not(self): r = get_code(r"abc(?!def)(.)") - res = rsre_core.match(r, "abcdeFghi") - assert res is not None and res.get_mark(1) == 4 - assert not rsre_core.match(r, "abcdefghi") + res = match(r, "abcdeFghi") + assert res is not None and res.get_mark(1) == P(4) + assert not match(r, "abcdefghi") def test_lookbehind(self): r = get_code(r"([a-z]*)(?<=de)") - assert rsre_core.match(r, "ade") - res = rsre_core.match(r, "adefg") - assert res is not None and res.get_mark(1) == 3 - assert not rsre_core.match(r, "abc") - assert not rsre_core.match(r, "X") - assert not rsre_core.match(r, "eX") + assert match(r, "ade") + res = match(r, "adefg") + assert res is not None and res.get_mark(1) == P(3) + assert not match(r, "abc") + assert not match(r, "X") + assert not match(r, "eX") def test_negative_lookbehind(self): def found(s): - res = rsre_core.match(r, s) + res = match(r, s) assert res is not None return res.get_mark(1) r = get_code(r"([a-z]*)(? 
OPCODE_RANGE_IGNORE - assert rsre_core.match(r, u"\U00010428") + assert match(r, u"\U00010428") diff --git a/rpython/rlib/rsre/test/test_search.py b/rpython/rlib/rsre/test/test_search.py --- a/rpython/rlib/rsre/test/test_search.py +++ b/rpython/rlib/rsre/test/test_search.py @@ -1,44 +1,48 @@ From pypy.commits at gmail.com Mon Dec 11 01:06:53 2017 From: pypy.commits at gmail.com (fijal) Date: Sun, 10 Dec 2017 22:06:53 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-re: close merged branch Message-ID: <5a2e207d.449f1c0a.b8b0c.6cee@mx.google.com> Author: fijal Branch: unicode-utf8-re Changeset: r93356:bb8932ceb392 Date: 2017-12-11 08:05 +0200 http://bitbucket.org/pypy/pypy/changeset/bb8932ceb392/ Log: close merged branch From pypy.commits at gmail.com Mon Dec 11 01:06:55 2017 From: pypy.commits at gmail.com (fijal) Date: Sun, 10 Dec 2017 22:06:55 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge unicode-utf8-re Message-ID: <5a2e207f.02431c0a.11cb5.0917@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93357:fab58fe43e4c Date: 2017-12-11 08:06 +0200 http://bitbucket.org/pypy/pypy/changeset/fab58fe43e4c/ Log: merge unicode-utf8-re From pypy.commits at gmail.com Mon Dec 11 01:38:36 2017 From: pypy.commits at gmail.com (fijal) Date: Sun, 10 Dec 2017 22:38:36 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix micronumpy Message-ID: <5a2e27ec.4ce61c0a.5017a.5315@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93358:fadafada40af Date: 2017-12-11 08:37 +0200 http://bitbucket.org/pypy/pypy/changeset/fadafada40af/ Log: fix micronumpy diff --git a/pypy/module/micronumpy/boxes.py b/pypy/module/micronumpy/boxes.py --- a/pypy/module/micronumpy/boxes.py +++ b/pypy/module/micronumpy/boxes.py @@ -11,6 +11,7 @@ from rpython.rlib.rstring import StringBuilder from rpython.rlib.objectmodel import specialize from rpython.rlib import jit +from rpython.rlib.rutf8 import get_utf8_length from rpython.rtyper.lltypesystem import lltype, rffi from rpython.tool.sourcetools import func_with_new_name from pypy.module.micronumpy import constants as NPY @@ -636,7 +637,8 @@ if dtype.is_unicode(): return self elif dtype.is_object(): - return W_ObjectBox(space.newunicode(self._value)) + return W_ObjectBox(space.newutf8(self._value, + get_utf8_length(self._value))) else: raise oefmt(space.w_NotImplementedError, "Conversion from unicode not implemented yet") @@ -646,7 +648,7 @@ return new_unicode_dtype(space, len(self._value)) def descr__new__unicode_box(space, w_subtype, w_arg): - value = space.unicode_w(space.unicode_from_object(w_arg)) + value = space.utf8_w(space.unicode_from_object(w_arg)) return W_UnicodeBox(value) class W_ObjectBox(W_GenericBox): diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py --- a/pypy/module/micronumpy/compile.py +++ b/pypy/module/micronumpy/compile.py @@ -197,7 +197,7 @@ return StringObject(obj) newbytes = newtext - def newunicode(self, obj): + def newutf8(self, obj, l): raise NotImplementedError def newlist(self, items): @@ -305,10 +305,10 @@ raise NotImplementedError text_w = bytes_w - def unicode_w(self, w_obj): + def utf8_w(self, w_obj): # XXX if isinstance(w_obj, StringObject): - return unicode(w_obj.v) + return w_obj.v raise NotImplementedError def int(self, w_obj): diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py --- a/pypy/module/micronumpy/types.py +++ b/pypy/module/micronumpy/types.py @@ -1,6 +1,7 @@ import functools import math from rpython.rlib.unroll import unrolling_iterable 
+from rpython.rlib.rutf8 import Utf8StringIterator, get_utf8_length, Utf8StringBuilder from pypy.interpreter.error import OperationError, oefmt from pypy.objspace.std.floatobject import float2string from pypy.objspace.std.complexobject import str_format @@ -2271,23 +2272,29 @@ if isinstance(w_item, boxes.W_UnicodeBox): return w_item if isinstance(w_item, boxes.W_ObjectBox): - value = space.unicode_w(space.unicode_from_object(w_item.w_obj)) + value = space.utf8_w(space.unicode_from_object(w_item.w_obj)) else: - value = space.unicode_w(space.unicode_from_object(w_item)) + value = space.utf8_w(space.unicode_from_object(w_item)) return boxes.W_UnicodeBox(value) + def convert_utf8_to_unichar_list(self, utf8): + l = [] + for ch in Utf8StringIterator(utf8): + l.append(unichr(ch)) + return l + def store(self, arr, i, offset, box, native): assert isinstance(box, boxes.W_UnicodeBox) - value = box._value with arr as storage: self._store(storage, i, offset, box, arr.dtype.elsize) @jit.unroll_safe def _store(self, storage, i, offset, box, width): - size = min(width // 4, len(box._value)) + v = self.convert_utf8_to_unichar_list(box._value) + size = min(width // 4, len(v)) for k in range(size): index = i + offset + 4*k - data = rffi.cast(Int32.T, ord(box._value[k])) + data = rffi.cast(Int32.T, ord(v[k])) raw_storage_setitem_unaligned(storage, index, data) # zero out the remaining memory for index in range(size * 4 + i + offset, width): @@ -2298,16 +2305,16 @@ if dtype is None: dtype = arr.dtype size = dtype.elsize // 4 - builder = UnicodeBuilder(size) + builder = Utf8StringBuilder(size) with arr as storage: for k in range(size): index = i + offset + 4*k - codepoint = raw_storage_getitem_unaligned( - Int32.T, arr.storage, index) - char = unichr(codepoint) - if char == u'\0': + codepoint = rffi.cast(lltype.Signed, + raw_storage_getitem_unaligned( + Int32.T, arr.storage, index)) + if codepoint == 0: break - builder.append(char) + builder.append_code(codepoint) return boxes.W_UnicodeBox(builder.build()) def str_format(self, item, add_quotes=True): @@ -2323,7 +2330,7 @@ def to_builtin_type(self, space, box): assert isinstance(box, boxes.W_UnicodeBox) - return space.newunicode(box._value) + return space.newutf8(box._value, get_utf8_length(box._value)) def eq(self, v1, v2): assert isinstance(v1, boxes.W_UnicodeBox) From pypy.commits at gmail.com Mon Dec 11 01:47:37 2017 From: pypy.commits at gmail.com (fijal) Date: Sun, 10 Dec 2017 22:47:37 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix whatsnew Message-ID: <5a2e2a09.8dd71c0a.99767.14d9@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93360:f7db32ee32c8 Date: 2017-12-11 08:46 +0200 http://bitbucket.org/pypy/pypy/changeset/f7db32ee32c8/ Log: fix whatsnew diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -29,3 +29,7 @@ .. branch: win32-vcvars +.. branch: unicode-utf8-re +.. 
branch: utf8-io +Utf8 handling for unicode + From pypy.commits at gmail.com Mon Dec 11 01:47:35 2017 From: pypy.commits at gmail.com (fijal) Date: Sun, 10 Dec 2017 22:47:35 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix one place and remove done TODO items Message-ID: <5a2e2a07.131f1c0a.9a59b.f0d3@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93359:8243e781313a Date: 2017-12-11 08:46 +0200 http://bitbucket.org/pypy/pypy/changeset/8243e781313a/ Log: fix one place and remove done TODO items diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -1,15 +1,6 @@ -* rutf8.prev_codepoint_pos should use r_uint * find a better way to run "find" without creating the index storage, if one is not already readily available -* fix _pypyjson -* fix cpyext * write the correct jit_elidable in _get_index_storage -* better flag handling in split/splitlines maybe? -* encode_error_handler has XXX -* remove assertions from W_UnicodeObject.__init__ if all the builders pass -* what to do with error handlers that go backwards. There were tests - in test_codecs that would check for that * improve performance of splitlines - * fix _pypyjson to not use a wrapped dict when decoding an object -* make sure we review all the places that call ord(unichr) to check for ValueErrors \ No newline at end of file +* make sure we review all the places that call ord(unichr) to check for ValueErrors diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -27,7 +27,7 @@ # Fast version of the "strict" errors handler. def raise_unicode_exception_encode(errors, encoding, msg, utf8, startingpos, endingpos): - u_len = rutf8.check_utf8(utf8, True) + u_len = rutf8.get_utf8_length(utf8, True) raise OperationError(space.w_UnicodeEncodeError, space.newtuple([space.newtext(encoding), space.newutf8(utf8, u_len), From pypy.commits at gmail.com Mon Dec 11 07:35:28 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 11 Dec 2017 04:35:28 -0800 (PST) Subject: [pypy-commit] pypy default: test and document the cmp(nan, nan) == 0 behaviour Message-ID: <5a2e7b90.02c7df0a.45a18.aa9f@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: Changeset: r93361:96cf224f204a Date: 2017-12-11 13:34 +0100 http://bitbucket.org/pypy/pypy/changeset/96cf224f204a/ Log: test and document the cmp(nan, nan) == 0 behaviour diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -355,7 +355,11 @@ containers (as list items or in sets for example), the exact rule of equality used is "``if x is y or x == y``" (on both CPython and PyPy); as a consequence, because all ``nans`` are identical in PyPy, you -cannot have several of them in a set, unlike in CPython. (Issue `#1974`__) +cannot have several of them in a set, unlike in CPython. (Issue `#1974`__). +Another consequence is that ``cmp(float('nan'), float('nan')) == 0``, because +``cmp`` checks with ``is`` first whether the arguments are identical (there is +no good value to return from this call to ``cmp``, because ``cmp`` pretends +that there is a total order on floats, but that is wrong for NaNs). .. 
__: https://bitbucket.org/pypy/pypy/issue/1974/different-behaviour-for-collections-of diff --git a/pypy/module/__builtin__/test/test_builtin.py b/pypy/module/__builtin__/test/test_builtin.py --- a/pypy/module/__builtin__/test/test_builtin.py +++ b/pypy/module/__builtin__/test/test_builtin.py @@ -404,6 +404,7 @@ def test_cmp(self): + assert cmp(float('nan'), float('nan')) == 0 assert cmp(9,9) == 0 assert cmp(0,9) < 0 assert cmp(9,0) > 0 From pypy.commits at gmail.com Mon Dec 11 08:02:58 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 11 Dec 2017 05:02:58 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Use the same logic for all encoders Message-ID: <5a2e8202.46101c0a.eb0c6.848a@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93362:09186de461ba Date: 2017-12-11 13:02 +0000 http://bitbucket.org/pypy/pypy/changeset/09186de461ba/ Log: Use the same logic for all encoders diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -159,74 +159,67 @@ return _utf8_encode_latin_1_slowpath(s, errors, errorhandler) def _utf8_encode_latin_1_slowpath(s, errors, errorhandler): - res = StringBuilder(len(s)) - cur = 0 - iter = rutf8.Utf8StringIterator(s) - while True: - try: - ch = iter.next() - except StopIteration: - break + size = len(s) + result = StringBuilder(size) + index = 0 + pos = 0 + while pos < size: + ch = rutf8.codepoint_at_pos(s, pos) if ch <= 0xFF: - res.append(chr(ch)) - cur += 1 + result.append(chr(ch)) + index += 1 + pos = rutf8.next_codepoint_pos(s, pos) else: - r, pos = errorhandler(errors, 'latin1', - 'ordinal not in range(256)', s, cur, - cur + 1) + startindex = index + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + while pos < size and rutf8.codepoint_at_pos(s, pos) > 0xFF: + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + msg = "ordinal not in range(256)" + res_8, newindex = errorhandler( + errors, 'latin1', msg, s, startindex, index) + for cp in rutf8.Utf8StringIterator(res_8): + if cp > 0xFF: + errorhandler("strict", 'latin1', msg, s, startindex, index) + result.append(chr(cp)) + if index != newindex: # Should be uncommon + index = newindex + pos = rutf8._pos_at_index(s, newindex) + return result.build() - for c in rutf8.Utf8StringIterator(r): - if c > 0xFF: - errorhandler("strict", 'latin1', - 'ordinal not in range(256)', s, - cur, cur + 1) - res.append(chr(c)) - - for j in range(pos - cur - 1): - iter.next() - - cur = pos - r = res.build() - return r - -def utf8_encode_ascii(utf8, errors, errorhandler): +def utf8_encode_ascii(s, errors, errorhandler): """ Don't be confused - this is a slowpath for errors e.g. 
"ignore" or an obscure errorhandler """ - res = StringBuilder() - i = 0 + size = len(s) + result = StringBuilder(size) + index = 0 pos = 0 - while i < len(utf8): - ch = rutf8.codepoint_at_pos(utf8, i) - if ch > 0x7F: - endpos = pos + 1 - end_i = rutf8.next_codepoint_pos(utf8, i) - while end_i < len(utf8) and rutf8.codepoint_at_pos(utf8, end_i) > 0x7F: - endpos += 1 - end_i = rutf8.next_codepoint_pos(utf8, end_i) + while pos < size: + ch = rutf8.codepoint_at_pos(s, pos) + if ch <= 0x7F: + result.append(chr(ch)) + index += 1 + pos = rutf8.next_codepoint_pos(s, pos) + else: + startindex = index + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + while pos < size and rutf8.codepoint_at_pos(s, pos) > 0x7F: + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 msg = "ordinal not in range(128)" - r, newpos = errorhandler(errors, 'ascii', msg, utf8, - pos, endpos) - for j in range(newpos - pos): - i = rutf8.next_codepoint_pos(utf8, i) - - j = 0 - while j < len(r): - c = rutf8.codepoint_at_pos(r, j) - if c > 0x7F: - errorhandler("strict", 'ascii', - 'ordinal not in range(128)', utf8, - pos, pos + 1) - j = rutf8.next_codepoint_pos(r, j) - pos = newpos - res.append(r) - else: - res.append(chr(ch)) - i = rutf8.next_codepoint_pos(utf8, i) - pos += 1 - - s = res.build() - return s + res_8, newindex = errorhandler( + errors, 'ascii', msg, s, startindex, index) + for cp in rutf8.Utf8StringIterator(res_8): + if cp > 0x7F: + errorhandler("strict", 'ascii', msg, s, startindex, index) + result.append(chr(cp)) + if index != newindex: # Should be uncommon + index = newindex + pos = rutf8._pos_at_index(s, newindex) + return result.build() def str_decode_utf8(s, errors, final, errorhandler): """ Same as checking for the valid utf8, but we know the utf8 is not diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -760,3 +760,25 @@ assert r == 'ሴ\x80⍅y\xab' r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('ascii', 'xmlcharrefreplace') assert r == 'ሴ€⍅y«' + + def test_errorhandler_collection(self): + import _codecs + errors = [] + def record_error(exc): + if not isinstance(exc, UnicodeEncodeError): + raise TypeError("don't know how to handle %r" % exc) + errors.append(exc.object[exc.start:exc.end]) + return (u'', exc.end) + _codecs.register_error("test.record", record_error) + + sin = u"\xac\u1234\u1234\u20ac\u8000" + assert sin.encode("ascii", "test.record") == "" + assert errors == [sin] + + errors = [] + assert sin.encode("latin-1", "test.record") == "\xac" + assert errors == [u'\u1234\u1234\u20ac\u8000'] + + errors = [] + assert sin.encode("iso-8859-15", "test.record") == "\xac\xa4" + assert errors == [u'\u1234\u1234', u'\u8000'] From pypy.commits at gmail.com Mon Dec 11 08:03:59 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 11 Dec 2017 05:03:59 -0800 (PST) Subject: [pypy-commit] pypy default: explain how to do mark_dict_non_null on r_dicts Message-ID: <5a2e823f.51bbdf0a.a2876.05cf@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: Changeset: r93363:d6474db3c19f Date: 2017-12-11 14:03 +0100 http://bitbucket.org/pypy/pypy/changeset/d6474db3c19f/ Log: explain how to do mark_dict_non_null on r_dicts diff --git a/rpython/rlib/debug.py b/rpython/rlib/debug.py --- a/rpython/rlib/debug.py +++ b/rpython/rlib/debug.py @@ -288,6 +288,9 @@ def mark_dict_non_null(d): """ Mark dictionary as having non-null keys and values. A warning would be emitted (not an error!) 
in case annotation disagrees. + + This doesn't work for r_dicts. For them, pass + r_dict(..., force_non_null=True) to the constructor. """ assert isinstance(d, dict) return d From pypy.commits at gmail.com Mon Dec 11 11:40:47 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 11 Dec 2017 08:40:47 -0800 (PST) Subject: [pypy-commit] pypy rdict-fast-hash: a branch to support fast hash functions with r_dict Message-ID: <5a2eb50f.17f71c0a.abae.705d@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: rdict-fast-hash Changeset: r93364:bd78548cee25 Date: 2017-12-11 15:50 +0100 http://bitbucket.org/pypy/pypy/changeset/bd78548cee25/ Log: a branch to support fast hash functions with r_dict get as fast as the first test failing diff --git a/rpython/annotator/builtin.py b/rpython/annotator/builtin.py --- a/rpython/annotator/builtin.py +++ b/rpython/annotator/builtin.py @@ -237,12 +237,17 @@ return SomeInstance(clsdef) @analyzer_for(rpython.rlib.objectmodel.r_dict) -def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None): +def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None, s_fast_hash=None): if s_force_non_null is None: force_non_null = False else: assert s_force_non_null.is_constant() force_non_null = s_force_non_null.const + if s_fast_hash is None: + fast_hash = False + else: + assert s_fast_hash.is_constant() + fast_hash = s_fast_hash.const dictdef = getbookkeeper().getdictdef(is_r_dict=True, force_non_null=force_non_null) dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -748,11 +748,12 @@ def _newdict(self): return {} - def __init__(self, key_eq, key_hash, force_non_null=False): + def __init__(self, key_eq, key_hash, force_non_null=False, fast_hash=False): self._dict = self._newdict() self.key_eq = key_eq self.key_hash = key_hash self.force_non_null = force_non_null + self.fast_hash = fast_hash def __getitem__(self, key): return self._dict[_r_dictkey(self, key)] diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py --- a/rpython/rlib/test/test_objectmodel.py +++ b/rpython/rlib/test/test_objectmodel.py @@ -330,6 +330,13 @@ res = self.interpret(g, [3]) assert res == 77 + def test_r_dict_fast_functions(self): + def fn(): + d1 = r_dict(strange_key_eq, strange_key_hash, fast_hash=True) + return play_with_r_dict(d1) + res = self.interpret(fn, []) + assert res + def test_prepare_dict_update(self): def g(n): d = {} diff --git a/rpython/rtyper/rbuiltin.py b/rpython/rtyper/rbuiltin.py --- a/rpython/rtyper/rbuiltin.py +++ b/rpython/rtyper/rbuiltin.py @@ -717,9 +717,9 @@ @typer_for(OrderedDict) @typer_for(objectmodel.r_dict) @typer_for(objectmodel.r_ordereddict) -def rtype_dict_constructor(hop, i_force_non_null=None): - # 'i_force_non_null' is ignored here; if it has any effect, it - # has already been applied to 'hop.r_result' +def rtype_dict_constructor(hop, i_force_non_null=None, i_fast_hash=None): + # 'i_force_non_null' and 'i_fast_hash' are ignored here; if they have any + # effect, it has already been applied to 'hop.r_result' hop.exception_cannot_occur() r_dict = hop.r_result cDICT = hop.inputconst(lltype.Void, r_dict.DICT) diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py --- a/rpython/rtyper/test/test_rdict.py +++ b/rpython/rtyper/test/test_rdict.py @@ -538,6 +538,25 @@ r_dict = rtyper.getrepr(s) assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, 
"f_hash") + def test_r_dict_can_be_fast(self): + def myeq(n, m): + return n == m + def myhash(n): + return ~n + def f(): + d = self.new_r_dict(myeq, myhash, fast_hash=True) + d[5] = 7 + d[12] = 19 + return d + + t = TranslationContext() + s = t.buildannotator().build_types(f, []) + rtyper = t.buildrtyper() + rtyper.specialize() + + r_dict = rtyper.getrepr(s) + assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash") + def test_tuple_dict(self): def f(i): d = self.newdict() @@ -1000,8 +1019,8 @@ return {} @staticmethod - def new_r_dict(myeq, myhash): - return r_dict(myeq, myhash) + def new_r_dict(myeq, myhash, force_non_null=False, fast_hash=False): + return r_dict(myeq, myhash, force_non_null=force_non_null, fast_hash=fast_hash) def test_two_dicts_with_different_value_types(self): def func(i): From pypy.commits at gmail.com Mon Dec 11 11:40:49 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 11 Dec 2017 08:40:49 -0800 (PST) Subject: [pypy-commit] pypy rdict-fast-hash: small refactoring: rename ENTRIES.hash to .entry_hash (hash is super hard to Message-ID: <5a2eb511.0587df0a.3fff8.83ef@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: rdict-fast-hash Changeset: r93365:cba7d424ef73 Date: 2017-12-11 17:23 +0100 http://bitbucket.org/pypy/pypy/changeset/cba7d424ef73/ Log: small refactoring: rename ENTRIES.hash to .entry_hash (hash is super hard to grep for), also make it take the dict in preparation for r_dict supporting fast hash functions diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py --- a/rpython/rtyper/lltypesystem/rordereddict.py +++ b/rpython/rtyper/lltypesystem/rordereddict.py @@ -116,9 +116,9 @@ if ll_fasthash_function is None: entryfields.append(("f_hash", lltype.Signed)) - entrymeths['hash'] = ll_hash_from_cache + entrymeths['entry_hash'] = ll_hash_from_cache else: - entrymeths['hash'] = ll_hash_recomputed + entrymeths['entry_hash'] = ll_hash_recomputed entrymeths['fasthashfn'] = ll_fasthash_function # Build the lltype data structures @@ -600,12 +600,12 @@ dummy = ENTRIES.dummy_obj.ll_dummy_value entries[i].value = dummy - at signature(types.any(), types.int(), returns=types.any()) -def ll_hash_from_cache(entries, i): + at signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_from_cache(entries, d, i): return entries[i].f_hash - at signature(types.any(), types.int(), returns=types.any()) -def ll_hash_recomputed(entries, i): + at signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_recomputed(entries, d, i): ENTRIES = lltype.typeOf(entries).TO return ENTRIES.fasthashfn(entries[i].key) @@ -962,22 +962,22 @@ if fun == FUNC_BYTE: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_BYTE) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_BYTE) i += 1 elif fun == FUNC_SHORT: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_SHORT) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_SHORT) i += 1 elif IS_64BIT and fun == FUNC_INT: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_INT) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_INT) i += 1 elif fun == FUNC_LONG: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_LONG) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_LONG) i += 1 else: assert False @@ -1015,7 +1015,7 @@ checkingkey = entries[index - 
VALID_OFFSET].key if direct_compare and checkingkey == key: return index - VALID_OFFSET # found the entry - if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash: + if d.keyeq is not None and entries.entry_hash(d, index - VALID_OFFSET) == hash: # correct hash, maybe the key is e.g. a different pointer to # an equal object found = d.keyeq(checkingkey, key) @@ -1056,7 +1056,7 @@ checkingkey = entries[index - VALID_OFFSET].key if direct_compare and checkingkey == key: return index - VALID_OFFSET # found the entry - if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash: + if d.keyeq is not None and entries.entry_hash(d, index - VALID_OFFSET) == hash: # correct hash, maybe the key is e.g. a different pointer to # an equal object found = d.keyeq(checkingkey, key) @@ -1305,14 +1305,14 @@ def ll_dict_update(dic1, dic2): if dic1 == dic2: return - ll_ensure_indexes(dic2) # needed for entries.hash() below + ll_ensure_indexes(dic2) # needed for entries.entry_hash() below ll_prepare_dict_update(dic1, dic2.num_live_items) i = 0 while i < dic2.num_ever_used_items: entries = dic2.entries if entries.valid(i): entry = entries[i] - hash = entries.hash(i) + hash = entries.entry_hash(dic2, i) key = entry.key value = entry.value index = dic1.lookup_function(dic1, key, hash, FLAG_STORE) @@ -1413,7 +1413,7 @@ r = lltype.malloc(ELEM.TO) r.item0 = recast(ELEM.TO.item0, entry.key) r.item1 = recast(ELEM.TO.item1, entry.value) - _ll_dict_del(dic, dic.entries.hash(i), i) + _ll_dict_del(dic, dic.entries.entry_hash(dic, i), i) return r def ll_dict_pop(dic, key): From pypy.commits at gmail.com Mon Dec 11 11:40:53 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 11 Dec 2017 08:40:53 -0800 (PST) Subject: [pypy-commit] pypy rdict-fast-hash: thread the fast_hash info through the various layers into the rordereddict Message-ID: <5a2eb515.49451c0a.d57c0.cfb5@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: rdict-fast-hash Changeset: r93367:a903e9b5358b Date: 2017-12-11 17:38 +0100 http://bitbucket.org/pypy/pypy/changeset/a903e9b5358b/ Log: thread the fast_hash info through the various layers into the rordereddict implementation diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -194,13 +194,14 @@ listdef.generalize_range_step(flags['range_step']) return SomeList(listdef) - def getdictdef(self, is_r_dict=False, force_non_null=False): + def getdictdef(self, is_r_dict=False, force_non_null=False, fast_hash=False): """Get the DictDef associated with the current position.""" try: dictdef = self.dictdefs[self.position_key] except KeyError: dictdef = DictDef(self, is_r_dict=is_r_dict, - force_non_null=force_non_null) + force_non_null=force_non_null, + fast_hash=fast_hash) self.dictdefs[self.position_key] = dictdef return dictdef diff --git a/rpython/annotator/builtin.py b/rpython/annotator/builtin.py --- a/rpython/annotator/builtin.py +++ b/rpython/annotator/builtin.py @@ -238,6 +238,14 @@ @analyzer_for(rpython.rlib.objectmodel.r_dict) def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None, s_fast_hash=None): + return _r_dict_helper(SomeDict, s_eqfn, s_hashfn, s_force_non_null, s_fast_hash) + + at analyzer_for(rpython.rlib.objectmodel.r_ordereddict) +def robjmodel_r_ordereddict(s_eqfn, s_hashfn, s_force_non_null=None, s_fast_hash=None): + return _r_dict_helper(SomeOrderedDict, s_eqfn, s_hashfn, + s_force_non_null, s_fast_hash) + +def _r_dict_helper(cls, s_eqfn, s_hashfn, 
s_force_non_null, s_fast_hash): if s_force_non_null is None: force_non_null = False else: @@ -249,15 +257,10 @@ assert s_fast_hash.is_constant() fast_hash = s_fast_hash.const dictdef = getbookkeeper().getdictdef(is_r_dict=True, - force_non_null=force_non_null) + force_non_null=force_non_null, + fast_hash=fast_hash) dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeDict(dictdef) - - at analyzer_for(rpython.rlib.objectmodel.r_ordereddict) -def robjmodel_r_ordereddict(s_eqfn, s_hashfn): - dictdef = getbookkeeper().getdictdef(is_r_dict=True) - dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeOrderedDict(dictdef) + return cls(dictdef) @analyzer_for(rpython.rlib.objectmodel.hlinvoke) def robjmodel_hlinvoke(s_repr, s_llcallable, *args_s): diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py --- a/rpython/annotator/dictdef.py +++ b/rpython/annotator/dictdef.py @@ -81,12 +81,14 @@ def __init__(self, bookkeeper, s_key = s_ImpossibleValue, s_value = s_ImpossibleValue, is_r_dict = False, - force_non_null = False): + force_non_null = False, + fast_hash = False): self.dictkey = DictKey(bookkeeper, s_key, is_r_dict) self.dictkey.itemof[self] = True self.dictvalue = DictValue(bookkeeper, s_value) self.dictvalue.itemof[self] = True self.force_non_null = force_non_null + self.fast_hash = fast_hash def read_key(self, position_key): self.dictkey.read_locations.add(position_key) diff --git a/rpython/rtyper/lltypesystem/rdict.py b/rpython/rtyper/lltypesystem/rdict.py --- a/rpython/rtyper/lltypesystem/rdict.py +++ b/rpython/rtyper/lltypesystem/rdict.py @@ -42,7 +42,8 @@ class DictRepr(AbstractDictRepr): def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue, - custom_eq_hash=None, force_non_null=False): + custom_eq_hash=None, force_non_null=False, fast_hash=False): + # fast_hash is ignored (only implemented in rordereddict.py) self.rtyper = rtyper self.DICT = lltype.GcForwardReference() self.lowleveltype = lltype.Ptr(self.DICT) diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py --- a/rpython/rtyper/lltypesystem/rordereddict.py +++ b/rpython/rtyper/lltypesystem/rordereddict.py @@ -66,7 +66,7 @@ def get_ll_dict(DICTKEY, DICTVALUE, get_custom_eq_hash=None, DICT=None, ll_fasthash_function=None, ll_hash_function=None, - ll_eq_function=None, method_cache={}, + ll_eq_function=None, method_cache={}, fast_hash=False, dummykeyobj=None, dummyvalueobj=None, rtyper=None): # get the actual DICT type. 
if DICT is None, it's created, otherwise # forward reference is becoming DICT @@ -114,7 +114,10 @@ # * the value entryfields.append(("value", DICTVALUE)) - if ll_fasthash_function is None: + if fast_hash: + assert get_custom_eq_hash is not None + entrymeths['entry_hash'] = ll_hash_custom_fast + elif ll_fasthash_function is None: entryfields.append(("f_hash", lltype.Signed)) entrymeths['entry_hash'] = ll_hash_from_cache else: @@ -140,7 +143,7 @@ 'keyeq': ll_keyeq_custom, 'r_rdict_eqfn': r_rdict_eqfn, 'r_rdict_hashfn': r_rdict_hashfn, - 'paranoia': True, + 'paranoia': not fast_hash, } else: # figure out which functions must be used to hash and compare @@ -167,13 +170,14 @@ class OrderedDictRepr(AbstractDictRepr): def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue, - custom_eq_hash=None, force_non_null=False): + custom_eq_hash=None, force_non_null=False, fast_hash=False): #assert not force_non_null self.rtyper = rtyper self.finalized = False self.DICT = lltype.GcForwardReference() self.lowleveltype = lltype.Ptr(self.DICT) self.custom_eq_hash = custom_eq_hash is not None + self.fast_hash = fast_hash if not isinstance(key_repr, rmodel.Repr): # not computed yet, done by setup() assert callable(key_repr) self._key_repr_computer = key_repr @@ -211,6 +215,7 @@ self.r_rdict_eqfn, self.r_rdict_hashfn = ( self._custom_eq_hash_repr()) kwd['get_custom_eq_hash'] = self._custom_eq_hash_repr + kwd['fast_hash'] = self.fast_hash else: kwd['ll_hash_function'] = self.key_repr.get_ll_hash_function() kwd['ll_eq_function'] = self.key_repr.get_ll_eq_function() @@ -609,6 +614,12 @@ ENTRIES = lltype.typeOf(entries).TO return ENTRIES.fasthashfn(entries[i].key) + at signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_custom_fast(entries, d, i): + DICT = lltype.typeOf(d).TO + key = entries[i].key + return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key) + def ll_keyhash_custom(d, key): DICT = lltype.typeOf(d).TO return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key) diff --git a/rpython/rtyper/rdict.py b/rpython/rtyper/rdict.py --- a/rpython/rtyper/rdict.py +++ b/rpython/rtyper/rdict.py @@ -15,6 +15,7 @@ s_key = dictkey.s_value s_value = dictvalue.s_value force_non_null = self.dictdef.force_non_null + fast_hash = self.dictdef.fast_hash if dictkey.custom_eq_hash: custom_eq_hash = lambda: (rtyper.getrepr(dictkey.s_rdict_eqfn), rtyper.getrepr(dictkey.s_rdict_hashfn)) @@ -22,7 +23,7 @@ custom_eq_hash = None return self.get_dict_repr()(rtyper, lambda: rtyper.getrepr(s_key), lambda: rtyper.getrepr(s_value), dictkey, dictvalue, - custom_eq_hash, force_non_null) + custom_eq_hash, force_non_null, fast_hash) def rtyper_makekey(self): self.dictdef.dictkey .dont_change_any_more = True diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py --- a/rpython/rtyper/test/test_rordereddict.py +++ b/rpython/rtyper/test/test_rordereddict.py @@ -386,8 +386,8 @@ return OrderedDict() @staticmethod - def new_r_dict(myeq, myhash): - return objectmodel.r_ordereddict(myeq, myhash) + def new_r_dict(myeq, myhash, force_non_null=False, fast_hash=False): + return objectmodel.r_ordereddict(myeq, myhash, force_non_null=force_non_null, fast_hash=fast_hash) def test_two_dicts_with_different_value_types(self): def func(i): From pypy.commits at gmail.com Mon Dec 11 11:40:51 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 11 Dec 2017 08:40:51 -0800 (PST) Subject: [pypy-commit] pypy rdict-fast-hash: explain what the arguments mean 
Message-ID: <5a2eb513.83c7df0a.403b8.0fac@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: rdict-fast-hash Changeset: r93366:6a993ce073f5 Date: 2017-12-11 17:37 +0100 http://bitbucket.org/pypy/pypy/changeset/6a993ce073f5/ Log: explain what the arguments mean diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -749,6 +749,13 @@ return {} def __init__(self, key_eq, key_hash, force_non_null=False, fast_hash=False): + """ force_non_null=True means that the key can never be None (even if + the annotator things it could be) + + fast_hash=True means that the hash function is very fast, meaning it's + efficient enough that the dict does not have to store the hash per key. + It also implies that neither the hash nor the eq function will mutate + the dictionary. """ self._dict = self._newdict() self.key_eq = key_eq self.key_hash = key_hash From pypy.commits at gmail.com Mon Dec 11 11:46:50 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 11 Dec 2017 08:46:50 -0800 (PST) Subject: [pypy-commit] pypy rdict-fast-hash: use fast_hash=True for two r_dicts Message-ID: <5a2eb67a.02431c0a.11cb5.a5cd@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: rdict-fast-hash Changeset: r93368:edf8ee0934a2 Date: 2017-12-11 17:46 +0100 http://bitbucket.org/pypy/pypy/changeset/edf8ee0934a2/ Log: use fast_hash=True for two r_dicts diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -71,7 +71,7 @@ self.ll_chars = rffi.str2charp(s) self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw') self.pos = 0 - self.cache = r_dict(slice_eq, slice_hash) + self.cache = r_dict(slice_eq, slice_hash, fast_hash=True) def close(self): rffi.free_charp(self.ll_chars) diff --git a/rpython/jit/metainterp/typesystem.py b/rpython/jit/metainterp/typesystem.py --- a/rpython/jit/metainterp/typesystem.py +++ b/rpython/jit/metainterp/typesystem.py @@ -106,11 +106,11 @@ # It is an r_dict on lltype. Two copies, to avoid conflicts with # the value type. Note that NULL is not allowed as a key. 
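(A minimal usage sketch of the new keyword, mirroring the r_dict test added earlier in this branch; it assumes the branch is applied, and note that the argument is renamed to simple_hash_eq a few commits further down in this digest. fast_hash=True only makes sense when the hash function is cheap and neither it nor the eq function mutates the dict.)

    from rpython.rlib.objectmodel import r_dict

    def key_eq(k1, k2):
        return k1 == k2            # must not mutate the dict

    def key_hash(k):
        return ~k                  # cheap enough to recompute on every lookup

    def make_table():
        # with fast_hash=True the rtyper drops the per-entry f_hash field
        # and recomputes the hash from the key whenever it is needed
        d = r_dict(key_eq, key_hash, fast_hash=True)
        d[5] = 7
        d[12] = 19
        return d
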
def new_ref_dict(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, fast_hash=True) def new_ref_dict_2(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, fast_hash=True) def new_ref_dict_3(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, fast_hash=True) def cast_vtable_to_hashable(self, cpu, ptr): adr = llmemory.cast_ptr_to_adr(ptr) From pypy.commits at gmail.com Mon Dec 11 12:23:30 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 11 Dec 2017 09:23:30 -0800 (PST) Subject: [pypy-commit] pypy rdict-fast-hash: fix iter*_with_hash Message-ID: <5a2ebf12.3799df0a.b08f4.6767@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: rdict-fast-hash Changeset: r93369:abf782e86675 Date: 2017-12-11 18:22 +0100 http://bitbucket.org/pypy/pypy/changeset/abf782e86675/ Log: fix iter*_with_hash diff --git a/rpython/rtyper/rdict.py b/rpython/rtyper/rdict.py --- a/rpython/rtyper/rdict.py +++ b/rpython/rtyper/rdict.py @@ -90,7 +90,7 @@ resulttype=ENTRIES) # call the correct variant_*() method method = getattr(self, 'variant_' + self.variant) - return method(hop, ENTRIES, v_entries, v_index) + return method(hop, ENTRIES, v_entries, v_dict, v_index) def get_tuple_result(self, hop, items_v): # this allocates the tuple for the result, directly in the function @@ -110,7 +110,7 @@ hop.genop('setfield', [v_result, c_item, v_item]) return v_result - def variant_keys(self, hop, ENTRIES, v_entries, v_index): + def variant_keys(self, hop, ENTRIES, v_entries, v_dict, v_index): KEY = ENTRIES.TO.OF.key c_key = hop.inputconst(lltype.Void, 'key') v_key = hop.genop('getinteriorfield', [v_entries, v_index, c_key], @@ -119,30 +119,30 @@ variant_reversed = variant_keys - def variant_values(self, hop, ENTRIES, v_entries, v_index): + def variant_values(self, hop, ENTRIES, v_entries, v_dict, v_index): VALUE = ENTRIES.TO.OF.value c_value = hop.inputconst(lltype.Void, 'value') v_value = hop.genop('getinteriorfield', [v_entries,v_index,c_value], resulttype=VALUE) return self.r_dict.recast_value(hop.llops, v_value) - def variant_items(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_value = self.variant_values(hop, ENTRIES, v_entries, v_index) + def variant_items(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_value)) - def variant_hashes(self, hop, ENTRIES, v_entries, v_index): + def variant_hashes(self, hop, ENTRIES, v_entries, v_dict, v_index): # there is not really a variant 'hashes', but this method is # convenient for the following variants - return hop.gendirectcall(ENTRIES.TO.hash, v_entries, v_index) + return hop.gendirectcall(ENTRIES.TO.entry_hash, v_entries, v_dict, v_index) - def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index) + def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_hash)) - def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_value = self.variant_values(hop, ENTRIES, 
v_entries, v_index) - v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index) + def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index) + v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_value, v_hash)) From pypy.commits at gmail.com Mon Dec 11 13:18:42 2017 From: pypy.commits at gmail.com (amauryfa) Date: Mon, 11 Dec 2017 10:18:42 -0800 (PST) Subject: [pypy-commit] pypy py3.6: Python 3.6 is more strict with deprecated files like 'U+' Message-ID: <5a2ecc02.12711c0a.4eff8.bbbf@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93370:e0f1b581e68d Date: 2017-12-11 12:01 +0100 http://bitbucket.org/pypy/pypy/changeset/e0f1b581e68d/ Log: Python 3.6 is more strict with deprecated files like 'U+' diff --git a/pypy/module/_io/interp_io.py b/pypy/module/_io/interp_io.py --- a/pypy/module/_io/interp_io.py +++ b/pypy/module/_io/interp_io.py @@ -68,9 +68,9 @@ rawmode += "+" if universal: - if writing or appending: + if writing or appending or creating or updating: raise oefmt(space.w_ValueError, - "can't use U and writing mode at once") + "mode U cannot be combined with 'x', 'w', 'a', or '+'") space.warn(space.newtext("'U' mode is deprecated ('r' has the same " "effect in Python 3.x)"), space.w_DeprecationWarning) diff --git a/pypy/module/_io/test/test_io.py b/pypy/module/_io/test/test_io.py --- a/pypy/module/_io/test/test_io.py +++ b/pypy/module/_io/test/test_io.py @@ -210,6 +210,8 @@ raises(ValueError, io.open, self.tmpfile, "ww") raises(ValueError, io.open, self.tmpfile, "rwa") raises(ValueError, io.open, self.tmpfile, "b", newline="\n") + raises(ValueError, io.open, self.tmpfile, "U+") + raises(ValueError, io.open, self.tmpfile, "xU") def test_array_write(self): import _io, array From pypy.commits at gmail.com Mon Dec 11 13:18:44 2017 From: pypy.commits at gmail.com (amauryfa) Date: Mon, 11 Dec 2017 10:18:44 -0800 (PST) Subject: [pypy-commit] pypy py3.6: CPython Issue26482: can pickle recursive deque objects. Message-ID: <5a2ecc04.0abadf0a.b2929.96ea@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93371:ce0523b30644 Date: 2017-12-11 19:14 +0100 http://bitbucket.org/pypy/pypy/changeset/ce0523b30644/ Log: CPython Issue26482: can pickle recursive deque objects. diff --git a/lib_pypy/_collections.py b/lib_pypy/_collections.py --- a/lib_pypy/_collections.py +++ b/lib_pypy/_collections.py @@ -323,7 +323,7 @@ self.rotate(-index) def __reduce_ex__(self, proto): - return type(self), (list(self), self.maxlen) + return type(self), ((), self.maxlen), None, iter(self) __hash__ = None diff --git a/pypy/module/_collections/interp_deque.py b/pypy/module/_collections/interp_deque.py --- a/pypy/module/_collections/interp_deque.py +++ b/pypy/module/_collections/interp_deque.py @@ -491,23 +491,14 @@ "Return state information for pickling." 
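(What returning iter(self) as the fourth item of __reduce_ex__ buys, shown as a plain app-level sketch rather than anything from the changeset; it assumes the CPython 3.6 behaviour from Issue26482 that this commit reproduces.)

    import pickle
    from collections import deque

    d = deque([1, 2, 3])
    d.append(d)                        # a deque that contains itself
    d2 = pickle.loads(pickle.dumps(d))
    assert d2[3] is d2                 # the cycle survives the round-trip
    assert list(d2)[:3] == [1, 2, 3]

This works because pickle materialises the empty deque first from the (type, args) pair, memoizes it, and only then feeds it the items from the iterator, so a self-reference resolves to the already-created object.
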
space = self.space w_type = space.type(self) - w_dict = space.findattr(self, space.newtext('__dict__')) - w_list = space.call_function(space.w_list, self) - if w_dict is None: - if self.maxlen == sys.maxint: - result = [ - w_type, space.newtuple([w_list])] - else: - result = [ - w_type, space.newtuple([w_list, space.newint(self.maxlen)])] + w_dict = space.findattr(self, space.newtext('__dict__')) or space.w_None + w_it = space.iter(self) + if self.maxlen == sys.maxint: + w_lentuple = space.newtuple([]) else: - if self.maxlen == sys.maxint: - w_len = space.w_None - else: - w_len = space.newint(self.maxlen) - result = [ - w_type, space.newtuple([w_list, w_len]), w_dict] - return space.newtuple(result) + w_lentuple = space.newtuple([space.newtuple([]), + space.newint(self.maxlen)]) + return space.newtuple([w_type, w_lentuple, w_dict, w_it]) def get_maxlen(space, self): if self.maxlen == sys.maxint: diff --git a/pypy/module/_collections/test/test_deque.py b/pypy/module/_collections/test/test_deque.py --- a/pypy/module/_collections/test/test_deque.py +++ b/pypy/module/_collections/test/test_deque.py @@ -264,25 +264,25 @@ # d = deque('hello world') r = d.__reduce__() - assert r == (deque, (list('hello world'),)) + assert r[:3] == (deque, (), None) # d = deque('hello world', 42) r = d.__reduce__() - assert r == (deque, (list('hello world'), 42)) + assert r[:3] == (deque, ((), 42), None) # class D(deque): pass d = D('hello world') d.a = 5 r = d.__reduce__() - assert r == (D, (list('hello world'), None), {'a': 5}) + assert r[:3] == (D, (), {'a': 5}) # class D(deque): pass d = D('hello world', 42) d.a = 5 r = d.__reduce__() - assert r == (D, (list('hello world'), 42), {'a': 5}) + assert r[:3] == (D, ((), 42), {'a': 5}) def test_copy(self): from _collections import deque From pypy.commits at gmail.com Mon Dec 11 14:35:02 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 11 Dec 2017 11:35:02 -0800 (PST) Subject: [pypy-commit] pypy rdict-fast-hash: rename the argument to simple_hash_eq, as suggested by antocuni and arigato Message-ID: <5a2edde6.8f121c0a.96737.c95d@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: rdict-fast-hash Changeset: r93372:de9aa6bbf0f7 Date: 2017-12-11 20:34 +0100 http://bitbucket.org/pypy/pypy/changeset/de9aa6bbf0f7/ Log: rename the argument to simple_hash_eq, as suggested by antocuni and arigato diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -71,7 +71,7 @@ self.ll_chars = rffi.str2charp(s) self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw') self.pos = 0 - self.cache = r_dict(slice_eq, slice_hash, fast_hash=True) + self.cache = r_dict(slice_eq, slice_hash, simple_hash_eq=True) def close(self): rffi.free_charp(self.ll_chars) diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -194,14 +194,14 @@ listdef.generalize_range_step(flags['range_step']) return SomeList(listdef) - def getdictdef(self, is_r_dict=False, force_non_null=False, fast_hash=False): + def getdictdef(self, is_r_dict=False, force_non_null=False, simple_hash_eq=False): """Get the DictDef associated with the current position.""" try: dictdef = self.dictdefs[self.position_key] except KeyError: dictdef = DictDef(self, is_r_dict=is_r_dict, force_non_null=force_non_null, - fast_hash=fast_hash) + simple_hash_eq=simple_hash_eq) 
self.dictdefs[self.position_key] = dictdef return dictdef diff --git a/rpython/annotator/builtin.py b/rpython/annotator/builtin.py --- a/rpython/annotator/builtin.py +++ b/rpython/annotator/builtin.py @@ -237,28 +237,28 @@ return SomeInstance(clsdef) @analyzer_for(rpython.rlib.objectmodel.r_dict) -def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None, s_fast_hash=None): - return _r_dict_helper(SomeDict, s_eqfn, s_hashfn, s_force_non_null, s_fast_hash) +def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): + return _r_dict_helper(SomeDict, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq) @analyzer_for(rpython.rlib.objectmodel.r_ordereddict) -def robjmodel_r_ordereddict(s_eqfn, s_hashfn, s_force_non_null=None, s_fast_hash=None): +def robjmodel_r_ordereddict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): return _r_dict_helper(SomeOrderedDict, s_eqfn, s_hashfn, - s_force_non_null, s_fast_hash) + s_force_non_null, s_simple_hash_eq) -def _r_dict_helper(cls, s_eqfn, s_hashfn, s_force_non_null, s_fast_hash): +def _r_dict_helper(cls, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq): if s_force_non_null is None: force_non_null = False else: assert s_force_non_null.is_constant() force_non_null = s_force_non_null.const - if s_fast_hash is None: - fast_hash = False + if s_simple_hash_eq is None: + simple_hash_eq = False else: - assert s_fast_hash.is_constant() - fast_hash = s_fast_hash.const + assert s_simple_hash_eq.is_constant() + simple_hash_eq = s_simple_hash_eq.const dictdef = getbookkeeper().getdictdef(is_r_dict=True, force_non_null=force_non_null, - fast_hash=fast_hash) + simple_hash_eq=simple_hash_eq) dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) return cls(dictdef) diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py --- a/rpython/annotator/dictdef.py +++ b/rpython/annotator/dictdef.py @@ -82,13 +82,13 @@ s_value = s_ImpossibleValue, is_r_dict = False, force_non_null = False, - fast_hash = False): + simple_hash_eq = False): self.dictkey = DictKey(bookkeeper, s_key, is_r_dict) self.dictkey.itemof[self] = True self.dictvalue = DictValue(bookkeeper, s_value) self.dictvalue.itemof[self] = True self.force_non_null = force_non_null - self.fast_hash = fast_hash + self.simple_hash_eq = simple_hash_eq def read_key(self, position_key): self.dictkey.read_locations.add(position_key) diff --git a/rpython/jit/metainterp/typesystem.py b/rpython/jit/metainterp/typesystem.py --- a/rpython/jit/metainterp/typesystem.py +++ b/rpython/jit/metainterp/typesystem.py @@ -106,11 +106,11 @@ # It is an r_dict on lltype. Two copies, to avoid conflicts with # the value type. Note that NULL is not allowed as a key. 
def new_ref_dict(self): - return r_dict(rd_eq, rd_hash, fast_hash=True) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def new_ref_dict_2(self): - return r_dict(rd_eq, rd_hash, fast_hash=True) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def new_ref_dict_3(self): - return r_dict(rd_eq, rd_hash, fast_hash=True) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def cast_vtable_to_hashable(self, cpu, ptr): adr = llmemory.cast_ptr_to_adr(ptr) diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -748,11 +748,11 @@ def _newdict(self): return {} - def __init__(self, key_eq, key_hash, force_non_null=False, fast_hash=False): + def __init__(self, key_eq, key_hash, force_non_null=False, simple_hash_eq=False): """ force_non_null=True means that the key can never be None (even if the annotator things it could be) - fast_hash=True means that the hash function is very fast, meaning it's + simple_hash_eq=True means that the hash function is very fast, meaning it's efficient enough that the dict does not have to store the hash per key. It also implies that neither the hash nor the eq function will mutate the dictionary. """ @@ -760,7 +760,7 @@ self.key_eq = key_eq self.key_hash = key_hash self.force_non_null = force_non_null - self.fast_hash = fast_hash + self.simple_hash_eq = simple_hash_eq def __getitem__(self, key): return self._dict[_r_dictkey(self, key)] diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py --- a/rpython/rlib/test/test_objectmodel.py +++ b/rpython/rlib/test/test_objectmodel.py @@ -332,7 +332,7 @@ def test_r_dict_fast_functions(self): def fn(): - d1 = r_dict(strange_key_eq, strange_key_hash, fast_hash=True) + d1 = r_dict(strange_key_eq, strange_key_hash, simple_hash_eq=True) return play_with_r_dict(d1) res = self.interpret(fn, []) assert res diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py --- a/rpython/rtyper/lltypesystem/rordereddict.py +++ b/rpython/rtyper/lltypesystem/rordereddict.py @@ -66,7 +66,7 @@ def get_ll_dict(DICTKEY, DICTVALUE, get_custom_eq_hash=None, DICT=None, ll_fasthash_function=None, ll_hash_function=None, - ll_eq_function=None, method_cache={}, fast_hash=False, + ll_eq_function=None, method_cache={}, simple_hash_eq=False, dummykeyobj=None, dummyvalueobj=None, rtyper=None): # get the actual DICT type. 
if DICT is None, it's created, otherwise # forward reference is becoming DICT @@ -114,7 +114,7 @@ # * the value entryfields.append(("value", DICTVALUE)) - if fast_hash: + if simple_hash_eq: assert get_custom_eq_hash is not None entrymeths['entry_hash'] = ll_hash_custom_fast elif ll_fasthash_function is None: @@ -143,7 +143,7 @@ 'keyeq': ll_keyeq_custom, 'r_rdict_eqfn': r_rdict_eqfn, 'r_rdict_hashfn': r_rdict_hashfn, - 'paranoia': not fast_hash, + 'paranoia': not simple_hash_eq, } else: # figure out which functions must be used to hash and compare @@ -170,14 +170,14 @@ class OrderedDictRepr(AbstractDictRepr): def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue, - custom_eq_hash=None, force_non_null=False, fast_hash=False): + custom_eq_hash=None, force_non_null=False, simple_hash_eq=False): #assert not force_non_null self.rtyper = rtyper self.finalized = False self.DICT = lltype.GcForwardReference() self.lowleveltype = lltype.Ptr(self.DICT) self.custom_eq_hash = custom_eq_hash is not None - self.fast_hash = fast_hash + self.simple_hash_eq = simple_hash_eq if not isinstance(key_repr, rmodel.Repr): # not computed yet, done by setup() assert callable(key_repr) self._key_repr_computer = key_repr @@ -215,7 +215,7 @@ self.r_rdict_eqfn, self.r_rdict_hashfn = ( self._custom_eq_hash_repr()) kwd['get_custom_eq_hash'] = self._custom_eq_hash_repr - kwd['fast_hash'] = self.fast_hash + kwd['simple_hash_eq'] = self.simple_hash_eq else: kwd['ll_hash_function'] = self.key_repr.get_ll_hash_function() kwd['ll_eq_function'] = self.key_repr.get_ll_eq_function() diff --git a/rpython/rtyper/rbuiltin.py b/rpython/rtyper/rbuiltin.py --- a/rpython/rtyper/rbuiltin.py +++ b/rpython/rtyper/rbuiltin.py @@ -717,8 +717,8 @@ @typer_for(OrderedDict) @typer_for(objectmodel.r_dict) @typer_for(objectmodel.r_ordereddict) -def rtype_dict_constructor(hop, i_force_non_null=None, i_fast_hash=None): - # 'i_force_non_null' and 'i_fast_hash' are ignored here; if they have any +def rtype_dict_constructor(hop, i_force_non_null=None, i_simple_hash_eq=None): + # 'i_force_non_null' and 'i_simple_hash_eq' are ignored here; if they have any # effect, it has already been applied to 'hop.r_result' hop.exception_cannot_occur() r_dict = hop.r_result diff --git a/rpython/rtyper/rdict.py b/rpython/rtyper/rdict.py --- a/rpython/rtyper/rdict.py +++ b/rpython/rtyper/rdict.py @@ -15,7 +15,7 @@ s_key = dictkey.s_value s_value = dictvalue.s_value force_non_null = self.dictdef.force_non_null - fast_hash = self.dictdef.fast_hash + simple_hash_eq = self.dictdef.simple_hash_eq if dictkey.custom_eq_hash: custom_eq_hash = lambda: (rtyper.getrepr(dictkey.s_rdict_eqfn), rtyper.getrepr(dictkey.s_rdict_hashfn)) @@ -23,7 +23,7 @@ custom_eq_hash = None return self.get_dict_repr()(rtyper, lambda: rtyper.getrepr(s_key), lambda: rtyper.getrepr(s_value), dictkey, dictvalue, - custom_eq_hash, force_non_null, fast_hash) + custom_eq_hash, force_non_null, simple_hash_eq) def rtyper_makekey(self): self.dictdef.dictkey .dont_change_any_more = True diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py --- a/rpython/rtyper/test/test_rdict.py +++ b/rpython/rtyper/test/test_rdict.py @@ -544,7 +544,7 @@ def myhash(n): return ~n def f(): - d = self.new_r_dict(myeq, myhash, fast_hash=True) + d = self.new_r_dict(myeq, myhash, simple_hash_eq=True) d[5] = 7 d[12] = 19 return d @@ -1019,8 +1019,8 @@ return {} @staticmethod - def new_r_dict(myeq, myhash, force_non_null=False, fast_hash=False): - return r_dict(myeq, myhash, 
force_non_null=force_non_null, fast_hash=fast_hash) + def new_r_dict(myeq, myhash, force_non_null=False, simple_hash_eq=False): + return r_dict(myeq, myhash, force_non_null=force_non_null, simple_hash_eq=simple_hash_eq) def test_two_dicts_with_different_value_types(self): def func(i): diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py --- a/rpython/rtyper/test/test_rordereddict.py +++ b/rpython/rtyper/test/test_rordereddict.py @@ -386,8 +386,10 @@ return OrderedDict() @staticmethod - def new_r_dict(myeq, myhash, force_non_null=False, fast_hash=False): - return objectmodel.r_ordereddict(myeq, myhash, force_non_null=force_non_null, fast_hash=fast_hash) + def new_r_dict(myeq, myhash, force_non_null=False, simple_hash_eq=False): + return objectmodel.r_ordereddict( + myeq, myhash, force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) def test_two_dicts_with_different_value_types(self): def func(i): From pypy.commits at gmail.com Mon Dec 11 16:08:20 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 11 Dec 2017 13:08:20 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Test and fix Message-ID: <5a2ef3c4.d4e31c0a.ad3e8.c41a@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93373:ebaac96d17ab Date: 2017-12-11 22:07 +0100 http://bitbucket.org/pypy/pypy/changeset/ebaac96d17ab/ Log: Test and fix diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py --- a/pypy/module/_sre/test/test_app_sre.py +++ b/pypy/module/_sre/test/test_app_sre.py @@ -124,6 +124,7 @@ assert ["a", "u"] == re.findall("b(.)", "abalbus") assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus") assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs") + assert [u"xyz"] == re.findall(u".*yz", u"xyz") def test_finditer(self): import re diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -915,10 +915,10 @@ @specializectx def find_repetition_end(ctx, ppos, ptr, maxcount, marks): end = ctx.end + # First get rid of the cases where we don't have room for any match. + if maxcount <= 0 or ptr >= end: + return ptr ptrp1 = ctx.next(ptr) - # First get rid of the cases where we don't have room for any match. - if maxcount <= 0 or ptrp1 > end: - return ptr # Check the first character directly. If it doesn't match, we are done. # The idea is to be fast for cases like re.search("b+"), where we expect # the common case to be a non-match. 
It's much faster with the JIT to @@ -1202,12 +1202,14 @@ def regular_search(ctx, base): start = ctx.match_start - while start <= ctx.end: + while True: ctx.jitdriver_RegularSearch.jit_merge_point(ctx=ctx, start=start, base=base) if sre_match(ctx, base, start, None) is not None: ctx.match_start = start return True + if start >= ctx.end: + break start = ctx.next_indirect(start) return False From pypy.commits at gmail.com Mon Dec 11 16:19:01 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 11 Dec 2017 13:19:01 -0800 (PST) Subject: [pypy-commit] pypy rdict-fast-hash: document branch Message-ID: <5a2ef645.01ed1c0a.33d2f.7ae8@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: rdict-fast-hash Changeset: r93374:9c7f18a615ae Date: 2017-12-11 20:37 +0100 http://bitbucket.org/pypy/pypy/changeset/9c7f18a615ae/ Log: document branch diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -36,3 +36,6 @@ .. branch: win32-vcvars +.. branch rdict-fast-hash + +Make it possible to declare that the hash function of an r_dict is fast in RPython. From pypy.commits at gmail.com Mon Dec 11 17:18:59 2017 From: pypy.commits at gmail.com (amauryfa) Date: Mon, 11 Dec 2017 14:18:59 -0800 (PST) Subject: [pypy-commit] pypy py3.6: Add Decimal.as_integer_ratio() Message-ID: <5a2f0453.a180df0a.bd3ef.667a@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93375:5b8174a4dd1d Date: 2017-12-11 19:47 +0100 http://bitbucket.org/pypy/pypy/changeset/5b8174a4dd1d/ Log: Add Decimal.as_integer_ratio() diff --git a/lib_pypy/_decimal.py b/lib_pypy/_decimal.py --- a/lib_pypy/_decimal.py +++ b/lib_pypy/_decimal.py @@ -448,6 +448,36 @@ return DecimalTuple(sign, coeff, expt) + def as_integer_ratio(self): + "Convert a Decimal to its exact integer ratio representation" + if _mpdec.mpd_isspecial(self._mpd): + if _mpdec.mpd_isnan(self._mpd): + raise ValueError("cannot convert NaN to integer ratio") + else: + raise OverflowError("cannot convert Infinity to integer ratio") + + context = getcontext() + tmp = Decimal._new_empty() + with _CatchStatus(context) as (ctx, status_ptr): + _mpdec.mpd_qcopy(tmp._mpd, self._mpd, status_ptr) + exp = tmp._mpd.exp if tmp else 0 + tmp._mpd.exp = 0 + + # context and rounding are unused here: the conversion is exact + numerator = tmp._to_int(_mpdec.MPD_ROUND_FLOOR) + + exponent = 10 ** abs(exp) + if exp >= 0: + numerator *= exponent + denominator = 1 + else: + denominator = exponent + gcd = _math.gcd(numerator, denominator) + numerator //= gcd + denominator //= gcd + + return numerator, denominator + def _convert_for_comparison(self, other, op): if isinstance(other, Decimal): return self, other From pypy.commits at gmail.com Mon Dec 11 17:19:03 2017 From: pypy.commits at gmail.com (amauryfa) Date: Mon, 11 Dec 2017 14:19:03 -0800 (PST) Subject: [pypy-commit] pypy py3.6: Check that the 'e' format works in the struct module. Message-ID: <5a2f0457.cd4a1c0a.ed16e.7349@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93377:abab5c6e2946 Date: 2017-12-11 20:29 +0100 http://bitbucket.org/pypy/pypy/changeset/abab5c6e2946/ Log: Check that the 'e' format works in the struct module. 
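(For reference before the diff: 'e' is the IEEE 754 binary16, i.e. half-precision, format code added in Python 3.6. A standalone sketch of what the test exercises; the 65504.0 / b'\x7b\xff' pair is taken from the test itself, while 1e6 is just an arbitrary value above the largest representable half-float.)

    import struct

    assert struct.calcsize('e') == 2                      # 16-bit float
    assert struct.pack('>e', 65504.0) == b'\x7b\xff'      # largest finite binary16
    assert struct.unpack('>e', b'\x7b\xff') == (65504.0,)
    try:
        struct.pack('<e', 1e6)                            # > 65504, does not fit
    except OverflowError:
        pass
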
diff --git a/pypy/module/struct/test/test_struct.py b/pypy/module/struct/test/test_struct.py --- a/pypy/module/struct/test/test_struct.py +++ b/pypy/module/struct/test/test_struct.py @@ -49,6 +49,7 @@ assert calcsize('=Q') == 8 assert calcsize('d') == 8 + assert calcsize('e", 65504.0) == b'\x7b\xff' + assert unpack(">e", b'\x7b\xff') == (65504.0,) + raises(OverflowError, pack, " Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93378:4fc91603abd7 Date: 2017-12-11 23:18 +0100 http://bitbucket.org/pypy/pypy/changeset/4fc91603abd7/ Log: Match CPython exception for bad lineno or col_offset: always ValueError. diff --git a/pypy/interpreter/astcompiler/ast.py b/pypy/interpreter/astcompiler/ast.py --- a/pypy/interpreter/astcompiler/ast.py +++ b/pypy/interpreter/astcompiler/ast.py @@ -28,6 +28,12 @@ w_obj = space.w_None return w_obj +def obj_to_int(space, w_value): + if not space.isinstance_w(w_value, space.w_long): + raise oefmt(space.w_ValueError, + "invalid integer value: %R", w_value) + return space.int_w(w_value) + class AST(object): __metaclass__ = extendabletype @@ -444,8 +450,8 @@ decorator_list_w = space.unpackiterable(w_decorator_list) _decorator_list = [expr.from_object(space, w_item) for w_item in decorator_list_w] _returns = expr.from_object(space, w_returns) - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return FunctionDef(_name, _args, _body, _decorator_list, _returns, _lineno, _col_offset) State.ast_type('FunctionDef', 'stmt', ['name', 'args', 'body', 'decorator_list', 'returns']) @@ -524,8 +530,8 @@ decorator_list_w = space.unpackiterable(w_decorator_list) _decorator_list = [expr.from_object(space, w_item) for w_item in decorator_list_w] _returns = expr.from_object(space, w_returns) - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return AsyncFunctionDef(_name, _args, _body, _decorator_list, _returns, _lineno, _col_offset) State.ast_type('AsyncFunctionDef', 'stmt', ['name', 'args', 'body', 'decorator_list', 'returns']) @@ -617,8 +623,8 @@ _body = [stmt.from_object(space, w_item) for w_item in body_w] decorator_list_w = space.unpackiterable(w_decorator_list) _decorator_list = [expr.from_object(space, w_item) for w_item in decorator_list_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return ClassDef(_name, _bases, _keywords, _body, _decorator_list, _lineno, _col_offset) State.ast_type('ClassDef', 'stmt', ['name', 'bases', 'keywords', 'body', 'decorator_list']) @@ -654,8 +660,8 @@ w_lineno = get_field(space, w_node, 'lineno', False) w_col_offset = get_field(space, w_node, 'col_offset', False) _value = expr.from_object(space, w_value) - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Return(_value, _lineno, _col_offset) State.ast_type('Return', 'stmt', ['value']) @@ -698,8 +704,8 @@ w_col_offset = get_field(space, w_node, 'col_offset', False) targets_w = space.unpackiterable(w_targets) _targets = [expr.from_object(space, w_item) for w_item in targets_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) 
return Delete(_targets, _lineno, _col_offset) State.ast_type('Delete', 'stmt', ['targets']) @@ -750,8 +756,8 @@ _value = expr.from_object(space, w_value) if _value is None: raise_required_value(space, w_node, 'value') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Assign(_targets, _value, _lineno, _col_offset) State.ast_type('Assign', 'stmt', ['targets', 'value']) @@ -803,8 +809,8 @@ _value = expr.from_object(space, w_value) if _value is None: raise_required_value(space, w_node, 'value') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return AugAssign(_target, _op, _value, _lineno, _col_offset) State.ast_type('AugAssign', 'stmt', ['target', 'op', 'value']) @@ -877,8 +883,8 @@ _body = [stmt.from_object(space, w_item) for w_item in body_w] orelse_w = space.unpackiterable(w_orelse) _orelse = [stmt.from_object(space, w_item) for w_item in orelse_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return For(_target, _iter, _body, _orelse, _lineno, _col_offset) State.ast_type('For', 'stmt', ['target', 'iter', 'body', 'orelse']) @@ -951,8 +957,8 @@ _body = [stmt.from_object(space, w_item) for w_item in body_w] orelse_w = space.unpackiterable(w_orelse) _orelse = [stmt.from_object(space, w_item) for w_item in orelse_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return AsyncFor(_target, _iter, _body, _orelse, _lineno, _col_offset) State.ast_type('AsyncFor', 'stmt', ['target', 'iter', 'body', 'orelse']) @@ -1017,8 +1023,8 @@ _body = [stmt.from_object(space, w_item) for w_item in body_w] orelse_w = space.unpackiterable(w_orelse) _orelse = [stmt.from_object(space, w_item) for w_item in orelse_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return While(_test, _body, _orelse, _lineno, _col_offset) State.ast_type('While', 'stmt', ['test', 'body', 'orelse']) @@ -1083,8 +1089,8 @@ _body = [stmt.from_object(space, w_item) for w_item in body_w] orelse_w = space.unpackiterable(w_orelse) _orelse = [stmt.from_object(space, w_item) for w_item in orelse_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return If(_test, _body, _orelse, _lineno, _col_offset) State.ast_type('If', 'stmt', ['test', 'body', 'orelse']) @@ -1141,8 +1147,8 @@ _items = [withitem.from_object(space, w_item) for w_item in items_w] body_w = space.unpackiterable(w_body) _body = [stmt.from_object(space, w_item) for w_item in body_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return With(_items, _body, _lineno, _col_offset) State.ast_type('With', 'stmt', ['items', 'body']) @@ -1199,8 +1205,8 @@ _items = [withitem.from_object(space, w_item) for w_item in items_w] body_w = space.unpackiterable(w_body) _body = [stmt.from_object(space, w_item) for w_item in body_w] - _lineno = space.int_w(w_lineno) - _col_offset = 
space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return AsyncWith(_items, _body, _lineno, _col_offset) State.ast_type('AsyncWith', 'stmt', ['items', 'body']) @@ -1243,8 +1249,8 @@ w_col_offset = get_field(space, w_node, 'col_offset', False) _exc = expr.from_object(space, w_exc) _cause = expr.from_object(space, w_cause) - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Raise(_exc, _cause, _lineno, _col_offset) State.ast_type('Raise', 'stmt', ['exc', 'cause']) @@ -1329,8 +1335,8 @@ _orelse = [stmt.from_object(space, w_item) for w_item in orelse_w] finalbody_w = space.unpackiterable(w_finalbody) _finalbody = [stmt.from_object(space, w_item) for w_item in finalbody_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Try(_body, _handlers, _orelse, _finalbody, _lineno, _col_offset) State.ast_type('Try', 'stmt', ['body', 'handlers', 'orelse', 'finalbody']) @@ -1374,8 +1380,8 @@ if _test is None: raise_required_value(space, w_node, 'test') _msg = expr.from_object(space, w_msg) - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Assert(_test, _msg, _lineno, _col_offset) State.ast_type('Assert', 'stmt', ['test', 'msg']) @@ -1418,8 +1424,8 @@ w_col_offset = get_field(space, w_node, 'col_offset', False) names_w = space.unpackiterable(w_names) _names = [alias.from_object(space, w_item) for w_item in names_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Import(_names, _lineno, _col_offset) State.ast_type('Import', 'stmt', ['names']) @@ -1471,9 +1477,9 @@ _module = space.text_or_none_w(w_module) names_w = space.unpackiterable(w_names) _names = [alias.from_object(space, w_item) for w_item in names_w] - _level = space.int_w(w_level) - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _level = obj_to_int(space, w_level) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return ImportFrom(_module, _names, _level, _lineno, _col_offset) State.ast_type('ImportFrom', 'stmt', ['module', 'names', 'level']) @@ -1512,8 +1518,8 @@ w_col_offset = get_field(space, w_node, 'col_offset', False) names_w = space.unpackiterable(w_names) _names = [space.text_w(w_item) for w_item in names_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Global(_names, _lineno, _col_offset) State.ast_type('Global', 'stmt', ['names']) @@ -1552,8 +1558,8 @@ w_col_offset = get_field(space, w_node, 'col_offset', False) names_w = space.unpackiterable(w_names) _names = [space.text_w(w_item) for w_item in names_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Nonlocal(_names, _lineno, _col_offset) State.ast_type('Nonlocal', 'stmt', ['names']) @@ -1590,8 +1596,8 @@ _value = expr.from_object(space, w_value) if _value is None: raise_required_value(space, w_node, 'value') - _lineno = space.int_w(w_lineno) 
- _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Expr(_value, _lineno, _col_offset) State.ast_type('Expr', 'stmt', ['value']) @@ -1620,8 +1626,8 @@ def from_object(space, w_node): w_lineno = get_field(space, w_node, 'lineno', False) w_col_offset = get_field(space, w_node, 'col_offset', False) - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Pass(_lineno, _col_offset) State.ast_type('Pass', 'stmt', []) @@ -1650,8 +1656,8 @@ def from_object(space, w_node): w_lineno = get_field(space, w_node, 'lineno', False) w_col_offset = get_field(space, w_node, 'col_offset', False) - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Break(_lineno, _col_offset) State.ast_type('Break', 'stmt', []) @@ -1680,8 +1686,8 @@ def from_object(space, w_node): w_lineno = get_field(space, w_node, 'lineno', False) w_col_offset = get_field(space, w_node, 'col_offset', False) - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Continue(_lineno, _col_offset) State.ast_type('Continue', 'stmt', []) @@ -1805,8 +1811,8 @@ raise_required_value(space, w_node, 'op') values_w = space.unpackiterable(w_values) _values = [expr.from_object(space, w_item) for w_item in values_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return BoolOp(_op, _values, _lineno, _col_offset) State.ast_type('BoolOp', 'expr', ['op', 'values']) @@ -1858,8 +1864,8 @@ _right = expr.from_object(space, w_right) if _right is None: raise_required_value(space, w_node, 'right') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return BinOp(_left, _op, _right, _lineno, _col_offset) State.ast_type('BinOp', 'expr', ['left', 'op', 'right']) @@ -1903,8 +1909,8 @@ _operand = expr.from_object(space, w_operand) if _operand is None: raise_required_value(space, w_node, 'operand') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return UnaryOp(_op, _operand, _lineno, _col_offset) State.ast_type('UnaryOp', 'expr', ['op', 'operand']) @@ -1949,8 +1955,8 @@ _body = expr.from_object(space, w_body) if _body is None: raise_required_value(space, w_node, 'body') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Lambda(_args, _body, _lineno, _col_offset) State.ast_type('Lambda', 'expr', ['args', 'body']) @@ -2003,8 +2009,8 @@ _orelse = expr.from_object(space, w_orelse) if _orelse is None: raise_required_value(space, w_node, 'orelse') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return IfExp(_test, _body, _orelse, _lineno, _col_offset) State.ast_type('IfExp', 'expr', ['test', 'body', 'orelse']) @@ -2061,8 +2067,8 @@ _keys = [expr.from_object(space, w_item) for w_item in 
keys_w] values_w = space.unpackiterable(w_values) _values = [expr.from_object(space, w_item) for w_item in values_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Dict(_keys, _values, _lineno, _col_offset) State.ast_type('Dict', 'expr', ['keys', 'values']) @@ -2105,8 +2111,8 @@ w_col_offset = get_field(space, w_node, 'col_offset', False) elts_w = space.unpackiterable(w_elts) _elts = [expr.from_object(space, w_item) for w_item in elts_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Set(_elts, _lineno, _col_offset) State.ast_type('Set', 'expr', ['elts']) @@ -2157,8 +2163,8 @@ raise_required_value(space, w_node, 'elt') generators_w = space.unpackiterable(w_generators) _generators = [comprehension.from_object(space, w_item) for w_item in generators_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return ListComp(_elt, _generators, _lineno, _col_offset) State.ast_type('ListComp', 'expr', ['elt', 'generators']) @@ -2209,8 +2215,8 @@ raise_required_value(space, w_node, 'elt') generators_w = space.unpackiterable(w_generators) _generators = [comprehension.from_object(space, w_item) for w_item in generators_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return SetComp(_elt, _generators, _lineno, _col_offset) State.ast_type('SetComp', 'expr', ['elt', 'generators']) @@ -2269,8 +2275,8 @@ raise_required_value(space, w_node, 'value') generators_w = space.unpackiterable(w_generators) _generators = [comprehension.from_object(space, w_item) for w_item in generators_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return DictComp(_key, _value, _generators, _lineno, _col_offset) State.ast_type('DictComp', 'expr', ['key', 'value', 'generators']) @@ -2321,8 +2327,8 @@ raise_required_value(space, w_node, 'elt') generators_w = space.unpackiterable(w_generators) _generators = [comprehension.from_object(space, w_item) for w_item in generators_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return GeneratorExp(_elt, _generators, _lineno, _col_offset) State.ast_type('GeneratorExp', 'expr', ['elt', 'generators']) @@ -2359,8 +2365,8 @@ _value = expr.from_object(space, w_value) if _value is None: raise_required_value(space, w_node, 'value') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Await(_value, _lineno, _col_offset) State.ast_type('Await', 'expr', ['value']) @@ -2396,8 +2402,8 @@ w_lineno = get_field(space, w_node, 'lineno', False) w_col_offset = get_field(space, w_node, 'col_offset', False) _value = expr.from_object(space, w_value) - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Yield(_value, _lineno, _col_offset) State.ast_type('Yield', 'expr', ['value']) 
@@ -2434,8 +2440,8 @@ _value = expr.from_object(space, w_value) if _value is None: raise_required_value(space, w_node, 'value') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return YieldFrom(_value, _lineno, _col_offset) State.ast_type('YieldFrom', 'expr', ['value']) @@ -2496,8 +2502,8 @@ _ops = [cmpop.from_object(space, w_item) for w_item in ops_w] comparators_w = space.unpackiterable(w_comparators) _comparators = [expr.from_object(space, w_item) for w_item in comparators_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Compare(_left, _ops, _comparators, _lineno, _col_offset) State.ast_type('Compare', 'expr', ['left', 'ops', 'comparators']) @@ -2562,8 +2568,8 @@ _args = [expr.from_object(space, w_item) for w_item in args_w] keywords_w = space.unpackiterable(w_keywords) _keywords = [keyword.from_object(space, w_item) for w_item in keywords_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Call(_func, _args, _keywords, _lineno, _col_offset) State.ast_type('Call', 'expr', ['func', 'args', 'keywords']) @@ -2599,8 +2605,8 @@ _n = w_n if _n is None: raise_required_value(space, w_node, 'n') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Num(_n, _lineno, _col_offset) State.ast_type('Num', 'expr', ['n']) @@ -2636,8 +2642,8 @@ _s = check_string(space, w_s) if _s is None: raise_required_value(space, w_node, 's') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Str(_s, _lineno, _col_offset) State.ast_type('Str', 'expr', ['s']) @@ -2684,10 +2690,10 @@ _value = expr.from_object(space, w_value) if _value is None: raise_required_value(space, w_node, 'value') - _conversion = space.int_w(w_conversion) + _conversion = obj_to_int(space, w_conversion) _format_spec = expr.from_object(space, w_format_spec) - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return FormattedValue(_value, _conversion, _format_spec, _lineno, _col_offset) State.ast_type('FormattedValue', 'expr', ['value', 'conversion', 'format_spec']) @@ -2730,8 +2736,8 @@ w_col_offset = get_field(space, w_node, 'col_offset', False) values_w = space.unpackiterable(w_values) _values = [expr.from_object(space, w_item) for w_item in values_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return JoinedStr(_values, _lineno, _col_offset) State.ast_type('JoinedStr', 'expr', ['values']) @@ -2767,8 +2773,8 @@ _s = check_string(space, w_s) if _s is None: raise_required_value(space, w_node, 's') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Bytes(_s, _lineno, _col_offset) State.ast_type('Bytes', 'expr', ['s']) @@ -2804,8 +2810,8 @@ _value = w_value if _value is None: raise_required_value(space, w_node, 
'value') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return NameConstant(_value, _lineno, _col_offset) State.ast_type('NameConstant', 'expr', ['value']) @@ -2834,8 +2840,8 @@ def from_object(space, w_node): w_lineno = get_field(space, w_node, 'lineno', False) w_col_offset = get_field(space, w_node, 'col_offset', False) - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Ellipsis(_lineno, _col_offset) State.ast_type('Ellipsis', 'expr', []) @@ -2871,8 +2877,8 @@ _value = w_value if _value is None: raise_required_value(space, w_node, 'value') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Constant(_value, _lineno, _col_offset) State.ast_type('Constant', 'expr', ['value']) @@ -2923,8 +2929,8 @@ _ctx = expr_context.from_object(space, w_ctx) if _ctx is None: raise_required_value(space, w_node, 'ctx') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Attribute(_value, _attr, _ctx, _lineno, _col_offset) State.ast_type('Attribute', 'expr', ['value', 'attr', 'ctx']) @@ -2976,8 +2982,8 @@ _ctx = expr_context.from_object(space, w_ctx) if _ctx is None: raise_required_value(space, w_node, 'ctx') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Subscript(_value, _slice, _ctx, _lineno, _col_offset) State.ast_type('Subscript', 'expr', ['value', 'slice', 'ctx']) @@ -3021,8 +3027,8 @@ _ctx = expr_context.from_object(space, w_ctx) if _ctx is None: raise_required_value(space, w_node, 'ctx') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Starred(_value, _ctx, _lineno, _col_offset) State.ast_type('Starred', 'expr', ['value', 'ctx']) @@ -3065,8 +3071,8 @@ _ctx = expr_context.from_object(space, w_ctx) if _ctx is None: raise_required_value(space, w_node, 'ctx') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Name(_id, _ctx, _lineno, _col_offset) State.ast_type('Name', 'expr', ['id', 'ctx']) @@ -3116,8 +3122,8 @@ _ctx = expr_context.from_object(space, w_ctx) if _ctx is None: raise_required_value(space, w_node, 'ctx') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return List(_elts, _ctx, _lineno, _col_offset) State.ast_type('List', 'expr', ['elts', 'ctx']) @@ -3167,8 +3173,8 @@ _ctx = expr_context.from_object(space, w_ctx) if _ctx is None: raise_required_value(space, w_node, 'ctx') - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return Tuple(_elts, _ctx, _lineno, _col_offset) State.ast_type('Tuple', 'expr', ['elts', 'ctx']) @@ -3780,8 +3786,8 @@ _name = space.text_or_none_w(w_name) body_w = space.unpackiterable(w_body) _body = [stmt.from_object(space, 
w_item) for w_item in body_w] - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return ExceptHandler(_type, _name, _body, _lineno, _col_offset) State.ast_type('ExceptHandler', 'excepthandler', ['type', 'name', 'body']) @@ -3915,8 +3921,8 @@ if _arg is None: raise_required_value(space, w_node, 'arg') _annotation = expr.from_object(space, w_annotation) - _lineno = space.int_w(w_lineno) - _col_offset = space.int_w(w_col_offset) + _lineno = obj_to_int(space, w_lineno) + _col_offset = obj_to_int(space, w_col_offset) return arg(_arg, _annotation, _lineno, _col_offset) State.ast_type('arg', 'AST', ['arg', 'annotation'], ['lineno', 'col_offset']) diff --git a/pypy/interpreter/astcompiler/tools/asdl_py.py b/pypy/interpreter/astcompiler/tools/asdl_py.py --- a/pypy/interpreter/astcompiler/tools/asdl_py.py +++ b/pypy/interpreter/astcompiler/tools/asdl_py.py @@ -165,7 +165,7 @@ return "space.text_or_none_w(%s)" % (value,) return "space.text_w(%s)" % (value,) elif field.type in ("int",): - return "space.int_w(%s)" % (value,) + return "obj_to_int(space, %s)" % (value,) elif field.type in ("bool",): return "space.bool_w(%s)" % (value,) else: @@ -457,6 +457,12 @@ w_obj = space.w_None return w_obj +def obj_to_int(space, w_value): + if not space.isinstance_w(w_value, space.w_long): + raise oefmt(space.w_ValueError, + "invalid integer value: %R", w_value) + return space.int_w(w_value) + class AST(object): __metaclass__ = extendabletype diff --git a/pypy/module/_ast/test/test_ast.py b/pypy/module/_ast/test/test_ast.py --- a/pypy/module/_ast/test/test_ast.py +++ b/pypy/module/_ast/test/test_ast.py @@ -84,6 +84,16 @@ imp.level = 3 assert imp.level == 3 + def test_bad_int(self): + ast = self.ast + body = [ast.ImportFrom(module='time', + names=[ast.alias(name='sleep')], + level=None, + lineno=None, col_offset=None)] + mod = ast.Module(body) + exc = raises(ValueError, compile, mod, 'test', 'exec') + assert str(exc.value) == "invalid integer value: None" + def test_identifier(self): ast = self.ast name = ast.Name("name_word", ast.Load()) @@ -114,13 +124,12 @@ assert alias.name == 'mod' + expected assert alias.asname == expected - @py.test.mark.skipif("py.test.config.option.runappdirect") def test_object(self): ast = self.ast - const = ast.Const(4) - assert const.obj == 4 - const.obj = 5 - assert const.obj == 5 + const = ast.Constant(4) + assert const.value == 4 + const.value = 5 + assert const.value == 5 def test_optional(self): mod = self.get_ast("x(32)", "eval") From pypy.commits at gmail.com Mon Dec 11 17:19:01 2017 From: pypy.commits at gmail.com (amauryfa) Date: Mon, 11 Dec 2017 14:19:01 -0800 (PST) Subject: [pypy-commit] pypy py3.6: Add support for half floats in the RPython rstruct module. Message-ID: <5a2f0455.a581df0a.bc9c.c3d6@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93376:51f3350c4b8b Date: 2017-12-11 20:29 +0100 http://bitbucket.org/pypy/pypy/changeset/51f3350c4b8b/ Log: Add support for half floats in the RPython rstruct module. 
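	A minimal sketch of how the new 'e' format code can be driven, assuming
	the runpack() helper exercised in the tests below; the function name
	demo_halffloat is invented for illustration, and the byte strings reuse
	the 0x7bef pattern from the tests (value 64992.0):

	    # Sketch only: reading IEEE 754 half floats via the new 'e' code.
	    from rpython.rlib.rstruct.runpack import runpack

	    def demo_halffloat():
	        # big-endian half float, 2 bytes -> 64992.0
	        big = runpack(">e", b"\x7b\xef")
	        # same value with the bytes swapped, little-endian
	        little = runpack("<e", b"\xef\x7b")
	        return big, little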
diff --git a/rpython/rlib/rstruct/standardfmttable.py b/rpython/rlib/rstruct/standardfmttable.py --- a/rpython/rlib/rstruct/standardfmttable.py +++ b/rpython/rlib/rstruct/standardfmttable.py @@ -105,6 +105,18 @@ _pack_string(fmtiter, string, count-1) +def pack_halffloat(fmtiter): + size = 2 + fl = fmtiter.accept_float_arg() + try: + result = ieee.pack_float(fmtiter.wbuf, fmtiter.pos, + fl, size, fmtiter.bigendian) + except OverflowError: + raise StructOverflowError("float too large for format 'e'") + else: + fmtiter.advance(size) + return result + def make_float_packer(TYPE): size = rffi.sizeof(TYPE) def packer(fmtiter): @@ -247,6 +259,11 @@ end = count fmtiter.appendobj(data[1:end]) + at specialize.argtype(0) +def unpack_halffloat(fmtiter): + data = fmtiter.read(2) + fmtiter.appendobj(ieee.unpack_float(data, fmtiter.bigendian)) + def make_ieee_unpacker(TYPE): @specialize.argtype(0) def unpack_ieee(fmtiter): @@ -374,6 +391,8 @@ 'needcount' : True }, 'p':{ 'size' : 1, 'pack' : pack_pascal, 'unpack' : unpack_pascal, 'needcount' : True }, + 'e':{ 'size' : 2, 'pack' : pack_halffloat, + 'unpack' : unpack_halffloat}, 'f':{ 'size' : 4, 'pack' : make_float_packer(rffi.FLOAT), 'unpack' : unpack_float}, 'd':{ 'size' : 8, 'pack' : make_float_packer(rffi.DOUBLE), diff --git a/rpython/rlib/rstruct/test/test_pack.py b/rpython/rlib/rstruct/test/test_pack.py --- a/rpython/rlib/rstruct/test/test_pack.py +++ b/rpython/rlib/rstruct/test/test_pack.py @@ -138,6 +138,19 @@ self.check('f', 123.456) self.check('d', 123.456789) + def test_pack_halffloat(self): + if self.fmttable is nativefmttable.native_fmttable: + # Host Python cannot handle half floats. + return + size = 2 + wbuf = MutableStringBuffer(size) + self.mypack_into('e', wbuf, 6.5e+04) + got = wbuf.finish() + if self.bigendian: + assert got == b'\x7b\xef' + else: + assert got == b'\xef\x7b' + def test_float_overflow(self): if self.fmt_prefix == '@': # native packing, no overflow diff --git a/rpython/rlib/rstruct/test/test_runpack.py b/rpython/rlib/rstruct/test/test_runpack.py --- a/rpython/rlib/rstruct/test/test_runpack.py +++ b/rpython/rlib/rstruct/test/test_runpack.py @@ -78,6 +78,10 @@ assert f != 12.34 # precision lost assert abs(f - 12.34) < 1E-6 + def test_unpack_halffloat(self): + assert runpack(">e", b"\x7b\xef") == 64992.0 + assert runpack(" Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93379:bdbea29862af Date: 2017-12-12 02:44 +0000 http://bitbucket.org/pypy/pypy/changeset/bdbea29862af/ Log: Raise ValueError when array item is invalid unicode diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -1030,7 +1030,11 @@ return space.newbytes(item) elif mytype.typecode == 'u': code = r_uint(ord(item)) - return space.newutf8(rutf8.unichr_as_utf8(code), 1) + try: + return space.newutf8(rutf8.unichr_as_utf8(code), 1) + except ValueError: + raise oefmt(space.w_ValueError, + "character is not in range [U+0000; U+10ffff]") assert 0, "unreachable" # interface From pypy.commits at gmail.com Mon Dec 11 21:49:42 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 11 Dec 2017 18:49:42 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix Message-ID: <5a2f43c6.16981c0a.94759.ed68@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93380:11874bd5e944 Date: 2017-12-12 02:49 +0000 http://bitbucket.org/pypy/pypy/changeset/11874bd5e944/ Log: fix diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py 
--- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -27,7 +27,7 @@ # Fast version of the "strict" errors handler. def raise_unicode_exception_encode(errors, encoding, msg, utf8, startingpos, endingpos): - u_len = rutf8.get_utf8_length(utf8, True) + u_len = rutf8.get_utf8_length(utf8) raise OperationError(space.w_UnicodeEncodeError, space.newtuple([space.newtext(encoding), space.newutf8(utf8, u_len), From pypy.commits at gmail.com Tue Dec 12 00:02:28 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 11 Dec 2017 21:02:28 -0800 (PST) Subject: [pypy-commit] pypy default: Backport test additions and cleanups from unicode-utf8 Message-ID: <5a2f62e4.8bc4df0a.af3e3.f41a@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93381:458ccc9243cd Date: 2017-12-12 04:57 +0000 http://bitbucket.org/pypy/pypy/changeset/458ccc9243cd/ Log: Backport test additions and cleanups from unicode-utf8 diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py b/pypy/interpreter/astcompiler/test/test_astbuilder.py --- a/pypy/interpreter/astcompiler/test/test_astbuilder.py +++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py @@ -1246,3 +1246,7 @@ exc = py.test.raises(SyntaxError, self.get_ast, input).value assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode" " bytes in position 0-1: truncated \\xXX escape") + input = "u'\\x1'" + exc = py.test.raises(SyntaxError, self.get_ast, input).value + assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode" + " bytes in position 0-2: truncated \\xXX escape") diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -210,7 +210,8 @@ def xmlcharrefreplace_errors(space, w_exc): check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object'))) + w_obj = space.getattr(w_exc, space.newtext('object')) + obj = space.realunicode_w(w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) @@ -236,7 +237,8 @@ def backslashreplace_errors(space, w_exc): check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object'))) + w_obj = space.getattr(w_exc, space.newtext('object')) + obj = space.realunicode_w(w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -115,10 +115,10 @@ raises(TypeError, charmap_decode, '\xff', "strict", {0xff: 0x110000}) assert (charmap_decode("\x00\x01\x02", "strict", {0: 0x10FFFF, 1: ord('b'), 2: ord('c')}) == - u"\U0010FFFFbc", 3) + (u"\U0010FFFFbc", 3)) assert (charmap_decode("\x00\x01\x02", "strict", {0: u'\U0010FFFF', 1: u'b', 2: u'c'}) == - u"\U0010FFFFbc", 3) + (u"\U0010FFFFbc", 3)) def test_escape_decode_errors(self): from _codecs import escape_decode as decode @@ -537,8 +537,12 @@ assert '\xff'.decode('utf-7', 'ignore') == '' assert '\x00'.decode('unicode-internal', 'ignore') == '' - def test_backslahreplace(self): - assert u'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') == 
'a\\xac\u1234\u20ac\u8000' + def test_backslashreplace(self): + sin = u"a\xac\u1234\u20ac\u8000\U0010ffff" + expected = "a\\xac\\u1234\\u20ac\\u8000\\U0010ffff" + assert sin.encode('ascii', 'backslashreplace') == expected + expected = "a\xac\\u1234\xa4\\u8000\\U0010ffff" + assert sin.encode("iso-8859-15", "backslashreplace") == expected def test_badhandler(self): import codecs @@ -592,11 +596,11 @@ def handler_unicodeinternal(exc): if not isinstance(exc, UnicodeDecodeError): raise TypeError("don't know how to handle %r" % exc) - return (u"\x01", 1) + return (u"\x01", 4) codecs.register_error("test.hui", handler_unicodeinternal) res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui") if sys.maxunicode > 65535: - assert res == u"\u0000\u0001\u0000" # UCS4 build + assert res == u"\u0000\u0001" # UCS4 build else: assert res == u"\x00\x00\x01\x00\x00" # UCS2 build @@ -750,3 +754,31 @@ assert _codecs.unicode_escape_decode(b) == (u'', 0) assert _codecs.raw_unicode_escape_decode(b) == (u'', 0) assert _codecs.unicode_internal_decode(b) == (u'', 0) + + def test_xmlcharrefreplace(self): + r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('latin1', 'xmlcharrefreplace') + assert r == 'ሴ\x80⍅y\xab' + r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('ascii', 'xmlcharrefreplace') + assert r == 'ሴ€⍅y«' + + def test_errorhandler_collection(self): + import _codecs + errors = [] + def record_error(exc): + if not isinstance(exc, UnicodeEncodeError): + raise TypeError("don't know how to handle %r" % exc) + errors.append(exc.object[exc.start:exc.end]) + return (u'', exc.end) + _codecs.register_error("test.record", record_error) + + sin = u"\xac\u1234\u1234\u20ac\u8000" + assert sin.encode("ascii", "test.record") == "" + assert errors == [sin] + + errors = [] + assert sin.encode("latin-1", "test.record") == "\xac" + assert errors == [u'\u1234\u1234\u20ac\u8000'] + + errors = [] + assert sin.encode("iso-8859-15", "test.record") == "\xac\xa4" + assert errors == [u'\u1234\u1234', u'\u8000'] diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -414,6 +414,7 @@ if not space.isinstance_w(w_decoded, space.w_unicode): msg = "decoder should return a string result, not '%T'" raise oefmt(space.w_TypeError, msg, w_decoded) + return w_decoded class W_TextIOWrapper(W_TextIOBase): @@ -940,12 +941,13 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self.decoded.set(space, w_decoded) + w_decoded = check_decoded(space, w_decoded) # Skip chars_to_skip of the decoded characters - if len(self.decoded.text) < cookie.chars_to_skip: + if space.len_w(w_decoded) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") + self.decoded.set(space, w_decoded) self.decoded.pos = cookie.chars_to_skip else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -958,10 +960,8 @@ def tell_w(self, space): self._check_closed(space) - if not self.seekable: raise oefmt(space.w_IOError, "underlying stream is not seekable") - if not self.telling: raise oefmt(space.w_IOError, "telling position disabled by next() call") @@ -1031,14 +1031,14 @@ # We didn't get enough decoded data; signal EOF to get more. 
w_decoded = space.call_method(self.w_decoder, "decode", space.newbytes(""), - space.newint(1)) # final=1 + space.newint(1)) # final=1 check_decoded(space, w_decoded) - chars_decoded += len(space.unicode_w(w_decoded)) + chars_decoded += space.len_w(w_decoded) cookie.need_eof = 1 if chars_decoded < chars_to_skip: raise oefmt(space.w_IOError, - "can't reconstruct logical file position") + "can't reconstruct logical file position") finally: space.call_method(self.w_decoder, "setstate", w_saved_state) diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -40,7 +40,8 @@ w_newline=space.newtext(mode)) lines = [] for limit in limits: - line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) + w_line = w_textio.readline_w(space, space.newint(limit)) + line = space.unicode_w(w_line) if limit >= 0: assert len(line) <= limit if line: diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py --- a/pypy/module/_pypyjson/interp_encoder.py +++ b/pypy/module/_pypyjson/interp_encoder.py @@ -49,24 +49,24 @@ first = 0 for i in range(first, len(u)): - c = u[i] - if c <= u'~': - if c == u'"' or c == u'\\': + c = ord(u[i]) + if c <= ord('~'): + if c == ord('"') or c == ord('\\'): sb.append('\\') - elif c < u' ': - sb.append(ESCAPE_BEFORE_SPACE[ord(c)]) + elif c < ord(' '): + sb.append(ESCAPE_BEFORE_SPACE[c]) continue - sb.append(chr(ord(c))) + sb.append(chr(c)) else: - if c <= u'\uffff': + if c <= ord(u'\uffff'): sb.append('\\u') - sb.append(HEX[ord(c) >> 12]) - sb.append(HEX[(ord(c) >> 8) & 0x0f]) - sb.append(HEX[(ord(c) >> 4) & 0x0f]) - sb.append(HEX[ord(c) & 0x0f]) + sb.append(HEX[c >> 12]) + sb.append(HEX[(c >> 8) & 0x0f]) + sb.append(HEX[(c >> 4) & 0x0f]) + sb.append(HEX[c & 0x0f]) else: # surrogate pair - n = ord(c) - 0x10000 + n = c - 0x10000 s1 = 0xd800 | ((n >> 10) & 0x3ff) sb.append('\\ud') sb.append(HEX[(s1 >> 8) & 0x0f]) diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py --- a/pypy/module/_rawffi/alt/type_converter.py +++ b/pypy/module/_rawffi/alt/type_converter.py @@ -128,7 +128,7 @@ intval: lltype.Signed """ self.error(w_ffitype, w_obj) - + def handle_unichar(self, w_ffitype, w_obj, intval): """ intval: lltype.Signed @@ -174,7 +174,7 @@ def handle_struct_rawffi(self, w_ffitype, w_structinstance): """ This method should be killed as soon as we remove support for _rawffi structures - + w_structinstance: W_StructureInstance """ self.error(w_ffitype, w_structinstance) @@ -349,7 +349,7 @@ def get_struct_rawffi(self, w_ffitype, w_structdescr): """ This should be killed as soon as we kill support for _rawffi structures - + Return type: lltype.Unsigned (the address of the structure) """ diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -481,11 +481,13 @@ @unwrap_spec(w_groupnum=WrappedDefault(0)) def start_w(self, w_groupnum): - return self.space.newint(self.do_span(w_groupnum)[0]) + start, end = self.do_span(w_groupnum) + return self.space.newint(start) @unwrap_spec(w_groupnum=WrappedDefault(0)) def end_w(self, w_groupnum): - return self.space.newint(self.do_span(w_groupnum)[1]) + start, end = self.do_span(w_groupnum) + return self.space.newint(end) @unwrap_spec(w_groupnum=WrappedDefault(0)) def span_w(self, w_groupnum): diff --git 
a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py --- a/pypy/module/_sre/test/test_app_sre.py +++ b/pypy/module/_sre/test/test_app_sre.py @@ -87,6 +87,14 @@ assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus") assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs") + def test_findall_unicode(self): + import re + assert [u"\u1234"] == re.findall(u"\u1234", u"\u1000\u1234\u2000") + assert ["a", "u"] == re.findall("b(.)", "abalbus") + assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus") + assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs") + assert [u"xyz"] == re.findall(u".*yz", u"xyz") + def test_finditer(self): import re it = re.finditer("b(.)", "brabbel") @@ -999,3 +1007,15 @@ import re assert re.search(".+ab", "wowowowawoabwowo") assert None == re.search(".+ab", "wowowaowowo") + + +class AppTestUnicodeExtra: + def test_string_attribute(self): + import re + match = re.search(u"\u1234", u"\u1233\u1234\u1235") + assert match.string == u"\u1233\u1234\u1235" + + def test_match_start(self): + import re + match = re.search(u"\u1234", u"\u1233\u1234\u1235") + assert match.start() == 1 diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -259,10 +259,10 @@ result[0] = ch if not composed: # If decomposed normalization we are done - return space.newunicode(u''.join([unichr(i) for i in result[:j]])) + return self.build(space, result, stop=j) if j <= 1: - return space.newunicode(u''.join([unichr(i) for i in result[:j]])) + return self.build(space, result, stop=j) current = result[0] starter_pos = 0 @@ -310,7 +310,10 @@ result[starter_pos] = current - return space.newunicode(u''.join([unichr(i) for i in result[:next_insert]])) + return self.build(space, result, stop=next_insert) + + def build(self, space, r, stop): + return space.newunicode(u''.join([unichr(i) for i in r[:stop]])) methods = {} diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -135,6 +135,11 @@ check(u'a' + 'b', u'ab') check('a' + u'b', u'ab') + def test_getitem(self): + assert u'abc'[2] == 'c' + raises(IndexError, u'abc'.__getitem__, 15) + assert u'g\u0105\u015b\u0107'[2] == u'\u015b' + def test_join(self): def check(a, b): assert a == b @@ -171,6 +176,8 @@ assert u'\n\n'.splitlines() == [u'', u''] assert u'a\nb\nc'.splitlines(1) == [u'a\n', u'b\n', u'c'] assert u'\na\nb\n'.splitlines(1) == [u'\n', u'a\n', u'b\n'] + assert ((u'a' + '\xc2\x85'.decode('utf8') + u'b\n').splitlines() == + ['a', 'b']) def test_zfill(self): assert u'123'.zfill(2) == u'123' @@ -217,6 +224,7 @@ raises(ValueError, u'abc'.split, u'') raises(ValueError, 'abc'.split, u'') assert u' a b c d'.split(None, 0) == [u'a b c d'] + assert u'a\nb\u1680c'.split() == [u'a', u'b', u'c'] def test_rsplit(self): assert u"".rsplit() == [] @@ -246,6 +254,7 @@ raises(ValueError, 'abc'.rsplit, u'') assert u' a b c '.rsplit(None, 0) == [u' a b c'] assert u''.rsplit('aaa') == [u''] + assert u'a\nb\u1680c'.rsplit() == [u'a', u'b', u'c'] def test_split_rsplit_str_unicode(self): x = 'abc'.split(u'b') @@ -291,6 +300,8 @@ assert u"bROWN fOX".title() == u"Brown Fox" assert u"Brown Fox".title() == u"Brown Fox" assert u"bro!wn fox".title() == u"Bro!Wn Fox" + assert u"brow\u4321n fox".title() == u"Brow\u4321N Fox" + assert 
u'\ud800'.title() == u'\ud800' def test_istitle(self): assert u"".istitle() == False @@ -315,6 +326,18 @@ assert not u'\u01c5abc'.islower() assert not u'\u01c5ABC'.isupper() + def test_lower_upper(self): + assert u'a'.lower() == u'a' + assert u'A'.lower() == u'a' + assert u'\u0105'.lower() == u'\u0105' + assert u'\u0104'.lower() == u'\u0105' + assert u'\ud800'.lower() == u'\ud800' + assert u'a'.upper() == u'A' + assert u'A'.upper() == u'A' + assert u'\u0105'.upper() == u'\u0104' + assert u'\u0104'.upper() == u'\u0104' + assert u'\ud800'.upper() == u'\ud800' + def test_capitalize(self): assert u"brown fox".capitalize() == u"Brown fox" assert u' hello '.capitalize() == u' hello ' @@ -336,6 +359,8 @@ # check with Ll chars with no upper - nothing changes here assert (u'\u019b\u1d00\u1d86\u0221\u1fb7'.capitalize() == u'\u019b\u1d00\u1d86\u0221\u1fb7') + assert u'\ud800'.capitalize() == u'\ud800' + assert u'xx\ud800'.capitalize() == u'Xx\ud800' def test_rjust(self): s = u"abc" @@ -376,6 +401,16 @@ assert u'one!two!three!'.replace('x', '@') == u'one!two!three!' assert u'one!two!three!'.replace(u'x', '@', 2) == u'one!two!three!' assert u'abc'.replace('', u'-') == u'-a-b-c-' + assert u'\u1234'.replace(u'', '-') == u'-\u1234-' + assert u'\u0234\u5678'.replace('', u'-') == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', 0) == u'\u0234\u5678' + assert u'\u0234\u5678'.replace('', u'-', 1) == u'-\u0234\u5678' + assert u'\u0234\u5678'.replace('', u'-', 2) == u'-\u0234-\u5678' + assert u'\u0234\u5678'.replace('', u'-', 3) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', 4) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', 700) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', -1) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', -42) == u'-\u0234-\u5678-' assert u'abc'.replace(u'', u'-', 3) == u'-a-b-c' assert u'abc'.replace('', '-', 0) == u'abc' assert u''.replace(u'', '') == u'' @@ -479,6 +514,9 @@ assert u''.startswith(u'a') is False assert u'x'.startswith(u'xx') is False assert u'y'.startswith(u'xx') is False + assert u'\u1234\u5678\u4321'.startswith(u'\u1234') is True + assert u'\u1234\u5678\u4321'.startswith(u'\u1234\u4321') is False + assert u'\u1234'.startswith(u'', 1, 0) is True def test_startswith_more(self): assert u'ab'.startswith(u'a', 0) is True @@ -589,7 +627,7 @@ raises(TypeError, u'hello'.translate) raises(TypeError, u'abababc'.translate, {ord('a'):''}) - def test_unicode_form_encoded_object(self): + def test_unicode_from_encoded_object(self): assert unicode('x', 'utf-8') == u'x' assert unicode('x', 'utf-8', 'strict') == u'x' @@ -634,6 +672,8 @@ assert u'\u20ac'.encode('utf-8') == '\xe2\x82\xac' assert u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82' assert u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96' + assert u'\ud800\udc02'.encode('uTf-8') == '\xf0\x90\x80\x82' + assert u'\ud84d\udc56'.encode('Utf8') == '\xf0\xa3\x91\x96' assert u'\ud800'.encode('utf-8') == '\xed\xa0\x80' assert u'\udc00'.encode('utf-8') == '\xed\xb0\x80' assert (u'\ud800\udc02'*1000).encode('utf-8') == '\xf0\x90\x80\x82'*1000 @@ -745,6 +785,7 @@ def test_index(self): assert u"rrarrrrrrrrra".index(u'a', 4, None) == 12 assert u"rrarrrrrrrrra".index(u'a', None, 6) == 2 + assert u"\u1234\u4321\u5678".index(u'\u5678', 1) == 2 def test_rindex(self): from sys import maxint @@ -754,6 +795,7 @@ assert u'abcdefghiabc'.rindex(u'abc', 0, -1) == 0 assert u'abcdefghiabc'.rindex(u'abc', -4*maxint, 4*maxint) == 9 assert 
u'rrarrrrrrrrra'.rindex(u'a', 4, None) == 12 + assert u"\u1234\u5678".rindex(u'\u5678') == 1 raises(ValueError, u'abcdefghiabc'.rindex, u'hib') raises(ValueError, u'defghiabc'.rindex, u'def', 1) @@ -768,12 +810,15 @@ assert u'abcdefghiabc'.rfind(u'') == 12 assert u'abcdefghiabc'.rfind(u'abcd') == 0 assert u'abcdefghiabc'.rfind(u'abcz') == -1 + assert u"\u1234\u5678".rfind(u'\u5678') == 1 def test_rfind_corner_case(self): assert u'abc'.rfind('', 4) == -1 def test_find_index_str_unicode(self): - assert 'abcdefghiabc'.find(u'bc') == 1 + assert u'abcdefghiabc'.find(u'bc') == 1 + assert u'ab\u0105b\u0107'.find('b', 2) == 3 + assert u'ab\u0105b\u0107'.find('b', 0, 1) == -1 assert 'abcdefghiabc'.rfind(u'abc') == 9 raises(UnicodeDecodeError, '\x80'.find, u'') raises(UnicodeDecodeError, '\x80'.rfind, u'') @@ -781,6 +826,7 @@ assert 'abcdefghiabc'.rindex(u'abc') == 9 raises(UnicodeDecodeError, '\x80'.index, u'') raises(UnicodeDecodeError, '\x80'.rindex, u'') + assert u"\u1234\u5678".find(u'\u5678') == 1 def test_count(self): assert u"".count(u"x") ==0 @@ -807,6 +853,7 @@ def test_swapcase(self): assert u'\xe4\xc4\xdf'.swapcase() == u'\xc4\xe4\xdf' + assert u'\ud800'.swapcase() == u'\ud800' def test_buffer(self): buf = buffer(u'XY') @@ -878,16 +925,31 @@ def test_getslice(self): assert u'123456'.__getslice__(1, 5) == u'2345' - s = u"abc" - assert s[:] == "abc" - assert s[1:] == "bc" - assert s[:2] == "ab" - assert s[1:2] == "b" - assert s[-2:] == "bc" - assert s[:-1] == "ab" - assert s[-2:2] == "b" - assert s[1:-1] == "b" - assert s[-2:-1] == "b" + s = u"\u0105b\u0107" + assert s[:] == u"\u0105b\u0107" + assert s[1:] == u"b\u0107" + assert s[:2] == u"\u0105b" + assert s[1:2] == u"b" + assert s[-2:] == u"b\u0107" + assert s[:-1] == u"\u0105b" + assert s[-2:2] == u"b" + assert s[1:-1] == u"b" + assert s[-2:-1] == u"b" + + def test_getitem_slice(self): + assert u'123456'.__getitem__(slice(1, 5)) == u'2345' + s = u"\u0105b\u0107" + assert s[slice(3)] == u"\u0105b\u0107" + assert s[slice(1, 3)] == u"b\u0107" + assert s[slice(2)] == u"\u0105b" + assert s[slice(1,2)] == u"b" + assert s[slice(-2,3)] == u"b\u0107" + assert s[slice(-1)] == u"\u0105b" + assert s[slice(-2,2)] == u"b" + assert s[slice(1,-1)] == u"b" + assert s[slice(-2,-1)] == u"b" + assert u"abcde"[::2] == u"ace" + assert u"\u0105\u0106\u0107abcd"[::2] == u"\u0105\u0107bd" def test_no_len_on_str_iter(self): iterable = u"hello" From pypy.commits at gmail.com Tue Dec 12 00:14:46 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 11 Dec 2017 21:14:46 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: small cleanup Message-ID: <5a2f65c6.50b91c0a.eae32.6de5@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93382:f2b6f7bb4f98 Date: 2017-12-12 05:13 +0000 http://bitbucket.org/pypy/pypy/changeset/f2b6f7bb4f98/ Log: small cleanup diff --git a/pypy/objspace/std/test/test_index.py b/pypy/objspace/std/test/test_index.py --- a/pypy/objspace/std/test/test_index.py +++ b/pypy/objspace/std/test/test_index.py @@ -1,7 +1,5 @@ from py.test import raises -from rpython.rlib import rutf8 - class AppTest_IndexProtocol: def setup_class(self): w_oldstyle = self.space.appexec([], """(): diff --git a/pypy/objspace/std/test/test_lengthhint.py b/pypy/objspace/std/test/test_lengthhint.py --- a/pypy/objspace/std/test/test_lengthhint.py +++ b/pypy/objspace/std/test/test_lengthhint.py @@ -1,6 +1,3 @@ - -from rpython.rlib import rutf8 - from pypy.module._collections.interp_deque import W_Deque from pypy.module.itertools.interp_itertools import W_Repeat 
diff --git a/pypy/objspace/std/test/test_setstrategies.py b/pypy/objspace/std/test/test_setstrategies.py --- a/pypy/objspace/std/test/test_setstrategies.py +++ b/pypy/objspace/std/test/test_setstrategies.py @@ -1,5 +1,3 @@ - -import py from pypy.objspace.std.setobject import W_SetObject from pypy.objspace.std.setobject import ( BytesIteratorImplementation, BytesSetStrategy, EmptySetStrategy, diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1,4 +1,3 @@ - # -*- encoding: utf-8 -*- import py import sys From pypy.commits at gmail.com Tue Dec 12 00:37:42 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 11 Dec 2017 21:37:42 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: hg merge default Message-ID: <5a2f6b26.2d8fdf0a.b0cf.709c@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93383:b8d6b2298b9b Date: 2017-12-12 05:37 +0000 http://bitbucket.org/pypy/pypy/changeset/b8d6b2298b9b/ Log: hg merge default diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -149,7 +149,7 @@ xz-devel # For lzma on PyPy3. (XXX plus the SLES11 version of libgdbm-dev and tk-dev) -On Mac OS X:: +On Mac OS X: Most of these build-time dependencies are installed alongside the Developer Tools. However, note that in order for the installation to diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -355,7 +355,11 @@ containers (as list items or in sets for example), the exact rule of equality used is "``if x is y or x == y``" (on both CPython and PyPy); as a consequence, because all ``nans`` are identical in PyPy, you -cannot have several of them in a set, unlike in CPython. (Issue `#1974`__) +cannot have several of them in a set, unlike in CPython. (Issue `#1974`__). +Another consequence is that ``cmp(float('nan'), float('nan')) == 0``, because +``cmp`` checks with ``is`` first whether the arguments are identical (there is +no good value to return from this call to ``cmp``, because ``cmp`` pretends +that there is a total order on floats, but that is wrong for NaNs). .. __: https://bitbucket.org/pypy/pypy/issue/1974/different-behaviour-for-collections-of diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -5,26 +5,33 @@ .. this is a revision shortly after release-pypy2.7-v5.9.0 .. startrev:d56dadcef996 + .. branch: cppyy-packaging + Cleanup and improve cppyy packaging .. branch: docs-osx-brew-openssl .. branch: keep-debug-symbols + Add a smartstrip tool, which can optionally keep the debug symbols in a separate file, instead of just stripping them away. Use it in packaging .. branch: bsd-patches + Fix failures on FreeBSD, contributed by David Naylor as patches on the issue tracker (issues 2694, 2695, 2696, 2697) .. branch: run-extra-tests + Run extra_tests/ in buildbot .. branch: vmprof-0.4.10 + Upgrade the _vmprof backend to vmprof 0.4.10 .. branch: fix-vmprof-stacklet-switch + Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) .. branch: win32-vcvars diff --git a/pypy/doc/whatsnew-pypy2-5.6.0.rst b/pypy/doc/whatsnew-pypy2-5.6.0.rst --- a/pypy/doc/whatsnew-pypy2-5.6.0.rst +++ b/pypy/doc/whatsnew-pypy2-5.6.0.rst @@ -101,7 +101,7 @@ .. 
branch: newinitwarn -Match CPython's stricter handling of __new/init__ arguments +Match CPython's stricter handling of ``__new__``/``__init__`` arguments .. branch: openssl-1.1 diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -11,7 +11,7 @@ To build pypy-c you need a working python environment, and a C compiler. It is possible to translate with a CPython 2.6 or later, but this is not -the preferred way, because it will take a lot longer to run � depending +the preferred way, because it will take a lot longer to run – depending on your architecture, between two and three times as long. So head to `our downloads`_ and get the latest stable version. @@ -103,6 +103,7 @@ must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the ``...\9.0\VC`` directory, and edit it, changing the lines that set ``VCINSTALLDIR`` and ``WindowsSdkDir``:: + set VCINSTALLDIR=%~dp0\ set WindowsSdkDir=%~dp0\..\WinSDK\ diff --git a/pypy/module/__builtin__/test/test_builtin.py b/pypy/module/__builtin__/test/test_builtin.py --- a/pypy/module/__builtin__/test/test_builtin.py +++ b/pypy/module/__builtin__/test/test_builtin.py @@ -404,6 +404,7 @@ def test_cmp(self): + assert cmp(float('nan'), float('nan')) == 0 assert cmp(9,9) == 0 assert cmp(0,9) < 0 assert cmp(9,0) > 0 diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -31,9 +31,15 @@ pdir.join('file2').write("test2") pdir.join('another_longer_file_name').write("test3") mod.pdir = pdir - unicode_dir = udir.ensure('fi\xc5\x9fier.txt', dir=True) + if sys.platform == 'darwin': + # see issue https://bugs.python.org/issue31380 + unicode_dir = udir.ensure('fixc5x9fier.txt', dir=True) + file_name = 'cafxe9' + else: + unicode_dir = udir.ensure('fi\xc5\x9fier.txt', dir=True) + file_name = 'caf\xe9' unicode_dir.join('somefile').write('who cares?') - unicode_dir.join('caf\xe9').write('who knows?') + unicode_dir.join(file_name).write('who knows?') mod.unicode_dir = unicode_dir # in applevel tests, os.stat uses the CPython os.stat. diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py --- a/pypy/module/time/interp_time.py +++ b/pypy/module/time/interp_time.py @@ -310,12 +310,19 @@ errno = rposix.get_saved_errno() return os.strerror(errno) +def _check_sleep_arg(space, secs): + from rpython.rlib.rfloat import isinf, isnan + if secs < 0: + raise oefmt(space.w_IOError, + "Invalid argument: negative time in sleep") + if isinf(secs) or isnan(secs): + raise oefmt(space.w_IOError, + "Invalid argument: inf or nan") + if sys.platform != 'win32': @unwrap_spec(secs=float) def sleep(space, secs): - if secs < 0: - raise oefmt(space.w_IOError, - "Invalid argument: negative time in sleep") + _check_sleep_arg(space, secs) rtime.sleep(secs) else: from rpython.rlib import rwin32 @@ -336,9 +343,7 @@ OSError(EINTR, "sleep() interrupted")) @unwrap_spec(secs=float) def sleep(space, secs): - if secs < 0: - raise oefmt(space.w_IOError, - "Invalid argument: negative time in sleep") + _check_sleep_arg(space, secs) # as decreed by Guido, only the main thread can be # interrupted. 
main_thread = space.fromcache(State).main_thread diff --git a/pypy/module/time/test/test_time.py b/pypy/module/time/test/test_time.py --- a/pypy/module/time/test/test_time.py +++ b/pypy/module/time/test/test_time.py @@ -19,6 +19,8 @@ raises(TypeError, time.sleep, "foo") time.sleep(0.12345) raises(IOError, time.sleep, -1.0) + raises(IOError, time.sleep, float('nan')) + raises(IOError, time.sleep, float('inf')) def test_clock(self): import time diff --git a/rpython/rlib/debug.py b/rpython/rlib/debug.py --- a/rpython/rlib/debug.py +++ b/rpython/rlib/debug.py @@ -288,6 +288,9 @@ def mark_dict_non_null(d): """ Mark dictionary as having non-null keys and values. A warning would be emitted (not an error!) in case annotation disagrees. + + This doesn't work for r_dicts. For them, pass + r_dict(..., force_non_null=True) to the constructor. """ assert isinstance(d, dict) return d From pypy.commits at gmail.com Tue Dec 12 03:46:50 2017 From: pypy.commits at gmail.com (arigo) Date: Tue, 12 Dec 2017 00:46:50 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Tests and fixes Message-ID: <5a2f977a.11421c0a.396eb.6bd8@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93384:d978136e55f3 Date: 2017-12-12 09:46 +0100 http://bitbucket.org/pypy/pypy/changeset/d978136e55f3/ Log: Tests and fixes * cannot access a position greater than ctx.end; need some small refactorings and added an assert * w_unicode_obj needs to be copied by fresh_copy() too, so move it to interp_sre where it really belongs diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -156,6 +156,19 @@ return rsre_core.BufMatchContext(self.code, buf, pos, endpos, self.flags) + def fresh_copy(self, ctx, start): + if isinstance(ctx, rsre_utf8.Utf8MatchContext): + result = rsre_utf8.Utf8MatchContext( + ctx.pattern, ctx._utf8, start, ctx.end, ctx.flags) + result.w_unicode_obj = ctx.w_unicode_obj + return result + if isinstance(ctx, rsre_core.StrMatchContext): + return self._make_str_match_context(ctx._string, start, ctx.end) + if isinstance(ctx, rsre_core.BufMatchContext): + return rsre_core.BufMatchContext( + ctx.pattern, ctx._buffer, start, ctx.end, ctx.flags) + raise AssertionError("bad ctx type") + def _make_str_match_context(self, str, pos, endpos): # for tests to override return rsre_core.StrMatchContext(self.code, str, @@ -182,7 +195,7 @@ space = self.space matchlist_w = [] ctx = self.make_ctx(w_string, pos, endpos) - while ctx.match_start <= ctx.end: + while True: if not searchcontext(space, ctx): break num_groups = self.num_groups @@ -201,6 +214,8 @@ matchlist_w.append(w_item) reset_at = ctx.match_end if ctx.match_start == ctx.match_end: + if reset_at == ctx.end: + break reset_at = ctx.next_indirect(reset_at) ctx.reset(reset_at) return space.newlist(matchlist_w) @@ -321,9 +336,6 @@ _sub_append_slice( ctx, space, use_builder, sublist_w, strbuilder, last_pos, ctx.match_start) - start = ctx.match_end - if start == ctx.match_start: - start = ctx.next_indirect(start) if not (last_pos == ctx.match_start == ctx.match_end and n > 0): # the above ignores empty matches on latest position @@ -345,6 +357,12 @@ n += 1 elif last_pos >= ctx.end: break # empty match at the end: finished + + start = ctx.match_end + if start == ctx.match_start: + if start == ctx.end: + break + start = ctx.next_indirect(start) ctx.reset(start) if last_pos < ctx.end: @@ -663,40 +681,52 @@ self.srepat = pattern self.ctx = ctx # 'self.ctx' is always a fresh context 
in which no searching - # or matching succeeded so far. + # or matching succeeded so far. It is None when the iterator is + # exhausted. def iter_w(self): return self def next_w(self): - if self.ctx.match_start > self.ctx.end: + if self.ctx is None: raise OperationError(self.space.w_StopIteration, self.space.w_None) if not searchcontext(self.space, self.ctx): raise OperationError(self.space.w_StopIteration, self.space.w_None) return self.getmatch(True) def match_w(self): - if self.ctx.match_start > self.ctx.end: + if self.ctx is None: return self.space.w_None return self.getmatch(matchcontext(self.space, self.ctx)) def search_w(self): - if self.ctx.match_start > self.ctx.end: + if self.ctx is None: return self.space.w_None return self.getmatch(searchcontext(self.space, self.ctx)) def getmatch(self, found): + ctx = self.ctx + assert ctx is not None if found: - ctx = self.ctx nextstart = ctx.match_end + exhausted = False if ctx.match_start == nextstart: - nextstart = ctx.next_indirect(nextstart) - self.ctx = ctx.fresh_copy(nextstart) + if nextstart == ctx.end: + exhausted = True + else: + nextstart = ctx.next_indirect(nextstart) + if exhausted: + self.ctx = None + else: + self.ctx = self.srepat.fresh_copy(ctx, nextstart) match = W_SRE_Match(self.srepat, ctx) return match else: # obscure corner case - self.ctx.match_start = self.ctx.next_indirect(self.ctx.match_start) + if ctx.match_start == ctx.end: + self.ctx = None + else: + ctx.match_start = ctx.next_indirect(ctx.match_start) return None W_SRE_Scanner.typedef = TypeDef( diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py --- a/pypy/module/_sre/test/test_app_sre.py +++ b/pypy/module/_sre/test/test_app_sre.py @@ -28,8 +28,10 @@ # we're accepting or escaping a Position to app-level, which we # should not: Positions are meant to be byte indexes inside a # possibly UTF8 string, not character indexes. 
- start = support.Position(start) - end = support.Position(end) + if not isinstance(start, support.Position): + start = support.Position(start) + if not isinstance(end, support.Position): + end = support.Position(end) return support.MatchContextForTests(self.code, str, start, end, self.flags) def _bytepos_to_charindex(self, bytepos): @@ -140,6 +142,9 @@ assert ['', 'a', 'l', 'a', 'lla'] == re.split("b(a)", "balballa") assert ['', 'a', None, 'l', 'u', None, 'lla'] == ( re.split("b([ua]|(s))", "balbulla")) + assert ["abc"] == re.split("", "abc") + assert ["abc"] == re.split("X?", "abc") + assert ["a", "c"] == re.split("b?", "abc") def test_weakref(self): import re, _weakref @@ -253,6 +258,7 @@ assert "rbd\nbr\n" == re.sub("a(.)", r"b\1\n", "radar") assert ("rbd\nbr\n", 2) == re.subn("a(.)", r"b\1\n", "radar") assert ("bbbba", 2) == re.subn("a", "b", "ababa", 2) + assert "XaXbXcX" == re.sub("", "X", "abc") def test_sub_unicode(self): import re diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -173,6 +173,7 @@ def go_forward_by_bytes(self, base_position, index): return base_position + index def next_indirect(self, position): + assert position < self.end return position + 1 # like next(), but can be called indirectly def prev_indirect(self, position): position -= 1 # like prev(), but can be called indirectly @@ -213,9 +214,6 @@ return (-1, -1) return (fmarks[groupnum], fmarks[groupnum+1]) - def fresh_copy(self, start): - raise NotImplementedError - class FixedMatchContext(AbstractMatchContext): """Abstract subclass to introduce the default implementation for @@ -264,9 +262,6 @@ def get_single_byte(self, base_position, index): return self.str(base_position + index) - def fresh_copy(self, start): - return BufMatchContext(self.pattern, self._buffer, start, - self.end, self.flags) class StrMatchContext(FixedMatchContext): """Concrete subclass for matching in a plain string.""" @@ -293,9 +288,6 @@ def _real_pos(self, index): return index # overridden by tests - def fresh_copy(self, start): - return StrMatchContext(self.pattern, self._string, start, - self.end, self.flags) class UnicodeMatchContext(FixedMatchContext): """Concrete subclass for matching in a unicode string.""" diff --git a/rpython/rlib/rsre/rsre_utf8.py b/rpython/rlib/rsre/rsre_utf8.py --- a/rpython/rlib/rsre/rsre_utf8.py +++ b/rpython/rlib/rsre/rsre_utf8.py @@ -27,10 +27,6 @@ def get_single_byte(self, base_position, index): return self.str(base_position + index) - def fresh_copy(self, start): - return Utf8MatchContext(self.pattern, self._utf8, start, - self.end, self.flags) - def next(self, position): return rutf8.next_codepoint_pos(self._utf8, position) next_indirect = next From pypy.commits at gmail.com Tue Dec 12 11:28:47 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 12 Dec 2017 08:28:47 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Use Utf8StringBuilder in decode_unicode_escape() and fix handling of invalid \U escapes Message-ID: <5a3003bf.238edf0a.80921.99b2@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93385:f6e0cc1e875e Date: 2017-12-12 16:28 +0000 http://bitbucket.org/pypy/pypy/changeset/f6e0cc1e875e/ Log: Use Utf8StringBuilder in decode_unicode_escape() and fix handling of invalid \U escapes diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -351,56 +351,48 @@ endinpos = 
pos while endinpos < len(s) and s[endinpos] in hexdigits: endinpos += 1 - res, pos = errorhandler(errors, encoding, - message, s, pos-2, endinpos) - size = rutf8.check_utf8(res, True) + res, pos = errorhandler( + errors, encoding, message, s, pos - 2, endinpos) builder.append(res) else: try: - chr = r_uint(int(s[pos:pos+digits], 16)) + chr = r_uint(int(s[pos:pos + digits], 16)) except ValueError: endinpos = pos while s[endinpos] in hexdigits: endinpos += 1 - res, pos = errorhandler(errors, encoding, - message, s, pos-2, endinpos) - size = rutf8.check_utf8(res, True) + res, pos = errorhandler( + errors, encoding, message, s, pos - 2, endinpos) builder.append(res) else: # when we get here, chr is a 32-bit unicode character try: - rutf8.unichr_as_utf8_append(builder, intmask(chr), True) + builder.append_code(chr) + pos += digits except ValueError: message = "illegal Unicode character" - res, pos = errorhandler(errors, encoding, - message, s, pos-2, pos+digits) - size = rutf8.check_utf8(res, True) + res, pos = errorhandler( + errors, encoding, message, s, pos - 2, pos + digits) builder.append(res) - else: - pos += digits - size = 1 - - return pos, size + return pos def str_decode_unicode_escape(s, errors, final, errorhandler, ud_handler): size = len(s) if size == 0: return '', 0, 0 - builder = StringBuilder(size) + builder = rutf8.Utf8StringBuilder(size) pos = 0 - outsize = 0 while pos < size: ch = s[pos] # Non-escape characters are interpreted as Unicode ordinals if ch != '\\': if ord(ch) > 0x7F: - rutf8.unichr_as_utf8_append(builder, ord(ch)) + builder.append_code(ord(ch)) else: builder.append(ch) pos += 1 - outsize += 1 continue # - Escapes @@ -408,88 +400,70 @@ if pos >= size: message = "\\ at end of string" res, pos = errorhandler(errors, "unicodeescape", - message, s, pos-1, size) - newsize = rutf8.check_utf8(res, True) - outsize + newsize + message, s, pos - 1, size) builder.append(res) continue ch = s[pos] pos += 1 # \x escapes - if ch == '\n': pass + if ch == '\n': + pass elif ch == '\\': - builder.append('\\') - outsize += 1 + builder.append_char('\\') elif ch == '\'': - builder.append('\'') - outsize += 1 + builder.append_char('\'') elif ch == '\"': - builder.append('\"') - outsize += 1 - elif ch == 'b' : - builder.append('\b') - outsize += 1 - elif ch == 'f' : - builder.append('\f') - outsize += 1 - elif ch == 't' : - builder.append('\t') - outsize += 1 - elif ch == 'n' : - builder.append('\n') - outsize += 1 - elif ch == 'r' : - builder.append('\r') - outsize += 1 - elif ch == 'v' : - builder.append('\v') - outsize += 1 - elif ch == 'a' : - builder.append('\a') - outsize += 1 + builder.append_char('\"') + elif ch == 'b': + builder.append_char('\b') + elif ch == 'f': + builder.append_char('\f') + elif ch == 't': + builder.append_char('\t') + elif ch == 'n': + builder.append_char('\n') + elif ch == 'r': + builder.append_char('\r') + elif ch == 'v': + builder.append_char('\v') + elif ch == 'a': + builder.append_char('\a') elif '0' <= ch <= '7': x = ord(ch) - ord('0') if pos < size: ch = s[pos] if '0' <= ch <= '7': pos += 1 - x = (x<<3) + ord(ch) - ord('0') + x = (x << 3) + ord(ch) - ord('0') if pos < size: ch = s[pos] if '0' <= ch <= '7': pos += 1 - x = (x<<3) + ord(ch) - ord('0') - outsize += 1 + x = (x << 3) + ord(ch) - ord('0') if x > 0x7F: - rutf8.unichr_as_utf8_append(builder, x) + builder.append_code(x) else: - builder.append(chr(x)) + builder.append_char(chr(x)) # hex escapes # \xXX elif ch == 'x': digits = 2 message = "truncated \\xXX escape" - pos, newsize = 
hexescape(builder, s, pos, digits, + pos = hexescape(builder, s, pos, digits, "unicodeescape", errorhandler, message, errors) - outsize += newsize - # \uXXXX elif ch == 'u': digits = 4 message = "truncated \\uXXXX escape" - pos, newsize = hexescape(builder, s, pos, digits, + pos = hexescape(builder, s, pos, digits, "unicodeescape", errorhandler, message, errors) - outsize += newsize - # \UXXXXXXXX elif ch == 'U': digits = 8 message = "truncated \\UXXXXXXXX escape" - pos, newsize = hexescape(builder, s, pos, digits, + pos = hexescape(builder, s, pos, digits, "unicodeescape", errorhandler, message, errors) - outsize += newsize - # \N{name} elif ch == 'N' and ud_handler is not None: message = "malformed \\N character escape" @@ -502,38 +476,29 @@ if look < size and s[look] == '}': # found a name. look it up in the unicode database message = "unknown Unicode character name" - name = s[pos+1:look] + name = s[pos + 1:look] code = ud_handler.call(name) if code < 0: - res, pos = errorhandler(errors, "unicodeescape", - message, s, pos-1, look+1) - newsize = rutf8.check_utf8(res, True) - outsize += newsize + res, pos = errorhandler( + errors, "unicodeescape", message, + s, pos - 1, look + 1) builder.append(res) continue pos = look + 1 - outsize += 1 - rutf8.unichr_as_utf8_append(builder, code, - allow_surrogates=True) - # xxx 'code' is probably always within range here... + builder.append_code(code) else: res, pos = errorhandler(errors, "unicodeescape", - message, s, pos-1, look+1) - newsize = rutf8.check_utf8(res, True) - outsize += newsize + message, s, pos - 1, look + 1) builder.append(res) else: res, pos = errorhandler(errors, "unicodeescape", - message, s, pos-1, look+1) - newsize = rutf8.check_utf8(res, True) - outsize += newsize + message, s, pos - 1, look + 1) builder.append(res) else: - builder.append('\\') - builder.append(ch) - outsize += 2 + builder.append_char('\\') + builder.append_code(ord(ch)) - return builder.build(), pos, outsize + return builder.build(), pos, builder.get_length() def wcharpsize2utf8(space, wcharp, size): """Safe version of rffi.wcharpsize2utf8. 
@@ -557,14 +522,14 @@ if size == 0: return '', 0, 0 - result = StringBuilder(size) + builder = rutf8.Utf8StringBuilder(size) pos = 0 while pos < size: ch = s[pos] # Non-escape characters are interpreted as Unicode ordinals if ch != '\\': - rutf8.unichr_as_utf8_append(result, ord(ch), True) + builder.append_code(ord(ch)) pos += 1 continue @@ -575,30 +540,27 @@ pos += 1 if pos == size or s[pos] != '\\': break - result.append('\\') + builder.append_char('\\') # we have a backslash at the end of the string, stop here if pos >= size: - result.append('\\') + builder.append_char('\\') break - if ((pos - bs) & 1 == 0 or - pos >= size or - (s[pos] != 'u' and s[pos] != 'U')): - result.append('\\') - rutf8.unichr_as_utf8_append(result, ord(s[pos]), True) + if ((pos - bs) & 1 == 0 or pos >= size or + (s[pos] != 'u' and s[pos] != 'U')): + builder.append_char('\\') + builder.append_code(ord(s[pos])) pos += 1 continue digits = 4 if s[pos] == 'u' else 8 message = "truncated \\uXXXX" pos += 1 - pos, _ = hexescape(result, s, pos, digits, + pos = hexescape(builder, s, pos, digits, "rawunicodeescape", errorhandler, message, errors) - r = result.build() - lgt = rutf8.check_utf8(r, True) - return r, pos, lgt + return builder.build(), pos, builder.get_length() _utf8_encode_unicode_escape = rutf8.make_utf8_escape_function() diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -120,6 +120,10 @@ {0: u'\U0010FFFF', 1: u'b', 2: u'c'}) == (u"\U0010FFFFbc", 3)) + def test_escape_decode(self): + from _codecs import unicode_escape_decode as decode + assert decode('\\\x80') == (u'\\\x80', 2) + def test_escape_decode_errors(self): from _codecs import escape_decode as decode raises(ValueError, decode, br"\x") @@ -327,10 +331,8 @@ for decode in [unicode_escape_decode, raw_unicode_escape_decode]: for c, d in ('u', 4), ('U', 4): for i in range(d): - raises(UnicodeDecodeError, decode, - "\\" + c + "0"*i) - raises(UnicodeDecodeError, decode, - "[\\" + c + "0"*i + "]") + raises(UnicodeDecodeError, decode, "\\" + c + "0"*i) + raises(UnicodeDecodeError, decode, "[\\" + c + "0"*i + "]") data = "[\\" + c + "0"*i + "]\\" + c + "0"*i assert decode(data, "ignore") == (u"[]", len(data)) assert decode(data, "replace") == (u"[\ufffd]\ufffd", len(data)) diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -706,18 +706,18 @@ @always_inline def append_char(self, s): # for characters, ascii + self._s.append(s) self._lgt += 1 - self._s.append(s) @try_inline def append_code(self, code): + unichr_as_utf8_append(self._s, code, True) self._lgt += 1 - unichr_as_utf8_append(self._s, code, True) @always_inline def append_utf8(self, utf8, length): + self._s.append(utf8) self._lgt += length - self._s.append(utf8) @always_inline def build(self): diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -1,4 +1,4 @@ -import py +import pytest import sys from hypothesis import given, strategies, settings, example @@ -9,7 +9,8 @@ def test_unichr_as_utf8(c, allow_surrogates): i = ord(c) if not allow_surrogates and 0xD800 <= i <= 0xDFFF: - py.test.raises(ValueError, rutf8.unichr_as_utf8, i, allow_surrogates) + with pytest.raises(ValueError): + rutf8.unichr_as_utf8(i, allow_surrogates) else: u = rutf8.unichr_as_utf8(i, allow_surrogates) assert u == c.encode('utf8') 
@@ -191,6 +192,13 @@ s.append_code(0xD800) assert s.get_length() == 5 +def test_utf8_string_builder_bad_code(): + s = rutf8.Utf8StringBuilder() + with pytest.raises(ValueError): + s.append_code(0x110000) + assert s.build() == '' + assert s.get_length() == 0 + @given(strategies.text()) def test_utf8_iterator(arg): u = rutf8.Utf8StringIterator(arg.encode('utf8')) From pypy.commits at gmail.com Tue Dec 12 12:27:55 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 12 Dec 2017 09:27:55 -0800 (PST) Subject: [pypy-commit] pypy default: merge rdict-fast-hash: Message-ID: <5a30119b.01a4df0a.ad75b.3fdd@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: Changeset: r93386:a00ca61351ba Date: 2017-12-12 18:27 +0100 http://bitbucket.org/pypy/pypy/changeset/a00ca61351ba/ Log: merge rdict-fast-hash: make it possible to declare that the hash and eq functions used in an objectmodel.r_dict are "simple", which means that they will not change the dict, and that the hash function is fast enough so that caching the hash is not necessary. diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -36,3 +36,6 @@ .. branch: win32-vcvars +.. branch rdict-fast-hash + +Make it possible to declare that the hash function of an r_dict is fast in RPython. diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -71,7 +71,7 @@ self.ll_chars = rffi.str2charp(s) self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw') self.pos = 0 - self.cache = r_dict(slice_eq, slice_hash) + self.cache = r_dict(slice_eq, slice_hash, simple_hash_eq=True) def close(self): rffi.free_charp(self.ll_chars) diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -194,13 +194,14 @@ listdef.generalize_range_step(flags['range_step']) return SomeList(listdef) - def getdictdef(self, is_r_dict=False, force_non_null=False): + def getdictdef(self, is_r_dict=False, force_non_null=False, simple_hash_eq=False): """Get the DictDef associated with the current position.""" try: dictdef = self.dictdefs[self.position_key] except KeyError: dictdef = DictDef(self, is_r_dict=is_r_dict, - force_non_null=force_non_null) + force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) self.dictdefs[self.position_key] = dictdef return dictdef diff --git a/rpython/annotator/builtin.py b/rpython/annotator/builtin.py --- a/rpython/annotator/builtin.py +++ b/rpython/annotator/builtin.py @@ -237,22 +237,30 @@ return SomeInstance(clsdef) @analyzer_for(rpython.rlib.objectmodel.r_dict) -def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None): +def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): + return _r_dict_helper(SomeDict, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq) + + at analyzer_for(rpython.rlib.objectmodel.r_ordereddict) +def robjmodel_r_ordereddict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): + return _r_dict_helper(SomeOrderedDict, s_eqfn, s_hashfn, + s_force_non_null, s_simple_hash_eq) + +def _r_dict_helper(cls, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq): if s_force_non_null is None: force_non_null = False else: assert s_force_non_null.is_constant() force_non_null = s_force_non_null.const + if s_simple_hash_eq is None: + simple_hash_eq = False + else: + 
assert s_simple_hash_eq.is_constant() + simple_hash_eq = s_simple_hash_eq.const dictdef = getbookkeeper().getdictdef(is_r_dict=True, - force_non_null=force_non_null) + force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeDict(dictdef) - - at analyzer_for(rpython.rlib.objectmodel.r_ordereddict) -def robjmodel_r_ordereddict(s_eqfn, s_hashfn): - dictdef = getbookkeeper().getdictdef(is_r_dict=True) - dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeOrderedDict(dictdef) + return cls(dictdef) @analyzer_for(rpython.rlib.objectmodel.hlinvoke) def robjmodel_hlinvoke(s_repr, s_llcallable, *args_s): diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py --- a/rpython/annotator/dictdef.py +++ b/rpython/annotator/dictdef.py @@ -81,12 +81,14 @@ def __init__(self, bookkeeper, s_key = s_ImpossibleValue, s_value = s_ImpossibleValue, is_r_dict = False, - force_non_null = False): + force_non_null = False, + simple_hash_eq = False): self.dictkey = DictKey(bookkeeper, s_key, is_r_dict) self.dictkey.itemof[self] = True self.dictvalue = DictValue(bookkeeper, s_value) self.dictvalue.itemof[self] = True self.force_non_null = force_non_null + self.simple_hash_eq = simple_hash_eq def read_key(self, position_key): self.dictkey.read_locations.add(position_key) diff --git a/rpython/jit/metainterp/typesystem.py b/rpython/jit/metainterp/typesystem.py --- a/rpython/jit/metainterp/typesystem.py +++ b/rpython/jit/metainterp/typesystem.py @@ -106,11 +106,11 @@ # It is an r_dict on lltype. Two copies, to avoid conflicts with # the value type. Note that NULL is not allowed as a key. def new_ref_dict(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def new_ref_dict_2(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def new_ref_dict_3(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def cast_vtable_to_hashable(self, cpu, ptr): adr = llmemory.cast_ptr_to_adr(ptr) diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -748,11 +748,19 @@ def _newdict(self): return {} - def __init__(self, key_eq, key_hash, force_non_null=False): + def __init__(self, key_eq, key_hash, force_non_null=False, simple_hash_eq=False): + """ force_non_null=True means that the key can never be None (even if + the annotator things it could be) + + simple_hash_eq=True means that the hash function is very fast, meaning it's + efficient enough that the dict does not have to store the hash per key. + It also implies that neither the hash nor the eq function will mutate + the dictionary. 
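In RPython user code the new flag is just an extra keyword argument to the r_dict constructor. A minimal sketch, assuming hypothetical key functions that are cheap and side-effect free (the precondition described above):

    from rpython.rlib.objectmodel import r_dict

    def eq_key(k1, k2):       # must not mutate the dict
        return k1 == k2

    def hash_key(k):          # cheap, so no per-entry hash cache is needed
        return ~k

    d = r_dict(eq_key, hash_key, simple_hash_eq=True)
    d[5] = 7
    assert d[5] == 7
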
""" self._dict = self._newdict() self.key_eq = key_eq self.key_hash = key_hash self.force_non_null = force_non_null + self.simple_hash_eq = simple_hash_eq def __getitem__(self, key): return self._dict[_r_dictkey(self, key)] diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py --- a/rpython/rlib/test/test_objectmodel.py +++ b/rpython/rlib/test/test_objectmodel.py @@ -330,6 +330,13 @@ res = self.interpret(g, [3]) assert res == 77 + def test_r_dict_fast_functions(self): + def fn(): + d1 = r_dict(strange_key_eq, strange_key_hash, simple_hash_eq=True) + return play_with_r_dict(d1) + res = self.interpret(fn, []) + assert res + def test_prepare_dict_update(self): def g(n): d = {} diff --git a/rpython/rtyper/lltypesystem/rdict.py b/rpython/rtyper/lltypesystem/rdict.py --- a/rpython/rtyper/lltypesystem/rdict.py +++ b/rpython/rtyper/lltypesystem/rdict.py @@ -42,7 +42,8 @@ class DictRepr(AbstractDictRepr): def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue, - custom_eq_hash=None, force_non_null=False): + custom_eq_hash=None, force_non_null=False, fast_hash=False): + # fast_hash is ignored (only implemented in rordereddict.py) self.rtyper = rtyper self.DICT = lltype.GcForwardReference() self.lowleveltype = lltype.Ptr(self.DICT) diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py --- a/rpython/rtyper/lltypesystem/rordereddict.py +++ b/rpython/rtyper/lltypesystem/rordereddict.py @@ -66,7 +66,7 @@ def get_ll_dict(DICTKEY, DICTVALUE, get_custom_eq_hash=None, DICT=None, ll_fasthash_function=None, ll_hash_function=None, - ll_eq_function=None, method_cache={}, + ll_eq_function=None, method_cache={}, simple_hash_eq=False, dummykeyobj=None, dummyvalueobj=None, rtyper=None): # get the actual DICT type. 
if DICT is None, it's created, otherwise # forward reference is becoming DICT @@ -114,11 +114,14 @@ # * the value entryfields.append(("value", DICTVALUE)) - if ll_fasthash_function is None: + if simple_hash_eq: + assert get_custom_eq_hash is not None + entrymeths['entry_hash'] = ll_hash_custom_fast + elif ll_fasthash_function is None: entryfields.append(("f_hash", lltype.Signed)) - entrymeths['hash'] = ll_hash_from_cache + entrymeths['entry_hash'] = ll_hash_from_cache else: - entrymeths['hash'] = ll_hash_recomputed + entrymeths['entry_hash'] = ll_hash_recomputed entrymeths['fasthashfn'] = ll_fasthash_function # Build the lltype data structures @@ -140,7 +143,7 @@ 'keyeq': ll_keyeq_custom, 'r_rdict_eqfn': r_rdict_eqfn, 'r_rdict_hashfn': r_rdict_hashfn, - 'paranoia': True, + 'paranoia': not simple_hash_eq, } else: # figure out which functions must be used to hash and compare @@ -167,13 +170,14 @@ class OrderedDictRepr(AbstractDictRepr): def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue, - custom_eq_hash=None, force_non_null=False): + custom_eq_hash=None, force_non_null=False, simple_hash_eq=False): #assert not force_non_null self.rtyper = rtyper self.finalized = False self.DICT = lltype.GcForwardReference() self.lowleveltype = lltype.Ptr(self.DICT) self.custom_eq_hash = custom_eq_hash is not None + self.simple_hash_eq = simple_hash_eq if not isinstance(key_repr, rmodel.Repr): # not computed yet, done by setup() assert callable(key_repr) self._key_repr_computer = key_repr @@ -211,6 +215,7 @@ self.r_rdict_eqfn, self.r_rdict_hashfn = ( self._custom_eq_hash_repr()) kwd['get_custom_eq_hash'] = self._custom_eq_hash_repr + kwd['simple_hash_eq'] = self.simple_hash_eq else: kwd['ll_hash_function'] = self.key_repr.get_ll_hash_function() kwd['ll_eq_function'] = self.key_repr.get_ll_eq_function() @@ -600,15 +605,21 @@ dummy = ENTRIES.dummy_obj.ll_dummy_value entries[i].value = dummy - at signature(types.any(), types.int(), returns=types.any()) -def ll_hash_from_cache(entries, i): + at signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_from_cache(entries, d, i): return entries[i].f_hash - at signature(types.any(), types.int(), returns=types.any()) -def ll_hash_recomputed(entries, i): + at signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_recomputed(entries, d, i): ENTRIES = lltype.typeOf(entries).TO return ENTRIES.fasthashfn(entries[i].key) + at signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_custom_fast(entries, d, i): + DICT = lltype.typeOf(d).TO + key = entries[i].key + return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key) + def ll_keyhash_custom(d, key): DICT = lltype.typeOf(d).TO return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key) @@ -962,22 +973,22 @@ if fun == FUNC_BYTE: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_BYTE) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_BYTE) i += 1 elif fun == FUNC_SHORT: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_SHORT) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_SHORT) i += 1 elif IS_64BIT and fun == FUNC_INT: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_INT) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_INT) i += 1 elif fun == FUNC_LONG: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_LONG) + 
ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_LONG) i += 1 else: assert False @@ -1015,7 +1026,7 @@ checkingkey = entries[index - VALID_OFFSET].key if direct_compare and checkingkey == key: return index - VALID_OFFSET # found the entry - if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash: + if d.keyeq is not None and entries.entry_hash(d, index - VALID_OFFSET) == hash: # correct hash, maybe the key is e.g. a different pointer to # an equal object found = d.keyeq(checkingkey, key) @@ -1056,7 +1067,7 @@ checkingkey = entries[index - VALID_OFFSET].key if direct_compare and checkingkey == key: return index - VALID_OFFSET # found the entry - if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash: + if d.keyeq is not None and entries.entry_hash(d, index - VALID_OFFSET) == hash: # correct hash, maybe the key is e.g. a different pointer to # an equal object found = d.keyeq(checkingkey, key) @@ -1305,14 +1316,14 @@ def ll_dict_update(dic1, dic2): if dic1 == dic2: return - ll_ensure_indexes(dic2) # needed for entries.hash() below + ll_ensure_indexes(dic2) # needed for entries.entry_hash() below ll_prepare_dict_update(dic1, dic2.num_live_items) i = 0 while i < dic2.num_ever_used_items: entries = dic2.entries if entries.valid(i): entry = entries[i] - hash = entries.hash(i) + hash = entries.entry_hash(dic2, i) key = entry.key value = entry.value index = dic1.lookup_function(dic1, key, hash, FLAG_STORE) @@ -1413,7 +1424,7 @@ r = lltype.malloc(ELEM.TO) r.item0 = recast(ELEM.TO.item0, entry.key) r.item1 = recast(ELEM.TO.item1, entry.value) - _ll_dict_del(dic, dic.entries.hash(i), i) + _ll_dict_del(dic, dic.entries.entry_hash(dic, i), i) return r def ll_dict_pop(dic, key): diff --git a/rpython/rtyper/rbuiltin.py b/rpython/rtyper/rbuiltin.py --- a/rpython/rtyper/rbuiltin.py +++ b/rpython/rtyper/rbuiltin.py @@ -717,9 +717,9 @@ @typer_for(OrderedDict) @typer_for(objectmodel.r_dict) @typer_for(objectmodel.r_ordereddict) -def rtype_dict_constructor(hop, i_force_non_null=None): - # 'i_force_non_null' is ignored here; if it has any effect, it - # has already been applied to 'hop.r_result' +def rtype_dict_constructor(hop, i_force_non_null=None, i_simple_hash_eq=None): + # 'i_force_non_null' and 'i_simple_hash_eq' are ignored here; if they have any + # effect, it has already been applied to 'hop.r_result' hop.exception_cannot_occur() r_dict = hop.r_result cDICT = hop.inputconst(lltype.Void, r_dict.DICT) diff --git a/rpython/rtyper/rdict.py b/rpython/rtyper/rdict.py --- a/rpython/rtyper/rdict.py +++ b/rpython/rtyper/rdict.py @@ -15,6 +15,7 @@ s_key = dictkey.s_value s_value = dictvalue.s_value force_non_null = self.dictdef.force_non_null + simple_hash_eq = self.dictdef.simple_hash_eq if dictkey.custom_eq_hash: custom_eq_hash = lambda: (rtyper.getrepr(dictkey.s_rdict_eqfn), rtyper.getrepr(dictkey.s_rdict_hashfn)) @@ -22,7 +23,7 @@ custom_eq_hash = None return self.get_dict_repr()(rtyper, lambda: rtyper.getrepr(s_key), lambda: rtyper.getrepr(s_value), dictkey, dictvalue, - custom_eq_hash, force_non_null) + custom_eq_hash, force_non_null, simple_hash_eq) def rtyper_makekey(self): self.dictdef.dictkey .dont_change_any_more = True @@ -89,7 +90,7 @@ resulttype=ENTRIES) # call the correct variant_*() method method = getattr(self, 'variant_' + self.variant) - return method(hop, ENTRIES, v_entries, v_index) + return method(hop, ENTRIES, v_entries, v_dict, v_index) def get_tuple_result(self, hop, items_v): # this allocates the tuple for the result, directly in the function @@ 
-109,7 +110,7 @@ hop.genop('setfield', [v_result, c_item, v_item]) return v_result - def variant_keys(self, hop, ENTRIES, v_entries, v_index): + def variant_keys(self, hop, ENTRIES, v_entries, v_dict, v_index): KEY = ENTRIES.TO.OF.key c_key = hop.inputconst(lltype.Void, 'key') v_key = hop.genop('getinteriorfield', [v_entries, v_index, c_key], @@ -118,30 +119,30 @@ variant_reversed = variant_keys - def variant_values(self, hop, ENTRIES, v_entries, v_index): + def variant_values(self, hop, ENTRIES, v_entries, v_dict, v_index): VALUE = ENTRIES.TO.OF.value c_value = hop.inputconst(lltype.Void, 'value') v_value = hop.genop('getinteriorfield', [v_entries,v_index,c_value], resulttype=VALUE) return self.r_dict.recast_value(hop.llops, v_value) - def variant_items(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_value = self.variant_values(hop, ENTRIES, v_entries, v_index) + def variant_items(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_value)) - def variant_hashes(self, hop, ENTRIES, v_entries, v_index): + def variant_hashes(self, hop, ENTRIES, v_entries, v_dict, v_index): # there is not really a variant 'hashes', but this method is # convenient for the following variants - return hop.gendirectcall(ENTRIES.TO.hash, v_entries, v_index) + return hop.gendirectcall(ENTRIES.TO.entry_hash, v_entries, v_dict, v_index) - def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index) + def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_hash)) - def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_value = self.variant_values(hop, ENTRIES, v_entries, v_index) - v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index) + def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index) + v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_value, v_hash)) diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py --- a/rpython/rtyper/test/test_rdict.py +++ b/rpython/rtyper/test/test_rdict.py @@ -538,6 +538,25 @@ r_dict = rtyper.getrepr(s) assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash") + def test_r_dict_can_be_fast(self): + def myeq(n, m): + return n == m + def myhash(n): + return ~n + def f(): + d = self.new_r_dict(myeq, myhash, simple_hash_eq=True) + d[5] = 7 + d[12] = 19 + return d + + t = TranslationContext() + s = t.buildannotator().build_types(f, []) + rtyper = t.buildrtyper() + rtyper.specialize() + + r_dict = rtyper.getrepr(s) + assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash") + def test_tuple_dict(self): def f(i): d = self.newdict() @@ -1000,8 +1019,8 @@ return {} @staticmethod - def new_r_dict(myeq, myhash): - return r_dict(myeq, myhash) + def new_r_dict(myeq, 
myhash, force_non_null=False, simple_hash_eq=False): + return r_dict(myeq, myhash, force_non_null=force_non_null, simple_hash_eq=simple_hash_eq) def test_two_dicts_with_different_value_types(self): def func(i): diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py --- a/rpython/rtyper/test/test_rordereddict.py +++ b/rpython/rtyper/test/test_rordereddict.py @@ -386,8 +386,10 @@ return OrderedDict() @staticmethod - def new_r_dict(myeq, myhash): - return objectmodel.r_ordereddict(myeq, myhash) + def new_r_dict(myeq, myhash, force_non_null=False, simple_hash_eq=False): + return objectmodel.r_ordereddict( + myeq, myhash, force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) def test_two_dicts_with_different_value_types(self): def func(i): From pypy.commits at gmail.com Tue Dec 12 12:31:30 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 12 Dec 2017 09:31:30 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Mark test CPython-only - the test fails with _pyio, even on CPython Message-ID: <5a301272.90a9df0a.50226.aa98@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93387:9450e5936f78 Date: 2017-12-12 17:31 +0000 http://bitbucket.org/pypy/pypy/changeset/9450e5936f78/ Log: Mark test CPython-only - the test fails with _pyio, even on CPython diff --git a/lib-python/2.7/test/test_memoryio.py b/lib-python/2.7/test/test_memoryio.py --- a/lib-python/2.7/test/test_memoryio.py +++ b/lib-python/2.7/test/test_memoryio.py @@ -712,6 +712,7 @@ # XXX: For the Python version of io.StringIO, this is highly # dependent on the encoding used for the underlying buffer. + @support.cpython_only def test_widechar(self): buf = self.buftype("\U0002030a\U00020347") memio = self.ioclass(buf) From pypy.commits at gmail.com Tue Dec 12 12:52:52 2017 From: pypy.commits at gmail.com (amauryfa) Date: Tue, 12 Dec 2017 09:52:52 -0800 (PST) Subject: [pypy-commit] pypy py3.6: os.fspath() accepts a DirEntry Message-ID: <5a301774.13bbdf0a.b5a04.2aaa@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93388:07c74faf3629 Date: 2017-12-12 01:01 +0100 http://bitbucket.org/pypy/pypy/changeset/07c74faf3629/ Log: os.fspath() accepts a DirEntry diff --git a/pypy/module/posix/__init__.py b/pypy/module/posix/__init__.py --- a/pypy/module/posix/__init__.py +++ b/pypy/module/posix/__init__.py @@ -82,6 +82,7 @@ 'get_terminal_size': 'interp_posix.get_terminal_size', 'scandir': 'interp_scandir.scandir', + 'DirEntry': 'interp_scandir.W_DirEntry', 'get_inheritable': 'interp_posix.get_inheritable', 'set_inheritable': 'interp_posix.set_inheritable', 'fspath': 'interp_posix.fspath', diff --git a/pypy/module/posix/interp_scandir.py b/pypy/module/posix/interp_scandir.py --- a/pypy/module/posix/interp_scandir.py +++ b/pypy/module/posix/interp_scandir.py @@ -335,6 +335,7 @@ path = GetSetProperty(W_DirEntry.fget_path, doc="the entry's full path name; equivalent to " "os.path.join(scandir_path, entry.name)"), + __fspath__ = interp2app(W_DirEntry.fget_path), is_dir = interp2app(W_DirEntry.descr_is_dir), is_file = interp2app(W_DirEntry.descr_is_file), is_symlink = interp2app(W_DirEntry.descr_is_symlink), diff --git a/pypy/module/posix/test/test_scandir.py b/pypy/module/posix/test/test_scandir.py --- a/pypy/module/posix/test/test_scandir.py +++ b/pypy/module/posix/test/test_scandir.py @@ -169,8 +169,14 @@ def test_repr(self): posix = self.posix d = next(posix.scandir(self.dir1)) + assert isinstance(d, posix.DirEntry) assert repr(d) == "" + def test_fspath(self): + 
posix = self.posix + d = next(posix.scandir(self.dir1)) + assert self.posix.fspath(d).endswith('dir1/file1') + def test_resource_warning(self): posix = self.posix import warnings, gc From pypy.commits at gmail.com Tue Dec 12 12:52:55 2017 From: pypy.commits at gmail.com (amauryfa) Date: Tue, 12 Dec 2017 09:52:55 -0800 (PST) Subject: [pypy-commit] pypy py3.6: Most file operations now accept a PathLike object with a __fspath__ method. Message-ID: <5a301777.42b2df0a.40715.0936@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93389:10f89fd70107 Date: 2017-12-12 18:51 +0100 http://bitbucket.org/pypy/pypy/changeset/10f89fd70107/ Log: Most file operations now accept a PathLike object with a __fspath__ method. diff --git a/pypy/module/_io/interp_io.py b/pypy/module/_io/interp_io.py --- a/pypy/module/_io/interp_io.py +++ b/pypy/module/_io/interp_io.py @@ -6,6 +6,7 @@ TypeDef, interp_attrproperty, generic_new_descr) from pypy.module._io.interp_fileio import W_FileIO from pypy.module._io.interp_textio import W_TextIOWrapper +from pypy.module.posix import interp_posix class Cache: @@ -25,7 +26,7 @@ if not (space.isinstance_w(w_file, space.w_unicode) or space.isinstance_w(w_file, space.w_bytes) or space.isinstance_w(w_file, space.w_int)): - raise oefmt(space.w_TypeError, "invalid file: %R", w_file) + w_file = interp_posix.fspath(space, w_file) reading = writing = creating = appending = updating = text = binary = universal = False diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -151,12 +151,26 @@ except OperationError as e: if not e.match(space, space.w_TypeError): raise - if allow_fd: - fd = unwrap_fd(space, w_value, "string, bytes or integer") - return Path(fd, None, None, w_value) - raise oefmt(space.w_TypeError, - "illegal type for path parameter (expected " - "string or bytes, got %T)", w_value) + if allow_fd and space.index_CHECK(w_value): + fd = unwrap_fd(space, w_value, "string, bytes or integer") + return Path(fd, None, None, w_value) + + # Inline fspath() for better error messages. 
+ w_fspath_method = space.lookup(w_value, '__fspath__') + if w_fspath_method: + w_result = space.get_and_call_function(w_fspath_method, w_value) + if (space.isinstance_w(w_result, space.w_text) or + space.isinstance_w(w_result, space.w_bytes)): + return _unwrap_path(space, w_result, allow_fd=False) + + if allow_fd: + raise oefmt(space.w_TypeError, + "illegal type for path parameter (expected " + "string, bytes, os.PathLike or integer, got %T)", w_value) + else: + raise oefmt(space.w_TypeError, + "illegal type for path parameter (expected " + "string, bytes or os.PathLike, got %T)", w_value) class _PathOrFd(Unwrapper): def unwrap(self, space, w_value): diff --git a/pypy/module/posix/test/test_scandir.py b/pypy/module/posix/test/test_scandir.py --- a/pypy/module/posix/test/test_scandir.py +++ b/pypy/module/posix/test/test_scandir.py @@ -207,3 +207,10 @@ del iterator gc.collect() assert not l + + def test_lstat(self): + posix = self.posix + d = next(posix.scandir()) + with open(d) as fp: + length = len(fp.read()) + assert posix.lstat(d).st_size == length From pypy.commits at gmail.com Tue Dec 12 13:04:28 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 12 Dec 2017 10:04:28 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix signature usage Message-ID: <5a301a2c.5de81c0a.15195.174a@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93390:f69557a0b7c2 Date: 2017-12-12 18:03 +0000 http://bitbucket.org/pypy/pypy/changeset/f69557a0b7c2/ Log: fix signature usage diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -20,7 +20,7 @@ from rpython.rlib.objectmodel import always_inline, dont_inline, try_inline from rpython.rlib.rstring import StringBuilder from rpython.rlib import jit, types -from rpython.rlib.signature import signature +from rpython.rlib.signature import signature, finishsigs from rpython.rlib.types import char, none from rpython.rlib.rarithmetic import r_uint from rpython.rlib.unicodedata import unicodedb @@ -683,6 +683,7 @@ return unicode_escape #, char_escape_helper + at finishsigs class Utf8StringBuilder(object): @always_inline def __init__(self, size=0): @@ -702,7 +703,7 @@ newlgt = get_utf8_length(s, start, end) self._lgt += newlgt - @signature(char(), returns=none()) + @signature(types.self(), char(), returns=none()) @always_inline def append_char(self, s): # for characters, ascii From pypy.commits at gmail.com Tue Dec 12 13:22:39 2017 From: pypy.commits at gmail.com (amauryfa) Date: Tue, 12 Dec 2017 10:22:39 -0800 (PST) Subject: [pypy-commit] pypy py3.6: Oops, fix tests and translation. Message-ID: <5a301e6f.05c41c0a.a479d.05a4@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93391:ea22b4d272be Date: 2017-12-12 19:21 +0100 http://bitbucket.org/pypy/pypy/changeset/ea22b4d272be/ Log: Oops, fix tests and translation. diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -151,9 +151,14 @@ except OperationError as e: if not e.match(space, space.w_TypeError): raise - if allow_fd and space.index_CHECK(w_value): - fd = unwrap_fd(space, w_value, "string, bytes or integer") - return Path(fd, None, None, w_value) + if allow_fd: + try: + space.index(w_value) + except OperationError: + pass + else: + fd = unwrap_fd(space, w_value, "string, bytes or integer") + return Path(fd, None, None, w_value) # Inline fspath() for better error messages. 
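At app level the net effect of the fspath changes is that anything with a __fspath__ method, including os.DirEntry, is accepted wherever a path is expected. A small sketch (the class and path are invented for illustration):

    import os

    class MyPath:
        def __fspath__(self):          # hypothetical path-like object
            return "/tmp/example.txt"

    p = MyPath()
    assert os.fspath(p) == "/tmp/example.txt"
    # os.stat(p), open(p), ... now accept it as well

    for entry in os.scandir('.'):      # DirEntry supports __fspath__ too
        os.lstat(entry)
        break
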
w_fspath_method = space.lookup(w_value, '__fspath__') @@ -165,12 +170,12 @@ if allow_fd: raise oefmt(space.w_TypeError, - "illegal type for path parameter (expected " - "string, bytes, os.PathLike or integer, got %T)", w_value) + "illegal type for path parameter (should be " + "string, bytes, os.PathLike or integer, not %T)", w_value) else: raise oefmt(space.w_TypeError, - "illegal type for path parameter (expected " - "string, bytes or os.PathLike, got %T)", w_value) + "illegal type for path parameter (should be " + "string, bytes or os.PathLike, not %T)", w_value) class _PathOrFd(Unwrapper): def unwrap(self, space, w_value): diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -208,7 +208,7 @@ excinfo = raises(TypeError, self.posix.stat, None) assert "can't specify None" in str(excinfo.value) excinfo = raises(TypeError, self.posix.stat, 2.) - assert "should be string, bytes or integer, not float" in str(excinfo.value) + assert "should be string, bytes, os.PathLike or integer, not float" in str(excinfo.value) raises(ValueError, self.posix.stat, -1) raises(ValueError, self.posix.stat, b"abc\x00def") raises(ValueError, self.posix.stat, u"abc\x00def") From pypy.commits at gmail.com Tue Dec 12 13:23:26 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 12 Dec 2017 10:23:26 -0800 (PST) Subject: [pypy-commit] pypy default: Move test_json_extra to extra_tests/ Message-ID: <5a301e9e.b198df0a.6e23d.0815@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93392:28cc81842b02 Date: 2017-12-12 18:22 +0000 http://bitbucket.org/pypy/pypy/changeset/28cc81842b02/ Log: Move test_json_extra to extra_tests/ diff --git a/pypy/module/test_lib_pypy/test_json_extra.py b/extra_tests/test_json.py rename from pypy/module/test_lib_pypy/test_json_extra.py rename to extra_tests/test_json.py --- a/pypy/module/test_lib_pypy/test_json_extra.py +++ b/extra_tests/test_json.py @@ -1,4 +1,5 @@ -import py, json +import pytest +import json def is_(x, y): return type(x) is type(y) and x == y @@ -6,12 +7,14 @@ def test_no_ensure_ascii(): assert is_(json.dumps(u"\u1234", ensure_ascii=False), u'"\u1234"') assert is_(json.dumps("\xc0", ensure_ascii=False), '"\xc0"') - e = py.test.raises(UnicodeDecodeError, json.dumps, - (u"\u1234", "\xc0"), ensure_ascii=False) - assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ") - e = py.test.raises(UnicodeDecodeError, json.dumps, - ("\xc0", u"\u1234"), ensure_ascii=False) - assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ") + with pytest.raises(UnicodeDecodeError) as excinfo: + json.dumps((u"\u1234", "\xc0"), ensure_ascii=False) + assert str(excinfo.value).startswith( + "'ascii' codec can't decode byte 0xc0 ") + with pytest.raises(UnicodeDecodeError) as excinfo: + json.dumps(("\xc0", u"\u1234"), ensure_ascii=False) + assert str(excinfo.value).startswith( + "'ascii' codec can't decode byte 0xc0 ") def test_issue2191(): assert is_(json.dumps(u"xxx", ensure_ascii=False), u'"xxx"') From pypy.commits at gmail.com Tue Dec 12 13:28:32 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 12 Dec 2017 10:28:32 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: hg merge default Message-ID: <5a301fd0.2d8fdf0a.b0cf.3d80@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93393:fc38dc2766eb Date: 2017-12-12 18:28 +0000 http://bitbucket.org/pypy/pypy/changeset/fc38dc2766eb/ Log: hg merge default diff --git 
a/pypy/module/test_lib_pypy/test_json_extra.py b/extra_tests/test_json.py rename from pypy/module/test_lib_pypy/test_json_extra.py rename to extra_tests/test_json.py --- a/pypy/module/test_lib_pypy/test_json_extra.py +++ b/extra_tests/test_json.py @@ -1,4 +1,5 @@ -import py, json +import pytest +import json def is_(x, y): return type(x) is type(y) and x == y @@ -6,12 +7,14 @@ def test_no_ensure_ascii(): assert is_(json.dumps(u"\u1234", ensure_ascii=False), u'"\u1234"') assert is_(json.dumps("\xc0", ensure_ascii=False), '"\xc0"') - e = py.test.raises(UnicodeDecodeError, json.dumps, - (u"\u1234", "\xc0"), ensure_ascii=False) - assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ") - e = py.test.raises(UnicodeDecodeError, json.dumps, - ("\xc0", u"\u1234"), ensure_ascii=False) - assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ") + with pytest.raises(UnicodeDecodeError) as excinfo: + json.dumps((u"\u1234", "\xc0"), ensure_ascii=False) + assert str(excinfo.value).startswith( + "'ascii' codec can't decode byte 0xc0 ") + with pytest.raises(UnicodeDecodeError) as excinfo: + json.dumps(("\xc0", u"\u1234"), ensure_ascii=False) + assert str(excinfo.value).startswith( + "'ascii' codec can't decode byte 0xc0 ") def test_issue2191(): assert is_(json.dumps(u"xxx", ensure_ascii=False), u'"xxx"') diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -1,42 +1,45 @@ -=========================== -What's new in PyPy2.7 5.10+ -=========================== - -.. this is a revision shortly after release-pypy2.7-v5.9.0 -.. startrev:d56dadcef996 - - -.. branch: cppyy-packaging - -Cleanup and improve cppyy packaging - -.. branch: docs-osx-brew-openssl - -.. branch: keep-debug-symbols - -Add a smartstrip tool, which can optionally keep the debug symbols in a -separate file, instead of just stripping them away. Use it in packaging - -.. branch: bsd-patches - -Fix failures on FreeBSD, contributed by David Naylor as patches on the issue -tracker (issues 2694, 2695, 2696, 2697) - -.. branch: run-extra-tests - -Run extra_tests/ in buildbot - -.. branch: vmprof-0.4.10 - -Upgrade the _vmprof backend to vmprof 0.4.10 - -.. branch: fix-vmprof-stacklet-switch - -Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) - -.. branch: win32-vcvars - -.. branch: unicode-utf8-re -.. branch: utf8-io -Utf8 handling for unicode - +=========================== +What's new in PyPy2.7 5.10+ +=========================== + +.. this is a revision shortly after release-pypy2.7-v5.9.0 +.. startrev:d56dadcef996 + + +.. branch: cppyy-packaging + +Cleanup and improve cppyy packaging + +.. branch: docs-osx-brew-openssl + +.. branch: keep-debug-symbols + +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches + +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests + +Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 + +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch + +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + +.. branch rdict-fast-hash + +Make it possible to declare that the hash function of an r_dict is fast in RPython. + +.. branch: unicode-utf8-re +.. 
branch: utf8-io +Utf8 handling for unicode diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -49,7 +49,7 @@ self.ll_chars = rffi.str2charp(s) self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw') self.pos = 0 - self.cache = r_dict(slice_eq, slice_hash) + self.cache = r_dict(slice_eq, slice_hash, simple_hash_eq=True) def close(self): rffi.free_charp(self.ll_chars) diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -194,13 +194,14 @@ listdef.generalize_range_step(flags['range_step']) return SomeList(listdef) - def getdictdef(self, is_r_dict=False, force_non_null=False): + def getdictdef(self, is_r_dict=False, force_non_null=False, simple_hash_eq=False): """Get the DictDef associated with the current position.""" try: dictdef = self.dictdefs[self.position_key] except KeyError: dictdef = DictDef(self, is_r_dict=is_r_dict, - force_non_null=force_non_null) + force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) self.dictdefs[self.position_key] = dictdef return dictdef diff --git a/rpython/annotator/builtin.py b/rpython/annotator/builtin.py --- a/rpython/annotator/builtin.py +++ b/rpython/annotator/builtin.py @@ -237,22 +237,30 @@ return SomeInstance(clsdef) @analyzer_for(rpython.rlib.objectmodel.r_dict) -def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None): +def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): + return _r_dict_helper(SomeDict, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq) + + at analyzer_for(rpython.rlib.objectmodel.r_ordereddict) +def robjmodel_r_ordereddict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): + return _r_dict_helper(SomeOrderedDict, s_eqfn, s_hashfn, + s_force_non_null, s_simple_hash_eq) + +def _r_dict_helper(cls, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq): if s_force_non_null is None: force_non_null = False else: assert s_force_non_null.is_constant() force_non_null = s_force_non_null.const + if s_simple_hash_eq is None: + simple_hash_eq = False + else: + assert s_simple_hash_eq.is_constant() + simple_hash_eq = s_simple_hash_eq.const dictdef = getbookkeeper().getdictdef(is_r_dict=True, - force_non_null=force_non_null) + force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeDict(dictdef) - - at analyzer_for(rpython.rlib.objectmodel.r_ordereddict) -def robjmodel_r_ordereddict(s_eqfn, s_hashfn): - dictdef = getbookkeeper().getdictdef(is_r_dict=True) - dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeOrderedDict(dictdef) + return cls(dictdef) @analyzer_for(rpython.rlib.objectmodel.hlinvoke) def robjmodel_hlinvoke(s_repr, s_llcallable, *args_s): diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py --- a/rpython/annotator/dictdef.py +++ b/rpython/annotator/dictdef.py @@ -81,12 +81,14 @@ def __init__(self, bookkeeper, s_key = s_ImpossibleValue, s_value = s_ImpossibleValue, is_r_dict = False, - force_non_null = False): + force_non_null = False, + simple_hash_eq = False): self.dictkey = DictKey(bookkeeper, s_key, is_r_dict) self.dictkey.itemof[self] = True self.dictvalue = DictValue(bookkeeper, s_value) self.dictvalue.itemof[self] = True self.force_non_null = force_non_null + self.simple_hash_eq = simple_hash_eq 
def read_key(self, position_key): self.dictkey.read_locations.add(position_key) diff --git a/rpython/jit/metainterp/typesystem.py b/rpython/jit/metainterp/typesystem.py --- a/rpython/jit/metainterp/typesystem.py +++ b/rpython/jit/metainterp/typesystem.py @@ -106,11 +106,11 @@ # It is an r_dict on lltype. Two copies, to avoid conflicts with # the value type. Note that NULL is not allowed as a key. def new_ref_dict(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def new_ref_dict_2(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def new_ref_dict_3(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def cast_vtable_to_hashable(self, cpu, ptr): adr = llmemory.cast_ptr_to_adr(ptr) diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -748,11 +748,19 @@ def _newdict(self): return {} - def __init__(self, key_eq, key_hash, force_non_null=False): + def __init__(self, key_eq, key_hash, force_non_null=False, simple_hash_eq=False): + """ force_non_null=True means that the key can never be None (even if + the annotator things it could be) + + simple_hash_eq=True means that the hash function is very fast, meaning it's + efficient enough that the dict does not have to store the hash per key. + It also implies that neither the hash nor the eq function will mutate + the dictionary. """ self._dict = self._newdict() self.key_eq = key_eq self.key_hash = key_hash self.force_non_null = force_non_null + self.simple_hash_eq = simple_hash_eq def __getitem__(self, key): return self._dict[_r_dictkey(self, key)] diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py --- a/rpython/rlib/test/test_objectmodel.py +++ b/rpython/rlib/test/test_objectmodel.py @@ -330,6 +330,13 @@ res = self.interpret(g, [3]) assert res == 77 + def test_r_dict_fast_functions(self): + def fn(): + d1 = r_dict(strange_key_eq, strange_key_hash, simple_hash_eq=True) + return play_with_r_dict(d1) + res = self.interpret(fn, []) + assert res + def test_prepare_dict_update(self): def g(n): d = {} diff --git a/rpython/rtyper/lltypesystem/rdict.py b/rpython/rtyper/lltypesystem/rdict.py --- a/rpython/rtyper/lltypesystem/rdict.py +++ b/rpython/rtyper/lltypesystem/rdict.py @@ -42,7 +42,8 @@ class DictRepr(AbstractDictRepr): def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue, - custom_eq_hash=None, force_non_null=False): + custom_eq_hash=None, force_non_null=False, fast_hash=False): + # fast_hash is ignored (only implemented in rordereddict.py) self.rtyper = rtyper self.DICT = lltype.GcForwardReference() self.lowleveltype = lltype.Ptr(self.DICT) diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py --- a/rpython/rtyper/lltypesystem/rordereddict.py +++ b/rpython/rtyper/lltypesystem/rordereddict.py @@ -66,7 +66,7 @@ def get_ll_dict(DICTKEY, DICTVALUE, get_custom_eq_hash=None, DICT=None, ll_fasthash_function=None, ll_hash_function=None, - ll_eq_function=None, method_cache={}, + ll_eq_function=None, method_cache={}, simple_hash_eq=False, dummykeyobj=None, dummyvalueobj=None, rtyper=None): # get the actual DICT type. 
if DICT is None, it's created, otherwise # forward reference is becoming DICT @@ -114,11 +114,14 @@ # * the value entryfields.append(("value", DICTVALUE)) - if ll_fasthash_function is None: + if simple_hash_eq: + assert get_custom_eq_hash is not None + entrymeths['entry_hash'] = ll_hash_custom_fast + elif ll_fasthash_function is None: entryfields.append(("f_hash", lltype.Signed)) - entrymeths['hash'] = ll_hash_from_cache + entrymeths['entry_hash'] = ll_hash_from_cache else: - entrymeths['hash'] = ll_hash_recomputed + entrymeths['entry_hash'] = ll_hash_recomputed entrymeths['fasthashfn'] = ll_fasthash_function # Build the lltype data structures @@ -140,7 +143,7 @@ 'keyeq': ll_keyeq_custom, 'r_rdict_eqfn': r_rdict_eqfn, 'r_rdict_hashfn': r_rdict_hashfn, - 'paranoia': True, + 'paranoia': not simple_hash_eq, } else: # figure out which functions must be used to hash and compare @@ -167,13 +170,14 @@ class OrderedDictRepr(AbstractDictRepr): def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue, - custom_eq_hash=None, force_non_null=False): + custom_eq_hash=None, force_non_null=False, simple_hash_eq=False): #assert not force_non_null self.rtyper = rtyper self.finalized = False self.DICT = lltype.GcForwardReference() self.lowleveltype = lltype.Ptr(self.DICT) self.custom_eq_hash = custom_eq_hash is not None + self.simple_hash_eq = simple_hash_eq if not isinstance(key_repr, rmodel.Repr): # not computed yet, done by setup() assert callable(key_repr) self._key_repr_computer = key_repr @@ -211,6 +215,7 @@ self.r_rdict_eqfn, self.r_rdict_hashfn = ( self._custom_eq_hash_repr()) kwd['get_custom_eq_hash'] = self._custom_eq_hash_repr + kwd['simple_hash_eq'] = self.simple_hash_eq else: kwd['ll_hash_function'] = self.key_repr.get_ll_hash_function() kwd['ll_eq_function'] = self.key_repr.get_ll_eq_function() @@ -600,15 +605,21 @@ dummy = ENTRIES.dummy_obj.ll_dummy_value entries[i].value = dummy - at signature(types.any(), types.int(), returns=types.any()) -def ll_hash_from_cache(entries, i): + at signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_from_cache(entries, d, i): return entries[i].f_hash - at signature(types.any(), types.int(), returns=types.any()) -def ll_hash_recomputed(entries, i): + at signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_recomputed(entries, d, i): ENTRIES = lltype.typeOf(entries).TO return ENTRIES.fasthashfn(entries[i].key) + at signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_custom_fast(entries, d, i): + DICT = lltype.typeOf(d).TO + key = entries[i].key + return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key) + def ll_keyhash_custom(d, key): DICT = lltype.typeOf(d).TO return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key) @@ -962,22 +973,22 @@ if fun == FUNC_BYTE: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_BYTE) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_BYTE) i += 1 elif fun == FUNC_SHORT: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_SHORT) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_SHORT) i += 1 elif IS_64BIT and fun == FUNC_INT: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_INT) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_INT) i += 1 elif fun == FUNC_LONG: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_LONG) + 
ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_LONG) i += 1 else: assert False @@ -1015,7 +1026,7 @@ checkingkey = entries[index - VALID_OFFSET].key if direct_compare and checkingkey == key: return index - VALID_OFFSET # found the entry - if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash: + if d.keyeq is not None and entries.entry_hash(d, index - VALID_OFFSET) == hash: # correct hash, maybe the key is e.g. a different pointer to # an equal object found = d.keyeq(checkingkey, key) @@ -1056,7 +1067,7 @@ checkingkey = entries[index - VALID_OFFSET].key if direct_compare and checkingkey == key: return index - VALID_OFFSET # found the entry - if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash: + if d.keyeq is not None and entries.entry_hash(d, index - VALID_OFFSET) == hash: # correct hash, maybe the key is e.g. a different pointer to # an equal object found = d.keyeq(checkingkey, key) @@ -1305,14 +1316,14 @@ def ll_dict_update(dic1, dic2): if dic1 == dic2: return - ll_ensure_indexes(dic2) # needed for entries.hash() below + ll_ensure_indexes(dic2) # needed for entries.entry_hash() below ll_prepare_dict_update(dic1, dic2.num_live_items) i = 0 while i < dic2.num_ever_used_items: entries = dic2.entries if entries.valid(i): entry = entries[i] - hash = entries.hash(i) + hash = entries.entry_hash(dic2, i) key = entry.key value = entry.value index = dic1.lookup_function(dic1, key, hash, FLAG_STORE) @@ -1413,7 +1424,7 @@ r = lltype.malloc(ELEM.TO) r.item0 = recast(ELEM.TO.item0, entry.key) r.item1 = recast(ELEM.TO.item1, entry.value) - _ll_dict_del(dic, dic.entries.hash(i), i) + _ll_dict_del(dic, dic.entries.entry_hash(dic, i), i) return r def ll_dict_pop(dic, key): diff --git a/rpython/rtyper/rbuiltin.py b/rpython/rtyper/rbuiltin.py --- a/rpython/rtyper/rbuiltin.py +++ b/rpython/rtyper/rbuiltin.py @@ -717,9 +717,9 @@ @typer_for(OrderedDict) @typer_for(objectmodel.r_dict) @typer_for(objectmodel.r_ordereddict) -def rtype_dict_constructor(hop, i_force_non_null=None): - # 'i_force_non_null' is ignored here; if it has any effect, it - # has already been applied to 'hop.r_result' +def rtype_dict_constructor(hop, i_force_non_null=None, i_simple_hash_eq=None): + # 'i_force_non_null' and 'i_simple_hash_eq' are ignored here; if they have any + # effect, it has already been applied to 'hop.r_result' hop.exception_cannot_occur() r_dict = hop.r_result cDICT = hop.inputconst(lltype.Void, r_dict.DICT) diff --git a/rpython/rtyper/rdict.py b/rpython/rtyper/rdict.py --- a/rpython/rtyper/rdict.py +++ b/rpython/rtyper/rdict.py @@ -15,6 +15,7 @@ s_key = dictkey.s_value s_value = dictvalue.s_value force_non_null = self.dictdef.force_non_null + simple_hash_eq = self.dictdef.simple_hash_eq if dictkey.custom_eq_hash: custom_eq_hash = lambda: (rtyper.getrepr(dictkey.s_rdict_eqfn), rtyper.getrepr(dictkey.s_rdict_hashfn)) @@ -22,7 +23,7 @@ custom_eq_hash = None return self.get_dict_repr()(rtyper, lambda: rtyper.getrepr(s_key), lambda: rtyper.getrepr(s_value), dictkey, dictvalue, - custom_eq_hash, force_non_null) + custom_eq_hash, force_non_null, simple_hash_eq) def rtyper_makekey(self): self.dictdef.dictkey .dont_change_any_more = True @@ -89,7 +90,7 @@ resulttype=ENTRIES) # call the correct variant_*() method method = getattr(self, 'variant_' + self.variant) - return method(hop, ENTRIES, v_entries, v_index) + return method(hop, ENTRIES, v_entries, v_dict, v_index) def get_tuple_result(self, hop, items_v): # this allocates the tuple for the result, directly in the function @@ 
-109,7 +110,7 @@ hop.genop('setfield', [v_result, c_item, v_item]) return v_result - def variant_keys(self, hop, ENTRIES, v_entries, v_index): + def variant_keys(self, hop, ENTRIES, v_entries, v_dict, v_index): KEY = ENTRIES.TO.OF.key c_key = hop.inputconst(lltype.Void, 'key') v_key = hop.genop('getinteriorfield', [v_entries, v_index, c_key], @@ -118,30 +119,30 @@ variant_reversed = variant_keys - def variant_values(self, hop, ENTRIES, v_entries, v_index): + def variant_values(self, hop, ENTRIES, v_entries, v_dict, v_index): VALUE = ENTRIES.TO.OF.value c_value = hop.inputconst(lltype.Void, 'value') v_value = hop.genop('getinteriorfield', [v_entries,v_index,c_value], resulttype=VALUE) return self.r_dict.recast_value(hop.llops, v_value) - def variant_items(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_value = self.variant_values(hop, ENTRIES, v_entries, v_index) + def variant_items(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_value)) - def variant_hashes(self, hop, ENTRIES, v_entries, v_index): + def variant_hashes(self, hop, ENTRIES, v_entries, v_dict, v_index): # there is not really a variant 'hashes', but this method is # convenient for the following variants - return hop.gendirectcall(ENTRIES.TO.hash, v_entries, v_index) + return hop.gendirectcall(ENTRIES.TO.entry_hash, v_entries, v_dict, v_index) - def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index) + def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_hash)) - def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_value = self.variant_values(hop, ENTRIES, v_entries, v_index) - v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index) + def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index) + v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_value, v_hash)) diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py --- a/rpython/rtyper/test/test_rdict.py +++ b/rpython/rtyper/test/test_rdict.py @@ -538,6 +538,25 @@ r_dict = rtyper.getrepr(s) assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash") + def test_r_dict_can_be_fast(self): + def myeq(n, m): + return n == m + def myhash(n): + return ~n + def f(): + d = self.new_r_dict(myeq, myhash, simple_hash_eq=True) + d[5] = 7 + d[12] = 19 + return d + + t = TranslationContext() + s = t.buildannotator().build_types(f, []) + rtyper = t.buildrtyper() + rtyper.specialize() + + r_dict = rtyper.getrepr(s) + assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash") + def test_tuple_dict(self): def f(i): d = self.newdict() @@ -1000,8 +1019,8 @@ return {} @staticmethod - def new_r_dict(myeq, myhash): - return r_dict(myeq, myhash) + def new_r_dict(myeq, 
myhash, force_non_null=False, simple_hash_eq=False): + return r_dict(myeq, myhash, force_non_null=force_non_null, simple_hash_eq=simple_hash_eq) def test_two_dicts_with_different_value_types(self): def func(i): diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py --- a/rpython/rtyper/test/test_rordereddict.py +++ b/rpython/rtyper/test/test_rordereddict.py @@ -386,8 +386,10 @@ return OrderedDict() @staticmethod - def new_r_dict(myeq, myhash): - return objectmodel.r_ordereddict(myeq, myhash) + def new_r_dict(myeq, myhash, force_non_null=False, simple_hash_eq=False): + return objectmodel.r_ordereddict( + myeq, myhash, force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) def test_two_dicts_with_different_value_types(self): def func(i): From pypy.commits at gmail.com Tue Dec 12 13:53:34 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 12 Dec 2017 10:53:34 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge default Message-ID: <5a3025ae.51bbdf0a.a2876.5a11@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93394:8986b7040a05 Date: 2017-12-12 18:28 +0100 http://bitbucket.org/pypy/pypy/changeset/8986b7040a05/ Log: merge default diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -36,6 +36,10 @@ .. branch: win32-vcvars +.. branch rdict-fast-hash + +Make it possible to declare that the hash function of an r_dict is fast in RPython. + .. branch: unicode-utf8-re .. branch: utf8-io Utf8 handling for unicode diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -49,7 +49,7 @@ self.ll_chars = rffi.str2charp(s) self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw') self.pos = 0 - self.cache = r_dict(slice_eq, slice_hash) + self.cache = r_dict(slice_eq, slice_hash, simple_hash_eq=True) def close(self): rffi.free_charp(self.ll_chars) diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -194,13 +194,14 @@ listdef.generalize_range_step(flags['range_step']) return SomeList(listdef) - def getdictdef(self, is_r_dict=False, force_non_null=False): + def getdictdef(self, is_r_dict=False, force_non_null=False, simple_hash_eq=False): """Get the DictDef associated with the current position.""" try: dictdef = self.dictdefs[self.position_key] except KeyError: dictdef = DictDef(self, is_r_dict=is_r_dict, - force_non_null=force_non_null) + force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) self.dictdefs[self.position_key] = dictdef return dictdef diff --git a/rpython/annotator/builtin.py b/rpython/annotator/builtin.py --- a/rpython/annotator/builtin.py +++ b/rpython/annotator/builtin.py @@ -237,22 +237,30 @@ return SomeInstance(clsdef) @analyzer_for(rpython.rlib.objectmodel.r_dict) -def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None): +def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): + return _r_dict_helper(SomeDict, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq) + + at analyzer_for(rpython.rlib.objectmodel.r_ordereddict) +def robjmodel_r_ordereddict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): + return _r_dict_helper(SomeOrderedDict, s_eqfn, s_hashfn, + s_force_non_null, s_simple_hash_eq) + +def 
_r_dict_helper(cls, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq): if s_force_non_null is None: force_non_null = False else: assert s_force_non_null.is_constant() force_non_null = s_force_non_null.const + if s_simple_hash_eq is None: + simple_hash_eq = False + else: + assert s_simple_hash_eq.is_constant() + simple_hash_eq = s_simple_hash_eq.const dictdef = getbookkeeper().getdictdef(is_r_dict=True, - force_non_null=force_non_null) + force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeDict(dictdef) - - at analyzer_for(rpython.rlib.objectmodel.r_ordereddict) -def robjmodel_r_ordereddict(s_eqfn, s_hashfn): - dictdef = getbookkeeper().getdictdef(is_r_dict=True) - dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeOrderedDict(dictdef) + return cls(dictdef) @analyzer_for(rpython.rlib.objectmodel.hlinvoke) def robjmodel_hlinvoke(s_repr, s_llcallable, *args_s): diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py --- a/rpython/annotator/dictdef.py +++ b/rpython/annotator/dictdef.py @@ -81,12 +81,14 @@ def __init__(self, bookkeeper, s_key = s_ImpossibleValue, s_value = s_ImpossibleValue, is_r_dict = False, - force_non_null = False): + force_non_null = False, + simple_hash_eq = False): self.dictkey = DictKey(bookkeeper, s_key, is_r_dict) self.dictkey.itemof[self] = True self.dictvalue = DictValue(bookkeeper, s_value) self.dictvalue.itemof[self] = True self.force_non_null = force_non_null + self.simple_hash_eq = simple_hash_eq def read_key(self, position_key): self.dictkey.read_locations.add(position_key) diff --git a/rpython/jit/metainterp/typesystem.py b/rpython/jit/metainterp/typesystem.py --- a/rpython/jit/metainterp/typesystem.py +++ b/rpython/jit/metainterp/typesystem.py @@ -106,11 +106,11 @@ # It is an r_dict on lltype. Two copies, to avoid conflicts with # the value type. Note that NULL is not allowed as a key. def new_ref_dict(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def new_ref_dict_2(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def new_ref_dict_3(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def cast_vtable_to_hashable(self, cpu, ptr): adr = llmemory.cast_ptr_to_adr(ptr) diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -748,11 +748,19 @@ def _newdict(self): return {} - def __init__(self, key_eq, key_hash, force_non_null=False): + def __init__(self, key_eq, key_hash, force_non_null=False, simple_hash_eq=False): + """ force_non_null=True means that the key can never be None (even if + the annotator things it could be) + + simple_hash_eq=True means that the hash function is very fast, meaning it's + efficient enough that the dict does not have to store the hash per key. + It also implies that neither the hash nor the eq function will mutate + the dictionary. 
""" self._dict = self._newdict() self.key_eq = key_eq self.key_hash = key_hash self.force_non_null = force_non_null + self.simple_hash_eq = simple_hash_eq def __getitem__(self, key): return self._dict[_r_dictkey(self, key)] diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py --- a/rpython/rlib/test/test_objectmodel.py +++ b/rpython/rlib/test/test_objectmodel.py @@ -330,6 +330,13 @@ res = self.interpret(g, [3]) assert res == 77 + def test_r_dict_fast_functions(self): + def fn(): + d1 = r_dict(strange_key_eq, strange_key_hash, simple_hash_eq=True) + return play_with_r_dict(d1) + res = self.interpret(fn, []) + assert res + def test_prepare_dict_update(self): def g(n): d = {} diff --git a/rpython/rtyper/lltypesystem/rdict.py b/rpython/rtyper/lltypesystem/rdict.py --- a/rpython/rtyper/lltypesystem/rdict.py +++ b/rpython/rtyper/lltypesystem/rdict.py @@ -42,7 +42,8 @@ class DictRepr(AbstractDictRepr): def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue, - custom_eq_hash=None, force_non_null=False): + custom_eq_hash=None, force_non_null=False, fast_hash=False): + # fast_hash is ignored (only implemented in rordereddict.py) self.rtyper = rtyper self.DICT = lltype.GcForwardReference() self.lowleveltype = lltype.Ptr(self.DICT) diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py --- a/rpython/rtyper/lltypesystem/rordereddict.py +++ b/rpython/rtyper/lltypesystem/rordereddict.py @@ -66,7 +66,7 @@ def get_ll_dict(DICTKEY, DICTVALUE, get_custom_eq_hash=None, DICT=None, ll_fasthash_function=None, ll_hash_function=None, - ll_eq_function=None, method_cache={}, + ll_eq_function=None, method_cache={}, simple_hash_eq=False, dummykeyobj=None, dummyvalueobj=None, rtyper=None): # get the actual DICT type. 
if DICT is None, it's created, otherwise # forward reference is becoming DICT @@ -114,11 +114,14 @@ # * the value entryfields.append(("value", DICTVALUE)) - if ll_fasthash_function is None: + if simple_hash_eq: + assert get_custom_eq_hash is not None + entrymeths['entry_hash'] = ll_hash_custom_fast + elif ll_fasthash_function is None: entryfields.append(("f_hash", lltype.Signed)) - entrymeths['hash'] = ll_hash_from_cache + entrymeths['entry_hash'] = ll_hash_from_cache else: - entrymeths['hash'] = ll_hash_recomputed + entrymeths['entry_hash'] = ll_hash_recomputed entrymeths['fasthashfn'] = ll_fasthash_function # Build the lltype data structures @@ -140,7 +143,7 @@ 'keyeq': ll_keyeq_custom, 'r_rdict_eqfn': r_rdict_eqfn, 'r_rdict_hashfn': r_rdict_hashfn, - 'paranoia': True, + 'paranoia': not simple_hash_eq, } else: # figure out which functions must be used to hash and compare @@ -167,13 +170,14 @@ class OrderedDictRepr(AbstractDictRepr): def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue, - custom_eq_hash=None, force_non_null=False): + custom_eq_hash=None, force_non_null=False, simple_hash_eq=False): #assert not force_non_null self.rtyper = rtyper self.finalized = False self.DICT = lltype.GcForwardReference() self.lowleveltype = lltype.Ptr(self.DICT) self.custom_eq_hash = custom_eq_hash is not None + self.simple_hash_eq = simple_hash_eq if not isinstance(key_repr, rmodel.Repr): # not computed yet, done by setup() assert callable(key_repr) self._key_repr_computer = key_repr @@ -211,6 +215,7 @@ self.r_rdict_eqfn, self.r_rdict_hashfn = ( self._custom_eq_hash_repr()) kwd['get_custom_eq_hash'] = self._custom_eq_hash_repr + kwd['simple_hash_eq'] = self.simple_hash_eq else: kwd['ll_hash_function'] = self.key_repr.get_ll_hash_function() kwd['ll_eq_function'] = self.key_repr.get_ll_eq_function() @@ -600,15 +605,21 @@ dummy = ENTRIES.dummy_obj.ll_dummy_value entries[i].value = dummy - at signature(types.any(), types.int(), returns=types.any()) -def ll_hash_from_cache(entries, i): + at signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_from_cache(entries, d, i): return entries[i].f_hash - at signature(types.any(), types.int(), returns=types.any()) -def ll_hash_recomputed(entries, i): + at signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_recomputed(entries, d, i): ENTRIES = lltype.typeOf(entries).TO return ENTRIES.fasthashfn(entries[i].key) + at signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_custom_fast(entries, d, i): + DICT = lltype.typeOf(d).TO + key = entries[i].key + return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key) + def ll_keyhash_custom(d, key): DICT = lltype.typeOf(d).TO return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key) @@ -962,22 +973,22 @@ if fun == FUNC_BYTE: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_BYTE) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_BYTE) i += 1 elif fun == FUNC_SHORT: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_SHORT) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_SHORT) i += 1 elif IS_64BIT and fun == FUNC_INT: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_INT) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_INT) i += 1 elif fun == FUNC_LONG: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_LONG) + 
ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_LONG) i += 1 else: assert False @@ -1015,7 +1026,7 @@ checkingkey = entries[index - VALID_OFFSET].key if direct_compare and checkingkey == key: return index - VALID_OFFSET # found the entry - if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash: + if d.keyeq is not None and entries.entry_hash(d, index - VALID_OFFSET) == hash: # correct hash, maybe the key is e.g. a different pointer to # an equal object found = d.keyeq(checkingkey, key) @@ -1056,7 +1067,7 @@ checkingkey = entries[index - VALID_OFFSET].key if direct_compare and checkingkey == key: return index - VALID_OFFSET # found the entry - if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash: + if d.keyeq is not None and entries.entry_hash(d, index - VALID_OFFSET) == hash: # correct hash, maybe the key is e.g. a different pointer to # an equal object found = d.keyeq(checkingkey, key) @@ -1305,14 +1316,14 @@ def ll_dict_update(dic1, dic2): if dic1 == dic2: return - ll_ensure_indexes(dic2) # needed for entries.hash() below + ll_ensure_indexes(dic2) # needed for entries.entry_hash() below ll_prepare_dict_update(dic1, dic2.num_live_items) i = 0 while i < dic2.num_ever_used_items: entries = dic2.entries if entries.valid(i): entry = entries[i] - hash = entries.hash(i) + hash = entries.entry_hash(dic2, i) key = entry.key value = entry.value index = dic1.lookup_function(dic1, key, hash, FLAG_STORE) @@ -1413,7 +1424,7 @@ r = lltype.malloc(ELEM.TO) r.item0 = recast(ELEM.TO.item0, entry.key) r.item1 = recast(ELEM.TO.item1, entry.value) - _ll_dict_del(dic, dic.entries.hash(i), i) + _ll_dict_del(dic, dic.entries.entry_hash(dic, i), i) return r def ll_dict_pop(dic, key): diff --git a/rpython/rtyper/rbuiltin.py b/rpython/rtyper/rbuiltin.py --- a/rpython/rtyper/rbuiltin.py +++ b/rpython/rtyper/rbuiltin.py @@ -717,9 +717,9 @@ @typer_for(OrderedDict) @typer_for(objectmodel.r_dict) @typer_for(objectmodel.r_ordereddict) -def rtype_dict_constructor(hop, i_force_non_null=None): - # 'i_force_non_null' is ignored here; if it has any effect, it - # has already been applied to 'hop.r_result' +def rtype_dict_constructor(hop, i_force_non_null=None, i_simple_hash_eq=None): + # 'i_force_non_null' and 'i_simple_hash_eq' are ignored here; if they have any + # effect, it has already been applied to 'hop.r_result' hop.exception_cannot_occur() r_dict = hop.r_result cDICT = hop.inputconst(lltype.Void, r_dict.DICT) diff --git a/rpython/rtyper/rdict.py b/rpython/rtyper/rdict.py --- a/rpython/rtyper/rdict.py +++ b/rpython/rtyper/rdict.py @@ -15,6 +15,7 @@ s_key = dictkey.s_value s_value = dictvalue.s_value force_non_null = self.dictdef.force_non_null + simple_hash_eq = self.dictdef.simple_hash_eq if dictkey.custom_eq_hash: custom_eq_hash = lambda: (rtyper.getrepr(dictkey.s_rdict_eqfn), rtyper.getrepr(dictkey.s_rdict_hashfn)) @@ -22,7 +23,7 @@ custom_eq_hash = None return self.get_dict_repr()(rtyper, lambda: rtyper.getrepr(s_key), lambda: rtyper.getrepr(s_value), dictkey, dictvalue, - custom_eq_hash, force_non_null) + custom_eq_hash, force_non_null, simple_hash_eq) def rtyper_makekey(self): self.dictdef.dictkey .dont_change_any_more = True @@ -89,7 +90,7 @@ resulttype=ENTRIES) # call the correct variant_*() method method = getattr(self, 'variant_' + self.variant) - return method(hop, ENTRIES, v_entries, v_index) + return method(hop, ENTRIES, v_entries, v_dict, v_index) def get_tuple_result(self, hop, items_v): # this allocates the tuple for the result, directly in the function @@ 
-109,7 +110,7 @@ hop.genop('setfield', [v_result, c_item, v_item]) return v_result - def variant_keys(self, hop, ENTRIES, v_entries, v_index): + def variant_keys(self, hop, ENTRIES, v_entries, v_dict, v_index): KEY = ENTRIES.TO.OF.key c_key = hop.inputconst(lltype.Void, 'key') v_key = hop.genop('getinteriorfield', [v_entries, v_index, c_key], @@ -118,30 +119,30 @@ variant_reversed = variant_keys - def variant_values(self, hop, ENTRIES, v_entries, v_index): + def variant_values(self, hop, ENTRIES, v_entries, v_dict, v_index): VALUE = ENTRIES.TO.OF.value c_value = hop.inputconst(lltype.Void, 'value') v_value = hop.genop('getinteriorfield', [v_entries,v_index,c_value], resulttype=VALUE) return self.r_dict.recast_value(hop.llops, v_value) - def variant_items(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_value = self.variant_values(hop, ENTRIES, v_entries, v_index) + def variant_items(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_value)) - def variant_hashes(self, hop, ENTRIES, v_entries, v_index): + def variant_hashes(self, hop, ENTRIES, v_entries, v_dict, v_index): # there is not really a variant 'hashes', but this method is # convenient for the following variants - return hop.gendirectcall(ENTRIES.TO.hash, v_entries, v_index) + return hop.gendirectcall(ENTRIES.TO.entry_hash, v_entries, v_dict, v_index) - def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index) + def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_hash)) - def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_value = self.variant_values(hop, ENTRIES, v_entries, v_index) - v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index) + def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index) + v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_value, v_hash)) diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py --- a/rpython/rtyper/test/test_rdict.py +++ b/rpython/rtyper/test/test_rdict.py @@ -538,6 +538,25 @@ r_dict = rtyper.getrepr(s) assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash") + def test_r_dict_can_be_fast(self): + def myeq(n, m): + return n == m + def myhash(n): + return ~n + def f(): + d = self.new_r_dict(myeq, myhash, simple_hash_eq=True) + d[5] = 7 + d[12] = 19 + return d + + t = TranslationContext() + s = t.buildannotator().build_types(f, []) + rtyper = t.buildrtyper() + rtyper.specialize() + + r_dict = rtyper.getrepr(s) + assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash") + def test_tuple_dict(self): def f(i): d = self.newdict() @@ -1000,8 +1019,8 @@ return {} @staticmethod - def new_r_dict(myeq, myhash): - return r_dict(myeq, myhash) + def new_r_dict(myeq, 
myhash, force_non_null=False, simple_hash_eq=False): + return r_dict(myeq, myhash, force_non_null=force_non_null, simple_hash_eq=simple_hash_eq) def test_two_dicts_with_different_value_types(self): def func(i): diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py --- a/rpython/rtyper/test/test_rordereddict.py +++ b/rpython/rtyper/test/test_rordereddict.py @@ -386,8 +386,10 @@ return OrderedDict() @staticmethod - def new_r_dict(myeq, myhash): - return objectmodel.r_ordereddict(myeq, myhash) + def new_r_dict(myeq, myhash, force_non_null=False, simple_hash_eq=False): + return objectmodel.r_ordereddict( + myeq, myhash, force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) def test_two_dicts_with_different_value_types(self): def func(i): From pypy.commits at gmail.com Tue Dec 12 13:53:36 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 12 Dec 2017 10:53:36 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix translation Message-ID: <5a3025b0.47b0df0a.ade9e.c4ec@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93395:b128150d928f Date: 2017-12-12 19:51 +0100 http://bitbucket.org/pypy/pypy/changeset/b128150d928f/ Log: fix translation diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -356,7 +356,7 @@ builder.append(res) else: try: - chr = r_uint(int(s[pos:pos + digits], 16)) + chr = int(s[pos:pos + digits], 16) except ValueError: endinpos = pos while s[endinpos] in hexdigits: From pypy.commits at gmail.com Tue Dec 12 13:53:38 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 12 Dec 2017 10:53:38 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge heads Message-ID: <5a3025b2.85631c0a.e53cb.1bab@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93396:43471abf9b78 Date: 2017-12-12 19:52 +0100 http://bitbucket.org/pypy/pypy/changeset/43471abf9b78/ Log: merge heads diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -43,3 +43,4 @@ .. branch: unicode-utf8-re .. 
branch: utf8-io Utf8 handling for unicode + From pypy.commits at gmail.com Tue Dec 12 14:06:21 2017 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 12 Dec 2017 11:06:21 -0800 (PST) Subject: [pypy-commit] pypy default: because it's easy: add a json roundtrip hypothesis test Message-ID: <5a3028ad.4ce61c0a.5017a.19ac@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: Changeset: r93397:1aa1bd29a7b8 Date: 2017-12-12 20:05 +0100 http://bitbucket.org/pypy/pypy/changeset/1aa1bd29a7b8/ Log: because it's easy: add a json roundtrip hypothesis test diff --git a/extra_tests/test_json.py b/extra_tests/test_json.py --- a/extra_tests/test_json.py +++ b/extra_tests/test_json.py @@ -1,5 +1,6 @@ import pytest import json +from hypothesis import given, strategies def is_(x, y): return type(x) is type(y) and x == y @@ -18,3 +19,15 @@ def test_issue2191(): assert is_(json.dumps(u"xxx", ensure_ascii=False), u'"xxx"') + +jsondata = strategies.recursive( + strategies.none() | + strategies.booleans() | + strategies.floats(allow_nan=False) | + strategies.text(), + lambda children: strategies.lists(children) | + strategies.dictionaries(strategies.text(), children)) + + at given(jsondata) +def test_roundtrip(d): + assert json.loads(json.dumps(d)) == d From pypy.commits at gmail.com Tue Dec 12 14:37:35 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 12 Dec 2017 11:37:35 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: UnicodeBuilder should return unicode; fix rutf8.check_utf8() return value Message-ID: <5a302fff.13a0df0a.56c57.161e@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93398:a31a10da4e39 Date: 2017-12-12 19:36 +0000 http://bitbucket.org/pypy/pypy/changeset/a31a10da4e39/ Log: UnicodeBuilder should return unicode; fix rutf8.check_utf8() return value diff --git a/pypy/module/__pypy__/interp_builders.py b/pypy/module/__pypy__/interp_builders.py --- a/pypy/module/__pypy__/interp_builders.py +++ b/pypy/module/__pypy__/interp_builders.py @@ -3,6 +3,7 @@ from pypy.interpreter.gateway import interp2app, unwrap_spec from pypy.interpreter.typedef import TypeDef from rpython.rlib.rstring import StringBuilder +from rpython.rlib.rutf8 import StringBuilder, Utf8StringBuilder from pypy.objspace.std.unicodeobject import W_UnicodeObject from rpython.tool.sourcetools import func_with_new_name @@ -54,13 +55,13 @@ class W_UnicodeBuilder(W_Root): def __init__(self, space, size): if size < 0: - self.builder = StringBuilder() + self.builder = Utf8StringBuilder() else: - self.builder = StringBuilder(size) + self.builder = Utf8StringBuilder(size) @unwrap_spec(size=int) def descr__new__(space, w_subtype, size=-1): - return W_UnicodeBuilder(space, size) + return W_UnicodeBuilder(space, 3 * size) @unwrap_spec(s='utf8') def descr_append(self, space, s): @@ -76,7 +77,7 @@ self.builder.append_slice(w_unicode._utf8, byte_start, byte_end) def descr_build(self, space): - w_s = space.newtext(self.builder.build()) + w_s = space.newutf8(self.builder.build(), self.builder.get_length()) # after build(), we can continue to append more strings # to the same builder. This is supported since # 2ff5087aca28 in RPython. 
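The visible effect of this change at application level is that the builder now hands back a real unicode object. A short usage sketch of the PyPy-only __pypy__.builders.UnicodeBuilder, mirroring the tests added below:

    from __pypy__.builders import UnicodeBuilder

    b = UnicodeBuilder()
    b.append(u"abc")
    b.append_slice(u"xdex", 1, 3)   # appends the slice [1:3], i.e. u"de"
    s = b.build()
    assert s == u"abcde"
    assert type(s) is unicode       # the point of the fix: build() yields unicode, not str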
diff --git a/pypy/module/__pypy__/test/test_builders.py b/pypy/module/__pypy__/test/test_builders.py --- a/pypy/module/__pypy__/test/test_builders.py +++ b/pypy/module/__pypy__/test/test_builders.py @@ -9,9 +9,11 @@ b.append(u"1") s = b.build() assert s == u"abc1231" + assert type(s) is unicode assert b.build() == s b.append(u"123") assert b.build() == s + u"123" + assert type(b.build()) is unicode def test_preallocate(self): from __pypy__.builders import UnicodeBuilder @@ -20,6 +22,7 @@ b.append(u"123") s = b.build() assert s == u"abc123" + assert type(s) is unicode def test_append_slice(self): from __pypy__.builders import UnicodeBuilder @@ -28,8 +31,11 @@ raises(ValueError, b.append_slice, u"1", 2, 1) s = b.build() assert s == u"cde" + assert type(s) is unicode b.append_slice(u"abc", 1, 2) - assert b.build() == u"cdeb" + s = b.build() + assert s == u"cdeb" + assert type(s) is unicode def test_stringbuilder(self): from __pypy__.builders import StringBuilder diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -407,10 +407,10 @@ continue return ~(pos - 1) - assert pos == end - assert pos - continuation_bytes >= 0 - return pos - continuation_bytes + result = pos - continuation_bytes - start + assert result >= 0 + return result def has_surrogates(utf8): # XXX write a faster version maybe diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -29,7 +29,6 @@ else: assert not raised - at settings(max_examples=10000) @given(strategies.binary(), strategies.booleans()) @example('\xf1\x80\x80\x80', False) def test_check_utf8(s, allow_surrogates): @@ -39,6 +38,13 @@ def test_check_utf8_valid(u, allow_surrogates): _test_check_utf8(u.encode('utf-8'), allow_surrogates) + at given(strategies.binary(), strategies.text(), strategies.binary()) +def test_check_utf8_slice(a, b, c): + start = len(a) + b_utf8 = b.encode('utf-8') + end = start + len(b_utf8) + assert rutf8.check_utf8(a + b_utf8 + c, False, start, end) == len(b) + def _has_surrogates(s): for u in s.decode('utf8'): if 0xD800 <= ord(u) <= 0xDFFF: From pypy.commits at gmail.com Tue Dec 12 14:37:37 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 12 Dec 2017 11:37:37 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge heads Message-ID: <5a303001.01a4df0a.ad75b.519e@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93399:dcf0b6da89e9 Date: 2017-12-12 19:37 +0000 http://bitbucket.org/pypy/pypy/changeset/dcf0b6da89e9/ Log: merge heads diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -43,3 +43,4 @@ .. branch: unicode-utf8-re .. 
branch: utf8-io Utf8 handling for unicode + diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -356,7 +356,7 @@ builder.append(res) else: try: - chr = r_uint(int(s[pos:pos + digits], 16)) + chr = int(s[pos:pos + digits], 16) except ValueError: endinpos = pos while s[endinpos] in hexdigits: From pypy.commits at gmail.com Tue Dec 12 17:01:40 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 12 Dec 2017 14:01:40 -0800 (PST) Subject: [pypy-commit] pypy default: Move utf_32 implementation from runicode to unicodehelper Message-ID: <5a3051c4.5dbf1c0a.78241.381f@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93400:5d091c15169c Date: 2017-12-12 22:00 +0000 http://bitbucket.org/pypy/pypy/changeset/5d091c15169c/ Log: Move utf_32 implementation from runicode to unicodehelper diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -1,4 +1,7 @@ -from pypy.interpreter.unicodehelper import encode_utf8, decode_utf8 +import pytest +import struct +from pypy.interpreter.unicodehelper import ( + encode_utf8, decode_utf8, unicode_encode_utf_32_be) class FakeSpace: pass @@ -24,3 +27,23 @@ assert map(ord, got) == [0xd800, 0xdc00] got = decode_utf8(space, "\xf0\x90\x80\x80") assert map(ord, got) == [0x10000] + + at pytest.mark.parametrize('unich', [u"\ud800", u"\udc80"]) +def test_utf32_surrogates(unich): + assert (unicode_encode_utf_32_be(unich, 1, None) == + struct.pack('>i', ord(unich))) + with pytest.raises(UnicodeEncodeError): + unicode_encode_utf_32_be(unich, 1, None, allow_surrogates=False) + + def replace_with(ru, rs): + def errorhandler(errors, enc, msg, u, startingpos, endingpos): + if errors == 'strict': + raise UnicodeEncodeError(enc, u, startingpos, endingpos, msg) + return ru, rs, endingpos + return unicode_encode_utf_32_be( + u"<%s>" % unich, 3, None, + errorhandler, allow_surrogates=False) + + assert replace_with(u'rep', None) == u''.encode('utf-32-be') + assert (replace_with(None, '\xca\xfe\xca\xfe') == + '\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>') diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1,7 +1,11 @@ +from rpython.rlib.objectmodel import specialize +from rpython.rlib.rarithmetic import intmask +from rpython.rlib.rstring import StringBuilder, UnicodeBuilder +from rpython.rlib import runicode +from rpython.rlib.runicode import ( + default_unicode_error_encode, default_unicode_error_decode, + MAXUNICODE, BYTEORDER, BYTEORDER2, UNICHR) from pypy.interpreter.error import OperationError -from rpython.rlib.objectmodel import specialize -from rpython.rlib import runicode -from pypy.module._codecs import interp_codecs @specialize.memo() def decode_error_handler(space): @@ -37,6 +41,7 @@ # These functions take and return unwrapped rpython strings and unicodes def decode_unicode_escape(space, string): + from pypy.module._codecs import interp_codecs state = space.fromcache(interp_codecs.CodecState) unicodedata_handler = state.get_unicodedata_handler(space) result, consumed = runicode.str_decode_unicode_escape( @@ -71,3 +76,229 @@ uni, len(uni), "strict", errorhandler=None, allow_surrogates=True) + +# ____________________________________________________________ +# utf-32 + +def str_decode_utf_32(s, size, 
errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "native") + return result, length + +def str_decode_utf_32_be(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "big") + return result, length + +def str_decode_utf_32_le(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "little") + return result, length + +def py3k_str_decode_utf_32(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "native", 'utf-32-' + BYTEORDER2) + return result, length + +def py3k_str_decode_utf_32_be(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "big", 'utf-32-be') + return result, length + +def py3k_str_decode_utf_32_le(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "little", 'utf-32-le') + return result, length + +BOM32_DIRECT = intmask(0x0000FEFF) +BOM32_REVERSE = intmask(0xFFFE0000) + +def str_decode_utf_32_helper(s, size, errors, final=True, + errorhandler=None, + byteorder="native", + public_encoding_name='utf32'): + if errorhandler is None: + errorhandler = default_unicode_error_decode + bo = 0 + + if BYTEORDER == 'little': + iorder = [0, 1, 2, 3] + else: + iorder = [3, 2, 1, 0] + + # Check for BOM marks (U+FEFF) in the input and adjust current + # byte order setting accordingly. In native mode, the leading BOM + # mark is skipped, in all other modes, it is copied to the output + # stream as-is (giving a ZWNBSP character). + pos = 0 + if byteorder == 'native': + if size >= 4: + bom = intmask( + (ord(s[iorder[3]]) << 24) | (ord(s[iorder[2]]) << 16) | + (ord(s[iorder[1]]) << 8) | ord(s[iorder[0]])) + if BYTEORDER == 'little': + if bom == BOM32_DIRECT: + pos += 4 + bo = -1 + elif bom == BOM32_REVERSE: + pos += 4 + bo = 1 + else: + if bom == BOM32_DIRECT: + pos += 4 + bo = 1 + elif bom == BOM32_REVERSE: + pos += 4 + bo = -1 + elif byteorder == 'little': + bo = -1 + else: + bo = 1 + if size == 0: + return u'', 0, bo + if bo == -1: + # force little endian + iorder = [0, 1, 2, 3] + elif bo == 1: + # force big endian + iorder = [3, 2, 1, 0] + + result = UnicodeBuilder(size // 4) + + while pos < size: + # remaining bytes at the end? 
(size should be divisible by 4) + if len(s) - pos < 4: + if not final: + break + r, pos = errorhandler(errors, public_encoding_name, + "truncated data", + s, pos, len(s)) + result.append(r) + if len(s) - pos < 4: + break + continue + ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) | + (ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]])) + if ch >= 0x110000: + r, pos = errorhandler(errors, public_encoding_name, + "codepoint not in range(0x110000)", + s, pos, len(s)) + result.append(r) + continue + + if MAXUNICODE < 65536 and ch >= 0x10000: + ch -= 0x10000L + result.append(unichr(0xD800 + (ch >> 10))) + result.append(unichr(0xDC00 + (ch & 0x03FF))) + else: + result.append(UNICHR(ch)) + pos += 4 + return result.build(), pos, bo + +def _STORECHAR32(result, CH, byteorder): + c0 = chr(((CH) >> 24) & 0xff) + c1 = chr(((CH) >> 16) & 0xff) + c2 = chr(((CH) >> 8) & 0xff) + c3 = chr((CH) & 0xff) + if byteorder == 'little': + result.append(c3) + result.append(c2) + result.append(c1) + result.append(c0) + else: + result.append(c0) + result.append(c1) + result.append(c2) + result.append(c3) + +def unicode_encode_utf_32_helper(s, size, errors, + errorhandler=None, + allow_surrogates=True, + byteorder='little', + public_encoding_name='utf32'): + if errorhandler is None: + errorhandler = default_unicode_error_encode + if size == 0: + if byteorder == 'native': + result = StringBuilder(4) + _STORECHAR32(result, 0xFEFF, BYTEORDER) + return result.build() + return "" + + result = StringBuilder(size * 4 + 4) + if byteorder == 'native': + _STORECHAR32(result, 0xFEFF, BYTEORDER) + byteorder = BYTEORDER + + pos = 0 + while pos < size: + ch = ord(s[pos]) + pos += 1 + ch2 = 0 + if not allow_surrogates and 0xD800 <= ch < 0xE000: + ru, rs, pos = errorhandler( + errors, public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + if rs is not None: + # py3k only + if len(rs) % 4 != 0: + errorhandler( + 'strict', public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + result.append(rs) + continue + for ch in ru: + if ord(ch) < 0xD800: + _STORECHAR32(result, ord(ch), byteorder) + else: + errorhandler( + 'strict', public_encoding_name, + 'surrogates not allowed', s, pos - 1, pos) + continue + if 0xD800 <= ch < 0xDC00 and MAXUNICODE < 65536 and pos < size: + ch2 = ord(s[pos]) + if 0xDC00 <= ch2 < 0xE000: + ch = (((ch & 0x3FF) << 10) | (ch2 & 0x3FF)) + 0x10000 + pos += 1 + _STORECHAR32(result, ch, byteorder) + + return result.build() + +def unicode_encode_utf_32(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "native") + +def unicode_encode_utf_32_be(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "big") + +def unicode_encode_utf_32_le(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "little") + +def py3k_unicode_encode_utf_32(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "native", + 'utf-32-' + BYTEORDER2) + +def py3k_unicode_encode_utf_32_be(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "big", + 'utf-32-be') + +def py3k_unicode_encode_utf_32_le(s, size, errors, + 
errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "little", + 'utf-32-le') diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -1,10 +1,12 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rstring import UnicodeBuilder +from rpython.rlib import runicode from rpython.rlib.runicode import code_to_unichr, MAXUNICODE from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault +from pypy.interpreter import unicodehelper class VersionTag(object): @@ -365,19 +367,19 @@ raise oefmt(space.w_TypeError, "handler must be callable") # ____________________________________________________________ -# delegation to runicode - -from rpython.rlib import runicode +# delegation to runicode/unicodehelper def make_encoder_wrapper(name): rname = "unicode_encode_%s" % (name.replace("_encode", ""), ) - assert hasattr(runicode, rname) + try: + func = getattr(unicodehelper, rname) + except AttributeError: + func = getattr(runicode, rname) @unwrap_spec(uni=unicode, errors='text_or_none') def wrap_encoder(space, uni, errors="strict"): if errors is None: errors = 'strict' state = space.fromcache(CodecState) - func = getattr(runicode, rname) result = func(uni, len(uni), errors, state.encode_error_handler) return space.newtuple([space.newbytes(result), space.newint(len(uni))]) wrap_encoder.func_name = rname @@ -385,7 +387,10 @@ def make_decoder_wrapper(name): rname = "str_decode_%s" % (name.replace("_decode", ""), ) - assert hasattr(runicode, rname) + try: + func = getattr(unicodehelper, rname) + except AttributeError: + func = getattr(runicode, rname) @unwrap_spec(string='bufferstr', errors='text_or_none', w_final=WrappedDefault(False)) def wrap_decoder(space, string, errors="strict", w_final=None): @@ -393,7 +398,6 @@ errors = 'strict' final = space.is_true(w_final) state = space.fromcache(CodecState) - func = getattr(runicode, rname) result, consumed = func(string, len(string), errors, final, state.decode_error_handler) return space.newtuple([space.newunicode(result), space.newint(consumed)]) diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -12,6 +12,7 @@ from pypy.module.cpyext.bytesobject import PyString_Check from pypy.module.sys.interp_encoding import setdefaultencoding from pypy.module._codecs.interp_codecs import CodecState +from pypy.interpreter import unicodehelper from pypy.objspace.std import unicodeobject from rpython.rlib import rstring, runicode from rpython.tool.sourcetools import func_renamer @@ -620,7 +621,7 @@ else: errors = None - result, length, byteorder = runicode.str_decode_utf_32_helper( + result, length, byteorder = unicodehelper.str_decode_utf_32_helper( string, size, errors, True, # final ? false for multiple passes? 
None, # errorhandler diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py --- a/rpython/rlib/runicode.py +++ b/rpython/rlib/runicode.py @@ -710,7 +710,7 @@ # ____________________________________________________________ -# utf-32 +# utf-32 (not used in PyPy any more) def str_decode_utf_32(s, size, errors, final=True, errorhandler=None): From pypy.commits at gmail.com Tue Dec 12 17:14:32 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 12 Dec 2017 14:14:32 -0800 (PST) Subject: [pypy-commit] pypy default: Small cleanup Message-ID: <5a3054c8.59451c0a.963b3.4a19@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93401:0ad21cfe4469 Date: 2017-12-12 22:14 +0000 http://bitbucket.org/pypy/pypy/changeset/0ad21cfe4469/ Log: Small cleanup diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -369,12 +369,16 @@ # ____________________________________________________________ # delegation to runicode/unicodehelper +def _find_implementation(impl_name): + try: + func = getattr(unicodehelper, impl_name) + except AttributeError: + func = getattr(runicode, impl_name) + return func + def make_encoder_wrapper(name): rname = "unicode_encode_%s" % (name.replace("_encode", ""), ) - try: - func = getattr(unicodehelper, rname) - except AttributeError: - func = getattr(runicode, rname) + func = _find_implementation(rname) @unwrap_spec(uni=unicode, errors='text_or_none') def wrap_encoder(space, uni, errors="strict"): if errors is None: @@ -387,10 +391,7 @@ def make_decoder_wrapper(name): rname = "str_decode_%s" % (name.replace("_decode", ""), ) - try: - func = getattr(unicodehelper, rname) - except AttributeError: - func = getattr(runicode, rname) + func = _find_implementation(rname) @unwrap_spec(string='bufferstr', errors='text_or_none', w_final=WrappedDefault(False)) def wrap_decoder(space, string, errors="strict", w_final=None): From pypy.commits at gmail.com Tue Dec 12 18:37:46 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 12 Dec 2017 15:37:46 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a30684a.50b91c0a.48ae1.9ce0@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93402:da4b6cf751a5 Date: 2017-12-12 23:37 +0000 http://bitbucket.org/pypy/pypy/changeset/da4b6cf751a5/ Log: hg merge default diff --git a/pypy/module/test_lib_pypy/test_json_extra.py b/extra_tests/test_json.py rename from pypy/module/test_lib_pypy/test_json_extra.py rename to extra_tests/test_json.py --- a/pypy/module/test_lib_pypy/test_json_extra.py +++ b/extra_tests/test_json.py @@ -1,4 +1,6 @@ -import py, json +import pytest +import json +from hypothesis import given, strategies def is_(x, y): return type(x) is type(y) and x == y @@ -6,12 +8,26 @@ def test_no_ensure_ascii(): assert is_(json.dumps(u"\u1234", ensure_ascii=False), u'"\u1234"') assert is_(json.dumps("\xc0", ensure_ascii=False), '"\xc0"') - e = py.test.raises(UnicodeDecodeError, json.dumps, - (u"\u1234", "\xc0"), ensure_ascii=False) - assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ") - e = py.test.raises(UnicodeDecodeError, json.dumps, - ("\xc0", u"\u1234"), ensure_ascii=False) - assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ") + with pytest.raises(UnicodeDecodeError) as excinfo: + json.dumps((u"\u1234", "\xc0"), ensure_ascii=False) + assert str(excinfo.value).startswith( + "'ascii' codec can't decode byte 0xc0 ") + with 
pytest.raises(UnicodeDecodeError) as excinfo: + json.dumps(("\xc0", u"\u1234"), ensure_ascii=False) + assert str(excinfo.value).startswith( + "'ascii' codec can't decode byte 0xc0 ") def test_issue2191(): assert is_(json.dumps(u"xxx", ensure_ascii=False), u'"xxx"') + +jsondata = strategies.recursive( + strategies.none() | + strategies.booleans() | + strategies.floats(allow_nan=False) | + strategies.text(), + lambda children: strategies.lists(children) | + strategies.dictionaries(strategies.text(), children)) + + at given(jsondata) +def test_roundtrip(d): + assert json.loads(json.dumps(d)) == d diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -149,7 +149,7 @@ xz-devel # For lzma on PyPy3. (XXX plus the SLES11 version of libgdbm-dev and tk-dev) -On Mac OS X:: +On Mac OS X: Most of these build-time dependencies are installed alongside the Developer Tools. However, note that in order for the installation to diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -362,7 +362,11 @@ containers (as list items or in sets for example), the exact rule of equality used is "``if x is y or x == y``" (on both CPython and PyPy); as a consequence, because all ``nans`` are identical in PyPy, you -cannot have several of them in a set, unlike in CPython. (Issue `#1974`__) +cannot have several of them in a set, unlike in CPython. (Issue `#1974`__). +Another consequence is that ``cmp(float('nan'), float('nan')) == 0``, because +``cmp`` checks with ``is`` first whether the arguments are identical (there is +no good value to return from this call to ``cmp``, because ``cmp`` pretends +that there is a total order on floats, but that is wrong for NaNs). .. __: https://bitbucket.org/pypy/pypy/issue/1974/different-behaviour-for-collections-of diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -1,31 +1,41 @@ -=========================== -What's new in PyPy2.7 5.10+ -=========================== - -.. this is a revision shortly after release-pypy2.7-v5.9.0 -.. startrev:d56dadcef996 - -.. branch: cppyy-packaging -Cleanup and improve cppyy packaging - -.. branch: docs-osx-brew-openssl - -.. branch: keep-debug-symbols -Add a smartstrip tool, which can optionally keep the debug symbols in a -separate file, instead of just stripping them away. Use it in packaging - -.. branch: bsd-patches -Fix failures on FreeBSD, contributed by David Naylor as patches on the issue -tracker (issues 2694, 2695, 2696, 2697) - -.. branch: run-extra-tests -Run extra_tests/ in buildbot - -.. branch: vmprof-0.4.10 -Upgrade the _vmprof backend to vmprof 0.4.10 - -.. branch: fix-vmprof-stacklet-switch -Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) - -.. branch: win32-vcvars - +=========================== +What's new in PyPy2.7 5.10+ +=========================== + +.. this is a revision shortly after release-pypy2.7-v5.9.0 +.. startrev:d56dadcef996 + + +.. branch: cppyy-packaging + +Cleanup and improve cppyy packaging + +.. branch: docs-osx-brew-openssl + +.. branch: keep-debug-symbols + +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches + +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. 
branch: run-extra-tests + +Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 + +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch + +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + +.. branch rdict-fast-hash + +Make it possible to declare that the hash function of an r_dict is fast in RPython. diff --git a/pypy/doc/whatsnew-pypy2-5.6.0.rst b/pypy/doc/whatsnew-pypy2-5.6.0.rst --- a/pypy/doc/whatsnew-pypy2-5.6.0.rst +++ b/pypy/doc/whatsnew-pypy2-5.6.0.rst @@ -107,7 +107,7 @@ .. branch: newinitwarn -Match CPython's stricter handling of __new/init__ arguments +Match CPython's stricter handling of ``__new__``/``__init__`` arguments .. branch: openssl-1.1 diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -11,7 +11,7 @@ To build pypy-c you need a working python environment, and a C compiler. It is possible to translate with a CPython 2.6 or later, but this is not -the preferred way, because it will take a lot longer to run � depending +the preferred way, because it will take a lot longer to run – depending on your architecture, between two and three times as long. So head to `our downloads`_ and get the latest stable version. @@ -103,6 +103,7 @@ must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the ``...\9.0\VC`` directory, and edit it, changing the lines that set ``VCINSTALLDIR`` and ``WindowsSdkDir``:: + set VCINSTALLDIR=%~dp0\ set WindowsSdkDir=%~dp0\..\WinSDK\ diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py b/pypy/interpreter/astcompiler/test/test_astbuilder.py --- a/pypy/interpreter/astcompiler/test/test_astbuilder.py +++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py @@ -1404,3 +1404,7 @@ exc = py.test.raises(SyntaxError, self.get_ast, input).value assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode" " bytes in position 0-1: truncated \\xXX escape") + input = "u'\\x1'" + exc = py.test.raises(SyntaxError, self.get_ast, input).value + assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode" + " bytes in position 0-2: truncated \\xXX escape") diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -1,5 +1,8 @@ import py -from pypy.interpreter.unicodehelper import encode_utf8, decode_utf8 +import pytest +import struct +from pypy.interpreter.unicodehelper import ( + encode_utf8, decode_utf8, unicode_encode_utf_32_be) from pypy.interpreter.unicodehelper import encode_utf8sp, decode_utf8sp @@ -67,3 +70,23 @@ assert map(ord, got) == [0xd800, 0xdc00] got = decode_utf8sp(space, "\xf0\x90\x80\x80") assert map(ord, got) == [0x10000] + + at pytest.mark.parametrize('unich', [u"\ud800", u"\udc80"]) +def test_utf32_surrogates(unich): + assert (unicode_encode_utf_32_be(unich, 1, None) == + struct.pack('>i', ord(unich))) + with pytest.raises(UnicodeEncodeError): + unicode_encode_utf_32_be(unich, 1, None, allow_surrogates=False) + + def replace_with(ru, rs): + def errorhandler(errors, enc, msg, u, startingpos, endingpos): + if errors == 'strict': + raise UnicodeEncodeError(enc, u, startingpos, endingpos, msg) + return ru, rs, endingpos + return unicode_encode_utf_32_be( + u"<%s>" % unich, 3, None, + errorhandler, allow_surrogates=False) + + assert replace_with(u'rep', None) == u''.encode('utf-32-be') + assert 
(replace_with(None, '\xca\xfe\xca\xfe') == + '\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>') diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1,8 +1,13 @@ import sys from pypy.interpreter.error import OperationError, oefmt from rpython.rlib.objectmodel import specialize +from rpython.rlib.rarithmetic import intmask +from rpython.rlib.rstring import StringBuilder, UnicodeBuilder from rpython.rlib import runicode -from pypy.module._codecs import interp_codecs +from rpython.rlib.runicode import ( + default_unicode_error_encode, default_unicode_error_decode, + MAXUNICODE, BYTEORDER, BYTEORDER2, UNICHR) + _WIN32 = sys.platform == 'win32' _MACOSX = sys.platform == 'darwin' if _WIN32: @@ -40,6 +45,7 @@ # ____________________________________________________________ def fsdecode(space, w_string): + from pypy.module._codecs import interp_codecs state = space.fromcache(interp_codecs.CodecState) if _WIN32: bytes = space.bytes_w(w_string) @@ -70,6 +76,7 @@ return space.newunicode(uni) def fsencode(space, w_uni): + from pypy.module._codecs import interp_codecs state = space.fromcache(interp_codecs.CodecState) if _WIN32: uni = space.unicode_w(w_uni) @@ -107,6 +114,7 @@ # These functions take and return unwrapped rpython strings and unicodes def decode_unicode_escape(space, string): + from pypy.module._codecs import interp_codecs state = space.fromcache(interp_codecs.CodecState) unicodedata_handler = state.get_unicodedata_handler(space) result, consumed = runicode.str_decode_unicode_escape( @@ -157,3 +165,196 @@ # encoding error, it should always be reversible, and the reverse is # encode_utf8sp(). return decode_utf8(space, string, allow_surrogates=True) + +# ____________________________________________________________ +# utf-32 + +def str_decode_utf_32(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "native", 'utf-32-' + BYTEORDER2) + return result, length + +def str_decode_utf_32_be(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "big", 'utf-32-be') + return result, length + +def str_decode_utf_32_le(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "little", 'utf-32-le') + return result, length + +BOM32_DIRECT = intmask(0x0000FEFF) +BOM32_REVERSE = intmask(0xFFFE0000) + +def str_decode_utf_32_helper(s, size, errors, final=True, + errorhandler=None, + byteorder="native", + public_encoding_name='utf32'): + if errorhandler is None: + errorhandler = default_unicode_error_decode + bo = 0 + + if BYTEORDER == 'little': + iorder = [0, 1, 2, 3] + else: + iorder = [3, 2, 1, 0] + + # Check for BOM marks (U+FEFF) in the input and adjust current + # byte order setting accordingly. In native mode, the leading BOM + # mark is skipped, in all other modes, it is copied to the output + # stream as-is (giving a ZWNBSP character). 
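For reference, the byte layout this BOM logic deals with, illustrated here with CPython's own utf-32 codecs (not part of the patch, shown only as a reminder of the expected behaviour):

    import codecs, sys

    data = u"\u20ac".encode("utf-32")   # native order: BOM first, then 4 bytes per codepoint
    if sys.byteorder == "little":
        assert data == codecs.BOM_UTF32_LE + "\xac\x20\x00\x00"
    else:
        assert data == codecs.BOM_UTF32_BE + "\x00\x00\x20\xac"
    # the endian-specific variants never emit a BOM:
    assert u"\u20ac".encode("utf-32-be") == "\x00\x00\x20\xac"
    assert u"\u20ac".encode("utf-32-le") == "\xac\x20\x00\x00"
    # and a decoder in native mode consumes a leading BOM instead of yielding U+FEFF:
    assert data.decode("utf-32") == u"\u20ac"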
+ pos = 0 + if byteorder == 'native': + if size >= 4: + bom = intmask( + (ord(s[iorder[3]]) << 24) | (ord(s[iorder[2]]) << 16) | + (ord(s[iorder[1]]) << 8) | ord(s[iorder[0]])) + if BYTEORDER == 'little': + if bom == BOM32_DIRECT: + pos += 4 + bo = -1 + elif bom == BOM32_REVERSE: + pos += 4 + bo = 1 + else: + if bom == BOM32_DIRECT: + pos += 4 + bo = 1 + elif bom == BOM32_REVERSE: + pos += 4 + bo = -1 + elif byteorder == 'little': + bo = -1 + else: + bo = 1 + if size == 0: + return u'', 0, bo + if bo == -1: + # force little endian + iorder = [0, 1, 2, 3] + elif bo == 1: + # force big endian + iorder = [3, 2, 1, 0] + + result = UnicodeBuilder(size // 4) + + while pos < size: + # remaining bytes at the end? (size should be divisible by 4) + if len(s) - pos < 4: + if not final: + break + r, pos = errorhandler(errors, public_encoding_name, + "truncated data", + s, pos, len(s)) + result.append(r) + if len(s) - pos < 4: + break + continue + ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) | + (ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]])) + if ch >= 0x110000: + r, pos = errorhandler(errors, public_encoding_name, + "codepoint not in range(0x110000)", + s, pos, len(s)) + result.append(r) + continue + + if MAXUNICODE < 65536 and ch >= 0x10000: + ch -= 0x10000L + result.append(unichr(0xD800 + (ch >> 10))) + result.append(unichr(0xDC00 + (ch & 0x03FF))) + else: + result.append(UNICHR(ch)) + pos += 4 + return result.build(), pos, bo + +def _STORECHAR32(result, CH, byteorder): + c0 = chr(((CH) >> 24) & 0xff) + c1 = chr(((CH) >> 16) & 0xff) + c2 = chr(((CH) >> 8) & 0xff) + c3 = chr((CH) & 0xff) + if byteorder == 'little': + result.append(c3) + result.append(c2) + result.append(c1) + result.append(c0) + else: + result.append(c0) + result.append(c1) + result.append(c2) + result.append(c3) + +def unicode_encode_utf_32_helper(s, size, errors, + errorhandler=None, + allow_surrogates=True, + byteorder='little', + public_encoding_name='utf32'): + if errorhandler is None: + errorhandler = default_unicode_error_encode + if size == 0: + if byteorder == 'native': + result = StringBuilder(4) + _STORECHAR32(result, 0xFEFF, BYTEORDER) + return result.build() + return "" + + result = StringBuilder(size * 4 + 4) + if byteorder == 'native': + _STORECHAR32(result, 0xFEFF, BYTEORDER) + byteorder = BYTEORDER + + pos = 0 + while pos < size: + ch = ord(s[pos]) + pos += 1 + ch2 = 0 + if not allow_surrogates and 0xD800 <= ch < 0xE000: + ru, rs, pos = errorhandler( + errors, public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + if rs is not None: + # py3k only + if len(rs) % 4 != 0: + errorhandler( + 'strict', public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + result.append(rs) + continue + for ch in ru: + if ord(ch) < 0xD800: + _STORECHAR32(result, ord(ch), byteorder) + else: + errorhandler( + 'strict', public_encoding_name, + 'surrogates not allowed', s, pos - 1, pos) + continue + if 0xD800 <= ch < 0xDC00 and MAXUNICODE < 65536 and pos < size: + ch2 = ord(s[pos]) + if 0xDC00 <= ch2 < 0xE000: + ch = (((ch & 0x3FF) << 10) | (ch2 & 0x3FF)) + 0x10000 + pos += 1 + _STORECHAR32(result, ch, byteorder) + + return result.build() + +def unicode_encode_utf_32(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "native", + 'utf-32-' + BYTEORDER2) + +def unicode_encode_utf_32_be(s, size, errors, + errorhandler=None, allow_surrogates=True): + return 
unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "big", + 'utf-32-be') + +def unicode_encode_utf_32_le(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "little", + 'utf-32-le') diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -2,12 +2,14 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rstring import UnicodeBuilder, StringBuilder +from rpython.rlib import runicode from rpython.rlib.runicode import ( code_to_unichr, MAXUNICODE, raw_unicode_escape_helper_unicode) from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault +from pypy.interpreter import unicodehelper from pypy.module.unicodedata import unicodedb @@ -244,7 +246,8 @@ def xmlcharrefreplace_errors(space, w_exc): check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object'))) + w_obj = space.getattr(w_exc, space.newtext('object')) + obj = space.realunicode_w(w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) @@ -301,7 +304,8 @@ def namereplace_errors(space, w_exc): check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object'))) + w_obj = space.getattr(w_exc, space.newtext('object')) + obj = space.realunicode_w(w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) @@ -611,48 +615,47 @@ return _call_codec(space, w_decoder, w_obj, "decoding", encoding, errors) # ____________________________________________________________ -# delegation to runicode +# delegation to runicode/unicodehelper -from rpython.rlib import runicode +def _find_implementation(impl_name): + try: + func = getattr(unicodehelper, impl_name) + except AttributeError: + if hasattr(runicode, 'py3k_' + impl_name): + impl_name = 'py3k_' + impl_name + func = getattr(runicode, impl_name) + return func def make_encoder_wrapper(name): rname = "unicode_encode_%s" % (name.replace("_encode", ""), ) - assert hasattr(runicode, rname) - if hasattr(runicode, 'py3k_' + rname): - rname = 'py3k_' + rname + func = _find_implementation(rname) @unwrap_spec(uni=unicode, errors='text_or_none') def wrap_encoder(space, uni, errors="strict"): if errors is None: errors = 'strict' state = space.fromcache(CodecState) - func = getattr(runicode, rname) result = func(uni, len(uni), errors, state.encode_error_handler) return space.newtuple([space.newbytes(result), space.newint(len(uni))]) - wrap_encoder.func_name = rname + wrap_encoder.__name__ = func.__name__ globals()[name] = wrap_encoder def make_utf_encoder_wrapper(name): rname = "unicode_encode_%s" % (name.replace("_encode", ""), ) - assert hasattr(runicode, rname) - if hasattr(runicode, 'py3k_' + rname): - rname = 'py3k_' + rname + func = _find_implementation(rname) @unwrap_spec(uni=unicode, errors='text_or_none') def wrap_encoder(space, uni, errors="strict"): if errors is None: errors = 'strict' state = space.fromcache(CodecState) - func = getattr(runicode, rname) 
result = func(uni, len(uni), errors, state.encode_error_handler, allow_surrogates=False) return space.newtuple([space.newbytes(result), space.newint(len(uni))]) - wrap_encoder.func_name = rname + wrap_encoder.__name__ = func.__name__ globals()[name] = wrap_encoder def make_decoder_wrapper(name): rname = "str_decode_%s" % (name.replace("_decode", ""), ) - assert hasattr(runicode, rname) - if hasattr(runicode, 'py3k_' + rname): - rname = 'py3k_' + rname + func = _find_implementation(rname) @unwrap_spec(string='bufferstr', errors='text_or_none', w_final=WrappedDefault(False)) def wrap_decoder(space, string, errors="strict", w_final=None): @@ -660,11 +663,10 @@ errors = 'strict' final = space.is_true(w_final) state = space.fromcache(CodecState) - func = getattr(runicode, rname) result, consumed = func(string, len(string), errors, final, state.decode_error_handler) return space.newtuple([space.newunicode(result), space.newint(consumed)]) - wrap_decoder.func_name = rname + wrap_decoder.__name__ = func.__name__ globals()[name] = wrap_decoder for encoder in [ diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -116,10 +116,10 @@ raises(TypeError, charmap_decode, b'\xff', "strict", {0xff: 0x110000}) assert (charmap_decode(b"\x00\x01\x02", "strict", {0: 0x10FFFF, 1: ord('b'), 2: ord('c')}) == - u"\U0010FFFFbc", 3) + (u"\U0010FFFFbc", 3)) assert (charmap_decode(b"\x00\x01\x02", "strict", {0: u'\U0010FFFF', 1: u'b', 2: u'c'}) == - u"\U0010FFFFbc", 3) + (u"\U0010FFFFbc", 3)) def test_escape_decode_errors(self): from _codecs import escape_decode as decode @@ -590,6 +590,12 @@ def test_backslashreplace(self): import codecs + sin = u"a\xac\u1234\u20ac\u8000\U0010ffff" + expected = b"a\\xac\\u1234\\u20ac\\u8000\\U0010ffff" + assert sin.encode('ascii', 'backslashreplace') == expected + expected = b"a\xac\\u1234\xa4\\u8000\\U0010ffff" + assert sin.encode("iso-8859-15", "backslashreplace") == expected + assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') == b'a\\xac\u1234\u20ac\u8000' assert b'\x00\x60\x80'.decode( 'ascii', 'backslashreplace') == u'\x00\x60\\x80' @@ -732,7 +738,7 @@ def handler_unicodeinternal(exc): if not isinstance(exc, UnicodeDecodeError): raise TypeError("don't know how to handle %r" % exc) - return ("\x01", 1) + return (u"\x01", 1) codecs.register_error("test.hui", handler_unicodeinternal) res = b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui") if sys.maxunicode > 65535: @@ -939,3 +945,31 @@ assert len(w) == 1 assert str(w[0].message) == warning_msg assert w[0].category == DeprecationWarning + + def test_xmlcharrefreplace(self): + r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('latin1', 'xmlcharrefreplace') + assert r == b'ሴ\x80⍅y\xab' + r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('ascii', 'xmlcharrefreplace') + assert r == b'ሴ€⍅y«' + + def test_errorhandler_collection(self): + import _codecs + errors = [] + def record_error(exc): + if not isinstance(exc, UnicodeEncodeError): + raise TypeError("don't know how to handle %r" % exc) + errors.append(exc.object[exc.start:exc.end]) + return (u'', exc.end) + _codecs.register_error("test.record", record_error) + + sin = u"\xac\u1234\u1234\u20ac\u8000" + assert sin.encode("ascii", "test.record") == b"" + assert errors == [sin] + + errors = [] + assert sin.encode("latin-1", "test.record") == b"\xac" + assert errors == [u'\u1234\u1234\u20ac\u8000'] + + errors = [] + assert 
sin.encode("iso-8859-15", "test.record") == b"\xac\xa4" + assert errors == [u'\u1234\u1234', u'\u8000'] diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -429,6 +429,7 @@ if not space.isinstance_w(w_decoded, space.w_unicode): msg = "decoder should return a string result, not '%T'" raise oefmt(space.w_TypeError, msg, w_decoded) + return w_decoded class W_TextIOWrapper(W_TextIOBase): @@ -997,12 +998,13 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self.decoded.set(space, w_decoded) + w_decoded = check_decoded(space, w_decoded) # Skip chars_to_skip of the decoded characters - if len(self.decoded.text) < cookie.chars_to_skip: + if space.len_w(w_decoded) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") + self.decoded.set(space, w_decoded) self.decoded.pos = cookie.chars_to_skip else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -1015,11 +1017,9 @@ def tell_w(self, space): self._check_closed(space) - if not self.seekable: self._unsupportedoperation(space, "underlying stream is not seekable") - if not self.telling: raise oefmt(space.w_IOError, "telling position disabled by next() call") @@ -1089,14 +1089,14 @@ # We didn't get enough decoded data; signal EOF to get more. w_decoded = space.call_method(self.w_decoder, "decode", space.newbytes(""), - space.newint(1)) # final=1 + space.newint(1)) # final=1 check_decoded(space, w_decoded) - chars_decoded += len(space.unicode_w(w_decoded)) + chars_decoded += space.len_w(w_decoded) cookie.need_eof = 1 if chars_decoded < chars_to_skip: raise oefmt(space.w_IOError, - "can't reconstruct logical file position") + "can't reconstruct logical file position") finally: space.call_method(self.w_decoder, "setstate", w_saved_state) diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -40,7 +40,8 @@ w_newline=space.newtext(mode)) lines = [] for limit in limits: - line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) + w_line = w_textio.readline_w(space, space.newint(limit)) + line = space.unicode_w(w_line) if limit >= 0: assert len(line) <= limit if line: diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -76,7 +76,7 @@ self.ll_chars = rffi.str2charp(s) self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw') self.pos = 0 - self.cache = r_dict(slice_eq, slice_hash) + self.cache = r_dict(slice_eq, slice_hash, simple_hash_eq=True) def close(self): rffi.free_charp(self.ll_chars) diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py --- a/pypy/module/_pypyjson/interp_encoder.py +++ b/pypy/module/_pypyjson/interp_encoder.py @@ -49,24 +49,24 @@ first = 0 for i in range(first, len(u)): - c = u[i] - if c <= u'~': - if c == u'"' or c == u'\\': + c = ord(u[i]) + if c <= ord('~'): + if c == ord('"') or c == ord('\\'): sb.append('\\') - elif c < u' ': - sb.append(ESCAPE_BEFORE_SPACE[ord(c)]) + elif c < ord(' '): + sb.append(ESCAPE_BEFORE_SPACE[c]) continue - sb.append(chr(ord(c))) + sb.append(chr(c)) else: - if c <= u'\uffff': + if c <= ord(u'\uffff'): sb.append('\\u') - sb.append(HEX[ord(c) >> 12]) - 
sb.append(HEX[(ord(c) >> 8) & 0x0f]) - sb.append(HEX[(ord(c) >> 4) & 0x0f]) - sb.append(HEX[ord(c) & 0x0f]) + sb.append(HEX[c >> 12]) + sb.append(HEX[(c >> 8) & 0x0f]) + sb.append(HEX[(c >> 4) & 0x0f]) + sb.append(HEX[c & 0x0f]) else: # surrogate pair - n = ord(c) - 0x10000 + n = c - 0x10000 s1 = 0xd800 | ((n >> 10) & 0x3ff) sb.append('\\ud') sb.append(HEX[(s1 >> 8) & 0x0f]) diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py --- a/pypy/module/_rawffi/alt/type_converter.py +++ b/pypy/module/_rawffi/alt/type_converter.py @@ -128,7 +128,7 @@ intval: lltype.Signed """ self.error(w_ffitype, w_obj) - + def handle_unichar(self, w_ffitype, w_obj, intval): """ intval: lltype.Signed @@ -174,7 +174,7 @@ def handle_struct_rawffi(self, w_ffitype, w_structinstance): """ This method should be killed as soon as we remove support for _rawffi structures - + w_structinstance: W_StructureInstance """ self.error(w_ffitype, w_structinstance) @@ -349,7 +349,7 @@ def get_struct_rawffi(self, w_ffitype, w_structdescr): """ This should be killed as soon as we kill support for _rawffi structures - + Return type: lltype.Unsigned (the address of the structure) """ diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -580,11 +580,13 @@ @unwrap_spec(w_groupnum=WrappedDefault(0)) def start_w(self, w_groupnum): - return self.space.newint(self.do_span(w_groupnum)[0]) + start, end = self.do_span(w_groupnum) + return self.space.newint(start) @unwrap_spec(w_groupnum=WrappedDefault(0)) def end_w(self, w_groupnum): - return self.space.newint(self.do_span(w_groupnum)[1]) + start, end = self.do_span(w_groupnum) + return self.space.newint(end) @unwrap_spec(w_groupnum=WrappedDefault(0)) def span_w(self, w_groupnum): diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py --- a/pypy/module/_sre/test/test_app_sre.py +++ b/pypy/module/_sre/test/test_app_sre.py @@ -94,6 +94,14 @@ assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus") assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs") + def test_findall_unicode(self): + import re + assert [u"\u1234"] == re.findall(u"\u1234", u"\u1000\u1234\u2000") + assert ["a", "u"] == re.findall("b(.)", "abalbus") + assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus") + assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs") + assert [u"xyz"] == re.findall(u".*yz", u"xyz") + def test_finditer(self): import re it = re.finditer("b(.)", "brabbel") @@ -1046,3 +1054,14 @@ import re raises(ValueError, re.split, '', '') re.split("a*", '') # -> warning + +class AppTestUnicodeExtra: + def test_string_attribute(self): + import re + match = re.search(u"\u1234", u"\u1233\u1234\u1235") + assert match.string == u"\u1233\u1234\u1235" + + def test_match_start(self): + import re + match = re.search(u"\u1234", u"\u1233\u1234\u1235") + assert match.start() == 1 diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -15,6 +15,7 @@ from pypy.module.cpyext.bytesobject import PyBytes_Check, PyBytes_FromObject from pypy.module._codecs.interp_codecs import ( CodecState, latin_1_decode, utf_16_decode, utf_32_decode) +from pypy.interpreter import unicodehelper from pypy.objspace.std import unicodeobject from rpython.rlib import rstring, runicode from rpython.tool.sourcetools import 
func_renamer @@ -869,7 +870,7 @@ else: errors = None - result, length, byteorder = runicode.str_decode_utf_32_helper( + result, length, byteorder = unicodehelper.str_decode_utf_32_helper( string, size, errors, True, # final ? false for multiple passes? None, # errorhandler diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py --- a/pypy/module/time/interp_time.py +++ b/pypy/module/time/interp_time.py @@ -245,7 +245,7 @@ LPDWORD = rwin32.LPDWORD _GetSystemTimeAdjustment = rwin32.winexternal( 'GetSystemTimeAdjustment', - [LPDWORD, LPDWORD, rwin32.LPBOOL], + [LPDWORD, LPDWORD, rwin32.LPBOOL], rffi.INT) def gettimeofday(space, w_info=None): with lltype.scoped_alloc(rwin32.FILETIME) as system_time: @@ -270,7 +270,7 @@ lltype.scoped_alloc(rwin32.LPBOOL.TO, 1) as is_time_adjustment_disabled: _GetSystemTimeAdjustment(time_adjustment, time_increment, is_time_adjustment_disabled) - + _setinfo(space, w_info, "GetSystemTimeAsFileTime()", time_increment[0] * 1e-7, False, True) return space.newfloat(tv_sec + tv_usec * 1e-6) @@ -303,7 +303,7 @@ widen(t.c_millitm) * 0.001) if w_info is not None: _setinfo(space, w_info, "ftime()", 1e-3, - False, True) + False, True) return space.newfloat(result) else: if w_info: @@ -955,7 +955,7 @@ [rffi.CArrayPtr(lltype.SignedLongLong)], rwin32.DWORD) QueryPerformanceFrequency = rwin32.winexternal( - 'QueryPerformanceFrequency', [rffi.CArrayPtr(lltype.SignedLongLong)], + 'QueryPerformanceFrequency', [rffi.CArrayPtr(lltype.SignedLongLong)], rffi.INT) def win_perf_counter(space, w_info=None): with lltype.scoped_alloc(rffi.CArray(rffi.lltype.SignedLongLong), 1) as a: diff --git a/pypy/module/time/test/test_time.py b/pypy/module/time/test/test_time.py --- a/pypy/module/time/test/test_time.py +++ b/pypy/module/time/test/test_time.py @@ -19,6 +19,8 @@ raises(TypeError, time.sleep, "foo") time.sleep(0.12345) raises(ValueError, time.sleep, -1.0) + raises(ValueError, time.sleep, float('nan')) + raises(OverflowError, time.sleep, float('inf')) def test_clock(self): import time diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -268,10 +268,10 @@ result[0] = ch if not composed: # If decomposed normalization we are done - return space.newunicode(u''.join([unichr(i) for i in result[:j]])) + return self.build(space, result, stop=j) if j <= 1: - return space.newunicode(u''.join([unichr(i) for i in result[:j]])) + return self.build(space, result, stop=j) current = result[0] starter_pos = 0 @@ -319,7 +319,10 @@ result[starter_pos] = current - return space.newunicode(u''.join([unichr(i) for i in result[:next_insert]])) + return self.build(space, result, stop=next_insert) + + def build(self, space, r, stop): + return space.newunicode(u''.join([unichr(i) for i in r[:stop]])) methods = {} diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -57,6 +57,11 @@ assert 'a' + 'b' == 'ab' raises(TypeError, operator.add, b'a', 'b') + def test_getitem(self): + assert u'abc'[2] == 'c' + raises(IndexError, u'abc'.__getitem__, 15) + assert u'g\u0105\u015b\u0107'[2] == u'\u015b' + def test_join(self): def check(a, b): assert a == b @@ -82,6 +87,8 @@ assert '\n\n'.splitlines() == ['', ''] assert 'a\nb\nc'.splitlines(1) == ['a\n', 'b\n', 'c'] assert '\na\nb\n'.splitlines(1) == ['\n', 'a\n', 'b\n'] + 
assert ((u'a' + b'\xc2\x85'.decode('utf8') + u'b\n').splitlines() == + ['a', 'b']) def test_zfill(self): assert '123'.zfill(2) == '123' @@ -128,55 +135,57 @@ raises(ValueError, 'abc'.split, '') raises(ValueError, 'abc'.split, '') assert ' a b c d'.split(None, 0) == ['a b c d'] + assert u'a\nb\u1680c'.split() == [u'a', u'b', u'c'] def test_rsplit(self): - assert "".rsplit() == [] - assert " ".rsplit() == [] - assert "a".rsplit() == ['a'] - assert "a".rsplit("a", 1) == ['', ''] - assert " ".rsplit(" ", 1) == ['', ''] - assert "aa".rsplit("a", 2) == ['', '', ''] - assert " a ".rsplit() == ['a'] - assert "a b c".rsplit() == ['a','b','c'] - assert 'this is the rsplit function'.rsplit() == ['this', 'is', 'the', 'rsplit', 'function'] - assert 'a|b|c|d'.rsplit('|') == ['a', 'b', 'c', 'd'] - assert 'a|b|c|d'.rsplit('|') == ['a', 'b', 'c', 'd'] - assert 'a|b|c|d'.rsplit('|') == ['a', 'b', 'c', 'd'] - assert 'a|b|c|d'.rsplit('|', 2) == ['a|b', 'c', 'd'] - assert 'a b c d'.rsplit(None, 1) == ['a b c', 'd'] - assert 'a b c d'.rsplit(None, 2) == ['a b', 'c', 'd'] - assert 'a b c d'.rsplit(None, 3) == ['a', 'b', 'c', 'd'] - assert 'a b c d'.rsplit(None, 4) == ['a', 'b', 'c', 'd'] - assert 'a b c d'.rsplit(None, 0) == ['a b c d'] - assert 'a b c d'.rsplit(None, 2) == ['a b', 'c', 'd'] - assert 'a b c d '.rsplit() == ['a', 'b', 'c', 'd'] - assert 'a//b//c//d'.rsplit('//') == ['a', 'b', 'c', 'd'] - assert 'endcase test'.rsplit('test') == ['endcase ', ''] - raises(ValueError, 'abc'.rsplit, '') - raises(ValueError, 'abc'.rsplit, '') - raises(ValueError, 'abc'.rsplit, '') - assert ' a b c '.rsplit(None, 0) == [' a b c'] - assert ''.rsplit('aaa') == [''] + assert u"".rsplit() == [] + assert u" ".rsplit() == [] + assert u"a".rsplit() == [u'a'] + assert u"a".rsplit(u"a", 1) == [u'', u''] + assert u" ".rsplit(u" ", 1) == [u'', u''] + assert u"aa".rsplit(u"a", 2) == [u'', u'', u''] + assert u" a ".rsplit() == [u'a'] + assert u"a b c".rsplit() == [u'a',u'b',u'c'] + assert u'this is the rsplit function'.rsplit() == [u'this', u'is', u'the', u'rsplit', u'function'] + assert u'a|b|c|d'.rsplit(u'|') == [u'a', u'b', u'c', u'd'] + assert u'a|b|c|d'.rsplit('|') == [u'a', u'b', u'c', u'd'] + assert 'a|b|c|d'.rsplit(u'|') == [u'a', u'b', u'c', u'd'] + assert u'a|b|c|d'.rsplit(u'|', 2) == [u'a|b', u'c', u'd'] + assert u'a b c d'.rsplit(None, 1) == [u'a b c', u'd'] + assert u'a b c d'.rsplit(None, 2) == [u'a b', u'c', u'd'] + assert u'a b c d'.rsplit(None, 3) == [u'a', u'b', u'c', u'd'] + assert u'a b c d'.rsplit(None, 4) == [u'a', u'b', u'c', u'd'] + assert u'a b c d'.rsplit(None, 0) == [u'a b c d'] + assert u'a b c d'.rsplit(None, 2) == [u'a b', u'c', u'd'] + assert u'a b c d '.rsplit() == [u'a', u'b', u'c', u'd'] + assert u'a//b//c//d'.rsplit(u'//') == [u'a', u'b', u'c', u'd'] + assert u'endcase test'.rsplit(u'test') == [u'endcase ', u''] + raises(ValueError, u'abc'.rsplit, u'') + raises(ValueError, u'abc'.rsplit, '') + raises(ValueError, 'abc'.rsplit, u'') + assert u' a b c '.rsplit(None, 0) == [u' a b c'] + assert u''.rsplit('aaa') == [u''] + assert u'a\nb\u1680c'.rsplit() == [u'a', u'b', u'c'] def test_center(self): - s="a b" - assert s.center(0) == "a b" - assert s.center(1) == "a b" - assert s.center(2) == "a b" - assert s.center(3) == "a b" - assert s.center(4) == "a b " - assert s.center(5) == " a b " - assert s.center(6) == " a b " - assert s.center(7) == " a b " - assert s.center(8) == " a b " - assert s.center(9) == " a b " - assert 'abc'.center(10) == ' abc ' - assert 'abc'.center(6) == ' abc ' - assert 
'abc'.center(3) == 'abc' - assert 'abc'.center(2) == 'abc' - assert 'abc'.center(5, '*') == '*abc*' # Python 2.4 - assert 'abc'.center(5, '*') == '*abc*' # Python 2.4 - raises(TypeError, 'abc'.center, 4, 'cba') + s=u"a b" + assert s.center(0) == u"a b" + assert s.center(1) == u"a b" + assert s.center(2) == u"a b" + assert s.center(3) == u"a b" + assert s.center(4) == u"a b " + assert s.center(5) == u" a b " + assert s.center(6) == u" a b " + assert s.center(7) == u" a b " + assert s.center(8) == u" a b " + assert s.center(9) == u" a b " + assert u'abc'.center(10) == u' abc ' + assert u'abc'.center(6) == u' abc ' + assert u'abc'.center(3) == u'abc' + assert u'abc'.center(2) == u'abc' + assert u'abc'.center(5, u'*') == u'*abc*' # Python 2.4 + assert u'abc'.center(5, '*') == u'*abc*' # Python 2.4 + raises(TypeError, u'abc'.center, 4, u'cba') def test_title(self): assert "brown fox".title() == "Brown Fox" @@ -186,23 +195,25 @@ assert "bro!wn fox".title() == "Bro!Wn Fox" assert u'A\u03a3 \u1fa1xy'.title() == u'A\u03c2 \u1fa9xy' assert u'A\u03a3A'.title() == u'A\u03c3a' + assert u"brow\u4321n fox".title() == u"Brow\u4321N Fox" + assert u'\ud800'.title() == u'\ud800' def test_istitle(self): - assert "".istitle() == False - assert "!".istitle() == False - assert "!!".istitle() == False - assert "brown fox".istitle() == False - assert "!brown fox".istitle() == False - assert "bROWN fOX".istitle() == False - assert "Brown Fox".istitle() == True - assert "bro!wn fox".istitle() == False - assert "Bro!wn fox".istitle() == False - assert "!brown Fox".istitle() == False - assert "!Brown Fox".istitle() == True - assert "Brow&&&&N Fox".istitle() == True - assert "!Brow&&&&n Fox".istitle() == False - assert '\u1FFc'.istitle() - assert 'Greek \u1FFcitlecases ...'.istitle() + assert u"".istitle() == False + assert u"!".istitle() == False + assert u"!!".istitle() == False + assert u"brown fox".istitle() == False + assert u"!brown fox".istitle() == False + assert u"bROWN fOX".istitle() == False + assert u"Brown Fox".istitle() == True + assert u"bro!wn fox".istitle() == False + assert u"Bro!wn fox".istitle() == False + assert u"!brown Fox".istitle() == False + assert u"!Brown Fox".istitle() == True + assert u"Brow&&&&N Fox".istitle() == True + assert u"!Brow&&&&n Fox".istitle() == False + assert u'\u1FFc'.istitle() + assert u'Greek \u1FFcitlecases ...'.istitle() def test_islower_isupper_with_titlecase(self): # \u01c5 is a char which is neither lowercase nor uppercase, but @@ -220,24 +231,36 @@ assert "_!var".isidentifier() is False assert "3abc".isidentifier() is False + def test_lower_upper(self): + assert u'a'.lower() == u'a' + assert u'A'.lower() == u'a' + assert u'\u0105'.lower() == u'\u0105' + assert u'\u0104'.lower() == u'\u0105' + assert u'\ud800'.lower() == u'\ud800' + assert u'a'.upper() == u'A' + assert u'A'.upper() == u'A' + assert u'\u0105'.upper() == u'\u0104' + assert u'\u0104'.upper() == u'\u0104' + assert u'\ud800'.upper() == u'\ud800' + def test_capitalize(self): - assert "brown fox".capitalize() == "Brown fox" - assert ' hello '.capitalize() == ' hello ' - assert 'Hello '.capitalize() == 'Hello ' - assert 'hello '.capitalize() == 'Hello ' - assert 'aaaa'.capitalize() == 'Aaaa' - assert 'AaAa'.capitalize() == 'Aaaa' + assert u"brown fox".capitalize() == u"Brown fox" + assert u' hello '.capitalize() == u' hello ' + assert u'Hello '.capitalize() == u'Hello ' + assert u'hello '.capitalize() == u'Hello ' + assert u'aaaa'.capitalize() == u'Aaaa' + assert u'AaAa'.capitalize() == u'Aaaa' # check that 
titlecased chars are lowered correctly # \u1ffc is the titlecased char - assert ('\u1ff3\u1ff3\u1ffc\u1ffc'.capitalize() == - '\u03a9\u0399\u1ff3\u1ff3\u1ff3') + assert (u'\u1ff3\u1ff3\u1ffc\u1ffc'.capitalize() == + u'\u03a9\u0399\u1ff3\u1ff3\u1ff3') # check with cased non-letter chars - assert ('\u24c5\u24ce\u24c9\u24bd\u24c4\u24c3'.capitalize() == - '\u24c5\u24e8\u24e3\u24d7\u24de\u24dd') - assert ('\u24df\u24e8\u24e3\u24d7\u24de\u24dd'.capitalize() == - '\u24c5\u24e8\u24e3\u24d7\u24de\u24dd') - assert '\u2160\u2161\u2162'.capitalize() == '\u2160\u2171\u2172' - assert '\u2170\u2171\u2172'.capitalize() == '\u2160\u2171\u2172' + assert (u'\u24c5\u24ce\u24c9\u24bd\u24c4\u24c3'.capitalize() == + u'\u24c5\u24e8\u24e3\u24d7\u24de\u24dd') + assert (u'\u24df\u24e8\u24e3\u24d7\u24de\u24dd'.capitalize() == + u'\u24c5\u24e8\u24e3\u24d7\u24de\u24dd') + assert u'\u2160\u2161\u2162'.capitalize() == u'\u2160\u2171\u2172' + assert u'\u2170\u2171\u2172'.capitalize() == u'\u2160\u2171\u2172' # check with Ll chars with no upper - nothing changes here assert ('\u019b\u1d00\u1d86\u0221\u1fb7'.capitalize() == '\u019b\u1d00\u1d86\u0221\u1fb7') @@ -261,34 +284,36 @@ def test_isprintable_wide(self): assert '\U0001F46F'.isprintable() # Since unicode 6.0 assert not '\U000E0020'.isprintable() + assert u'\ud800'.capitalize() == u'\ud800' + assert u'xx\ud800'.capitalize() == u'Xx\ud800' def test_rjust(self): - s = "abc" + s = u"abc" assert s.rjust(2) == s assert s.rjust(3) == s - assert s.rjust(4) == " " + s - assert s.rjust(5) == " " + s - assert 'abc'.rjust(10) == ' abc' - assert 'abc'.rjust(6) == ' abc' - assert 'abc'.rjust(3) == 'abc' - assert 'abc'.rjust(2) == 'abc' - assert 'abc'.rjust(5, '*') == '**abc' # Python 2.4 - assert 'abc'.rjust(5, '*') == '**abc' # Python 2.4 - raises(TypeError, 'abc'.rjust, 5, 'xx') + assert s.rjust(4) == u" " + s + assert s.rjust(5) == u" " + s + assert u'abc'.rjust(10) == u' abc' + assert u'abc'.rjust(6) == u' abc' + assert u'abc'.rjust(3) == u'abc' + assert u'abc'.rjust(2) == u'abc' + assert u'abc'.rjust(5, u'*') == u'**abc' # Python 2.4 + assert u'abc'.rjust(5, '*') == u'**abc' # Python 2.4 + raises(TypeError, u'abc'.rjust, 5, u'xx') def test_ljust(self): - s = "abc" + s = u"abc" assert s.ljust(2) == s assert s.ljust(3) == s - assert s.ljust(4) == s + " " - assert s.ljust(5) == s + " " - assert 'abc'.ljust(10) == 'abc ' - assert 'abc'.ljust(6) == 'abc ' - assert 'abc'.ljust(3) == 'abc' - assert 'abc'.ljust(2) == 'abc' - assert 'abc'.ljust(5, '*') == 'abc**' # Python 2.4 - assert 'abc'.ljust(5, '*') == 'abc**' # Python 2.4 - raises(TypeError, 'abc'.ljust, 6, '') + assert s.ljust(4) == s + u" " + assert s.ljust(5) == s + u" " + assert u'abc'.ljust(10) == u'abc ' + assert u'abc'.ljust(6) == u'abc ' + assert u'abc'.ljust(3) == u'abc' + assert u'abc'.ljust(2) == u'abc' + assert u'abc'.ljust(5, u'*') == u'abc**' # Python 2.4 + assert u'abc'.ljust(5, '*') == u'abc**' # Python 2.4 + raises(TypeError, u'abc'.ljust, 6, u'') def test_replace(self): assert 'one!two!three!'.replace('!', '@', 1) == 'one at two!three!' @@ -300,6 +325,16 @@ assert 'one!two!three!'.replace('!', '@') == 'one at two@three@' assert 'one!two!three!'.replace('x', '@') == 'one!two!three!' assert 'one!two!three!'.replace('x', '@', 2) == 'one!two!three!' 
+ assert u'\u1234'.replace(u'', '-') == u'-\u1234-' + assert u'\u0234\u5678'.replace('', u'-') == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', 0) == u'\u0234\u5678' + assert u'\u0234\u5678'.replace('', u'-', 1) == u'-\u0234\u5678' + assert u'\u0234\u5678'.replace('', u'-', 2) == u'-\u0234-\u5678' + assert u'\u0234\u5678'.replace('', u'-', 3) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', 4) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', 700) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', -1) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', -42) == u'-\u0234-\u5678-' assert 'abc'.replace('', '-') == '-a-b-c-' assert 'abc'.replace('', '-', 3) == '-a-b-c' assert 'abc'.replace('', '-', 0) == 'abc' @@ -387,6 +422,9 @@ assert ''.startswith('a') is False assert 'x'.startswith('xx') is False assert 'y'.startswith('xx') is False + assert u'\u1234\u5678\u4321'.startswith(u'\u1234') is True + assert u'\u1234\u5678\u4321'.startswith(u'\u1234\u4321') is False + assert u'\u1234'.startswith(u'') is True def test_startswith_more(self): assert 'ab'.startswith('a', 0) is True @@ -533,7 +571,7 @@ raises(TypeError, 'hello'.translate) raises(TypeError, 'abababc'.translate, 'abc', 'xyz') - def test_unicode_form_encoded_object(self): + def test_unicode_from_encoded_object(self): assert str(b'x', 'utf-8') == 'x' assert str(b'x', 'utf-8', 'strict') == 'x' @@ -659,31 +697,31 @@ def test_partition(self): - assert ('this is the par', 'ti', 'tion method') == \ - 'this is the partition method'.partition('ti') + assert (u'this is the par', u'ti', u'tion method') == \ + u'this is the partition method'.partition(u'ti') # from raymond's original specification - S = 'http://www.python.org' - assert ('http', '://', 'www.python.org') == S.partition('://') - assert ('http://www.python.org', '', '') == S.partition('?') - assert ('', 'http://', 'www.python.org') == S.partition('http://') - assert ('http://www.python.', 'org', '') == S.partition('org') + S = u'http://www.python.org' + assert (u'http', u'://', u'www.python.org') == S.partition(u'://') + assert (u'http://www.python.org', u'', u'') == S.partition(u'?') + assert (u'', u'http://', u'www.python.org') == S.partition(u'http://') + assert (u'http://www.python.', u'org', u'') == S.partition(u'org') - raises(ValueError, S.partition, '') + raises(ValueError, S.partition, u'') raises(TypeError, S.partition, None) def test_rpartition(self): - assert ('this is the rparti', 'ti', 'on method') == \ - 'this is the rpartition method'.rpartition('ti') + assert (u'this is the rparti', u'ti', u'on method') == \ + u'this is the rpartition method'.rpartition(u'ti') # from raymond's original specification - S = 'http://www.python.org' - assert ('http', '://', 'www.python.org') == S.rpartition('://') - assert ('', '', 'http://www.python.org') == S.rpartition('?') - assert ('', 'http://', 'www.python.org') == S.rpartition('http://') - assert ('http://www.python.', 'org', '') == S.rpartition('org') + S = u'http://www.python.org' + assert (u'http', u'://', u'www.python.org') == S.rpartition(u'://') + assert (u'', u'', u'http://www.python.org') == S.rpartition(u'?') + assert (u'', u'http://', u'www.python.org') == S.rpartition(u'http://') + assert (u'http://www.python.', u'org', u'') == S.rpartition(u'org') - raises(ValueError, S.rpartition, '') + raises(ValueError, S.rpartition, u'') raises(TypeError, S.rpartition, None) def test_mul(self): @@ -706,6 +744,7 @@ def test_index(self): assert 
"rrarrrrrrrrra".index('a', 4, None) == 12 assert "rrarrrrrrrrra".index('a', None, 6) == 2 + assert u"\u1234\u4321\u5678".index(u'\u5678', 1) == 2 def test_rindex(self): from sys import maxsize @@ -715,6 +754,7 @@ assert 'abcdefghiabc'.rindex('abc', 0, -1) == 0 assert 'abcdefghiabc'.rindex('abc', -4*maxsize, 4*maxsize) == 9 assert 'rrarrrrrrrrra'.rindex('a', 4, None) == 12 + assert u"\u1234\u5678".rindex(u'\u5678') == 1 raises(ValueError, 'abcdefghiabc'.rindex, 'hib') raises(ValueError, 'defghiabc'.rindex, 'def', 1) @@ -729,6 +769,7 @@ assert 'abcdefghiabc'.rfind('') == 12 assert 'abcdefghiabc'.rfind('abcd') == 0 assert 'abcdefghiabc'.rfind('abcz') == -1 + assert u"\u1234\u5678".rfind(u'\u5678') == 1 def test_rfind_corner_case(self): assert 'abc'.rfind('', 4) == -1 @@ -802,17 +843,31 @@ assert str(Y()).__class__ is X def test_getslice(self): - assert '123456'[1:5] == '2345' - s = "abc" - assert s[:] == "abc" - assert s[1:] == "bc" - assert s[:2] == "ab" - assert s[1:2] == "b" - assert s[-2:] == "bc" - assert s[:-1] == "ab" - assert s[-2:2] == "b" - assert s[1:-1] == "b" - assert s[-2:-1] == "b" + s = u"\u0105b\u0107" + assert s[:] == u"\u0105b\u0107" + assert s[1:] == u"b\u0107" + assert s[:2] == u"\u0105b" + assert s[1:2] == u"b" + assert s[-2:] == u"b\u0107" + assert s[:-1] == u"\u0105b" + assert s[-2:2] == u"b" + assert s[1:-1] == u"b" + assert s[-2:-1] == u"b" + + def test_getitem_slice(self): + assert u'123456'.__getitem__(slice(1, 5)) == u'2345' + s = u"\u0105b\u0107" + assert s[slice(3)] == u"\u0105b\u0107" + assert s[slice(1, 3)] == u"b\u0107" + assert s[slice(2)] == u"\u0105b" + assert s[slice(1, 2)] == u"b" + assert s[slice(-2, 3)] == u"b\u0107" + assert s[slice(-1)] == u"\u0105b" + assert s[slice(-2, 2)] == u"b" + assert s[slice(1, -1)] == u"b" + assert s[slice(-2, -1)] == u"b" + assert u"abcde"[::2] == u"ace" + assert u"\u0105\u0106\u0107abcd"[::2] == u"\u0105\u0107bd" def test_iter(self): foo = "\u1111\u2222\u3333" @@ -898,7 +953,7 @@ def test_formatting_unicode__str__2(self): class A: def __str__(self): - return 'baz' + return u'baz' class B: def __str__(self): @@ -913,12 +968,12 @@ # "bah" is all I can say class X(object): def __repr__(self): - return '\u1234' + return u'\u1234' '%s' % X() # class X(object): def __str__(self): - return '\u1234' + return u'\u1234' '%s' % X() def test_formatting_unicode__repr__(self): diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -194,13 +194,14 @@ listdef.generalize_range_step(flags['range_step']) return SomeList(listdef) - def getdictdef(self, is_r_dict=False, force_non_null=False): + def getdictdef(self, is_r_dict=False, force_non_null=False, simple_hash_eq=False): """Get the DictDef associated with the current position.""" try: dictdef = self.dictdefs[self.position_key] except KeyError: dictdef = DictDef(self, is_r_dict=is_r_dict, - force_non_null=force_non_null) + force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) self.dictdefs[self.position_key] = dictdef return dictdef diff --git a/rpython/annotator/builtin.py b/rpython/annotator/builtin.py --- a/rpython/annotator/builtin.py +++ b/rpython/annotator/builtin.py @@ -237,22 +237,30 @@ return SomeInstance(clsdef) @analyzer_for(rpython.rlib.objectmodel.r_dict) -def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None): +def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): + return _r_dict_helper(SomeDict, s_eqfn, s_hashfn, 
s_force_non_null, s_simple_hash_eq) + + at analyzer_for(rpython.rlib.objectmodel.r_ordereddict) +def robjmodel_r_ordereddict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): + return _r_dict_helper(SomeOrderedDict, s_eqfn, s_hashfn, + s_force_non_null, s_simple_hash_eq) + +def _r_dict_helper(cls, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq): if s_force_non_null is None: force_non_null = False else: assert s_force_non_null.is_constant() force_non_null = s_force_non_null.const + if s_simple_hash_eq is None: + simple_hash_eq = False + else: + assert s_simple_hash_eq.is_constant() + simple_hash_eq = s_simple_hash_eq.const dictdef = getbookkeeper().getdictdef(is_r_dict=True, - force_non_null=force_non_null) + force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeDict(dictdef) - - at analyzer_for(rpython.rlib.objectmodel.r_ordereddict) -def robjmodel_r_ordereddict(s_eqfn, s_hashfn): - dictdef = getbookkeeper().getdictdef(is_r_dict=True) - dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeOrderedDict(dictdef) + return cls(dictdef) @analyzer_for(rpython.rlib.objectmodel.hlinvoke) def robjmodel_hlinvoke(s_repr, s_llcallable, *args_s): diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py --- a/rpython/annotator/dictdef.py +++ b/rpython/annotator/dictdef.py @@ -81,12 +81,14 @@ def __init__(self, bookkeeper, s_key = s_ImpossibleValue, s_value = s_ImpossibleValue, is_r_dict = False, - force_non_null = False): + force_non_null = False, + simple_hash_eq = False): self.dictkey = DictKey(bookkeeper, s_key, is_r_dict) self.dictkey.itemof[self] = True self.dictvalue = DictValue(bookkeeper, s_value) self.dictvalue.itemof[self] = True self.force_non_null = force_non_null + self.simple_hash_eq = simple_hash_eq def read_key(self, position_key): self.dictkey.read_locations.add(position_key) diff --git a/rpython/jit/metainterp/typesystem.py b/rpython/jit/metainterp/typesystem.py --- a/rpython/jit/metainterp/typesystem.py +++ b/rpython/jit/metainterp/typesystem.py @@ -106,11 +106,11 @@ # It is an r_dict on lltype. Two copies, to avoid conflicts with # the value type. Note that NULL is not allowed as a key. def new_ref_dict(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def new_ref_dict_2(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def new_ref_dict_3(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def cast_vtable_to_hashable(self, cpu, ptr): adr = llmemory.cast_ptr_to_adr(ptr) diff --git a/rpython/rlib/debug.py b/rpython/rlib/debug.py --- a/rpython/rlib/debug.py +++ b/rpython/rlib/debug.py @@ -288,6 +288,9 @@ def mark_dict_non_null(d): """ Mark dictionary as having non-null keys and values. A warning would be emitted (not an error!) in case annotation disagrees. + + This doesn't work for r_dicts. For them, pass + r_dict(..., force_non_null=True) to the constructor. 
""" assert isinstance(d, dict) return d diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -748,11 +748,19 @@ def _newdict(self): return {} - def __init__(self, key_eq, key_hash, force_non_null=False): + def __init__(self, key_eq, key_hash, force_non_null=False, simple_hash_eq=False): + """ force_non_null=True means that the key can never be None (even if + the annotator things it could be) + + simple_hash_eq=True means that the hash function is very fast, meaning it's + efficient enough that the dict does not have to store the hash per key. + It also implies that neither the hash nor the eq function will mutate + the dictionary. """ self._dict = self._newdict() self.key_eq = key_eq self.key_hash = key_hash self.force_non_null = force_non_null + self.simple_hash_eq = simple_hash_eq def __getitem__(self, key): return self._dict[_r_dictkey(self, key)] diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py --- a/rpython/rlib/runicode.py +++ b/rpython/rlib/runicode.py @@ -710,7 +710,7 @@ # ____________________________________________________________ -# utf-32 +# utf-32 (not used in PyPy any more) def str_decode_utf_32(s, size, errors, final=True, errorhandler=None): diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py --- a/rpython/rlib/test/test_objectmodel.py +++ b/rpython/rlib/test/test_objectmodel.py @@ -330,6 +330,13 @@ res = self.interpret(g, [3]) assert res == 77 + def test_r_dict_fast_functions(self): + def fn(): + d1 = r_dict(strange_key_eq, strange_key_hash, simple_hash_eq=True) + return play_with_r_dict(d1) + res = self.interpret(fn, []) + assert res + def test_prepare_dict_update(self): def g(n): d = {} diff --git a/rpython/rtyper/lltypesystem/rdict.py b/rpython/rtyper/lltypesystem/rdict.py --- a/rpython/rtyper/lltypesystem/rdict.py +++ b/rpython/rtyper/lltypesystem/rdict.py @@ -42,7 +42,8 @@ class DictRepr(AbstractDictRepr): def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue, - custom_eq_hash=None, force_non_null=False): + custom_eq_hash=None, force_non_null=False, fast_hash=False): + # fast_hash is ignored (only implemented in rordereddict.py) self.rtyper = rtyper self.DICT = lltype.GcForwardReference() self.lowleveltype = lltype.Ptr(self.DICT) diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py --- a/rpython/rtyper/lltypesystem/rordereddict.py +++ b/rpython/rtyper/lltypesystem/rordereddict.py @@ -66,7 +66,7 @@ def get_ll_dict(DICTKEY, DICTVALUE, get_custom_eq_hash=None, DICT=None, ll_fasthash_function=None, ll_hash_function=None, - ll_eq_function=None, method_cache={}, + ll_eq_function=None, method_cache={}, simple_hash_eq=False, dummykeyobj=None, dummyvalueobj=None, rtyper=None): # get the actual DICT type. 
if DICT is None, it's created, otherwise # forward reference is becoming DICT @@ -114,11 +114,14 @@ # * the value entryfields.append(("value", DICTVALUE)) - if ll_fasthash_function is None: + if simple_hash_eq: + assert get_custom_eq_hash is not None + entrymeths['entry_hash'] = ll_hash_custom_fast + elif ll_fasthash_function is None: entryfields.append(("f_hash", lltype.Signed)) - entrymeths['hash'] = ll_hash_from_cache + entrymeths['entry_hash'] = ll_hash_from_cache else: - entrymeths['hash'] = ll_hash_recomputed + entrymeths['entry_hash'] = ll_hash_recomputed entrymeths['fasthashfn'] = ll_fasthash_function # Build the lltype data structures @@ -140,7 +143,7 @@ 'keyeq': ll_keyeq_custom, 'r_rdict_eqfn': r_rdict_eqfn, 'r_rdict_hashfn': r_rdict_hashfn, - 'paranoia': True, + 'paranoia': not simple_hash_eq, } else: # figure out which functions must be used to hash and compare @@ -167,13 +170,14 @@ class OrderedDictRepr(AbstractDictRepr): def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue, - custom_eq_hash=None, force_non_null=False): + custom_eq_hash=None, force_non_null=False, simple_hash_eq=False): #assert not force_non_null self.rtyper = rtyper self.finalized = False self.DICT = lltype.GcForwardReference() self.lowleveltype = lltype.Ptr(self.DICT) self.custom_eq_hash = custom_eq_hash is not None + self.simple_hash_eq = simple_hash_eq if not isinstance(key_repr, rmodel.Repr): # not computed yet, done by setup() assert callable(key_repr) self._key_repr_computer = key_repr @@ -211,6 +215,7 @@ self.r_rdict_eqfn, self.r_rdict_hashfn = ( self._custom_eq_hash_repr()) kwd['get_custom_eq_hash'] = self._custom_eq_hash_repr + kwd['simple_hash_eq'] = self.simple_hash_eq else: kwd['ll_hash_function'] = self.key_repr.get_ll_hash_function() kwd['ll_eq_function'] = self.key_repr.get_ll_eq_function() @@ -600,15 +605,21 @@ dummy = ENTRIES.dummy_obj.ll_dummy_value entries[i].value = dummy - at signature(types.any(), types.int(), returns=types.any()) -def ll_hash_from_cache(entries, i): + at signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_from_cache(entries, d, i): return entries[i].f_hash - at signature(types.any(), types.int(), returns=types.any()) -def ll_hash_recomputed(entries, i): + at signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_recomputed(entries, d, i): ENTRIES = lltype.typeOf(entries).TO return ENTRIES.fasthashfn(entries[i].key) + at signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_custom_fast(entries, d, i): + DICT = lltype.typeOf(d).TO + key = entries[i].key + return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key) + def ll_keyhash_custom(d, key): DICT = lltype.typeOf(d).TO return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key) @@ -962,22 +973,22 @@ if fun == FUNC_BYTE: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_BYTE) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_BYTE) i += 1 elif fun == FUNC_SHORT: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_SHORT) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_SHORT) i += 1 elif IS_64BIT and fun == FUNC_INT: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_INT) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_INT) i += 1 elif fun == FUNC_LONG: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_LONG) + 
ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_LONG) i += 1 else: assert False @@ -1015,7 +1026,7 @@ checkingkey = entries[index - VALID_OFFSET].key if direct_compare and checkingkey == key: return index - VALID_OFFSET # found the entry - if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash: + if d.keyeq is not None and entries.entry_hash(d, index - VALID_OFFSET) == hash: # correct hash, maybe the key is e.g. a different pointer to # an equal object found = d.keyeq(checkingkey, key) @@ -1056,7 +1067,7 @@ checkingkey = entries[index - VALID_OFFSET].key if direct_compare and checkingkey == key: return index - VALID_OFFSET # found the entry - if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash: + if d.keyeq is not None and entries.entry_hash(d, index - VALID_OFFSET) == hash: # correct hash, maybe the key is e.g. a different pointer to # an equal object found = d.keyeq(checkingkey, key) @@ -1305,14 +1316,14 @@ def ll_dict_update(dic1, dic2): if dic1 == dic2: return - ll_ensure_indexes(dic2) # needed for entries.hash() below + ll_ensure_indexes(dic2) # needed for entries.entry_hash() below ll_prepare_dict_update(dic1, dic2.num_live_items) i = 0 while i < dic2.num_ever_used_items: entries = dic2.entries if entries.valid(i): entry = entries[i] - hash = entries.hash(i) + hash = entries.entry_hash(dic2, i) key = entry.key value = entry.value index = dic1.lookup_function(dic1, key, hash, FLAG_STORE) @@ -1413,7 +1424,7 @@ r = lltype.malloc(ELEM.TO) r.item0 = recast(ELEM.TO.item0, entry.key) r.item1 = recast(ELEM.TO.item1, entry.value) - _ll_dict_del(dic, dic.entries.hash(i), i) + _ll_dict_del(dic, dic.entries.entry_hash(dic, i), i) return r def ll_dict_pop(dic, key): diff --git a/rpython/rtyper/rbuiltin.py b/rpython/rtyper/rbuiltin.py --- a/rpython/rtyper/rbuiltin.py +++ b/rpython/rtyper/rbuiltin.py @@ -717,9 +717,9 @@ @typer_for(OrderedDict) @typer_for(objectmodel.r_dict) @typer_for(objectmodel.r_ordereddict) -def rtype_dict_constructor(hop, i_force_non_null=None): - # 'i_force_non_null' is ignored here; if it has any effect, it - # has already been applied to 'hop.r_result' +def rtype_dict_constructor(hop, i_force_non_null=None, i_simple_hash_eq=None): + # 'i_force_non_null' and 'i_simple_hash_eq' are ignored here; if they have any + # effect, it has already been applied to 'hop.r_result' hop.exception_cannot_occur() r_dict = hop.r_result cDICT = hop.inputconst(lltype.Void, r_dict.DICT) diff --git a/rpython/rtyper/rdict.py b/rpython/rtyper/rdict.py --- a/rpython/rtyper/rdict.py +++ b/rpython/rtyper/rdict.py @@ -15,6 +15,7 @@ s_key = dictkey.s_value s_value = dictvalue.s_value force_non_null = self.dictdef.force_non_null + simple_hash_eq = self.dictdef.simple_hash_eq if dictkey.custom_eq_hash: custom_eq_hash = lambda: (rtyper.getrepr(dictkey.s_rdict_eqfn), rtyper.getrepr(dictkey.s_rdict_hashfn)) @@ -22,7 +23,7 @@ custom_eq_hash = None return self.get_dict_repr()(rtyper, lambda: rtyper.getrepr(s_key), lambda: rtyper.getrepr(s_value), dictkey, dictvalue, - custom_eq_hash, force_non_null) + custom_eq_hash, force_non_null, simple_hash_eq) def rtyper_makekey(self): self.dictdef.dictkey .dont_change_any_more = True @@ -89,7 +90,7 @@ resulttype=ENTRIES) # call the correct variant_*() method method = getattr(self, 'variant_' + self.variant) - return method(hop, ENTRIES, v_entries, v_index) + return method(hop, ENTRIES, v_entries, v_dict, v_index) def get_tuple_result(self, hop, items_v): # this allocates the tuple for the result, directly in the function @@ 
-109,7 +110,7 @@ hop.genop('setfield', [v_result, c_item, v_item]) return v_result - def variant_keys(self, hop, ENTRIES, v_entries, v_index): + def variant_keys(self, hop, ENTRIES, v_entries, v_dict, v_index): KEY = ENTRIES.TO.OF.key c_key = hop.inputconst(lltype.Void, 'key') v_key = hop.genop('getinteriorfield', [v_entries, v_index, c_key], @@ -118,30 +119,30 @@ variant_reversed = variant_keys - def variant_values(self, hop, ENTRIES, v_entries, v_index): + def variant_values(self, hop, ENTRIES, v_entries, v_dict, v_index): VALUE = ENTRIES.TO.OF.value c_value = hop.inputconst(lltype.Void, 'value') v_value = hop.genop('getinteriorfield', [v_entries,v_index,c_value], resulttype=VALUE) return self.r_dict.recast_value(hop.llops, v_value) - def variant_items(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_value = self.variant_values(hop, ENTRIES, v_entries, v_index) + def variant_items(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_value)) - def variant_hashes(self, hop, ENTRIES, v_entries, v_index): + def variant_hashes(self, hop, ENTRIES, v_entries, v_dict, v_index): # there is not really a variant 'hashes', but this method is # convenient for the following variants - return hop.gendirectcall(ENTRIES.TO.hash, v_entries, v_index) + return hop.gendirectcall(ENTRIES.TO.entry_hash, v_entries, v_dict, v_index) - def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index) + def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_hash)) - def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_value = self.variant_values(hop, ENTRIES, v_entries, v_index) - v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index) + def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index) + v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_value, v_hash)) diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py --- a/rpython/rtyper/test/test_rdict.py +++ b/rpython/rtyper/test/test_rdict.py @@ -538,6 +538,25 @@ r_dict = rtyper.getrepr(s) assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash") + def test_r_dict_can_be_fast(self): + def myeq(n, m): + return n == m + def myhash(n): + return ~n + def f(): + d = self.new_r_dict(myeq, myhash, simple_hash_eq=True) + d[5] = 7 + d[12] = 19 + return d + + t = TranslationContext() + s = t.buildannotator().build_types(f, []) + rtyper = t.buildrtyper() + rtyper.specialize() + + r_dict = rtyper.getrepr(s) + assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash") + def test_tuple_dict(self): def f(i): d = self.newdict() @@ -1000,8 +1019,8 @@ return {} @staticmethod - def new_r_dict(myeq, myhash): - return r_dict(myeq, myhash) + def new_r_dict(myeq, 
myhash, force_non_null=False, simple_hash_eq=False): + return r_dict(myeq, myhash, force_non_null=force_non_null, simple_hash_eq=simple_hash_eq) def test_two_dicts_with_different_value_types(self): def func(i): diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py --- a/rpython/rtyper/test/test_rordereddict.py +++ b/rpython/rtyper/test/test_rordereddict.py @@ -386,8 +386,10 @@ return OrderedDict() @staticmethod - def new_r_dict(myeq, myhash): - return objectmodel.r_ordereddict(myeq, myhash) + def new_r_dict(myeq, myhash, force_non_null=False, simple_hash_eq=False): + return objectmodel.r_ordereddict( + myeq, myhash, force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) def test_two_dicts_with_different_value_types(self): def func(i): From pypy.commits at gmail.com Wed Dec 13 00:10:56 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 12 Dec 2017 21:10:56 -0800 (PST) Subject: [pypy-commit] buildbot default: onlyIfChanged=py3.5 and HgPoller(..., branch='default', ...) doesn't work Message-ID: <5a30b660.90bf1c0a.d3f62.741f@mx.google.com> Author: Matti Picus Branch: Changeset: r1049:ad69e6968efe Date: 2017-12-13 07:10 +0200 http://bitbucket.org/pypy/buildbot/changeset/ad69e6968efe/ Log: onlyIfChanged=py3.5 and HgPoller(..., branch='default', ...) doesn't work diff --git a/bot2/pypybuildbot/arm_master.py b/bot2/pypybuildbot/arm_master.py --- a/bot2/pypybuildbot/arm_master.py +++ b/bot2/pypybuildbot/arm_master.py @@ -137,7 +137,7 @@ BUILDJITLINUXARM, # on hhu-cross-armel, uses 1 core BUILDJITLINUXARMHF_RASPBIAN, # on hhu-cross-raspbianhf, uses 1 core ], branch="py3.5", hour=3, minute=0, - onlyIfChanged=True, + #onlyIfChanged=True, ), Nightly("nightly-arm-0-01", [ diff --git a/bot2/pypybuildbot/master.py b/bot2/pypybuildbot/master.py --- a/bot2/pypybuildbot/master.py +++ b/bot2/pypybuildbot/master.py @@ -343,7 +343,7 @@ JITMACOSX64, # on xerxes JITWIN32, # on allegro_win32, SalsaSalsa ], branch="py3.5", hour=3, minute=0, - onlyIfChanged=True, + #onlyIfChanged=True, ), # S390X vm (ibm-research) From pypy.commits at gmail.com Wed Dec 13 00:20:10 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 12 Dec 2017 21:20:10 -0800 (PST) Subject: [pypy-commit] pypy default: remove raring from release targets Message-ID: <5a30b88a.3bb0df0a.f6aff.3f21@mx.google.com> Author: Matti Picus Branch: Changeset: r93403:cf4656b0419f Date: 2017-12-13 07:19 +0200 http://bitbucket.org/pypy/pypy/changeset/cf4656b0419f/ Log: remove raring from release targets diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst --- a/pypy/doc/how-to-release.rst +++ b/pypy/doc/how-to-release.rst @@ -62,7 +62,7 @@ * go to pypy/tool/release and run ``force-builds.py `` The following JIT binaries should be built, however, we need more buildbots - windows, linux-32, linux-64, osx64, armhf-raring, armhf-raspberrian, armel, + windows, linux-32, linux-64, osx64, armhf-raspberrian, armel, freebsd64 * wait for builds to complete, make sure there are no failures diff --git a/pypy/tool/release/force-builds.py b/pypy/tool/release/force-builds.py --- a/pypy/tool/release/force-builds.py +++ b/pypy/tool/release/force-builds.py @@ -29,7 +29,6 @@ 'pypy-c-jit-macosx-x86-64', 'pypy-c-jit-win-x86-32', 'pypy-c-jit-linux-s390x', - 'build-pypy-c-jit-linux-armhf-raring', 'build-pypy-c-jit-linux-armhf-raspbian', 'build-pypy-c-jit-linux-armel', ] diff --git a/pypy/tool/release/repackage.sh b/pypy/tool/release/repackage.sh --- a/pypy/tool/release/repackage.sh +++ 
b/pypy/tool/release/repackage.sh @@ -23,7 +23,7 @@ # Download latest builds from the buildmaster, rename the top # level directory, and repackage ready to be uploaded to bitbucket -for plat in linux linux64 linux-armhf-raspbian linux-armhf-raring linux-armel osx64 s390x +for plat in linux linux64 linux-armhf-raspbian linux-armel osx64 s390x do echo downloading package for $plat if wget -q --show-progress http://buildbot.pypy.org/nightly/$branchname/pypy-c-jit-latest-$plat.tar.bz2 From pypy.commits at gmail.com Wed Dec 13 05:18:30 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 13 Dec 2017 02:18:30 -0800 (PST) Subject: [pypy-commit] pypy default: remove old unused dict Message-ID: <5a30fe76.238edf0a.53e66.e804@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: Changeset: r93404:60d1d4bb6379 Date: 2017-12-13 11:17 +0100 http://bitbucket.org/pypy/pypy/changeset/60d1d4bb6379/ Log: remove old unused dict diff --git a/rpython/jit/metainterp/optimizeopt/optimizer.py b/rpython/jit/metainterp/optimizeopt/optimizer.py --- a/rpython/jit/metainterp/optimizeopt/optimizer.py +++ b/rpython/jit/metainterp/optimizeopt/optimizer.py @@ -273,7 +273,6 @@ self.jitdriver_sd = jitdriver_sd self.cpu = metainterp_sd.cpu self.interned_refs = self.cpu.ts.new_ref_dict() - self.interned_ints = {} self.resumedata_memo = resume.ResumeDataLoopMemo(metainterp_sd) self.pendingfields = None # set temporarily to a list, normally by # heap.py, as we're about to generate a guard From pypy.commits at gmail.com Wed Dec 13 05:57:38 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 13 Dec 2017 02:57:38 -0800 (PST) Subject: [pypy-commit] pypy default: fix typo Message-ID: <5a3107a2.cc5e1c0a.64e4a.e295@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: Changeset: r93405:36428ed768d3 Date: 2017-12-13 11:57 +0100 http://bitbucket.org/pypy/pypy/changeset/36428ed768d3/ Log: fix typo diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -36,6 +36,6 @@ .. branch: win32-vcvars -.. branch rdict-fast-hash +.. branch: rdict-fast-hash Make it possible to declare that the hash function of an r_dict is fast in RPython. 
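The whatsnew entry above documents the rdict-fast-hash branch, and the objectmodel/rordereddict diffs earlier in this digest show the new simple_hash_eq keyword being threaded from r_dict through the annotator and rtyper. A minimal, untested sketch of how RPython code could opt in; IntBox, intbox_eq, intbox_hash and count_distinct are invented for illustration, and only r_dict and its simple_hash_eq keyword come from the changesets above:

    from rpython.rlib.objectmodel import r_dict

    class IntBox(object):
        # trivial wrapper object used as a dictionary key
        def __init__(self, value):
            self.value = value

    def intbox_eq(box1, box2):
        # custom equality: compare by wrapped value, not by identity
        return box1.value == box2.value

    def intbox_hash(box):
        # cheap, allocation-free hash that cannot mutate the dict
        return box.value

    def count_distinct(boxes):
        # simple_hash_eq=True declares the hash cheap enough to recompute,
        # so the dict does not need to store a hash field per entry
        d = r_dict(intbox_eq, intbox_hash, simple_hash_eq=True)
        for box in boxes:
            d[box] = None
        return len(d)

    assert count_distinct([IntBox(1), IntBox(2), IntBox(1)]) == 2

Per the docstring added to objectmodel.py, this flag is only appropriate when the hash and eq functions are fast and never mutate the dictionary; otherwise the default behaviour (storing the hash per entry, with the 'paranoia' path enabled in rordereddict) remains the safe choice.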
From pypy.commits at gmail.com Wed Dec 13 11:42:17 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 13 Dec 2017 08:42:17 -0800 (PST) Subject: [pypy-commit] buildbot default: fix for py3.5 (death by a thousand paper cuts) Message-ID: <5a315869.21b9df0a.689f5.a55f@mx.google.com> Author: Matti Picus Branch: Changeset: r1050:527820e0350d Date: 2017-12-13 18:41 +0200 http://bitbucket.org/pypy/buildbot/changeset/527820e0350d/ Log: fix for py3.5 (death by a thousand paper cuts) diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -735,7 +735,7 @@ haltOnFailure=True, workdir='.')) # copy libpypy-c.so to the expected location within the pypy source checkout, if available - command = 'if [ -e pypy-c/bin/libpypy-c.so ]; then cp -v pypy-c/bin/libpypy-c.so build/pypy/goal; fi;' + command = 'cp -v pypy-c/bin/libpypy*-c.so build/pypy/goal/ || true' self.addStep(ShellCmd( description="copy libpypy-c.so", command=command, From pypy.commits at gmail.com Wed Dec 13 16:37:38 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 13 Dec 2017 13:37:38 -0800 (PST) Subject: [pypy-commit] pypy regalloc-playground: some more calls supported Message-ID: <5a319da2.90a9df0a.6a4d7.9487@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: regalloc-playground Changeset: r93406:5d41d2ca0275 Date: 2017-12-13 22:37 +0100 http://bitbucket.org/pypy/pypy/changeset/5d41d2ca0275/ Log: some more calls supported diff --git a/rpython/jit/backend/x86/reghint.py b/rpython/jit/backend/x86/reghint.py --- a/rpython/jit/backend/x86/reghint.py +++ b/rpython/jit/backend/x86/reghint.py @@ -148,6 +148,20 @@ consider_call_f = _consider_real_call consider_call_n = _consider_real_call + def _consider_call_may_force(self, op, position): + self._consider_call(op, position, guard_not_forced=True) + consider_call_may_force_i = _consider_call_may_force + consider_call_may_force_r = _consider_call_may_force + consider_call_may_force_f = _consider_call_may_force + consider_call_may_force_n = _consider_call_may_force + + def _consider_call_release_gil(self, op, position): + # [Const(save_err), func_addr, args...] 
+ self._consider_call(op, position, guard_not_forced=True, first_arg_index=2) + consider_call_release_gil_i = _consider_call_release_gil + consider_call_release_gil_f = _consider_call_release_gil + consider_call_release_gil_n = _consider_call_release_gil + oplist = [X86RegisterHints.not_implemented_op] * rop._LAST for name, value in X86RegisterHints.__dict__.iteritems(): From pypy.commits at gmail.com Wed Dec 13 16:51:51 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 13 Dec 2017 13:51:51 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Port extra_tests/test_json.py to py3 Message-ID: <5a31a0f7.e1acdf0a.8a4a4.0082@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93407:3c274e0f1720 Date: 2017-12-13 21:51 +0000 http://bitbucket.org/pypy/pypy/changeset/3c274e0f1720/ Log: Port extra_tests/test_json.py to py3 diff --git a/extra_tests/test_json.py b/extra_tests/test_json.py --- a/extra_tests/test_json.py +++ b/extra_tests/test_json.py @@ -7,15 +7,11 @@ def test_no_ensure_ascii(): assert is_(json.dumps(u"\u1234", ensure_ascii=False), u'"\u1234"') - assert is_(json.dumps("\xc0", ensure_ascii=False), '"\xc0"') - with pytest.raises(UnicodeDecodeError) as excinfo: - json.dumps((u"\u1234", "\xc0"), ensure_ascii=False) - assert str(excinfo.value).startswith( - "'ascii' codec can't decode byte 0xc0 ") - with pytest.raises(UnicodeDecodeError) as excinfo: - json.dumps(("\xc0", u"\u1234"), ensure_ascii=False) - assert str(excinfo.value).startswith( - "'ascii' codec can't decode byte 0xc0 ") + assert is_(json.dumps(u"\xc0", ensure_ascii=False), u'"\xc0"') + with pytest.raises(TypeError): + json.dumps((u"\u1234", b"x"), ensure_ascii=False) + with pytest.raises(TypeError): + json.dumps((b"x", u"\u1234"), ensure_ascii=False) def test_issue2191(): assert is_(json.dumps(u"xxx", ensure_ascii=False), u'"xxx"') From pypy.commits at gmail.com Wed Dec 13 17:29:21 2017 From: pypy.commits at gmail.com (amauryfa) Date: Wed, 13 Dec 2017 14:29:21 -0800 (PST) Subject: [pypy-commit] pypy py3.6: Fix struct test. Message-ID: <5a31a9c1.3bb0df0a.f6aff.2923@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93408:24d2c052c1dd Date: 2017-12-12 23:21 +0100 http://bitbucket.org/pypy/pypy/changeset/24d2c052c1dd/ Log: Fix struct test. diff --git a/pypy/module/struct/test/test_struct.py b/pypy/module/struct/test/test_struct.py --- a/pypy/module/struct/test/test_struct.py +++ b/pypy/module/struct/test/test_struct.py @@ -49,7 +49,7 @@ assert calcsize('=Q') == 8 assert calcsize('d') == 8 - assert calcsize(' Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93409:8f8a8d212853 Date: 2017-12-13 10:04 +0100 http://bitbucket.org/pypy/pypy/changeset/8f8a8d212853/ Log: The py3k version of the utf32 decoder should not allow lone surrogates. 
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py --- a/rpython/rlib/runicode.py +++ b/rpython/rlib/runicode.py @@ -489,21 +489,21 @@ return result, length def py3k_str_decode_utf_16(s, size, errors, final=True, - errorhandler=None): + errorhandler=None): result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final, errorhandler, "native", 'utf-16-' + BYTEORDER2) return result, length def py3k_str_decode_utf_16_be(s, size, errors, final=True, - errorhandler=None): + errorhandler=None): result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final, errorhandler, "big", 'utf-16-be') return result, length def py3k_str_decode_utf_16_le(s, size, errors, final=True, - errorhandler=None): + errorhandler=None): result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final, errorhandler, "little", 'utf-16-le') @@ -714,41 +714,41 @@ def str_decode_utf_32(s, size, errors, final=True, errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "native") + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "native") return result, length def str_decode_utf_32_be(s, size, errors, final=True, errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "big") + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "big") return result, length def str_decode_utf_32_le(s, size, errors, final=True, errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "little") + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "little") return result, length def py3k_str_decode_utf_32(s, size, errors, final=True, errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "native", - 'utf-32-' + BYTEORDER2) + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "native", + 'utf-32-' + BYTEORDER2, allow_surrogates=False) return result, length def py3k_str_decode_utf_32_be(s, size, errors, final=True, errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "big", - 'utf-32-be') + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "big", + 'utf-32-be', allow_surrogates=False) return result, length def py3k_str_decode_utf_32_le(s, size, errors, final=True, errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "little", - 'utf-32-le') + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "little", + 'utf-32-le', allow_surrogates=False) return result, length BOM32_DIRECT = intmask(0x0000FEFF) @@ -757,7 +757,8 @@ def str_decode_utf_32_helper(s, size, errors, final=True, errorhandler=None, byteorder="native", - public_encoding_name='utf32'): + public_encoding_name='utf32', + allow_surrogates=True): if errorhandler is None: errorhandler = default_unicode_error_decode bo = 0 @@ -821,7 +822,13 @@ continue ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) | (ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]])) - if ch >= 0x110000: + if not allow_surrogates and 0xD800 <= ch <= 0xDFFFF: + r, pos = errorhandler(errors, public_encoding_name, + "code point in surrogate 
code point " + "range(0xd800, 0xe000)", + s, pos, len(s)) + result.append(r) + elif ch >= 0x110000: r, pos = errorhandler(errors, public_encoding_name, "codepoint not in range(0x110000)", s, pos, len(s)) diff --git a/rpython/rlib/test/test_runicode.py b/rpython/rlib/test/test_runicode.py --- a/rpython/rlib/test/test_runicode.py +++ b/rpython/rlib/test/test_runicode.py @@ -284,6 +284,11 @@ errorhandler, allow_surrogates=False) assert replace_with(u'rep', None) == u''.encode('utf-32-be') assert replace_with(None, '\xca\xfe\xca\xfe') == '\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>' + # + assert runicode.str_decode_utf_32_be( + b"\x00\x00\xdc\x80", 4, None) == (u'\udc80', 4) + py.test.raises(UnicodeDecodeError, runicode.py3k_str_decode_utf_32_be, + b"\x00\x00\xdc\x80", 4, None) def test_utf7_bugs(self): u = u'A\u2262\u0391.' From pypy.commits at gmail.com Wed Dec 13 17:29:25 2017 From: pypy.commits at gmail.com (amauryfa) Date: Wed, 13 Dec 2017 14:29:25 -0800 (PST) Subject: [pypy-commit] pypy py3.6: Applevel test for the utf32 surrogates. Message-ID: <5a31a9c5.16981c0a.e4231.957a@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93410:c99a2002f6fc Date: 2017-12-13 10:05 +0100 http://bitbucket.org/pypy/pypy/changeset/c99a2002f6fc/ Log: Applevel test for the utf32 surrogates. diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -772,6 +772,18 @@ '[]'.encode(encoding)) assert (u'[\udc80]'.encode(encoding, "replace") == '[?]'.encode(encoding)) + for encoding, ill_surrogate in [('utf-8', b'\xed\xb2\x80'), + ('utf-16-le', b'\x80\xdc'), + ('utf-16-be', b'\xdc\x80'), + ('utf-32-le', b'\x80\xdc\x00\x00'), + ('utf-32-be', b'\x00\x00\xdc\x80')]: + print(encoding) + before, after = "[", "]" + before_sequence = before.encode(encoding) + after_sequence = after.encode(encoding) + test_string = before + "\uDC80" + after + test_sequence = before_sequence + ill_surrogate + after_sequence + raises(UnicodeDecodeError, test_sequence.decode, encoding) def test_charmap_encode(self): assert 'xxx'.encode('charmap') == b'xxx' From pypy.commits at gmail.com Wed Dec 13 17:29:27 2017 From: pypy.commits at gmail.com (amauryfa) Date: Wed, 13 Dec 2017 14:29:27 -0800 (PST) Subject: [pypy-commit] pypy default: The py3k version of the utf32 decoder should not allow lone surrogates. Message-ID: <5a31a9c7.94ad1c0a.494c1.9f67@mx.google.com> Author: Amaury Forgeot d'Arc Branch: Changeset: r93411:33178f62171f Date: 2017-12-13 10:04 +0100 http://bitbucket.org/pypy/pypy/changeset/33178f62171f/ Log: The py3k version of the utf32 decoder should not allow lone surrogates. 
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py --- a/rpython/rlib/runicode.py +++ b/rpython/rlib/runicode.py @@ -489,21 +489,21 @@ return result, length def py3k_str_decode_utf_16(s, size, errors, final=True, - errorhandler=None): + errorhandler=None): result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final, errorhandler, "native", 'utf-16-' + BYTEORDER2) return result, length def py3k_str_decode_utf_16_be(s, size, errors, final=True, - errorhandler=None): + errorhandler=None): result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final, errorhandler, "big", 'utf-16-be') return result, length def py3k_str_decode_utf_16_le(s, size, errors, final=True, - errorhandler=None): + errorhandler=None): result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final, errorhandler, "little", 'utf-16-le') @@ -714,41 +714,41 @@ def str_decode_utf_32(s, size, errors, final=True, errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "native") + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "native") return result, length def str_decode_utf_32_be(s, size, errors, final=True, errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "big") + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "big") return result, length def str_decode_utf_32_le(s, size, errors, final=True, errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "little") + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "little") return result, length def py3k_str_decode_utf_32(s, size, errors, final=True, errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "native", - 'utf-32-' + BYTEORDER2) + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "native", + 'utf-32-' + BYTEORDER2, allow_surrogates=False) return result, length def py3k_str_decode_utf_32_be(s, size, errors, final=True, errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "big", - 'utf-32-be') + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "big", + 'utf-32-be', allow_surrogates=False) return result, length def py3k_str_decode_utf_32_le(s, size, errors, final=True, errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "little", - 'utf-32-le') + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "little", + 'utf-32-le', allow_surrogates=False) return result, length BOM32_DIRECT = intmask(0x0000FEFF) @@ -757,7 +757,8 @@ def str_decode_utf_32_helper(s, size, errors, final=True, errorhandler=None, byteorder="native", - public_encoding_name='utf32'): + public_encoding_name='utf32', + allow_surrogates=True): if errorhandler is None: errorhandler = default_unicode_error_decode bo = 0 @@ -821,7 +822,13 @@ continue ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) | (ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]])) - if ch >= 0x110000: + if not allow_surrogates and 0xD800 <= ch <= 0xDFFFF: + r, pos = errorhandler(errors, public_encoding_name, + "code point in surrogate 
code point " + "range(0xd800, 0xe000)", + s, pos, len(s)) + result.append(r) + elif ch >= 0x110000: r, pos = errorhandler(errors, public_encoding_name, "codepoint not in range(0x110000)", s, pos, len(s)) diff --git a/rpython/rlib/test/test_runicode.py b/rpython/rlib/test/test_runicode.py --- a/rpython/rlib/test/test_runicode.py +++ b/rpython/rlib/test/test_runicode.py @@ -284,6 +284,11 @@ errorhandler, allow_surrogates=False) assert replace_with(u'rep', None) == u''.encode('utf-32-be') assert replace_with(None, '\xca\xfe\xca\xfe') == '\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>' + # + assert runicode.str_decode_utf_32_be( + b"\x00\x00\xdc\x80", 4, None) == (u'\udc80', 4) + py.test.raises(UnicodeDecodeError, runicode.py3k_str_decode_utf_32_be, + b"\x00\x00\xdc\x80", 4, None) def test_utf7_bugs(self): u = u'A\u2262\u0391.' From pypy.commits at gmail.com Wed Dec 13 17:29:29 2017 From: pypy.commits at gmail.com (amauryfa) Date: Wed, 13 Dec 2017 14:29:29 -0800 (PST) Subject: [pypy-commit] pypy default: Add support for half floats in the RPython rstruct module. Message-ID: <5a31a9c9.28afdf0a.028a.7035@mx.google.com> Author: Amaury Forgeot d'Arc Branch: Changeset: r93412:ca5586010ac3 Date: 2017-12-11 20:29 +0100 http://bitbucket.org/pypy/pypy/changeset/ca5586010ac3/ Log: Add support for half floats in the RPython rstruct module. diff --git a/rpython/rlib/rstruct/standardfmttable.py b/rpython/rlib/rstruct/standardfmttable.py --- a/rpython/rlib/rstruct/standardfmttable.py +++ b/rpython/rlib/rstruct/standardfmttable.py @@ -105,6 +105,18 @@ _pack_string(fmtiter, string, count-1) +def pack_halffloat(fmtiter): + size = 2 + fl = fmtiter.accept_float_arg() + try: + result = ieee.pack_float(fmtiter.wbuf, fmtiter.pos, + fl, size, fmtiter.bigendian) + except OverflowError: + raise StructOverflowError("float too large for format 'e'") + else: + fmtiter.advance(size) + return result + def make_float_packer(TYPE): size = rffi.sizeof(TYPE) def packer(fmtiter): @@ -247,6 +259,11 @@ end = count fmtiter.appendobj(data[1:end]) + at specialize.argtype(0) +def unpack_halffloat(fmtiter): + data = fmtiter.read(2) + fmtiter.appendobj(ieee.unpack_float(data, fmtiter.bigendian)) + def make_ieee_unpacker(TYPE): @specialize.argtype(0) def unpack_ieee(fmtiter): @@ -374,6 +391,8 @@ 'needcount' : True }, 'p':{ 'size' : 1, 'pack' : pack_pascal, 'unpack' : unpack_pascal, 'needcount' : True }, + 'e':{ 'size' : 2, 'pack' : pack_halffloat, + 'unpack' : unpack_halffloat}, 'f':{ 'size' : 4, 'pack' : make_float_packer(rffi.FLOAT), 'unpack' : unpack_float}, 'd':{ 'size' : 8, 'pack' : make_float_packer(rffi.DOUBLE), diff --git a/rpython/rlib/rstruct/test/test_pack.py b/rpython/rlib/rstruct/test/test_pack.py --- a/rpython/rlib/rstruct/test/test_pack.py +++ b/rpython/rlib/rstruct/test/test_pack.py @@ -138,6 +138,19 @@ self.check('f', 123.456) self.check('d', 123.456789) + def test_pack_halffloat(self): + if self.fmttable is nativefmttable.native_fmttable: + # Host Python cannot handle half floats. 
+ return + size = 2 + wbuf = MutableStringBuffer(size) + self.mypack_into('e', wbuf, 6.5e+04) + got = wbuf.finish() + if self.bigendian: + assert got == b'\x7b\xef' + else: + assert got == b'\xef\x7b' + def test_float_overflow(self): if self.fmt_prefix == '@': # native packing, no overflow diff --git a/rpython/rlib/rstruct/test/test_runpack.py b/rpython/rlib/rstruct/test/test_runpack.py --- a/rpython/rlib/rstruct/test/test_runpack.py +++ b/rpython/rlib/rstruct/test/test_runpack.py @@ -78,6 +78,10 @@ assert f != 12.34 # precision lost assert abs(f - 12.34) < 1E-6 + def test_unpack_halffloat(self): + assert runpack(">e", b"\x7b\xef") == 64992.0 + assert runpack(" Author: Amaury Forgeot d'Arc Branch: py3.5 Changeset: r93413:b0267eee69d8 Date: 2017-12-13 10:04 +0100 http://bitbucket.org/pypy/pypy/changeset/b0267eee69d8/ Log: The py3k version of the utf32 decoder should not allow lone surrogates. diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py --- a/rpython/rlib/runicode.py +++ b/rpython/rlib/runicode.py @@ -489,21 +489,21 @@ return result, length def py3k_str_decode_utf_16(s, size, errors, final=True, - errorhandler=None): + errorhandler=None): result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final, errorhandler, "native", 'utf-16-' + BYTEORDER2) return result, length def py3k_str_decode_utf_16_be(s, size, errors, final=True, - errorhandler=None): + errorhandler=None): result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final, errorhandler, "big", 'utf-16-be') return result, length def py3k_str_decode_utf_16_le(s, size, errors, final=True, - errorhandler=None): + errorhandler=None): result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final, errorhandler, "little", 'utf-16-le') @@ -714,41 +714,41 @@ def str_decode_utf_32(s, size, errors, final=True, errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "native") + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "native") return result, length def str_decode_utf_32_be(s, size, errors, final=True, errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "big") + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "big") return result, length def str_decode_utf_32_le(s, size, errors, final=True, errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "little") + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "little") return result, length def py3k_str_decode_utf_32(s, size, errors, final=True, errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "native", - 'utf-32-' + BYTEORDER2) + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "native", + 'utf-32-' + BYTEORDER2, allow_surrogates=False) return result, length def py3k_str_decode_utf_32_be(s, size, errors, final=True, errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "big", - 'utf-32-be') + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "big", + 'utf-32-be', allow_surrogates=False) return result, length def py3k_str_decode_utf_32_le(s, size, errors, final=True, errorhandler=None): - result, length, 
byteorder = str_decode_utf_32_helper(s, size, errors, final, - errorhandler, "little", - 'utf-32-le') + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "little", + 'utf-32-le', allow_surrogates=False) return result, length BOM32_DIRECT = intmask(0x0000FEFF) @@ -757,7 +757,8 @@ def str_decode_utf_32_helper(s, size, errors, final=True, errorhandler=None, byteorder="native", - public_encoding_name='utf32'): + public_encoding_name='utf32', + allow_surrogates=True): if errorhandler is None: errorhandler = default_unicode_error_decode bo = 0 @@ -821,7 +822,13 @@ continue ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) | (ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]])) - if ch >= 0x110000: + if not allow_surrogates and 0xD800 <= ch <= 0xDFFFF: + r, pos = errorhandler(errors, public_encoding_name, + "code point in surrogate code point " + "range(0xd800, 0xe000)", + s, pos, len(s)) + result.append(r) + elif ch >= 0x110000: r, pos = errorhandler(errors, public_encoding_name, "codepoint not in range(0x110000)", s, pos, len(s)) diff --git a/rpython/rlib/test/test_runicode.py b/rpython/rlib/test/test_runicode.py --- a/rpython/rlib/test/test_runicode.py +++ b/rpython/rlib/test/test_runicode.py @@ -284,6 +284,11 @@ errorhandler, allow_surrogates=False) assert replace_with(u'rep', None) == u''.encode('utf-32-be') assert replace_with(None, '\xca\xfe\xca\xfe') == '\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>' + # + assert runicode.str_decode_utf_32_be( + b"\x00\x00\xdc\x80", 4, None) == (u'\udc80', 4) + py.test.raises(UnicodeDecodeError, runicode.py3k_str_decode_utf_32_be, + b"\x00\x00\xdc\x80", 4, None) def test_utf7_bugs(self): u = u'A\u2262\u0391.' From pypy.commits at gmail.com Wed Dec 13 17:34:29 2017 From: pypy.commits at gmail.com (amauryfa) Date: Wed, 13 Dec 2017 14:34:29 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Applevel test for the utf32 surrogates. Message-ID: <5a31aaf5.8283df0a.a1855.f19c@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.5 Changeset: r93414:c9a148ecf262 Date: 2017-12-13 10:05 +0100 http://bitbucket.org/pypy/pypy/changeset/c9a148ecf262/ Log: Applevel test for the utf32 surrogates. 
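
	Condensed sketch of the app-level behaviour the new test exercises
	(plain Python 3 code, not part of the changeset; the byte sequences
	are the ill-formed lone-surrogate encodings used in the test below,
	and the same checks should also pass on CPython 3):

	    for encoding, ill_surrogate in [('utf-8', b'\xed\xb2\x80'),
	                                    ('utf-32-be', b'\x00\x00\xdc\x80')]:
	        data = '['.encode(encoding) + ill_surrogate + ']'.encode(encoding)
	        try:
	            data.decode(encoding)
	        except UnicodeDecodeError:
	            pass
	        else:
	            raise AssertionError(encoding + ' accepted a lone surrogate')
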
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -778,6 +778,18 @@ '[]'.encode(encoding)) assert (u'[\udc80]'.encode(encoding, "replace") == '[?]'.encode(encoding)) + for encoding, ill_surrogate in [('utf-8', b'\xed\xb2\x80'), + ('utf-16-le', b'\x80\xdc'), + ('utf-16-be', b'\xdc\x80'), + ('utf-32-le', b'\x80\xdc\x00\x00'), + ('utf-32-be', b'\x00\x00\xdc\x80')]: + print(encoding) + before, after = "[", "]" + before_sequence = before.encode(encoding) + after_sequence = after.encode(encoding) + test_string = before + "\uDC80" + after + test_sequence = before_sequence + ill_surrogate + after_sequence + raises(UnicodeDecodeError, test_sequence.decode, encoding) def test_charmap_encode(self): assert 'xxx'.encode('charmap') == b'xxx' From pypy.commits at gmail.com Wed Dec 13 17:55:36 2017 From: pypy.commits at gmail.com (amauryfa) Date: Wed, 13 Dec 2017 14:55:36 -0800 (PST) Subject: [pypy-commit] pypy py3.6: mmap.write() return the number of bytes written: RPython part Message-ID: <5a31afe8.e28edf0a.5c44d.d959@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93415:09b70b8c9aba Date: 2017-12-13 23:50 +0100 http://bitbucket.org/pypy/pypy/changeset/09b70b8c9aba/ Log: mmap.write() return the number of bytes written: RPython part diff --git a/rpython/rlib/rmmap.py b/rpython/rlib/rmmap.py --- a/rpython/rlib/rmmap.py +++ b/rpython/rlib/rmmap.py @@ -492,6 +492,7 @@ self.setslice(start, data) self.pos = start + data_len + return data_len def write_byte(self, byte): if len(byte) != 1: diff --git a/rpython/rlib/test/test_rmmap.py b/rpython/rlib/test/test_rmmap.py --- a/rpython/rlib/test/test_rmmap.py +++ b/rpython/rlib/test/test_rmmap.py @@ -258,7 +258,7 @@ f.flush() def func(no): m = mmap.mmap(no, 6, access=mmap.ACCESS_WRITE) - m.write("ciao\n") + assert m.write("ciao\n") == 5 m.seek(0) assert m.read(6) == "ciao\nr" m.close() From pypy.commits at gmail.com Wed Dec 13 17:55:38 2017 From: pypy.commits at gmail.com (amauryfa) Date: Wed, 13 Dec 2017 14:55:38 -0800 (PST) Subject: [pypy-commit] pypy py3.6: mmap.write() return the number of bytes written: AppLevel part Message-ID: <5a31afea.449f1c0a.996bb.1627@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93416:98fb1b0c5570 Date: 2017-12-13 23:52 +0100 http://bitbucket.org/pypy/pypy/changeset/98fb1b0c5570/ Log: mmap.write() return the number of bytes written: AppLevel part diff --git a/pypy/module/mmap/interp_mmap.py b/pypy/module/mmap/interp_mmap.py --- a/pypy/module/mmap/interp_mmap.py +++ b/pypy/module/mmap/interp_mmap.py @@ -103,7 +103,7 @@ data = self.space.charbuf_w(w_data) self.check_writeable() try: - self.mmap.write(data) + return self.space.newint(self.mmap.write(data)) except RValueError as v: raise mmap_error(self.space, v) diff --git a/pypy/module/mmap/test/test_mmap.py b/pypy/module/mmap/test/test_mmap.py --- a/pypy/module/mmap/test/test_mmap.py +++ b/pypy/module/mmap/test/test_mmap.py @@ -268,7 +268,7 @@ m = mmap.mmap(f.fileno(), 6, access=mmap.ACCESS_WRITE) raises(TypeError, m.write, 123) raises(ValueError, m.write, b"c"*10) - m.write(b"ciao\n") + assert m.write(b"ciao\n") == 5 m.seek(0) assert m.read(6) == b"ciao\nr" m.close() From pypy.commits at gmail.com Wed Dec 13 17:55:40 2017 From: pypy.commits at gmail.com (amauryfa) Date: Wed, 13 Dec 2017 14:55:40 -0800 (PST) Subject: [pypy-commit] pypy default: mmap.write() return the number of bytes written: RPython part 
Message-ID: <5a31afec.1288df0a.1b11.5b8a@mx.google.com> Author: Amaury Forgeot d'Arc Branch: Changeset: r93417:5ef9bb870cd2 Date: 2017-12-13 23:50 +0100 http://bitbucket.org/pypy/pypy/changeset/5ef9bb870cd2/ Log: mmap.write() return the number of bytes written: RPython part diff --git a/rpython/rlib/rmmap.py b/rpython/rlib/rmmap.py --- a/rpython/rlib/rmmap.py +++ b/rpython/rlib/rmmap.py @@ -492,6 +492,7 @@ self.setslice(start, data) self.pos = start + data_len + return data_len def write_byte(self, byte): if len(byte) != 1: diff --git a/rpython/rlib/test/test_rmmap.py b/rpython/rlib/test/test_rmmap.py --- a/rpython/rlib/test/test_rmmap.py +++ b/rpython/rlib/test/test_rmmap.py @@ -258,7 +258,7 @@ f.flush() def func(no): m = mmap.mmap(no, 6, access=mmap.ACCESS_WRITE) - m.write("ciao\n") + assert m.write("ciao\n") == 5 m.seek(0) assert m.read(6) == "ciao\nr" m.close() From pypy.commits at gmail.com Wed Dec 13 21:23:38 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 13 Dec 2017 18:23:38 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Port b0267eee69d8 to unicodehelper and fix it Message-ID: <5a31e0aa.968ddf0a.40714.1c94@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93418:badb71ed332d Date: 2017-12-14 02:22 +0000 http://bitbucket.org/pypy/pypy/changeset/badb71ed332d/ Log: Port b0267eee69d8 to unicodehelper and fix it diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -2,7 +2,7 @@ import pytest import struct from pypy.interpreter.unicodehelper import ( - encode_utf8, decode_utf8, unicode_encode_utf_32_be) + encode_utf8, decode_utf8, unicode_encode_utf_32_be, str_decode_utf_32_be) from pypy.interpreter.unicodehelper import encode_utf8sp, decode_utf8sp @@ -90,3 +90,6 @@ assert replace_with(u'rep', None) == u''.encode('utf-32-be') assert (replace_with(None, '\xca\xfe\xca\xfe') == '\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>') + + with pytest.raises(UnicodeDecodeError): + str_decode_utf_32_be(b"\x00\x00\xdc\x80", 4, None) diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -172,19 +172,22 @@ def str_decode_utf_32(s, size, errors, final=True, errorhandler=None): result, length, byteorder = str_decode_utf_32_helper( - s, size, errors, final, errorhandler, "native", 'utf-32-' + BYTEORDER2) + s, size, errors, final, errorhandler, "native", 'utf-32-' + BYTEORDER2, + allow_surrogates=False) return result, length def str_decode_utf_32_be(s, size, errors, final=True, errorhandler=None): result, length, byteorder = str_decode_utf_32_helper( - s, size, errors, final, errorhandler, "big", 'utf-32-be') + s, size, errors, final, errorhandler, "big", 'utf-32-be', + allow_surrogates=False) return result, length def str_decode_utf_32_le(s, size, errors, final=True, errorhandler=None): result, length, byteorder = str_decode_utf_32_helper( - s, size, errors, final, errorhandler, "little", 'utf-32-le') + s, size, errors, final, errorhandler, "little", 'utf-32-le', + allow_surrogates=False) return result, length BOM32_DIRECT = intmask(0x0000FEFF) @@ -193,7 +196,8 @@ def str_decode_utf_32_helper(s, size, errors, final=True, errorhandler=None, byteorder="native", - public_encoding_name='utf32'): + public_encoding_name='utf32', + allow_surrogates=True): if errorhandler is None: errorhandler = default_unicode_error_decode bo = 0 @@ -256,10 
+260,17 @@ continue ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) | (ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]])) - if ch >= 0x110000: + if not allow_surrogates and 0xD800 <= ch <= 0xDFFF: + r, pos = errorhandler(errors, public_encoding_name, + "code point in surrogate code point " + "range(0xd800, 0xe000)", + s, pos, pos + 4) + result.append(r) + continue + elif ch >= 0x110000: r, pos = errorhandler(errors, public_encoding_name, "codepoint not in range(0x110000)", - s, pos, len(s)) + s, pos, pos + 4) result.append(r) continue From pypy.commits at gmail.com Wed Dec 13 21:48:04 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 13 Dec 2017 18:48:04 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Adapt test_particularly_evil_undecodable to pypy Message-ID: <5a31e664.8c6f1c0a.15191.129d@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93419:7b810b0bf663 Date: 2017-12-14 02:47 +0000 http://bitbucket.org/pypy/pypy/changeset/7b810b0bf663/ Log: Adapt test_particularly_evil_undecodable to pypy diff --git a/lib-python/3/test/test_compile.py b/lib-python/3/test/test_compile.py --- a/lib-python/3/test/test_compile.py +++ b/lib-python/3/test/test_compile.py @@ -524,7 +524,8 @@ with open(fn, "wb") as fp: fp.write(src) res = script_helper.run_python_until_end(fn)[0] - self.assertIn(b"Non-UTF-8", res.err) + # PyPy change: we have a different error here + self.assertIn(b"SyntaxError", res.err) def test_yet_more_evil_still_undecodable(self): # Issue #25388 From pypy.commits at gmail.com Wed Dec 13 23:55:32 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 13 Dec 2017 20:55:32 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Add failing test that explains the failure in CPython's test_unencodable_filename() Message-ID: <5a320444.449f1c0a.996bb.e8c6@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93420:e170f5f30e32 Date: 2017-12-14 04:54 +0000 http://bitbucket.org/pypy/pypy/changeset/e170f5f30e32/ Log: Add failing test that explains the failure in CPython's test_unencodable_filename() diff --git a/pypy/objspace/std/test/test_celldict.py b/pypy/objspace/std/test/test_celldict.py --- a/pypy/objspace/std/test/test_celldict.py +++ b/pypy/objspace/std/test/test_celldict.py @@ -76,6 +76,8 @@ def test_key_not_there(self): d = type(__builtins__)("abc").__dict__ raises(KeyError, "d['def']") + assert 42 not in d + assert u"\udc00" not in d def test_fallback_evil_key(self): class F(object): From pypy.commits at gmail.com Thu Dec 14 13:00:17 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 14 Dec 2017 10:00:17 -0800 (PST) Subject: [pypy-commit] buildbot default: install virtualenv HEAD to get win32 fix from pypa/virtualenv pr 1103 Message-ID: <5a32bc31.b198df0a.472bc.a636@mx.google.com> Author: Matti Picus Branch: Changeset: r1051:46954035f3c6 Date: 2017-12-14 19:59 +0200 http://bitbucket.org/pypy/buildbot/changeset/46954035f3c6/ Log: install virtualenv HEAD to get win32 fix from pypa/virtualenv pr 1103 diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -486,7 +486,7 @@ factory.addStep(ShellCmd( description="Install recent virtualenv", command=prefix + [target, '-mpip', 'install', '--upgrade', - 'virtualenv'], + 'git+git://github.com/pypa/virtualenv at master'], workdir='venv', flunkOnFailure=True)) factory.addStep(ShellCmd( From pypy.commits at gmail.com Thu Dec 14 14:00:59 2017 From: pypy.commits at gmail.com (cfbolz) Date: Thu, 14 Dec 2017 11:00:59 
-0800 (PST) Subject: [pypy-commit] pypy cpyext-faster-arg-passing: speed up passing some objects to C Message-ID: <5a32ca6b.d23f1c0a.12e64.c077@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: cpyext-faster-arg-passing Changeset: r93421:85fea167b9ea Date: 2017-12-14 20:00 +0100 http://bitbucket.org/pypy/pypy/changeset/85fea167b9ea/ Log: speed up passing some objects to C specifically, passing the instances of classes defined in C is faster, because the Python-version of these instances stores a reference to the pyobj directly in the instance. (this should be generalized and extended to more cases) diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py --- a/pypy/module/cpyext/pyobject.py +++ b/pypy/module/cpyext/pyobject.py @@ -21,6 +21,14 @@ #________________________________________________________ # type description +class W_BaseCPyObject(W_ObjectObject): + """ A subclass of W_ObjectObject that has one field for directly storing + the link from the w_obj to the cpy ref. This is only used for C-defined + types. """ + + _cpy_ref = lltype.nullptr(PyObject.TO) + + class BaseCpyTypedescr(object): basestruct = PyObject.TO W_BaseObject = W_ObjectObject @@ -66,8 +74,12 @@ def realize(self, space, obj): w_type = from_ref(space, rffi.cast(PyObject, obj.c_ob_type)) + assert isinstance(w_type, W_TypeObject) try: - w_obj = space.allocate_instance(self.W_BaseObject, w_type) + if w_type.flag_cpytype: + w_obj = space.allocate_instance(W_BaseCPyObject, w_type) + else: + w_obj = space.allocate_instance(self.W_BaseObject, w_type) except OperationError as e: if e.match(space, space.w_TypeError): raise oefmt(space.w_SystemError, @@ -76,6 +88,9 @@ w_type) raise track_reference(space, obj, w_obj) + if w_type.flag_cpytype: + assert isinstance(w_obj, W_BaseCPyObject) + w_obj._cpy_ref = obj return w_obj typedescr_cache = {} @@ -186,7 +201,7 @@ Ties together a PyObject and an interpreter object. The PyObject's refcnt is increased by REFCNT_FROM_PYPY. The reference in 'py_obj' is not stolen! Remember to decref() - it is you need to. + it if you need to. """ # XXX looks like a PyObject_GC_TRACK assert py_obj.c_ob_refcnt < rawrefcount.REFCNT_FROM_PYPY @@ -237,7 +252,7 @@ @jit.dont_look_inside def as_pyobj(space, w_obj, w_userdata=None, immortal=False): """ - Returns a 'PyObject *' representing the given intepreter object. + Returns a 'PyObject *' representing the given interpreter object. This doesn't give a new reference, but the returned 'PyObject *' is valid at least as long as 'w_obj' is. 
**To be safe, you should use keepalive_until_here(w_obj) some time later.** In case of @@ -245,7 +260,12 @@ """ assert not is_pyobj(w_obj) if w_obj is not None: - py_obj = rawrefcount.from_obj(PyObject, w_obj) + if isinstance(w_obj, W_BaseCPyObject): + py_obj = w_obj._cpy_ref + if not we_are_translated(): + assert py_obj == rawrefcount.from_obj(PyObject, w_obj) + else: + py_obj = rawrefcount.from_obj(PyObject, w_obj) if not py_obj: py_obj = create_ref(space, w_obj, w_userdata, immortal=immortal) # diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py --- a/pypy/module/cpyext/test/test_typeobject.py +++ b/pypy/module/cpyext/test/test_typeobject.py @@ -3,10 +3,21 @@ from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase from pypy.module.cpyext.test.test_api import BaseApiTest from pypy.module.cpyext.api import generic_cpy_call -from pypy.module.cpyext.pyobject import make_ref, from_ref, decref +from pypy.module.cpyext.pyobject import make_ref, from_ref, decref, as_pyobj from pypy.module.cpyext.typeobject import PyTypeObjectPtr class AppTestTypeObject(AppTestCpythonExtensionBase): + + def setup_class(cls): + AppTestCpythonExtensionBase.setup_class.im_func(cls) + def _check_uses_shortcut(w_inst): + from pypy.module.cpyext.pyobject import W_BaseCPyObject + assert isinstance(w_inst, W_BaseCPyObject) + assert w_inst._cpy_ref + assert as_pyobj(cls.space, w_inst) == w_inst._cpy_ref + cls.w__check_uses_shortcut = cls.space.wrap( + gateway.interp2app(_check_uses_shortcut)) + def test_typeobject(self): import sys module = self.import_module(name='foo') @@ -157,6 +168,14 @@ assert fuu2(u"abc").baz().escape() raises(TypeError, module.fooType.object_member.__get__, 1) + def test_shortcut(self): + # test that instances of classes that are defined in C become an + # instance of W_BaseCPyObject and thus can be converted faster back to + # their pyobj, because they store a pointer to it directly. 
+ module = self.import_module(name='foo') + obj = module.fooType() + self._check_uses_shortcut(obj) + def test_multiple_inheritance1(self): module = self.import_module(name='foo') obj = module.UnicodeSubtype(u'xyz') From pypy.commits at gmail.com Thu Dec 14 14:30:45 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 14 Dec 2017 11:30:45 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix handling of unencodable strings in ModuleDictStrategy Message-ID: <5a32d165.8988df0a.a240c.3d09@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93422:da0e3f790bdb Date: 2017-12-14 19:29 +0000 http://bitbucket.org/pypy/pypy/changeset/da0e3f790bdb/ Log: Fix handling of unencodable strings in ModuleDictStrategy diff --git a/pypy/objspace/std/celldict.py b/pypy/objspace/std/celldict.py --- a/pypy/objspace/std/celldict.py +++ b/pypy/objspace/std/celldict.py @@ -6,6 +6,7 @@ from rpython.rlib import jit, rerased, objectmodel from pypy.interpreter.baseobjspace import W_Root +from pypy.interpreter.error import OperationError from pypy.objspace.std.dictmultiobject import ( DictStrategy, ObjectDictStrategy, _never_equal_to_string, create_iterator_classes) @@ -54,10 +55,21 @@ def _getdictvalue_no_unwrapping_pure(self, version, w_dict, key): return self.unerase(w_dict.dstorage).get(key, None) + def try_unwrap_key(self, space, w_key): + if space.is_w(space.type(w_key), space.w_text): + try: + return space.text_w(w_key) + except OperationError as e: + if e.match(space, space.w_UnicodeEncodeError): + return None + raise + return None + def setitem(self, w_dict, w_key, w_value): space = self.space - if space.is_w(space.type(w_key), space.w_text): - self.setitem_str(w_dict, space.text_w(w_key), w_value) + key = self.try_unwrap_key(space, w_key) + if key is not None: + self.setitem_str(w_dict, key, w_value) else: self.switch_to_object_strategy(w_dict) w_dict.setitem(w_key, w_value) @@ -75,8 +87,8 @@ def setdefault(self, w_dict, w_key, w_default): space = self.space - if space.is_w(space.type(w_key), space.w_text): - key = space.text_w(w_key) + key = self.try_unwrap_key(space, w_key) + if key is not None: cell = self.getdictvalue_no_unwrapping(w_dict, key) w_result = unwrap_cell(self.space, cell) if w_result is not None: @@ -90,8 +102,8 @@ def delitem(self, w_dict, w_key): space = self.space w_key_type = space.type(w_key) - if space.is_w(w_key_type, space.w_text): - key = space.text_w(w_key) + key = self.try_unwrap_key(space, w_key) + if key is not None: dict_w = self.unerase(w_dict.dstorage) try: del dict_w[key] @@ -111,9 +123,9 @@ def getitem(self, w_dict, w_key): space = self.space w_lookup_type = space.type(w_key) - if space.is_w(w_lookup_type, space.w_text): - return self.getitem_str(w_dict, space.text_w(w_key)) - + key = self.try_unwrap_key(space, w_key) + if key is not None: + return self.getitem_str(w_dict, key) elif _never_equal_to_string(space, w_lookup_type): return None else: From pypy.commits at gmail.com Thu Dec 14 16:02:56 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 14 Dec 2017 13:02:56 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Give up on trying to get test_multithreaded_import() to work untranslated Message-ID: <5a32e700.c9061c0a.2b1dd.2b8d@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93423:b3b1beda9224 Date: 2017-12-14 21:02 +0000 http://bitbucket.org/pypy/pypy/changeset/b3b1beda9224/ Log: Give up on trying to get test_multithreaded_import() to work untranslated diff --git a/extra_tests/test_import.py b/extra_tests/test_import.py new file mode 100644 --- 
/dev/null +++ b/extra_tests/test_import.py @@ -0,0 +1,41 @@ +import pytest +import sys +import time +from _thread import start_new_thread + + at pytest.mark.xfail('__pypy__' not in sys.builtin_module_names, + reason='Fails on CPython') +def test_multithreaded_import(tmpdir): + tmpfile = tmpdir.join('multithreaded_import_test.py') + tmpfile.write('''if 1: + x = 666 + import time + for i in range(1000): time.sleep(0.001) + x = 42 + ''') + + oldpath = sys.path[:] + try: + sys.path.insert(0, str(tmpdir)) + got = [] + + def check(): + import multithreaded_import_test + got.append(getattr(multithreaded_import_test, 'x', '?')) + + for i in range(5): + start_new_thread(check, ()) + + for n in range(100): + for i in range(105): + time.sleep(0.001) + if len(got) == 5: + break + else: + raise AssertionError("got %r so far but still waiting" % + (got,)) + + assert got == [42] * 5 + + finally: + sys.path[:] = oldpath diff --git a/pypy/module/imp/test/test_import.py b/pypy/module/imp/test/test_import.py --- a/pypy/module/imp/test/test_import.py +++ b/pypy/module/imp/test/test_import.py @@ -1249,46 +1249,3 @@ spaceconfig = { "translation.sandbox": True } - - -class AppTestMultithreadedImp(object): - spaceconfig = dict(usemodules=['thread', 'time']) - - def setup_class(cls): - #if not conftest.option.runappdirect: - # py.test.skip("meant as an -A test") - tmpfile = udir.join('test_multithreaded_imp.py') - tmpfile.write('''if 1: - x = 666 - import time - for i in range(1000): time.sleep(0.001) - x = 42 - ''') - cls.w_tmppath = cls.space.wrap(str(udir)) - - def test_multithreaded_import(self): - import sys, _thread, time - oldpath = sys.path[:] - try: - sys.path.insert(0, self.tmppath) - got = [] - - def check(): - import test_multithreaded_imp - got.append(getattr(test_multithreaded_imp, 'x', '?')) - - for i in range(5): - _thread.start_new_thread(check, ()) - - for n in range(100): - for i in range(105): time.sleep(0.001) - if len(got) == 5: - break - else: - raise AssertionError("got %r so far but still waiting" % - (got,)) - - assert got == [42] * 5, got - - finally: - sys.path[:] = oldpath From pypy.commits at gmail.com Thu Dec 14 17:10:28 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 14 Dec 2017 14:10:28 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Use CPython's C implementation for PyUnicode_FromWideChar (fixes size==-1 case) Message-ID: <5a32f6d4.b5a0df0a.54d62.0171@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93424:cb9a68d84f56 Date: 2017-12-14 22:09 +0000 http://bitbucket.org/pypy/pypy/changeset/cb9a68d84f56/ Log: Use CPython's C implementation for PyUnicode_FromWideChar (fixes size==-1 case) diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -550,7 +550,7 @@ 'PyArg_ParseTuple', 'PyArg_UnpackTuple', 'PyArg_ParseTupleAndKeywords', 'PyArg_VaParse', 'PyArg_VaParseTupleAndKeywords', '_PyArg_NoKeywords', 'PyUnicode_FromFormat', 'PyUnicode_FromFormatV', 'PyUnicode_AsWideCharString', - 'PyUnicode_GetSize', 'PyUnicode_GetLength', + 'PyUnicode_GetSize', 'PyUnicode_GetLength', 'PyUnicode_FromWideChar', 'PyModule_AddObject', 'PyModule_AddIntConstant', 'PyModule_AddStringConstant', 'PyModule_GetDef', 'PyModuleDef_Init', 'PyModule_GetState', 'Py_BuildValue', 'Py_VaBuildValue', 'PyTuple_Pack', diff --git a/pypy/module/cpyext/include/unicodeobject.h b/pypy/module/cpyext/include/unicodeobject.h --- a/pypy/module/cpyext/include/unicodeobject.h +++ b/pypy/module/cpyext/include/unicodeobject.h @@ -20,6 
+20,14 @@ /* #define HAVE_WCHAR_H */ /* #define HAVE_USABLE_WCHAR_T */ +#ifdef HAVE_WCHAR_H +/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */ +# ifdef _HAVE_BSDI +# include +# endif +# include +#endif + #ifdef __cplusplus extern "C" { #endif @@ -270,6 +278,16 @@ #ifdef HAVE_WCHAR_H +/* Create a Unicode Object from the wchar_t buffer w of the given + size. + + The buffer is copied into the new object. */ + +PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar( + const wchar_t *w, /* wchar_t buffer */ + Py_ssize_t size /* size of buffer */ + ); + /* Convert the Unicode object to a wide character string. The output string always ends with a nul character. If size is not NULL, write the number of wide characters (excluding the null character) into *size. diff --git a/pypy/module/cpyext/src/unicodeobject.c b/pypy/module/cpyext/src/unicodeobject.c --- a/pypy/module/cpyext/src/unicodeobject.c +++ b/pypy/module/cpyext/src/unicodeobject.c @@ -54,6 +54,29 @@ plus 1 for the sign. 53/22 is an upper bound for log10(256). */ #define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22) +#ifdef HAVE_WCHAR_H + +PyObject * +PyUnicode_FromWideChar(const wchar_t *w, Py_ssize_t size) +{ + /* + if (w == NULL) { + if (size == 0) + _Py_RETURN_UNICODE_EMPTY(); + PyErr_BadInternalCall(); + return NULL; + } + */ + + if (size == -1) { + size = wcslen(w); + } + + return PyUnicode_FromUnicode(w, size); +} + +#endif /* HAVE_WCHAR_H */ + PyObject * PyUnicode_FromFormatV(const char *format, va_list vargs) { diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -475,13 +475,6 @@ else: return new_empty_unicode(space, length) - at cpython_api([CONST_WSTRING, Py_ssize_t], PyObject, result_is_ll=True) -def PyUnicode_FromWideChar(space, wchar_p, length): - """Create a Unicode object from the wchar_t buffer w of the given size. 
- Return NULL on failure.""" - # PyPy supposes Py_UNICODE == wchar_t - return PyUnicode_FromUnicode(space, wchar_p, length) - @cpython_api([PyObject, CONST_STRING], PyObject) def _PyUnicode_AsDefaultEncodedString(space, w_unicode, errors): return PyUnicode_AsEncodedString(space, w_unicode, lltype.nullptr(rffi.CCHARP.TO), errors) From pypy.commits at gmail.com Thu Dec 14 23:23:34 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 14 Dec 2017 20:23:34 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix handling of time.sleep()'s argument and use nanosecond precision internally Message-ID: <5a334e46.bb86df0a.26393.3001@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93425:570c2749ff19 Date: 2017-12-15 04:22 +0000 http://bitbucket.org/pypy/pypy/changeset/570c2749ff19/ Log: Fix handling of time.sleep()'s argument and use nanosecond precision internally diff --git a/pypy/interpreter/timeutils.py b/pypy/interpreter/timeutils.py --- a/pypy/interpreter/timeutils.py +++ b/pypy/interpreter/timeutils.py @@ -1,6 +1,14 @@ """ Access to the time module's high-resolution monotonic clock """ +import math +from rpython.rlib.rarithmetic import ( + r_longlong, ovfcheck, ovfcheck_float_to_longlong) +from pypy.interpreter.error import oefmt + +SECS_TO_NS = 10 ** 9 +MS_TO_NS = 10 ** 6 +US_TO_NS = 10 ** 3 def monotonic(space): from pypy.module.time import interp_time @@ -9,3 +17,21 @@ else: w_res = interp_time.gettimeofday(space) return space.float_w(w_res) # xxx back and forth + +def timestamp_w(space, w_secs): + if space.isinstance_w(w_secs, space.w_float): + secs = space.float_w(w_secs) + result_float = math.ceil(secs * SECS_TO_NS) + try: + return ovfcheck_float_to_longlong(result_float) + except OverflowError: + raise oefmt(space.w_OverflowError, + "timestamp %R too large to convert to C _PyTime_t", w_secs) + else: + sec = space.int_w(w_secs) + try: + result = ovfcheck(sec * SECS_TO_NS) + except OverflowError: + raise oefmt(space.w_OverflowError, + "timestamp too large to convert to C _PyTime_t") + return r_longlong(result) diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py --- a/pypy/module/time/interp_time.py +++ b/pypy/module/time/interp_time.py @@ -3,10 +3,12 @@ from pypy.interpreter.error import (OperationError, oefmt, strerror as _strerror, exception_from_saved_errno) from pypy.interpreter.gateway import unwrap_spec -from pypy.interpreter import timeutils +from pypy.interpreter.timeutils import ( + SECS_TO_NS, MS_TO_NS, US_TO_NS, monotonic as _monotonic, timestamp_w) from pypy.interpreter.unicodehelper import decode_utf8, encode_utf8 from rpython.rtyper.lltypesystem import lltype -from rpython.rlib.rarithmetic import intmask, r_ulonglong, r_longfloat, widen +from rpython.rlib.rarithmetic import ( + intmask, r_ulonglong, r_longfloat, widen, ovfcheck, ovfcheck_float_to_int) from rpython.rlib.rtime import (GETTIMEOFDAY_NO_TZ, TIMEVAL, HAVE_GETTIMEOFDAY, HAVE_FTIME) from rpython.rlib import rposix, rtime @@ -452,25 +454,22 @@ from rpython.rlib.rtime import c_select from rpython.rlib import rwin32 - at unwrap_spec(secs=float) -def sleep(space, secs): - if secs < 0: +def sleep(space, w_secs): + ns = timestamp_w(space, w_secs) + if not (ns >= 0): raise oefmt(space.w_ValueError, "sleep length must be non-negative") - end_time = timeutils.monotonic(space) + secs + end_time = _monotonic(space) + float(ns) / SECS_TO_NS while True: if _WIN: # as decreed by Guido, only the main thread can be # interrupted. 
main_thread = space.fromcache(State).main_thread interruptible = (main_thread == thread.get_ident()) - millisecs = int(secs * 1000) + millisecs = ns // MS_TO_NS if millisecs == 0 or not interruptible: - rtime.sleep(secs) + rtime.sleep(float(ns) / SECS_TO_NS) break - MAX = int(sys.maxint / 1000) # > 24 days - if millisecs > MAX: - millisecs = MAX interrupt_event = space.fromcache(State).get_interrupt_event() rwin32.ResetEvent(interrupt_event) rc = rwin32.WaitForSingleObject(interrupt_event, millisecs) @@ -479,9 +478,10 @@ else: void = lltype.nullptr(rffi.VOIDP.TO) with lltype.scoped_alloc(TIMEVAL) as t: - frac = math.fmod(secs, 1.0) - rffi.setintfield(t, 'c_tv_sec', int(secs)) - rffi.setintfield(t, 'c_tv_usec', int(frac*1000000.0)) + seconds = ns // SECS_TO_NS + us = (ns % SECS_TO_NS) // US_TO_NS + rffi.setintfield(t, 'c_tv_sec', seconds) + rffi.setintfield(t, 'c_tv_usec', us) res = rffi.cast(rffi.LONG, c_select(0, void, void, void, t)) if res == 0: @@ -489,8 +489,8 @@ if rposix.get_saved_errno() != EINTR: raise exception_from_saved_errno(space, space.w_OSError) space.getexecutioncontext().checksignals() - secs = end_time - timeutils.monotonic(space) # retry - if secs <= 0.0: + secs = end_time - _monotonic(space) # retry + if secs <= 0: break def _get_module_object(space, obj_name): diff --git a/pypy/module/time/test/test_time.py b/pypy/module/time/test/test_time.py --- a/pypy/module/time/test/test_time.py +++ b/pypy/module/time/test/test_time.py @@ -13,13 +13,11 @@ assert isinstance(time._STRUCT_TM_ITEMS, int) def test_sleep(self): - import sys - import os import time raises(TypeError, time.sleep, "foo") time.sleep(0.12345) raises(ValueError, time.sleep, -1.0) - raises(ValueError, time.sleep, float('nan')) + raises((ValueError, OverflowError), time.sleep, float('nan')) raises(OverflowError, time.sleep, float('inf')) def test_clock(self): From pypy.commits at gmail.com Fri Dec 15 16:43:36 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 15 Dec 2017 13:43:36 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Try to make test_builtin_reimport_mess() clearer and fix it Message-ID: <5a344208.13a0df0a.0da1.1431@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93426:404dc45db8ca Date: 2017-12-15 21:42 +0000 http://bitbucket.org/pypy/pypy/changeset/404dc45db8ca/ Log: Try to make test_builtin_reimport_mess() clearer and fix it Note: _md5 was a builtin module in -A tests only, so use something that is always a builtin instead, e.g. time. diff --git a/pypy/module/imp/test/test_app.py b/pypy/module/imp/test/test_app.py --- a/pypy/module/imp/test/test_app.py +++ b/pypy/module/imp/test/test_app.py @@ -282,30 +282,31 @@ def test_builtin_reimport_mess(self): # taken from https://bugs.pypy.org/issue1514, with extra cases - # that show a difference with CPython: we can get on CPython - # several module objects for the same built-in module :-( - import sys, _md5 + import sys + import time as time1 - old = _md5.md5 - _md5.md5 = 42 + old = time1.process_time + try: + time1.process_time = 42 - # save, re-import, restore. - saved = sys.modules.pop('_md5') - _md52 = __import__('_md5') - assert _md52 is not _md5 - assert _md52.md5 is old - assert _md52 is sys.modules['_md5'] - assert _md5 is saved - assert _md5.md5 == 42 + # save, re-import, restore. 
+ saved = sys.modules.pop('time') + assert time1 is saved + time2 = __import__('time') + assert time2 is not time1 + assert time2 is sys.modules['time'] + assert time2.process_time is old - import _md5 - assert _md5.md5 is old + import time as time3 + assert time3 is time2 + assert time3.process_time is old - sys.modules['_md5'] = saved - import _md5 - assert _md5.md5 == 42 - - _md5.md5 = old + sys.modules['time'] = time1 + import time as time4 + assert time4 is time1 + assert time4.process_time == 42 + finally: + time1.process_time = old def test_get_tag(self): import imp From pypy.commits at gmail.com Fri Dec 15 17:23:13 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 15 Dec 2017 14:23:13 -0800 (PST) Subject: [pypy-commit] pypy py3.5: kill __pypy__.save_module_content_for_future_reload(), it does not do anything (probably maybe) Message-ID: <5a344b51.8fb1df0a.cc535.ca7d@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93427:ff0600c50bee Date: 2017-12-15 22:22 +0000 http://bitbucket.org/pypy/pypy/changeset/ff0600c50bee/ Log: kill __pypy__.save_module_content_for_future_reload(), it does not do anything (probably maybe) diff --git a/pypy/interpreter/app_main.py b/pypy/interpreter/app_main.py --- a/pypy/interpreter/app_main.py +++ b/pypy/interpreter/app_main.py @@ -574,10 +574,6 @@ except ValueError: pass # ignore "2 is not a valid file descriptor" - if we_are_translated(): - import __pypy__ - __pypy__.save_module_content_for_future_reload(sys) - mainmodule = type(sys)('__main__') mainmodule.__loader__ = sys.__loader__ mainmodule.__builtins__ = os.__builtins__ diff --git a/pypy/module/__pypy__/__init__.py b/pypy/module/__pypy__/__init__.py --- a/pypy/module/__pypy__/__init__.py +++ b/pypy/module/__pypy__/__init__.py @@ -92,8 +92,6 @@ 'set_debug' : 'interp_magic.set_debug', 'locals_to_fast' : 'interp_magic.locals_to_fast', 'set_code_callback' : 'interp_magic.set_code_callback', - 'save_module_content_for_future_reload': - 'interp_magic.save_module_content_for_future_reload', 'decode_long' : 'interp_magic.decode_long', '_promote' : 'interp_magic._promote', 'normalize_exc' : 'interp_magic.normalize_exc', @@ -131,7 +129,7 @@ raise else: pass # ok fine to ignore in this case - + if self.space.config.translation.jit: features = detect_cpu.getcpufeatures(model) self.extra_interpdef('jit_backend_features', diff --git a/pypy/module/__pypy__/interp_magic.py b/pypy/module/__pypy__/interp_magic.py --- a/pypy/module/__pypy__/interp_magic.py +++ b/pypy/module/__pypy__/interp_magic.py @@ -145,10 +145,6 @@ assert isinstance(w_frame, PyFrame) w_frame.locals2fast() - at unwrap_spec(w_module=MixedModule) -def save_module_content_for_future_reload(space, w_module): - w_module.save_module_content_for_future_reload() - def set_code_callback(space, w_callable): cache = space.fromcache(CodeHookCache) if space.is_none(w_callable): diff --git a/pypy/module/__pypy__/test/test_magic.py b/pypy/module/__pypy__/test/test_magic.py --- a/pypy/module/__pypy__/test/test_magic.py +++ b/pypy/module/__pypy__/test/test_magic.py @@ -2,18 +2,6 @@ class AppTestMagic: spaceconfig = dict(usemodules=['__pypy__']) - def test_save_module_content_for_future_reload(self): - import sys, __pypy__, imp - d = sys.dont_write_bytecode - sys.dont_write_bytecode = "hello world" - __pypy__.save_module_content_for_future_reload(sys) - sys.dont_write_bytecode = d - imp.reload(sys) - assert sys.dont_write_bytecode == "hello world" - # - sys.dont_write_bytecode = d - __pypy__.save_module_content_for_future_reload(sys) - def 
test_new_code_hook(self): # workaround for running on top of old CPython 2.7 versions def exec_(code, d): From pypy.commits at gmail.com Fri Dec 15 17:57:56 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 15 Dec 2017 14:57:56 -0800 (PST) Subject: [pypy-commit] pypy cpyext-faster-arg-passing: refactor to use methods Message-ID: <5a345374.0abddf0a.1eb80.36d8@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: cpyext-faster-arg-passing Changeset: r93428:fc3f8f9e8b75 Date: 2017-12-14 21:31 +0100 http://bitbucket.org/pypy/pypy/changeset/fc3f8f9e8b75/ Log: refactor to use methods diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -208,6 +208,21 @@ def _set_mapdict_storage_and_map(self, storage, map): raise NotImplementedError + + # ------------------------------------------------------------------- + # cpyext support + # these functions will only be seen by the annotator if we translate + # with the cpyext module + + def _cpyext_as_pyobj(self, space): + from pypy.module.cpyext.pyobject import w_root_as_pyobj + return w_root_as_pyobj(self, space) + + def _cpyext_attach_pyobj(self, space, py_obj): + from pypy.module.cpyext.pyobject import w_root_attach_pyobj + return w_root_attach_pyobj(self, space, py_obj) + + # ------------------------------------------------------------------- def is_w(self, space, w_other): diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py --- a/pypy/module/cpyext/pyobject.py +++ b/pypy/module/cpyext/pyobject.py @@ -28,6 +28,30 @@ _cpy_ref = lltype.nullptr(PyObject.TO) + def _cpyext_as_pyobj(self, space): + return self._cpy_ref + + def _cpyext_attach_pyobj(self, space, py_obj): + self._cpy_ref = py_obj + rawrefcount.create_link_pyobj(self, py_obj) + +def check_true(s_arg, bookeeper): + assert s_arg.const is True + +def w_root_as_pyobj(w_obj, space): + from rpython.rlib.debug import check_annotation + # make sure that translation crashes if we see this while not translating + # with cpyext + check_annotation(space.config.objspace.usemodules.cpyext, check_true) + # default implementation of _cpyext_as_pyobj + return rawrefcount.from_obj(PyObject, w_obj) + +def w_root_attach_pyobj(w_obj, space, py_obj): + from rpython.rlib.debug import check_annotation + check_annotation(space.config.objspace.usemodules.cpyext, check_true) + assert space.config.objspace.usemodules.cpyext + # default implementation of _cpyext_attach_pyobj + rawrefcount.create_link_pypy(w_obj, py_obj) class BaseCpyTypedescr(object): basestruct = PyObject.TO @@ -206,7 +230,7 @@ # XXX looks like a PyObject_GC_TRACK assert py_obj.c_ob_refcnt < rawrefcount.REFCNT_FROM_PYPY py_obj.c_ob_refcnt += rawrefcount.REFCNT_FROM_PYPY - rawrefcount.create_link_pypy(w_obj, py_obj) + w_obj._cpyext_attach_pyobj(space, py_obj) w_marker_deallocating = W_Root() @@ -260,12 +284,7 @@ """ assert not is_pyobj(w_obj) if w_obj is not None: - if isinstance(w_obj, W_BaseCPyObject): - py_obj = w_obj._cpy_ref - if not we_are_translated(): - assert py_obj == rawrefcount.from_obj(PyObject, w_obj) - else: - py_obj = rawrefcount.from_obj(PyObject, w_obj) + py_obj = w_obj._cpyext_as_pyobj(space) if not py_obj: py_obj = create_ref(space, w_obj, w_userdata, immortal=immortal) # From pypy.commits at gmail.com Fri Dec 15 17:57:58 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 15 Dec 2017 14:57:58 -0800 (PST) Subject: [pypy-commit] pypy cpyext-faster-arg-passing: generalize the code and store 
py_obj references in types, bools, and None Message-ID: <5a345376.42b2df0a.fe181.8756@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: cpyext-faster-arg-passing Changeset: r93429:5187eda5bf36 Date: 2017-12-15 14:28 +0100 http://bitbucket.org/pypy/pypy/changeset/5187eda5bf36/ Log: generalize the code and store py_obj references in types, bools, and None diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py --- a/pypy/module/cpyext/pyobject.py +++ b/pypy/module/cpyext/pyobject.py @@ -10,6 +10,8 @@ PyVarObject, Py_ssize_t, init_function, cts) from pypy.module.cpyext.state import State from pypy.objspace.std.typeobject import W_TypeObject +from pypy.objspace.std.noneobject import W_NoneObject +from pypy.objspace.std.boolobject import W_BoolObject from pypy.objspace.std.objectobject import W_ObjectObject from rpython.rlib.objectmodel import specialize, we_are_translated from rpython.rlib.objectmodel import keepalive_until_here @@ -26,14 +28,6 @@ the link from the w_obj to the cpy ref. This is only used for C-defined types. """ - _cpy_ref = lltype.nullptr(PyObject.TO) - - def _cpyext_as_pyobj(self, space): - return self._cpy_ref - - def _cpyext_attach_pyobj(self, space, py_obj): - self._cpy_ref = py_obj - rawrefcount.create_link_pyobj(self, py_obj) def check_true(s_arg, bookeeper): assert s_arg.const is True @@ -53,6 +47,28 @@ # default implementation of _cpyext_attach_pyobj rawrefcount.create_link_pypy(w_obj, py_obj) + +def add_direct_pyobj_storage(cls): + """ Add the necessary methods to a class to store a reference to the py_obj + on its instances directly. """ + + cls._cpy_ref = lltype.nullptr(PyObject.TO) + + def _cpyext_as_pyobj(self, space): + return self._cpy_ref + cls._cpyext_as_pyobj = _cpyext_as_pyobj + + def _cpyext_attach_pyobj(self, space, py_obj): + self._cpy_ref = py_obj + rawrefcount.create_link_pyobj(self, py_obj) + cls._cpyext_attach_pyobj = _cpyext_attach_pyobj + +add_direct_pyobj_storage(W_BaseCPyObject) +add_direct_pyobj_storage(W_TypeObject) +add_direct_pyobj_storage(W_NoneObject) +add_direct_pyobj_storage(W_BoolObject) + + class BaseCpyTypedescr(object): basestruct = PyObject.TO W_BaseObject = W_ObjectObject diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py --- a/pypy/module/cpyext/test/test_typeobject.py +++ b/pypy/module/cpyext/test/test_typeobject.py @@ -11,10 +11,9 @@ def setup_class(cls): AppTestCpythonExtensionBase.setup_class.im_func(cls) def _check_uses_shortcut(w_inst): - from pypy.module.cpyext.pyobject import W_BaseCPyObject - assert isinstance(w_inst, W_BaseCPyObject) - assert w_inst._cpy_ref - assert as_pyobj(cls.space, w_inst) == w_inst._cpy_ref + res = hasattr(w_inst, "_cpy_ref") and w_inst._cpy_ref + res = res and as_pyobj(cls.space, w_inst) == w_inst._cpy_ref + return cls.space.newbool(res) cls.w__check_uses_shortcut = cls.space.wrap( gateway.interp2app(_check_uses_shortcut)) @@ -174,7 +173,16 @@ # their pyobj, because they store a pointer to it directly. 
module = self.import_module(name='foo') obj = module.fooType() - self._check_uses_shortcut(obj) + assert self._check_uses_shortcut(obj) + # W_TypeObjects use shortcut + assert self._check_uses_shortcut(object) + assert self._check_uses_shortcut(type) + # None, True, False use shortcut + assert self._check_uses_shortcut(None) + assert self._check_uses_shortcut(True) + assert self._check_uses_shortcut(False) + assert not self._check_uses_shortcut(1) + assert not self._check_uses_shortcut(object()) def test_multiple_inheritance1(self): module = self.import_module(name='foo') From pypy.commits at gmail.com Fri Dec 15 18:28:21 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 15 Dec 2017 15:28:21 -0800 (PST) Subject: [pypy-commit] pypy py3.5: I hate RPython Message-ID: <5a345a95.8a1f1c0a.27775.19ed@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93430:09836355d06a Date: 2017-12-15 23:27 +0000 http://bitbucket.org/pypy/pypy/changeset/09836355d06a/ Log: I hate RPython diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py --- a/pypy/module/time/interp_time.py +++ b/pypy/module/time/interp_time.py @@ -480,8 +480,8 @@ with lltype.scoped_alloc(TIMEVAL) as t: seconds = ns // SECS_TO_NS us = (ns % SECS_TO_NS) // US_TO_NS - rffi.setintfield(t, 'c_tv_sec', seconds) - rffi.setintfield(t, 'c_tv_usec', us) + rffi.setintfield(t, 'c_tv_sec', int(seconds)) + rffi.setintfield(t, 'c_tv_usec', int(us)) res = rffi.cast(rffi.LONG, c_select(0, void, void, void, t)) if res == 0: diff --git a/rpython/rlib/rtime.py b/rpython/rlib/rtime.py --- a/rpython/rlib/rtime.py +++ b/rpython/rlib/rtime.py @@ -165,7 +165,7 @@ 'QueryPerformanceCounter', [rffi.CArrayPtr(lltype.SignedLongLong)], lltype.Void, releasegil=False) QueryPerformanceFrequency = external( - 'QueryPerformanceFrequency', [rffi.CArrayPtr(lltype.SignedLongLong)], + 'QueryPerformanceFrequency', [rffi.CArrayPtr(lltype.SignedLongLong)], rffi.INT, releasegil=False) class State(object): divisor = 0.0 @@ -267,9 +267,10 @@ else: void = lltype.nullptr(rffi.VOIDP.TO) with lltype.scoped_alloc(TIMEVAL) as t: - frac = math.fmod(secs, 1.0) + frac = int(math.fmod(secs, 1.0) * 1000000.) + assert frac >= 0 rffi.setintfield(t, 'c_tv_sec', int(secs)) - rffi.setintfield(t, 'c_tv_usec', int(frac*1000000.0)) + rffi.setintfield(t, 'c_tv_usec', frac) if rffi.cast(rffi.LONG, c_select(0, void, void, void, t)) != 0: errno = rposix.get_saved_errno() From pypy.commits at gmail.com Fri Dec 15 20:49:07 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 15 Dec 2017 17:49:07 -0800 (PST) Subject: [pypy-commit] pypy py3.5: None of these checks make sense for pypy3 Message-ID: <5a347b93.43b6df0a.9323b.a0fd@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93431:6f435c55a176 Date: 2017-12-16 01:48 +0000 http://bitbucket.org/pypy/pypy/changeset/6f435c55a176/ Log: None of these checks make sense for pypy3 diff --git a/pypy/tool/pytest/objspace.py b/pypy/tool/pytest/objspace.py --- a/pypy/tool/pytest/objspace.py +++ b/pypy/tool/pytest/objspace.py @@ -15,12 +15,12 @@ # this exception is typically only raised if a module is not available. 
# in this case the test should be skipped py.test.skip(str(e)) + if getattr(option, 'runappdirect', None): + return TinyObjSpace() key = config.getkey() try: return _SPACECACHE[key] except KeyError: - if getattr(option, 'runappdirect', None): - return TinyObjSpace(**kwds) space = maketestobjspace(config) _SPACECACHE[key] = space return space @@ -46,28 +46,7 @@ class TinyObjSpace(object): """An object space that delegates everything to the hosting Python.""" - def __init__(self, **kwds): - info = getattr(sys, 'pypy_translation_info', None) - for key, value in kwds.iteritems(): - if key == 'usemodules': - if info is not None: - for modname in value: - ok = info.get('objspace.usemodules.%s' % modname, - False) - if not ok: - py.test.skip("cannot runappdirect test: " - "module %r required" % (modname,)) - continue - if info is None: - py.test.skip("cannot runappdirect this test on top of CPython") - if ('translation.' + key) in info: - key = 'translation.' + key - has = info.get(key, None) - if has != value: - #print sys.pypy_translation_info - py.test.skip("cannot runappdirect test: space needs %s = %s, "\ - "while pypy3-c was built with %s" % (key, value, has)) - + def __init__(self): for name in ('int', 'long', 'str', 'unicode', 'list', 'None', 'ValueError', 'OverflowError'): setattr(self, 'w_' + name, eval(name)) @@ -93,9 +72,6 @@ return list(self.wrap(item) for item in obj) return obj - def newbytes(self, obj): - return obj - def unpackiterable(self, itr): return list(itr) From pypy.commits at gmail.com Fri Dec 15 21:17:55 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 15 Dec 2017 18:17:55 -0800 (PST) Subject: [pypy-commit] pypy default: Move test_greenlet.py to pypy/module/_continuation/ Message-ID: <5a348253.0ea6df0a.9989a.0023@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93432:9028d55b25e0 Date: 2017-12-16 02:17 +0000 http://bitbucket.org/pypy/pypy/changeset/9028d55b25e0/ Log: Move test_greenlet.py to pypy/module/_continuation/ diff --git a/pypy/module/test_lib_pypy/test_greenlet.py b/pypy/module/_continuation/test/test_greenlet.py rename from pypy/module/test_lib_pypy/test_greenlet.py rename to pypy/module/_continuation/test/test_greenlet.py From pypy.commits at gmail.com Fri Dec 15 21:18:55 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 15 Dec 2017 18:18:55 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a34828f.4697df0a.a4a3.d3da@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93433:4d0d6cd1346b Date: 2017-12-16 02:18 +0000 http://bitbucket.org/pypy/pypy/changeset/4d0d6cd1346b/ Log: hg merge default diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst --- a/pypy/doc/how-to-release.rst +++ b/pypy/doc/how-to-release.rst @@ -62,7 +62,7 @@ * go to pypy/tool/release and run ``force-builds.py `` The following JIT binaries should be built, however, we need more buildbots - windows, linux-32, linux-64, osx64, armhf-raring, armhf-raspberrian, armel, + windows, linux-32, linux-64, osx64, armhf-raspberrian, armel, freebsd64 * wait for builds to complete, make sure there are no failures diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -36,6 +36,6 @@ .. branch: win32-vcvars -.. branch rdict-fast-hash +.. branch: rdict-fast-hash Make it possible to declare that the hash function of an r_dict is fast in RPython. 
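The rdict-fast-hash entry above concerns RPython's r_dict, which takes explicit equality and hash functions. Below is a minimal sketch of that API (not part of the merge diff); the keyword used to declare the hash function as fast is only an assumption about the branch, since its exact spelling is not shown in this diff:

    from rpython.rlib.objectmodel import r_dict, compute_hash

    def str_eq(s1, s2):
        return s1 == s2

    def str_hash(s):
        # a cheap, side-effect-free hash; the kind of function the branch
        # lets you declare as fast
        return compute_hash(s)

    d = r_dict(str_eq, str_hash)
    # assumed new form: d = r_dict(str_eq, str_hash, simple_hash_eq=True)
    d['pypy'] = 42
    assert d['pypy'] == 42
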
diff --git a/pypy/module/test_lib_pypy/test_greenlet.py b/pypy/module/_continuation/test/test_greenlet.py rename from pypy/module/test_lib_pypy/test_greenlet.py rename to pypy/module/_continuation/test/test_greenlet.py diff --git a/pypy/tool/release/force-builds.py b/pypy/tool/release/force-builds.py --- a/pypy/tool/release/force-builds.py +++ b/pypy/tool/release/force-builds.py @@ -29,7 +29,6 @@ 'pypy-c-jit-macosx-x86-64', 'pypy-c-jit-win-x86-32', 'pypy-c-jit-linux-s390x', - 'build-pypy-c-jit-linux-armhf-raring', 'build-pypy-c-jit-linux-armhf-raspbian', 'build-pypy-c-jit-linux-armel', ] diff --git a/pypy/tool/release/repackage.sh b/pypy/tool/release/repackage.sh --- a/pypy/tool/release/repackage.sh +++ b/pypy/tool/release/repackage.sh @@ -23,7 +23,7 @@ # Download latest builds from the buildmaster, rename the top # level directory, and repackage ready to be uploaded to bitbucket -for plat in linux linux64 linux-armhf-raspbian linux-armhf-raring linux-armel osx64 s390x +for plat in linux linux64 linux-armhf-raspbian linux-armel osx64 s390x do echo downloading package for $plat if wget -q --show-progress http://buildbot.pypy.org/nightly/$branchname/pypy-c-jit-latest-$plat.tar.bz2 diff --git a/rpython/jit/metainterp/optimizeopt/optimizer.py b/rpython/jit/metainterp/optimizeopt/optimizer.py --- a/rpython/jit/metainterp/optimizeopt/optimizer.py +++ b/rpython/jit/metainterp/optimizeopt/optimizer.py @@ -273,7 +273,6 @@ self.jitdriver_sd = jitdriver_sd self.cpu = metainterp_sd.cpu self.interned_refs = self.cpu.ts.new_ref_dict() - self.interned_ints = {} self.resumedata_memo = resume.ResumeDataLoopMemo(metainterp_sd) self.pendingfields = None # set temporarily to a list, normally by # heap.py, as we're about to generate a guard diff --git a/rpython/rlib/rmmap.py b/rpython/rlib/rmmap.py --- a/rpython/rlib/rmmap.py +++ b/rpython/rlib/rmmap.py @@ -492,6 +492,7 @@ self.setslice(start, data) self.pos = start + data_len + return data_len def write_byte(self, byte): if len(byte) != 1: diff --git a/rpython/rlib/rstruct/standardfmttable.py b/rpython/rlib/rstruct/standardfmttable.py --- a/rpython/rlib/rstruct/standardfmttable.py +++ b/rpython/rlib/rstruct/standardfmttable.py @@ -105,6 +105,18 @@ _pack_string(fmtiter, string, count-1) +def pack_halffloat(fmtiter): + size = 2 + fl = fmtiter.accept_float_arg() + try: + result = ieee.pack_float(fmtiter.wbuf, fmtiter.pos, + fl, size, fmtiter.bigendian) + except OverflowError: + raise StructOverflowError("float too large for format 'e'") + else: + fmtiter.advance(size) + return result + def make_float_packer(TYPE): size = rffi.sizeof(TYPE) def packer(fmtiter): @@ -247,6 +259,11 @@ end = count fmtiter.appendobj(data[1:end]) + at specialize.argtype(0) +def unpack_halffloat(fmtiter): + data = fmtiter.read(2) + fmtiter.appendobj(ieee.unpack_float(data, fmtiter.bigendian)) + def make_ieee_unpacker(TYPE): @specialize.argtype(0) def unpack_ieee(fmtiter): @@ -374,6 +391,8 @@ 'needcount' : True }, 'p':{ 'size' : 1, 'pack' : pack_pascal, 'unpack' : unpack_pascal, 'needcount' : True }, + 'e':{ 'size' : 2, 'pack' : pack_halffloat, + 'unpack' : unpack_halffloat}, 'f':{ 'size' : 4, 'pack' : make_float_packer(rffi.FLOAT), 'unpack' : unpack_float}, 'd':{ 'size' : 8, 'pack' : make_float_packer(rffi.DOUBLE), diff --git a/rpython/rlib/rstruct/test/test_pack.py b/rpython/rlib/rstruct/test/test_pack.py --- a/rpython/rlib/rstruct/test/test_pack.py +++ b/rpython/rlib/rstruct/test/test_pack.py @@ -138,6 +138,19 @@ self.check('f', 123.456) self.check('d', 123.456789) + def 
test_pack_halffloat(self): + if self.fmttable is nativefmttable.native_fmttable: + # Host Python cannot handle half floats. + return + size = 2 + wbuf = MutableStringBuffer(size) + self.mypack_into('e', wbuf, 6.5e+04) + got = wbuf.finish() + if self.bigendian: + assert got == b'\x7b\xef' + else: + assert got == b'\xef\x7b' + def test_float_overflow(self): if self.fmt_prefix == '@': # native packing, no overflow diff --git a/rpython/rlib/rstruct/test/test_runpack.py b/rpython/rlib/rstruct/test/test_runpack.py --- a/rpython/rlib/rstruct/test/test_runpack.py +++ b/rpython/rlib/rstruct/test/test_runpack.py @@ -78,6 +78,10 @@ assert f != 12.34 # precision lost assert abs(f - 12.34) < 1E-6 + def test_unpack_halffloat(self): + assert runpack(">e", b"\x7b\xef") == 64992.0 + assert runpack(" Author: Ronan Lamy Branch: py3.5 Changeset: r93434:bfa08194d964 Date: 2017-12-16 03:27 +0000 http://bitbucket.org/pypy/pypy/changeset/bfa08194d964/ Log: fix test_stacklet diff --git a/pypy/module/_continuation/test/support.py b/pypy/module/_continuation/test/support.py --- a/pypy/module/_continuation/test/support.py +++ b/pypy/module/_continuation/test/support.py @@ -10,4 +10,3 @@ import rpython.rlib.rstacklet except CompilationError as e: py.test.skip("cannot import rstacklet: %s" % e) - diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -23,7 +23,7 @@ # frame cycle res.append('...') break - if f.f_code.co_name == 'runtest': + if f.f_code.co_name == '': # if we are running with -A, cut all the stack above # the test function break @@ -34,9 +34,6 @@ return res return stack """) - if cls.runappdirect: - # make sure that "self.stack" does not pass the self - cls.w_stack = staticmethod(cls.w_stack.im_func) def test_new_empty(self): from _continuation import continulet From pypy.commits at gmail.com Fri Dec 15 23:05:54 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 15 Dec 2017 20:05:54 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Explicitly skip tests that cannot work with -A Message-ID: <5a349ba2.516b1c0a.12f99.4abc@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93435:88ae8a42ce83 Date: 2017-12-16 04:05 +0000 http://bitbucket.org/pypy/pypy/changeset/88ae8a42ce83/ Log: Explicitly skip tests that cannot work with -A diff --git a/pypy/objspace/std/test/test_mapdict.py b/pypy/objspace/std/test/test_mapdict.py --- a/pypy/objspace/std/test/test_mapdict.py +++ b/pypy/objspace/std/test/test_mapdict.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import pytest from pypy.objspace.std.test.test_dictmultiobject import FakeSpace, W_DictObject from pypy.objspace.std.mapdict import * @@ -884,6 +885,7 @@ assert x.__dict__ == {'日本': 3} """ + at pytest.mark.skipif('config.option.runappdirect') class AppTestWithMapDictAndCounters(object): spaceconfig = {"objspace.std.withmethodcachecounter": True} @@ -1286,7 +1288,7 @@ assert res2 == "foobar" - + at pytest.mark.skipif('config.option.runappdirect') class AppTestGlobalCaching(AppTestWithMapDict): spaceconfig = {"objspace.std.withmethodcachecounter": True} diff --git a/pypy/objspace/std/test/test_methodcache.py b/pypy/objspace/std/test/test_methodcache.py --- a/pypy/objspace/std/test/test_methodcache.py +++ b/pypy/objspace/std/test/test_methodcache.py @@ -1,6 +1,8 @@ +import pytest from pypy.objspace.std.test import test_typeobject + at pytest.mark.skipif('config.option.runappdirect') class 
AppTestMethodCaching(test_typeobject.AppTestTypeObject): spaceconfig = {"objspace.std.withmethodcachecounter": True} diff --git a/pypy/objspace/std/test/test_prebuiltint.py b/pypy/objspace/std/test/test_prebuiltint.py --- a/pypy/objspace/std/test/test_prebuiltint.py +++ b/pypy/objspace/std/test/test_prebuiltint.py @@ -1,5 +1,7 @@ +import pytest from pypy.objspace.std.test import test_intobject + at pytest.mark.skipif('config.option.runappdirect') class AppTestInt(test_intobject.AppTestInt): spaceconfig = {"objspace.std.withprebuiltint": True} diff --git a/pypy/objspace/std/test/test_smalllongobject.py b/pypy/objspace/std/test/test_smalllongobject.py --- a/pypy/objspace/std/test/test_smalllongobject.py +++ b/pypy/objspace/std/test/test_smalllongobject.py @@ -1,4 +1,4 @@ -import py +import pytest import sys from pypy.objspace.std.smalllongobject import W_SmallLongObject from pypy.objspace.std.test import test_longobject @@ -16,7 +16,8 @@ # assert space.int_w(w5) == 5 if sys.maxint < 0x123456789ABCDEFL: - py.test.raises(OperationError, space.int_w, wlarge) + with pytest.raises(OperationError): + space.int_w(wlarge) else: assert space.int_w(wlarge) == 0x123456789ABCDEF # @@ -44,6 +45,7 @@ assert space.unwrap(w_obj) == 42 + at pytest.mark.skipif('config.option.runappdirect') class AppTestSmallLong(test_longobject.AppTestLong): spaceconfig = {"objspace.std.withsmalllong": True} From pypy.commits at gmail.com Fri Dec 15 23:38:58 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 15 Dec 2017 20:38:58 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix AppTestWriteBytecode tests Message-ID: <5a34a362.b5a0df0a.54d62.eae1@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93436:71a2dd4b2fa2 Date: 2017-12-16 04:38 +0000 http://bitbucket.org/pypy/pypy/changeset/71a2dd4b2fa2/ Log: fix AppTestWriteBytecode tests diff --git a/pypy/module/imp/test/test_import.py b/pypy/module/imp/test/test_import.py --- a/pypy/module/imp/test/test_import.py +++ b/pypy/module/imp/test/test_import.py @@ -1210,12 +1210,12 @@ } def setup_class(cls): - cls.saved_modules = _setup(cls) + cls.w_saved_modules = _setup(cls) sandbox = cls.spaceconfig['translation.sandbox'] cls.w_sandbox = cls.space.wrap(sandbox) def teardown_class(cls): - _teardown(cls.space, cls.saved_modules) + _teardown(cls.space, cls.w_saved_modules) cls.space.appexec([], """ (): import sys @@ -1245,6 +1245,7 @@ assert not os.path.exists(c.__cached__) + at pytest.mark.skipif('config.option.runappdirect') class AppTestWriteBytecodeSandbox(AppTestWriteBytecode): spaceconfig = { "translation.sandbox": True From pypy.commits at gmail.com Sat Dec 16 00:16:32 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 15 Dec 2017 21:16:32 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Skip another test that cannot work with -A Message-ID: <5a34ac30.90a9df0a.6a4d7.bd17@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93437:7597dd2c875f Date: 2017-12-16 04:52 +0000 http://bitbucket.org/pypy/pypy/changeset/7597dd2c875f/ Log: Skip another test that cannot work with -A diff --git a/pypy/module/marshal/test/test_marshal.py b/pypy/module/marshal/test/test_marshal.py --- a/pypy/module/marshal/test/test_marshal.py +++ b/pypy/module/marshal/test/test_marshal.py @@ -1,3 +1,4 @@ +import pytest from rpython.tool.udir import udir @@ -228,6 +229,7 @@ BadReader(marshal.dumps(value))) + at pytest.mark.skipif('config.option.runappdirect') class AppTestSmallLong(AppTestMarshal): spaceconfig = AppTestMarshal.spaceconfig.copy() spaceconfig["objspace.std.withsmalllong"] 
= True From pypy.commits at gmail.com Sat Dec 16 00:16:34 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 15 Dec 2017 21:16:34 -0800 (PST) Subject: [pypy-commit] pypy py3.5: xfail test Message-ID: <5a34ac32.06b7df0a.57ba9.3767@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93438:61fa4de7d9bb Date: 2017-12-16 05:15 +0000 http://bitbucket.org/pypy/pypy/changeset/61fa4de7d9bb/ Log: xfail test diff --git a/pypy/tool/pytest/test/test_appsupport.py b/pypy/tool/pytest/test/test_appsupport.py --- a/pypy/tool/pytest/test/test_appsupport.py +++ b/pypy/tool/pytest/test/test_appsupport.py @@ -1,3 +1,4 @@ +import pytest import sys import py import pypy @@ -35,6 +36,7 @@ ]) class TestSpaceConfig: + @pytest.mark.xfail(reason="Can't check config with -A in pypy3") def test_applevel_skipped_on_cpython_and_spaceconfig(self, testdir): setpypyconftest(testdir) testdir.makepyfile(""" From pypy.commits at gmail.com Sat Dec 16 04:32:29 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 16 Dec 2017 01:32:29 -0800 (PST) Subject: [pypy-commit] pypy cpyext-avoid-roundtrip: Add a passing test Message-ID: <5a34e82d.4dd91c0a.ae1b6.0ea0@mx.google.com> Author: Armin Rigo Branch: cpyext-avoid-roundtrip Changeset: r93439:f1b821c8b0a9 Date: 2017-12-16 09:17 +0100 http://bitbucket.org/pypy/pypy/changeset/f1b821c8b0a9/ Log: Add a passing test diff --git a/pypy/module/cpyext/test/test_tupleobject.py b/pypy/module/cpyext/test/test_tupleobject.py --- a/pypy/module/cpyext/test/test_tupleobject.py +++ b/pypy/module/cpyext/test/test_tupleobject.py @@ -267,3 +267,12 @@ raises(SystemError, module.set_after_use, s) else: module.set_after_use(s) + + def test_badinternalcall_from_c(self): + module = self.import_extension('foo', [ + ("badinternalcall", "METH_O", + """ + return PyTuple_New(-1); + """), + ]) + raises(SystemError, module.badinternalcall, None) From pypy.commits at gmail.com Sat Dec 16 04:32:31 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 16 Dec 2017 01:32:31 -0800 (PST) Subject: [pypy-commit] pypy default: Half-test, and fix Message-ID: <5a34e82f.ad88df0a.d0ab.7120@mx.google.com> Author: Armin Rigo Branch: Changeset: r93440:dae84ef106dd Date: 2017-12-16 10:31 +0100 http://bitbucket.org/pypy/pypy/changeset/dae84ef106dd/ Log: Half-test, and fix diff --git a/pypy/module/cpyext/pyerrors.py b/pypy/module/cpyext/pyerrors.py --- a/pypy/module/cpyext/pyerrors.py +++ b/pypy/module/cpyext/pyerrors.py @@ -122,7 +122,9 @@ error indicator.""" raise oefmt(space.w_TypeError, "bad argument type for built-in operation") - at cpython_api([], lltype.Void) +# NB. this returns 'void' in CPython, but we can't easily, otherwise the +# function is supposed not to fail + at cpython_api([], rffi.INT_real, error=-1) def PyErr_BadInternalCall(space): raise oefmt(space.w_SystemError, "Bad internal call!") diff --git a/pypy/module/cpyext/test/test_pyerrors.py b/pypy/module/cpyext/test/test_pyerrors.py --- a/pypy/module/cpyext/test/test_pyerrors.py +++ b/pypy/module/cpyext/test/test_pyerrors.py @@ -425,3 +425,15 @@ assert orig_exc_info == reset_sys_exc_info assert new_exc_info == (new_exc.__class__, new_exc, None) assert new_exc_info == new_sys_exc_info + + def test_PyErr_BadInternalCall(self): + # NB. it only seemed to fail when run with '-s'... 
but I think + # that it always printed stuff to stderr + module = self.import_extension('foo', [ + ("oops", "METH_NOARGS", + r''' + PyErr_BadInternalCall(); + return NULL; + '''), + ]) + raises(SystemError, module.oops) From pypy.commits at gmail.com Sat Dec 16 04:33:36 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 16 Dec 2017 01:33:36 -0800 (PST) Subject: [pypy-commit] pypy cpyext-avoid-roundtrip: Test and fix for the case where state.C.Xxx calls directly some Message-ID: <5a34e870.eb8ddf0a.a5e6.1b6f@mx.google.com> Author: Armin Rigo Branch: cpyext-avoid-roundtrip Changeset: r93441:56643108f56a Date: 2017-12-16 10:33 +0100 http://bitbucket.org/pypy/pypy/changeset/56643108f56a/ Log: Test and fix for the case where state.C.Xxx calls directly some C function which tries to call back into RPython diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -204,6 +204,9 @@ # id. Invariant: this variable always contain 0 when the PyPy GIL is # released. It should also contain 0 when regular RPython code # executes. In non-cpyext-related code, it will thus always be 0. +# When cpyext-related C code runs, it contains the thread id (usually) +# or the value -1 (only for state.C.PyXxx() functions which are short- +# running and should not themselves release the GIL). # # **make_generic_cpy_call():** RPython to C, with the GIL held. Before # the call, must assert that the global variable is 0 and set the @@ -972,8 +975,14 @@ # see "Handling of the GIL" above (careful, we don't have the GIL here) tid = rthread.get_or_make_ident() - _gil_auto = (gil_auto_workaround and cpyext_glob_tid_ptr[0] != tid) - if gil_acquire or _gil_auto: + _gil_auto = False + if gil_auto_workaround and cpyext_glob_tid_ptr[0] != tid: + # replace '-1' with the real tid, now that we have the tid + if cpyext_glob_tid_ptr[0] == -1: + cpyext_glob_tid_ptr[0] = tid + else: + _gil_auto = True + if _gil_auto or gil_acquire: if cpyext_glob_tid_ptr[0] == tid: deadlock_error(pname) rgil.acquire() diff --git a/pypy/module/cpyext/intobject.py b/pypy/module/cpyext/intobject.py --- a/pypy/module/cpyext/intobject.py +++ b/pypy/module/cpyext/intobject.py @@ -35,7 +35,7 @@ # value. However, it's just easier to call PyInt_FromLong with a dummy # value; make sure it's big enough to avoid the smallint optimization # (if it will ever be enabled) - return state.C.PyInt_FromLong(0x0DEADBEE) + return state.ccall("PyInt_FromLong", 0x0DEADBEE) else: return BaseCpyTypedescr.allocate(typedescr, space, w_type, itemcount) diff --git a/pypy/module/cpyext/pyerrors.py b/pypy/module/cpyext/pyerrors.py --- a/pypy/module/cpyext/pyerrors.py +++ b/pypy/module/cpyext/pyerrors.py @@ -123,7 +123,9 @@ error indicator.""" raise oefmt(space.w_TypeError, "bad argument type for built-in operation") - at cpython_api([], lltype.Void) +# NB. 
this returns 'void' in CPython, but we can't easily, otherwise the +# function is supposed not to fail + at cpython_api([], rffi.INT_real, error=-1) def PyErr_BadInternalCall(space): raise oefmt(space.w_SystemError, "Bad internal call!") diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py --- a/pypy/module/cpyext/pyobject.py +++ b/pypy/module/cpyext/pyobject.py @@ -307,7 +307,7 @@ if w_obj is not None and space.type(w_obj) is space.w_int: state = space.fromcache(State) intval = space.int_w(w_obj) - return state.C.PyInt_FromLong(intval) + return state.ccall("PyInt_FromLong", intval) return get_pyobj_and_incref(space, w_obj, w_userdata, immortal=False) @specialize.ll() diff --git a/pypy/module/cpyext/state.py b/pypy/module/cpyext/state.py --- a/pypy/module/cpyext/state.py +++ b/pypy/module/cpyext/state.py @@ -1,4 +1,4 @@ -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, specialize from rpython.rtyper.lltypesystem import rffi, lltype from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter import executioncontext @@ -45,6 +45,7 @@ self.operror = None return operror + @specialize.arg(1) def check_and_raise_exception(self, always=False): operror = self.operror if operror: @@ -168,6 +169,18 @@ self.extensions[path] = w_copy return w_mod + @specialize.arg(1) + def ccall(self, name, *args): + from pypy.module.cpyext.api import cpyext_glob_tid_ptr + # This is similar to doing a direct call to state.C.PyXxx(), but + # must be used for any function that might potentially call back + # RPython code---most of them can, e.g. PyErr_NoMemory(). + assert cpyext_glob_tid_ptr[0] == 0 + cpyext_glob_tid_ptr[0] = -1 + result = getattr(self.C, name)(*args) + cpyext_glob_tid_ptr[0] = 0 + return result + class CNamespace: def _freeze_(self): diff --git a/pypy/module/cpyext/test/test_tupleobject.py b/pypy/module/cpyext/test/test_tupleobject.py --- a/pypy/module/cpyext/test/test_tupleobject.py +++ b/pypy/module/cpyext/test/test_tupleobject.py @@ -144,6 +144,46 @@ space.newtuple([space.wrap(i) for i in range(3, 7)])) +class AppTestBadInternalCall(AppTestCpythonExtensionBase): + def setup_method(self, meth): + from pypy.module.cpyext import tupleobject + if self.runappdirect: + py.test.skip("not for runappdirect") + AppTestCpythonExtensionBase.setup_method(self, meth) + tupleobject._BAD_ITEMCOUNT = 42 + + def teardown_method(self, meth): + from pypy.module.cpyext import tupleobject + AppTestCpythonExtensionBase.teardown_method(self, meth) + tupleobject._BAD_ITEMCOUNT = None + + def test_badinternalcall_from_rpy(self): + # This used to hit "a thread is trying to wait for the GIL" in + # thread_gil.c + module = self.import_extension('foo', [ + ("badinternalcall2", "METH_O", + """ + Py_INCREF(Py_None); + return Py_None; + """), + ]) + tup = (None,) * 42 + raises(SystemError, module.badinternalcall2, tup) + + def test_badinternalcall_from_rpy_with_threads(self): + # This used to cause a deadlock in thread_gil.c + module = self.import_extension('foo', [ + ("badinternalcall2", "METH_O", + """ + Py_INCREF(Py_None); + return Py_None; + """), + ]) + import thread; thread.start_new_thread(lambda: None, ()) + tup = (None,) * 42 + raises(SystemError, module.badinternalcall2, tup) + + class AppTestTuple(AppTestCpythonExtensionBase): def test_refcounts(self): module = self.import_extension('foo', [ diff --git a/pypy/module/cpyext/tupleobject.py b/pypy/module/cpyext/tupleobject.py --- a/pypy/module/cpyext/tupleobject.py +++ 
b/pypy/module/cpyext/tupleobject.py @@ -1,5 +1,6 @@ from pypy.interpreter.error import oefmt from rpython.rtyper.lltypesystem import rffi, lltype +from rpython.rlib.objectmodel import we_are_translated from rpython.rlib.debug import fatalerror_notb from pypy.module.cpyext.api import ( cpython_api, Py_ssize_t, build_type_checkers_flags, @@ -56,10 +57,17 @@ return (w_type is space.w_tuple or space.issubtype_w(w_type, space.w_tuple)) +_BAD_ITEMCOUNT = None # patched in test_badinternalcall_from_rpy + def tuple_alloc(typedescr, space, w_type, itemcount): state = space.fromcache(State) if w_type is space.w_tuple: - return state.C.PyTuple_New(itemcount) + if not we_are_translated() and itemcount == _BAD_ITEMCOUNT: + itemcount = -42 + ptup = state.ccall("PyTuple_New", itemcount) + if not ptup: + state.check_and_raise_exception(always=True) + return ptup else: return BaseCpyTypedescr.allocate(typedescr, space, w_type, itemcount) @@ -116,7 +124,9 @@ def tuple_from_args_w(space, args_w): state = space.fromcache(State) n = len(args_w) - py_tuple = state.C.PyTuple_New(n) # XXX: check for errors? + py_tuple = state.ccall("PyTuple_New", n) + if not py_tuple: + state.check_and_raise_exception(always=True) py_tuple = rffi.cast(PyTupleObject, py_tuple) for i, w_obj in enumerate(args_w): py_tuple.c_ob_item[i] = make_ref(space, w_obj) @@ -181,7 +191,10 @@ PyErr_BadInternalCall(space) oldref = rffi.cast(PyTupleObject, ref) oldsize = oldref.c_ob_size - p_ref[0] = state.C.PyTuple_New(newsize) + ptup = state.ccall("PyTuple_New", newsize) + if not ptup: + state.check_and_raise_exception(always=True) + p_ref[0] = ptup newref = rffi.cast(PyTupleObject, p_ref[0]) try: if oldsize < newsize: From pypy.commits at gmail.com Sat Dec 16 04:34:05 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 16 Dec 2017 01:34:05 -0800 (PST) Subject: [pypy-commit] pypy cpyext-avoid-roundtrip: This should be fixed now Message-ID: <5a34e88d.3786df0a.ccb7e.c592@mx.google.com> Author: Armin Rigo Branch: cpyext-avoid-roundtrip Changeset: r93442:614a92d3bfbd Date: 2017-12-16 10:33 +0100 http://bitbucket.org/pypy/pypy/changeset/614a92d3bfbd/ Log: This should be fixed now diff --git a/pypy/module/cpyext/tupleobject.py b/pypy/module/cpyext/tupleobject.py --- a/pypy/module/cpyext/tupleobject.py +++ b/pypy/module/cpyext/tupleobject.py @@ -28,10 +28,6 @@ ## Then the macro PyTuple_GET_ITEM can be implemented like CPython. ## -# CCC: we have a problem in the branch: some of the functions defined in C, -# like PyTuple_New, might call PyErr_*, but since these are implemented in -# RPython, they are not GIL-safe. 
We need to think about it :( - PyTupleObjectStruct = lltype.ForwardReference() PyTupleObject = lltype.Ptr(PyTupleObjectStruct) ObjectItems = rffi.CArray(PyObject) From pypy.commits at gmail.com Sat Dec 16 04:47:52 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 16 Dec 2017 01:47:52 -0800 (PST) Subject: [pypy-commit] pypy cpyext-avoid-roundtrip: hg merge default Message-ID: <5a34ebc8.638fdf0a.9333.07bc@mx.google.com> Author: Armin Rigo Branch: cpyext-avoid-roundtrip Changeset: r93443:99807e834fb9 Date: 2017-12-16 10:35 +0100 http://bitbucket.org/pypy/pypy/changeset/99807e834fb9/ Log: hg merge default diff too long, truncating to 2000 out of 11374 lines diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -71,6 +71,8 @@ ^lib_pypy/.+.c$ ^lib_pypy/.+.o$ ^lib_pypy/.+.so$ +^lib_pypy/.+.pyd$ +^lib_pypy/Release/ ^pypy/doc/discussion/.+\.html$ ^include/.+\.h$ ^include/.+\.inl$ diff --git a/_pytest/terminal.py b/_pytest/terminal.py --- a/_pytest/terminal.py +++ b/_pytest/terminal.py @@ -366,11 +366,11 @@ EXIT_OK, EXIT_TESTSFAILED, EXIT_INTERRUPTED, EXIT_USAGEERROR, EXIT_NOTESTSCOLLECTED) if exitstatus in summary_exit_codes: - self.config.hook.pytest_terminal_summary(terminalreporter=self) self.summary_errors() self.summary_failures() self.summary_warnings() self.summary_passes() + self.config.hook.pytest_terminal_summary(terminalreporter=self) if exitstatus == EXIT_INTERRUPTED: self._report_keyboardinterrupt() del self._keyboardinterrupt_memo diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt new file mode 100644 --- /dev/null +++ b/extra_tests/requirements.txt @@ -0,0 +1,2 @@ +pytest +hypothesis diff --git a/extra_tests/test_bytes.py b/extra_tests/test_bytes.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_bytes.py @@ -0,0 +1,84 @@ +from hypothesis import strategies as st +from hypothesis import given, example + +st_bytestring = st.binary() | st.binary().map(bytearray) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st_bytestring, st_bytestring) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def test_startswith_start(u, v, start): + expected = u[start:].startswith(v) if v else (start <= len(u)) + assert u.startswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), 
st.integers()) +def test_startswith_3(u, v, start, end): + if v: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st_bytestring, st_bytestring) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def test_endswith_2(u, v, start): + expected = u[start:].endswith(v) if v else (start <= len(u)) + assert u.endswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/extra_tests/test_json.py b/extra_tests/test_json.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_json.py @@ -0,0 +1,33 @@ +import pytest +import json +from hypothesis import given, strategies + +def is_(x, y): + return type(x) is type(y) and x == y + +def test_no_ensure_ascii(): + assert is_(json.dumps(u"\u1234", ensure_ascii=False), u'"\u1234"') + assert is_(json.dumps("\xc0", ensure_ascii=False), '"\xc0"') + with pytest.raises(UnicodeDecodeError) as excinfo: + json.dumps((u"\u1234", "\xc0"), ensure_ascii=False) + assert str(excinfo.value).startswith( + "'ascii' codec can't decode byte 0xc0 ") + with pytest.raises(UnicodeDecodeError) as excinfo: + json.dumps(("\xc0", u"\u1234"), ensure_ascii=False) + assert str(excinfo.value).startswith( + "'ascii' codec can't decode byte 0xc0 ") + +def test_issue2191(): + assert is_(json.dumps(u"xxx", ensure_ascii=False), u'"xxx"') + +jsondata = strategies.recursive( + strategies.none() | + strategies.booleans() | + strategies.floats(allow_nan=False) | + strategies.text(), + lambda children: strategies.lists(children) | + strategies.dictionaries(strategies.text(), children)) + + at given(jsondata) +def test_roundtrip(d): + assert json.loads(json.dumps(d)) == d diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_textio.py @@ -0,0 +1,48 @@ +from hypothesis import given, strategies as st + +from io import BytesIO, TextIOWrapper +import os + +def translate_newlines(text): + text = text.replace('\r\n', '\n') + text = text.replace('\r', '\n') + return text.replace('\n', os.linesep) + + at st.composite +def st_readline_universal( + draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + lines = draw(st.lists( + st.text(st.characters(blacklist_characters='\r\n')), + min_size=n_lines, max_size=n_lines)) + limits = [] + for line in lines: + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + endings = draw(st.lists( + st.sampled_from(['\n', '\r', '\r\n']), + min_size=n_lines, max_size=n_lines)) + return ( + ''.join(line + ending for line, ending in zip(lines, endings)), + limits) + + at given(data=st_readline_universal(), + mode=st.sampled_from(['\r', '\n', '\r\n', '', None])) +def test_readline(data, mode): + txt, limits = 
data + textio = TextIOWrapper( + BytesIO(txt.encode('utf-8', 'surrogatepass')), + encoding='utf-8', errors='surrogatepass', newline=mode) + lines = [] + for limit in limits: + line = textio.readline(limit) + if limit >= 0: + assert len(line) <= limit + if line: + lines.append(line) + elif limit: + break + if mode is None: + txt = translate_newlines(txt) + assert txt.startswith(u''.join(lines)) diff --git a/extra_tests/test_unicode.py b/extra_tests/test_unicode.py --- a/extra_tests/test_unicode.py +++ b/extra_tests/test_unicode.py @@ -1,3 +1,4 @@ +import sys import pytest from hypothesis import strategies as st from hypothesis import given, settings, example @@ -32,3 +33,89 @@ @given(s=st.text()) def test_composition(s, norm1, norm2, norm3): assert normalize(norm2, normalize(norm1, s)) == normalize(norm3, s) + + at given(st.text(), st.text(), st.text()) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st.text(), st.text()) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_startswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].startswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.startswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st.text(), st.text()) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_endswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].endswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.endswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].endswith(v) + else: # 
CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -360,14 +360,15 @@ self._FuncPtr = _FuncPtr if handle is None: - if flags & _FUNCFLAG_CDECL: - pypy_dll = _ffi.CDLL(name, mode) - else: - pypy_dll = _ffi.WinDLL(name, mode) - self.__pypy_dll__ = pypy_dll - handle = int(pypy_dll) - if _sys.maxint > 2 ** 32: - handle = int(handle) # long -> int + handle = 0 + if flags & _FUNCFLAG_CDECL: + pypy_dll = _ffi.CDLL(name, mode, handle) + else: + pypy_dll = _ffi.WinDLL(name, mode, handle) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int self._handle = handle def __repr__(self): diff --git a/lib-python/2.7/inspect.py b/lib-python/2.7/inspect.py --- a/lib-python/2.7/inspect.py +++ b/lib-python/2.7/inspect.py @@ -40,6 +40,10 @@ import linecache from operator import attrgetter from collections import namedtuple +try: + from cpyext import is_cpyext_function as _is_cpyext_function +except ImportError: + _is_cpyext_function = lambda obj: False # These constants are from Include/code.h. CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS = 0x1, 0x2, 0x4, 0x8 @@ -230,7 +234,7 @@ __doc__ documentation string __name__ original name of this function or method __self__ instance to which a method is bound, or None""" - return isinstance(object, types.BuiltinFunctionType) + return isinstance(object, types.BuiltinFunctionType) or _is_cpyext_function(object) def isroutine(object): """Return true if the object is any kind of function or method.""" diff --git a/lib-python/2.7/test/test_urllib2net.py b/lib-python/2.7/test/test_urllib2net.py --- a/lib-python/2.7/test/test_urllib2net.py +++ b/lib-python/2.7/test/test_urllib2net.py @@ -286,7 +286,7 @@ self.assertEqual(u.fp._sock.fp._sock.gettimeout(), 120) u.close() - FTP_HOST = 'ftp://ftp.debian.org/debian/' + FTP_HOST = 'ftp://www.pythontest.net/' def test_ftp_basic(self): self.assertIsNone(socket.getdefaulttimeout()) diff --git a/lib-python/2.7/warnings.py b/lib-python/2.7/warnings.py --- a/lib-python/2.7/warnings.py +++ b/lib-python/2.7/warnings.py @@ -43,11 +43,12 @@ unicodetype = unicode except NameError: unicodetype = () + template = "%s: %s: %s\n" try: message = str(message) except UnicodeEncodeError: - pass - s = "%s: %s: %s\n" % (lineno, category.__name__, message) + template = unicode(template) + s = template % (lineno, category.__name__, message) line = linecache.getline(filename, lineno) if line is None else line if line: line = line.strip() diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -8,60 +8,63 @@ class ArrayMeta(_CDataMeta): def __new__(self, name, cls, typedict): res = type.__new__(self, name, cls, typedict) - if '_type_' in typedict: - ffiarray = _rawffi.Array(typedict['_type_']._ffishape_) - res._ffiarray = ffiarray - subletter = getattr(typedict['_type_'], '_type_', None) - if subletter == 'c': - def getvalue(self): - return _rawffi.charp2string(self._buffer.buffer, - self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, str): - 
_rawffi.rawstring2charp(self._buffer.buffer, val) - else: - for i in range(len(val)): - self[i] = val[i] - if len(val) < self._length_: - self._buffer[len(val)] = '\x00' - res.value = property(getvalue, setvalue) - def getraw(self): - return _rawffi.charp2rawstring(self._buffer.buffer, - self._length_) + if cls == (_CData,): # this is the Array class defined below + res._ffiarray = None + return res + if not hasattr(res, '_length_') or not isinstance(res._length_, int): + raise AttributeError( + "class must define a '_length_' attribute, " + "which must be a positive integer") + ffiarray = res._ffiarray = _rawffi.Array(res._type_._ffishape_) + subletter = getattr(res._type_, '_type_', None) + if subletter == 'c': + def getvalue(self): + return _rawffi.charp2string(self._buffer.buffer, + self._length_) + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, str): + _rawffi.rawstring2charp(self._buffer.buffer, val) + else: + for i in range(len(val)): + self[i] = val[i] + if len(val) < self._length_: + self._buffer[len(val)] = b'\x00' + res.value = property(getvalue, setvalue) - def setraw(self, buffer): - if len(buffer) > self._length_: - raise ValueError("%r too long" % (buffer,)) - _rawffi.rawstring2charp(self._buffer.buffer, buffer) - res.raw = property(getraw, setraw) - elif subletter == 'u': - def getvalue(self): - return _rawffi.wcharp2unicode(self._buffer.buffer, - self._length_) + def getraw(self): + return _rawffi.charp2rawstring(self._buffer.buffer, + self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, unicode): - target = self._buffer - else: - target = self - for i in range(len(val)): - target[i] = val[i] - if len(val) < self._length_: - target[len(val)] = u'\x00' - res.value = property(getvalue, setvalue) - - if '_length_' in typedict: - res._ffishape_ = (ffiarray, typedict['_length_']) - res._fficompositesize_ = res._sizeofinstances() - else: - res._ffiarray = None + def setraw(self, buffer): + if len(buffer) > self._length_: + raise ValueError("%r too long" % (buffer,)) + _rawffi.rawstring2charp(self._buffer.buffer, buffer) + res.raw = property(getraw, setraw) + elif subletter == 'u': + def getvalue(self): + return _rawffi.wcharp2unicode(self._buffer.buffer, + self._length_) + + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, unicode): + target = self._buffer + else: + target = self + for i in range(len(val)): + target[i] = val[i] + if len(val) < self._length_: + target[len(val)] = u'\x00' + res.value = property(getvalue, setvalue) + + res._ffishape_ = (ffiarray, res._length_) + res._fficompositesize_ = res._sizeofinstances() return res from_address = cdata_from_address @@ -156,7 +159,7 @@ l = [self[i] for i in range(start, stop, step)] letter = getattr(self._type_, '_type_', None) if letter == 'c': - return "".join(l) + return b"".join(l) if letter == 'u': return u"".join(l) return l diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -176,6 +176,10 @@ def _get_buffer_value(self): return self._buffer[0] + def _copy_to(self, addr): + target = type(self).from_address(addr)._buffer + target[0] = self._get_buffer_value() + def _to_ffi_param(self): if 
self.__class__._is_pointer_like(): return self._get_buffer_value() diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -114,7 +114,9 @@ cobj = self._type_.from_param(value) if ensure_objects(cobj) is not None: store_reference(self, index, cobj._objects) - self._subarray(index)[0] = cobj._get_buffer_value() + address = self._buffer[0] + address += index * sizeof(self._type_) + cobj._copy_to(address) def __nonzero__(self): return self._buffer[0] != 0 diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -291,6 +291,11 @@ def _get_buffer_value(self): return self._buffer.buffer + def _copy_to(self, addr): + from ctypes import memmove + origin = self._get_buffer_value() + memmove(addr, origin, self._fficompositesize_) + def _to_ffi_param(self): return self._buffer diff --git a/lib_pypy/_ctypes_test.py b/lib_pypy/_ctypes_test.py --- a/lib_pypy/_ctypes_test.py +++ b/lib_pypy/_ctypes_test.py @@ -21,5 +21,11 @@ with fp: imp.load_module('_ctypes_test', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass print('could not find _ctypes_test in %s' % output_dir) _pypy_testcapi.compile_shared('_ctypes_test.c', '_ctypes_test', output_dir) diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py --- a/lib_pypy/_sqlite3.py +++ b/lib_pypy/_sqlite3.py @@ -1027,21 +1027,25 @@ if '\0' in sql: raise ValueError("the query contains a null character") - first_word = sql.lstrip().split(" ")[0].upper() - if first_word == "": + + if sql: + first_word = sql.lstrip().split()[0].upper() + if first_word == '': + self._type = _STMT_TYPE_INVALID + if first_word == "SELECT": + self._type = _STMT_TYPE_SELECT + elif first_word == "INSERT": + self._type = _STMT_TYPE_INSERT + elif first_word == "UPDATE": + self._type = _STMT_TYPE_UPDATE + elif first_word == "DELETE": + self._type = _STMT_TYPE_DELETE + elif first_word == "REPLACE": + self._type = _STMT_TYPE_REPLACE + else: + self._type = _STMT_TYPE_OTHER + else: self._type = _STMT_TYPE_INVALID - elif first_word == "SELECT": - self._type = _STMT_TYPE_SELECT - elif first_word == "INSERT": - self._type = _STMT_TYPE_INSERT - elif first_word == "UPDATE": - self._type = _STMT_TYPE_UPDATE - elif first_word == "DELETE": - self._type = _STMT_TYPE_DELETE - elif first_word == "REPLACE": - self._type = _STMT_TYPE_REPLACE - else: - self._type = _STMT_TYPE_OTHER if isinstance(sql, unicode): sql = sql.encode('utf-8') diff --git a/lib_pypy/_testcapi.py b/lib_pypy/_testcapi.py --- a/lib_pypy/_testcapi.py +++ b/lib_pypy/_testcapi.py @@ -16,4 +16,10 @@ with fp: imp.load_module('_testcapi', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass _pypy_testcapi.compile_shared(cfile, '_testcapi', output_dir) diff --git a/lib_pypy/_tkinter/app.py b/lib_pypy/_tkinter/app.py --- a/lib_pypy/_tkinter/app.py +++ b/lib_pypy/_tkinter/app.py @@ -119,7 +119,7 @@ tklib.TCL_GLOBAL_ONLY) # This is used to get the application class for Tk 4.1 and up - argv0 = className.lower() + argv0 = className.lower().encode('ascii') tklib.Tcl_SetVar(self.interp, "argv0", argv0, tklib.TCL_GLOBAL_ONLY) @@ -180,6 +180,9 @@ if err == tklib.TCL_ERROR: self.raiseTclError() + def interpaddr(self): + return 
int(tkffi.cast('size_t', self.interp)) + def _var_invoke(self, func, *args, **kwargs): if self.threaded and self.thread_id != tklib.Tcl_GetCurrentThread(): # The current thread is not the interpreter thread. diff --git a/lib_pypy/resource.py b/lib_pypy/resource.py --- a/lib_pypy/resource.py +++ b/lib_pypy/resource.py @@ -20,6 +20,7 @@ or via the attributes ru_utime, ru_stime, ru_maxrss, and so on.""" __metaclass__ = _structseq.structseqtype + name = "resource.struct_rusage" ru_utime = _structseq.structseqfield(0, "user time used") ru_stime = _structseq.structseqfield(1, "system time used") diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -149,7 +149,7 @@ xz-devel # For lzma on PyPy3. (XXX plus the SLES11 version of libgdbm-dev and tk-dev) -On Mac OS X:: +On Mac OS X: Most of these build-time dependencies are installed alongside the Developer Tools. However, note that in order for the installation to diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -355,7 +355,11 @@ containers (as list items or in sets for example), the exact rule of equality used is "``if x is y or x == y``" (on both CPython and PyPy); as a consequence, because all ``nans`` are identical in PyPy, you -cannot have several of them in a set, unlike in CPython. (Issue `#1974`__) +cannot have several of them in a set, unlike in CPython. (Issue `#1974`__). +Another consequence is that ``cmp(float('nan'), float('nan')) == 0``, because +``cmp`` checks with ``is`` first whether the arguments are identical (there is +no good value to return from this call to ``cmp``, because ``cmp`` pretends +that there is a total order on floats, but that is wrong for NaNs). .. __: https://bitbucket.org/pypy/pypy/issue/1974/different-behaviour-for-collections-of diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst --- a/pypy/doc/faq.rst +++ b/pypy/doc/faq.rst @@ -182,6 +182,57 @@ technical difficulties. +What about numpy, numpypy, micronumpy? +-------------------------------------- + +Way back in 2011, the PyPy team `started to reimplement`_ numpy in PyPy. It +has two pieces: + + * the builtin module :source:`pypy/module/micronumpy`: this is written in + RPython and roughly covers the content of the ``numpy.core.multiarray`` + module. Confusingly enough, this is available in PyPy under the name + ``_numpypy``. It is included by default in all the official releases of + PyPy (but it might be dropped in the future). + + * a fork_ of the official numpy repository maintained by us and informally + called ``numpypy``: even more confusing, the name of the repo on bitbucket + is ``numpy``. The main difference with the upstream numpy, is that it is + based on the micronumpy module written in RPython, instead of of + ``numpy.core.multiarray`` which is written in C. + +Moreover, it is also possible to install the upstream version of ``numpy``: +its core is written in C and it runs on PyPy under the cpyext compatibility +layer. This is what you get if you do ``pypy -m pip install numpy``. + + +Should I install numpy or numpypy? +----------------------------------- + +TL;DR version: you should use numpy. You can install it by doing ``pypy -m pip +install numpy``. You might also be interested in using the experimental `PyPy +binary wheels`_ to save compilation time. + +The upstream ``numpy`` is written in C, and runs under the cpyext +compatibility layer. 
Nowadays, cpyext is mature enough that you can simply +use the upstream ``numpy``, since it passes 99.9% of the test suite. At the +moment of writing (October 2017) the main drawback of ``numpy`` is that cpyext +is infamously slow, and thus it has worse performance compared to +``numpypy``. However, we are actively working on improving it, as we expect to +reach the same speed, eventually. + +On the other hand, ``numpypy`` is more JIT-friendly and very fast to call, +since it is written in RPython: but it is a reimplementation, and it's hard to +be completely compatible: over the years the project slowly matured and +eventually it was able to call out to the LAPACK and BLAS libraries to speed +matrix calculations, and reached around an 80% parity with the upstream +numpy. However, 80% is far from 100%. Since cpyext/numpy compatibility is +progressing fast, we have discontinued support for ``numpypy``. + +.. _`started to reimplement`: https://morepypy.blogspot.co.il/2011/05/numpy-in-pypy-status-and-roadmap.html +.. _fork: https://bitbucket.org/pypy/numpy +.. _`PyPy binary wheels`: https://github.com/antocuni/pypy-wheels + + Is PyPy more clever than CPython about Tail Calls? -------------------------------------------------- diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst --- a/pypy/doc/how-to-release.rst +++ b/pypy/doc/how-to-release.rst @@ -62,7 +62,7 @@ * go to pypy/tool/release and run ``force-builds.py `` The following JIT binaries should be built, however, we need more buildbots - windows, linux-32, linux-64, osx64, armhf-raring, armhf-raspberrian, armel, + windows, linux-32, linux-64, osx64, armhf-raspberrian, armel, freebsd64 * wait for builds to complete, make sure there are no failures diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -1,12 +1,41 @@ -=========================== -What's new in PyPy2.7 5.10+ -=========================== - -.. this is a revision shortly after release-pypy2.7-v5.9.0 -.. startrev:d56dadcef996 - -.. branch: cppyy-packaging -Cleanup and improve cppyy packaging - -.. branch: docs-osx-brew-openssl - +=========================== +What's new in PyPy2.7 5.10+ +=========================== + +.. this is a revision shortly after release-pypy2.7-v5.9.0 +.. startrev:d56dadcef996 + + +.. branch: cppyy-packaging + +Cleanup and improve cppyy packaging + +.. branch: docs-osx-brew-openssl + +.. branch: keep-debug-symbols + +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches + +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests + +Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 + +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch + +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + +.. branch: rdict-fast-hash + +Make it possible to declare that the hash function of an r_dict is fast in RPython. diff --git a/pypy/doc/whatsnew-pypy2-5.6.0.rst b/pypy/doc/whatsnew-pypy2-5.6.0.rst --- a/pypy/doc/whatsnew-pypy2-5.6.0.rst +++ b/pypy/doc/whatsnew-pypy2-5.6.0.rst @@ -101,7 +101,7 @@ .. branch: newinitwarn -Match CPython's stricter handling of __new/init__ arguments +Match CPython's stricter handling of ``__new__``/``__init__`` arguments .. 
branch: openssl-1.1 diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -11,7 +11,7 @@ To build pypy-c you need a working python environment, and a C compiler. It is possible to translate with a CPython 2.6 or later, but this is not -the preferred way, because it will take a lot longer to run � depending +the preferred way, because it will take a lot longer to run – depending on your architecture, between two and three times as long. So head to `our downloads`_ and get the latest stable version. @@ -25,8 +25,10 @@ This compiler, while the standard one for Python 2.7, is deprecated. Microsoft has made it available as the `Microsoft Visual C++ Compiler for Python 2.7`_ (the link -was checked in Nov 2016). Note that the compiler suite will be installed in -``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python``. +was checked in Nov 2016). Note that the compiler suite may be installed in +``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python`` +or in +``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python``. A current version of ``setuptools`` will be able to find it there. For Windows 10, you must right-click the download, and under ``Properties`` -> ``Compatibility`` mark it as ``Run run this program in comatibility mode for`` @@ -41,7 +43,6 @@ ----------------------------------- We routinely test translation using v9, also known as Visual Studio 2008. -Our buildbot is still using the Express Edition, not the compiler noted above. Other configurations may work as well. The translation scripts will set up the appropriate environment variables @@ -81,6 +82,31 @@ .. _build instructions: http://pypy.org/download.html#building-from-source +Setting Up Visual Studio for building SSL in Python3 +---------------------------------------------------- + +On Python3, the ``ssl`` module is based on ``cffi``, and requires a build step after +translation. However ``distutils`` does not support the Micorosft-provided Visual C +compiler, and ``cffi`` depends on ``distutils`` to find the compiler. The +traditional solution to this problem is to install the ``setuptools`` module +via running ``-m ensurepip`` which installs ``pip`` and ``setuptools``. However +``pip`` requires ``ssl``. So we have a chicken-and-egg problem: ``ssl`` depends on +``cffi`` which depends on ``setuptools``, which depends on ``ensurepip``, which +depends on ``ssl``. + +In order to solve this, the buildbot sets an environment varaible that helps +``distutils`` find the compiler without ``setuptools``:: + + set VS90COMNTOOLS=C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0\VC\bin + +or whatever is appropriate for your machine. 
Note that this is not enough, you +must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the +``...\9.0\VC`` directory, and edit it, changing the lines that set +``VCINSTALLDIR`` and ``WindowsSdkDir``:: + + set VCINSTALLDIR=%~dp0\ + set WindowsSdkDir=%~dp0\..\WinSDK\ + Preparing Windows for the large build ------------------------------------- diff --git a/pypy/goal/getnightly.py b/pypy/goal/getnightly.py --- a/pypy/goal/getnightly.py +++ b/pypy/goal/getnightly.py @@ -15,7 +15,7 @@ arch = 'linux' cmd = 'wget "%s"' TAR_OPTIONS += ' --wildcards' - binfiles = "'*/bin/pypy' '*/bin/libpypy-c.so'" + binfiles = "'*/bin/pypy*' '*/bin/libpypy-c.so*'" if os.uname()[-1].startswith('arm'): arch += '-armhf-raspbian' elif sys.platform.startswith('darwin'): diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py b/pypy/interpreter/astcompiler/test/test_astbuilder.py --- a/pypy/interpreter/astcompiler/test/test_astbuilder.py +++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py @@ -1246,3 +1246,7 @@ exc = py.test.raises(SyntaxError, self.get_ast, input).value assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode" " bytes in position 0-1: truncated \\xXX escape") + input = "u'\\x1'" + exc = py.test.raises(SyntaxError, self.get_ast, input).value + assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode" + " bytes in position 0-2: truncated \\xXX escape") diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py --- a/pypy/interpreter/pyparser/future.py +++ b/pypy/interpreter/pyparser/future.py @@ -85,13 +85,17 @@ # permissive parsing of the given list of tokens; it relies on # the real parsing done afterwards to give errors. it.skip_newlines() - it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") - if it.skip(pygram.tokens.STRING): - it.skip_newlines() - while (it.skip_name("from") and + docstring_possible = True + while True: + it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") + if docstring_possible and it.skip(pygram.tokens.STRING): + it.skip_newlines() + docstring_possible = False + if not (it.skip_name("from") and it.skip_name("__future__") and it.skip_name("import")): + break it.skip(pygram.tokens.LPAR) # optionally # return in 'last_position' any line-column pair that points # somewhere inside the last __future__ import statement diff --git a/pypy/interpreter/pyparser/test/test_future.py b/pypy/interpreter/pyparser/test/test_future.py --- a/pypy/interpreter/pyparser/test/test_future.py +++ b/pypy/interpreter/pyparser/test/test_future.py @@ -208,3 +208,13 @@ 'from __future__ import with_statement;') f = run(s, (2, 23)) assert f == fut.CO_FUTURE_DIVISION | fut.CO_FUTURE_WITH_STATEMENT + +def test_future_doc_future(): + # for some reason people do this :-[ + s = ''' +from __future__ import generators +"Docstring" +from __future__ import division + ''' + f = run(s, (4, 24)) + assert f == fut.CO_FUTURE_DIVISION | fut.CO_GENERATOR_ALLOWED diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -1,4 +1,7 @@ -from pypy.interpreter.unicodehelper import encode_utf8, decode_utf8 +import pytest +import struct +from pypy.interpreter.unicodehelper import ( + encode_utf8, decode_utf8, unicode_encode_utf_32_be) class FakeSpace: pass @@ -24,3 +27,23 @@ assert map(ord, got) == [0xd800, 0xdc00] got = decode_utf8(space, "\xf0\x90\x80\x80") assert map(ord, 
got) == [0x10000] + + at pytest.mark.parametrize('unich', [u"\ud800", u"\udc80"]) +def test_utf32_surrogates(unich): + assert (unicode_encode_utf_32_be(unich, 1, None) == + struct.pack('>i', ord(unich))) + with pytest.raises(UnicodeEncodeError): + unicode_encode_utf_32_be(unich, 1, None, allow_surrogates=False) + + def replace_with(ru, rs): + def errorhandler(errors, enc, msg, u, startingpos, endingpos): + if errors == 'strict': + raise UnicodeEncodeError(enc, u, startingpos, endingpos, msg) + return ru, rs, endingpos + return unicode_encode_utf_32_be( + u"<%s>" % unich, 3, None, + errorhandler, allow_surrogates=False) + + assert replace_with(u'rep', None) == u''.encode('utf-32-be') + assert (replace_with(None, '\xca\xfe\xca\xfe') == + '\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>') diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1,7 +1,11 @@ +from rpython.rlib.objectmodel import specialize +from rpython.rlib.rarithmetic import intmask +from rpython.rlib.rstring import StringBuilder, UnicodeBuilder +from rpython.rlib import runicode +from rpython.rlib.runicode import ( + default_unicode_error_encode, default_unicode_error_decode, + MAXUNICODE, BYTEORDER, BYTEORDER2, UNICHR) from pypy.interpreter.error import OperationError -from rpython.rlib.objectmodel import specialize -from rpython.rlib import runicode -from pypy.module._codecs import interp_codecs @specialize.memo() def decode_error_handler(space): @@ -37,6 +41,7 @@ # These functions take and return unwrapped rpython strings and unicodes def decode_unicode_escape(space, string): + from pypy.module._codecs import interp_codecs state = space.fromcache(interp_codecs.CodecState) unicodedata_handler = state.get_unicodedata_handler(space) result, consumed = runicode.str_decode_unicode_escape( @@ -71,3 +76,229 @@ uni, len(uni), "strict", errorhandler=None, allow_surrogates=True) + +# ____________________________________________________________ +# utf-32 + +def str_decode_utf_32(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "native") + return result, length + +def str_decode_utf_32_be(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "big") + return result, length + +def str_decode_utf_32_le(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "little") + return result, length + +def py3k_str_decode_utf_32(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "native", 'utf-32-' + BYTEORDER2) + return result, length + +def py3k_str_decode_utf_32_be(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "big", 'utf-32-be') + return result, length + +def py3k_str_decode_utf_32_le(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "little", 'utf-32-le') + return result, length + +BOM32_DIRECT = intmask(0x0000FEFF) +BOM32_REVERSE = intmask(0xFFFE0000) + +def str_decode_utf_32_helper(s, size, errors, final=True, + errorhandler=None, + byteorder="native", + 
public_encoding_name='utf32'): + if errorhandler is None: + errorhandler = default_unicode_error_decode + bo = 0 + + if BYTEORDER == 'little': + iorder = [0, 1, 2, 3] + else: + iorder = [3, 2, 1, 0] + + # Check for BOM marks (U+FEFF) in the input and adjust current + # byte order setting accordingly. In native mode, the leading BOM + # mark is skipped, in all other modes, it is copied to the output + # stream as-is (giving a ZWNBSP character). + pos = 0 + if byteorder == 'native': + if size >= 4: + bom = intmask( + (ord(s[iorder[3]]) << 24) | (ord(s[iorder[2]]) << 16) | + (ord(s[iorder[1]]) << 8) | ord(s[iorder[0]])) + if BYTEORDER == 'little': + if bom == BOM32_DIRECT: + pos += 4 + bo = -1 + elif bom == BOM32_REVERSE: + pos += 4 + bo = 1 + else: + if bom == BOM32_DIRECT: + pos += 4 + bo = 1 + elif bom == BOM32_REVERSE: + pos += 4 + bo = -1 + elif byteorder == 'little': + bo = -1 + else: + bo = 1 + if size == 0: + return u'', 0, bo + if bo == -1: + # force little endian + iorder = [0, 1, 2, 3] + elif bo == 1: + # force big endian + iorder = [3, 2, 1, 0] + + result = UnicodeBuilder(size // 4) + + while pos < size: + # remaining bytes at the end? (size should be divisible by 4) + if len(s) - pos < 4: + if not final: + break + r, pos = errorhandler(errors, public_encoding_name, + "truncated data", + s, pos, len(s)) + result.append(r) + if len(s) - pos < 4: + break + continue + ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) | + (ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]])) + if ch >= 0x110000: + r, pos = errorhandler(errors, public_encoding_name, + "codepoint not in range(0x110000)", + s, pos, len(s)) + result.append(r) + continue + + if MAXUNICODE < 65536 and ch >= 0x10000: + ch -= 0x10000L + result.append(unichr(0xD800 + (ch >> 10))) + result.append(unichr(0xDC00 + (ch & 0x03FF))) + else: + result.append(UNICHR(ch)) + pos += 4 + return result.build(), pos, bo + +def _STORECHAR32(result, CH, byteorder): + c0 = chr(((CH) >> 24) & 0xff) + c1 = chr(((CH) >> 16) & 0xff) + c2 = chr(((CH) >> 8) & 0xff) + c3 = chr((CH) & 0xff) + if byteorder == 'little': + result.append(c3) + result.append(c2) + result.append(c1) + result.append(c0) + else: + result.append(c0) + result.append(c1) + result.append(c2) + result.append(c3) + +def unicode_encode_utf_32_helper(s, size, errors, + errorhandler=None, + allow_surrogates=True, + byteorder='little', + public_encoding_name='utf32'): + if errorhandler is None: + errorhandler = default_unicode_error_encode + if size == 0: + if byteorder == 'native': + result = StringBuilder(4) + _STORECHAR32(result, 0xFEFF, BYTEORDER) + return result.build() + return "" + + result = StringBuilder(size * 4 + 4) + if byteorder == 'native': + _STORECHAR32(result, 0xFEFF, BYTEORDER) + byteorder = BYTEORDER + + pos = 0 + while pos < size: + ch = ord(s[pos]) + pos += 1 + ch2 = 0 + if not allow_surrogates and 0xD800 <= ch < 0xE000: + ru, rs, pos = errorhandler( + errors, public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + if rs is not None: + # py3k only + if len(rs) % 4 != 0: + errorhandler( + 'strict', public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + result.append(rs) + continue + for ch in ru: + if ord(ch) < 0xD800: + _STORECHAR32(result, ord(ch), byteorder) + else: + errorhandler( + 'strict', public_encoding_name, + 'surrogates not allowed', s, pos - 1, pos) + continue + if 0xD800 <= ch < 0xDC00 and MAXUNICODE < 65536 and pos < size: + ch2 = ord(s[pos]) + if 0xDC00 <= ch2 < 0xE000: + ch = (((ch & 0x3FF) << 10) 
| (ch2 & 0x3FF)) + 0x10000 + pos += 1 + _STORECHAR32(result, ch, byteorder) + + return result.build() + +def unicode_encode_utf_32(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "native") + +def unicode_encode_utf_32_be(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "big") + +def unicode_encode_utf_32_le(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "little") + +def py3k_unicode_encode_utf_32(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "native", + 'utf-32-' + BYTEORDER2) + +def py3k_unicode_encode_utf_32_be(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "big", + 'utf-32-be') + +def py3k_unicode_encode_utf_32_le(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "little", + 'utf-32-le') diff --git a/pypy/module/__builtin__/test/test_builtin.py b/pypy/module/__builtin__/test/test_builtin.py --- a/pypy/module/__builtin__/test/test_builtin.py +++ b/pypy/module/__builtin__/test/test_builtin.py @@ -404,6 +404,7 @@ def test_cmp(self): + assert cmp(float('nan'), float('nan')) == 0 assert cmp(9,9) == 0 assert cmp(0,9) < 0 assert cmp(9,0) > 0 diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -1,10 +1,12 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rstring import UnicodeBuilder +from rpython.rlib import runicode from rpython.rlib.runicode import code_to_unichr, MAXUNICODE from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault +from pypy.interpreter import unicodehelper class VersionTag(object): @@ -66,20 +68,17 @@ "position %d from error handler out of bounds", newpos) replace = space.unicode_w(w_replace) - return replace, newpos + if decode: + return replace, newpos + else: + return replace, None, newpos return call_errorhandler def make_decode_errorhandler(self, space): return self._make_errorhandler(space, True) def make_encode_errorhandler(self, space): - errorhandler = self._make_errorhandler(space, False) - def encode_call_errorhandler(errors, encoding, reason, input, startpos, - endpos): - replace, newpos = errorhandler(errors, encoding, reason, input, - startpos, endpos) - return replace, None, newpos - return encode_call_errorhandler + return self._make_errorhandler(space, False) def get_unicodedata_handler(self, space): if self.unicodedata_handler: @@ -213,7 +212,8 @@ def xmlcharrefreplace_errors(space, w_exc): check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object'))) + w_obj = space.getattr(w_exc, space.newtext('object')) + obj = space.realunicode_w(w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) @@ 
-239,7 +239,8 @@ def backslashreplace_errors(space, w_exc): check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object'))) + w_obj = space.getattr(w_exc, space.newtext('object')) + obj = space.realunicode_w(w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) @@ -366,19 +367,23 @@ raise oefmt(space.w_TypeError, "handler must be callable") # ____________________________________________________________ -# delegation to runicode +# delegation to runicode/unicodehelper -from rpython.rlib import runicode +def _find_implementation(impl_name): + try: + func = getattr(unicodehelper, impl_name) + except AttributeError: + func = getattr(runicode, impl_name) + return func def make_encoder_wrapper(name): rname = "unicode_encode_%s" % (name.replace("_encode", ""), ) - assert hasattr(runicode, rname) + func = _find_implementation(rname) @unwrap_spec(uni=unicode, errors='text_or_none') def wrap_encoder(space, uni, errors="strict"): if errors is None: errors = 'strict' state = space.fromcache(CodecState) - func = getattr(runicode, rname) result = func(uni, len(uni), errors, state.encode_error_handler) return space.newtuple([space.newbytes(result), space.newint(len(uni))]) wrap_encoder.func_name = rname @@ -386,7 +391,7 @@ def make_decoder_wrapper(name): rname = "str_decode_%s" % (name.replace("_decode", ""), ) - assert hasattr(runicode, rname) + func = _find_implementation(rname) @unwrap_spec(string='bufferstr', errors='text_or_none', w_final=WrappedDefault(False)) def wrap_decoder(space, string, errors="strict", w_final=None): @@ -394,7 +399,6 @@ errors = 'strict' final = space.is_true(w_final) state = space.fromcache(CodecState) - func = getattr(runicode, rname) result, consumed = func(string, len(string), errors, final, state.decode_error_handler) return space.newtuple([space.newunicode(result), space.newint(consumed)]) diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -115,10 +115,10 @@ raises(TypeError, charmap_decode, '\xff', "strict", {0xff: 0x110000}) assert (charmap_decode("\x00\x01\x02", "strict", {0: 0x10FFFF, 1: ord('b'), 2: ord('c')}) == - u"\U0010FFFFbc", 3) + (u"\U0010FFFFbc", 3)) assert (charmap_decode("\x00\x01\x02", "strict", {0: u'\U0010FFFF', 1: u'b', 2: u'c'}) == - u"\U0010FFFFbc", 3) + (u"\U0010FFFFbc", 3)) def test_escape_decode_errors(self): from _codecs import escape_decode as decode @@ -537,8 +537,12 @@ assert '\xff'.decode('utf-7', 'ignore') == '' assert '\x00'.decode('unicode-internal', 'ignore') == '' - def test_backslahreplace(self): - assert u'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') == 'a\\xac\u1234\u20ac\u8000' + def test_backslashreplace(self): + sin = u"a\xac\u1234\u20ac\u8000\U0010ffff" + expected = "a\\xac\\u1234\\u20ac\\u8000\\U0010ffff" + assert sin.encode('ascii', 'backslashreplace') == expected + expected = "a\xac\\u1234\xa4\\u8000\\U0010ffff" + assert sin.encode("iso-8859-15", "backslashreplace") == expected def test_badhandler(self): import codecs @@ -592,11 +596,11 @@ def handler_unicodeinternal(exc): if not isinstance(exc, UnicodeDecodeError): raise TypeError("don't know how to handle %r" % exc) - return (u"\x01", 1) + return (u"\x01", 4) codecs.register_error("test.hui", handler_unicodeinternal) res = 
"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui") if sys.maxunicode > 65535: - assert res == u"\u0000\u0001\u0000" # UCS4 build + assert res == u"\u0000\u0001" # UCS4 build else: assert res == u"\x00\x00\x01\x00\x00" # UCS2 build @@ -750,3 +754,31 @@ assert _codecs.unicode_escape_decode(b) == (u'', 0) assert _codecs.raw_unicode_escape_decode(b) == (u'', 0) assert _codecs.unicode_internal_decode(b) == (u'', 0) + + def test_xmlcharrefreplace(self): + r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('latin1', 'xmlcharrefreplace') + assert r == 'ሴ\x80⍅y\xab' + r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('ascii', 'xmlcharrefreplace') + assert r == 'ሴ€⍅y«' + + def test_errorhandler_collection(self): + import _codecs + errors = [] + def record_error(exc): + if not isinstance(exc, UnicodeEncodeError): + raise TypeError("don't know how to handle %r" % exc) + errors.append(exc.object[exc.start:exc.end]) + return (u'', exc.end) + _codecs.register_error("test.record", record_error) + + sin = u"\xac\u1234\u1234\u20ac\u8000" + assert sin.encode("ascii", "test.record") == "" + assert errors == [sin] + + errors = [] + assert sin.encode("latin-1", "test.record") == "\xac" + assert errors == [u'\u1234\u1234\u20ac\u8000'] + + errors = [] + assert sin.encode("iso-8859-15", "test.record") == "\xac\xa4" + assert errors == [u'\u1234\u1234', u'\u8000'] diff --git a/pypy/module/_continuation/test/conftest.py b/pypy/module/_continuation/test/conftest.py new file mode 100644 --- /dev/null +++ b/pypy/module/_continuation/test/conftest.py @@ -0,0 +1,7 @@ +import pytest +import sys + +def pytest_configure(config): + if sys.platform.startswith('linux'): + from rpython.rlib.rvmprof.cintf import configure_libbacktrace_linux + configure_libbacktrace_linux() diff --git a/pypy/module/test_lib_pypy/test_greenlet.py b/pypy/module/_continuation/test/test_greenlet.py rename from pypy/module/test_lib_pypy/test_greenlet.py rename to pypy/module/_continuation/test/test_greenlet.py diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -8,6 +8,35 @@ cls.w_translated = cls.space.wrap( os.path.join(os.path.dirname(__file__), 'test_translated.py')) + cls.w_stack = cls.space.appexec([], """(): + import sys + def stack(f=None): + ''' + get the call-stack of the caller or the specified frame + ''' + if f is None: + f = sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame cycle + res.append('...') + break + if f.f_code.co_name == 'runtest': + # if we are running with -A, cut all the stack above + # the test function + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + #print res + return res + return stack + """) + if cls.runappdirect: + # make sure that "self.stack" does not pass the self + cls.w_stack = staticmethod(cls.w_stack.im_func) def test_new_empty(self): from _continuation import continulet @@ -290,66 +319,100 @@ def test_random_switching(self): from _continuation import continulet # + seen = [] + # def t1(c1): - return c1.switch() + seen.append(3) + res = c1.switch() + seen.append(6) + return res + # def s1(c1, n): + seen.append(2) assert n == 123 c2 = t1(c1) - return c1.switch('a') + 1 + seen.append(7) + res = c1.switch('a') + 1 + seen.append(10) + return res # def s2(c2, c1): + seen.append(5) res = c1.switch(c2) + seen.append(8) assert res == 'a' - return c2.switch('b') + 2 + res = c2.switch('b') + 2 + 
seen.append(12) + return res # def f(): + seen.append(1) c1 = continulet(s1, 123) c2 = continulet(s2, c1) c1.switch() + seen.append(4) res = c2.switch() + seen.append(9) assert res == 'b' res = c1.switch(1000) + seen.append(11) assert res == 1001 - return c2.switch(2000) + res = c2.switch(2000) + seen.append(13) + return res # res = f() assert res == 2002 + assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] def test_f_back(self): import sys from _continuation import continulet + stack = self.stack # - def g(c): + def bar(c): + assert stack() == ['bar', 'foo', 'test_f_back'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) + # + assert stack() == ['bar', 'foo', 'main', 'test_f_back'] c.switch(sys._getframe(1).f_back) - assert sys._getframe(2) is f3.f_back + # + assert stack() == ['bar', 'foo', 'main2', 'test_f_back'] + assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) - def f(c): - g(c) + def foo(c): + bar(c) # - c = continulet(f) - f1 = c.switch() - assert f1.f_code.co_name == 'g' - f2 = c.switch() - assert f2.f_code.co_name == 'f' - f3 = c.switch() - assert f3 is f2 - assert f1.f_back is f3 + assert stack() == ['test_f_back'] + c = continulet(foo) + f1_bar = c.switch() + assert f1_bar.f_code.co_name == 'bar' + f2_foo = c.switch() + assert f2_foo.f_code.co_name == 'foo' + f3_foo = c.switch() + assert f3_foo is f2_foo + assert f1_bar.f_back is f3_foo + # def main(): - f4 = c.switch() - assert f4.f_code.co_name == 'main', repr(f4.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f4_main = c.switch() + assert f4_main.f_code.co_name == 'main' + assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack() == ['main', 'test_f_back'] + assert stack(f1_bar) == ['bar', 'foo', '...'] + # def main2(): - f5 = c.switch() - assert f5.f_code.co_name == 'main2', repr(f5.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f5_main2 = c.switch() + assert f5_main2.f_code.co_name == 'main2' + assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack(f1_bar) == ['bar', 'foo', '...'] + # main() main2() res = c.switch() assert res is None - assert f3.f_back is None + assert f3_foo.f_back is None def test_traceback_is_complete(self): import sys diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -5,6 +5,7 @@ py.test.skip("to run on top of a translated pypy-c") import sys, random +from rpython.tool.udir import udir # ____________________________________________________________ @@ -92,6 +93,33 @@ from pypy.conftest import option if not option.runappdirect: py.test.skip("meant only for -A run") + cls.w_vmprof_file = cls.space.wrap(str(udir.join('profile.vmprof'))) + + def test_vmprof(self): + """ + The point of this test is to check that we do NOT segfault. In + particular, we need to ensure that vmprof does not sample the stack in + the middle of a switch, else we read nonsense. 
+ """ + try: + import _vmprof + except ImportError: + py.test.skip("no _vmprof") + # + def switch_forever(c): + while True: + c.switch() + # + f = open(self.vmprof_file, 'w+b') + _vmprof.enable(f.fileno(), 1/250.0, False, False, False, False) + c = _continuation.continulet(switch_forever) + for i in range(10**7): + if i % 100000 == 0: + print i + c.switch() + _vmprof.disable() + f.close() + def _setup(): for _i in range(20): diff --git a/pypy/module/_cppyy/__init__.py b/pypy/module/_cppyy/__init__.py --- a/pypy/module/_cppyy/__init__.py +++ b/pypy/module/_cppyy/__init__.py @@ -7,7 +7,7 @@ interpleveldefs = { '_resolve_name' : 'interp_cppyy.resolve_name', '_scope_byname' : 'interp_cppyy.scope_byname', - '_template_byname' : 'interp_cppyy.template_byname', + '_is_template' : 'interp_cppyy.is_template', '_std_string_name' : 'interp_cppyy.std_string_name', '_set_class_generator' : 'interp_cppyy.set_class_generator', '_set_function_generator': 'interp_cppyy.set_function_generator', @@ -15,7 +15,9 @@ '_get_nullptr' : 'interp_cppyy.get_nullptr', 'CPPClassBase' : 'interp_cppyy.W_CPPClass', 'addressof' : 'interp_cppyy.addressof', + '_bind_object' : 'interp_cppyy._bind_object', 'bind_object' : 'interp_cppyy.bind_object', + 'move' : 'interp_cppyy.move', } appleveldefs = { diff --git a/pypy/module/_cppyy/capi/loadable_capi.py b/pypy/module/_cppyy/capi/loadable_capi.py --- a/pypy/module/_cppyy/capi/loadable_capi.py +++ b/pypy/module/_cppyy/capi/loadable_capi.py @@ -217,7 +217,8 @@ 'method_req_args' : ([c_scope, c_index], c_int), 'method_arg_type' : ([c_scope, c_index, c_int], c_ccharp), 'method_arg_default' : ([c_scope, c_index, c_int], c_ccharp), - 'method_signature' : ([c_scope, c_index], c_ccharp), + 'method_signature' : ([c_scope, c_index, c_int], c_ccharp), + 'method_prototype' : ([c_scope, c_index, c_int], c_ccharp), 'method_is_template' : ([c_scope, c_index], c_int), 'method_num_template_args' : ([c_scope, c_index], c_int), @@ -498,9 +499,12 @@ def c_method_arg_default(space, cppscope, index, arg_index): args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(arg_index)] return charp2str_free(space, call_capi(space, 'method_arg_default', args)) -def c_method_signature(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] +def c_method_signature(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] return charp2str_free(space, call_capi(space, 'method_signature', args)) +def c_method_prototype(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] + return charp2str_free(space, call_capi(space, 'method_prototype', args)) def c_method_is_template(space, cppscope, index): args = [_ArgH(cppscope.handle), _ArgL(index)] diff --git a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -4,7 +4,7 @@ from rpython.rtyper.lltypesystem import rffi, lltype from rpython.rlib.rarithmetic import r_singlefloat, r_longfloat -from rpython.rlib import rfloat +from rpython.rlib import rfloat, rawrefcount from pypy.module._rawffi.interp_rawffi import letter2tp from pypy.module._rawffi.array import W_Array, W_ArrayInstance @@ -21,9 +21,9 @@ # match for the qualified type. 
-def get_rawobject(space, w_obj): +def get_rawobject(space, w_obj, can_be_None=True): from pypy.module._cppyy.interp_cppyy import W_CPPClass - cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=True) + cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=can_be_None) if cppinstance: rawobject = cppinstance.get_rawobject() assert lltype.typeOf(rawobject) == capi.C_OBJECT @@ -48,17 +48,16 @@ return capi.C_NULL_OBJECT def is_nullpointer_specialcase(space, w_obj): - # 0, None, and nullptr may serve as "NULL", check for any of them + # 0 and nullptr may serve as "NULL" # integer 0 try: return space.int_w(w_obj) == 0 except Exception: pass - # None or nullptr + # C++-style nullptr from pypy.module._cppyy import interp_cppyy - return space.is_true(space.is_(w_obj, space.w_None)) or \ - space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) + return space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) def get_rawbuffer(space, w_obj): # raw buffer @@ -74,7 +73,7 @@ return rffi.cast(rffi.VOIDP, space.uint_w(arr.getbuffer(space))) except Exception: pass - # pre-defined NULL + # pre-defined nullptr if is_nullpointer_specialcase(space, w_obj): return rffi.cast(rffi.VOIDP, 0) raise TypeError("not an addressable buffer") @@ -392,6 +391,7 @@ _immutable_fields_ = ['typecode'] typecode = 'g' + class CStringConverter(TypeConverter): def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.LONGP, address) @@ -408,18 +408,27 @@ def free_argument(self, space, arg, call_local): lltype.free(rffi.cast(rffi.CCHARPP, arg)[0], flavor='raw') +class CStringConverterWithSize(CStringConverter): + _immutable_fields_ = ['size'] + + def __init__(self, space, extra): + self.size = extra + + def from_memory(self, space, w_obj, w_pycppclass, offset): + address = self._get_raw_address(space, w_obj, offset) + charpptr = rffi.cast(rffi.CCHARP, address) + strsize = self.size + if charpptr[self.size-1] == '\0': + strsize = self.size-1 # rffi will add \0 back + return space.newbytes(rffi.charpsize2str(charpptr, strsize)) + class VoidPtrConverter(TypeConverter): def _unwrap_object(self, space, w_obj): try: obj = get_rawbuffer(space, w_obj) except TypeError: - try: - # TODO: accept a 'capsule' rather than naked int - # (do accept int(0), though) - obj = rffi.cast(rffi.VOIDP, space.uint_w(w_obj)) - except Exception: - obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) + obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj, False)) return obj def cffi_type(self, space): @@ -463,12 +472,12 @@ def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.VOIDPP, address) ba = rffi.cast(rffi.CCHARP, address) - r = rffi.cast(rffi.VOIDPP, call_local) try: - r[0] = get_rawbuffer(space, w_obj) + x[0] = get_rawbuffer(space, w_obj) except TypeError: + r = rffi.cast(rffi.VOIDPP, call_local) r[0] = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) - x[0] = rffi.cast(rffi.VOIDP, call_local) + x[0] = rffi.cast(rffi.VOIDP, call_local) ba[capi.c_function_arg_typeoffset(space)] = self.typecode def finalize_call(self, space, w_obj, call_local): @@ -495,9 +504,13 @@ def _unwrap_object(self, space, w_obj): from pypy.module._cppyy.interp_cppyy import W_CPPClass if isinstance(w_obj, W_CPPClass): - if capi.c_is_subtype(space, w_obj.cppclass, self.clsdecl): + from pypy.module._cppyy.interp_cppyy import INSTANCE_FLAGS_IS_R_VALUE + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + # reject moves as all are explicit + raise ValueError("lvalue expected") + if 
capi.c_is_subtype(space, w_obj.clsdecl, self.clsdecl): rawobject = w_obj.get_rawobject() - offset = capi.c_base_offset(space, w_obj.cppclass, self.clsdecl, rawobject, 1) + offset = capi.c_base_offset(space, w_obj.clsdecl, self.clsdecl, rawobject, 1) obj_address = capi.direct_ptradd(rawobject, offset) return rffi.cast(capi.C_OBJECT, obj_address) raise oefmt(space.w_TypeError, @@ -518,6 +531,17 @@ x = rffi.cast(rffi.VOIDPP, address) x[0] = rffi.cast(rffi.VOIDP, self._unwrap_object(space, w_obj)) +class InstanceMoveConverter(InstanceRefConverter): + def _unwrap_object(self, space, w_obj): + # moving is same as by-ref, but have to check that move is allowed + from pypy.module._cppyy.interp_cppyy import W_CPPClass, INSTANCE_FLAGS_IS_R_VALUE + if isinstance(w_obj, W_CPPClass): + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + w_obj.flags &= ~INSTANCE_FLAGS_IS_R_VALUE + return InstanceRefConverter._unwrap_object(self, space, w_obj) + raise oefmt(space.w_ValueError, "object is not an rvalue") + + class InstanceConverter(InstanceRefConverter): def convert_argument_libffi(self, space, w_obj, address, call_local): @@ -527,7 +551,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): self._is_abstract(space) @@ -548,7 +572,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): address = rffi.cast(rffi.VOIDPP, self._get_raw_address(space, w_obj, offset)) @@ -582,8 +606,8 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, - do_cast=False, is_ref=True) + return interp_cppyy.wrap_cppinstance( + space, address, self.clsdecl, do_cast=False, is_ref=True) class StdStringConverter(InstanceConverter): @@ -606,7 +630,7 @@ assign = self.clsdecl.get_overload("__assign__") from pypy.module._cppyy import interp_cppyy assign.call( - interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False), [w_value]) + interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False), [w_value]) except Exception: InstanceConverter.to_memory(self, space, w_obj, w_value, offset) @@ -672,7 +696,7 @@ _converters = {} # builtin and custom types _a_converters = {} # array and ptr versions of above -def get_converter(space, name, default): +def get_converter(space, _name, default): # The matching of the name to a converter should follow: # 1) full, exact match # 1a) const-removed match @@ -680,9 +704,9 @@ # 3) accept ref as pointer (for the stubs, const& can be # by value, but that does not work for the ffi path) # 4) generalized cases (covers basically all user classes) - # 5) void converter, which fails on use + # 5) void* or void converter (which fails on use) - name = capi.c_resolve_name(space, name) + name = 
capi.c_resolve_name(space, _name) # 1) full, exact match try: @@ -701,7 +725,7 @@ clean_name = capi.c_resolve_name(space, helper.clean_type(name)) try: # array_index may be negative to indicate no size or no size found - array_size = helper.array_size(name) + array_size = helper.array_size(_name) # uses original arg return _a_converters[clean_name+compound](space, array_size) except KeyError: pass @@ -719,6 +743,8 @@ return InstancePtrConverter(space, clsdecl) elif compound == "&": return InstanceRefConverter(space, clsdecl) + elif compound == "&&": + return InstanceMoveConverter(space, clsdecl) elif compound == "**": return InstancePtrPtrConverter(space, clsdecl) elif compound == "": @@ -726,11 +752,13 @@ elif capi.c_is_enum(space, clean_name): return _converters['unsigned'](space, default) - # 5) void converter, which fails on use - # + # 5) void* or void converter (which fails on use) + if 0 <= compound.find('*'): + return VoidPtrConverter(space, default) # "user knows best" + # return a void converter here, so that the class can be build even - # when some types are unknown; this overload will simply fail on use - return VoidConverter(space, name) + # when some types are unknown + return VoidConverter(space, name) # fails on use _converters["bool"] = BoolConverter @@ -847,6 +875,10 @@ for name in names: _a_converters[name+'[]'] = ArrayConverter _a_converters[name+'*'] = PtrConverter + + # special case, const char* w/ size and w/o '\0' + _a_converters["const char[]"] = CStringConverterWithSize + _build_array_converters() # add another set of aliased names diff --git a/pypy/module/_cppyy/executor.py b/pypy/module/_cppyy/executor.py --- a/pypy/module/_cppyy/executor.py +++ b/pypy/module/_cppyy/executor.py @@ -159,7 +159,7 @@ from pypy.module._cppyy import interp_cppyy long_result = capi.c_call_l(space, cppmethod, cppthis, num_args, args) ptr_result = rffi.cast(capi.C_OBJECT, long_result) - pyres = interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + pyres = interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) return pyres def execute_libffi(self, space, cif_descr, funcaddr, buffer): @@ -167,7 +167,7 @@ result = rffi.ptradd(buffer, cif_descr.exchange_result) from pypy.module._cppyy import interp_cppyy ptr_result = rffi.cast(capi.C_OBJECT, rffi.cast(rffi.VOIDPP, result)[0]) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) class InstancePtrPtrExecutor(InstancePtrExecutor): From pypy.commits at gmail.com Sat Dec 16 04:47:56 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 16 Dec 2017 01:47:56 -0800 (PST) Subject: [pypy-commit] pypy cpyext-avoid-roundtrip: Found out how to have a Void-returning function raise Message-ID: <5a34ebcc.4f851c0a.1549d.4355@mx.google.com> Author: Armin Rigo Branch: cpyext-avoid-roundtrip Changeset: r93445:b7431c4cc863 Date: 2017-12-16 10:47 +0100 http://bitbucket.org/pypy/pypy/changeset/b7431c4cc863/ Log: Found out how to have a Void-returning function raise diff --git a/pypy/module/cpyext/pyerrors.py b/pypy/module/cpyext/pyerrors.py --- a/pypy/module/cpyext/pyerrors.py +++ b/pypy/module/cpyext/pyerrors.py @@ -123,9 +123,7 @@ error indicator.""" raise oefmt(space.w_TypeError, "bad argument type for built-in operation") -# NB. 
this returns 'void' in CPython, but we can't easily, otherwise the -# function is supposed not to fail - at cpython_api([], rffi.INT_real, error=-1) + at cpython_api([], lltype.Void, error=None) def PyErr_BadInternalCall(space): raise oefmt(space.w_SystemError, "Bad internal call!") From pypy.commits at gmail.com Sat Dec 16 04:47:55 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 16 Dec 2017 01:47:55 -0800 (PST) Subject: [pypy-commit] pypy default: Found out how to have a Void-returning function raise Message-ID: <5a34ebcb.08691c0a.5bf25.cee7@mx.google.com> Author: Armin Rigo Branch: Changeset: r93444:4f856cec59aa Date: 2017-12-16 10:46 +0100 http://bitbucket.org/pypy/pypy/changeset/4f856cec59aa/ Log: Found out how to have a Void-returning function raise diff --git a/pypy/module/cpyext/pyerrors.py b/pypy/module/cpyext/pyerrors.py --- a/pypy/module/cpyext/pyerrors.py +++ b/pypy/module/cpyext/pyerrors.py @@ -122,9 +122,7 @@ error indicator.""" raise oefmt(space.w_TypeError, "bad argument type for built-in operation") -# NB. this returns 'void' in CPython, but we can't easily, otherwise the -# function is supposed not to fail - at cpython_api([], rffi.INT_real, error=-1) + at cpython_api([], lltype.Void, error=None) def PyErr_BadInternalCall(space): raise oefmt(space.w_SystemError, "Bad internal call!") From pypy.commits at gmail.com Sat Dec 16 06:02:41 2017 From: pypy.commits at gmail.com (antocuni) Date: Sat, 16 Dec 2017 03:02:41 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: merge default Message-ID: <5a34fd51.cd5c1c0a.4c1d5.8362@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93446:92e4ca3c2daa Date: 2017-12-16 12:02 +0100 http://bitbucket.org/pypy/pypy/changeset/92e4ca3c2daa/ Log: merge default diff too long, truncating to 2000 out of 2875 lines diff --git a/pypy/module/test_lib_pypy/test_json_extra.py b/extra_tests/test_json.py rename from pypy/module/test_lib_pypy/test_json_extra.py rename to extra_tests/test_json.py --- a/pypy/module/test_lib_pypy/test_json_extra.py +++ b/extra_tests/test_json.py @@ -1,4 +1,6 @@ -import py, json +import pytest +import json +from hypothesis import given, strategies def is_(x, y): return type(x) is type(y) and x == y @@ -6,12 +8,26 @@ def test_no_ensure_ascii(): assert is_(json.dumps(u"\u1234", ensure_ascii=False), u'"\u1234"') assert is_(json.dumps("\xc0", ensure_ascii=False), '"\xc0"') - e = py.test.raises(UnicodeDecodeError, json.dumps, - (u"\u1234", "\xc0"), ensure_ascii=False) - assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ") - e = py.test.raises(UnicodeDecodeError, json.dumps, - ("\xc0", u"\u1234"), ensure_ascii=False) - assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ") + with pytest.raises(UnicodeDecodeError) as excinfo: + json.dumps((u"\u1234", "\xc0"), ensure_ascii=False) + assert str(excinfo.value).startswith( + "'ascii' codec can't decode byte 0xc0 ") + with pytest.raises(UnicodeDecodeError) as excinfo: + json.dumps(("\xc0", u"\u1234"), ensure_ascii=False) + assert str(excinfo.value).startswith( + "'ascii' codec can't decode byte 0xc0 ") def test_issue2191(): assert is_(json.dumps(u"xxx", ensure_ascii=False), u'"xxx"') + +jsondata = strategies.recursive( + strategies.none() | + strategies.booleans() | + strategies.floats(allow_nan=False) | + strategies.text(), + lambda children: strategies.lists(children) | + strategies.dictionaries(strategies.text(), children)) + + at given(jsondata) +def test_roundtrip(d): + assert 
json.loads(json.dumps(d)) == d diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py --- a/extra_tests/test_textio.py +++ b/extra_tests/test_textio.py @@ -1,28 +1,48 @@ from hypothesis import given, strategies as st from io import BytesIO, TextIOWrapper +import os -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace('\r\n', '\n') + text = text.replace('\r', '\n') + return text.replace('\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline_universal( + draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + lines = draw(st.lists( + st.text(st.characters(blacklist_characters='\r\n')), + min_size=n_lines, max_size=n_lines)) + limits = [] + for line in lines: + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + endings = draw(st.lists( + st.sampled_from(['\n', '\r', '\r\n']), + min_size=n_lines, max_size=n_lines)) + return ( + ''.join(line + ending for line, ending in zip(lines, endings)), + limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(txt, mode, limit): + at given(data=st_readline_universal(), + mode=st.sampled_from(['\r', '\n', '\r\n', '', None])) +def test_readline(data, mode): + txt, limits = data textio = TextIOWrapper( - BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode) + BytesIO(txt.encode('utf-8', 'surrogatepass')), + encoding='utf-8', errors='surrogatepass', newline=mode) lines = [] - while True: + for limit in limits: line = textio.readline(limit) - if limit > 0: - assert len(line) < limit + if limit >= 0: + assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + if mode is None: + txt = translate_newlines(txt) + assert txt.startswith(u''.join(lines)) diff --git a/lib_pypy/resource.py b/lib_pypy/resource.py --- a/lib_pypy/resource.py +++ b/lib_pypy/resource.py @@ -20,6 +20,7 @@ or via the attributes ru_utime, ru_stime, ru_maxrss, and so on.""" __metaclass__ = _structseq.structseqtype + name = "resource.struct_rusage" ru_utime = _structseq.structseqfield(0, "user time used") ru_stime = _structseq.structseqfield(1, "system time used") diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -149,7 +149,7 @@ xz-devel # For lzma on PyPy3. (XXX plus the SLES11 version of libgdbm-dev and tk-dev) -On Mac OS X:: +On Mac OS X: Most of these build-time dependencies are installed alongside the Developer Tools. However, note that in order for the installation to diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -355,7 +355,11 @@ containers (as list items or in sets for example), the exact rule of equality used is "``if x is y or x == y``" (on both CPython and PyPy); as a consequence, because all ``nans`` are identical in PyPy, you -cannot have several of them in a set, unlike in CPython. (Issue `#1974`__) +cannot have several of them in a set, unlike in CPython. (Issue `#1974`__). 
+Another consequence is that ``cmp(float('nan'), float('nan')) == 0``, because +``cmp`` checks with ``is`` first whether the arguments are identical (there is +no good value to return from this call to ``cmp``, because ``cmp`` pretends +that there is a total order on floats, but that is wrong for NaNs). .. __: https://bitbucket.org/pypy/pypy/issue/1974/different-behaviour-for-collections-of diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst --- a/pypy/doc/how-to-release.rst +++ b/pypy/doc/how-to-release.rst @@ -62,7 +62,7 @@ * go to pypy/tool/release and run ``force-builds.py `` The following JIT binaries should be built, however, we need more buildbots - windows, linux-32, linux-64, osx64, armhf-raring, armhf-raspberrian, armel, + windows, linux-32, linux-64, osx64, armhf-raspberrian, armel, freebsd64 * wait for builds to complete, make sure there are no failures diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -1,30 +1,42 @@ -=========================== -What's new in PyPy2.7 5.10+ -=========================== - -.. this is a revision shortly after release-pypy2.7-v5.9.0 -.. startrev:d56dadcef996 - -.. branch: cppyy-packaging -Cleanup and improve cppyy packaging - -.. branch: docs-osx-brew-openssl - -.. branch: keep-debug-symbols -Add a smartstrip tool, which can optionally keep the debug symbols in a -separate file, instead of just stripping them away. Use it in packaging - -.. branch: bsd-patches -Fix failures on FreeBSD, contributed by David Naylor as patches on the issue -tracker (issues 2694, 2695, 2696, 2697) - -.. branch: run-extra-tests -Run extra_tests/ in buildbot - -.. branch: vmprof-0.4.10 -Upgrade the _vmprof backend to vmprof 0.4.10 - -.. branch: fix-vmprof-stacklet-switch -.. branch: fix-vmprof-stacklet-switch-2 -Fix vmprof+ continulet (i.e. greenelts, eventlet, gevent, ...) - +=========================== +What's new in PyPy2.7 5.10+ +=========================== + +.. this is a revision shortly after release-pypy2.7-v5.9.0 +.. startrev:d56dadcef996 + + +.. branch: cppyy-packaging + +Cleanup and improve cppyy packaging + +.. branch: docs-osx-brew-openssl + +.. branch: keep-debug-symbols + +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches + +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests + +Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 + +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch +.. branch: fix-vmprof-stacklet-switch-2 +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + +.. branch: rdict-fast-hash + +Make it possible to declare that the hash function of an r_dict is fast in RPython. + diff --git a/pypy/doc/whatsnew-pypy2-5.6.0.rst b/pypy/doc/whatsnew-pypy2-5.6.0.rst --- a/pypy/doc/whatsnew-pypy2-5.6.0.rst +++ b/pypy/doc/whatsnew-pypy2-5.6.0.rst @@ -101,7 +101,7 @@ .. branch: newinitwarn -Match CPython's stricter handling of __new/init__ arguments +Match CPython's stricter handling of ``__new__``/``__init__`` arguments .. branch: openssl-1.1 diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -11,7 +11,7 @@ To build pypy-c you need a working python environment, and a C compiler. 
It is possible to translate with a CPython 2.6 or later, but this is not -the preferred way, because it will take a lot longer to run � depending +the preferred way, because it will take a lot longer to run – depending on your architecture, between two and three times as long. So head to `our downloads`_ and get the latest stable version. @@ -103,6 +103,7 @@ must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the ``...\9.0\VC`` directory, and edit it, changing the lines that set ``VCINSTALLDIR`` and ``WindowsSdkDir``:: + set VCINSTALLDIR=%~dp0\ set WindowsSdkDir=%~dp0\..\WinSDK\ diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py b/pypy/interpreter/astcompiler/test/test_astbuilder.py --- a/pypy/interpreter/astcompiler/test/test_astbuilder.py +++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py @@ -1246,3 +1246,7 @@ exc = py.test.raises(SyntaxError, self.get_ast, input).value assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode" " bytes in position 0-1: truncated \\xXX escape") + input = "u'\\x1'" + exc = py.test.raises(SyntaxError, self.get_ast, input).value + assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode" + " bytes in position 0-2: truncated \\xXX escape") diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -1,4 +1,7 @@ -from pypy.interpreter.unicodehelper import encode_utf8, decode_utf8 +import pytest +import struct +from pypy.interpreter.unicodehelper import ( + encode_utf8, decode_utf8, unicode_encode_utf_32_be) class FakeSpace: pass @@ -24,3 +27,23 @@ assert map(ord, got) == [0xd800, 0xdc00] got = decode_utf8(space, "\xf0\x90\x80\x80") assert map(ord, got) == [0x10000] + + at pytest.mark.parametrize('unich', [u"\ud800", u"\udc80"]) +def test_utf32_surrogates(unich): + assert (unicode_encode_utf_32_be(unich, 1, None) == + struct.pack('>i', ord(unich))) + with pytest.raises(UnicodeEncodeError): + unicode_encode_utf_32_be(unich, 1, None, allow_surrogates=False) + + def replace_with(ru, rs): + def errorhandler(errors, enc, msg, u, startingpos, endingpos): + if errors == 'strict': + raise UnicodeEncodeError(enc, u, startingpos, endingpos, msg) + return ru, rs, endingpos + return unicode_encode_utf_32_be( + u"<%s>" % unich, 3, None, + errorhandler, allow_surrogates=False) + + assert replace_with(u'rep', None) == u''.encode('utf-32-be') + assert (replace_with(None, '\xca\xfe\xca\xfe') == + '\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>') diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1,7 +1,11 @@ +from rpython.rlib.objectmodel import specialize +from rpython.rlib.rarithmetic import intmask +from rpython.rlib.rstring import StringBuilder, UnicodeBuilder +from rpython.rlib import runicode +from rpython.rlib.runicode import ( + default_unicode_error_encode, default_unicode_error_decode, + MAXUNICODE, BYTEORDER, BYTEORDER2, UNICHR) from pypy.interpreter.error import OperationError -from rpython.rlib.objectmodel import specialize -from rpython.rlib import runicode -from pypy.module._codecs import interp_codecs @specialize.memo() def decode_error_handler(space): @@ -37,6 +41,7 @@ # These functions take and return unwrapped rpython strings and unicodes def decode_unicode_escape(space, string): + from pypy.module._codecs import interp_codecs state 
= space.fromcache(interp_codecs.CodecState) unicodedata_handler = state.get_unicodedata_handler(space) result, consumed = runicode.str_decode_unicode_escape( @@ -71,3 +76,229 @@ uni, len(uni), "strict", errorhandler=None, allow_surrogates=True) + +# ____________________________________________________________ +# utf-32 + +def str_decode_utf_32(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "native") + return result, length + +def str_decode_utf_32_be(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "big") + return result, length + +def str_decode_utf_32_le(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "little") + return result, length + +def py3k_str_decode_utf_32(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "native", 'utf-32-' + BYTEORDER2) + return result, length + +def py3k_str_decode_utf_32_be(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "big", 'utf-32-be') + return result, length + +def py3k_str_decode_utf_32_le(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "little", 'utf-32-le') + return result, length + +BOM32_DIRECT = intmask(0x0000FEFF) +BOM32_REVERSE = intmask(0xFFFE0000) + +def str_decode_utf_32_helper(s, size, errors, final=True, + errorhandler=None, + byteorder="native", + public_encoding_name='utf32'): + if errorhandler is None: + errorhandler = default_unicode_error_decode + bo = 0 + + if BYTEORDER == 'little': + iorder = [0, 1, 2, 3] + else: + iorder = [3, 2, 1, 0] + + # Check for BOM marks (U+FEFF) in the input and adjust current + # byte order setting accordingly. In native mode, the leading BOM + # mark is skipped, in all other modes, it is copied to the output + # stream as-is (giving a ZWNBSP character). + pos = 0 + if byteorder == 'native': + if size >= 4: + bom = intmask( + (ord(s[iorder[3]]) << 24) | (ord(s[iorder[2]]) << 16) | + (ord(s[iorder[1]]) << 8) | ord(s[iorder[0]])) + if BYTEORDER == 'little': + if bom == BOM32_DIRECT: + pos += 4 + bo = -1 + elif bom == BOM32_REVERSE: + pos += 4 + bo = 1 + else: + if bom == BOM32_DIRECT: + pos += 4 + bo = 1 + elif bom == BOM32_REVERSE: + pos += 4 + bo = -1 + elif byteorder == 'little': + bo = -1 + else: + bo = 1 + if size == 0: + return u'', 0, bo + if bo == -1: + # force little endian + iorder = [0, 1, 2, 3] + elif bo == 1: + # force big endian + iorder = [3, 2, 1, 0] + + result = UnicodeBuilder(size // 4) + + while pos < size: + # remaining bytes at the end? 
(size should be divisible by 4) + if len(s) - pos < 4: + if not final: + break + r, pos = errorhandler(errors, public_encoding_name, + "truncated data", + s, pos, len(s)) + result.append(r) + if len(s) - pos < 4: + break + continue + ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) | + (ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]])) + if ch >= 0x110000: + r, pos = errorhandler(errors, public_encoding_name, + "codepoint not in range(0x110000)", + s, pos, len(s)) + result.append(r) + continue + + if MAXUNICODE < 65536 and ch >= 0x10000: + ch -= 0x10000L + result.append(unichr(0xD800 + (ch >> 10))) + result.append(unichr(0xDC00 + (ch & 0x03FF))) + else: + result.append(UNICHR(ch)) + pos += 4 + return result.build(), pos, bo + +def _STORECHAR32(result, CH, byteorder): + c0 = chr(((CH) >> 24) & 0xff) + c1 = chr(((CH) >> 16) & 0xff) + c2 = chr(((CH) >> 8) & 0xff) + c3 = chr((CH) & 0xff) + if byteorder == 'little': + result.append(c3) + result.append(c2) + result.append(c1) + result.append(c0) + else: + result.append(c0) + result.append(c1) + result.append(c2) + result.append(c3) + +def unicode_encode_utf_32_helper(s, size, errors, + errorhandler=None, + allow_surrogates=True, + byteorder='little', + public_encoding_name='utf32'): + if errorhandler is None: + errorhandler = default_unicode_error_encode + if size == 0: + if byteorder == 'native': + result = StringBuilder(4) + _STORECHAR32(result, 0xFEFF, BYTEORDER) + return result.build() + return "" + + result = StringBuilder(size * 4 + 4) + if byteorder == 'native': + _STORECHAR32(result, 0xFEFF, BYTEORDER) + byteorder = BYTEORDER + + pos = 0 + while pos < size: + ch = ord(s[pos]) + pos += 1 + ch2 = 0 + if not allow_surrogates and 0xD800 <= ch < 0xE000: + ru, rs, pos = errorhandler( + errors, public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + if rs is not None: + # py3k only + if len(rs) % 4 != 0: + errorhandler( + 'strict', public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + result.append(rs) + continue + for ch in ru: + if ord(ch) < 0xD800: + _STORECHAR32(result, ord(ch), byteorder) + else: + errorhandler( + 'strict', public_encoding_name, + 'surrogates not allowed', s, pos - 1, pos) + continue + if 0xD800 <= ch < 0xDC00 and MAXUNICODE < 65536 and pos < size: + ch2 = ord(s[pos]) + if 0xDC00 <= ch2 < 0xE000: + ch = (((ch & 0x3FF) << 10) | (ch2 & 0x3FF)) + 0x10000 + pos += 1 + _STORECHAR32(result, ch, byteorder) + + return result.build() + +def unicode_encode_utf_32(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "native") + +def unicode_encode_utf_32_be(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "big") + +def unicode_encode_utf_32_le(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "little") + +def py3k_unicode_encode_utf_32(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "native", + 'utf-32-' + BYTEORDER2) + +def py3k_unicode_encode_utf_32_be(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "big", + 'utf-32-be') + +def py3k_unicode_encode_utf_32_le(s, size, errors, + 
errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "little", + 'utf-32-le') diff --git a/pypy/module/__builtin__/test/test_builtin.py b/pypy/module/__builtin__/test/test_builtin.py --- a/pypy/module/__builtin__/test/test_builtin.py +++ b/pypy/module/__builtin__/test/test_builtin.py @@ -404,6 +404,7 @@ def test_cmp(self): + assert cmp(float('nan'), float('nan')) == 0 assert cmp(9,9) == 0 assert cmp(0,9) < 0 assert cmp(9,0) > 0 diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -1,10 +1,12 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rstring import UnicodeBuilder +from rpython.rlib import runicode from rpython.rlib.runicode import code_to_unichr, MAXUNICODE from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault +from pypy.interpreter import unicodehelper class VersionTag(object): @@ -210,7 +212,8 @@ def xmlcharrefreplace_errors(space, w_exc): check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object'))) + w_obj = space.getattr(w_exc, space.newtext('object')) + obj = space.realunicode_w(w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) @@ -236,7 +239,8 @@ def backslashreplace_errors(space, w_exc): check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object'))) + w_obj = space.getattr(w_exc, space.newtext('object')) + obj = space.realunicode_w(w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) @@ -363,19 +367,23 @@ raise oefmt(space.w_TypeError, "handler must be callable") # ____________________________________________________________ -# delegation to runicode +# delegation to runicode/unicodehelper -from rpython.rlib import runicode +def _find_implementation(impl_name): + try: + func = getattr(unicodehelper, impl_name) + except AttributeError: + func = getattr(runicode, impl_name) + return func def make_encoder_wrapper(name): rname = "unicode_encode_%s" % (name.replace("_encode", ""), ) - assert hasattr(runicode, rname) + func = _find_implementation(rname) @unwrap_spec(uni=unicode, errors='text_or_none') def wrap_encoder(space, uni, errors="strict"): if errors is None: errors = 'strict' state = space.fromcache(CodecState) - func = getattr(runicode, rname) result = func(uni, len(uni), errors, state.encode_error_handler) return space.newtuple([space.newbytes(result), space.newint(len(uni))]) wrap_encoder.func_name = rname @@ -383,7 +391,7 @@ def make_decoder_wrapper(name): rname = "str_decode_%s" % (name.replace("_decode", ""), ) - assert hasattr(runicode, rname) + func = _find_implementation(rname) @unwrap_spec(string='bufferstr', errors='text_or_none', w_final=WrappedDefault(False)) def wrap_decoder(space, string, errors="strict", w_final=None): @@ -391,7 +399,6 @@ errors = 'strict' final = space.is_true(w_final) state = space.fromcache(CodecState) - func = getattr(runicode, rname) result, consumed = func(string, len(string), errors, final, 
state.decode_error_handler) return space.newtuple([space.newunicode(result), space.newint(consumed)]) diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -115,10 +115,10 @@ raises(TypeError, charmap_decode, '\xff', "strict", {0xff: 0x110000}) assert (charmap_decode("\x00\x01\x02", "strict", {0: 0x10FFFF, 1: ord('b'), 2: ord('c')}) == - u"\U0010FFFFbc", 3) + (u"\U0010FFFFbc", 3)) assert (charmap_decode("\x00\x01\x02", "strict", {0: u'\U0010FFFF', 1: u'b', 2: u'c'}) == - u"\U0010FFFFbc", 3) + (u"\U0010FFFFbc", 3)) def test_escape_decode_errors(self): from _codecs import escape_decode as decode @@ -537,8 +537,12 @@ assert '\xff'.decode('utf-7', 'ignore') == '' assert '\x00'.decode('unicode-internal', 'ignore') == '' - def test_backslahreplace(self): - assert u'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') == 'a\\xac\u1234\u20ac\u8000' + def test_backslashreplace(self): + sin = u"a\xac\u1234\u20ac\u8000\U0010ffff" + expected = "a\\xac\\u1234\\u20ac\\u8000\\U0010ffff" + assert sin.encode('ascii', 'backslashreplace') == expected + expected = "a\xac\\u1234\xa4\\u8000\\U0010ffff" + assert sin.encode("iso-8859-15", "backslashreplace") == expected def test_badhandler(self): import codecs @@ -592,11 +596,11 @@ def handler_unicodeinternal(exc): if not isinstance(exc, UnicodeDecodeError): raise TypeError("don't know how to handle %r" % exc) - return (u"\x01", 1) + return (u"\x01", 4) codecs.register_error("test.hui", handler_unicodeinternal) res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui") if sys.maxunicode > 65535: - assert res == u"\u0000\u0001\u0000" # UCS4 build + assert res == u"\u0000\u0001" # UCS4 build else: assert res == u"\x00\x00\x01\x00\x00" # UCS2 build @@ -750,3 +754,31 @@ assert _codecs.unicode_escape_decode(b) == (u'', 0) assert _codecs.raw_unicode_escape_decode(b) == (u'', 0) assert _codecs.unicode_internal_decode(b) == (u'', 0) + + def test_xmlcharrefreplace(self): + r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('latin1', 'xmlcharrefreplace') + assert r == 'ሴ\x80⍅y\xab' + r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('ascii', 'xmlcharrefreplace') + assert r == 'ሴ€⍅y«' + + def test_errorhandler_collection(self): + import _codecs + errors = [] + def record_error(exc): + if not isinstance(exc, UnicodeEncodeError): + raise TypeError("don't know how to handle %r" % exc) + errors.append(exc.object[exc.start:exc.end]) + return (u'', exc.end) + _codecs.register_error("test.record", record_error) + + sin = u"\xac\u1234\u1234\u20ac\u8000" + assert sin.encode("ascii", "test.record") == "" + assert errors == [sin] + + errors = [] + assert sin.encode("latin-1", "test.record") == "\xac" + assert errors == [u'\u1234\u1234\u20ac\u8000'] + + errors = [] + assert sin.encode("iso-8859-15", "test.record") == "\xac\xa4" + assert errors == [u'\u1234\u1234', u'\u8000'] diff --git a/pypy/module/test_lib_pypy/test_greenlet.py b/pypy/module/_continuation/test/test_greenlet.py rename from pypy/module/test_lib_pypy/test_greenlet.py rename to pypy/module/_continuation/test/test_greenlet.py diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -353,6 +353,7 @@ while scanned < limit: try: ch = self.next_char() + scanned += 1 except StopIteration: return False if ch == u'\n': @@ -413,6 +414,7 @@ if not space.isinstance_w(w_decoded, 
space.w_unicode): msg = "decoder should return a string result, not '%T'" raise oefmt(space.w_TypeError, msg, w_decoded) + return w_decoded class W_TextIOWrapper(W_TextIOBase): @@ -737,7 +739,7 @@ remnant = None continue - if limit > 0: + if limit >= 0: remaining = limit - builder.getlength() assert remaining >= 0 else: @@ -939,12 +941,13 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self.decoded.set(space, w_decoded) + w_decoded = check_decoded(space, w_decoded) # Skip chars_to_skip of the decoded characters - if len(self.decoded.text) < cookie.chars_to_skip: + if space.len_w(w_decoded) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") + self.decoded.set(space, w_decoded) self.decoded.pos = cookie.chars_to_skip else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -957,10 +960,8 @@ def tell_w(self, space): self._check_closed(space) - if not self.seekable: raise oefmt(space.w_IOError, "underlying stream is not seekable") - if not self.telling: raise oefmt(space.w_IOError, "telling position disabled by next() call") @@ -1030,14 +1031,14 @@ # We didn't get enough decoded data; signal EOF to get more. w_decoded = space.call_method(self.w_decoder, "decode", space.newbytes(""), - space.newint(1)) # final=1 + space.newint(1)) # final=1 check_decoded(space, w_decoded) - chars_decoded += len(space.unicode_w(w_decoded)) + chars_decoded += space.len_w(w_decoded) cookie.need_eof = 1 if chars_decoded < chars_to_skip: raise oefmt(space.w_IOError, - "can't reconstruct logical file position") + "can't reconstruct logical file position") finally: space.call_method(self.w_decoder, "setstate", w_saved_state) diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -1,40 +1,54 @@ import pytest try: - from hypothesis import given, strategies as st, assume + from hypothesis import given, strategies as st except ImportError: pytest.skip("hypothesis required") +import os from pypy.module._io.interp_bytesio import W_BytesIO from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace(u'\r\n', u'\n') + text = text.replace(u'\r', u'\n') + return text.replace(u'\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline(draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + fragments = [] + limits = [] + for _ in range(n_lines): + line = draw(st.text(st.characters(blacklist_characters=u'\r\n'))) + fragments.append(line) + ending = draw(st.sampled_from([u'\n', u'\r', u'\r\n'])) + fragments.append(ending) + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + return (u''.join(fragments), limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(space, txt, mode, limit): - assume(limit != 0) + at given(data=st_readline(), + mode=st.sampled_from(['\r', '\n', '\r\n', ''])) +def test_readline(space, data, mode): + txt, limits = data w_stream = W_BytesIO(space) w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) w_textio = 
W_TextIOWrapper(space) w_textio.descr_init( - space, w_stream, encoding='utf-8', + space, w_stream, + encoding='utf-8', w_errors=space.newtext('surrogatepass'), w_newline=space.newtext(mode)) lines = [] - while True: - line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) - if limit > 0: + for limit in limits: + w_line = w_textio.readline_w(space, space.newint(limit)) + line = space.unicode_w(w_line) + if limit >= 0: assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + assert txt.startswith(u''.join(lines)) @given(st.text()) def test_read_buffer(text): diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -71,7 +71,7 @@ self.ll_chars = rffi.str2charp(s) self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw') self.pos = 0 - self.cache = r_dict(slice_eq, slice_hash) + self.cache = r_dict(slice_eq, slice_hash, simple_hash_eq=True) def close(self): rffi.free_charp(self.ll_chars) diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py --- a/pypy/module/_pypyjson/interp_encoder.py +++ b/pypy/module/_pypyjson/interp_encoder.py @@ -49,24 +49,24 @@ first = 0 for i in range(first, len(u)): - c = u[i] - if c <= u'~': - if c == u'"' or c == u'\\': + c = ord(u[i]) + if c <= ord('~'): + if c == ord('"') or c == ord('\\'): sb.append('\\') - elif c < u' ': - sb.append(ESCAPE_BEFORE_SPACE[ord(c)]) + elif c < ord(' '): + sb.append(ESCAPE_BEFORE_SPACE[c]) continue - sb.append(chr(ord(c))) + sb.append(chr(c)) else: - if c <= u'\uffff': + if c <= ord(u'\uffff'): sb.append('\\u') - sb.append(HEX[ord(c) >> 12]) - sb.append(HEX[(ord(c) >> 8) & 0x0f]) - sb.append(HEX[(ord(c) >> 4) & 0x0f]) - sb.append(HEX[ord(c) & 0x0f]) + sb.append(HEX[c >> 12]) + sb.append(HEX[(c >> 8) & 0x0f]) + sb.append(HEX[(c >> 4) & 0x0f]) + sb.append(HEX[c & 0x0f]) else: # surrogate pair - n = ord(c) - 0x10000 + n = c - 0x10000 s1 = 0xd800 | ((n >> 10) & 0x3ff) sb.append('\\ud') sb.append(HEX[(s1 >> 8) & 0x0f]) diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py --- a/pypy/module/_rawffi/alt/type_converter.py +++ b/pypy/module/_rawffi/alt/type_converter.py @@ -128,7 +128,7 @@ intval: lltype.Signed """ self.error(w_ffitype, w_obj) - + def handle_unichar(self, w_ffitype, w_obj, intval): """ intval: lltype.Signed @@ -174,7 +174,7 @@ def handle_struct_rawffi(self, w_ffitype, w_structinstance): """ This method should be killed as soon as we remove support for _rawffi structures - + w_structinstance: W_StructureInstance """ self.error(w_ffitype, w_structinstance) @@ -349,7 +349,7 @@ def get_struct_rawffi(self, w_ffitype, w_structdescr): """ This should be killed as soon as we kill support for _rawffi structures - + Return type: lltype.Unsigned (the address of the structure) """ diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -481,11 +481,13 @@ @unwrap_spec(w_groupnum=WrappedDefault(0)) def start_w(self, w_groupnum): - return self.space.newint(self.do_span(w_groupnum)[0]) + start, end = self.do_span(w_groupnum) + return self.space.newint(start) @unwrap_spec(w_groupnum=WrappedDefault(0)) def end_w(self, w_groupnum): - return self.space.newint(self.do_span(w_groupnum)[1]) + start, end = self.do_span(w_groupnum) + return 
self.space.newint(end) @unwrap_spec(w_groupnum=WrappedDefault(0)) def span_w(self, w_groupnum): diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py --- a/pypy/module/_sre/test/test_app_sre.py +++ b/pypy/module/_sre/test/test_app_sre.py @@ -87,6 +87,14 @@ assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus") assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs") + def test_findall_unicode(self): + import re + assert [u"\u1234"] == re.findall(u"\u1234", u"\u1000\u1234\u2000") + assert ["a", "u"] == re.findall("b(.)", "abalbus") + assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus") + assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs") + assert [u"xyz"] == re.findall(u".*yz", u"xyz") + def test_finditer(self): import re it = re.finditer("b(.)", "brabbel") @@ -999,3 +1007,15 @@ import re assert re.search(".+ab", "wowowowawoabwowo") assert None == re.search(".+ab", "wowowaowowo") + + +class AppTestUnicodeExtra: + def test_string_attribute(self): + import re + match = re.search(u"\u1234", u"\u1233\u1234\u1235") + assert match.string == u"\u1233\u1234\u1235" + + def test_match_start(self): + import re + match = re.search(u"\u1234", u"\u1233\u1234\u1235") + assert match.start() == 1 diff --git a/pypy/module/cpyext/pyerrors.py b/pypy/module/cpyext/pyerrors.py --- a/pypy/module/cpyext/pyerrors.py +++ b/pypy/module/cpyext/pyerrors.py @@ -122,7 +122,7 @@ error indicator.""" raise oefmt(space.w_TypeError, "bad argument type for built-in operation") - at cpython_api([], lltype.Void) + at cpython_api([], lltype.Void, error=None) def PyErr_BadInternalCall(space): raise oefmt(space.w_SystemError, "Bad internal call!") diff --git a/pypy/module/cpyext/test/test_codecs.py b/pypy/module/cpyext/test/test_codecs.py --- a/pypy/module/cpyext/test/test_codecs.py +++ b/pypy/module/cpyext/test/test_codecs.py @@ -11,5 +11,5 @@ w_encoded = space.call_method(w_encoder, 'encode', space.wrap(u'späm')) w_decoder = PyCodec_IncrementalDecoder(space, utf8, None) w_decoded = space.call_method(w_decoder, 'decode', w_encoded) - assert space.unwrap(w_decoded) == u'späm' + assert space.unicode_w(w_decoded) == u'späm' rffi.free_charp(utf8) diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py --- a/pypy/module/cpyext/test/test_eval.py +++ b/pypy/module/cpyext/test/test_eval.py @@ -131,7 +131,7 @@ finally: rffi.free_charp(buf) w_a = space.getitem(w_globals, space.wrap("a")) - assert space.unwrap(w_a) == u'caf\xe9' + assert space.unicode_w(w_a) == u'caf\xe9' lltype.free(flags, flavor='raw') def test_run_file(self, space): diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py --- a/pypy/module/cpyext/test/test_object.py +++ b/pypy/module/cpyext/test/test_object.py @@ -8,7 +8,7 @@ from pypy.module.cpyext.object import ( PyObject_IsTrue, PyObject_Not, PyObject_GetAttrString, PyObject_DelAttrString, PyObject_GetAttr, PyObject_DelAttr, - PyObject_GetItem, + PyObject_GetItem, PyObject_IsInstance, PyObject_IsSubclass, PyObject_AsFileDescriptor, PyObject_Hash, PyObject_Cmp, PyObject_Unicode ) @@ -209,9 +209,9 @@ PyObject_Cmp(space, w(u"\xe9"), w("\xe9"), ptr) def test_unicode(self, space, api): - assert space.unwrap(api.PyObject_Unicode(None)) == u"" - assert space.unwrap(api.PyObject_Unicode(space.wrap([]))) == u"[]" - assert space.unwrap(api.PyObject_Unicode(space.wrap("e"))) == u"e" + assert space.unicode_w(api.PyObject_Unicode(None)) == u"" + assert 
space.unicode_w(api.PyObject_Unicode(space.wrap([]))) == u"[]" + assert space.unicode_w(api.PyObject_Unicode(space.wrap("e"))) == u"e" with raises_w(space, UnicodeDecodeError): PyObject_Unicode(space, space.wrap("\xe9")) @@ -562,7 +562,7 @@ PyObject *a = PyTuple_GetItem(args, 0); PyObject *b = PyTuple_GetItem(args, 1); int res = PyObject_RichCompareBool(a, b, Py_EQ); - return PyLong_FromLong(res); + return PyLong_FromLong(res); """),]) a = float('nan') b = float('nan') diff --git a/pypy/module/cpyext/test/test_pyerrors.py b/pypy/module/cpyext/test/test_pyerrors.py --- a/pypy/module/cpyext/test/test_pyerrors.py +++ b/pypy/module/cpyext/test/test_pyerrors.py @@ -425,3 +425,15 @@ assert orig_exc_info == reset_sys_exc_info assert new_exc_info == (new_exc.__class__, new_exc, None) assert new_exc_info == new_sys_exc_info + + def test_PyErr_BadInternalCall(self): + # NB. it only seemed to fail when run with '-s'... but I think + # that it always printed stuff to stderr + module = self.import_extension('foo', [ + ("oops", "METH_NOARGS", + r''' + PyErr_BadInternalCall(); + return NULL; + '''), + ]) + raises(SystemError, module.oops) diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -178,7 +178,7 @@ array = rffi.cast(rffi.CWCHARP, PyUnicode_AS_DATA(space, word)) array2 = PyUnicode_AS_UNICODE(space, word) array3 = PyUnicode_AsUnicode(space, word) - for (i, char) in enumerate(space.unwrap(word)): + for (i, char) in enumerate(space.unicode_w(word)): assert array[i] == char assert array2[i] == char assert array3[i] == char @@ -216,12 +216,12 @@ def test_fromstring(self, space): s = rffi.str2charp(u'sp\x09m'.encode("utf-8")) w_res = PyUnicode_FromString(space, s) - assert space.unwrap(w_res) == u'sp\x09m' + assert space.unicode_w(w_res) == u'sp\x09m' res = PyUnicode_FromStringAndSize(space, s, 4) w_res = from_ref(space, res) Py_DecRef(space, res) - assert space.unwrap(w_res) == u'sp\x09m' + assert space.unicode_w(w_res) == u'sp\x09m' rffi.free_charp(s) def test_unicode_resize(self, space): @@ -256,17 +256,17 @@ u = rffi.str2charp(u'sp\x134m'.encode("utf-8")) w_u = PyUnicode_DecodeUTF8(space, u, 5, None) assert space.type(w_u) is space.w_unicode - assert space.unwrap(w_u) == u'sp\x134m' + assert space.unicode_w(w_u) == u'sp\x134m' w_u = PyUnicode_DecodeUTF8(space, u, 2, None) assert space.type(w_u) is space.w_unicode - assert space.unwrap(w_u) == 'sp' + assert space.unicode_w(w_u) == 'sp' rffi.free_charp(u) def test_encode_utf8(self, space): u = rffi.unicode2wcharp(u'sp\x09m') w_s = PyUnicode_EncodeUTF8(space, u, 4, None) - assert space.unwrap(w_s) == u'sp\x09m'.encode('utf-8') + assert space.unicode_w(w_s) == u'sp\x09m'.encode('utf-8') rffi.free_wcharp(u) def test_encode_decimal(self, space): @@ -364,18 +364,18 @@ def test_fromobject(self, space): w_u = space.wrap(u'a') assert PyUnicode_FromObject(space, w_u) is w_u - assert space.unwrap( + assert space.unicode_w( PyUnicode_FromObject(space, space.wrap('test'))) == 'test' def test_decode(self, space): b_text = rffi.str2charp('caf\x82xx') b_encoding = rffi.str2charp('cp437') - assert space.unwrap( + assert space.unicode_w( PyUnicode_Decode(space, b_text, 4, b_encoding, None)) == u'caf\xe9' w_text = PyUnicode_FromEncodedObject(space, space.wrap("test"), b_encoding, None) assert space.isinstance_w(w_text, space.w_unicode) - assert space.unwrap(w_text) == "test" + assert space.unicode_w(w_text) == 
"test" with raises_w(space, TypeError): PyUnicode_FromEncodedObject(space, space.wrap(u"test"), @@ -391,7 +391,8 @@ u_text = u'abcdefg' s_text = space.str_w(PyUnicode_AsEncodedString(space, space.wrap(u_text), null_charp, null_charp)) b_text = rffi.str2charp(s_text) - assert space.unwrap(PyUnicode_Decode(space, b_text, len(s_text), null_charp, null_charp)) == u_text + assert space.unicode_w(PyUnicode_Decode( + space, b_text, len(s_text), null_charp, null_charp)) == u_text with raises_w(space, TypeError): PyUnicode_FromEncodedObject( space, space.wrap(u_text), null_charp, None) @@ -508,7 +509,7 @@ def test_concat(self, space): w_res = PyUnicode_Concat(space, space.wrap(u'a'), space.wrap(u'b')) - assert space.unwrap(w_res) == u'ab' + assert space.unicode_w(w_res) == u'ab' def test_copy(self, space): w_x = space.wrap(u"abcd\u0660") @@ -579,29 +580,30 @@ w_format = space.wrap(u'hi %s') w_args = space.wrap((u'test',)) w_formated = PyUnicode_Format(space, w_format, w_args) - assert space.unwrap(w_formated) == space.unwrap(space.mod(w_format, w_args)) + assert (space.unicode_w(w_formated) == + space.unicode_w(space.mod(w_format, w_args))) def test_join(self, space): w_sep = space.wrap(u'') w_seq = space.wrap([u'a', u'b']) w_joined = PyUnicode_Join(space, w_sep, w_seq) - assert space.unwrap(w_joined) == u'ab' + assert space.unicode_w(w_joined) == u'ab' def test_fromordinal(self, space): w_char = PyUnicode_FromOrdinal(space, 65) - assert space.unwrap(w_char) == u'A' + assert space.unicode_w(w_char) == u'A' w_char = PyUnicode_FromOrdinal(space, 0) - assert space.unwrap(w_char) == u'\0' + assert space.unicode_w(w_char) == u'\0' w_char = PyUnicode_FromOrdinal(space, 0xFFFF) - assert space.unwrap(w_char) == u'\uFFFF' + assert space.unicode_w(w_char) == u'\uFFFF' def test_replace(self, space): w_str = space.wrap(u"abababab") w_substr = space.wrap(u"a") w_replstr = space.wrap(u"z") - assert u"zbzbabab" == space.unwrap( + assert u"zbzbabab" == space.unicode_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, 2)) - assert u"zbzbzbzb" == space.unwrap( + assert u"zbzbzbzb" == space.unicode_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, -1)) def test_tailmatch(self, space): diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -12,6 +12,7 @@ from pypy.module.cpyext.bytesobject import PyString_Check from pypy.module.sys.interp_encoding import setdefaultencoding from pypy.module._codecs.interp_codecs import CodecState +from pypy.interpreter import unicodehelper from pypy.objspace.std import unicodeobject from rpython.rlib import rstring, runicode from rpython.tool.sourcetools import func_renamer @@ -620,7 +621,7 @@ else: errors = None - result, length, byteorder = runicode.str_decode_utf_32_helper( + result, length, byteorder = unicodehelper.str_decode_utf_32_helper( string, size, errors, True, # final ? false for multiple passes? 
None, # errorhandler diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -31,9 +31,15 @@ pdir.join('file2').write("test2") pdir.join('another_longer_file_name').write("test3") mod.pdir = pdir - unicode_dir = udir.ensure('fi\xc5\x9fier.txt', dir=True) + if sys.platform == 'darwin': + # see issue https://bugs.python.org/issue31380 + unicode_dir = udir.ensure('fixc5x9fier.txt', dir=True) + file_name = 'cafxe9' + else: + unicode_dir = udir.ensure('fi\xc5\x9fier.txt', dir=True) + file_name = 'caf\xe9' unicode_dir.join('somefile').write('who cares?') - unicode_dir.join('caf\xe9').write('who knows?') + unicode_dir.join(file_name).write('who knows?') mod.unicode_dir = unicode_dir # in applevel tests, os.stat uses the CPython os.stat. diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py --- a/pypy/module/time/interp_time.py +++ b/pypy/module/time/interp_time.py @@ -310,12 +310,19 @@ errno = rposix.get_saved_errno() return os.strerror(errno) +def _check_sleep_arg(space, secs): + from rpython.rlib.rfloat import isinf, isnan + if secs < 0: + raise oefmt(space.w_IOError, + "Invalid argument: negative time in sleep") + if isinf(secs) or isnan(secs): + raise oefmt(space.w_IOError, + "Invalid argument: inf or nan") + if sys.platform != 'win32': @unwrap_spec(secs=float) def sleep(space, secs): - if secs < 0: - raise oefmt(space.w_IOError, - "Invalid argument: negative time in sleep") + _check_sleep_arg(space, secs) rtime.sleep(secs) else: from rpython.rlib import rwin32 @@ -336,9 +343,7 @@ OSError(EINTR, "sleep() interrupted")) @unwrap_spec(secs=float) def sleep(space, secs): - if secs < 0: - raise oefmt(space.w_IOError, - "Invalid argument: negative time in sleep") + _check_sleep_arg(space, secs) # as decreed by Guido, only the main thread can be # interrupted. 
main_thread = space.fromcache(State).main_thread diff --git a/pypy/module/time/test/test_time.py b/pypy/module/time/test/test_time.py --- a/pypy/module/time/test/test_time.py +++ b/pypy/module/time/test/test_time.py @@ -19,6 +19,8 @@ raises(TypeError, time.sleep, "foo") time.sleep(0.12345) raises(IOError, time.sleep, -1.0) + raises(IOError, time.sleep, float('nan')) + raises(IOError, time.sleep, float('inf')) def test_clock(self): import time diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -259,10 +259,10 @@ result[0] = ch if not composed: # If decomposed normalization we are done - return space.newunicode(u''.join([unichr(i) for i in result[:j]])) + return self.build(space, result, stop=j) if j <= 1: - return space.newunicode(u''.join([unichr(i) for i in result[:j]])) + return self.build(space, result, stop=j) current = result[0] starter_pos = 0 @@ -310,7 +310,10 @@ result[starter_pos] = current - return space.newunicode(u''.join([unichr(i) for i in result[:next_insert]])) + return self.build(space, result, stop=next_insert) + + def build(self, space, r, stop): + return space.newunicode(u''.join([unichr(i) for i in r[:stop]])) methods = {} diff --git a/pypy/module/unicodedata/test/test_hyp.py b/pypy/module/unicodedata/test/test_hyp.py --- a/pypy/module/unicodedata/test/test_hyp.py +++ b/pypy/module/unicodedata/test/test_hyp.py @@ -10,7 +10,7 @@ def normalize(s): w_s = space.newunicode(s) w_res = ucd.normalize(space, NF_code, w_s) - return space.unwrap(w_res) + return space.unicode_w(w_res) return normalize all_forms = ['NFC', 'NFD', 'NFKC', 'NFKD'] diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -135,6 +135,11 @@ check(u'a' + 'b', u'ab') check('a' + u'b', u'ab') + def test_getitem(self): + assert u'abc'[2] == 'c' + raises(IndexError, u'abc'.__getitem__, 15) + assert u'g\u0105\u015b\u0107'[2] == u'\u015b' + def test_join(self): def check(a, b): assert a == b @@ -171,6 +176,8 @@ assert u'\n\n'.splitlines() == [u'', u''] assert u'a\nb\nc'.splitlines(1) == [u'a\n', u'b\n', u'c'] assert u'\na\nb\n'.splitlines(1) == [u'\n', u'a\n', u'b\n'] + assert ((u'a' + '\xc2\x85'.decode('utf8') + u'b\n').splitlines() == + ['a', 'b']) def test_zfill(self): assert u'123'.zfill(2) == u'123' @@ -217,6 +224,7 @@ raises(ValueError, u'abc'.split, u'') raises(ValueError, 'abc'.split, u'') assert u' a b c d'.split(None, 0) == [u'a b c d'] + assert u'a\nb\u1680c'.split() == [u'a', u'b', u'c'] def test_rsplit(self): assert u"".rsplit() == [] @@ -246,6 +254,7 @@ raises(ValueError, 'abc'.rsplit, u'') assert u' a b c '.rsplit(None, 0) == [u' a b c'] assert u''.rsplit('aaa') == [u''] + assert u'a\nb\u1680c'.rsplit() == [u'a', u'b', u'c'] def test_split_rsplit_str_unicode(self): x = 'abc'.split(u'b') @@ -291,6 +300,8 @@ assert u"bROWN fOX".title() == u"Brown Fox" assert u"Brown Fox".title() == u"Brown Fox" assert u"bro!wn fox".title() == u"Bro!Wn Fox" + assert u"brow\u4321n fox".title() == u"Brow\u4321N Fox" + assert u'\ud800'.title() == u'\ud800' def test_istitle(self): assert u"".istitle() == False @@ -315,6 +326,18 @@ assert not u'\u01c5abc'.islower() assert not u'\u01c5ABC'.isupper() + def test_lower_upper(self): + assert u'a'.lower() == u'a' + assert u'A'.lower() == u'a' + assert u'\u0105'.lower() == u'\u0105' + assert 
u'\u0104'.lower() == u'\u0105' + assert u'\ud800'.lower() == u'\ud800' + assert u'a'.upper() == u'A' + assert u'A'.upper() == u'A' + assert u'\u0105'.upper() == u'\u0104' + assert u'\u0104'.upper() == u'\u0104' + assert u'\ud800'.upper() == u'\ud800' + def test_capitalize(self): assert u"brown fox".capitalize() == u"Brown fox" assert u' hello '.capitalize() == u' hello ' @@ -336,6 +359,8 @@ # check with Ll chars with no upper - nothing changes here assert (u'\u019b\u1d00\u1d86\u0221\u1fb7'.capitalize() == u'\u019b\u1d00\u1d86\u0221\u1fb7') + assert u'\ud800'.capitalize() == u'\ud800' + assert u'xx\ud800'.capitalize() == u'Xx\ud800' def test_rjust(self): s = u"abc" @@ -376,6 +401,16 @@ assert u'one!two!three!'.replace('x', '@') == u'one!two!three!' assert u'one!two!three!'.replace(u'x', '@', 2) == u'one!two!three!' assert u'abc'.replace('', u'-') == u'-a-b-c-' + assert u'\u1234'.replace(u'', '-') == u'-\u1234-' + assert u'\u0234\u5678'.replace('', u'-') == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', 0) == u'\u0234\u5678' + assert u'\u0234\u5678'.replace('', u'-', 1) == u'-\u0234\u5678' + assert u'\u0234\u5678'.replace('', u'-', 2) == u'-\u0234-\u5678' + assert u'\u0234\u5678'.replace('', u'-', 3) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', 4) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', 700) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', -1) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', -42) == u'-\u0234-\u5678-' assert u'abc'.replace(u'', u'-', 3) == u'-a-b-c' assert u'abc'.replace('', '-', 0) == u'abc' assert u''.replace(u'', '') == u'' @@ -479,6 +514,9 @@ assert u''.startswith(u'a') is False assert u'x'.startswith(u'xx') is False assert u'y'.startswith(u'xx') is False + assert u'\u1234\u5678\u4321'.startswith(u'\u1234') is True + assert u'\u1234\u5678\u4321'.startswith(u'\u1234\u4321') is False + assert u'\u1234'.startswith(u'', 1, 0) is True def test_startswith_more(self): assert u'ab'.startswith(u'a', 0) is True @@ -589,7 +627,7 @@ raises(TypeError, u'hello'.translate) raises(TypeError, u'abababc'.translate, {ord('a'):''}) - def test_unicode_form_encoded_object(self): + def test_unicode_from_encoded_object(self): assert unicode('x', 'utf-8') == u'x' assert unicode('x', 'utf-8', 'strict') == u'x' @@ -634,6 +672,8 @@ assert u'\u20ac'.encode('utf-8') == '\xe2\x82\xac' assert u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82' assert u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96' + assert u'\ud800\udc02'.encode('uTf-8') == '\xf0\x90\x80\x82' + assert u'\ud84d\udc56'.encode('Utf8') == '\xf0\xa3\x91\x96' assert u'\ud800'.encode('utf-8') == '\xed\xa0\x80' assert u'\udc00'.encode('utf-8') == '\xed\xb0\x80' assert (u'\ud800\udc02'*1000).encode('utf-8') == '\xf0\x90\x80\x82'*1000 @@ -745,6 +785,7 @@ def test_index(self): assert u"rrarrrrrrrrra".index(u'a', 4, None) == 12 assert u"rrarrrrrrrrra".index(u'a', None, 6) == 2 + assert u"\u1234\u4321\u5678".index(u'\u5678', 1) == 2 def test_rindex(self): from sys import maxint @@ -754,6 +795,7 @@ assert u'abcdefghiabc'.rindex(u'abc', 0, -1) == 0 assert u'abcdefghiabc'.rindex(u'abc', -4*maxint, 4*maxint) == 9 assert u'rrarrrrrrrrra'.rindex(u'a', 4, None) == 12 + assert u"\u1234\u5678".rindex(u'\u5678') == 1 raises(ValueError, u'abcdefghiabc'.rindex, u'hib') raises(ValueError, u'defghiabc'.rindex, u'def', 1) @@ -768,12 +810,15 @@ assert u'abcdefghiabc'.rfind(u'') == 12 assert u'abcdefghiabc'.rfind(u'abcd') == 0 assert u'abcdefghiabc'.rfind(u'abcz') == 
-1 + assert u"\u1234\u5678".rfind(u'\u5678') == 1 def test_rfind_corner_case(self): assert u'abc'.rfind('', 4) == -1 def test_find_index_str_unicode(self): - assert 'abcdefghiabc'.find(u'bc') == 1 + assert u'abcdefghiabc'.find(u'bc') == 1 + assert u'ab\u0105b\u0107'.find('b', 2) == 3 + assert u'ab\u0105b\u0107'.find('b', 0, 1) == -1 assert 'abcdefghiabc'.rfind(u'abc') == 9 raises(UnicodeDecodeError, '\x80'.find, u'') raises(UnicodeDecodeError, '\x80'.rfind, u'') @@ -781,6 +826,7 @@ assert 'abcdefghiabc'.rindex(u'abc') == 9 raises(UnicodeDecodeError, '\x80'.index, u'') raises(UnicodeDecodeError, '\x80'.rindex, u'') + assert u"\u1234\u5678".find(u'\u5678') == 1 def test_count(self): assert u"".count(u"x") ==0 @@ -807,6 +853,7 @@ def test_swapcase(self): assert u'\xe4\xc4\xdf'.swapcase() == u'\xc4\xe4\xdf' + assert u'\ud800'.swapcase() == u'\ud800' def test_buffer(self): buf = buffer(u'XY') @@ -878,16 +925,31 @@ def test_getslice(self): assert u'123456'.__getslice__(1, 5) == u'2345' - s = u"abc" - assert s[:] == "abc" - assert s[1:] == "bc" - assert s[:2] == "ab" - assert s[1:2] == "b" - assert s[-2:] == "bc" - assert s[:-1] == "ab" - assert s[-2:2] == "b" - assert s[1:-1] == "b" - assert s[-2:-1] == "b" + s = u"\u0105b\u0107" + assert s[:] == u"\u0105b\u0107" + assert s[1:] == u"b\u0107" + assert s[:2] == u"\u0105b" + assert s[1:2] == u"b" + assert s[-2:] == u"b\u0107" + assert s[:-1] == u"\u0105b" + assert s[-2:2] == u"b" + assert s[1:-1] == u"b" + assert s[-2:-1] == u"b" + + def test_getitem_slice(self): + assert u'123456'.__getitem__(slice(1, 5)) == u'2345' + s = u"\u0105b\u0107" + assert s[slice(3)] == u"\u0105b\u0107" + assert s[slice(1, 3)] == u"b\u0107" + assert s[slice(2)] == u"\u0105b" + assert s[slice(1,2)] == u"b" + assert s[slice(-2,3)] == u"b\u0107" + assert s[slice(-1)] == u"\u0105b" + assert s[slice(-2,2)] == u"b" + assert s[slice(1,-1)] == u"b" + assert s[slice(-2,-1)] == u"b" + assert u"abcde"[::2] == u"ace" + assert u"\u0105\u0106\u0107abcd"[::2] == u"\u0105\u0107bd" def test_no_len_on_str_iter(self): iterable = u"hello" diff --git a/pypy/tool/release/force-builds.py b/pypy/tool/release/force-builds.py --- a/pypy/tool/release/force-builds.py +++ b/pypy/tool/release/force-builds.py @@ -29,7 +29,6 @@ 'pypy-c-jit-macosx-x86-64', 'pypy-c-jit-win-x86-32', 'pypy-c-jit-linux-s390x', - 'build-pypy-c-jit-linux-armhf-raring', 'build-pypy-c-jit-linux-armhf-raspbian', 'build-pypy-c-jit-linux-armel', ] diff --git a/pypy/tool/release/repackage.sh b/pypy/tool/release/repackage.sh --- a/pypy/tool/release/repackage.sh +++ b/pypy/tool/release/repackage.sh @@ -23,7 +23,7 @@ # Download latest builds from the buildmaster, rename the top # level directory, and repackage ready to be uploaded to bitbucket -for plat in linux linux64 linux-armhf-raspbian linux-armhf-raring linux-armel osx64 s390x +for plat in linux linux64 linux-armhf-raspbian linux-armel osx64 s390x do echo downloading package for $plat if wget -q --show-progress http://buildbot.pypy.org/nightly/$branchname/pypy-c-jit-latest-$plat.tar.bz2 diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -194,13 +194,14 @@ listdef.generalize_range_step(flags['range_step']) return SomeList(listdef) - def getdictdef(self, is_r_dict=False, force_non_null=False): + def getdictdef(self, is_r_dict=False, force_non_null=False, simple_hash_eq=False): """Get the DictDef associated with the current position.""" try: dictdef = 
self.dictdefs[self.position_key] except KeyError: dictdef = DictDef(self, is_r_dict=is_r_dict, - force_non_null=force_non_null) + force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) self.dictdefs[self.position_key] = dictdef return dictdef diff --git a/rpython/annotator/builtin.py b/rpython/annotator/builtin.py --- a/rpython/annotator/builtin.py +++ b/rpython/annotator/builtin.py @@ -237,22 +237,30 @@ return SomeInstance(clsdef) @analyzer_for(rpython.rlib.objectmodel.r_dict) -def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None): +def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): + return _r_dict_helper(SomeDict, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq) + + at analyzer_for(rpython.rlib.objectmodel.r_ordereddict) +def robjmodel_r_ordereddict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): + return _r_dict_helper(SomeOrderedDict, s_eqfn, s_hashfn, + s_force_non_null, s_simple_hash_eq) + +def _r_dict_helper(cls, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq): if s_force_non_null is None: force_non_null = False else: assert s_force_non_null.is_constant() force_non_null = s_force_non_null.const + if s_simple_hash_eq is None: + simple_hash_eq = False + else: + assert s_simple_hash_eq.is_constant() + simple_hash_eq = s_simple_hash_eq.const dictdef = getbookkeeper().getdictdef(is_r_dict=True, - force_non_null=force_non_null) + force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeDict(dictdef) - - at analyzer_for(rpython.rlib.objectmodel.r_ordereddict) -def robjmodel_r_ordereddict(s_eqfn, s_hashfn): - dictdef = getbookkeeper().getdictdef(is_r_dict=True) - dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeOrderedDict(dictdef) + return cls(dictdef) @analyzer_for(rpython.rlib.objectmodel.hlinvoke) def robjmodel_hlinvoke(s_repr, s_llcallable, *args_s): diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py --- a/rpython/annotator/dictdef.py +++ b/rpython/annotator/dictdef.py @@ -81,12 +81,14 @@ def __init__(self, bookkeeper, s_key = s_ImpossibleValue, s_value = s_ImpossibleValue, is_r_dict = False, - force_non_null = False): + force_non_null = False, + simple_hash_eq = False): self.dictkey = DictKey(bookkeeper, s_key, is_r_dict) self.dictkey.itemof[self] = True self.dictvalue = DictValue(bookkeeper, s_value) self.dictvalue.itemof[self] = True self.force_non_null = force_non_null + self.simple_hash_eq = simple_hash_eq def read_key(self, position_key): self.dictkey.read_locations.add(position_key) diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -25,19 +25,6 @@ return (1 << ((byte_size << 3) - 1)) - 1 -IS_64_BIT = sys.maxint > 2**32 - -def next_pow2_m1(n): - """Calculate next power of 2 greater than n minus one.""" - n |= n >> 1 - n |= n >> 2 - n |= n >> 4 - n |= n >> 8 - n |= n >> 16 - if IS_64_BIT: - n |= n >> 32 - return n - class OptIntBounds(Optimization): """Keeps track of the bounds placed on integers by guards and remove @@ -50,7 +37,7 @@ return dispatch_postprocess(self, op) def propagate_bounds_backward(self, box): - # FIXME: This takes care of the instruction where box is the reuslt + # FIXME: This takes care of the instruction where box is the result # but the bounds produced by all instructions where box is # 
an argument might also be tighten b = self.getintbound(box) @@ -91,14 +78,8 @@ b1 = self.getintbound(v1) v2 = self.get_box_replacement(op.getarg(1)) b2 = self.getintbound(v2) - if b1.known_ge(IntBound(0, 0)) and \ - b2.known_ge(IntBound(0, 0)): - r = self.getintbound(op) - if b1.has_upper and b2.has_upper: - mostsignificant = b1.upper | b2.upper - r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) - else: - r.make_ge(IntBound(0, 0)) + b = b1.or_bound(b2) + self.getintbound(op).intersect(b) optimize_INT_OR = optimize_INT_OR_or_XOR optimize_INT_XOR = optimize_INT_OR_or_XOR @@ -112,15 +93,8 @@ def postprocess_INT_AND(self, op): b1 = self.getintbound(op.getarg(0)) b2 = self.getintbound(op.getarg(1)) - r = self.getintbound(op) - pos1 = b1.known_ge(IntBound(0, 0)) - pos2 = b2.known_ge(IntBound(0, 0)) - if pos1 or pos2: - r.make_ge(IntBound(0, 0)) - if pos1: - r.make_le(b1) - if pos2: - r.make_le(b2) + b = b1.and_bound(b2) + self.getintbound(op).intersect(b) def optimize_INT_SUB(self, op): return self.emit(op) @@ -211,16 +185,10 @@ r.intersect(b1.py_div_bound(b2)) def post_call_INT_PY_MOD(self, op): + b1 = self.getintbound(op.getarg(1)) b2 = self.getintbound(op.getarg(2)) - if b2.is_constant(): - val = b2.getint() - r = self.getintbound(op) - if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos - r.make_ge(IntBound(0, 0)) - r.make_lt(IntBound(val, val)) - else: # with Python's modulo: neg < (x % neg) <= 0 - r.make_gt(IntBound(val, val)) - r.make_le(IntBound(0, 0)) + r = self.getintbound(op) + r.intersect(b1.mod_bound(b2)) def optimize_INT_LSHIFT(self, op): return self.emit(op) @@ -436,7 +404,7 @@ def optimize_INT_FORCE_GE_ZERO(self, op): b = self.getintbound(op.getarg(0)) - if b.known_ge(IntBound(0, 0)): + if b.known_nonnegative(): self.make_equal_to(op, op.getarg(0)) else: return self.emit(op) @@ -647,7 +615,7 @@ if r.is_constant(): if r.getint() == valnonzero: b1 = self.getintbound(op.getarg(0)) - if b1.known_ge(IntBound(0, 0)): + if b1.known_nonnegative(): b1.make_gt(IntBound(0, 0)) self.propagate_bounds_backward(op.getarg(0)) elif r.getint() == valzero: diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -12,6 +12,19 @@ MAXINT = maxint MININT = -maxint - 1 +IS_64_BIT = sys.maxint > 2**32 + +def next_pow2_m1(n): + """Calculate next power of 2 greater than n minus one.""" + n |= n >> 1 + n |= n >> 2 + n |= n >> 4 + n |= n >> 8 + n |= n >> 16 + if IS_64_BIT: + n |= n >> 32 + return n + class IntBound(AbstractInfo): _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower') @@ -92,6 +105,9 @@ def known_ge(self, other): return other.known_le(self) + def known_nonnegative(self): + return self.has_lower and 0 <= self.lower + def intersect(self, other): r = False @@ -192,10 +208,22 @@ else: return IntUnbounded() + def mod_bound(self, other): + r = IntUnbounded() + if other.is_constant(): + val = other.getint() + if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos + r.make_ge(IntBound(0, 0)) + r.make_lt(IntBound(val, val)) + else: # with Python's modulo: neg < (x % neg) <= 0 + r.make_gt(IntBound(val, val)) + r.make_le(IntBound(0, 0)) + return r + def lshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): try: vals = (ovfcheck(self.upper << other.upper), 
@@ -211,7 +239,7 @@ def rshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): vals = (self.upper >> other.upper, self.upper >> other.lower, @@ -221,7 +249,32 @@ else: return IntUnbounded() + def and_bound(self, other): + pos1 = self.known_nonnegative() + pos2 = other.known_nonnegative() + r = IntUnbounded() + if pos1 or pos2: + r.make_ge(IntBound(0, 0)) + if pos1: + r.make_le(self) + if pos2: + r.make_le(other) + return r + + def or_bound(self, other): + r = IntUnbounded() + if self.known_nonnegative() and \ + other.known_nonnegative(): + if self.has_upper and other.has_upper: + mostsignificant = self.upper | other.upper + r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) + else: + r.make_ge(IntBound(0, 0)) + return r + def contains(self, val): + if not we_are_translated(): + assert not isinstance(val, long) if not isinstance(val, int): if ((not self.has_lower or self.lower == MININT) and not self.has_upper or self.upper == MAXINT): @@ -282,7 +335,7 @@ guards.append(op) def is_bool(self): - return (self.bounded() and self.known_ge(ConstIntBound(0)) and + return (self.bounded() and self.known_nonnegative() and self.known_le(ConstIntBound(1))) def make_bool(self): @@ -297,7 +350,7 @@ if self.known_gt(IntBound(0, 0)) or \ self.known_lt(IntBound(0, 0)): return INFO_NONNULL - if self.known_ge(IntBound(0, 0)) and \ + if self.known_nonnegative() and \ self.known_le(IntBound(0, 0)): return INFO_NULL return INFO_UNKNOWN diff --git a/rpython/jit/metainterp/optimizeopt/optimizer.py b/rpython/jit/metainterp/optimizeopt/optimizer.py --- a/rpython/jit/metainterp/optimizeopt/optimizer.py +++ b/rpython/jit/metainterp/optimizeopt/optimizer.py @@ -273,7 +273,6 @@ self.jitdriver_sd = jitdriver_sd self.cpu = metainterp_sd.cpu self.interned_refs = self.cpu.ts.new_ref_dict() - self.interned_ints = {} self.resumedata_memo = resume.ResumeDataLoopMemo(metainterp_sd) self.pendingfields = None # set temporarily to a list, normally by # heap.py, as we're about to generate a guard diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -1,12 +1,34 @@ from rpython.jit.metainterp.optimizeopt.intutils import IntBound, IntUpperBound, \ - IntLowerBound, IntUnbounded -from rpython.jit.metainterp.optimizeopt.intbounds import next_pow2_m1 + IntLowerBound, IntUnbounded, next_pow2_m1 from copy import copy import sys -from rpython.rlib.rarithmetic import LONG_BIT +from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck -def bound(a,b): +from hypothesis import given, strategies + +special_values = ( + range(-100, 100) + + [2 ** i for i in range(1, LONG_BIT)] + + [-2 ** i for i in range(1, LONG_BIT)] + + [2 ** i - 1 for i in range(1, LONG_BIT)] + + [-2 ** i - 1 for i in range(1, LONG_BIT)] + + [2 ** i + 1 for i in range(1, LONG_BIT)] + + [-2 ** i + 1 for i in range(1, LONG_BIT)] + + [sys.maxint, -sys.maxint-1]) + +special_values = strategies.sampled_from( + [int(v) for v in special_values if type(int(v)) is int]) + +ints = strategies.builds( + int, # strategies.integers sometimes returns a long? 
+ special_values | strategies.integers( + min_value=int(-sys.maxint-1), max_value=sys.maxint)) + +ints_or_none = strategies.none() | ints + + +def bound(a, b): if a is None and b is None: return IntUnbounded() elif a is None: @@ -14,11 +36,55 @@ elif b is None: return IntLowerBound(a) else: - return IntBound(a,b) + return IntBound(a, b) def const(a): return bound(a,a) + +def build_bound_with_contained_number(a, b, c): + a, b, c = sorted([a, b, c]) + r = bound(a, c) + assert r.contains(b) + return r, b + +bound_with_contained_number = strategies.builds( + build_bound_with_contained_number, + ints_or_none, + ints_or_none, + ints +) + +unbounded = strategies.builds( + lambda x: (bound(None, None), int(x)), + ints +) + +lower_bounded = strategies.builds( + lambda x, y: (bound(min(x, y), None), max(x, y)), + ints, + ints From pypy.commits at gmail.com Sat Dec 16 11:13:43 2017 From: pypy.commits at gmail.com (mattip) Date: Sat, 16 Dec 2017 08:13:43 -0800 (PST) Subject: [pypy-commit] pypy.org extradoc: remove numpy, bug/issues from nav bar, expand contact info to mention registration Message-ID: <5a354637.44acdf0a.7e1e.2714@mx.google.com> Author: Matti Picus Branch: extradoc Changeset: r903:47fbcae75874 Date: 2017-12-16 18:13 +0200 http://bitbucket.org/pypy/pypy.org/changeset/47fbcae75874/ Log: remove numpy, bug/issues from nav bar, expand contact info to mention registration diff --git a/archive.html b/archive.html --- a/archive.html +++ b/archive.html @@ -53,16 +53,12 @@ | Blog | - Bug/Issues - | People | Contact
Py3k donations | - NumPy donations - | STM donations


diff --git a/compat.html b/compat.html --- a/compat.html +++ b/compat.html @@ -53,16 +53,12 @@ | Blog | - Bug/Issues - | People | Contact
Py3k donations | - NumPy donations - | STM donations
diff --git a/contact.html b/contact.html --- a/contact.html +++ b/contact.html @@ -53,16 +53,12 @@ | Blog | - Bug/Issues - | People | Contact
Py3k donations | - NumPy donations - | STM donations
@@ -75,7 +71,7 @@
  • irc: #pypy on irc.freenode.net
  • mailing list: pypy-dev at python.org
  • for security related issues, non-public funding enquiries etc. please contact pypy@sfconservancy.org
  • - the bug tracker
  • + the bitbucket bug tracker (registration required to open new issues or to comment)
  • more on our dev site.
  • code on bitbucket.
  • diff --git a/download.html b/download.html --- a/download.html +++ b/download.html @@ -53,16 +53,12 @@ | Blog | - Bug/Issues - | People | Contact
    Py3k donations | - NumPy donations - | STM donations
    diff --git a/features.html b/features.html --- a/features.html +++ b/features.html @@ -53,16 +53,12 @@ | Blog | - Bug/Issues - | People | Contact
    Py3k donations | - NumPy donations - | STM donations
    diff --git a/howtohelp.html b/howtohelp.html --- a/howtohelp.html +++ b/howtohelp.html @@ -53,16 +53,12 @@ | Blog | - Bug/Issues - | People | Contact
    Py3k donations | - NumPy donations - | STM donations
    diff --git a/index.html b/index.html --- a/index.html +++ b/index.html @@ -53,16 +53,12 @@ | Blog | - Bug/Issues - | People | Contact
    Py3k donations | - NumPy donations - | STM donations
    diff --git a/numpydonate.html b/numpydonate.html --- a/numpydonate.html +++ b/numpydonate.html @@ -53,16 +53,12 @@ | Blog | - Bug/Issues - | People | Contact
    Py3k donations | - NumPy donations - | STM donations
    diff --git a/people.html b/people.html --- a/people.html +++ b/people.html @@ -53,16 +53,12 @@ | Blog | - Bug/Issues - | People | Contact
    Py3k donations | - NumPy donations - | STM donations
    diff --git a/performance.html b/performance.html --- a/performance.html +++ b/performance.html @@ -53,16 +53,12 @@ | Blog | - Bug/Issues - | People | Contact
    Py3k donations | - NumPy donations - | STM donations
    diff --git a/py3donate.html b/py3donate.html --- a/py3donate.html +++ b/py3donate.html @@ -53,16 +53,12 @@ | Blog | - Bug/Issues - | People | Contact
    Py3k donations | - NumPy donations - | STM donations
    diff --git a/source/_layouts/site.genshi b/source/_layouts/site.genshi --- a/source/_layouts/site.genshi +++ b/source/_layouts/site.genshi @@ -11,11 +11,9 @@ ('Performance', 'performance.html'), ('Dev Documentation', 'http://doc.pypy.org'), ('Blog', 'http://morepypy.blogspot.com'), - ('Bug/Issues', 'https://bitbucket.org/pypy/pypy/issues?status=new&status=open'), ('People', 'people.html'), ('Contact', 'contact.html'), ('Py3k donations', 'py3donate.html'), - ('NumPy donations', 'numpydonate.html'), ('STM donations', 'tmdonate2.html'), ], } diff --git a/source/contact.txt b/source/contact.txt --- a/source/contact.txt +++ b/source/contact.txt @@ -10,9 +10,9 @@ * mailing list: `pypy-dev at python.org`__ -* for security related issues, non-public funding enquiries etc. please contact pypy at sfconservancy.org +* for security related issues, non-public funding enquiries etc. please contact pypy at sfconservancy.org -* the `bug tracker`_ +* the bitbucket `bug tracker`_ (registration required to open new issues or to comment) * more on our `dev site`_. diff --git a/sponsor.html b/sponsor.html --- a/sponsor.html +++ b/sponsor.html @@ -53,16 +53,12 @@ | Blog | - Bug/Issues - | People | Contact
    Py3k donations | - NumPy donations - | STM donations
    diff --git a/success.html b/success.html --- a/success.html +++ b/success.html @@ -53,16 +53,12 @@ | Blog | - Bug/Issues - | People | Contact
    Py3k donations | - NumPy donations - | STM donations
    diff --git a/tmdonate.html b/tmdonate.html --- a/tmdonate.html +++ b/tmdonate.html @@ -53,16 +53,12 @@ | Blog | - Bug/Issues - | People | Contact
    Py3k donations | - NumPy donations - | STM donations
    diff --git a/tmdonate2.html b/tmdonate2.html --- a/tmdonate2.html +++ b/tmdonate2.html @@ -53,16 +53,12 @@ | Blog | - Bug/Issues - | People | Contact
    Py3k donations | - NumPy donations - | STM donations
    From pypy.commits at gmail.com Sun Dec 17 07:58:12 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 17 Dec 2017 04:58:12 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix test Message-ID: <5a3669e4.02431c0a.3f3a9.ce3a@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93447:ea2056d81509 Date: 2017-12-16 07:08 +0000 http://bitbucket.org/pypy/pypy/changeset/ea2056d81509/ Log: fix test diff --git a/pypy/module/_cppyy/test/test_datatypes.py b/pypy/module/_cppyy/test/test_datatypes.py --- a/pypy/module/_cppyy/test/test_datatypes.py +++ b/pypy/module/_cppyy/test/test_datatypes.py @@ -100,8 +100,9 @@ # can not access an instance member on the class raises(AttributeError, getattr, CppyyTestData, 'm_bool') raises(AttributeError, getattr, CppyyTestData, 'm_int') - raises(ReferenceError, hasattr, CppyyTestData, 'm_bool') - raises(ReferenceError, hasattr, CppyyTestData, 'm_int') + + assert not hasattr(CppyyTestData, 'm_bool') + assert not hasattr(CppyyTestData, 'm_int') c.__destruct__() From pypy.commits at gmail.com Sun Dec 17 08:23:22 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 17 Dec 2017 05:23:22 -0800 (PST) Subject: [pypy-commit] pypy default: Fix test: missing incref Message-ID: <5a366fca.b7a0df0a.1d24f.71cf@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93448:a560a9381f17 Date: 2017-12-17 13:22 +0000 http://bitbucket.org/pypy/pypy/changeset/a560a9381f17/ Log: Fix test: missing incref diff --git a/pypy/module/cpyext/test/test_tupleobject.py b/pypy/module/cpyext/test/test_tupleobject.py --- a/pypy/module/cpyext/test/test_tupleobject.py +++ b/pypy/module/cpyext/test/test_tupleobject.py @@ -1,6 +1,7 @@ import py -from pypy.module.cpyext.pyobject import PyObject, PyObjectP, make_ref, from_ref +from pypy.module.cpyext.pyobject import ( + PyObject, PyObjectP, make_ref, from_ref, incref) from pypy.module.cpyext.test.test_api import BaseApiTest, raises_w from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase from rpython.rtyper.lltypesystem import rffi, lltype @@ -14,6 +15,7 @@ def test_tupleobject(self, space): assert not PyTuple_Check(space, space.w_None) with raises_w(space, SystemError): + incref(space, space.w_None) PyTuple_SetItem(space, space.w_None, 0, space.w_None) atuple = space.newtuple([space.wrap(0), space.wrap(1), space.wrap('yay')]) From pypy.commits at gmail.com Sun Dec 17 08:32:43 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 17 Dec 2017 05:32:43 -0800 (PST) Subject: [pypy-commit] pypy py3.5: continulet pickling seems to be working Message-ID: <5a3671fb.6596df0a.35d41.f79c@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93449:ed6f48246238 Date: 2017-12-17 13:31 +0000 http://bitbucket.org/pypy/pypy/changeset/ed6f48246238/ Log: continulet pickling seems to be working diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -118,16 +118,12 @@ return self.space.newbool(valid) def descr__reduce__(self): - raise oefmt(self.space.w_NotImplementedError, - "continulet's pickle support is currently disabled") from pypy.module._continuation import interp_pickle return interp_pickle.reduce(self) def descr__setstate__(self, w_args): # XXX: review direct calls to frame.run(), notably when # unpickling generators (or coroutines!) 
- raise oefmt(self.space.w_NotImplementedError, - "continulet's pickle support is currently disabled") from pypy.module._continuation import interp_pickle interp_pickle.setstate(self, w_args) From pypy.commits at gmail.com Sun Dec 17 10:03:48 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 17 Dec 2017 07:03:48 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix 2 tests Message-ID: <5a368754.c7471c0a.e69a3.b5f5@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93450:413ffdce2e44 Date: 2017-12-17 15:03 +0000 http://bitbucket.org/pypy/pypy/changeset/413ffdce2e44/ Log: fix 2 tests diff --git a/pypy/module/pypyjit/test_pypy_c/test_containers.py b/pypy/module/pypyjit/test_pypy_c/test_containers.py --- a/pypy/module/pypyjit/test_pypy_c/test_containers.py +++ b/pypy/module/pypyjit/test_pypy_c/test_containers.py @@ -66,10 +66,12 @@ guard_not_invalidated(descr=...) p109 = call_r(ConstClass(ll_str__IntegerR_SignedConst_Signed), i5, descr=) guard_no_exception(descr=...) - guard_nonnull(p109, descr=...) - p10 = call_r(ConstClass(ll_str2unicode__rpy_stringPtr), p109, descr=) + i80 = strlen(p109) + p86 = call_r(ConstClass(str_decode_utf_8), p109, i80, ConstPtr(ptr82), 1, ConstClass(raise_unicode_exception_decode), 1, descr=) guard_no_exception(descr=...) + p10 = getfield_gc_r(p86, descr=) guard_nonnull(p10, descr=...) + i99 = unicodehash(p10) # NOTE: with siphash24, notably on unicodes, computing the hash # may raise MemoryError diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py --- a/pypy/module/pypyjit/test_pypy_c/test_string.py +++ b/pypy/module/pypyjit/test_pypy_c/test_string.py @@ -116,10 +116,7 @@ p97 = call_r(ConstClass(_rpy_unicode_to_decimal_w), p25, descr=) guard_no_exception(descr=...) i98 = unicodelen(p97) - p99 = force_token() - setfield_gc(p0, p99, descr=) - p104 = call_may_force_r(ConstClass(unicode_encode_utf_8_impl), p97, i98, ConstPtr(ptr101), 1, 1, descr=) - guard_not_forced(descr=...) + p104 = call_r(ConstClass(unicode_encode_utf_8), p97, i98, ConstPtr(ptr94), 1, descr=) guard_no_exception(descr=...) i107 = call_i(ConstClass(string_to_int), p104, 16, descr=) guard_no_exception(descr=...) 
From pypy.commits at gmail.com Sun Dec 17 16:20:23 2017 From: pypy.commits at gmail.com (mattip) Date: Sun, 17 Dec 2017 13:20:23 -0800 (PST) Subject: [pypy-commit] buildbot default: fix for non-windows build slaves without git Message-ID: <5a36df97.4fabdf0a.b5a0.1c54@mx.google.com> Author: Matti Picus Branch: Changeset: r1052:054832151c19 Date: 2017-12-17 23:19 +0200 http://bitbucket.org/pypy/buildbot/changeset/054832151c19/ Log: fix for non-windows build slaves without git diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -470,9 +470,11 @@ if platform == 'win32': virt_pypy = r'..\venv\pypy-venv\Scripts\python.exe' clean = 'rmdir /s /q pypy-venv' + virt_package = 'git+git://github.com/pypa/virtualenv at master' else: virt_pypy = '../venv/pypy-venv/bin/python' clean = 'rm -rf pypy-venv' + virt_package = 'virtualenv' target = Property('target_path') factory.addStep(ShellCmd( description="ensurepip", @@ -486,7 +488,7 @@ factory.addStep(ShellCmd( description="Install recent virtualenv", command=prefix + [target, '-mpip', 'install', '--upgrade', - 'git+git://github.com/pypa/virtualenv at master'], + virt_package], workdir='venv', flunkOnFailure=True)) factory.addStep(ShellCmd( From pypy.commits at gmail.com Sun Dec 17 16:26:15 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 17 Dec 2017 13:26:15 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix translation Message-ID: <5a36e0f7.06d21c0a.5abd1.e7b7@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93451:b1f6c07626d3 Date: 2017-12-17 21:23 +0000 http://bitbucket.org/pypy/pypy/changeset/b1f6c07626d3/ Log: fix translation diff --git a/pypy/module/_continuation/interp_pickle.py b/pypy/module/_continuation/interp_pickle.py --- a/pypy/module/_continuation/interp_pickle.py +++ b/pypy/module/_continuation/interp_pickle.py @@ -2,10 +2,9 @@ from rpython.rlib import jit from pypy.interpreter.error import OperationError from pypy.interpreter.pyframe import PyFrame -from pypy.module._continuation.interp_continuation import State, global_state -from pypy.module._continuation.interp_continuation import build_sthread -from pypy.module._continuation.interp_continuation import post_switch -from pypy.module._continuation.interp_continuation import get_result, geterror +from pypy.module._continuation.interp_continuation import ( + State, global_state, build_sthread, pre_switch, post_switch, + get_result, geterror) def getunpickle(space): @@ -65,9 +64,10 @@ if self.bottomframe is None: w_result = space.w_None else: + saved_exception = pre_switch(sthread) h = sthread.switch(self.h) try: - w_result = post_switch(sthread, h) + w_result = post_switch(sthread, h, saved_exception) operr = None except OperationError as e: w_result = None From pypy.commits at gmail.com Sun Dec 17 17:07:42 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 17 Dec 2017 14:07:42 -0800 (PST) Subject: [pypy-commit] pypy py3.5: backout ed6f48246238 Message-ID: <5a36eaae.43a6df0a.1fea0.e026@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93452:3bdc752d4824 Date: 2017-12-17 21:48 +0000 http://bitbucket.org/pypy/pypy/changeset/3bdc752d4824/ Log: backout ed6f48246238 diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -118,12 +118,16 @@ return self.space.newbool(valid) def descr__reduce__(self): + raise 
oefmt(self.space.w_NotImplementedError, + "continulet's pickle support is currently disabled") from pypy.module._continuation import interp_pickle return interp_pickle.reduce(self) def descr__setstate__(self, w_args): # XXX: review direct calls to frame.run(), notably when # unpickling generators (or coroutines!) + raise oefmt(self.space.w_NotImplementedError, + "continulet's pickle support is currently disabled") from pypy.module._continuation import interp_pickle interp_pickle.setstate(self, w_args) From pypy.commits at gmail.com Sun Dec 17 17:07:44 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 17 Dec 2017 14:07:44 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix test_gr_frame in test_greenlet.py Message-ID: <5a36eab0.d23f1c0a.12e64.e6b9@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93453:cf79d090d371 Date: 2017-12-17 22:07 +0000 http://bitbucket.org/pypy/pypy/changeset/cf79d090d371/ Log: Fix test_gr_frame in test_greenlet.py diff --git a/lib_pypy/greenlet.py b/lib_pypy/greenlet.py --- a/lib_pypy/greenlet.py +++ b/lib_pypy/greenlet.py @@ -127,7 +127,7 @@ return None if self.__main: self = getcurrent() - f = _continulet.__reduce__(self)[2][0] + f = self._get_frame() if not f: return None return f.f_back.f_back.f_back # go past start(), __switch(), switch() diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -131,6 +131,15 @@ from pypy.module._continuation import interp_pickle interp_pickle.setstate(self, w_args) + def descr_get_frame(self, space): + if self.sthread is None: + w_frame = space.w_False + elif self.sthread.is_empty_handle(self.h): + w_frame = space.w_None + else: + w_frame = self.bottomframe + return w_frame + def W_Continulet___new__(space, w_subtype, __args__): r = space.allocate_instance(W_Continulet, w_subtype) @@ -153,6 +162,7 @@ is_pending = interp2app(W_Continulet.descr_is_pending), __reduce__ = interp2app(W_Continulet.descr__reduce__), __setstate__= interp2app(W_Continulet.descr__setstate__), + _get_frame=interp2app(W_Continulet.descr_get_frame) ) # ____________________________________________________________ diff --git a/pypy/module/_continuation/interp_pickle.py b/pypy/module/_continuation/interp_pickle.py --- a/pypy/module/_continuation/interp_pickle.py +++ b/pypy/module/_continuation/interp_pickle.py @@ -18,12 +18,7 @@ # __getnewargs__ or __getstate__ defined in the subclass, etc. # Doing the right thing looks involved, though... 
space = self.space - if self.sthread is None: - w_frame = space.w_False - elif self.sthread.is_empty_handle(self.h): - w_frame = space.w_None - else: - w_frame = self.bottomframe + w_frame = self.descr_get_frame(space) w_continulet_type = space.type(self) w_dict = self.getdict(space) or space.w_None args = [getunpickle(space), From pypy.commits at gmail.com Sun Dec 17 17:14:53 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 17 Dec 2017 14:14:53 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a36ec5d.ceb51c0a.97f0b.557f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93454:774bffb126ce Date: 2017-12-17 22:14 +0000 http://bitbucket.org/pypy/pypy/changeset/774bffb126ce/ Log: hg merge default diff --git a/pypy/module/cpyext/pyerrors.py b/pypy/module/cpyext/pyerrors.py --- a/pypy/module/cpyext/pyerrors.py +++ b/pypy/module/cpyext/pyerrors.py @@ -154,7 +154,7 @@ error indicator.""" raise oefmt(space.w_TypeError, "bad argument type for built-in operation") - at cpython_api([], lltype.Void) + at cpython_api([], lltype.Void, error=None) def PyErr_BadInternalCall(space): raise oefmt(space.w_SystemError, "Bad internal call!") diff --git a/pypy/module/cpyext/test/test_pyerrors.py b/pypy/module/cpyext/test/test_pyerrors.py --- a/pypy/module/cpyext/test/test_pyerrors.py +++ b/pypy/module/cpyext/test/test_pyerrors.py @@ -463,3 +463,15 @@ ]) res = module.test(StopIteration("foo!")) assert res == "foo!" + + def test_PyErr_BadInternalCall(self): + # NB. it only seemed to fail when run with '-s'... but I think + # that it always printed stuff to stderr + module = self.import_extension('foo', [ + ("oops", "METH_NOARGS", + r''' + PyErr_BadInternalCall(); + return NULL; + '''), + ]) + raises(SystemError, module.oops) diff --git a/pypy/module/cpyext/test/test_tupleobject.py b/pypy/module/cpyext/test/test_tupleobject.py --- a/pypy/module/cpyext/test/test_tupleobject.py +++ b/pypy/module/cpyext/test/test_tupleobject.py @@ -1,6 +1,7 @@ import py -from pypy.module.cpyext.pyobject import PyObject, PyObjectP, make_ref, from_ref +from pypy.module.cpyext.pyobject import ( + PyObject, PyObjectP, make_ref, from_ref, incref) from pypy.module.cpyext.test.test_api import BaseApiTest, raises_w from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase from rpython.rtyper.lltypesystem import rffi, lltype @@ -14,6 +15,7 @@ def test_tupleobject(self, space): assert not PyTuple_Check(space, space.w_None) with raises_w(space, SystemError): + incref(space, space.w_None) PyTuple_SetItem(space, space.w_None, 0, space.w_None) atuple = space.newtuple([space.wrap(0), space.wrap(1), space.wrap('yay')]) From pypy.commits at gmail.com Mon Dec 18 03:37:47 2017 From: pypy.commits at gmail.com (amauryfa) Date: Mon, 18 Dec 2017 00:37:47 -0800 (PST) Subject: [pypy-commit] pypy py3.6: CPython Issue #25971: Unify error messages in float.as_integer_ratio(), Decimal.as_integer_ratio(), and Fraction constructors. Message-ID: <5a377e5b.c4bf1c0a.2e67f.3ced@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93455:bea2e807c6dc Date: 2017-12-14 00:05 +0100 http://bitbucket.org/pypy/pypy/changeset/bea2e807c6dc/ Log: CPython Issue #25971: Unify error messages in float.as_integer_ratio(), Decimal.as_integer_ratio(), and Fraction constructors. 
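As an illustration of the unified wording (a sketch of a hypothetical interpreter session, not part of the changeset):

    >>> float('inf').as_integer_ratio()
    Traceback (most recent call last):
      ...
    OverflowError: cannot convert Infinity to integer ratio
    >>> float('nan').as_integer_ratio()
    Traceback (most recent call last):
      ...
    ValueError: cannot convert NaN to integer ratio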
diff --git a/pypy/objspace/std/floatobject.py b/pypy/objspace/std/floatobject.py --- a/pypy/objspace/std/floatobject.py +++ b/pypy/objspace/std/floatobject.py @@ -620,10 +620,10 @@ num, den = float_as_rbigint_ratio(value) except OverflowError: raise oefmt(space.w_OverflowError, - "cannot pass infinity to as_integer_ratio()") + "cannot convert Infinity to integer ratio") except ValueError: raise oefmt(space.w_ValueError, - "cannot pass nan to as_integer_ratio()") + "cannot convert NaN to integer ratio") w_num = space.newlong_from_rbigint(num) w_den = space.newlong_from_rbigint(den) From pypy.commits at gmail.com Mon Dec 18 03:37:51 2017 From: pypy.commits at gmail.com (amauryfa) Date: Mon, 18 Dec 2017 00:37:51 -0800 (PST) Subject: [pypy-commit] pypy py3.6: CPython Issue #16864: Cursor.lastrowid now supports REPLACE statement Message-ID: <5a377e5f.8a871c0a.5d16b.6a21@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93457:b9413da29d91 Date: 2017-12-14 22:52 +0100 http://bitbucket.org/pypy/pypy/changeset/b9413da29d91/ Log: CPython Issue #16864: Cursor.lastrowid now supports REPLACE statement diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py --- a/lib_pypy/_sqlite3.py +++ b/lib_pypy/_sqlite3.py @@ -930,7 +930,9 @@ self.__rowcount = 0 self.__rowcount += _lib.sqlite3_changes(self.__connection._db) - if not multiple and self.__statement._type == _STMT_TYPE_INSERT: + if not multiple and self.__statement._type in ( + # REPLACE is an alias for INSERT OR REPLACE + _STMT_TYPE_INSERT, _STMT_TYPE_REPLACE): self.__lastrowid = _lib.sqlite3_last_insert_rowid(self.__connection._db) else: self.__lastrowid = None From pypy.commits at gmail.com Mon Dec 18 03:37:54 2017 From: pypy.commits at gmail.com (amauryfa) Date: Mon, 18 Dec 2017 00:37:54 -0800 (PST) Subject: [pypy-commit] pypy py3.6: CPython Issue #29444: Add array bound check in group(), because the underlying Message-ID: <5a377e62.478edf0a.61a81.f2cc@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93458:dda99c39353f Date: 2017-12-17 18:57 +0100 http://bitbucket.org/pypy/pypy/changeset/dda99c39353f/ Log: CPython Issue #29444: Add array bound check in group(), because the underlying buffer is mutable. Difficult to test in non-translated code... 
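A minimal usage sketch of the behaviour described above (assumes an in-memory database; not part of the changeset):

    import sqlite3
    con = sqlite3.connect(":memory:")
    cur = con.cursor()
    cur.execute("CREATE TABLE t (id INTEGER PRIMARY KEY, v TEXT)")
    cur.execute("INSERT INTO t (v) VALUES ('a')")
    print(cur.lastrowid)                  # 1, as before
    cur.execute("REPLACE INTO t (id, v) VALUES (1, 'b')")
    print(cur.lastrowid)                  # now also set for REPLACE (previously None)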
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -36,6 +36,9 @@ def slice_w(space, ctx, start, end, w_default): if 0 <= start <= end: if isinstance(ctx, rsre_core.BufMatchContext): + length = ctx._buffer.getlength() + start = min(start, length) + end = min(end, length) return space.newbytes(ctx._buffer.getslice(start, end, 1, end-start)) if isinstance(ctx, rsre_core.StrMatchContext): From pypy.commits at gmail.com Mon Dec 18 03:37:49 2017 From: pypy.commits at gmail.com (amauryfa) Date: Mon, 18 Dec 2017 00:37:49 -0800 (PST) Subject: [pypy-commit] pypy py3.6: Correctly port pypy modifications to sqlite tests Message-ID: <5a377e5d.90aa1c0a.853d6.2f5a@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93456:f83d3719aebd Date: 2017-12-14 22:48 +0100 http://bitbucket.org/pypy/pypy/changeset/f83d3719aebd/ Log: Correctly port pypy modifications to sqlite tests diff --git a/lib-python/3/sqlite3/test/userfunctions.py b/lib-python/3/sqlite3/test/userfunctions.py --- a/lib-python/3/sqlite3/test/userfunctions.py +++ b/lib-python/3/sqlite3/test/userfunctions.py @@ -314,7 +314,7 @@ # XXX it's better to raise OperationalError in order to stop # the query earlier. cur = self.con.cursor() - with self.assertRaises(AttributeError) as cm: + with self.assertRaises(sqlite.OperationalError) as cm: cur.execute("select nostep(t) from test") self.assertEqual(str(cm.exception), "user-defined aggregate's 'step' method raised error") From pypy.commits at gmail.com Mon Dec 18 03:37:56 2017 From: pypy.commits at gmail.com (amauryfa) Date: Mon, 18 Dec 2017 00:37:56 -0800 (PST) Subject: [pypy-commit] pypy py3.6: re.Match.group() now accepts index-like objects. Message-ID: <5a377e64.1dbf1c0a.fbeed.4338@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93459:5e9a2c6daf17 Date: 2017-12-17 19:19 +0100 http://bitbucket.org/pypy/pypy/changeset/5e9a2c6daf17/ Log: re.Match.group() now accepts index-like objects. 
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -604,7 +604,7 @@ def do_span(self, w_arg): space = self.space try: - groupnum = space.int_w(w_arg) + groupnum = space.getindex_w(w_arg, space.w_OverflowError) except OperationError as e: if not e.match(space, space.w_TypeError) and \ not e.match(space, space.w_OverflowError): diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py --- a/pypy/module/_sre/test/test_app_sre.py +++ b/pypy/module/_sre/test/test_app_sre.py @@ -227,6 +227,15 @@ exc = raises(IndexError, re.match("", "").group, sys.maxsize + 1) assert str(exc.value) == "no such group" + def test_group_takes_index(self): + import re + class Index: + def __init__(self, value): + self.value = value + def __index__(self): + return self.value + assert re.match("(foo)", "foo").group(Index(1)) == "foo" + def test_expand(self): import re m = re.search("a(..)(?P..)", "ab1bc") From pypy.commits at gmail.com Mon Dec 18 03:37:58 2017 From: pypy.commits at gmail.com (amauryfa) Date: Mon, 18 Dec 2017 00:37:58 -0800 (PST) Subject: [pypy-commit] pypy py3.6: CPython Issue #24454: Regular expression match object groups are now Message-ID: <5a377e66.82641c0a.c922e.5f7e@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93460:0e57c6df8538 Date: 2017-12-17 19:34 +0100 http://bitbucket.org/pypy/pypy/changeset/0e57c6df8538/ Log: CPython Issue #24454: Regular expression match object groups are now accessible using __getitem__. "mo[x]" is equivalent to "mo.group(x)". diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -533,6 +533,10 @@ space = self.space raise oefmt(space.w_TypeError, "cannot copy this match object") + def descr_getitem(self, space, w_index): + start, end = self.do_span(w_index) + return slice_w(space, self.ctx, start, end, space.w_None) + @jit.look_inside_iff(lambda self, args_w: jit.isconstant(len(args_w))) def group_w(self, args_w): space = self.space @@ -684,6 +688,8 @@ __copy__ = interp2app(W_SRE_Match.cannot_copy_w), __deepcopy__ = interp2app(W_SRE_Match.cannot_copy_w), __repr__ = interp2app(W_SRE_Match.repr_w), + __getitem__ = interp2app(W_SRE_Match.descr_getitem), + # group = interp2app(W_SRE_Match.group_w), groups = interp2app(W_SRE_Match.groups_w), groupdict = interp2app(W_SRE_Match.groupdict_w), diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py --- a/pypy/module/_sre/test/test_app_sre.py +++ b/pypy/module/_sre/test/test_app_sre.py @@ -236,6 +236,10 @@ return self.value assert re.match("(foo)", "foo").group(Index(1)) == "foo" + def test_getitem(self): + import re + assert re.match("(foo)bar", "foobar")[1] == "foo" + def test_expand(self): import re m = re.search("a(..)(?P..)", "ab1bc") From pypy.commits at gmail.com Mon Dec 18 03:38:00 2017 From: pypy.commits at gmail.com (amauryfa) Date: Mon, 18 Dec 2017 00:38:00 -0800 (PST) Subject: [pypy-commit] pypy py3.6: CPython Issue #28727: re.Pattern objects created by re.compile() become comparable Message-ID: <5a377e68.449f1c0a.996bb.7ff8@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93461:77d216c5b248 Date: 2017-12-17 22:03 +0100 http://bitbucket.org/pypy/pypy/changeset/77d216c5b248/ Log: CPython Issue #28727: re.Pattern objects created by re.compile() become comparable (only x==y and x!=y operators) diff --git 
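A short usage sketch mirroring the new test (not part of the changeset itself):

    import re

    class Index:
        def __init__(self, value):
            self.value = value
        def __index__(self):
            return self.value

    m = re.match("(foo)", "foo")
    print(m.group(Index(1)))        # 'foo' -- any object with __index__ is accepted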
a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -5,6 +5,7 @@ from pypy.interpreter.typedef import make_weakref_descr from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault from pypy.interpreter.error import OperationError, oefmt +from rpython.rlib.objectmodel import compute_hash from rpython.rlib.rarithmetic import intmask from rpython.rlib import jit from rpython.rlib.rstring import StringBuilder, UnicodeBuilder @@ -133,6 +134,24 @@ uflags = u'|'.join([item.decode('latin-1') for item in flag_items]) return space.newunicode(u're.compile(%s%s%s)' % (u, usep, uflags)) + def descr_eq(self, space, w_other): + if not isinstance(w_other, W_SRE_Pattern): + return space.w_NotImplemented + other = w_other + # Compare the code and the pattern because the same pattern can + # produce different codes depending on the locale used to compile the + # pattern when the re.LOCALE flag is used. Don't compare groups, + # indexgroup nor groupindex: they are derivated from the pattern. + return space.newbool( + self.flags == other.flags and + self.code == other.code and + space.eq_w(self.w_pattern, other.w_pattern)) + + def descr_hash(self, space): + code = ''.join([chr(c) for c in self.code]) + return space.newint(compute_hash( + (self.flags, code, space.hash_w(self.w_pattern)))) + def fget_groupindex(self, space): w_groupindex = self.w_groupindex if space.isinstance_w(w_groupindex, space.w_dict): @@ -488,6 +507,8 @@ __deepcopy__ = interp2app(W_SRE_Pattern.cannot_copy_w), __repr__ = interp2app(W_SRE_Pattern.repr_w), __weakref__ = make_weakref_descr(W_SRE_Pattern), + __eq__ = interp2app(W_SRE_Pattern.descr_eq), + __hash__ = interp2app(W_SRE_Pattern.descr_hash), findall = interp2app(W_SRE_Pattern.findall_w), finditer = interp2app(W_SRE_Pattern.finditer_w), match = interp2app(W_SRE_Pattern.match_w), diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py --- a/pypy/module/_sre/test/test_app_sre.py +++ b/pypy/module/_sre/test/test_app_sre.py @@ -133,6 +133,33 @@ assert repr(r) == ( r"""re.compile('f(o"\\d)', re.IGNORECASE|re.DOTALL|re.VERBOSE)""") + def test_pattern_compare(self): + import re + pattern1 = re.compile('abc', re.IGNORECASE) + + # equal to itself + assert pattern1 == pattern1 + assert not(pattern1 != pattern1) + # equal + re.purge() + pattern2 = re.compile('abc', re.IGNORECASE) + assert hash(pattern2) == hash(pattern1) + assert pattern2 == pattern1 + + # not equal: different pattern + re.purge() + pattern3 = re.compile('XYZ', re.IGNORECASE) + # warranty that hash values are different + assert pattern3 != pattern1 + + # not equal: different flag (flags=0) + re.purge() + pattern4 = re.compile('abc') + assert pattern4 != pattern1 + + # only == and != comparison operators are supported + raises(TypeError, "pattern1 < pattern2") + class AppTestSreMatch: spaceconfig = dict(usemodules=('array', )) From pypy.commits at gmail.com Mon Dec 18 03:38:02 2017 From: pypy.commits at gmail.com (amauryfa) Date: Mon, 18 Dec 2017 00:38:02 -0800 (PST) Subject: [pypy-commit] pypy py3.6: Since Python3.6, the seed() call to urandom() has been moved to the _random module. Message-ID: <5a377e6a.4a981c0a.7f086.9197@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r93462:3f28eff2fcfc Date: 2017-12-18 09:36 +0100 http://bitbucket.org/pypy/pypy/changeset/3f28eff2fcfc/ Log: Since Python3.6, the seed() call to urandom() has been moved to the _random module. 
diff --git a/pypy/module/_random/interp_random.py b/pypy/module/_random/interp_random.py --- a/pypy/module/_random/interp_random.py +++ b/pypy/module/_random/interp_random.py @@ -1,13 +1,13 @@ import time -from pypy.interpreter.error import oefmt +from pypy.interpreter.error import oefmt, OperationError from pypy.interpreter.typedef import TypeDef from pypy.interpreter.gateway import interp2app, unwrap_spec from pypy.interpreter.baseobjspace import W_Root +from pypy.module.posix import interp_posix from rpython.rlib.rarithmetic import r_uint, intmask, widen from rpython.rlib import rbigint, rrandom, rstring - def descr_new__(space, w_subtype, __args__): w_anything = __args__.firstarg() x = space.allocate_instance(W_Random, w_subtype) @@ -25,14 +25,19 @@ return space.newfloat(self._rnd.random()) def seed(self, space, w_n=None): - if w_n is None: - w_n = space.newint(int(time.time())) + if space.is_none(w_n): + # TODO: Use a non-blocking version of urandom + try: + w_n = interp_posix.urandom(space, 8) + except OperationError as e: + if not e.match(space, space.w_OSError): + raise + w_n = space.newint(int(time.time() * 256)) + if space.isinstance_w(w_n, space.w_int): + w_n = space.abs(w_n) else: - if space.isinstance_w(w_n, space.w_int): - w_n = space.abs(w_n) - else: - n = space.hash_w(w_n) - w_n = space.newint(r_uint(n)) + n = space.hash_w(w_n) + w_n = space.newint(r_uint(n)) key = [] w_one = space.newint(1) w_two = space.newint(2) diff --git a/pypy/module/_random/test/test_random.py b/pypy/module/_random/test/test_random.py --- a/pypy/module/_random/test/test_random.py +++ b/pypy/module/_random/test/test_random.py @@ -86,9 +86,9 @@ rnd = _random.Random() rnd.seed() state1 = rnd.getstate() - import time; time.sleep(1.1) # must be at least 1 second here - rnd.seed() # (note that random.py overrides - state2 = rnd.getstate() # seed() to improve the resolution) + import time; time.sleep(0.01) + rnd.seed() + state2 = rnd.getstate() assert state1 != state2 def test_randbits(self): From pypy.commits at gmail.com Mon Dec 18 04:57:36 2017 From: pypy.commits at gmail.com (antocuni) Date: Mon, 18 Dec 2017 01:57:36 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: fix Message-ID: <5a379110.57b61c0a.3c9e3.ef22@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93463:0a82f7762c72 Date: 2017-12-18 10:56 +0100 http://bitbucket.org/pypy/pypy/changeset/0a82f7762c72/ Log: fix diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -37,6 +37,7 @@ return res return stack """) + cls.w_appdirect = cls.space.wrap(cls.runappdirect) if cls.runappdirect: # make sure that "self.stack" does not pass the self cls.w_stack = staticmethod(cls.w_stack.im_func) @@ -798,7 +799,7 @@ raises(error, continulet.switch, c1, to=c2) def test_sampling_inside_callback(self): - if self.runappdirect: + if self.appdirect: # see also # extra_tests.test_vmprof_greenlet.test_sampling_inside_callback # for a "translated" version of this test From pypy.commits at gmail.com Mon Dec 18 05:04:13 2017 From: pypy.commits at gmail.com (antocuni) Date: Mon, 18 Dec 2017 02:04:13 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: close merged branch Message-ID: <5a37929d.ee85df0a.9bceb.16cb@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93464:339c7996dc19 Date: 2017-12-18 11:02 
+0100 http://bitbucket.org/pypy/pypy/changeset/339c7996dc19/ Log: close merged branch From pypy.commits at gmail.com Mon Dec 18 05:04:16 2017 From: pypy.commits at gmail.com (antocuni) Date: Mon, 18 Dec 2017 02:04:16 -0800 (PST) Subject: [pypy-commit] pypy default: merge the fix-vmprof-stacklet-switch-2 branch, which fixes vmprof+greenlet: Message-ID: <5a3792a0.14121c0a.64efd.eea9@mx.google.com> Author: Antonio Cuni Branch: Changeset: r93465:c30916ebe15f Date: 2017-12-18 11:03 +0100 http://bitbucket.org/pypy/pypy/changeset/c30916ebe15f/ Log: merge the fix-vmprof-stacklet-switch-2 branch, which fixes vmprof+greenlet: before, vmprof did not take any sample inside greenlets as soon as you do a switch(). diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt --- a/extra_tests/requirements.txt +++ b/extra_tests/requirements.txt @@ -1,2 +1,3 @@ pytest hypothesis +vmprof diff --git a/extra_tests/test_vmprof_greenlet.py b/extra_tests/test_vmprof_greenlet.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_vmprof_greenlet.py @@ -0,0 +1,28 @@ +import time +import pytest +import greenlet +import vmprof + +def count_samples(filename): + stats = vmprof.read_profile(filename) + return len(stats.profiles) + +def cpuburn(duration): + end = time.time() + duration + while time.time() < end: + pass + +def test_sampling_inside_callback(tmpdir): + # see also test_sampling_inside_callback inside + # pypy/module/_continuation/test/test_stacklet.py + # + G = greenlet.greenlet(cpuburn) + fname = tmpdir.join('log.vmprof') + with fname.open('w+b') as f: + vmprof.enable(f.fileno(), 1/250.0) + G.switch(0.1) + vmprof.disable() + + samples = count_samples(str(fname)) + # 0.1 seconds at 250Hz should be 25 samples + assert 23 < samples < 27 diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -31,7 +31,7 @@ Upgrade the _vmprof backend to vmprof 0.4.10 .. branch: fix-vmprof-stacklet-switch - +.. branch: fix-vmprof-stacklet-switch-2 Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) .. branch: win32-vcvars @@ -39,3 +39,4 @@ .. branch: rdict-fast-hash Make it possible to declare that the hash function of an r_dict is fast in RPython. 
+ diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -1,5 +1,6 @@ from rpython.rlib.rstacklet import StackletThread from rpython.rlib import jit +from rpython.rlib import rvmprof from pypy.interpreter.error import OperationError, get_cleared_operation_error from pypy.interpreter.executioncontext import ExecutionContext from pypy.interpreter.baseobjspace import W_Root @@ -222,12 +223,15 @@ self.h = h global_state.clear() try: + rvmprof.start_sampling() frame = self.bottomframe w_result = frame.execute_frame() except Exception as e: global_state.propagate_exception = e else: global_state.w_value = w_result + finally: + rvmprof.stop_sampling() self.sthread.ec.topframeref = jit.vref_None global_state.origin = self global_state.destination = self diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -1,7 +1,10 @@ +import pytest import os +from rpython.rlib.rvmprof.test.support import fakevmprof +from pypy.interpreter.gateway import interp2app from pypy.module._continuation.test.support import BaseAppTest - + at pytest.mark.usefixtures('app_fakevmprof') class AppTestStacklet(BaseAppTest): def setup_class(cls): BaseAppTest.setup_class.im_func(cls) @@ -34,10 +37,34 @@ return res return stack """) + cls.w_appdirect = cls.space.wrap(cls.runappdirect) if cls.runappdirect: # make sure that "self.stack" does not pass the self cls.w_stack = staticmethod(cls.w_stack.im_func) + + @pytest.fixture + def app_fakevmprof(self, fakevmprof): + """ + This is automaticaly re-initialized for every method: thanks to + fakevmprof's finalizer, it checks that we called {start,stop}_sampling + the in pairs + """ + w = self.space.wrap + i2a = interp2app + def is_sampling_enabled(space): + return space.wrap(fakevmprof.is_sampling_enabled) + self.w_is_sampling_enabled = w(i2a(is_sampling_enabled)) + # + def start_sampling(space): + fakevmprof.start_sampling() + self.w_start_sampling = w(i2a(start_sampling)) + # + def stop_sampling(space): + fakevmprof.stop_sampling() + self.w_stop_sampling = w(i2a(stop_sampling)) + + def test_new_empty(self): from _continuation import continulet # @@ -770,3 +797,25 @@ continulet.switch(c1, to=c2) raises(error, continulet.switch, c1, to=c2) + + def test_sampling_inside_callback(self): + if self.appdirect: + # see also + # extra_tests.test_vmprof_greenlet.test_sampling_inside_callback + # for a "translated" version of this test + skip("we can't run this until we have _vmprof.is_sampling_enabled") + from _continuation import continulet + # + def my_callback(c1): + assert self.is_sampling_enabled() + return 42 + # + try: + self.start_sampling() + assert self.is_sampling_enabled() + c = continulet(my_callback) + res = c.switch() + assert res == 42 + assert self.is_sampling_enabled() + finally: + self.stop_sampling() diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -1,4 +1,5 @@ import py +import pytest try: import _continuation except ImportError: @@ -101,11 +102,7 @@ particular, we need to ensure that vmprof does not sample the stack in the middle of a switch, else we read 
nonsense. """ - try: - import _vmprof - except ImportError: - py.test.skip("no _vmprof") - # + _vmprof = pytest.importorskip('_vmprof') def switch_forever(c): while True: c.switch() diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -3,7 +3,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import fetch_translated_config from rpython.rtyper.lltypesystem import lltype, llmemory -from rpython.rlib.rvmprof import cintf +from rpython.rlib import rvmprof DEBUG = False @@ -25,12 +25,12 @@ def new(self, callback, arg=llmemory.NULL): if DEBUG: callback = _debug_wrapper(callback) - x = cintf.save_rvmprof_stack() + x = rvmprof.save_stack() try: - cintf.empty_rvmprof_stack() + rvmprof.empty_stack() h = self._gcrootfinder.new(self, callback, arg) finally: - cintf.restore_rvmprof_stack(x) + rvmprof.restore_stack(x) if DEBUG: debug.add(h) return h @@ -40,11 +40,11 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) - x = cintf.save_rvmprof_stack() + x = rvmprof.save_stack() try: h = self._gcrootfinder.switch(stacklet) finally: - cintf.restore_rvmprof_stack(x) + rvmprof.restore_stack(x) if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -56,10 +56,27 @@ return None def stop_sampling(): - from rpython.rlib.rvmprof.cintf import vmprof_stop_sampling - fd = vmprof_stop_sampling() - return rffi.cast(lltype.Signed, fd) + return _get_vmprof().stop_sampling() def start_sampling(): - from rpython.rlib.rvmprof.cintf import vmprof_start_sampling - vmprof_start_sampling() + return _get_vmprof().start_sampling() + +# ---------------- +# stacklet support +# ---------------- +# +# Ideally, vmprof_tl_stack, VMPROFSTACK etc. should be part of "self.cintf": +# not sure why they are a global. Eventually, we should probably fix all this +# mess. +from rpython.rlib.rvmprof.cintf import vmprof_tl_stack, VMPROFSTACK + +def save_stack(): + stop_sampling() + return vmprof_tl_stack.get_or_make_raw() + +def empty_stack(): + vmprof_tl_stack.setraw(lltype.nullptr(VMPROFSTACK)) + +def restore_stack(x): + vmprof_tl_stack.setraw(x) + start_sampling() diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -122,32 +122,16 @@ lltype.Signed, compilation_info=eci, _nowrapper=True) + vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], + rffi.INT, compilation_info=eci, + _nowrapper=True) + vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], + lltype.Void, compilation_info=eci, + _nowrapper=True) + return CInterface(locals()) -# this is always present, but compiles to no-op if RPYTHON_VMPROF is not -# defined (i.e. 
if we don't actually use vmprof in the generated C) -auto_eci = ExternalCompilationInfo(post_include_bits=[""" -#ifndef RPYTHON_VMPROF -# define vmprof_stop_sampling() (-1) -# define vmprof_start_sampling() ((void)0) -#endif -"""]) - -if get_translation_config() is None: - # tests need the full eci here - _eci = global_eci -else: - _eci = auto_eci - -vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], - rffi.INT, compilation_info=_eci, - _nowrapper=True) -vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], - lltype.Void, compilation_info=_eci, - _nowrapper=True) - - class CInterface(object): def __init__(self, namespace): for k, v in namespace.iteritems(): @@ -232,20 +216,6 @@ leave_code(s) # -# stacklet support - -def save_rvmprof_stack(): - vmprof_stop_sampling() - return vmprof_tl_stack.get_or_make_raw() - -def empty_rvmprof_stack(): - vmprof_tl_stack.setraw(lltype.nullptr(VMPROFSTACK)) - -def restore_rvmprof_stack(x): - vmprof_tl_stack.setraw(x) - vmprof_start_sampling() - -# # traceback support def get_rvmprof_stack(): diff --git a/rpython/rlib/rvmprof/rvmprof.py b/rpython/rlib/rvmprof/rvmprof.py --- a/rpython/rlib/rvmprof/rvmprof.py +++ b/rpython/rlib/rvmprof/rvmprof.py @@ -168,6 +168,21 @@ if self.cintf.vmprof_register_virtual_function(name, uid, 500000) < 0: raise VMProfError("vmprof buffers full! disk full or too slow") + def stop_sampling(self): + """ + Temporarily stop the sampling of stack frames. Signals are still + delivered, but are ignored. + """ + fd = self.cintf.vmprof_stop_sampling() + return rffi.cast(lltype.Signed, fd) + + def start_sampling(self): + """ + Undo the effect of stop_sampling + """ + self.cintf.vmprof_start_sampling() + + def vmprof_execute_code(name, get_code_fn, result_class=None, _hack_update_stack_untranslated=False): """Decorator to be used on the function that interprets a code object. diff --git a/rpython/rlib/rvmprof/test/support.py b/rpython/rlib/rvmprof/test/support.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/test/support.py @@ -0,0 +1,45 @@ +import pytest +from rpython.rlib import rvmprof + +class FakeVMProf(object): + + def __init__(self): + self._enabled = False + self._ignore_signals = 1 + + # --- VMProf official API --- + # add fake methods as needed by the tests + + def stop_sampling(self): + self._ignore_signals += 1 + + def start_sampling(self): + assert self._ignore_signals > 0, ('calling start_sampling() without ' + 'the corresponding stop_sampling()?') + self._ignore_signals -= 1 + + # --- FakeVMProf specific API --- + # this API is not part of rvmprof, but available only inside tests using + # fakevmprof + + @property + def is_sampling_enabled(self): + return self._ignore_signals == 0 + + def check_status(self): + """ + To be called during test teardown + """ + if self._ignore_signals != 1: + msg = ('Invalid value for fakevmprof._ignore_signals: expected 1, ' + 'got %d. 
This probably means that you called ' + '{start,stop}_sampling() a wrong number of times') + raise ValueError, msg % self._ignore_signals + + + at pytest.fixture +def fakevmprof(request, monkeypatch): + fake = FakeVMProf() + monkeypatch.setattr(rvmprof.rvmprof, '_vmprof_instance', fake) + request.addfinalizer(fake.check_status) + return fake diff --git a/rpython/rlib/rvmprof/test/test_support.py b/rpython/rlib/rvmprof/test/test_support.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/test/test_support.py @@ -0,0 +1,42 @@ +import pytest +from rpython.rlib import rvmprof +from rpython.rlib.rvmprof.test.support import FakeVMProf, fakevmprof + +class TestFakeVMProf(object): + + def test_sampling(self): + fake = FakeVMProf() + assert not fake.is_sampling_enabled + # + fake.start_sampling() + assert fake.is_sampling_enabled + # + fake.stop_sampling() + fake.stop_sampling() + assert not fake.is_sampling_enabled + # + fake.start_sampling() + assert not fake.is_sampling_enabled + fake.start_sampling() + assert fake.is_sampling_enabled + # + pytest.raises(AssertionError, "fake.start_sampling()") + + def test_check_status(self): + fake = FakeVMProf() + fake.stop_sampling() + pytest.raises(ValueError, "fake.check_status()") + + +class TestFixture(object): + + def test_fixture(self, fakevmprof): + assert isinstance(fakevmprof, FakeVMProf) + assert rvmprof._get_vmprof() is fakevmprof + # + # tweak sampling using the "real" API, and check that we actually used + # the fake + rvmprof.start_sampling() + assert fakevmprof.is_sampling_enabled + rvmprof.stop_sampling() + assert not fakevmprof.is_sampling_enabled From pypy.commits at gmail.com Mon Dec 18 07:43:51 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 18 Dec 2017 04:43:51 -0800 (PST) Subject: [pypy-commit] pypy default: Add FormatMessageW() to get the errors in unicode Message-ID: <5a37b807.410e1c0a.3a471.b425@mx.google.com> Author: Armin Rigo Branch: Changeset: r93466:d5ba3ff15b4c Date: 2017-12-18 13:12 +0100 http://bitbucket.org/pypy/pypy/changeset/d5ba3ff15b4c/ Log: Add FormatMessageW() to get the errors in unicode diff --git a/rpython/rlib/rwin32.py b/rpython/rlib/rwin32.py --- a/rpython/rlib/rwin32.py +++ b/rpython/rlib/rwin32.py @@ -210,6 +210,10 @@ 'FormatMessageA', [DWORD, rffi.VOIDP, DWORD, DWORD, rffi.CCHARP, DWORD, rffi.VOIDP], DWORD) + FormatMessageW = winexternal( + 'FormatMessageW', + [DWORD, rffi.VOIDP, DWORD, DWORD, rffi.CWCHARP, DWORD, rffi.VOIDP], + DWORD) _get_osfhandle = rffi.llexternal('_get_osfhandle', [rffi.INT], HANDLE) @@ -286,6 +290,8 @@ # A bit like strerror... def FormatError(code): return llimpl_FormatError(code) + def FormatErrorW(code): + return llimpl_FormatErrorW(code) def llimpl_FormatError(code): "Return a message corresponding to the given Windows error code." @@ -318,6 +324,37 @@ return result + def llimpl_FormatErrorW(code): + "Return a unicode message corresponding to the given Windows error code." 
+ buf = lltype.malloc(rffi.CWCHARPP.TO, 1, flavor='raw') + buf[0] = lltype.nullptr(rffi.CWCHARP.TO) + try: + msglen = FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + None, + rffi.cast(DWORD, code), + DEFAULT_LANGUAGE, + rffi.cast(rffi.CWCHARP, buf), + 0, None) + buflen = intmask(msglen) + + # remove trailing cr/lf and dots + s_buf = buf[0] + while buflen > 0 and (ord(s_buf[buflen - 1]) <= ord(' ') or + s_buf[buflen - 1] == u'.'): + buflen -= 1 + + if buflen <= 0: + result = u'Windows Error %d' % (code,) + else: + result = rffi.wcharpsize2unicode(s_buf, buflen) + finally: + LocalFree(rffi.cast(rffi.VOIDP, buf[0])) + lltype.free(buf, flavor='raw') + + return result + def lastSavedWindowsError(context="Windows Error"): code = GetLastError_saved() return WindowsError(code, context) diff --git a/rpython/rlib/test/test_rwin32.py b/rpython/rlib/test/test_rwin32.py --- a/rpython/rlib/test/test_rwin32.py +++ b/rpython/rlib/test/test_rwin32.py @@ -64,3 +64,7 @@ msg = rwin32.FormatError(34) assert '%2' in msg +def test_formaterror_unicode(): + msg = rwin32.FormatErrorW(34) + assert type(msg) is unicode + assert u'%2' in msg diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1011,6 +1011,7 @@ # char** CCHARPP = lltype.Ptr(lltype.Array(CCHARP, hints={'nolength': True})) +CWCHARPP = lltype.Ptr(lltype.Array(CWCHARP, hints={'nolength': True})) def liststr2charpp(l): """ list[str] -> char**, NULL terminated From pypy.commits at gmail.com Mon Dec 18 07:43:53 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 18 Dec 2017 04:43:53 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a37b809.e986df0a.a1485.66e9@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93467:a04f367d45a5 Date: 2017-12-18 13:13 +0100 http://bitbucket.org/pypy/pypy/changeset/a04f367d45a5/ Log: hg merge default diff --git a/rpython/rlib/rwin32.py b/rpython/rlib/rwin32.py --- a/rpython/rlib/rwin32.py +++ b/rpython/rlib/rwin32.py @@ -210,6 +210,10 @@ 'FormatMessageA', [DWORD, rffi.VOIDP, DWORD, DWORD, rffi.CCHARP, DWORD, rffi.VOIDP], DWORD) + FormatMessageW = winexternal( + 'FormatMessageW', + [DWORD, rffi.VOIDP, DWORD, DWORD, rffi.CWCHARP, DWORD, rffi.VOIDP], + DWORD) _get_osfhandle = rffi.llexternal('_get_osfhandle', [rffi.INT], HANDLE) @@ -286,6 +290,8 @@ # A bit like strerror... def FormatError(code): return llimpl_FormatError(code) + def FormatErrorW(code): + return llimpl_FormatErrorW(code) def llimpl_FormatError(code): "Return a message corresponding to the given Windows error code." @@ -318,6 +324,37 @@ return result + def llimpl_FormatErrorW(code): + "Return a unicode message corresponding to the given Windows error code." 
+ buf = lltype.malloc(rffi.CWCHARPP.TO, 1, flavor='raw') + buf[0] = lltype.nullptr(rffi.CWCHARP.TO) + try: + msglen = FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + None, + rffi.cast(DWORD, code), + DEFAULT_LANGUAGE, + rffi.cast(rffi.CWCHARP, buf), + 0, None) + buflen = intmask(msglen) + + # remove trailing cr/lf and dots + s_buf = buf[0] + while buflen > 0 and (ord(s_buf[buflen - 1]) <= ord(' ') or + s_buf[buflen - 1] == u'.'): + buflen -= 1 + + if buflen <= 0: + result = u'Windows Error %d' % (code,) + else: + result = rffi.wcharpsize2unicode(s_buf, buflen) + finally: + LocalFree(rffi.cast(rffi.VOIDP, buf[0])) + lltype.free(buf, flavor='raw') + + return result + def lastSavedWindowsError(context="Windows Error"): code = GetLastError_saved() return WindowsError(code, context) diff --git a/rpython/rlib/test/test_rwin32.py b/rpython/rlib/test/test_rwin32.py --- a/rpython/rlib/test/test_rwin32.py +++ b/rpython/rlib/test/test_rwin32.py @@ -64,3 +64,7 @@ msg = rwin32.FormatError(34) assert '%2' in msg +def test_formaterror_unicode(): + msg = rwin32.FormatErrorW(34) + assert type(msg) is unicode + assert u'%2' in msg diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1011,6 +1011,7 @@ # char** CCHARPP = lltype.Ptr(lltype.Array(CCHARP, hints={'nolength': True})) +CWCHARPP = lltype.Ptr(lltype.Array(CWCHARP, hints={'nolength': True})) def liststr2charpp(l): """ list[str] -> char**, NULL terminated From pypy.commits at gmail.com Mon Dec 18 07:43:57 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 18 Dec 2017 04:43:57 -0800 (PST) Subject: [pypy-commit] pypy py3.5: More Windows compatibility to return unicode error messages Message-ID: <5a37b80d.8b8a1c0a.f669b.3668@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93469:b0e0af09b762 Date: 2017-12-18 13:41 +0100 http://bitbucket.org/pypy/pypy/changeset/b0e0af09b762/ Log: More Windows compatibility to return unicode error messages diff --git a/rpython/rlib/_rsocket_rffi.py b/rpython/rlib/_rsocket_rffi.py --- a/rpython/rlib/_rsocket_rffi.py +++ b/rpython/rlib/_rsocket_rffi.py @@ -1361,6 +1361,11 @@ def gai_strerror_str(errno): return rwin32.FormatError(errno) + def socket_strerror_unicode(errno): + return rwin32.FormatErrorW(errno) + def gai_strerror_unicode(errno): + return rwin32.FormatErrorW(errno) + # WinSock does not use a bitmask in select, and uses # socket handles greater than FD_SETSIZE MAX_FD_SIZE = None @@ -1372,4 +1377,9 @@ def gai_strerror_str(errno): return rffi.charp2str(gai_strerror(errno)) + def socket_strerror_unicode(errno): + return socket_strerror_str(errno).decode('latin-1') + def gai_strerror_unicode(errno): + return gai_strerror_str(errno).decode('latin-1') + MAX_FD_SIZE = FD_SETSIZE diff --git a/rpython/rlib/rpoll.py b/rpython/rlib/rpoll.py --- a/rpython/rlib/rpoll.py +++ b/rpython/rlib/rpoll.py @@ -28,12 +28,16 @@ self.errno = errno def get_msg(self): return _c.socket_strerror_str(self.errno) + def get_msg_unicode(self): + return _c.socket_strerror_unicode(self.errno) class SelectError(Exception): def __init__(self, errno): self.errno = errno def get_msg(self): return _c.socket_strerror_str(self.errno) + def get_msg_unicode(self): + return _c.socket_strerror_unicode(self.errno) # ____________________________________________________________ # poll() for POSIX systems diff --git a/rpython/rlib/rsocket.py 
b/rpython/rlib/rsocket.py --- a/rpython/rlib/rsocket.py +++ b/rpython/rlib/rsocket.py @@ -1298,6 +1298,8 @@ pass def get_msg(self): return '' + def get_msg_unicode(self): + return self.get_msg().decode('latin-1') def __str__(self): return self.get_msg() @@ -1314,6 +1316,8 @@ class CSocketError(SocketErrorWithErrno): def get_msg(self): return _c.socket_strerror_str(self.errno) + def get_msg_unicode(self): + return _c.socket_strerror_unicode(self.errno) def last_error(): return CSocketError(_c.geterrno()) @@ -1322,6 +1326,8 @@ applevelerrcls = 'gaierror' def get_msg(self): return _c.gai_strerror_str(self.errno) + def get_msg_unicode(self): + return _c.gai_strerror_unicode(self.errno) class HSocketError(SocketError): applevelerrcls = 'herror' diff --git a/rpython/rlib/test/test_rsocket.py b/rpython/rlib/test/test_rsocket.py --- a/rpython/rlib/test/test_rsocket.py +++ b/rpython/rlib/test/test_rsocket.py @@ -409,6 +409,7 @@ # catch-all address (i.e. opendns). e = py.test.raises(GAIError, getaddrinfo, 'www.very-invalidaddress.com', None) assert isinstance(e.value.get_msg(), str) + assert isinstance(e.value.get_msg_unicode(), unicode) def getaddrinfo_pydotorg(i, result): lst = getaddrinfo('python.org', None) From pypy.commits at gmail.com Mon Dec 18 07:43:55 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 18 Dec 2017 04:43:55 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Use FormatErrorW() instead of the bogus space.newtext(FormatError()), Message-ID: <5a37b80b.55a81c0a.51904.5e61@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93468:72a8c93b5914 Date: 2017-12-18 13:18 +0100 http://bitbucket.org/pypy/pypy/changeset/72a8c93b5914/ Log: Use FormatErrorW() instead of the bogus space.newtext(FormatError()), because FormatError() does not return utf8 at all diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py --- a/pypy/interpreter/error.py +++ b/pypy/interpreter/error.py @@ -621,12 +621,12 @@ if rwin32.WIN32 and isinstance(e, WindowsError): winerror = e.winerror try: - msg = rwin32.FormatError(winerror) + msg = rwin32.FormatErrorW(winerror) except ValueError: - msg = 'Windows Error %d' % winerror + msg = u'Windows Error %d' % winerror w_errno = space.w_None w_winerror = space.newint(winerror) - w_msg = space.newtext(msg) + w_msg = space.newunicode(msg) else: errno = e.errno if errno == EINTR: diff --git a/pypy/module/_cffi_backend/cerrno.py b/pypy/module/_cffi_backend/cerrno.py --- a/pypy/module/_cffi_backend/cerrno.py +++ b/pypy/module/_cffi_backend/cerrno.py @@ -26,5 +26,5 @@ from rpython.rlib.rwin32 import GetLastError_alt_saved, FormatError if code == -1: code = GetLastError_alt_saved() - message = FormatError(code) - return space.newtuple([space.newint(code), space.newtext(message)]) + message = FormatErrorW(code) + return space.newtuple([space.newint(code), space.newunicode(message)]) diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -628,7 +628,7 @@ if _MS_WINDOWS: @unwrap_spec(code=int) def FormatError(space, code): - return space.newtext(rwin32.FormatError(code)) + return space.newunicode(rwin32.FormatErrorW(code)) @unwrap_spec(hresult=int) def check_HRESULT(space, hresult): diff --git a/pypy/module/_winreg/interp_winreg.py b/pypy/module/_winreg/interp_winreg.py --- a/pypy/module/_winreg/interp_winreg.py +++ b/pypy/module/_winreg/interp_winreg.py @@ -8,10 +8,10 @@ from rpython.rlib.rarithmetic import r_uint, intmask def 
raiseWindowsError(space, errcode, context): - message = rwin32.FormatError(errcode) + message = rwin32.FormatErrorW(errcode) raise OperationError(space.w_WindowsError, space.newtuple([space.newint(errcode), - space.newtext(message)])) + space.newunicode(message)])) class W_HKEY(W_Root): def __init__(self, space, hkey): From pypy.commits at gmail.com Mon Dec 18 07:43:59 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 18 Dec 2017 04:43:59 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fixes for unicode error messages Message-ID: <5a37b80f.4a981c0a.7f086.a3a6@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93470:20fdafb25341 Date: 2017-12-18 13:41 +0100 http://bitbucket.org/pypy/pypy/changeset/20fdafb25341/ Log: Fixes for unicode error messages diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -21,6 +21,7 @@ # Fast version of the "strict" errors handler. def raise_unicode_exception_decode(errors, encoding, msg, s, startingpos, endingpos): + import pdb;pdb.set_trace() raise OperationError(space.w_UnicodeDecodeError, space.newtuple([space.newtext(encoding), space.newbytes(s), diff --git a/pypy/module/_socket/interp_socket.py b/pypy/module/_socket/interp_socket.py --- a/pypy/module/_socket/interp_socket.py +++ b/pypy/module/_socket/interp_socket.py @@ -855,7 +855,7 @@ @specialize.arg(2) def converted_error(space, e, eintr_retry=False): - message = e.get_msg() + message = e.get_msg_unicode() w_exception_class = get_error(space, e.applevelerrcls) if isinstance(e, SocketErrorWithErrno): if e.errno == errno.EINTR: @@ -863,9 +863,9 @@ if eintr_retry: return # only return None if eintr_retry==True w_exception = space.call_function(w_exception_class, space.newint(e.errno), - space.newtext(message)) + space.newunicode(message)) else: - w_exception = space.call_function(w_exception_class, space.newtext(message)) + w_exception = space.call_function(w_exception_class, space.newunicode(message)) raise OperationError(w_exception_class, w_exception) def explicit_socket_error(space, msg): diff --git a/pypy/module/select/interp_select.py b/pypy/module/select/interp_select.py --- a/pypy/module/select/interp_select.py +++ b/pypy/module/select/interp_select.py @@ -80,10 +80,10 @@ if timeout < 0: timeout = 0 continue - message = e.get_msg() + message = e.get_msg_unicode() raise OperationError(space.w_OSError, space.newtuple([space.newint(e.errno), - space.newtext(message)])) + space.newunicode(message)])) finally: self.running = False break @@ -152,9 +152,9 @@ break # normal path err = _c.geterrno() if err != errno.EINTR: - msg = _c.socket_strerror_str(err) + msg = _c.socket_strerror_unicode(err) raise OperationError(space.w_OSError, space.newtuple([ - space.newint(err), space.newtext(msg)])) + space.newint(err), space.newunicode(msg)])) # got EINTR, automatic retry space.getexecutioncontext().checksignals() if timeout > 0.0: diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py --- a/pypy/module/time/interp_time.py +++ b/pypy/module/time/interp_time.py @@ -441,7 +441,8 @@ _set_module_object(space, "timezone", space.newint(timezone)) _set_module_object(space, 'daylight', space.newint(daylight)) - tzname_w = [space.newtext(tzname[0]), space.newtext(tzname[1])] + tzname_w = [space.newunicode(tzname[0].decode('latin-1')), + space.newunicode(tzname[1].decode('latin-1'))] _set_module_object(space, 'tzname', space.newtuple(tzname_w)) _set_module_object(space, 
'altzone', space.newint(altzone)) From pypy.commits at gmail.com Mon Dec 18 07:44:01 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 18 Dec 2017 04:44:01 -0800 (PST) Subject: [pypy-commit] pypy default: backport b0e0af09b762 Message-ID: <5a37b811.cfb0df0a.1e569.f205@mx.google.com> Author: Armin Rigo Branch: Changeset: r93471:ed98419ad5e6 Date: 2017-12-18 13:43 +0100 http://bitbucket.org/pypy/pypy/changeset/ed98419ad5e6/ Log: backport b0e0af09b762 diff --git a/rpython/rlib/_rsocket_rffi.py b/rpython/rlib/_rsocket_rffi.py --- a/rpython/rlib/_rsocket_rffi.py +++ b/rpython/rlib/_rsocket_rffi.py @@ -1361,6 +1361,11 @@ def gai_strerror_str(errno): return rwin32.FormatError(errno) + def socket_strerror_unicode(errno): + return rwin32.FormatErrorW(errno) + def gai_strerror_unicode(errno): + return rwin32.FormatErrorW(errno) + # WinSock does not use a bitmask in select, and uses # socket handles greater than FD_SETSIZE MAX_FD_SIZE = None @@ -1372,4 +1377,9 @@ def gai_strerror_str(errno): return rffi.charp2str(gai_strerror(errno)) + def socket_strerror_unicode(errno): + return socket_strerror_str(errno).decode('latin-1') + def gai_strerror_unicode(errno): + return gai_strerror_str(errno).decode('latin-1') + MAX_FD_SIZE = FD_SETSIZE diff --git a/rpython/rlib/rpoll.py b/rpython/rlib/rpoll.py --- a/rpython/rlib/rpoll.py +++ b/rpython/rlib/rpoll.py @@ -28,12 +28,16 @@ self.errno = errno def get_msg(self): return _c.socket_strerror_str(self.errno) + def get_msg_unicode(self): + return _c.socket_strerror_unicode(self.errno) class SelectError(Exception): def __init__(self, errno): self.errno = errno def get_msg(self): return _c.socket_strerror_str(self.errno) + def get_msg_unicode(self): + return _c.socket_strerror_unicode(self.errno) # ____________________________________________________________ # poll() for POSIX systems diff --git a/rpython/rlib/rsocket.py b/rpython/rlib/rsocket.py --- a/rpython/rlib/rsocket.py +++ b/rpython/rlib/rsocket.py @@ -1298,6 +1298,8 @@ pass def get_msg(self): return '' + def get_msg_unicode(self): + return self.get_msg().decode('latin-1') def __str__(self): return self.get_msg() @@ -1314,6 +1316,8 @@ class CSocketError(SocketErrorWithErrno): def get_msg(self): return _c.socket_strerror_str(self.errno) + def get_msg_unicode(self): + return _c.socket_strerror_unicode(self.errno) def last_error(): return CSocketError(_c.geterrno()) @@ -1322,6 +1326,8 @@ applevelerrcls = 'gaierror' def get_msg(self): return _c.gai_strerror_str(self.errno) + def get_msg_unicode(self): + return _c.gai_strerror_unicode(self.errno) class HSocketError(SocketError): applevelerrcls = 'herror' diff --git a/rpython/rlib/test/test_rsocket.py b/rpython/rlib/test/test_rsocket.py --- a/rpython/rlib/test/test_rsocket.py +++ b/rpython/rlib/test/test_rsocket.py @@ -409,6 +409,7 @@ # catch-all address (i.e. opendns). 
e = py.test.raises(GAIError, getaddrinfo, 'www.very-invalidaddress.com', None) assert isinstance(e.value.get_msg(), str) + assert isinstance(e.value.get_msg_unicode(), unicode) def getaddrinfo_pydotorg(i, result): lst = getaddrinfo('python.org', None) From pypy.commits at gmail.com Mon Dec 18 07:44:03 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 18 Dec 2017 04:44:03 -0800 (PST) Subject: [pypy-commit] pypy default: merge heads Message-ID: <5a37b813.06d21c0a.5abd1.96c3@mx.google.com> Author: Armin Rigo Branch: Changeset: r93472:13a87780bd5a Date: 2017-12-18 13:43 +0100 http://bitbucket.org/pypy/pypy/changeset/13a87780bd5a/ Log: merge heads diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt --- a/extra_tests/requirements.txt +++ b/extra_tests/requirements.txt @@ -1,2 +1,3 @@ pytest hypothesis +vmprof diff --git a/extra_tests/test_vmprof_greenlet.py b/extra_tests/test_vmprof_greenlet.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_vmprof_greenlet.py @@ -0,0 +1,28 @@ +import time +import pytest +import greenlet +import vmprof + +def count_samples(filename): + stats = vmprof.read_profile(filename) + return len(stats.profiles) + +def cpuburn(duration): + end = time.time() + duration + while time.time() < end: + pass + +def test_sampling_inside_callback(tmpdir): + # see also test_sampling_inside_callback inside + # pypy/module/_continuation/test/test_stacklet.py + # + G = greenlet.greenlet(cpuburn) + fname = tmpdir.join('log.vmprof') + with fname.open('w+b') as f: + vmprof.enable(f.fileno(), 1/250.0) + G.switch(0.1) + vmprof.disable() + + samples = count_samples(str(fname)) + # 0.1 seconds at 250Hz should be 25 samples + assert 23 < samples < 27 diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -31,7 +31,7 @@ Upgrade the _vmprof backend to vmprof 0.4.10 .. branch: fix-vmprof-stacklet-switch - +.. branch: fix-vmprof-stacklet-switch-2 Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) .. branch: win32-vcvars @@ -39,3 +39,4 @@ .. branch: rdict-fast-hash Make it possible to declare that the hash function of an r_dict is fast in RPython. 
+ diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -1,5 +1,6 @@ from rpython.rlib.rstacklet import StackletThread from rpython.rlib import jit +from rpython.rlib import rvmprof from pypy.interpreter.error import OperationError, get_cleared_operation_error from pypy.interpreter.executioncontext import ExecutionContext from pypy.interpreter.baseobjspace import W_Root @@ -222,12 +223,15 @@ self.h = h global_state.clear() try: + rvmprof.start_sampling() frame = self.bottomframe w_result = frame.execute_frame() except Exception as e: global_state.propagate_exception = e else: global_state.w_value = w_result + finally: + rvmprof.stop_sampling() self.sthread.ec.topframeref = jit.vref_None global_state.origin = self global_state.destination = self diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -1,7 +1,10 @@ +import pytest import os +from rpython.rlib.rvmprof.test.support import fakevmprof +from pypy.interpreter.gateway import interp2app from pypy.module._continuation.test.support import BaseAppTest - + at pytest.mark.usefixtures('app_fakevmprof') class AppTestStacklet(BaseAppTest): def setup_class(cls): BaseAppTest.setup_class.im_func(cls) @@ -34,10 +37,34 @@ return res return stack """) + cls.w_appdirect = cls.space.wrap(cls.runappdirect) if cls.runappdirect: # make sure that "self.stack" does not pass the self cls.w_stack = staticmethod(cls.w_stack.im_func) + + @pytest.fixture + def app_fakevmprof(self, fakevmprof): + """ + This is automaticaly re-initialized for every method: thanks to + fakevmprof's finalizer, it checks that we called {start,stop}_sampling + the in pairs + """ + w = self.space.wrap + i2a = interp2app + def is_sampling_enabled(space): + return space.wrap(fakevmprof.is_sampling_enabled) + self.w_is_sampling_enabled = w(i2a(is_sampling_enabled)) + # + def start_sampling(space): + fakevmprof.start_sampling() + self.w_start_sampling = w(i2a(start_sampling)) + # + def stop_sampling(space): + fakevmprof.stop_sampling() + self.w_stop_sampling = w(i2a(stop_sampling)) + + def test_new_empty(self): from _continuation import continulet # @@ -770,3 +797,25 @@ continulet.switch(c1, to=c2) raises(error, continulet.switch, c1, to=c2) + + def test_sampling_inside_callback(self): + if self.appdirect: + # see also + # extra_tests.test_vmprof_greenlet.test_sampling_inside_callback + # for a "translated" version of this test + skip("we can't run this until we have _vmprof.is_sampling_enabled") + from _continuation import continulet + # + def my_callback(c1): + assert self.is_sampling_enabled() + return 42 + # + try: + self.start_sampling() + assert self.is_sampling_enabled() + c = continulet(my_callback) + res = c.switch() + assert res == 42 + assert self.is_sampling_enabled() + finally: + self.stop_sampling() diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -1,4 +1,5 @@ import py +import pytest try: import _continuation except ImportError: @@ -101,11 +102,7 @@ particular, we need to ensure that vmprof does not sample the stack in the middle of a switch, else we read 
nonsense. """ - try: - import _vmprof - except ImportError: - py.test.skip("no _vmprof") - # + _vmprof = pytest.importorskip('_vmprof') def switch_forever(c): while True: c.switch() diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -3,7 +3,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import fetch_translated_config from rpython.rtyper.lltypesystem import lltype, llmemory -from rpython.rlib.rvmprof import cintf +from rpython.rlib import rvmprof DEBUG = False @@ -25,12 +25,12 @@ def new(self, callback, arg=llmemory.NULL): if DEBUG: callback = _debug_wrapper(callback) - x = cintf.save_rvmprof_stack() + x = rvmprof.save_stack() try: - cintf.empty_rvmprof_stack() + rvmprof.empty_stack() h = self._gcrootfinder.new(self, callback, arg) finally: - cintf.restore_rvmprof_stack(x) + rvmprof.restore_stack(x) if DEBUG: debug.add(h) return h @@ -40,11 +40,11 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) - x = cintf.save_rvmprof_stack() + x = rvmprof.save_stack() try: h = self._gcrootfinder.switch(stacklet) finally: - cintf.restore_rvmprof_stack(x) + rvmprof.restore_stack(x) if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -56,10 +56,27 @@ return None def stop_sampling(): - from rpython.rlib.rvmprof.cintf import vmprof_stop_sampling - fd = vmprof_stop_sampling() - return rffi.cast(lltype.Signed, fd) + return _get_vmprof().stop_sampling() def start_sampling(): - from rpython.rlib.rvmprof.cintf import vmprof_start_sampling - vmprof_start_sampling() + return _get_vmprof().start_sampling() + +# ---------------- +# stacklet support +# ---------------- +# +# Ideally, vmprof_tl_stack, VMPROFSTACK etc. should be part of "self.cintf": +# not sure why they are a global. Eventually, we should probably fix all this +# mess. +from rpython.rlib.rvmprof.cintf import vmprof_tl_stack, VMPROFSTACK + +def save_stack(): + stop_sampling() + return vmprof_tl_stack.get_or_make_raw() + +def empty_stack(): + vmprof_tl_stack.setraw(lltype.nullptr(VMPROFSTACK)) + +def restore_stack(x): + vmprof_tl_stack.setraw(x) + start_sampling() diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -122,32 +122,16 @@ lltype.Signed, compilation_info=eci, _nowrapper=True) + vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], + rffi.INT, compilation_info=eci, + _nowrapper=True) + vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], + lltype.Void, compilation_info=eci, + _nowrapper=True) + return CInterface(locals()) -# this is always present, but compiles to no-op if RPYTHON_VMPROF is not -# defined (i.e. 
if we don't actually use vmprof in the generated C) -auto_eci = ExternalCompilationInfo(post_include_bits=[""" -#ifndef RPYTHON_VMPROF -# define vmprof_stop_sampling() (-1) -# define vmprof_start_sampling() ((void)0) -#endif -"""]) - -if get_translation_config() is None: - # tests need the full eci here - _eci = global_eci -else: - _eci = auto_eci - -vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], - rffi.INT, compilation_info=_eci, - _nowrapper=True) -vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], - lltype.Void, compilation_info=_eci, - _nowrapper=True) - - class CInterface(object): def __init__(self, namespace): for k, v in namespace.iteritems(): @@ -232,20 +216,6 @@ leave_code(s) # -# stacklet support - -def save_rvmprof_stack(): - vmprof_stop_sampling() - return vmprof_tl_stack.get_or_make_raw() - -def empty_rvmprof_stack(): - vmprof_tl_stack.setraw(lltype.nullptr(VMPROFSTACK)) - -def restore_rvmprof_stack(x): - vmprof_tl_stack.setraw(x) - vmprof_start_sampling() - -# # traceback support def get_rvmprof_stack(): diff --git a/rpython/rlib/rvmprof/rvmprof.py b/rpython/rlib/rvmprof/rvmprof.py --- a/rpython/rlib/rvmprof/rvmprof.py +++ b/rpython/rlib/rvmprof/rvmprof.py @@ -168,6 +168,21 @@ if self.cintf.vmprof_register_virtual_function(name, uid, 500000) < 0: raise VMProfError("vmprof buffers full! disk full or too slow") + def stop_sampling(self): + """ + Temporarily stop the sampling of stack frames. Signals are still + delivered, but are ignored. + """ + fd = self.cintf.vmprof_stop_sampling() + return rffi.cast(lltype.Signed, fd) + + def start_sampling(self): + """ + Undo the effect of stop_sampling + """ + self.cintf.vmprof_start_sampling() + + def vmprof_execute_code(name, get_code_fn, result_class=None, _hack_update_stack_untranslated=False): """Decorator to be used on the function that interprets a code object. diff --git a/rpython/rlib/rvmprof/test/support.py b/rpython/rlib/rvmprof/test/support.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/test/support.py @@ -0,0 +1,45 @@ +import pytest +from rpython.rlib import rvmprof + +class FakeVMProf(object): + + def __init__(self): + self._enabled = False + self._ignore_signals = 1 + + # --- VMProf official API --- + # add fake methods as needed by the tests + + def stop_sampling(self): + self._ignore_signals += 1 + + def start_sampling(self): + assert self._ignore_signals > 0, ('calling start_sampling() without ' + 'the corresponding stop_sampling()?') + self._ignore_signals -= 1 + + # --- FakeVMProf specific API --- + # this API is not part of rvmprof, but available only inside tests using + # fakevmprof + + @property + def is_sampling_enabled(self): + return self._ignore_signals == 0 + + def check_status(self): + """ + To be called during test teardown + """ + if self._ignore_signals != 1: + msg = ('Invalid value for fakevmprof._ignore_signals: expected 1, ' + 'got %d. 
This probably means that you called ' + '{start,stop}_sampling() a wrong number of times') + raise ValueError, msg % self._ignore_signals + + + at pytest.fixture +def fakevmprof(request, monkeypatch): + fake = FakeVMProf() + monkeypatch.setattr(rvmprof.rvmprof, '_vmprof_instance', fake) + request.addfinalizer(fake.check_status) + return fake diff --git a/rpython/rlib/rvmprof/test/test_support.py b/rpython/rlib/rvmprof/test/test_support.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/test/test_support.py @@ -0,0 +1,42 @@ +import pytest +from rpython.rlib import rvmprof +from rpython.rlib.rvmprof.test.support import FakeVMProf, fakevmprof + +class TestFakeVMProf(object): + + def test_sampling(self): + fake = FakeVMProf() + assert not fake.is_sampling_enabled + # + fake.start_sampling() + assert fake.is_sampling_enabled + # + fake.stop_sampling() + fake.stop_sampling() + assert not fake.is_sampling_enabled + # + fake.start_sampling() + assert not fake.is_sampling_enabled + fake.start_sampling() + assert fake.is_sampling_enabled + # + pytest.raises(AssertionError, "fake.start_sampling()") + + def test_check_status(self): + fake = FakeVMProf() + fake.stop_sampling() + pytest.raises(ValueError, "fake.check_status()") + + +class TestFixture(object): + + def test_fixture(self, fakevmprof): + assert isinstance(fakevmprof, FakeVMProf) + assert rvmprof._get_vmprof() is fakevmprof + # + # tweak sampling using the "real" API, and check that we actually used + # the fake + rvmprof.start_sampling() + assert fakevmprof.is_sampling_enabled + rvmprof.stop_sampling() + assert not fakevmprof.is_sampling_enabled From pypy.commits at gmail.com Mon Dec 18 07:47:55 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 18 Dec 2017 04:47:55 -0800 (PST) Subject: [pypy-commit] pypy py3.5: oops Message-ID: <5a37b8fb.14121c0a.64efd.07e6@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93473:b1f2250e284c Date: 2017-12-18 13:47 +0100 http://bitbucket.org/pypy/pypy/changeset/b1f2250e284c/ Log: oops diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -21,7 +21,6 @@ # Fast version of the "strict" errors handler. 
def raise_unicode_exception_decode(errors, encoding, msg, s, startingpos, endingpos): - import pdb;pdb.set_trace() raise OperationError(space.w_UnicodeDecodeError, space.newtuple([space.newtext(encoding), space.newbytes(s), From pypy.commits at gmail.com Mon Dec 18 10:02:38 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 18 Dec 2017 07:02:38 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Typo Message-ID: <5a37d88e.89c0df0a.3704c.88c4@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93474:98de75801589 Date: 2017-12-18 13:53 +0100 http://bitbucket.org/pypy/pypy/changeset/98de75801589/ Log: Typo diff --git a/pypy/module/_cffi_backend/cerrno.py b/pypy/module/_cffi_backend/cerrno.py --- a/pypy/module/_cffi_backend/cerrno.py +++ b/pypy/module/_cffi_backend/cerrno.py @@ -23,7 +23,7 @@ @unwrap_spec(code=int) def getwinerror(space, code=-1): - from rpython.rlib.rwin32 import GetLastError_alt_saved, FormatError + from rpython.rlib.rwin32 import GetLastError_alt_saved, FormatErrorW if code == -1: code = GetLastError_alt_saved() message = FormatErrorW(code) From pypy.commits at gmail.com Mon Dec 18 10:02:41 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 18 Dec 2017 07:02:41 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Remove platform.machine() from the extension of the CPython- and CFFI-compatible dynamic libraries. I cannot figure out why it was added in the first place, and it seems wrong (we might get AMD64 on a 32-bit python). It also causes a bug in importlib that was quite some efforts to track. Message-ID: <5a37d891.e4addf0a.3b86a.04ec@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93475:d79aaf54b0bc Date: 2017-12-18 15:37 +0100 http://bitbucket.org/pypy/pypy/changeset/d79aaf54b0bc/ Log: Remove platform.machine() from the extension of the CPython- and CFFI-compatible dynamic libraries. I cannot figure out why it was added in the first place, and it seems wrong (we might get AMD64 on a 32-bit python). It also causes a bug in importlib that was quite some efforts to track. diff --git a/pypy/module/imp/importing.py b/pypy/module/imp/importing.py --- a/pypy/module/imp/importing.py +++ b/pypy/module/imp/importing.py @@ -47,14 +47,19 @@ if platform_name == 'linux2': platform_name = 'linux' - soabi += '-' + platform.machine() + '-' + platform_name + soabi += '-' + platform_name + # xxx used to also include platform.machine(), but this is wrong + # (might get AMD64 on a 32-bit python) and it is the source of a + # importlib bug if we get uppercase characters from there... if platform_name == 'linux': soabi += '-gnu' if sys.maxsize == (2**31 - 1) and platform.machine() == 'x86_64': soabi += 'x32' - return '.' + soabi + SO + result = '.' + soabi + SO + assert result == result.lower() # this is an implicit requirement of importlib on Windows! 
+ return result def has_so_extension(space): return (space.config.objspace.usemodules.cpyext or From pypy.commits at gmail.com Mon Dec 18 12:18:19 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 18 Dec 2017 09:18:19 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.5-v5.9.x: create release branch Message-ID: <5a37f85b.06d21c0a.5abd1.df65@mx.google.com> Author: fijal Branch: release-pypy3.5-v5.9.x Changeset: r93476:0d10fe25b245 Date: 2017-12-18 19:17 +0200 http://bitbucket.org/pypy/pypy/changeset/0d10fe25b245/ Log: create release branch From pypy.commits at gmail.com Mon Dec 18 12:24:11 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 18 Dec 2017 09:24:11 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.5-v5.9.x: bump the versions Message-ID: <5a37f9bb.d7991c0a.73a77.1c6b@mx.google.com> Author: fijal Branch: release-pypy3.5-v5.9.x Changeset: r93477:cb3d0a043647 Date: 2017-12-18 19:23 +0200 http://bitbucket.org/pypy/pypy/changeset/cb3d0a043647/ Log: bump the versions diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -66,9 +66,9 @@ # built documents. # # The short X.Y version. -version = '5.8' +version = '5.10' # The full version, including alpha/beta/rc tags. -release = '5.8.0' +release = '5.10.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h --- a/pypy/module/cpyext/include/patchlevel.h +++ b/pypy/module/cpyext/include/patchlevel.h @@ -29,7 +29,7 @@ #define PY_VERSION "3.5.3" /* PyPy version as a string */ -#define PYPY_VERSION "5.10.0-alpha0" +#define PYPY_VERSION "5.10.0" #define PYPY_VERSION_NUM 0x050A0000 /* Defined to mean a PyPy where cpyext holds more regular references diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py --- a/pypy/module/sys/version.py +++ b/pypy/module/sys/version.py @@ -10,7 +10,7 @@ #XXX # sync CPYTHON_VERSION with patchlevel.h, package.py CPYTHON_API_VERSION = 1013 #XXX # sync with include/modsupport.h -PYPY_VERSION = (5, 10, 0, "alpha", 0) #XXX # sync patchlevel.h +PYPY_VERSION = (5, 10, 0, "final", 0) #XXX # sync patchlevel.h import pypy From pypy.commits at gmail.com Mon Dec 18 15:08:19 2017 From: pypy.commits at gmail.com (mattip) Date: Mon, 18 Dec 2017 12:08:19 -0800 (PST) Subject: [pypy-commit] pypy py3.5: minimum fix to allow import _ssl, build_cffi_imports to run on win32 Message-ID: <5a382033.139ddf0a.b8267.c708@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r93478:1c2e6a98c3e0 Date: 2017-12-18 22:07 +0200 http://bitbucket.org/pypy/pypy/changeset/1c2e6a98c3e0/ Log: minimum fix to allow import _ssl, build_cffi_imports to run on win32 diff --git a/lib_pypy/_cffi_ssl/_stdssl/__init__.py b/lib_pypy/_cffi_ssl/_stdssl/__init__.py --- a/lib_pypy/_cffi_ssl/_stdssl/__init__.py +++ b/lib_pypy/_cffi_ssl/_stdssl/__init__.py @@ -20,9 +20,14 @@ SSL_ERROR_EOF, SSL_ERROR_NO_SOCKET, SSL_ERROR_INVALID_ERROR_CODE, pyerr_write_unraisable) from _cffi_ssl._stdssl import error -from select import poll, POLLIN, POLLOUT, select +from select import select from enum import IntEnum as _IntEnum +if sys.platform == 'win32': + HAVE_POLL = False +else: + from select import poll, POLLIN, POLLOUT + OPENSSL_VERSION = ffi.string(lib.OPENSSL_VERSION_TEXT).decode('utf-8') OPENSSL_VERSION_NUMBER = lib.OPENSSL_VERSION_NUMBER ver = OPENSSL_VERSION_NUMBER @@ -158,8 +163,6 @@ def _monotonic_clock(): return 
time.clock_gettime(time.CLOCK_MONOTONIC) -HAVE_POLL = True - def _ssl_select(sock, writing, timeout): if HAVE_POLL: p = poll() diff --git a/pypy/tool/build_cffi_imports.py b/pypy/tool/build_cffi_imports.py --- a/pypy/tool/build_cffi_imports.py +++ b/pypy/tool/build_cffi_imports.py @@ -17,8 +17,8 @@ "resource": "_resource_build.py" if sys.platform != "win32" else None, "lzma": "_lzma_build.py", "_decimal": "_decimal_build.py", - "ssl": "_ssl_build.py", - # hashlib does not need to be built! It uses API calls from ssl + "_ssl": "_ssl_build.py", + # hashlib does not need to be built! It uses API calls from _ssl "xx": None, # for testing: 'None' should be completely ignored } @@ -28,7 +28,7 @@ 'lzma': ('https://tukaani.org/xz/xz-5.2.3.tar.gz', '71928b357d0a09a12a4b4c5fafca8c31c19b0e7d3b8ebb19622e96f26dbf28cb', []), - 'ssl': ('http://ftp.openbsd.org/pub/OpenBSD/LibreSSL/libressl-2.6.2.tar.gz', + '_ssl': ('http://ftp.openbsd.org/pub/OpenBSD/LibreSSL/libressl-2.6.2.tar.gz', 'b029d2492b72a9ba5b5fcd9f3d602c9fd0baa087912f2aaecc28f52f567ec478', ['--without-openssldir']), '_gdbm': ('http://ftp.gnu.org/gnu/gdbm/gdbm-1.13.tar.gz', @@ -159,6 +159,12 @@ continue if module is None or getattr(options, 'no_' + key, False): continue + # the key is the module name, has it already been built? + status, stdout, stderr = run_subprocess(str(pypy_c), ['-c', 'import %s' % key]) + if status == 0: + print('*', ' %s already built' % key, file=sys.stderr) + continue + if module.endswith('.py'): args = [module] cwd = str(join(basedir,'lib_pypy')) @@ -175,7 +181,7 @@ shutil.rmtree(destdir, ignore_errors=True) os.makedirs(destdir) - if key == 'ssl' and sys.platform == 'darwin': + if key == '_ssl' and sys.platform == 'darwin': # this patch is loosely inspired by an Apple and adds # a fallback to the OS X roots when none are available patches = [ @@ -201,7 +207,7 @@ env['LDFLAGS'] = \ '-L{}/usr/lib {}'.format(destdir, env.get('LDFLAGS', '')) - if key == 'ssl' and sys.platform == 'darwin': + if key == '_ssl' and sys.platform == 'darwin': # needed for our roots patch env['LDFLAGS'] += ' -framework CoreFoundation -framework Security' @@ -237,7 +243,7 @@ help='instead of executing sys.executable' \ ' you can specify an alternative pypy vm here') parser.add_argument('--only', dest='only', default=None, - help='Only build the modules delimited by a colon. E.g. ssl,sqlite') + help='Only build the modules delimited by a colon. E.g. 
_ssl,sqlite') parser.add_argument('--embed-dependencies', dest='embed_dependencies', action='store_true', help='embed dependencies for distribution') args = parser.parse_args() From pypy.commits at gmail.com Mon Dec 18 15:09:30 2017 From: pypy.commits at gmail.com (mattip) Date: Mon, 18 Dec 2017 12:09:30 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix for non-win32 Message-ID: <5a38207a.57b61c0a.71df7.0dde@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r93479:75cccb748415 Date: 2017-12-18 22:08 +0200 http://bitbucket.org/pypy/pypy/changeset/75cccb748415/ Log: fix for non-win32 diff --git a/lib_pypy/_cffi_ssl/_stdssl/__init__.py b/lib_pypy/_cffi_ssl/_stdssl/__init__.py --- a/lib_pypy/_cffi_ssl/_stdssl/__init__.py +++ b/lib_pypy/_cffi_ssl/_stdssl/__init__.py @@ -27,6 +27,7 @@ HAVE_POLL = False else: from select import poll, POLLIN, POLLOUT + HAVE_POLL = True OPENSSL_VERSION = ffi.string(lib.OPENSSL_VERSION_TEXT).decode('utf-8') OPENSSL_VERSION_NUMBER = lib.OPENSSL_VERSION_NUMBER From pypy.commits at gmail.com Mon Dec 18 15:24:21 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 18 Dec 2017 12:24:21 -0800 (PST) Subject: [pypy-commit] pypy mmap-for-arenas: Write the Windows part of arena_mmap() Message-ID: <5a3823f5.e5b2df0a.10835.374f@mx.google.com> Author: Armin Rigo Branch: mmap-for-arenas Changeset: r93480:1c3ba5303112 Date: 2017-12-18 21:20 +0100 http://bitbucket.org/pypy/pypy/changeset/1c3ba5303112/ Log: Write the Windows part of arena_mmap() diff --git a/rpython/rlib/rmmap.py b/rpython/rlib/rmmap.py --- a/rpython/rlib/rmmap.py +++ b/rpython/rlib/rmmap.py @@ -805,6 +805,18 @@ def madvise_free(addr, map_size): "No madvise() on this platform" + def arena_mmap(nbytes): + flags = MAP_PRIVATE | MAP_ANONYMOUS + prot = PROT_READ | PROT_WRITE + p = c_mmap_safe(lltype.nullptr(PTR.TO), nbytes, prot, flags, -1, 0) + if p == rffi.cast(PTR, -1): + p = rffi.cast(PTR, 0) + return p + + def arena_munmap(arena_ptr, nbytes): + assert nbytes >= 0 + c_munmap_safe(rffi.cast(PTR, arena_ptr), nbytes) + elif _MS_WINDOWS: def mmap(fileno, length, tagname="", access=_ACCESS_DEFAULT, offset=0): # XXX flags is or-ed into access by now. 
@@ -965,3 +977,13 @@ rffi.cast(DWORD, PAGE_READWRITE)) #from rpython.rlib import debug #debug.debug_print("madvise_free:", r) + + def arena_mmap(nbytes): + null = lltype.nullptr(rffi.VOIDP.TO) + res = VirtualAlloc_safe(null, nbytes, MEM_COMMIT | MEM_RESERVE, + PAGE_READWRITE) + return rffi.cast(PTR, res) + + def arena_munmap(arena_ptr, nbytes): + assert nbytes >= 0 + VirtualFree_safe(rffi.cast(rffi.VOIDP, arena_ptr), 0, MEM_RELEASE) diff --git a/rpython/rtyper/lltypesystem/llarena.py b/rpython/rtyper/lltypesystem/llarena.py --- a/rpython/rtyper/lltypesystem/llarena.py +++ b/rpython/rtyper/lltypesystem/llarena.py @@ -543,13 +543,8 @@ def llimpl_arena_mmap(nbytes): from rpython.rlib import rmmap - flags = rmmap.MAP_PRIVATE | rmmap.MAP_ANONYMOUS - prot = rmmap.PROT_READ | rmmap.PROT_WRITE - p = rffi.cast(llmemory.Address, rmmap.c_mmap_safe( - lltype.nullptr(rmmap.PTR.TO), nbytes, prot, flags, -1, 0)) - if p == rffi.cast(llmemory.Address, -1): - p = rffi.cast(llmemory.Address, 0) - return p + p = rmmap.arena_mmap(nbytes) + return rffi.cast(llmemory.Address, p) register_external(arena_mmap, [int], llmemory.Address, 'll_arena.arena_mmap', llimpl=llimpl_arena_mmap, @@ -558,8 +553,7 @@ def llimpl_arena_munmap(arena_addr, nbytes): from rpython.rlib import rmmap - assert nbytes >= 0 - rmmap.c_munmap_safe(rffi.cast(rmmap.PTR, arena_addr), nbytes) + rmmap.arena_munmap(arena_addr, nbytes) register_external(arena_munmap, [llmemory.Address, int], None, 'll_arena.arena_munmap', llimpl=llimpl_arena_munmap, diff --git a/rpython/rtyper/lltypesystem/test/test_llarena.py b/rpython/rtyper/lltypesystem/test/test_llarena.py --- a/rpython/rtyper/lltypesystem/test/test_llarena.py +++ b/rpython/rtyper/lltypesystem/test/test_llarena.py @@ -322,6 +322,14 @@ assert rffi.cast(lltype.Signed, addr) == 124 * pagesize assert size == pagesize * 5 +def test_arena_mmap_munmap(): + p = llarena.arena_mmap(32*1024) + q = p + 32*1024 - 16 + llarena.arena_reserve(q, llmemory.sizeof(lltype.Signed)) + q.signed[0] = -123456789 + assert q.signed[0] == -123456789 + llarena.arena_munmap(p, 32*1024) + class TestStandalone(test_standalone.StandaloneTests): def test_compiled_arena_protect(self): @@ -361,3 +369,19 @@ cbuilder.cmdexec('2', expect_crash=True) if sys.platform.startswith('win'): ctypes.windll.kernel32.SetErrorMode(old_err_mode) + + def test_compiled_arena_mmap_munmap(self): + # mostly a "does not crash during translation" test + import sys + # + def fn(argv): + p = llarena.arena_mmap(32*1024) + p.char[32*1024-1] = 'X' + assert p.char[32*1024-1] == 'X' + llarena.arena_munmap(p, 32*1024) + print 42 + return 0 + # + t, cbuilder = self.compile(fn) + data = cbuilder.cmdexec('') + assert data == '42\n' From pypy.commits at gmail.com Mon Dec 18 16:17:25 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 18 Dec 2017 13:17:25 -0800 (PST) Subject: [pypy-commit] pypy mmap-for-arenas: Translation fix Message-ID: <5a383065.cf0e1c0a.6c53.2a7b@mx.google.com> Author: Armin Rigo Branch: mmap-for-arenas Changeset: r93481:685824de07b4 Date: 2017-12-18 21:52 +0100 http://bitbucket.org/pypy/pypy/changeset/685824de07b4/ Log: Translation fix diff --git a/rpython/rlib/rmmap.py b/rpython/rlib/rmmap.py --- a/rpython/rlib/rmmap.py +++ b/rpython/rlib/rmmap.py @@ -808,6 +808,9 @@ def arena_mmap(nbytes): flags = MAP_PRIVATE | MAP_ANONYMOUS prot = PROT_READ | PROT_WRITE + if we_are_translated(): + flags = NonConstant(flags) + prot = NonConstant(prot) p = c_mmap_safe(lltype.nullptr(PTR.TO), nbytes, prot, flags, -1, 0) if p == rffi.cast(PTR, -1): p = 
rffi.cast(PTR, 0) @@ -955,8 +958,12 @@ case of a sandboxed process """ null = lltype.nullptr(rffi.VOIDP.TO) - res = VirtualAlloc_safe(null, map_size, MEM_COMMIT | MEM_RESERVE, - PAGE_EXECUTE_READWRITE) + alloctype = MEM_COMMIT | MEM_RESERVE + protect = PAGE_EXECUTE_READWRITE + if we_are_translated(): + alloctype = NonConstant(alloctype) + protect = NonConstant(protect) + res = VirtualAlloc_safe(null, map_size, alloctype, protect) if not res: raise MemoryError arg = lltype.malloc(LPDWORD.TO, 1, zero=True, flavor='raw') @@ -980,8 +987,12 @@ def arena_mmap(nbytes): null = lltype.nullptr(rffi.VOIDP.TO) - res = VirtualAlloc_safe(null, nbytes, MEM_COMMIT | MEM_RESERVE, - PAGE_READWRITE) + alloctype = MEM_COMMIT | MEM_RESERVE + protect = PAGE_READWRITE + if we_are_translated(): + alloctype = NonConstant(alloctype) + protect = NonConstant(protect) + res = VirtualAlloc_safe(null, nbytes, alloctype, protect) return rffi.cast(PTR, res) def arena_munmap(arena_ptr, nbytes): From pypy.commits at gmail.com Tue Dec 19 04:27:48 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 19 Dec 2017 01:27:48 -0800 (PST) Subject: [pypy-commit] pypy non-linux-vmprof-stacklet-switch-2: try to fix translation for non-linux Message-ID: <5a38db94.4a861c0a.baaf1.9c55@mx.google.com> Author: Matti Picus Branch: non-linux-vmprof-stacklet-switch-2 Changeset: r93482:80316b2e779f Date: 2017-12-19 11:26 +0200 http://bitbucket.org/pypy/pypy/changeset/80316b2e779f/ Log: try to fix translation for non-linux diff --git a/rpython/rlib/rvmprof/rvmprof.py b/rpython/rlib/rvmprof/rvmprof.py --- a/rpython/rlib/rvmprof/rvmprof.py +++ b/rpython/rlib/rvmprof/rvmprof.py @@ -210,6 +210,7 @@ try: _get_vmprof() except cintf.VMProfPlatformUnsupported: + func.c_name = '__vmprof_eval_vmprof' return func @jit.oopspec("rvmprof.jitted(unique_id)") From pypy.commits at gmail.com Tue Dec 19 05:02:19 2017 From: pypy.commits at gmail.com (arigo) Date: Tue, 19 Dec 2017 02:02:19 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Windows: missing _setmode(O_BINARY) in FileIO Message-ID: <5a38e3ab.a1abdf0a.4adb2.88e0@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93483:64afb0785729 Date: 2017-12-19 10:57 +0100 http://bitbucket.org/pypy/pypy/changeset/64afb0785729/ Log: Windows: missing _setmode(O_BINARY) in FileIO diff --git a/pypy/module/_io/interp_fileio.py b/pypy/module/_io/interp_fileio.py --- a/pypy/module/_io/interp_fileio.py +++ b/pypy/module/_io/interp_fileio.py @@ -8,6 +8,7 @@ from rpython.rlib.rstring import StringBuilder from rpython.rlib import rposix from rpython.rlib.rposix_stat import STAT_FIELD_TYPES +from rpython.rlib.streamio import _setfd_binary from rpython.rtyper.lltypesystem import lltype, rffi from os import O_RDONLY, O_WRONLY, O_RDWR, O_CREAT, O_TRUNC, O_EXCL import sys, os, stat, errno @@ -239,6 +240,8 @@ if HAS_BLKSIZE and st.st_blksize > 1: self.blksize = st.st_blksize + _setfd_binary(self.fd) + space.setattr(self, space.newtext("name"), w_name) if self.appending: From pypy.commits at gmail.com Tue Dec 19 08:27:05 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 19 Dec 2017 05:27:05 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.5-v5.9.x: bump whatsnew Message-ID: <5a3913a9.c7471c0a.5b773.1e06@mx.google.com> Author: fijal Branch: release-pypy3.5-v5.9.x Changeset: r93484:636834242c92 Date: 2017-12-18 19:30 +0200 http://bitbucket.org/pypy/pypy/changeset/636834242c92/ Log: bump whatsnew diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ 
b/pypy/doc/whatsnew-head.rst @@ -1,41 +1,4 @@ =========================== -What's new in PyPy2.7 5.10+ +What's new in PyPy3 6.0 =========================== -.. this is a revision shortly after release-pypy2.7-v5.9.0 -.. startrev:d56dadcef996 - - -.. branch: cppyy-packaging - -Cleanup and improve cppyy packaging - -.. branch: docs-osx-brew-openssl - -.. branch: keep-debug-symbols - -Add a smartstrip tool, which can optionally keep the debug symbols in a -separate file, instead of just stripping them away. Use it in packaging - -.. branch: bsd-patches - -Fix failures on FreeBSD, contributed by David Naylor as patches on the issue -tracker (issues 2694, 2695, 2696, 2697) - -.. branch: run-extra-tests - -Run extra_tests/ in buildbot - -.. branch: vmprof-0.4.10 - -Upgrade the _vmprof backend to vmprof 0.4.10 - -.. branch: fix-vmprof-stacklet-switch - -Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) - -.. branch: win32-vcvars - -.. branch: rdict-fast-hash - -Make it possible to declare that the hash function of an r_dict is fast in RPython. diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-pypy3-5.10.0.rst copy from pypy/doc/whatsnew-head.rst copy to pypy/doc/whatsnew-pypy3-5.10.0.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-pypy3-5.10.0.rst @@ -1,5 +1,5 @@ =========================== -What's new in PyPy2.7 5.10+ +What's new in PyPy3 5.10x =========================== .. this is a revision shortly after release-pypy2.7-v5.9.0 From pypy.commits at gmail.com Tue Dec 19 08:27:07 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 19 Dec 2017 05:27:07 -0800 (PST) Subject: [pypy-commit] pypy default: start writing release notes Message-ID: <5a3913ab.06d21c0a.aa696.ee6e@mx.google.com> Author: fijal Branch: Changeset: r93485:183901a755d3 Date: 2017-12-19 15:26 +0200 http://bitbucket.org/pypy/pypy/changeset/183901a755d3/ Log: start writing release notes diff --git a/pypy/doc/release-v5.10.0.rst b/pypy/doc/release-v5.10.0.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/release-v5.10.0.rst @@ -0,0 +1,67 @@ +====================================== +PyPy2.7 and PyPy3.5 v5.10 dual release +====================================== + +The PyPy team is proud to release both PyPy2.7 v5.10 (an interpreter supporting +Python 2.7 syntax), and a final PyPy3.5 v5.10 (an interpreter for Python +3.5 syntax). The two releases are both based on much the same codebase, thus +the dual release. + +This release is an incremental release with very few new features, the main +feature being the final PyPy3.5 release that works on linux and OS X with beta +windows support. It also includes fixes for `vmprof`_ cooperation with greenlets. + +Compared to 5.9, the 5.10 release contains mostly bugfixes and small improvements. +We have in the pipeline big new features coming for PyPy 6.0 that did not make +the release cut and should be available within the next couple months. + +As always, this release is 100% compatible with the previous one and fixed +several issues and bugs raised by the growing community of PyPy users. +As always, wxe strongly recommend updating. + +This release concludes the Mozilla Open Source `grant`_ for having a compatible +PyPy 3.5 release and we're very grateful for that. + +You can download the v5.10 releases here: + + http://pypy.org/download.html + +We would like to thank our donors for the continued support of the PyPy +project. + +We would also like to thank our contributors and +encourage new people to join the project. 
PyPy has many +layers and we need help with all of them: `PyPy`_ and `RPython`_ documentation +improvements, tweaking popular `modules`_ to run on pypy, or general `help`_ +with making RPython's JIT even better. + +.. _vmprof: http://vmprof.readthedocs.io +.. _grant: https://morepypy.blogspot.com/2016/08/pypy-gets-funding-from-mozilla-for.html +.. _`PyPy`: index.html +.. _`RPython`: https://rpython.readthedocs.org +.. _`modules`: project-ideas.html#make-more-python-modules-pypy-friendly +.. _`help`: project-ideas.html + +What is PyPy? +============= + +PyPy is a very compliant Python interpreter, almost a drop-in replacement for +CPython 2.7 and CPython 3.5. It's fast (`PyPy and CPython 2.7.x`_ performance comparison) +due to its integrated tracing JIT compiler. + +We also welcome developers of other `dynamic languages`_ to see what RPython +can do for them. + +The PyPy 2.7 release supports: + + * **x86** machines on most common operating systems + (Linux 32/64 bits, Mac OS X 64 bits, Windows 32 bits, OpenBSD, FreeBSD) + + * newer **ARM** hardware (ARMv6 or ARMv7, with VFPv3) running Linux, + + * big- and little-endian variants of **PPC64** running Linux, + + * **s390x** running Linux + +.. _`PyPy and CPython 2.7.x`: http://speed.pypy.org +.. _`dynamic languages`: http://rpython.readthedocs.io/en/latest/examples.html From pypy.commits at gmail.com Tue Dec 19 08:33:29 2017 From: pypy.commits at gmail.com (arigo) Date: Tue, 19 Dec 2017 05:33:29 -0800 (PST) Subject: [pypy-commit] pypy default: Add a sentence Message-ID: <5a391529.8a821c0a.3eba.8226@mx.google.com> Author: Armin Rigo Branch: Changeset: r93486:e013e4c45333 Date: 2017-12-19 14:32 +0100 http://bitbucket.org/pypy/pypy/changeset/e013e4c45333/ Log: Add a sentence diff --git a/pypy/doc/release-v5.10.0.rst b/pypy/doc/release-v5.10.0.rst --- a/pypy/doc/release-v5.10.0.rst +++ b/pypy/doc/release-v5.10.0.rst @@ -17,10 +17,11 @@ As always, this release is 100% compatible with the previous one and fixed several issues and bugs raised by the growing community of PyPy users. -As always, wxe strongly recommend updating. +As always, we strongly recommend updating. This release concludes the Mozilla Open Source `grant`_ for having a compatible -PyPy 3.5 release and we're very grateful for that. +PyPy 3.5 release and we're very grateful for that. Of course, we will continue +to improve PyPy 3.5 and probably move to 3.6 during the course of 2018. You can download the v5.10 releases here: From pypy.commits at gmail.com Tue Dec 19 08:59:37 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 19 Dec 2017 05:59:37 -0800 (PST) Subject: [pypy-commit] pypy default: go through history Message-ID: <5a391b49.cfb0df0a.1e569.930f@mx.google.com> Author: fijal Branch: Changeset: r93487:c4e2adba9d2d Date: 2017-12-19 15:58 +0200 http://bitbucket.org/pypy/pypy/changeset/c4e2adba9d2d/ Log: go through history diff --git a/pypy/doc/release-v5.10.0.rst b/pypy/doc/release-v5.10.0.rst --- a/pypy/doc/release-v5.10.0.rst +++ b/pypy/doc/release-v5.10.0.rst @@ -65,3 +65,27 @@ .. _`PyPy and CPython 2.7.x`: http://speed.pypy.org .. 
_`dynamic languages`: http://rpython.readthedocs.io/en/latest/examples.html + +Changelog +========= + +* improve ssl handling on windows for pypy3 (makes pip work) +* improve unicode handling in various error reporters +* fix vmprof cooperation with greenlets +* fix some things in cpyext +* test and document the cmp(nan, nan) == 0 behaviour +* don't crash when calling sleep with inf or nan +* fix bugs in _io module +* inspect.isbuiltin() now returns True for functions implemented in C +* allow the sequences future-import, docstring, future-import for CPython bug compatibility +* Issue #2699: non-ascii messages in warnings +* posix.lockf +* fixes for FreeBSD platform +* add .debug files, so builds contain debugging info, instead of being stripped +* improvements to cppyy +* issue #2677 copy pure c PyBuffer_{From,To}Contiguous from cpython +* issue #2682, split firstword on any whitespace in sqlite3 +* ctypes: allow ptr[0] = foo when ptr is a pointer to struct +* matplotlib works with tgagg backend +* improvements to utf32 surrogate handling +* cffi version bump to 1.11.2 From pypy.commits at gmail.com Tue Dec 19 08:59:38 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 19 Dec 2017 05:59:38 -0800 (PST) Subject: [pypy-commit] pypy default: merge Message-ID: <5a391b4a.ee85df0a.9bceb.011a@mx.google.com> Author: fijal Branch: Changeset: r93488:2ac941dfb825 Date: 2017-12-19 15:58 +0200 http://bitbucket.org/pypy/pypy/changeset/2ac941dfb825/ Log: merge diff --git a/pypy/doc/release-v5.10.0.rst b/pypy/doc/release-v5.10.0.rst --- a/pypy/doc/release-v5.10.0.rst +++ b/pypy/doc/release-v5.10.0.rst @@ -17,10 +17,11 @@ As always, this release is 100% compatible with the previous one and fixed several issues and bugs raised by the growing community of PyPy users. -As always, wxe strongly recommend updating. +As always, we strongly recommend updating. This release concludes the Mozilla Open Source `grant`_ for having a compatible -PyPy 3.5 release and we're very grateful for that. +PyPy 3.5 release and we're very grateful for that. Of course, we will continue +to improve PyPy 3.5 and probably move to 3.6 during the course of 2018. 
You can download the v5.10 releases here: From pypy.commits at gmail.com Tue Dec 19 10:12:25 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 19 Dec 2017 07:12:25 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: move the ECI stuff into a proper function: this way, it will be easier to ensure that the ECI is not created at all on unsupported platforms Message-ID: <5a392c59.02431c0a.8f39b.5e6d@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93489:df87d2ec0d1b Date: 2017-12-19 15:49 +0100 http://bitbucket.org/pypy/pypy/changeset/df87d2ec0d1b/ Log: move the ECI stuff into a proper function: this way, it will be easier to ensure that the ECI is not created at all on unsupported platforms diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -19,62 +19,68 @@ SHARED = SRC.join('shared') BACKTRACE = SHARED.join('libbacktrace') -compile_extra = ['-DRPYTHON_VMPROF'] -separate_module_files = [ - SHARED.join('symboltable.c'), - SHARED.join('vmprof_unix.c') -] -if sys.platform.startswith('linux'): - separate_module_files += [ - BACKTRACE.join('atomic.c'), - BACKTRACE.join('backtrace.c'), - BACKTRACE.join('state.c'), - BACKTRACE.join('elf.c'), - BACKTRACE.join('dwarf.c'), - BACKTRACE.join('fileline.c'), - BACKTRACE.join('mmap.c'), - BACKTRACE.join('mmapio.c'), - BACKTRACE.join('posix.c'), - BACKTRACE.join('sort.c'), +def make_eci(): + if make_eci.called: + raise ValueError("make_eci() should be called at most once") + # + compile_extra = ['-DRPYTHON_VMPROF'] + separate_module_files = [ + SHARED.join('symboltable.c'), + SHARED.join('vmprof_unix.c') ] - _libs = ['dl'] - compile_extra += ['-DVMPROF_UNIX'] - compile_extra += ['-DVMPROF_LINUX'] -elif sys.platform == 'win32': - compile_extra += ['-DVMPROF_WINDOWS'] - separate_module_files = [SHARED.join('vmprof_win.c')] - _libs = [] -else: - # Guessing a BSD-like Unix platform - compile_extra += ['-DVMPROF_UNIX'] - compile_extra += ['-DVMPROF_MAC'] - if sys.platform.startswith('freebsd'): - _libs = ['unwind'] + if sys.platform.startswith('linux'): + separate_module_files += [ + BACKTRACE.join('atomic.c'), + BACKTRACE.join('backtrace.c'), + BACKTRACE.join('state.c'), + BACKTRACE.join('elf.c'), + BACKTRACE.join('dwarf.c'), + BACKTRACE.join('fileline.c'), + BACKTRACE.join('mmap.c'), + BACKTRACE.join('mmapio.c'), + BACKTRACE.join('posix.c'), + BACKTRACE.join('sort.c'), + ] + _libs = ['dl'] + compile_extra += ['-DVMPROF_UNIX'] + compile_extra += ['-DVMPROF_LINUX'] + elif sys.platform == 'win32': + compile_extra += ['-DVMPROF_WINDOWS'] + separate_module_files = [SHARED.join('vmprof_win.c')] + _libs = [] else: - _libs = [] + # Guessing a BSD-like Unix platform + compile_extra += ['-DVMPROF_UNIX'] + compile_extra += ['-DVMPROF_MAC'] + if sys.platform.startswith('freebsd'): + _libs = ['unwind'] + else: + _libs = [] -eci_kwds = dict( - include_dirs = [SRC, SHARED, BACKTRACE], - includes = ['rvmprof.h','vmprof_stack.h'], - libraries = _libs, - separate_module_files = [ - SRC.join('rvmprof.c'), - SHARED.join('compat.c'), - SHARED.join('machine.c'), - SHARED.join('vmp_stack.c'), - SHARED.join('vmprof_memory.c'), - SHARED.join('vmprof_common.c'), - # symbol table already in separate_module_files - ] + separate_module_files, - post_include_bits=[], - compile_extra=compile_extra - ) -if sys.platform != 'win32': - eci_kwds['separate_module_files'].append( - SHARED.join('vmprof_mt.c'), - ) -global_eci = 
ExternalCompilationInfo(**eci_kwds) + eci_kwds = dict( + include_dirs = [SRC, SHARED, BACKTRACE], + includes = ['rvmprof.h','vmprof_stack.h'], + libraries = _libs, + separate_module_files = [ + SRC.join('rvmprof.c'), + SHARED.join('compat.c'), + SHARED.join('machine.c'), + SHARED.join('vmp_stack.c'), + SHARED.join('vmprof_memory.c'), + SHARED.join('vmprof_common.c'), + # symbol table already in separate_module_files + ] + separate_module_files, + post_include_bits=[], + compile_extra=compile_extra + ) + if sys.platform != 'win32': + eci_kwds['separate_module_files'].append( + SHARED.join('vmprof_mt.c'), + ) + make_eci.called = True + return ExternalCompilationInfo(**eci_kwds), eci_kwds +make_eci.called = False def configure_libbacktrace_linux(): bits = 32 if sys.maxsize == 2**31-1 else 64 @@ -88,11 +94,11 @@ if sys.platform.startswith('linux'): configure_libbacktrace_linux() + eci, eci_kwds = make_eci() eci_kwds['compile_extra'].append('-DRPYTHON_LL2CTYPES') platform.verify_eci(ExternalCompilationInfo( **eci_kwds)) - eci = global_eci vmprof_init = rffi.llexternal("vmprof_init", [rffi.INT, rffi.DOUBLE, rffi.INT, rffi.INT, rffi.CCHARP, rffi.INT, rffi.INT], From pypy.commits at gmail.com Tue Dec 19 10:12:28 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 19 Dec 2017 07:12:28 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: Switch to a saner way to handle vmprof on unsupported platforms: Message-ID: <5a392c5c.19a0df0a.7a9e.7f41@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93490:afe32451952d Date: 2017-12-19 16:11 +0100 http://bitbucket.org/pypy/pypy/changeset/afe32451952d/ Log: Switch to a saner way to handle vmprof on unsupported platforms: - currently, we always compiled rvmprof.c & co. and hoped that the compilation worked well enough, even if rvmprof was not supposed to be used anyway - with this commit, we compile rvmprof.c & co. ONLY if they are actually supported; moreover, we introduce a DummyVMProf to be unsed on unsupported platforms: this way, other modules can simply use rvmprof API without caring whether vmprof is supported or not (in the latter case, most calls are just no- ops). 
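As an illustration of that last point (a sketch with assumed caller code, not taken from this changeset): interpreter-level code can register its code class unconditionally, without having to care whether the platform is supported, because on unsupported platforms the call simply lands on the dummy implementation.

    from rpython.rlib import rvmprof

    class MyCode(object):
        def __init__(self, name):
            self.name = name

    def _get_full_name(code):
        # illustrative only; real interpreters build richer vmprof names
        return 'py:' + code.name

    # On a supported platform this registers MyCode with the real VMProf;
    # on an unsupported one it silently hits DummyVMProf instead of raising.
    rvmprof.register_code_object_class(MyCode, _get_full_name)
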
Hopefully, this should fix compilation on ARM and s390x diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -14,6 +14,9 @@ class VMProfPlatformUnsupported(Exception): pass +# vmprof works only on x86 for now +IS_SUPPORTED = host_platform.machine() in ('x86', 'x86_64') + ROOT = py.path.local(rpythonroot).join('rpython', 'rlib', 'rvmprof') SRC = ROOT.join('src') SHARED = SRC.join('shared') @@ -57,7 +60,6 @@ else: _libs = [] - eci_kwds = dict( include_dirs = [SRC, SHARED, BACKTRACE], includes = ['rvmprof.h','vmprof_stack.h'], @@ -91,6 +93,9 @@ shutil.copy(str(BACKTRACE.join(specific_config)), str(config)) def setup(): + if not IS_SUPPORTED: + raise VMProfPlatformUnsupported + if sys.platform.startswith('linux'): configure_libbacktrace_linux() diff --git a/rpython/rlib/rvmprof/dummy.py b/rpython/rlib/rvmprof/dummy.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/dummy.py @@ -0,0 +1,26 @@ +from rpython.rlib.objectmodel import specialize + +class DummyVMProf(object): + + def __init__(self): + self._unique_id = 0 + + def register_code_object_class(self, CodeClass, full_name_func): + CodeClass._vmprof_unique_id = self._unique_id + self._unique_id += 1 + + @specialize.argtype(1) + def register_code(self, code, full_name_func): + pass + + def enable(self, fileno, interval, memory=0, native=0, real_time=0): + pass + + def disable(self): + pass + + def start_sampling(self): + pass + + def stop_sampling(self): + pass diff --git a/rpython/rlib/rvmprof/rvmprof.py b/rpython/rlib/rvmprof/rvmprof.py --- a/rpython/rlib/rvmprof/rvmprof.py +++ b/rpython/rlib/rvmprof/rvmprof.py @@ -2,6 +2,7 @@ from rpython.rlib.objectmodel import specialize, we_are_translated, not_rpython from rpython.rlib import jit, rposix, rgc from rpython.rlib.rvmprof import cintf +from rpython.rlib.rvmprof.dummy import DummyVMProf from rpython.rtyper.annlowlevel import cast_instance_to_gcref from rpython.rtyper.annlowlevel import cast_base_ptr_to_instance from rpython.rtyper.lltypesystem import lltype, llmemory, rffi @@ -34,6 +35,9 @@ return [] class VMProf(object): + """ + NOTE: the API of this class should be kept in sync with dummy.DummyVMProf + """ _immutable_fields_ = ['is_enabled?'] @@ -255,5 +259,8 @@ def _get_vmprof(): global _vmprof_instance if _vmprof_instance is None: - _vmprof_instance = VMProf() + try: + _vmprof_instance = VMProf() + except cintf.VMProfPlatformUnsupported: + _vmprof_instance = DummyVMProf() return _vmprof_instance From pypy.commits at gmail.com Tue Dec 19 10:13:33 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 19 Dec 2017 07:13:33 -0800 (PST) Subject: [pypy-commit] pypy py3.5-xattr: wip Message-ID: <5a392c9d.b387df0a.10bc4.79f7@mx.google.com> Author: Ronan Lamy Branch: py3.5-xattr Changeset: r93491:61730fb1f196 Date: 2017-12-19 15:11 +0000 http://bitbucket.org/pypy/pypy/changeset/61730fb1f196/ Log: wip diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -122,7 +122,7 @@ else: path_b = path.as_bytes assert path_b is not None - return func(path.as_bytes, *args) + return func(path_b, *args) class Path(object): @@ -2283,7 +2283,9 @@ This function will not follow symbolic links. 
Equivalent to chflags(path, flags, follow_symlinks=False).""" -def getxattr(): + at unwrap_spec(path=path_or_fd(), attribute=path_or_fd(allow_fd=False), + follow_symlinks=bool) +def getxattr(space, path, attribute, __kwonly__, follow_symlinks=True): """getxattr(path, attribute, *, follow_symlinks=True) -> value Return the value of extended attribute attribute on path. @@ -2292,8 +2294,29 @@ If follow_symlinks is False, and the last element of the path is a symbolic link, getxattr will examine the symbolic link itself instead of the file the link points to.""" + if path.as_fd != -1: + if not follow_symlinks: + raise oefmt(space.w_ValueError, + "getxattr: cannot use fd and follow_symlinks together") + try: + result = rposix.fgetxattr(path.as_fd, attribute.as_bytes) + except OSError as e: + raise wrap_oserror(space, e, eintr_retry=False) + else: + try: + if follow_symlinks: + result = rposix.getxattr(path.as_bytes, attribute.as_bytes) + else: + result = rposix.lgetxattr(path.as_bytes, attribute.as_bytes) + except OSError as e: + raise wrap_oserror(space, e, eintr_retry=False) + return space.newbytes(result) -def setxattr(): + at unwrap_spec(path=path_or_fd(), attribute=path_or_fd(allow_fd=False), + flags=c_int, + follow_symlinks=bool) +def setxattr(space, path, attribute, w_value, flags=0, + __kwonly__=None, follow_symlinks=True): """setxattr(path, attribute, value, flags=0, *, follow_symlinks=True) Set extended attribute attribute on path to value. @@ -2301,9 +2324,28 @@ If follow_symlinks is False, and the last element of the path is a symbolic link, setxattr will modify the symbolic link itself instead of the file the link points to.""" + value = space.charbuf_w(w_value) + if path.as_fd != -1: + if not follow_symlinks: + raise oefmt(space.w_ValueError, + "setxattr: cannot use fd and follow_symlinks together") + try: + rposix.fsetxattr(path.as_fd, attribute.as_bytes, value) + except OSError as e: + raise wrap_oserror(space, e, eintr_retry=False) + else: + try: + if follow_symlinks: + rposix.setxattr(path.as_bytes, attribute.as_bytes, value) + else: + rposix.lsetxattr(path.as_bytes, attribute.as_bytes, value) + except OSError as e: + raise wrap_oserror(space, e, eintr_retry=False) -def removexattr(): + at unwrap_spec(path=path_or_fd(), attribute=path_or_fd(allow_fd=False), + follow_symlinks=bool) +def removexattr(space, path, attribute, __kwonly__, follow_symlinks=True): """removexattr(path, attribute, *, follow_symlinks=True) Remove extended attribute attribute on path. @@ -2311,8 +2353,27 @@ If follow_symlinks is False, and the last element of the path is a symbolic link, removexattr will modify the symbolic link itself instead of the file the link points to.""" + if path.as_fd != -1: + if not follow_symlinks: + raise oefmt(space.w_ValueError, + "removexattr: cannot use fd and follow_symlinks together") + try: + rposix.fremovexattr(path.as_fd, attribute.as_bytes) + except OSError as e: + raise wrap_oserror(space, e, eintr_retry=False) + else: + try: + if follow_symlinks: + rposix.removexattr(path.as_bytes, attribute.as_bytes) + else: + rposix.lremovexattr(path.as_bytes, attribute.as_bytes) + except OSError as e: + raise wrap_oserror(space, e, eintr_retry=False) -def listxattr(): + + at unwrap_spec(path=path_or_fd(), attribute=path_or_fd(allow_fd=False), + follow_symlinks=bool) +def listxattr(space, path, __kwonly__, follow_symlinks=True): """listxattr(path='.', *, follow_symlinks=True) Return a list of extended attributes on path. 
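The three wrappers above (getxattr, setxattr, removexattr) share the same dispatch on the (fd, follow_symlinks) pair; a condensed sketch of that rule, with a hypothetical helper name and assuming the same rposix entry points as in the patch:

    def _pick_xattr_variant(base, use_fd, follow_symlinks):
        # base is e.g. 'getxattr'; returns the name of the rposix call to use
        if use_fd:
            # an fd always refers to the opened file itself, so combining it
            # with follow_symlinks=False is rejected with a ValueError
            if not follow_symlinks:
                raise ValueError(
                    "%s: cannot use fd and follow_symlinks together" % base)
            return 'f' + base        # fgetxattr / fsetxattr / fremovexattr
        if follow_symlinks:
            return base              # getxattr / setxattr / removexattr
        return 'l' + base            # lgetxattr / lsetxattr / lremovexattr
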
@@ -2322,6 +2383,23 @@ If follow_symlinks is False, and the last element of the path is a symbolic link, listxattr will examine the symbolic link itself instead of the file the link points to.""" + if path.as_fd != -1: + if not follow_symlinks: + raise oefmt(space.w_ValueError, + "listxattr: cannot use fd and follow_symlinks together") + try: + result = rposix.flistxattr(path.as_fd, attribute.as_bytes) + except OSError as e: + raise wrap_oserror(space, e, eintr_retry=False) + else: + try: + if follow_symlinks: + result = rposix.listxattr(path.as_bytes, attribute.as_bytes) + else: + result = rposix.llistxattr(path.as_bytes, attribute.as_bytes) + except OSError as e: + raise wrap_oserror(space, e, eintr_retry=False) + return xxx have_functions = [] @@ -2449,8 +2527,8 @@ @unwrap_spec(policy=int) def sched_get_priority_max(space, policy): - """returns the maximum priority value that - can be used with the scheduling algorithm + """returns the maximum priority value that + can be used with the scheduling algorithm identified by policy """ while True: @@ -2464,7 +2542,7 @@ @unwrap_spec(policy=int) def sched_get_priority_min(space, policy): """returns the minimum priority value that - can be used with the scheduling algorithm + can be used with the scheduling algorithm identified by policy """ while True: @@ -2477,7 +2555,7 @@ @unwrap_spec(fd=c_int, cmd=c_int, length=r_longlong) def lockf(space, fd, cmd, length): - """apply, test or remove a POSIX lock on an + """apply, test or remove a POSIX lock on an open file. """ while True: diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -2574,3 +2574,112 @@ """Passes offset==NULL; not support on all OSes""" res = c_sendfile(out_fd, in_fd, lltype.nullptr(_OFF_PTR_T.TO), count) return handle_posix_error('sendfile', res) + +# ____________________________________________________________ +# Support for *xattr functions + +if sys.platform.startswith('linux'): + + class CConfig: + _compilation_info_ = ExternalCompilationInfo( + includes=['sys/xattr.h', 'linux/limits.h'],) + XATTR_SIZE_MAX = rffi_platform.DefinedConstantInteger('XATTR_SIZE_MAX') + XATTR_CREATE = rffi_platform.DefinedConstantInteger('XATTR_CREATE') + XATTR_REPLACE = rffi_platform.DefinedConstantInteger('XATTR_REPLACE') + + cConfig = rffi_platform.configure(CConfig) + globals().update(cConfig) + c_fgetxattr = external('fgetxattr', + [rffi.INT, rffi.CCHARP, rffi.VOIDP, rffi.SIZE_T], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_getxattr = external('getxattr', + [rffi.CCHARP, rffi.CCHARP, rffi.VOIDP, rffi.SIZE_T], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_lgetxattr = external('lgetxattr', + [rffi.CCHARP, rffi.CCHARP, rffi.VOIDP, rffi.SIZE_T], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_fsetxattr = external('fsetxattr', + [rffi.INT, rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T, rffi.INT], + rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_setxattr = external('setxattr', + [rffi.CCHARP, rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T, rffi.INT], + rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_lsetxattr = external('lsetxattr', + [rffi.CCHARP, rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T, rffi.INT], + rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + 
c_fremovexattr = external('fremovexattr', + [rffi.INT, rffi.CCHARP], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_removexattr = external('removexattr', + [rffi.CCHARP, rffi.CCHARP], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_lremovexattr = external('lremovexattr', + [rffi.CCHARP, rffi.CCHARP], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + buf_sizes = [256, XATTR_SIZE_MAX] + + def fgetxattr(fd, name): + for size in buf_sizes: + with rffi.scoped_alloc_buffer(size) as buf: + void_buf = rffi.cast(rffi.VOIDP, buf.raw) + res = c_fgetxattr(fd, name, void_buf, size) + if res < 0: + err = get_saved_errno() + if err != errno.ERANGE: + raise OSError(err, 'fgetxattr failed') + else: + return buf.str(res) + else: + raise OSError(errno.ERANGE, 'fgetxattr failed') + + def getxattr(path, name, follow_symlinks=True): + for size in buf_sizes: + with rffi.scoped_alloc_buffer(size) as buf: + void_buf = rffi.cast(rffi.VOIDP, buf.raw) + if follow_symlinks: + res = c_getxattr(path, name, void_buf, size) + else: + res = c_lgetxattr(path, name, void_buf, size) + if res < 0: + err = get_saved_errno() + if err != errno.ERANGE: + c_name = 'getxattr' if follow_symlinks else 'lgetxattr' + raise OSError(err, c_name + 'failed') + else: + return buf.str(res) + else: + c_name = 'getxattr' if follow_symlinks else 'lgetxattr' + raise OSError(errno.ERANGE, c_name + 'failed') + + def fsetxattr(fd, name, value, flags=0): + return handle_posix_error( + 'fsetxattr', c_fsetxattr(fd, name, value, len(value), flags)) + + def setxattr(path, name, value, flags=0, follow_symlinks=True): + if follow_symlinks: + return handle_posix_error( + 'setxattr', c_setxattr(path, name, value, len(value), flags)) + else: + return handle_posix_error( + 'lsetxattr', c_lsetxattr(path, name, value, len(value), flags)) + + def fremovexattr(fd, name): + return handle_posix_error('fremovexattr', c_fremovexattr(fd, name)) + + def removexattr(path, name, follow_symlinks=True): + if follow_symlinks: + return handle_posix_error('removexattr', c_removexattr(path, name)) + else: + return handle_posix_error('lremovexattr', c_lremovexattr(path, name)) diff --git a/rpython/rlib/test/test_rposix.py b/rpython/rlib/test/test_rposix.py --- a/rpython/rlib/test/test_rposix.py +++ b/rpython/rlib/test/test_rposix.py @@ -1,3 +1,6 @@ +from hypothesis import given, strategies as st, assume +import pytest + from rpython.rtyper.test.test_llinterp import interpret from rpython.translator.c.test.test_genc import compile from rpython.tool.pytest.expecttest import ExpectTest @@ -8,10 +11,10 @@ import py def rposix_requires(funcname): - return py.test.mark.skipif(not hasattr(rposix, funcname), + return pytest.mark.skipif(not hasattr(rposix, funcname), reason="Requires rposix.%s()" % funcname) -win_only = py.test.mark.skipif("os.name != 'nt'") +win_only = pytest.mark.skipif("os.name != 'nt'") class TestPosixFunction: def test_access(self): @@ -827,3 +830,47 @@ rposix.lockf(fd, rposix.F_ULOCK, 4) finally: os.close(fd) + +def check_working_xattr(): + fname = str(udir.join('xattr_test0.txt')) + with open(fname, 'wb'): + pass + try: + rposix.getxattr(fname, 'foo') + except OSError as e: + return e.errno != errno.ENOTSUP + else: + raise RuntimeError('getxattr() succeeded unexpectedly!?!') + + at pytest.mark.skipif(not (hasattr(rposix, 'getxattr') and check_working_xattr()), + reason="Requires working rposix.getxattr()") + at 
given(name=st.binary(max_size=10), value=st.binary(max_size=10), + follow_symlinks=st.booleans(), use_fd=st.booleans()) +def test_xattr(name, value, follow_symlinks, use_fd): + use_fd = False + assume(follow_symlinks or not use_fd) + fname = str(udir.join('xattr_test.txt')) + with open(fname, 'wb'): + pass + if use_fd: + file_id = os.open(fname, os.O_CREAT, 0777) + read, write, delete = rposix.fgetxattr, rposix.fsetxattr, rposix.fremovexattr + else: + file_id = fname + if follow_symlinks: + read, write, delete = rposix.getxattr, rposix.setxattr, rposix.removexattr + else: + read = lambda *args, **kwargs: rposix.getxattr(*args, follow_symlinks=False, **kwargs) + write = lambda *args, **kwargs: rposix.setxattr(*args, follow_symlinks=False, **kwargs) + delete = lambda *args, **kwargs: rposix.removexattr(*args, follow_symlinks=False, **kwargs) + try: + with pytest.raises(OSError): + read(file_id, name) + write(file_id, name, value) + assert read(file_id, name) == value + delete(file_id, name) + with pytest.raises(OSError): + read(file_id, name) + finally: + if use_fd: + os.close(file_id) From pypy.commits at gmail.com Tue Dec 19 10:43:07 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 19 Dec 2017 07:43:07 -0800 (PST) Subject: [pypy-commit] pypy py3.5-xattr: Fix test_xattr() Message-ID: <5a39338b.50a4df0a.dd0c.70ea@mx.google.com> Author: Ronan Lamy Branch: py3.5-xattr Changeset: r93492:eef439fa3527 Date: 2017-12-19 15:42 +0000 http://bitbucket.org/pypy/pypy/changeset/eef439fa3527/ Log: Fix test_xattr() diff --git a/rpython/rlib/test/test_rposix.py b/rpython/rlib/test/test_rposix.py --- a/rpython/rlib/test/test_rposix.py +++ b/rpython/rlib/test/test_rposix.py @@ -836,19 +836,22 @@ with open(fname, 'wb'): pass try: - rposix.getxattr(fname, 'foo') - except OSError as e: - return e.errno != errno.ENOTSUP + rposix.setxattr(fname, 'user.foo', '') + except OSError: + return False else: - raise RuntimeError('getxattr() succeeded unexpectedly!?!') + return True @pytest.mark.skipif(not (hasattr(rposix, 'getxattr') and check_working_xattr()), reason="Requires working rposix.getxattr()") - at given(name=st.binary(max_size=10), value=st.binary(max_size=10), + at given( + name=st.text( + alphabet=st.characters(min_codepoint=1), min_size=1, max_size=10), + value=st.binary(max_size=10), follow_symlinks=st.booleans(), use_fd=st.booleans()) def test_xattr(name, value, follow_symlinks, use_fd): - use_fd = False assume(follow_symlinks or not use_fd) + name = 'user.' 
+ name.encode('utf-8') fname = str(udir.join('xattr_test.txt')) with open(fname, 'wb'): pass From pypy.commits at gmail.com Tue Dec 19 11:10:53 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 19 Dec 2017 08:10:53 -0800 (PST) Subject: [pypy-commit] pypy py3.5-xattr: Enable getxattr, setxattr, removexattr Message-ID: <5a393a0d.7ba5500a.c0d39.721b@mx.google.com> Author: Ronan Lamy Branch: py3.5-xattr Changeset: r93493:7bb9dfa2b7ff Date: 2017-12-19 16:10 +0000 http://bitbucket.org/pypy/pypy/changeset/7bb9dfa2b7ff/ Log: Enable getxattr, setxattr, removexattr diff --git a/pypy/module/posix/__init__.py b/pypy/module/posix/__init__.py --- a/pypy/module/posix/__init__.py +++ b/pypy/module/posix/__init__.py @@ -229,7 +229,7 @@ 'POSIX_FADV_RANDOM', 'POSIX_FADV_NOREUSE', 'POSIX_FADV_DONTNEED']: assert getattr(rposix, _name) is not None, "missing %r" % (_name,) interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) - + if hasattr(rposix, 'sched_get_priority_max'): interpleveldefs['sched_get_priority_max'] = 'interp_posix.sched_get_priority_max' interpleveldefs['sched_get_priority_min'] = 'interp_posix.sched_get_priority_min' @@ -246,11 +246,20 @@ if hasattr(rposix, 'sched_yield'): interpleveldefs['sched_yield'] = 'interp_posix.sched_yield' - + for _name in ["O_CLOEXEC"]: if getattr(rposix, _name) is not None: interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) + if hasattr(rposix, 'getxattr'): + interpleveldefs['getxattr'] = 'interp_posix.getxattr' + interpleveldefs['setxattr'] = 'interp_posix.setxattr' + interpleveldefs['removexattr'] = 'interp_posix.removexattr' + for _name in ['XATTR_SIZE_MAX', 'XATTR_CREATE', 'XATTR_REPLACE']: + if getattr(rposix, _name) is not None: + interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) + + def startup(self, space): from pypy.module.posix import interp_posix from pypy.module.imp import importing diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -386,8 +386,8 @@ def test_times(self): """ - posix.times() should return a posix.times_result object giving - float-representations (seconds, effectively) of the four fields from + posix.times() should return a posix.times_result object giving + float-representations (seconds, effectively) of the four fields from the underlying struct tms and the return value. """ result = self.posix.times() @@ -977,7 +977,7 @@ assert posix.sched_get_priority_min(posix.SCHED_OTHER) != -1 if getattr(posix, 'SCHED_BATCH', None): assert posix.sched_get_priority_min(posix.SCHED_BATCH) != -1 - + if hasattr(rposix, 'sched_get_priority_min'): def test_os_sched_priority_max_greater_than_min(self): posix, os = self.posix, self.os @@ -992,7 +992,7 @@ def test_sched_yield(self): os = self.posix #Always suceeds on Linux - os.sched_yield() + os.sched_yield() def test_write_buffer(self): os = self.posix @@ -1350,7 +1350,7 @@ posix.close(fd) s2.close() s1.close() - + def test_os_lockf(self): posix, os = self.posix, self.os fd = os.open(self.path2 + 'test_os_lockf', os.O_WRONLY | os.O_CREAT) @@ -1441,6 +1441,20 @@ e = raises(OSError, self.posix.symlink, 'bok', '/nonexistentdir/boz') assert str(e.value).endswith(": 'bok' -> '/nonexistentdir/boz'") + if hasattr(rposix, 'getxattr'): + def test_xattr_simple(self): + # Minimal testing here, lib-python has better tests. 
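+            # The test relies on the setxattr(2) flag semantics: XATTR_CREATE
+            # fails with EEXIST if the attribute already exists, XATTR_REPLACE
+            # fails with ENODATA if it does not exist yet, and flags=0 creates
+            # or replaces unconditionally.  Only the "user." namespace is
+            # writable by unprivileged processes on most Linux filesystems,
+            # hence the 'user.test' attribute name.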
+ os = self.posix + with open(self.path, 'wb'): + pass + raises(OSError, os.getxattr, self.path, 'user.test') + os.setxattr(self.path, 'user.test', b'', os.XATTR_CREATE) + assert os.getxattr(self.path, 'user.test') == b'' + os.setxattr(self.path, 'user.test', b'foo', os.XATTR_REPLACE) + assert os.getxattr(self.path, 'user.test') == b'foo' + os.removexattr(self.path, 'user.test') + raises(OSError, os.getxattr, self.path, 'user.test') + class AppTestEnvironment(object): def setup_class(cls): @@ -1495,6 +1509,7 @@ res = os.system(cmd) assert res == 0 + @py.test.fixture def check_fsencoding(space, pytestconfig): if pytestconfig.getvalue('runappdirect'): From pypy.commits at gmail.com Tue Dec 19 11:49:18 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 19 Dec 2017 08:49:18 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: this is the correct string for 32bit intel Message-ID: <5a39430e.e393df0a.9b191.4003@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93494:41babd8886fa Date: 2017-12-19 17:48 +0100 http://bitbucket.org/pypy/pypy/changeset/41babd8886fa/ Log: this is the correct string for 32bit intel diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -15,7 +15,7 @@ pass # vmprof works only on x86 for now -IS_SUPPORTED = host_platform.machine() in ('x86', 'x86_64') +IS_SUPPORTED = host_platform.machine() in ('i686', 'x86_64') ROOT = py.path.local(rpythonroot).join('rpython', 'rlib', 'rvmprof') SRC = ROOT.join('src') From pypy.commits at gmail.com Tue Dec 19 11:50:33 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 19 Dec 2017 08:50:33 -0800 (PST) Subject: [pypy-commit] pypy py3.5-xattr: Add rposix.(f)listxattr Message-ID: <5a394359.08691c0a.a8b82.83c9@mx.google.com> Author: Ronan Lamy Branch: py3.5-xattr Changeset: r93495:05c458bd2a65 Date: 2017-12-19 16:49 +0000 http://bitbucket.org/pypy/pypy/changeset/05c458bd2a65/ Log: Add rposix.(f)listxattr diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -2603,29 +2603,41 @@ save_err=rffi.RFFI_SAVE_ERRNO) c_fsetxattr = external('fsetxattr', [rffi.INT, rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T, rffi.INT], - rffi.SSIZE_T, + rffi.INT, compilation_info=CConfig._compilation_info_, save_err=rffi.RFFI_SAVE_ERRNO) c_setxattr = external('setxattr', [rffi.CCHARP, rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T, rffi.INT], - rffi.SSIZE_T, + rffi.INT, compilation_info=CConfig._compilation_info_, save_err=rffi.RFFI_SAVE_ERRNO) c_lsetxattr = external('lsetxattr', [rffi.CCHARP, rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T, rffi.INT], - rffi.SSIZE_T, + rffi.INT, compilation_info=CConfig._compilation_info_, save_err=rffi.RFFI_SAVE_ERRNO) c_fremovexattr = external('fremovexattr', - [rffi.INT, rffi.CCHARP], rffi.SSIZE_T, + [rffi.INT, rffi.CCHARP], rffi.INT, compilation_info=CConfig._compilation_info_, save_err=rffi.RFFI_SAVE_ERRNO) c_removexattr = external('removexattr', - [rffi.CCHARP, rffi.CCHARP], rffi.SSIZE_T, + [rffi.CCHARP, rffi.CCHARP], rffi.INT, compilation_info=CConfig._compilation_info_, save_err=rffi.RFFI_SAVE_ERRNO) c_lremovexattr = external('lremovexattr', - [rffi.CCHARP, rffi.CCHARP], rffi.SSIZE_T, + [rffi.CCHARP, rffi.CCHARP], rffi.INT, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_flistxattr = external('flistxattr', + [rffi.INT, rffi.CCHARP, rffi.SIZE_T], rffi.SSIZE_T, + 
compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_listxattr = external('listxattr', + [rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_llistxattr = external('llistxattr', + [rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T], rffi.SSIZE_T, compilation_info=CConfig._compilation_info_, save_err=rffi.RFFI_SAVE_ERRNO) buf_sizes = [256, XATTR_SIZE_MAX] @@ -2683,3 +2695,39 @@ return handle_posix_error('removexattr', c_removexattr(path, name)) else: return handle_posix_error('lremovexattr', c_lremovexattr(path, name)) + + def _unpack_attrs(attr_string): + result = attr_string.split('\0') + del result[-1] + return result + + def flistxattr(fd): + for size in buf_sizes: + with rffi.scoped_alloc_buffer(size) as buf: + res = c_flistxattr(fd, buf.raw, size) + if res < 0: + err = get_saved_errno() + if err != errno.ERANGE: + raise OSError(err, 'flistxattr failed') + else: + return _unpack_attrs(buf.str(res)) + else: + raise OSError(errno.ERANGE, 'flistxattr failed') + + def listxattr(path, follow_symlinks=True): + for size in buf_sizes: + with rffi.scoped_alloc_buffer(size) as buf: + if follow_symlinks: + res = c_listxattr(path, buf.raw, size) + else: + res = c_llistxattr(path, buf.raw, size) + if res < 0: + err = get_saved_errno() + if err != errno.ERANGE: + c_name = 'listxattr' if follow_symlinks else 'llistxattr' + raise OSError(err, c_name + 'failed') + else: + return _unpack_attrs(buf.str(res)) + else: + c_name = 'listxattr' if follow_symlinks else 'llistxattr' + raise OSError(errno.ERANGE, c_name + 'failed') diff --git a/rpython/rlib/test/test_rposix.py b/rpython/rlib/test/test_rposix.py --- a/rpython/rlib/test/test_rposix.py +++ b/rpython/rlib/test/test_rposix.py @@ -853,27 +853,38 @@ assume(follow_symlinks or not use_fd) name = 'user.' 
+ name.encode('utf-8') fname = str(udir.join('xattr_test.txt')) + try: + os.unlink(fname) + except OSError: + pass with open(fname, 'wb'): pass if use_fd: file_id = os.open(fname, os.O_CREAT, 0777) read, write, delete = rposix.fgetxattr, rposix.fsetxattr, rposix.fremovexattr + all_names = rposix.flistxattr else: file_id = fname if follow_symlinks: read, write, delete = rposix.getxattr, rposix.setxattr, rposix.removexattr + all_names = rposix.listxattr else: read = lambda *args, **kwargs: rposix.getxattr(*args, follow_symlinks=False, **kwargs) write = lambda *args, **kwargs: rposix.setxattr(*args, follow_symlinks=False, **kwargs) delete = lambda *args, **kwargs: rposix.removexattr(*args, follow_symlinks=False, **kwargs) + all_names = lambda *args, **kwargs: rposix.listxattr(*args, follow_symlinks=False, **kwargs) try: + init_names = all_names(file_id) with pytest.raises(OSError): read(file_id, name) write(file_id, name, value) assert read(file_id, name) == value + assert set(all_names(file_id)) == set(init_names + [name]) + assert '' not in all_names(file_id) delete(file_id, name) with pytest.raises(OSError): read(file_id, name) + assert set(all_names(file_id)) == set(init_names) finally: if use_fd: os.close(file_id) From pypy.commits at gmail.com Tue Dec 19 12:04:15 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 19 Dec 2017 09:04:15 -0800 (PST) Subject: [pypy-commit] pypy py3.5-xattr: Implement posix.listxattr(), fix test Message-ID: <5a39468f.cf0e1c0a.6c53.2854@mx.google.com> Author: Ronan Lamy Branch: py3.5-xattr Changeset: r93496:eeeb03d063f0 Date: 2017-12-19 17:02 +0000 http://bitbucket.org/pypy/pypy/changeset/eeeb03d063f0/ Log: Implement posix.listxattr(), fix test diff --git a/pypy/module/posix/__init__.py b/pypy/module/posix/__init__.py --- a/pypy/module/posix/__init__.py +++ b/pypy/module/posix/__init__.py @@ -255,6 +255,7 @@ interpleveldefs['getxattr'] = 'interp_posix.getxattr' interpleveldefs['setxattr'] = 'interp_posix.setxattr' interpleveldefs['removexattr'] = 'interp_posix.removexattr' + interpleveldefs['listxattr'] = 'interp_posix.listxattr' for _name in ['XATTR_SIZE_MAX', 'XATTR_CREATE', 'XATTR_REPLACE']: if getattr(rposix, _name) is not None: interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -2304,10 +2304,8 @@ raise wrap_oserror(space, e, eintr_retry=False) else: try: - if follow_symlinks: - result = rposix.getxattr(path.as_bytes, attribute.as_bytes) - else: - result = rposix.lgetxattr(path.as_bytes, attribute.as_bytes) + result = rposix.getxattr(path.as_bytes, attribute.as_bytes, + follow_symlinks=follow_symlinks) except OSError as e: raise wrap_oserror(space, e, eintr_retry=False) return space.newbytes(result) @@ -2335,10 +2333,8 @@ raise wrap_oserror(space, e, eintr_retry=False) else: try: - if follow_symlinks: - rposix.setxattr(path.as_bytes, attribute.as_bytes, value) - else: - rposix.lsetxattr(path.as_bytes, attribute.as_bytes, value) + rposix.setxattr(path.as_bytes, attribute.as_bytes, value, + follow_symlinks=follow_symlinks) except OSError as e: raise wrap_oserror(space, e, eintr_retry=False) @@ -2363,16 +2359,13 @@ raise wrap_oserror(space, e, eintr_retry=False) else: try: - if follow_symlinks: - rposix.removexattr(path.as_bytes, attribute.as_bytes) - else: - rposix.lremovexattr(path.as_bytes, attribute.as_bytes) + rposix.removexattr(path.as_bytes, attribute.as_bytes, + 
follow_symlinks=follow_symlinks) except OSError as e: raise wrap_oserror(space, e, eintr_retry=False) - at unwrap_spec(path=path_or_fd(), attribute=path_or_fd(allow_fd=False), - follow_symlinks=bool) + at unwrap_spec(path=path_or_fd(), follow_symlinks=bool) def listxattr(space, path, __kwonly__, follow_symlinks=True): """listxattr(path='.', *, follow_symlinks=True) @@ -2388,18 +2381,15 @@ raise oefmt(space.w_ValueError, "listxattr: cannot use fd and follow_symlinks together") try: - result = rposix.flistxattr(path.as_fd, attribute.as_bytes) + result = rposix.flistxattr(path.as_fd) except OSError as e: raise wrap_oserror(space, e, eintr_retry=False) else: try: - if follow_symlinks: - result = rposix.listxattr(path.as_bytes, attribute.as_bytes) - else: - result = rposix.llistxattr(path.as_bytes, attribute.as_bytes) + result = rposix.listxattr(path.as_bytes, follow_symlinks) except OSError as e: raise wrap_oserror(space, e, eintr_retry=False) - return xxx + return space.newlist([space.newbytes(attr) for attr in result]) have_functions = [] diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -1447,13 +1447,17 @@ os = self.posix with open(self.path, 'wb'): pass + init_names = os.listxattr(self.path) raises(OSError, os.getxattr, self.path, 'user.test') - os.setxattr(self.path, 'user.test', b'', os.XATTR_CREATE) + os.setxattr(self.path, 'user.test', b'', os.XATTR_CREATE, follow_symlinks=False) assert os.getxattr(self.path, 'user.test') == b'' os.setxattr(self.path, 'user.test', b'foo', os.XATTR_REPLACE) - assert os.getxattr(self.path, 'user.test') == b'foo' - os.removexattr(self.path, 'user.test') + assert os.getxattr(self.path, 'user.test', follow_symlinks=False) == b'foo' + assert set(os.listxattr(self.path)) == set( + init_names + [b'user.test']) + os.removexattr(self.path, 'user.test', follow_symlinks=False) raises(OSError, os.getxattr, self.path, 'user.test') + assert os.listxattr(self.path, follow_symlinks=False) == init_names class AppTestEnvironment(object): From pypy.commits at gmail.com Tue Dec 19 12:24:58 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 19 Dec 2017 09:24:58 -0800 (PST) Subject: [pypy-commit] pypy py3.5-xattr: Close branch py3.5-xattr Message-ID: <5a394b6a.11c6df0a.477fa.1c9d@mx.google.com> Author: Ronan Lamy Branch: py3.5-xattr Changeset: r93497:2c730fa110fc Date: 2017-12-19 17:24 +0000 http://bitbucket.org/pypy/pypy/changeset/2c730fa110fc/ Log: Close branch py3.5-xattr From pypy.commits at gmail.com Tue Dec 19 12:25:19 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 19 Dec 2017 09:25:19 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Merged in py3.5-xattr (pull request #586) Message-ID: <5a394b7f.5de81c0a.66b9d.b8af@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93498:fa25c452dbb5 Date: 2017-12-19 17:24 +0000 http://bitbucket.org/pypy/pypy/changeset/fa25c452dbb5/ Log: Merged in py3.5-xattr (pull request #586) Implement posix.*xattr functions diff --git a/pypy/module/posix/__init__.py b/pypy/module/posix/__init__.py --- a/pypy/module/posix/__init__.py +++ b/pypy/module/posix/__init__.py @@ -229,7 +229,7 @@ 'POSIX_FADV_RANDOM', 'POSIX_FADV_NOREUSE', 'POSIX_FADV_DONTNEED']: assert getattr(rposix, _name) is not None, "missing %r" % (_name,) interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) - + if hasattr(rposix, 'sched_get_priority_max'): interpleveldefs['sched_get_priority_max'] = 
'interp_posix.sched_get_priority_max' interpleveldefs['sched_get_priority_min'] = 'interp_posix.sched_get_priority_min' @@ -246,11 +246,21 @@ if hasattr(rposix, 'sched_yield'): interpleveldefs['sched_yield'] = 'interp_posix.sched_yield' - + for _name in ["O_CLOEXEC"]: if getattr(rposix, _name) is not None: interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) + if hasattr(rposix, 'getxattr'): + interpleveldefs['getxattr'] = 'interp_posix.getxattr' + interpleveldefs['setxattr'] = 'interp_posix.setxattr' + interpleveldefs['removexattr'] = 'interp_posix.removexattr' + interpleveldefs['listxattr'] = 'interp_posix.listxattr' + for _name in ['XATTR_SIZE_MAX', 'XATTR_CREATE', 'XATTR_REPLACE']: + if getattr(rposix, _name) is not None: + interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) + + def startup(self, space): from pypy.module.posix import interp_posix from pypy.module.imp import importing diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -122,7 +122,7 @@ else: path_b = path.as_bytes assert path_b is not None - return func(path.as_bytes, *args) + return func(path_b, *args) class Path(object): @@ -2283,7 +2283,9 @@ This function will not follow symbolic links. Equivalent to chflags(path, flags, follow_symlinks=False).""" -def getxattr(): + at unwrap_spec(path=path_or_fd(), attribute=path_or_fd(allow_fd=False), + follow_symlinks=bool) +def getxattr(space, path, attribute, __kwonly__, follow_symlinks=True): """getxattr(path, attribute, *, follow_symlinks=True) -> value Return the value of extended attribute attribute on path. @@ -2292,8 +2294,27 @@ If follow_symlinks is False, and the last element of the path is a symbolic link, getxattr will examine the symbolic link itself instead of the file the link points to.""" + if path.as_fd != -1: + if not follow_symlinks: + raise oefmt(space.w_ValueError, + "getxattr: cannot use fd and follow_symlinks together") + try: + result = rposix.fgetxattr(path.as_fd, attribute.as_bytes) + except OSError as e: + raise wrap_oserror(space, e, eintr_retry=False) + else: + try: + result = rposix.getxattr(path.as_bytes, attribute.as_bytes, + follow_symlinks=follow_symlinks) + except OSError as e: + raise wrap_oserror(space, e, eintr_retry=False) + return space.newbytes(result) -def setxattr(): + at unwrap_spec(path=path_or_fd(), attribute=path_or_fd(allow_fd=False), + flags=c_int, + follow_symlinks=bool) +def setxattr(space, path, attribute, w_value, flags=0, + __kwonly__=None, follow_symlinks=True): """setxattr(path, attribute, value, flags=0, *, follow_symlinks=True) Set extended attribute attribute on path to value. 
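(For context, a minimal sketch of how the new app-level functions are meant to be
used once this lands; 'data.bin' and 'user.origin' are made-up names, and the file
is assumed to exist on a filesystem with xattr support:

    import os
    os.setxattr('data.bin', 'user.origin', b'example', os.XATTR_CREATE)
    assert os.getxattr('data.bin', 'user.origin') == b'example'
    # as implemented here, listxattr() returns the attribute names as bytes
    assert b'user.origin' in os.listxattr('data.bin')
    os.removexattr('data.bin', 'user.origin')
)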
@@ -2301,9 +2322,26 @@ If follow_symlinks is False, and the last element of the path is a symbolic link, setxattr will modify the symbolic link itself instead of the file the link points to.""" + value = space.charbuf_w(w_value) + if path.as_fd != -1: + if not follow_symlinks: + raise oefmt(space.w_ValueError, + "setxattr: cannot use fd and follow_symlinks together") + try: + rposix.fsetxattr(path.as_fd, attribute.as_bytes, value) + except OSError as e: + raise wrap_oserror(space, e, eintr_retry=False) + else: + try: + rposix.setxattr(path.as_bytes, attribute.as_bytes, value, + follow_symlinks=follow_symlinks) + except OSError as e: + raise wrap_oserror(space, e, eintr_retry=False) -def removexattr(): + at unwrap_spec(path=path_or_fd(), attribute=path_or_fd(allow_fd=False), + follow_symlinks=bool) +def removexattr(space, path, attribute, __kwonly__, follow_symlinks=True): """removexattr(path, attribute, *, follow_symlinks=True) Remove extended attribute attribute on path. @@ -2311,8 +2349,24 @@ If follow_symlinks is False, and the last element of the path is a symbolic link, removexattr will modify the symbolic link itself instead of the file the link points to.""" + if path.as_fd != -1: + if not follow_symlinks: + raise oefmt(space.w_ValueError, + "removexattr: cannot use fd and follow_symlinks together") + try: + rposix.fremovexattr(path.as_fd, attribute.as_bytes) + except OSError as e: + raise wrap_oserror(space, e, eintr_retry=False) + else: + try: + rposix.removexattr(path.as_bytes, attribute.as_bytes, + follow_symlinks=follow_symlinks) + except OSError as e: + raise wrap_oserror(space, e, eintr_retry=False) -def listxattr(): + + at unwrap_spec(path=path_or_fd(), follow_symlinks=bool) +def listxattr(space, path, __kwonly__, follow_symlinks=True): """listxattr(path='.', *, follow_symlinks=True) Return a list of extended attributes on path. @@ -2322,6 +2376,20 @@ If follow_symlinks is False, and the last element of the path is a symbolic link, listxattr will examine the symbolic link itself instead of the file the link points to.""" + if path.as_fd != -1: + if not follow_symlinks: + raise oefmt(space.w_ValueError, + "listxattr: cannot use fd and follow_symlinks together") + try: + result = rposix.flistxattr(path.as_fd) + except OSError as e: + raise wrap_oserror(space, e, eintr_retry=False) + else: + try: + result = rposix.listxattr(path.as_bytes, follow_symlinks) + except OSError as e: + raise wrap_oserror(space, e, eintr_retry=False) + return space.newlist([space.newbytes(attr) for attr in result]) have_functions = [] @@ -2449,8 +2517,8 @@ @unwrap_spec(policy=int) def sched_get_priority_max(space, policy): - """returns the maximum priority value that - can be used with the scheduling algorithm + """returns the maximum priority value that + can be used with the scheduling algorithm identified by policy """ while True: @@ -2464,7 +2532,7 @@ @unwrap_spec(policy=int) def sched_get_priority_min(space, policy): """returns the minimum priority value that - can be used with the scheduling algorithm + can be used with the scheduling algorithm identified by policy """ while True: @@ -2477,7 +2545,7 @@ @unwrap_spec(fd=c_int, cmd=c_int, length=r_longlong) def lockf(space, fd, cmd, length): - """apply, test or remove a POSIX lock on an + """apply, test or remove a POSIX lock on an open file. 
""" while True: diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -386,8 +386,8 @@ def test_times(self): """ - posix.times() should return a posix.times_result object giving - float-representations (seconds, effectively) of the four fields from + posix.times() should return a posix.times_result object giving + float-representations (seconds, effectively) of the four fields from the underlying struct tms and the return value. """ result = self.posix.times() @@ -977,7 +977,7 @@ assert posix.sched_get_priority_min(posix.SCHED_OTHER) != -1 if getattr(posix, 'SCHED_BATCH', None): assert posix.sched_get_priority_min(posix.SCHED_BATCH) != -1 - + if hasattr(rposix, 'sched_get_priority_min'): def test_os_sched_priority_max_greater_than_min(self): posix, os = self.posix, self.os @@ -992,7 +992,7 @@ def test_sched_yield(self): os = self.posix #Always suceeds on Linux - os.sched_yield() + os.sched_yield() def test_write_buffer(self): os = self.posix @@ -1350,7 +1350,7 @@ posix.close(fd) s2.close() s1.close() - + def test_os_lockf(self): posix, os = self.posix, self.os fd = os.open(self.path2 + 'test_os_lockf', os.O_WRONLY | os.O_CREAT) @@ -1441,6 +1441,24 @@ e = raises(OSError, self.posix.symlink, 'bok', '/nonexistentdir/boz') assert str(e.value).endswith(": 'bok' -> '/nonexistentdir/boz'") + if hasattr(rposix, 'getxattr'): + def test_xattr_simple(self): + # Minimal testing here, lib-python has better tests. + os = self.posix + with open(self.path, 'wb'): + pass + init_names = os.listxattr(self.path) + raises(OSError, os.getxattr, self.path, 'user.test') + os.setxattr(self.path, 'user.test', b'', os.XATTR_CREATE, follow_symlinks=False) + assert os.getxattr(self.path, 'user.test') == b'' + os.setxattr(self.path, 'user.test', b'foo', os.XATTR_REPLACE) + assert os.getxattr(self.path, 'user.test', follow_symlinks=False) == b'foo' + assert set(os.listxattr(self.path)) == set( + init_names + [b'user.test']) + os.removexattr(self.path, 'user.test', follow_symlinks=False) + raises(OSError, os.getxattr, self.path, 'user.test') + assert os.listxattr(self.path, follow_symlinks=False) == init_names + class AppTestEnvironment(object): def setup_class(cls): @@ -1495,6 +1513,7 @@ res = os.system(cmd) assert res == 0 + @py.test.fixture def check_fsencoding(space, pytestconfig): if pytestconfig.getvalue('runappdirect'): diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -2574,3 +2574,160 @@ """Passes offset==NULL; not support on all OSes""" res = c_sendfile(out_fd, in_fd, lltype.nullptr(_OFF_PTR_T.TO), count) return handle_posix_error('sendfile', res) + +# ____________________________________________________________ +# Support for *xattr functions + +if sys.platform.startswith('linux'): + + class CConfig: + _compilation_info_ = ExternalCompilationInfo( + includes=['sys/xattr.h', 'linux/limits.h'],) + XATTR_SIZE_MAX = rffi_platform.DefinedConstantInteger('XATTR_SIZE_MAX') + XATTR_CREATE = rffi_platform.DefinedConstantInteger('XATTR_CREATE') + XATTR_REPLACE = rffi_platform.DefinedConstantInteger('XATTR_REPLACE') + + cConfig = rffi_platform.configure(CConfig) + globals().update(cConfig) + c_fgetxattr = external('fgetxattr', + [rffi.INT, rffi.CCHARP, rffi.VOIDP, rffi.SIZE_T], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_getxattr = external('getxattr', + [rffi.CCHARP, 
rffi.CCHARP, rffi.VOIDP, rffi.SIZE_T], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_lgetxattr = external('lgetxattr', + [rffi.CCHARP, rffi.CCHARP, rffi.VOIDP, rffi.SIZE_T], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_fsetxattr = external('fsetxattr', + [rffi.INT, rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T, rffi.INT], + rffi.INT, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_setxattr = external('setxattr', + [rffi.CCHARP, rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T, rffi.INT], + rffi.INT, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_lsetxattr = external('lsetxattr', + [rffi.CCHARP, rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T, rffi.INT], + rffi.INT, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_fremovexattr = external('fremovexattr', + [rffi.INT, rffi.CCHARP], rffi.INT, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_removexattr = external('removexattr', + [rffi.CCHARP, rffi.CCHARP], rffi.INT, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_lremovexattr = external('lremovexattr', + [rffi.CCHARP, rffi.CCHARP], rffi.INT, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_flistxattr = external('flistxattr', + [rffi.INT, rffi.CCHARP, rffi.SIZE_T], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_listxattr = external('listxattr', + [rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_llistxattr = external('llistxattr', + [rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + buf_sizes = [256, XATTR_SIZE_MAX] + + def fgetxattr(fd, name): + for size in buf_sizes: + with rffi.scoped_alloc_buffer(size) as buf: + void_buf = rffi.cast(rffi.VOIDP, buf.raw) + res = c_fgetxattr(fd, name, void_buf, size) + if res < 0: + err = get_saved_errno() + if err != errno.ERANGE: + raise OSError(err, 'fgetxattr failed') + else: + return buf.str(res) + else: + raise OSError(errno.ERANGE, 'fgetxattr failed') + + def getxattr(path, name, follow_symlinks=True): + for size in buf_sizes: + with rffi.scoped_alloc_buffer(size) as buf: + void_buf = rffi.cast(rffi.VOIDP, buf.raw) + if follow_symlinks: + res = c_getxattr(path, name, void_buf, size) + else: + res = c_lgetxattr(path, name, void_buf, size) + if res < 0: + err = get_saved_errno() + if err != errno.ERANGE: + c_name = 'getxattr' if follow_symlinks else 'lgetxattr' + raise OSError(err, c_name + 'failed') + else: + return buf.str(res) + else: + c_name = 'getxattr' if follow_symlinks else 'lgetxattr' + raise OSError(errno.ERANGE, c_name + 'failed') + + def fsetxattr(fd, name, value, flags=0): + return handle_posix_error( + 'fsetxattr', c_fsetxattr(fd, name, value, len(value), flags)) + + def setxattr(path, name, value, flags=0, follow_symlinks=True): + if follow_symlinks: + return handle_posix_error( + 'setxattr', c_setxattr(path, name, value, len(value), flags)) + else: + return handle_posix_error( + 'lsetxattr', c_lsetxattr(path, name, value, len(value), flags)) + + def fremovexattr(fd, name): + return handle_posix_error('fremovexattr', c_fremovexattr(fd, name)) + + def removexattr(path, name, follow_symlinks=True): + if follow_symlinks: + return 
handle_posix_error('removexattr', c_removexattr(path, name)) + else: + return handle_posix_error('lremovexattr', c_lremovexattr(path, name)) + + def _unpack_attrs(attr_string): + result = attr_string.split('\0') + del result[-1] + return result + + def flistxattr(fd): + for size in buf_sizes: + with rffi.scoped_alloc_buffer(size) as buf: + res = c_flistxattr(fd, buf.raw, size) + if res < 0: + err = get_saved_errno() + if err != errno.ERANGE: + raise OSError(err, 'flistxattr failed') + else: + return _unpack_attrs(buf.str(res)) + else: + raise OSError(errno.ERANGE, 'flistxattr failed') + + def listxattr(path, follow_symlinks=True): + for size in buf_sizes: + with rffi.scoped_alloc_buffer(size) as buf: + if follow_symlinks: + res = c_listxattr(path, buf.raw, size) + else: + res = c_llistxattr(path, buf.raw, size) + if res < 0: + err = get_saved_errno() + if err != errno.ERANGE: + c_name = 'listxattr' if follow_symlinks else 'llistxattr' + raise OSError(err, c_name + 'failed') + else: + return _unpack_attrs(buf.str(res)) + else: + c_name = 'listxattr' if follow_symlinks else 'llistxattr' + raise OSError(errno.ERANGE, c_name + 'failed') diff --git a/rpython/rlib/test/test_rposix.py b/rpython/rlib/test/test_rposix.py --- a/rpython/rlib/test/test_rposix.py +++ b/rpython/rlib/test/test_rposix.py @@ -1,3 +1,6 @@ +from hypothesis import given, strategies as st, assume +import pytest + from rpython.rtyper.test.test_llinterp import interpret from rpython.translator.c.test.test_genc import compile from rpython.tool.pytest.expecttest import ExpectTest @@ -8,10 +11,10 @@ import py def rposix_requires(funcname): - return py.test.mark.skipif(not hasattr(rposix, funcname), + return pytest.mark.skipif(not hasattr(rposix, funcname), reason="Requires rposix.%s()" % funcname) -win_only = py.test.mark.skipif("os.name != 'nt'") +win_only = pytest.mark.skipif("os.name != 'nt'") class TestPosixFunction: def test_access(self): @@ -827,3 +830,61 @@ rposix.lockf(fd, rposix.F_ULOCK, 4) finally: os.close(fd) + +def check_working_xattr(): + fname = str(udir.join('xattr_test0.txt')) + with open(fname, 'wb'): + pass + try: + rposix.setxattr(fname, 'user.foo', '') + except OSError: + return False + else: + return True + + at pytest.mark.skipif(not (hasattr(rposix, 'getxattr') and check_working_xattr()), + reason="Requires working rposix.getxattr()") + at given( + name=st.text( + alphabet=st.characters(min_codepoint=1), min_size=1, max_size=10), + value=st.binary(max_size=10), + follow_symlinks=st.booleans(), use_fd=st.booleans()) +def test_xattr(name, value, follow_symlinks, use_fd): + assume(follow_symlinks or not use_fd) + name = 'user.' 
+ name.encode('utf-8') + fname = str(udir.join('xattr_test.txt')) + try: + os.unlink(fname) + except OSError: + pass + with open(fname, 'wb'): + pass + if use_fd: + file_id = os.open(fname, os.O_CREAT, 0777) + read, write, delete = rposix.fgetxattr, rposix.fsetxattr, rposix.fremovexattr + all_names = rposix.flistxattr + else: + file_id = fname + if follow_symlinks: + read, write, delete = rposix.getxattr, rposix.setxattr, rposix.removexattr + all_names = rposix.listxattr + else: + read = lambda *args, **kwargs: rposix.getxattr(*args, follow_symlinks=False, **kwargs) + write = lambda *args, **kwargs: rposix.setxattr(*args, follow_symlinks=False, **kwargs) + delete = lambda *args, **kwargs: rposix.removexattr(*args, follow_symlinks=False, **kwargs) + all_names = lambda *args, **kwargs: rposix.listxattr(*args, follow_symlinks=False, **kwargs) + try: + init_names = all_names(file_id) + with pytest.raises(OSError): + read(file_id, name) + write(file_id, name, value) + assert read(file_id, name) == value + assert set(all_names(file_id)) == set(init_names + [name]) + assert '' not in all_names(file_id) + delete(file_id, name) + with pytest.raises(OSError): + read(file_id, name) + assert set(all_names(file_id)) == set(init_names) + finally: + if use_fd: + os.close(file_id) From pypy.commits at gmail.com Tue Dec 19 13:06:37 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 19 Dec 2017 10:06:37 -0800 (PST) Subject: [pypy-commit] pypy default: merge again fix-vmprof-stacklet-switch-2: this should fix translation on platforms where vmprof is not supported, and it also refactor rvmprof.cintf to be slightly saner Message-ID: <5a39552d.910f1c0a.5affe.8544@mx.google.com> Author: Antonio Cuni Branch: Changeset: r93500:e742e3594267 Date: 2017-12-19 19:04 +0100 http://bitbucket.org/pypy/pypy/changeset/e742e3594267/ Log: merge again fix-vmprof-stacklet-switch-2: this should fix translation on platforms where vmprof is not supported, and it also refactor rvmprof.cintf to be slightly saner diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -14,67 +14,75 @@ class VMProfPlatformUnsupported(Exception): pass +# vmprof works only on x86 for now +IS_SUPPORTED = host_platform.machine() in ('i686', 'x86_64') + ROOT = py.path.local(rpythonroot).join('rpython', 'rlib', 'rvmprof') SRC = ROOT.join('src') SHARED = SRC.join('shared') BACKTRACE = SHARED.join('libbacktrace') -compile_extra = ['-DRPYTHON_VMPROF'] -separate_module_files = [ - SHARED.join('symboltable.c'), - SHARED.join('vmprof_unix.c') -] -if sys.platform.startswith('linux'): - separate_module_files += [ - BACKTRACE.join('atomic.c'), - BACKTRACE.join('backtrace.c'), - BACKTRACE.join('state.c'), - BACKTRACE.join('elf.c'), - BACKTRACE.join('dwarf.c'), - BACKTRACE.join('fileline.c'), - BACKTRACE.join('mmap.c'), - BACKTRACE.join('mmapio.c'), - BACKTRACE.join('posix.c'), - BACKTRACE.join('sort.c'), +def make_eci(): + if make_eci.called: + raise ValueError("make_eci() should be called at most once") + # + compile_extra = ['-DRPYTHON_VMPROF'] + separate_module_files = [ + SHARED.join('symboltable.c'), + SHARED.join('vmprof_unix.c') ] - _libs = ['dl'] - compile_extra += ['-DVMPROF_UNIX'] - compile_extra += ['-DVMPROF_LINUX'] -elif sys.platform == 'win32': - compile_extra += ['-DVMPROF_WINDOWS'] - separate_module_files = [SHARED.join('vmprof_win.c')] - _libs = [] -else: - # Guessing a BSD-like Unix platform - compile_extra += ['-DVMPROF_UNIX'] - compile_extra += 
['-DVMPROF_MAC'] - if sys.platform.startswith('freebsd'): - _libs = ['unwind'] + if sys.platform.startswith('linux'): + separate_module_files += [ + BACKTRACE.join('atomic.c'), + BACKTRACE.join('backtrace.c'), + BACKTRACE.join('state.c'), + BACKTRACE.join('elf.c'), + BACKTRACE.join('dwarf.c'), + BACKTRACE.join('fileline.c'), + BACKTRACE.join('mmap.c'), + BACKTRACE.join('mmapio.c'), + BACKTRACE.join('posix.c'), + BACKTRACE.join('sort.c'), + ] + _libs = ['dl'] + compile_extra += ['-DVMPROF_UNIX'] + compile_extra += ['-DVMPROF_LINUX'] + elif sys.platform == 'win32': + compile_extra += ['-DVMPROF_WINDOWS'] + separate_module_files = [SHARED.join('vmprof_win.c')] + _libs = [] else: - _libs = [] + # Guessing a BSD-like Unix platform + compile_extra += ['-DVMPROF_UNIX'] + compile_extra += ['-DVMPROF_MAC'] + if sys.platform.startswith('freebsd'): + _libs = ['unwind'] + else: + _libs = [] - -eci_kwds = dict( - include_dirs = [SRC, SHARED, BACKTRACE], - includes = ['rvmprof.h','vmprof_stack.h'], - libraries = _libs, - separate_module_files = [ - SRC.join('rvmprof.c'), - SHARED.join('compat.c'), - SHARED.join('machine.c'), - SHARED.join('vmp_stack.c'), - SHARED.join('vmprof_memory.c'), - SHARED.join('vmprof_common.c'), - # symbol table already in separate_module_files - ] + separate_module_files, - post_include_bits=[], - compile_extra=compile_extra - ) -if sys.platform != 'win32': - eci_kwds['separate_module_files'].append( - SHARED.join('vmprof_mt.c'), - ) -global_eci = ExternalCompilationInfo(**eci_kwds) + eci_kwds = dict( + include_dirs = [SRC, SHARED, BACKTRACE], + includes = ['rvmprof.h','vmprof_stack.h'], + libraries = _libs, + separate_module_files = [ + SRC.join('rvmprof.c'), + SHARED.join('compat.c'), + SHARED.join('machine.c'), + SHARED.join('vmp_stack.c'), + SHARED.join('vmprof_memory.c'), + SHARED.join('vmprof_common.c'), + # symbol table already in separate_module_files + ] + separate_module_files, + post_include_bits=[], + compile_extra=compile_extra + ) + if sys.platform != 'win32': + eci_kwds['separate_module_files'].append( + SHARED.join('vmprof_mt.c'), + ) + make_eci.called = True + return ExternalCompilationInfo(**eci_kwds), eci_kwds +make_eci.called = False def configure_libbacktrace_linux(): bits = 32 if sys.maxsize == 2**31-1 else 64 @@ -85,14 +93,17 @@ shutil.copy(str(BACKTRACE.join(specific_config)), str(config)) def setup(): + if not IS_SUPPORTED: + raise VMProfPlatformUnsupported + if sys.platform.startswith('linux'): configure_libbacktrace_linux() + eci, eci_kwds = make_eci() eci_kwds['compile_extra'].append('-DRPYTHON_LL2CTYPES') platform.verify_eci(ExternalCompilationInfo( **eci_kwds)) - eci = global_eci vmprof_init = rffi.llexternal("vmprof_init", [rffi.INT, rffi.DOUBLE, rffi.INT, rffi.INT, rffi.CCHARP, rffi.INT, rffi.INT], diff --git a/rpython/rlib/rvmprof/dummy.py b/rpython/rlib/rvmprof/dummy.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/dummy.py @@ -0,0 +1,26 @@ +from rpython.rlib.objectmodel import specialize + +class DummyVMProf(object): + + def __init__(self): + self._unique_id = 0 + + def register_code_object_class(self, CodeClass, full_name_func): + CodeClass._vmprof_unique_id = self._unique_id + self._unique_id += 1 + + @specialize.argtype(1) + def register_code(self, code, full_name_func): + pass + + def enable(self, fileno, interval, memory=0, native=0, real_time=0): + pass + + def disable(self): + pass + + def start_sampling(self): + pass + + def stop_sampling(self): + pass diff --git a/rpython/rlib/rvmprof/rvmprof.py 
b/rpython/rlib/rvmprof/rvmprof.py --- a/rpython/rlib/rvmprof/rvmprof.py +++ b/rpython/rlib/rvmprof/rvmprof.py @@ -2,6 +2,7 @@ from rpython.rlib.objectmodel import specialize, we_are_translated, not_rpython from rpython.rlib import jit, rposix, rgc from rpython.rlib.rvmprof import cintf +from rpython.rlib.rvmprof.dummy import DummyVMProf from rpython.rtyper.annlowlevel import cast_instance_to_gcref from rpython.rtyper.annlowlevel import cast_base_ptr_to_instance from rpython.rtyper.lltypesystem import lltype, llmemory, rffi @@ -34,6 +35,9 @@ return [] class VMProf(object): + """ + NOTE: the API of this class should be kept in sync with dummy.DummyVMProf + """ _immutable_fields_ = ['is_enabled?'] @@ -255,5 +259,8 @@ def _get_vmprof(): global _vmprof_instance if _vmprof_instance is None: - _vmprof_instance = VMProf() + try: + _vmprof_instance = VMProf() + except cintf.VMProfPlatformUnsupported: + _vmprof_instance = DummyVMProf() return _vmprof_instance From pypy.commits at gmail.com Tue Dec 19 13:06:35 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 19 Dec 2017 10:06:35 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: close this branch again Message-ID: <5a39552b.95a2df0a.e9846.681a@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93499:40f2b92848b2 Date: 2017-12-19 19:03 +0100 http://bitbucket.org/pypy/pypy/changeset/40f2b92848b2/ Log: close this branch again From pypy.commits at gmail.com Tue Dec 19 15:55:31 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 19 Dec 2017 12:55:31 -0800 (PST) Subject: [pypy-commit] pypy py3.5: update version to 5.11 Message-ID: <5a397cc3.068a1c0a.cca17.bbce@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r93502:24caad1df703 Date: 2017-12-19 22:52 +0200 http://bitbucket.org/pypy/pypy/changeset/24caad1df703/ Log: update version to 5.11 diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h --- a/pypy/module/cpyext/include/patchlevel.h +++ b/pypy/module/cpyext/include/patchlevel.h @@ -29,8 +29,8 @@ #define PY_VERSION "3.5.3" /* PyPy version as a string */ -#define PYPY_VERSION "5.10.0-alpha0" -#define PYPY_VERSION_NUM 0x050A0000 +#define PYPY_VERSION "5.11.0-alpha0" +#define PYPY_VERSION_NUM 0x050B0000 /* Defined to mean a PyPy where cpyext holds more regular references to PyObjects, e.g. staying alive as long as the internal PyPy object diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py --- a/pypy/module/sys/version.py +++ b/pypy/module/sys/version.py @@ -10,7 +10,7 @@ #XXX # sync CPYTHON_VERSION with patchlevel.h, package.py CPYTHON_API_VERSION = 1013 #XXX # sync with include/modsupport.h -PYPY_VERSION = (5, 10, 0, "alpha", 0) #XXX # sync patchlevel.h +PYPY_VERSION = (5, 11, 0, "alpha", 0) #XXX # sync patchlevel.h import pypy From pypy.commits at gmail.com Tue Dec 19 15:55:29 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 19 Dec 2017 12:55:29 -0800 (PST) Subject: [pypy-commit] pypy default: add release note to index Message-ID: <5a397cc1.0ae61c0a.9852c.9c45@mx.google.com> Author: Matti Picus Branch: Changeset: r93501:7a2df755e1a5 Date: 2017-12-19 22:51 +0200 http://bitbucket.org/pypy/pypy/changeset/7a2df755e1a5/ Log: add release note to index diff --git a/pypy/doc/index-of-release-notes.rst b/pypy/doc/index-of-release-notes.rst --- a/pypy/doc/index-of-release-notes.rst +++ b/pypy/doc/index-of-release-notes.rst @@ -6,6 +6,7 @@ .. 
toctree:: + release-v5.10.0.rst release-v5.9.0.rst release-v5.8.0.rst release-v5.7.1.rst From pypy.commits at gmail.com Tue Dec 19 16:06:26 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 19 Dec 2017 13:06:26 -0800 (PST) Subject: [pypy-commit] pypy default: tweak release note Message-ID: <5a397f52.c4141c0a.5696d.11f5@mx.google.com> Author: Matti Picus Branch: Changeset: r93503:e167b99ca093 Date: 2017-12-19 23:05 +0200 http://bitbucket.org/pypy/pypy/changeset/e167b99ca093/ Log: tweak release note diff --git a/pypy/doc/release-v5.10.0.rst b/pypy/doc/release-v5.10.0.rst --- a/pypy/doc/release-v5.10.0.rst +++ b/pypy/doc/release-v5.10.0.rst @@ -87,6 +87,8 @@ * issue #2677 copy pure c PyBuffer_{From,To}Contiguous from cpython * issue #2682, split firstword on any whitespace in sqlite3 * ctypes: allow ptr[0] = foo when ptr is a pointer to struct -* matplotlib works with tgagg backend +* matplotlib will work with tkagg backend once `matplotlib pr #9356`_ is merged * improvements to utf32 surrogate handling * cffi version bump to 1.11.2 + +.. _`matplotlib pr #9356`: https://github.com/matplotlib/matplotlib/pull/9356 From pypy.commits at gmail.com Wed Dec 20 01:40:59 2017 From: pypy.commits at gmail.com (arigo) Date: Tue, 19 Dec 2017 22:40:59 -0800 (PST) Subject: [pypy-commit] pypy default: Make virtualenv work on Windows too Message-ID: <5a3a05fb.a291df0a.6e8d0.7910@mx.google.com> Author: Armin Rigo Branch: Changeset: r93504:c1c8d16890dd Date: 2017-12-20 07:40 +0100 http://bitbucket.org/pypy/pypy/changeset/c1c8d16890dd/ Log: Make virtualenv work on Windows too diff --git a/lib-python/2.7/subprocess.py b/lib-python/2.7/subprocess.py --- a/lib-python/2.7/subprocess.py +++ b/lib-python/2.7/subprocess.py @@ -1296,7 +1296,7 @@ 'copyfile' in caller.f_globals): dest_dir = sys.pypy_resolvedirof(target_executable) src_dir = sys.pypy_resolvedirof(sys.executable) - for libname in ['libpypy-c.so', 'libpypy-c.dylib']: + for libname in ['libpypy-c.so', 'libpypy-c.dylib', 'libpypy-c.dll']: dest_library = os.path.join(dest_dir, libname) src_library = os.path.join(src_dir, libname) if os.path.exists(src_library): From pypy.commits at gmail.com Wed Dec 20 01:45:50 2017 From: pypy.commits at gmail.com (arigo) Date: Tue, 19 Dec 2017 22:45:50 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Port of c1c8d16890dd Message-ID: <5a3a071e.4dd91c0a.5f1f0.dfe3@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93505:fd74b9439261 Date: 2017-12-20 07:45 +0100 http://bitbucket.org/pypy/pypy/changeset/fd74b9439261/ Log: Port of c1c8d16890dd diff --git a/lib-python/3/subprocess.py b/lib-python/3/subprocess.py --- a/lib-python/3/subprocess.py +++ b/lib-python/3/subprocess.py @@ -1560,7 +1560,7 @@ 'copyfile' in caller.f_globals): dest_dir = sys.pypy_resolvedirof(target_executable) src_dir = sys.pypy_resolvedirof(sys.executable) - for libname in ['libpypy3-c.so', 'libpypy3-c.dylib']: + for libname in ['libpypy3-c.so', 'libpypy3-c.dylib', 'libpypy3-c.dll']: dest_library = os.path.join(dest_dir, libname) src_library = os.path.join(src_dir, libname) if os.path.exists(src_library): From pypy.commits at gmail.com Wed Dec 20 02:00:03 2017 From: pypy.commits at gmail.com (arigo) Date: Tue, 19 Dec 2017 23:00:03 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Add a dummy faulthandler.py for the CPython test suite Message-ID: <5a3a0a73.02be1c0a.e187c.7d2d@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93506:3c8c318aa8d4 Date: 2017-12-20 07:59 +0100 http://bitbucket.org/pypy/pypy/changeset/3c8c318aa8d4/ Log: Add a 
dummy faulthandler.py for the CPython test suite diff --git a/lib_pypy/faulthandler.py b/lib_pypy/faulthandler.py new file mode 100644 --- /dev/null +++ b/lib_pypy/faulthandler.py @@ -0,0 +1,3 @@ +# This is only imported for platforms where the built-in faulthandler module is not +# available. It provides no function at all so far, but it is enough to start the +# CPython test suite. \ No newline at end of file From pypy.commits at gmail.com Wed Dec 20 02:06:42 2017 From: pypy.commits at gmail.com (arigo) Date: Tue, 19 Dec 2017 23:06:42 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Add a dummy enum_certificates() function, for now Message-ID: <5a3a0c02.8a871c0a.5d16b.d088@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93507:cffba7a52fb2 Date: 2017-12-20 08:05 +0100 http://bitbucket.org/pypy/pypy/changeset/cffba7a52fb2/ Log: Add a dummy enum_certificates() function, for now diff --git a/lib_pypy/_ssl/__init__.py b/lib_pypy/_ssl/__init__.py --- a/lib_pypy/_ssl/__init__.py +++ b/lib_pypy/_ssl/__init__.py @@ -14,3 +14,14 @@ # RAND_egd is optional and might not be available on e.g. libressl if hasattr(_stdssl, 'RAND_egd'): RAND_egd = builtinify(RAND_egd) + +import sys +if sys.platform == "win32" and 'enum_certificates' not in globals(): + def enum_certificates(*args, **kwds): + import warnings + warnings.warn("ssl.enum_certificates() is not implemented") + return [] + def enum_crls(*args, **kwds): + import warnings + warnings.warn("ssl.enum_crls() is not implemented") + return [] From pypy.commits at gmail.com Wed Dec 20 05:16:27 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 20 Dec 2017 02:16:27 -0800 (PST) Subject: [pypy-commit] pypy default: this method of determining the CPU is broken in presence of cross-translations (like we do e.g. on ARM). Try to use detect_cpu, which seems to have logic to handle that Message-ID: <5a3a387b.8b8a1c0a.29421.5d2c@mx.google.com> Author: Antonio Cuni Branch: Changeset: r93508:b1b0f51304ce Date: 2017-12-20 11:15 +0100 http://bitbucket.org/pypy/pypy/changeset/b1b0f51304ce/ Log: this method of determining the CPU is broken in presence of cross- translations (like we do e.g. on ARM). 
Try to use detect_cpu, which seems to have logic to handle that diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -10,12 +10,13 @@ from rpython.rlib import rthread, jit from rpython.rlib.objectmodel import we_are_translated from rpython.config.translationoption import get_translation_config +from rpython.jit.backend import detect_cpu class VMProfPlatformUnsupported(Exception): pass # vmprof works only on x86 for now -IS_SUPPORTED = host_platform.machine() in ('i686', 'x86_64') +IS_SUPPORTED = detect_cpu.autodetect().startswith('x86') ROOT = py.path.local(rpythonroot).join('rpython', 'rlib', 'rvmprof') SRC = ROOT.join('src') From pypy.commits at gmail.com Wed Dec 20 06:15:26 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 20 Dec 2017 03:15:26 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Enough os.scandir() to pass our own tests on Windows Message-ID: <5a3a464e.f4acdf0a.e88f3.2e84@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93510:6deed1172830 Date: 2017-12-20 10:11 +0100 http://bitbucket.org/pypy/pypy/changeset/6deed1172830/ Log: Enough os.scandir() to pass our own tests on Windows diff --git a/pypy/module/posix/interp_scandir.py b/pypy/module/posix/interp_scandir.py --- a/pypy/module/posix/interp_scandir.py +++ b/pypy/module/posix/interp_scandir.py @@ -135,9 +135,6 @@ assert 0 <= rposix_scandir.DT_LNK <= 255 FLAG_STAT = 256 FLAG_LSTAT = 512 -else: - FLAG_STAT = 256 - # XXX lstat and symlinks are not implemented on Windows class W_DirEntry(W_Root): @@ -149,9 +146,9 @@ self.name = name # always bytes on Posix; always unicode on Windows self.inode = inode self.flags = known_type - assert known_type == (known_type & 255) # if not _WIN32: + assert known_type == (known_type & 255) w_name = self.space.newbytes(name) if not scandir_iterator.result_is_bytes: w_name = self.space.fsdecode(w_name) @@ -178,93 +175,109 @@ # the end of the class. Every method only calls methods *before* # it in program order, so there is no cycle. - def get_lstat(self): - """Get the lstat() of the direntry.""" - if (self.flags & FLAG_LSTAT) == 0: - # Unlike CPython, try to use fstatat() if possible - dirfd = self.scandir_iterator.dirfd - if dirfd != -1 and rposix.HAVE_FSTATAT: - st = rposix_stat.fstatat(self.name, dirfd, - follow_symlinks=False) - else: - path = self.space.fsencode_w(self.fget_path(self.space)) - st = rposix_stat.lstat(path) - self.d_lstat = st - self.flags |= FLAG_LSTAT - return self.d_lstat - - def get_stat(self): - """Get the stat() of the direntry. This is implemented in - such a way that it won't do both a stat() and a lstat(). - """ - if (self.flags & FLAG_STAT) == 0: - # We don't have the 'd_stat'. If the known_type says the - # direntry is not a DT_LNK, then try to get and cache the - # 'd_lstat' instead. Then, or if we already have a - # 'd_lstat' from before, *and* if the 'd_lstat' is not a - # S_ISLNK, we can reuse it unchanged for 'd_stat'. - # - # Note how, in the common case where the known_type says - # it is a DT_REG or DT_DIR, then we call and cache lstat() - # and that's it. Also note that in a d_type-less OS or on - # a filesystem that always answer DT_UNKNOWN, this method - # will instead only call at most stat(), but not cache it - # as 'd_lstat'. 
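+                # Example: for a regular file whose readdir d_type is DT_REG,
+                # the branch above does a single lstat() and caches it; since
+                # stat() and lstat() agree for anything that is not a symlink,
+                # the cached value also answers stat().  A DT_LNK or
+                # DT_UNKNOWN entry skips that branch and falls through to a
+                # plain stat() call below.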
- known_type = self.flags & 255 - if (known_type != rposix_scandir.DT_UNKNOWN and - known_type != rposix_scandir.DT_LNK): - self.get_lstat() # fill the 'd_lstat' cache - have_lstat = True - else: - have_lstat = (self.flags & FLAG_LSTAT) != 0 - - if have_lstat: - # We have the lstat() but not the stat(). They are - # the same, unless the 'd_lstat' is a S_IFLNK. - must_call_stat = stat.S_ISLNK(self.d_lstat.st_mode) - else: - must_call_stat = True - - if must_call_stat: - # Must call stat(). Try to use fstatat() if possible + if not _WIN32: + def get_lstat(self): + """Get the lstat() of the direntry.""" + if (self.flags & FLAG_LSTAT) == 0: + # Unlike CPython, try to use fstatat() if possible dirfd = self.scandir_iterator.dirfd - if dirfd != -1 and rposix.HAVE_FSTATAT: + if rposix.HAVE_FSTATAT and dirfd != -1: st = rposix_stat.fstatat(self.name, dirfd, - follow_symlinks=True) + follow_symlinks=False) else: path = self.space.fsencode_w(self.fget_path(self.space)) - st = rposix_stat.stat(path) + st = rposix_stat.lstat(path) + self.d_lstat = st + self.flags |= FLAG_LSTAT + return self.d_lstat + + def get_stat(self): + """Get the stat() of the direntry. This is implemented in + such a way that it won't do both a stat() and a lstat(). + """ + if (self.flags & FLAG_STAT) == 0: + # We don't have the 'd_stat'. If the known_type says the + # direntry is not a DT_LNK, then try to get and cache the + # 'd_lstat' instead. Then, or if we already have a + # 'd_lstat' from before, *and* if the 'd_lstat' is not a + # S_ISLNK, we can reuse it unchanged for 'd_stat'. + # + # Note how, in the common case where the known_type says + # it is a DT_REG or DT_DIR, then we call and cache lstat() + # and that's it. Also note that in a d_type-less OS or on + # a filesystem that always answer DT_UNKNOWN, this method + # will instead only call at most stat(), but not cache it + # as 'd_lstat'. + known_type = self.flags & 255 + if (known_type != rposix_scandir.DT_UNKNOWN and + known_type != rposix_scandir.DT_LNK): + self.get_lstat() # fill the 'd_lstat' cache + have_lstat = True + else: + have_lstat = (self.flags & FLAG_LSTAT) != 0 + + if have_lstat: + # We have the lstat() but not the stat(). They are + # the same, unless the 'd_lstat' is a S_IFLNK. + must_call_stat = stat.S_ISLNK(self.d_lstat.st_mode) + else: + must_call_stat = True + + if must_call_stat: + # Must call stat(). Try to use fstatat() if possible + dirfd = self.scandir_iterator.dirfd + if dirfd != -1 and rposix.HAVE_FSTATAT: + st = rposix_stat.fstatat(self.name, dirfd, + follow_symlinks=True) + else: + path = self.space.fsencode_w(self.fget_path(self.space)) + st = rposix_stat.stat(path) + else: + st = self.d_lstat + + self.d_stat = st + self.flags |= FLAG_STAT + return self.d_stat + + def get_stat_or_lstat(self, follow_symlinks): + if follow_symlinks: + return self.get_stat() else: - st = self.d_lstat + return self.get_lstat() - self.d_stat = st - self.flags |= FLAG_STAT - return self.d_stat + def check_mode(self, follow_symlinks): + """Get the stat() or lstat() of the direntry, and return the + S_IFMT. If calling stat()/lstat() gives us ENOENT, return -1 + instead; it is better to give up and answer "no, not this type" + to requests, rather than propagate the error. 
+ """ + try: + st = self.get_stat_or_lstat(follow_symlinks) + except OSError as e: + if e.errno == ENOENT: # not found + return -1 + raise wrap_oserror2(self.space, e, self.fget_path(self.space), + eintr_retry=False) + return stat.S_IFMT(st.st_mode) - def get_stat_or_lstat(self, follow_symlinks): - if follow_symlinks: - return self.get_stat() - else: - return self.get_lstat() + else: + # Win32 + stat_cached = False - def check_mode(self, follow_symlinks): - """Get the stat() or lstat() of the direntry, and return the - S_IFMT. If calling stat()/lstat() gives us ENOENT, return -1 - instead; it is better to give up and answer "no, not this type" - to requests, rather than propagate the error. - """ - try: - st = self.get_stat_or_lstat(follow_symlinks) - except OSError as e: - if e.errno == ENOENT: # not found - return -1 - raise wrap_oserror2(self.space, e, self.fget_path(self.space), - eintr_retry=False) - return stat.S_IFMT(st.st_mode) + def check_mode(self, follow_symlinks): + return self.flags + + def get_stat_or_lstat(self, follow_symlinks): # 'follow_symlinks' ignored + if not self.stat_cached: + path = self.space.unicode_w(self.fget_path(self.space)) + self.d_stat = rposix_stat.stat(path) + self.stat_cached = True + return self.d_stat + def is_dir(self, follow_symlinks): known_type = self.flags & 255 - if known_type != rposix_scandir.DT_UNKNOWN: + if not _WIN32 and known_type != rposix_scandir.DT_UNKNOWN: if known_type == rposix_scandir.DT_DIR: return True elif follow_symlinks and known_type == rposix_scandir.DT_LNK: @@ -275,7 +288,7 @@ def is_file(self, follow_symlinks): known_type = self.flags & 255 - if known_type != rposix_scandir.DT_UNKNOWN: + if not _WIN32 and known_type != rposix_scandir.DT_UNKNOWN: if known_type == rposix_scandir.DT_REG: return True elif follow_symlinks and known_type == rposix_scandir.DT_LNK: @@ -287,7 +300,7 @@ def is_symlink(self): """Check if the direntry is a symlink. 
May get the lstat().""" known_type = self.flags & 255 - if known_type != rposix_scandir.DT_UNKNOWN: + if not _WIN32 and known_type != rposix_scandir.DT_UNKNOWN: return known_type == rposix_scandir.DT_LNK return self.check_mode(follow_symlinks=False) == stat.S_IFLNK @@ -316,7 +329,15 @@ return build_stat_result(space, st) def descr_inode(self, space): - return space.newint(self.inode) + inode = self.inode + if inode is None: # _WIN32 + try: + st = self.get_stat_or_lstat(follow_symlinks=False) + except OSError as e: + raise wrap_oserror2(space, e, self.fget_path(space), + eintr_retry=False) + inode = st.st_ino + return space.newint(inode) W_DirEntry.typedef = TypeDef( diff --git a/pypy/module/posix/test/test_scandir.py b/pypy/module/posix/test/test_scandir.py --- a/pypy/module/posix/test/test_scandir.py +++ b/pypy/module/posix/test/test_scandir.py @@ -1,4 +1,5 @@ import sys, os +import py from rpython.tool.udir import udir from pypy.module.posix.test import test_posix2 @@ -97,6 +98,8 @@ assert d.stat().st_mode & 0o170000 == 0o100000 # S_IFREG assert d.stat().st_size == 0 + @py.test.mark.skipif(sys.platform == "win32", + reason="no symlink support so far") def test_stat4(self): posix = self.posix d = next(posix.scandir(self.dir4)) @@ -126,6 +129,8 @@ assert not d.is_file(follow_symlinks=False) assert d.is_dir(follow_symlinks=False) + @py.test.mark.skipif(sys.platform == "win32", + reason="no symlink support so far") def test_dir3(self): posix = self.posix d = next(posix.scandir(self.dir3)) @@ -136,6 +141,8 @@ assert d.is_file(follow_symlinks=True) assert not d.is_file(follow_symlinks=False) + @py.test.mark.skipif(sys.platform == "win32", + reason="no symlink support so far") def test_dir4(self): posix = self.posix d = next(posix.scandir(self.dir4)) @@ -146,6 +153,8 @@ assert d.is_dir(follow_symlinks=True) assert not d.is_dir(follow_symlinks=False) + @py.test.mark.skipif(sys.platform == "win32", + reason="no symlink support so far") def test_dir5(self): posix = self.posix d = next(posix.scandir(self.dir5)) @@ -155,6 +164,8 @@ assert d.is_symlink() raises(OSError, d.stat) + @py.test.mark.skipif(sys.platform == "win32", + reason="no symlink support so far") def test_dir6(self): posix = self.posix d = next(posix.scandir(self.dir6)) diff --git a/rpython/rlib/rposix_scandir.py b/rpython/rlib/rposix_scandir.py --- a/rpython/rlib/rposix_scandir.py +++ b/rpython/rlib/rposix_scandir.py @@ -1,6 +1,7 @@ from rpython.rlib import rposix, rwin32 from rpython.rlib.objectmodel import specialize from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.rlib.rarithmetic import intmask if not rwin32.WIN32: @@ -54,8 +55,10 @@ else: # ----- Win32 version ----- + import stat from rpython.rlib._os_support import unicode_traits from rpython.rlib.rwin32file import make_win32_traits + from rpython.rlib import rposix_stat win32traits = make_win32_traits(unicode_traits) @@ -70,21 +73,6 @@ if self.hFindFile != rwin32.INVALID_HANDLE_VALUE: win32traits.FindClose(self.hFindFile) - class DirEntP: - def __init__(self, filedata): - self.filedata = filedata - # ^^^ note that this structure is overwritten by the next() call, so - # we must copy a few pieces of information out of it now: - self.dwFileAttributes = filedata.c_dwFileAttributes - self.CreationTimeLow = filedata.c_ftCreationTime.c_dwLowDateTime - self.CreationTimeHigh = filedata.c_ftCreationTime.c_dwHighDateTime - self.LastAccessTimeLow = filedata.c_ftLastAccessTime.c_dwLowDateTime - self.LastAccessTimeHigh = filedata.c_ftLastAccessTime.c_dwHighDateTime - 
self.LastWriteTimeLow = filedata.c_ftLastWriteTime.c_dwLowDateTime - self.LastWriteTimeHigh = filedata.c_ftLastWriteTime.c_dwHighDateTime - self.nFileSizeHigh = filedata.c_nFileSizeHigh - self.nFileSizeLow = filedata.c_nFileSizeLow - # must only be called with unicode! def opendir(path): @@ -113,7 +101,7 @@ Use the methods has_xxx() and get_xxx() to read from that opaque object. The opaque object is valid until the next time nextentry() or closedir() is called. This may raise - WindowsError, or return None when exhausted. Note + WindowsError, or return NULL when exhausted. Note that this doesn't filter out the "." and ".." entries. """ if dirp.first_time: @@ -123,16 +111,18 @@ # error or no more files error = rwin32.GetLastError_saved() if error == win32traits.ERROR_NO_MORE_FILES: - return None + return lltype.nullptr(win32traits.WIN32_FIND_DATA) raise WindowsError(error, "FindNextFileW failed") - return DirEntP(dirp.filedata) + return dirp.filedata - def get_name_unicode(direntp): + def get_name_unicode(filedata): return unicode_traits.charp2str(rffi.cast(unicode_traits.CCHARP, - direntp.filedata.c_cFileName)) + filedata.c_cFileName)) def get_known_type(filedata): - return 0 + attr = filedata.c_dwFileAttributes + st_mode = rposix_stat.win32_attributes_to_mode(win32traits, attr) + return stat.S_IFMT(st_mode) def get_inode(filedata): return None From pypy.commits at gmail.com Wed Dec 20 06:15:28 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 20 Dec 2017 03:15:28 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Obscure annotation fixes: propagate no_nul a tiny bit more Message-ID: <5a3a4650.64b8df0a.a99a1.5b08@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93511:2bd764ec6d22 Date: 2017-12-20 10:29 +0100 http://bitbucket.org/pypy/pypy/changeset/2bd764ec6d22/ Log: Obscure annotation fixes: propagate no_nul a tiny bit more diff --git a/rpython/annotator/binaryop.py b/rpython/annotator/binaryop.py --- a/rpython/annotator/binaryop.py +++ b/rpython/annotator/binaryop.py @@ -381,16 +381,14 @@ class __extend__(pairtype(SomeChar, SomeUnicodeCodePoint), pairtype(SomeUnicodeCodePoint, SomeChar)): def union((uchr1, uchr2)): - return SomeUnicodeCodePoint() + no_nul = uchr1.no_nul and uchr2.no_nul + return SomeUnicodeCodePoint(no_nul=no_nul) class __extend__(pairtype(SomeUnicodeCodePoint, SomeUnicodeCodePoint)): def union((uchr1, uchr2)): no_nul = uchr1.no_nul and uchr2.no_nul return SomeUnicodeCodePoint(no_nul=no_nul) - def add((chr1, chr2)): - return SomeUnicodeString() - class __extend__(pairtype(SomeString, SomeUnicodeString), pairtype(SomeUnicodeString, SomeString)): def mod((str, unistring)): From pypy.commits at gmail.com Wed Dec 20 06:15:24 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 20 Dec 2017 03:15:24 -0800 (PST) Subject: [pypy-commit] pypy py3.5: In-progress: os.scandir() on Windows Message-ID: <5a3a464c.8a5b1c0a.1b61c.9bbd@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93509:4bb7cf3fd5a1 Date: 2017-12-20 09:27 +0100 http://bitbucket.org/pypy/pypy/changeset/4bb7cf3fd5a1/ Log: In-progress: os.scandir() on Windows diff --git a/lib-python/3/ssl.py b/lib-python/3/ssl.py --- a/lib-python/3/ssl.py +++ b/lib-python/3/ssl.py @@ -140,6 +140,23 @@ except NameError: _SSLv2_IF_EXISTS = None + + + +import os +class DirEntry: + def __init__(self, path, name): + self.path = os.path.join(path, name) + self.name = name + def is_dir(self): + return os.path.isdir(self.path) +def myscandir(path='.'): + for name in os.listdir(path): + yield DirEntry(path, name) 
+os.scandir = myscandir + + + if sys.platform == "win32": from _ssl import enum_certificates, enum_crls diff --git a/pypy/module/posix/interp_scandir.py b/pypy/module/posix/interp_scandir.py --- a/pypy/module/posix/interp_scandir.py +++ b/pypy/module/posix/interp_scandir.py @@ -13,28 +13,39 @@ def scandir(space, w_path=None): "scandir(path='.') -> iterator of DirEntry objects for given path" - if _WIN32: - raise NotImplementedError("XXX WIN32") - if space.is_none(w_path): w_path = space.newunicode(u".") - if space.isinstance_w(w_path, space.w_bytes): - path_bytes = space.bytes0_w(w_path) - result_is_bytes = True + + if not _WIN32: + if space.isinstance_w(w_path, space.w_bytes): + path = space.bytes0_w(w_path) + result_is_bytes = True + else: + path = space.fsencode_w(w_path) + result_is_bytes = False else: - path_bytes = space.fsencode_w(w_path) + if space.isinstance_w(w_path, space.w_bytes): + raise oefmt(space.w_TypeError, "os.scandir() doesn't support bytes path" + " on Windows, use Unicode instead") + path = space.unicode_w(w_path) result_is_bytes = False + # 'path' is always bytes on posix and always unicode on windows try: - dirp = rposix_scandir.opendir(path_bytes) + dirp = rposix_scandir.opendir(path) except OSError as e: raise wrap_oserror2(space, e, w_path, eintr_retry=False) - path_prefix = path_bytes - if len(path_prefix) > 0 and path_prefix[-1] != '/': - path_prefix += '/' - w_path_prefix = space.newbytes(path_prefix) - if not result_is_bytes: - w_path_prefix = space.fsdecode(w_path_prefix) + path_prefix = path + if not _WIN32: + if len(path_prefix) > 0 and path_prefix[-1] != '/': + path_prefix += '/' + w_path_prefix = space.newbytes(path_prefix) + if not result_is_bytes: + w_path_prefix = space.fsdecode(w_path_prefix) + else: + if len(path_prefix) > 0 and path_prefix[-1] not in (u'\\', u'/', u':'): + path_prefix += u'\\' + w_path_prefix = space.newunicode(path_prefix) if rposix.HAVE_FSTATAT: dirfd = rposix.c_dirfd(dirp) else: @@ -89,10 +100,14 @@ eintr_retry=False)) if not entry: raise self.fail() - assert rposix_scandir.has_name_bytes(entry) - name = rposix_scandir.get_name_bytes(entry) - if name != '.' and name != '..': - break + if not _WIN32: + name = rposix_scandir.get_name_bytes(entry) + if name != '.' and name != '..': + break + else: + name = rposix_scandir.get_name_unicode(entry) + if name != u'.' 
and name != u'..': + break # known_type = rposix_scandir.get_known_type(entry) inode = rposix_scandir.get_inode(entry) @@ -113,12 +128,16 @@ class FileNotFound(Exception): pass -assert 0 <= rposix_scandir.DT_UNKNOWN <= 255 -assert 0 <= rposix_scandir.DT_REG <= 255 -assert 0 <= rposix_scandir.DT_DIR <= 255 -assert 0 <= rposix_scandir.DT_LNK <= 255 -FLAG_STAT = 256 -FLAG_LSTAT = 512 +if not _WIN32: + assert 0 <= rposix_scandir.DT_UNKNOWN <= 255 + assert 0 <= rposix_scandir.DT_REG <= 255 + assert 0 <= rposix_scandir.DT_DIR <= 255 + assert 0 <= rposix_scandir.DT_LNK <= 255 + FLAG_STAT = 256 + FLAG_LSTAT = 512 +else: + FLAG_STAT = 256 + # XXX lstat and symlinks are not implemented on Windows class W_DirEntry(W_Root): @@ -127,14 +146,17 @@ def __init__(self, scandir_iterator, name, known_type, inode): self.space = scandir_iterator.space self.scandir_iterator = scandir_iterator - self.name = name # always bytes on Posix + self.name = name # always bytes on Posix; always unicode on Windows self.inode = inode self.flags = known_type assert known_type == (known_type & 255) # - w_name = self.space.newbytes(name) - if not scandir_iterator.result_is_bytes: - w_name = self.space.fsdecode(w_name) + if not _WIN32: + w_name = self.space.newbytes(name) + if not scandir_iterator.result_is_bytes: + w_name = self.space.fsdecode(w_name) + else: + w_name = self.space.newunicode(name) self.w_name = w_name def descr_repr(self, space): diff --git a/pypy/module/posix/test/test_scandir.py b/pypy/module/posix/test/test_scandir.py --- a/pypy/module/posix/test/test_scandir.py +++ b/pypy/module/posix/test/test_scandir.py @@ -31,6 +31,8 @@ def setup_class(cls): space = cls.space + cls.w_WIN32 = space.wrap(sys.platform == 'win32') + cls.w_sep = space.wrap(os.sep) cls.w_posix = space.appexec([], test_posix2.GET_POSIX) cls.w_dir_empty = space.wrap(_make_dir('empty', {})) cls.w_dir0 = space.wrap(_make_dir('dir0', {'f1': 'file', @@ -38,10 +40,11 @@ 'f3': 'file'})) cls.w_dir1 = space.wrap(_make_dir('dir1', {'file1': 'file'})) cls.w_dir2 = space.wrap(_make_dir('dir2', {'subdir2': 'dir'})) - cls.w_dir3 = space.wrap(_make_dir('dir3', {'sfile3': 'symlink-file'})) - cls.w_dir4 = space.wrap(_make_dir('dir4', {'sdir4': 'symlink-dir'})) - cls.w_dir5 = space.wrap(_make_dir('dir5', {'sbrok5': 'symlink-broken'})) - cls.w_dir6 = space.wrap(_make_dir('dir6', {'serr6': 'symlink-error'})) + if sys.platform != 'win32': + cls.w_dir3 = space.wrap(_make_dir('dir3', {'sfile3': 'symlink-file'})) + cls.w_dir4 = space.wrap(_make_dir('dir4', {'sdir4': 'symlink-dir'})) + cls.w_dir5 = space.wrap(_make_dir('dir5', {'sbrok5': 'symlink-broken'})) + cls.w_dir6 = space.wrap(_make_dir('dir6', {'serr6': 'symlink-error'})) def test_scandir_empty(self): posix = self.posix @@ -60,27 +63,32 @@ d = next(posix.scandir()) assert type(d.name) is str assert type(d.path) is str - assert d.path == './' + d.name + assert d.path == '.' + self.sep + d.name d = next(posix.scandir(None)) assert type(d.name) is str assert type(d.path) is str - assert d.path == './' + d.name + assert d.path == '.' + self.sep + d.name d = next(posix.scandir(u'.')) assert type(d.name) is str assert type(d.path) is str - assert d.path == './' + d.name - d = next(posix.scandir(b'.')) - assert type(d.name) is bytes - assert type(d.path) is bytes - assert d.path == b'./' + d.name - d = next(posix.scandir('/')) + assert d.path == '.' 
+ self.sep + d.name + d = next(posix.scandir(self.sep)) assert type(d.name) is str assert type(d.path) is str - assert d.path == '/' + d.name - d = next(posix.scandir(b'/')) - assert type(d.name) is bytes - assert type(d.path) is bytes - assert d.path == b'/' + d.name + assert d.path == self.sep + d.name + if not self.WIN32: + d = next(posix.scandir(b'.')) + assert type(d.name) is bytes + assert type(d.path) is bytes + assert d.path == b'./' + d.name + d = next(posix.scandir(b'/')) + assert type(d.name) is bytes + assert type(d.path) is bytes + assert d.path == b'/' + d.name + else: + raises(TypeError, posix.scandir, b'.') + raises(TypeError, posix.scandir, b'/') + raises(TypeError, posix.scandir, b'\\') def test_stat1(self): posix = self.posix diff --git a/rpython/rlib/rposix_scandir.py b/rpython/rlib/rposix_scandir.py --- a/rpython/rlib/rposix_scandir.py +++ b/rpython/rlib/rposix_scandir.py @@ -3,54 +3,136 @@ from rpython.rtyper.lltypesystem import lltype, rffi - at specialize.argtype(0) -def opendir(path): - path = rposix._as_bytes0(path) - return opendir_bytes(path) +if not rwin32.WIN32: + @specialize.argtype(0) + def opendir(path): + path = rposix._as_bytes0(path) + return opendir_bytes(path) -def opendir_bytes(path): - dirp = rposix.c_opendir(path) - if not dirp: - raise OSError(rposix.get_saved_errno(), "opendir failed") - return dirp + def opendir_bytes(path): + dirp = rposix.c_opendir(path) + if not dirp: + raise OSError(rposix.get_saved_errno(), "opendir failed") + return dirp -def closedir(dirp): - rposix.c_closedir(dirp) + def closedir(dirp): + rposix.c_closedir(dirp) -if not rwin32.WIN32: NULL_DIRP = lltype.nullptr(rposix.DIRP.TO) -def nextentry(dirp): - """Read the next entry and returns an opaque object. - Use the methods has_xxx() and get_xxx() to read from that - opaque object. The opaque object is valid until the next - time nextentry() or closedir() is called. This may raise - OSError, or return a NULL pointer when exhausted. Note - that this doesn't filter out the "." and ".." entries. - """ - direntp = rposix.c_readdir(dirp) - if direntp: - error = rposix.get_saved_errno() - if error: - raise OSError(error, "readdir failed") - return direntp + def nextentry(dirp): + """Read the next entry and returns an opaque object. + Use the methods has_xxx() and get_xxx() to read from that + opaque object. The opaque object is valid until the next + time nextentry() or closedir() is called. This may raise + OSError, or return a NULL pointer when exhausted. Note + that this doesn't filter out the "." and ".." entries. 
+ """ + direntp = rposix.c_readdir(dirp) + if direntp: + error = rposix.get_saved_errno() + if error: + raise OSError(error, "readdir failed") + return direntp -def has_name_bytes(direntp): - return True + def get_name_bytes(direntp): + namep = rffi.cast(rffi.CCHARP, direntp.c_d_name) + return rffi.charp2str(namep) -def get_name_bytes(direntp): - namep = rffi.cast(rffi.CCHARP, direntp.c_d_name) - return rffi.charp2str(namep) + DT_UNKNOWN = rposix.dirent_config.get('DT_UNKNOWN', 0) + DT_REG = rposix.dirent_config.get('DT_REG', 255) + DT_DIR = rposix.dirent_config.get('DT_DIR', 255) + DT_LNK = rposix.dirent_config.get('DT_LNK', 255) -DT_UNKNOWN = rposix.dirent_config.get('DT_UNKNOWN', 0) -DT_REG = rposix.dirent_config.get('DT_REG', 255) -DT_DIR = rposix.dirent_config.get('DT_DIR', 255) -DT_LNK = rposix.dirent_config.get('DT_LNK', 255) + def get_known_type(direntp): + if rposix.HAVE_D_TYPE: + return rffi.getintfield(direntp, 'c_d_type') + return DT_UNKNOWN -def get_known_type(direntp): - if rposix.HAVE_D_TYPE: - return rffi.getintfield(direntp, 'c_d_type') - return DT_UNKNOWN + def get_inode(direntp): + return rffi.getintfield(direntp, 'c_d_ino') -def get_inode(direntp): - return rffi.getintfield(direntp, 'c_d_ino') +else: + # ----- Win32 version ----- + from rpython.rlib._os_support import unicode_traits + from rpython.rlib.rwin32file import make_win32_traits + + win32traits = make_win32_traits(unicode_traits) + + + class DirP: + def __init__(self): + self.filedata = lltype.malloc(win32traits.WIN32_FIND_DATA, flavor='raw') + self.hFindFile = rwin32.INVALID_HANDLE_VALUE + + def close(self): + lltype.free(self.filedata, flavor='raw') + if self.hFindFile != rwin32.INVALID_HANDLE_VALUE: + win32traits.FindClose(self.hFindFile) + + class DirEntP: + def __init__(self, filedata): + self.filedata = filedata + # ^^^ note that this structure is overwritten by the next() call, so + # we must copy a few pieces of information out of it now: + self.dwFileAttributes = filedata.c_dwFileAttributes + self.CreationTimeLow = filedata.c_ftCreationTime.c_dwLowDateTime + self.CreationTimeHigh = filedata.c_ftCreationTime.c_dwHighDateTime + self.LastAccessTimeLow = filedata.c_ftLastAccessTime.c_dwLowDateTime + self.LastAccessTimeHigh = filedata.c_ftLastAccessTime.c_dwHighDateTime + self.LastWriteTimeLow = filedata.c_ftLastWriteTime.c_dwLowDateTime + self.LastWriteTimeHigh = filedata.c_ftLastWriteTime.c_dwHighDateTime + self.nFileSizeHigh = filedata.c_nFileSizeHigh + self.nFileSizeLow = filedata.c_nFileSizeLow + + + # must only be called with unicode! + def opendir(path): + if len(path) == 0: + path = u'.' + if path[-1] not in (u'\\', u'/', u':'): + path += u'\\' + mask = path + u'*.*' + dirp = DirP() + hFindFile = win32traits.FindFirstFile(mask, dirp.filedata) + if hFindFile == rwin32.INVALID_HANDLE_VALUE: + error = rwin32.GetLastError_saved() + dirp.close() + raise WindowsError(error, "FindFirstFileW failed") + dirp.hFindFile = hFindFile + dirp.first_time = True + return dirp + + def closedir(dirp): + dirp.close() + + NULL_DIRP = None + + def nextentry(dirp): + """Read the next entry and returns an opaque object. + Use the methods has_xxx() and get_xxx() to read from that + opaque object. The opaque object is valid until the next + time nextentry() or closedir() is called. This may raise + WindowsError, or return None when exhausted. Note + that this doesn't filter out the "." and ".." entries. 
+ """ + if dirp.first_time: + dirp.first_time = False + else: + if not win32traits.FindNextFile(dirp.hFindFile, dirp.filedata): + # error or no more files + error = rwin32.GetLastError_saved() + if error == win32traits.ERROR_NO_MORE_FILES: + return None + raise WindowsError(error, "FindNextFileW failed") + return DirEntP(dirp.filedata) + + def get_name_unicode(direntp): + return unicode_traits.charp2str(rffi.cast(unicode_traits.CCHARP, + direntp.filedata.c_cFileName)) + + def get_known_type(filedata): + return 0 + + def get_inode(filedata): + return None From pypy.commits at gmail.com Wed Dec 20 06:15:30 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 20 Dec 2017 03:15:30 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Translation fixes Message-ID: <5a3a4652.cf0e1c0a.6c53.27c5@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93512:c45c28743766 Date: 2017-12-20 12:14 +0100 http://bitbucket.org/pypy/pypy/changeset/c45c28743766/ Log: Translation fixes diff --git a/pypy/module/posix/interp_scandir.py b/pypy/module/posix/interp_scandir.py --- a/pypy/module/posix/interp_scandir.py +++ b/pypy/module/posix/interp_scandir.py @@ -269,7 +269,7 @@ def get_stat_or_lstat(self, follow_symlinks): # 'follow_symlinks' ignored if not self.stat_cached: - path = self.space.unicode_w(self.fget_path(self.space)) + path = self.space.unicode0_w(self.fget_path(self.space)) self.d_stat = rposix_stat.stat(path) self.stat_cached = True return self.d_stat diff --git a/rpython/rlib/rposix_scandir.py b/rpython/rlib/rposix_scandir.py --- a/rpython/rlib/rposix_scandir.py +++ b/rpython/rlib/rposix_scandir.py @@ -63,15 +63,12 @@ win32traits = make_win32_traits(unicode_traits) - class DirP: - def __init__(self): - self.filedata = lltype.malloc(win32traits.WIN32_FIND_DATA, flavor='raw') - self.hFindFile = rwin32.INVALID_HANDLE_VALUE - - def close(self): - lltype.free(self.filedata, flavor='raw') - if self.hFindFile != rwin32.INVALID_HANDLE_VALUE: - win32traits.FindClose(self.hFindFile) + SCANDIRP = lltype.Ptr(lltype.Struct('SCANDIRP', + ('filedata', win32traits.WIN32_FIND_DATA), + ('hFindFile', rwin32.HANDLE), + ('first_time', lltype.Bool), + )) + NULL_DIRP = lltype.nullptr(SCANDIRP.TO) # must only be called with unicode! @@ -79,22 +76,23 @@ if len(path) == 0: path = u'.' if path[-1] not in (u'\\', u'/', u':'): - path += u'\\' - mask = path + u'*.*' - dirp = DirP() + mask = path + u'\\*.*' + else: + mask = path + u'*.*' + dirp = lltype.malloc(SCANDIRP.TO, flavor='raw') hFindFile = win32traits.FindFirstFile(mask, dirp.filedata) if hFindFile == rwin32.INVALID_HANDLE_VALUE: error = rwin32.GetLastError_saved() - dirp.close() + lltype.free(dirp, flavor='raw') raise WindowsError(error, "FindFirstFileW failed") dirp.hFindFile = hFindFile dirp.first_time = True return dirp def closedir(dirp): - dirp.close() - - NULL_DIRP = None + if dirp.hFindFile != rwin32.INVALID_HANDLE_VALUE: + win32traits.FindClose(dirp.hFindFile) + lltype.free(dirp, flavor='raw') def nextentry(dirp): """Read the next entry and returns an opaque object. 
diff --git a/rpython/rlib/rwin32file.py b/rpython/rlib/rwin32file.py --- a/rpython/rlib/rwin32file.py +++ b/rpython/rlib/rwin32file.py @@ -148,7 +148,7 @@ save_err=rffi.RFFI_SAVE_LASTERROR) FindClose = external('FindClose', [rwin32.HANDLE], - rwin32.BOOL) + rwin32.BOOL, releasegil=False) GetFileAttributes = external( 'GetFileAttributes' + suffix, From pypy.commits at gmail.com Wed Dec 20 08:30:03 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 20 Dec 2017 05:30:03 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Another attempt at this logic Message-ID: <5a3a65db.478edf0a.61a81.2201@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93513:92a3e28a4aef Date: 2017-12-20 14:27 +0100 http://bitbucket.org/pypy/pypy/changeset/92a3e28a4aef/ Log: Another attempt at this logic diff --git a/pypy/module/imp/importing.py b/pypy/module/imp/importing.py --- a/pypy/module/imp/importing.py +++ b/pypy/module/imp/importing.py @@ -2,7 +2,7 @@ Implementation of the interpreter-level default import logic. """ -import sys, os, stat, platform +import sys, os, stat from pypy.interpreter.module import Module, init_extra_module_attrs from pypy.interpreter.gateway import interp2app, unwrap_spec @@ -44,18 +44,17 @@ soabi += 'i' platform_name = sys.platform - if platform_name == 'linux2': - platform_name = 'linux' + if platform_name.startswith('linux'): + if sys.maxsize < 2**32: + platform_name = 'i686-linux-gnu' + # xxx should detect if we are inside 'x32', but not for now + # because it's not supported anyway by PyPy. (Using + # platform.machine() does not work, it may return x86_64 + # anyway) + else: + platform_name = 'x86_64-linux-gnu' soabi += '-' + platform_name - # xxx used to also include platform.machine(), but this is wrong - # (might get AMD64 on a 32-bit python) and it is the source of a - # importlib bug if we get uppercase characters from there... - - if platform_name == 'linux': - soabi += '-gnu' - if sys.maxsize == (2**31 - 1) and platform.machine() == 'x86_64': - soabi += 'x32' result = '.' + soabi + SO assert result == result.lower() # this is an implicit requirement of importlib on Windows! 
From pypy.commits at gmail.com Wed Dec 20 08:32:13 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 20 Dec 2017 05:32:13 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix test Message-ID: <5a3a665d.3799df0a.43a1f.86c3@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93514:1dd2914caff4 Date: 2017-12-20 14:31 +0100 http://bitbucket.org/pypy/pypy/changeset/1dd2914caff4/ Log: fix test diff --git a/pypy/module/imp/test/test_import.py b/pypy/module/imp/test/test_import.py --- a/pypy/module/imp/test/test_import.py +++ b/pypy/module/imp/test/test_import.py @@ -769,9 +769,9 @@ class TestAbi: def test_abi_tag(self): - space1 = maketestobjspace(make_config(None, soabi='TEST')) + space1 = maketestobjspace(make_config(None, soabi='footest')) space2 = maketestobjspace(make_config(None, soabi='')) - assert importing.get_so_extension(space1).startswith('.TEST') + assert importing.get_so_extension(space1).startswith('.footest') if sys.platform == 'win32': assert importing.get_so_extension(space2) == '.pyd' else: From pypy.commits at gmail.com Wed Dec 20 08:35:58 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 20 Dec 2017 05:35:58 -0800 (PST) Subject: [pypy-commit] pypy py3.5: listxattr returns a list of str, not of bytes Message-ID: <5a3a673e.5488df0a.e1f77.2268@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93515:def83ef09f31 Date: 2017-12-20 13:35 +0000 http://bitbucket.org/pypy/pypy/changeset/def83ef09f31/ Log: listxattr returns a list of str, not of bytes diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -2389,7 +2389,8 @@ result = rposix.listxattr(path.as_bytes, follow_symlinks) except OSError as e: raise wrap_oserror(space, e, eintr_retry=False) - return space.newlist([space.newbytes(attr) for attr in result]) + return space.newlist([ + space.fsdecode(space.newbytes(attr)) for attr in result]) have_functions = [] diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -1451,10 +1451,10 @@ raises(OSError, os.getxattr, self.path, 'user.test') os.setxattr(self.path, 'user.test', b'', os.XATTR_CREATE, follow_symlinks=False) assert os.getxattr(self.path, 'user.test') == b'' - os.setxattr(self.path, 'user.test', b'foo', os.XATTR_REPLACE) + os.setxattr(self.path, b'user.test', b'foo', os.XATTR_REPLACE) assert os.getxattr(self.path, 'user.test', follow_symlinks=False) == b'foo' assert set(os.listxattr(self.path)) == set( - init_names + [b'user.test']) + init_names + ['user.test']) os.removexattr(self.path, 'user.test', follow_symlinks=False) raises(OSError, os.getxattr, self.path, 'user.test') assert os.listxattr(self.path, follow_symlinks=False) == init_names From pypy.commits at gmail.com Wed Dec 20 08:48:41 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 20 Dec 2017 05:48:41 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Still use machine.platform() to know if we're on a x86 or not at all Message-ID: <5a3a6a39.a490df0a.3202d.d31b@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93516:934eabf72b5c Date: 2017-12-20 14:48 +0100 http://bitbucket.org/pypy/pypy/changeset/934eabf72b5c/ Log: Still use machine.platform() to know if we're on a x86 or not at all diff --git a/pypy/module/imp/importing.py b/pypy/module/imp/importing.py --- a/pypy/module/imp/importing.py +++ b/pypy/module/imp/importing.py @@ -2,7 +2,7 @@ 
Implementation of the interpreter-level default import logic. """ -import sys, os, stat +import sys, os, stat, re, platform from pypy.interpreter.module import Module, init_extra_module_attrs from pypy.interpreter.gateway import interp2app, unwrap_spec @@ -45,14 +45,17 @@ platform_name = sys.platform if platform_name.startswith('linux'): - if sys.maxsize < 2**32: - platform_name = 'i686-linux-gnu' - # xxx should detect if we are inside 'x32', but not for now - # because it's not supported anyway by PyPy. (Using - # platform.machine() does not work, it may return x86_64 - # anyway) + if re.match('(i[3-6]86|x86_64)$', platform.machine()): + if sys.maxsize < 2**32: + platform_name = 'i686-linux-gnu' + # xxx should detect if we are inside 'x32', but not for now + # because it's not supported anyway by PyPy. (Relying + # on platform.machine() does not work, it may return x86_64 + # anyway) + else: + platform_name = 'x86_64-linux-gnu' else: - platform_name = 'x86_64-linux-gnu' + platform_name = 'linux-gnu' soabi += '-' + platform_name From pypy.commits at gmail.com Wed Dec 20 09:01:49 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 20 Dec 2017 06:01:49 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix error reporting from posix.*xattr functions Message-ID: <5a3a6d4d.139ddf0a.b8267.5d67@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93517:0c9668c522b1 Date: 2017-12-20 14:01 +0000 http://bitbucket.org/pypy/pypy/changeset/0c9668c522b1/ Log: Fix error reporting from posix.*xattr functions diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -2301,13 +2301,13 @@ try: result = rposix.fgetxattr(path.as_fd, attribute.as_bytes) except OSError as e: - raise wrap_oserror(space, e, eintr_retry=False) + raise wrap_oserror(space, e, path.as_bytes) else: try: result = rposix.getxattr(path.as_bytes, attribute.as_bytes, follow_symlinks=follow_symlinks) except OSError as e: - raise wrap_oserror(space, e, eintr_retry=False) + raise wrap_oserror(space, e, path.as_bytes) return space.newbytes(result) @unwrap_spec(path=path_or_fd(), attribute=path_or_fd(allow_fd=False), @@ -2330,13 +2330,13 @@ try: rposix.fsetxattr(path.as_fd, attribute.as_bytes, value) except OSError as e: - raise wrap_oserror(space, e, eintr_retry=False) + raise wrap_oserror(space, e, path.as_bytes) else: try: rposix.setxattr(path.as_bytes, attribute.as_bytes, value, follow_symlinks=follow_symlinks) except OSError as e: - raise wrap_oserror(space, e, eintr_retry=False) + raise wrap_oserror(space, e, path.as_bytes) @unwrap_spec(path=path_or_fd(), attribute=path_or_fd(allow_fd=False), @@ -2356,13 +2356,13 @@ try: rposix.fremovexattr(path.as_fd, attribute.as_bytes) except OSError as e: - raise wrap_oserror(space, e, eintr_retry=False) + raise wrap_oserror(space, e, path.as_bytes) else: try: rposix.removexattr(path.as_bytes, attribute.as_bytes, follow_symlinks=follow_symlinks) except OSError as e: - raise wrap_oserror(space, e, eintr_retry=False) + raise wrap_oserror(space, e, path.as_bytes) @unwrap_spec(path=path_or_fd(), follow_symlinks=bool) @@ -2388,9 +2388,8 @@ try: result = rposix.listxattr(path.as_bytes, follow_symlinks) except OSError as e: - raise wrap_oserror(space, e, eintr_retry=False) - return space.newlist([ - space.fsdecode(space.newbytes(attr)) for attr in result]) + raise wrap_oserror(space, e, path.as_bytes) + return space.newlist([space.newfilename(attr) for attr in result]) have_functions = [] diff 
--git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -1448,7 +1448,8 @@ with open(self.path, 'wb'): pass init_names = os.listxattr(self.path) - raises(OSError, os.getxattr, self.path, 'user.test') + excinfo = raises(OSError, os.getxattr, self.path, 'user.test') + assert excinfo.value.filename == self.path os.setxattr(self.path, 'user.test', b'', os.XATTR_CREATE, follow_symlinks=False) assert os.getxattr(self.path, 'user.test') == b'' os.setxattr(self.path, b'user.test', b'foo', os.XATTR_REPLACE) From pypy.commits at gmail.com Wed Dec 20 09:31:36 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 20 Dec 2017 06:31:36 -0800 (PST) Subject: [pypy-commit] pypy py3.5: document branch Message-ID: <5a3a7448.cd4a1c0a.7e0b4.083f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93518:e5552dd110c1 Date: 2017-12-20 14:30 +0000 http://bitbucket.org/pypy/pypy/changeset/e5552dd110c1/ Log: document branch diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst --- a/pypy/doc/whatsnew-pypy3-head.rst +++ b/pypy/doc/whatsnew-pypy3-head.rst @@ -16,3 +16,6 @@ Download and patch dependencies when building cffi-based stdlib modules .. branch: os_lockf + +.. branch: py3.5-xattr +Add posix.*attr() functions From pypy.commits at gmail.com Wed Dec 20 11:03:40 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 20 Dec 2017 08:03:40 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix for test_package. This might fix the next issue for "pypy3 -m ensurepip". Message-ID: <5a3a89dc.02c7df0a.21a25.d93c@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93519:5a865516f947 Date: 2017-12-20 17:01 +0100 http://bitbucket.org/pypy/pypy/changeset/5a865516f947/ Log: Fix for test_package. This might fix the next issue for "pypy3 -m ensurepip". diff --git a/pypy/module/zipimport/interp_zipimport.py b/pypy/module/zipimport/interp_zipimport.py --- a/pypy/module/zipimport/interp_zipimport.py +++ b/pypy/module/zipimport/interp_zipimport.py @@ -252,6 +252,8 @@ gets in code_object.co_filename. Something like 'myfile.zip/mymodule.py' """ + if ZIPSEP != os.path.sep: + filename = filename.replace(ZIPSEP, os.path.sep) return self.filename + os.path.sep + filename def load_module(self, space, w_fullname): From pypy.commits at gmail.com Wed Dec 20 11:03:42 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 20 Dec 2017 08:03:42 -0800 (PST) Subject: [pypy-commit] pypy py3.5: merge heads Message-ID: <5a3a89de.59451c0a.523e.e933@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93520:d8113c2982af Date: 2017-12-20 17:03 +0100 http://bitbucket.org/pypy/pypy/changeset/d8113c2982af/ Log: merge heads diff --git a/pypy/module/zipimport/interp_zipimport.py b/pypy/module/zipimport/interp_zipimport.py --- a/pypy/module/zipimport/interp_zipimport.py +++ b/pypy/module/zipimport/interp_zipimport.py @@ -252,6 +252,8 @@ gets in code_object.co_filename. 
Something like 'myfile.zip/mymodule.py' """ + if ZIPSEP != os.path.sep: + filename = filename.replace(ZIPSEP, os.path.sep) return self.filename + os.path.sep + filename def load_module(self, space, w_fullname): From pypy.commits at gmail.com Wed Dec 20 11:44:00 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 20 Dec 2017 08:44:00 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Test and fix (for lib-python/3/pathlib): 'os.symlink' must exist, even Message-ID: <5a3a9350.ad88df0a.d0ab.8b4b@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93521:50aa133f3ee9 Date: 2017-12-20 17:36 +0100 http://bitbucket.org/pypy/pypy/changeset/50aa133f3ee9/ Log: Test and fix (for lib-python/3/pathlib): 'os.symlink' must exist, even if it can raise NotImplementedError on Windows diff --git a/pypy/module/posix/__init__.py b/pypy/module/posix/__init__.py --- a/pypy/module/posix/__init__.py +++ b/pypy/module/posix/__init__.py @@ -80,6 +80,7 @@ 'urandom': 'interp_posix.urandom', 'device_encoding': 'interp_posix.device_encoding', 'get_terminal_size': 'interp_posix.get_terminal_size', + 'symlink': 'interp_posix.symlink', 'scandir': 'interp_scandir.scandir', 'get_inheritable': 'interp_posix.get_inheritable', @@ -111,8 +112,6 @@ interpleveldefs['killpg'] = 'interp_posix.killpg' if hasattr(os, 'getpid'): interpleveldefs['getpid'] = 'interp_posix.getpid' - if hasattr(os, 'symlink'): - interpleveldefs['symlink'] = 'interp_posix.symlink' if hasattr(os, 'readlink'): interpleveldefs['readlink'] = 'interp_posix.readlink' if hasattr(os, 'fork'): diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -1273,6 +1273,9 @@ and path should be relative; path will then be relative to that directory. dir_fd may not be implemented on your platform. If it is unavailable, using it will raise a NotImplementedError.""" + if _WIN32: + raise oefmt(space.w_NotImplementedError, + "symlink() is not implemented for PyPy on Windows") try: if rposix.HAVE_SYMLINKAT and dir_fd != DEFAULT_DIR_FD: src = space.fsencode_w(w_src) diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -1157,7 +1157,7 @@ expected = min(myprio + 3, 19) assert os.WEXITSTATUS(status1) == expected - if hasattr(os, 'symlink'): + if sys.platform != 'win32': def test_symlink(self): posix = self.posix bytes_dir = self.bytes_dir @@ -1187,6 +1187,10 @@ finally: posix.close(f) posix.unlink(bytes_dir + '/somelink'.encode()) + else: + def test_symlink(self): + posix = self.posix + raises(NotImplementedError, posix.symlink, 'a', 'b') if hasattr(os, 'ftruncate'): def test_truncate(self): From pypy.commits at gmail.com Wed Dec 20 11:56:52 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 20 Dec 2017 08:56:52 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Cache the wrapped code.co_filename: space.newfilename() is expensive Message-ID: <5a3a9654.a490df0a.3202d.eeba@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93522:74f1be327b1d Date: 2017-12-20 16:56 +0000 http://bitbucket.org/pypy/pypy/changeset/74f1be327b1d/ Log: Cache the wrapped code.co_filename: space.newfilename() is expensive diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py --- a/pypy/interpreter/pycode.py +++ b/pypy/interpreter/pycode.py @@ -4,7 +4,7 @@ The bytecode interpreter itself is implemented by the PyFrame class. 
""" -import imp, struct, types, new, sys, os +import imp, struct, types, sys, os from pypy.interpreter import eval from pypy.interpreter.signature import Signature @@ -80,7 +80,7 @@ class PyCode(eval.Code): "CPython-style code objects." _immutable_fields_ = ["_signature", "co_argcount", "co_kwonlyargcount", "co_cellvars[*]", - "co_code", "co_consts_w[*]", "co_filename", + "co_code", "co_consts_w[*]", "co_filename", "w_filename", "co_firstlineno", "co_flags", "co_freevars[*]", "co_lnotab", "co_names_w[*]", "co_nlocals", "co_stacksize", "co_varnames[*]", @@ -111,6 +111,7 @@ assert isinstance(filename, str) rstring.check_str0(filename) self.co_filename = filename + self.w_filename = space.newfilename(filename) self.co_name = name self.co_firstlineno = firstlineno self.co_lnotab = lnotab @@ -203,6 +204,7 @@ if lastdirname: basename = '%s/%s' % (lastdirname, basename) self.co_filename = '/%s' % (basename,) + self.w_filename = space.newfilename(self.co_filename) co_names = property(lambda self: [self.space.str_w(w_name) for w_name in self.co_names_w]) # for trace @@ -427,7 +429,7 @@ space.newtuple(self.co_consts_w), space.newtuple(self.co_names_w), space.newtuple([space.newtext(v) for v in self.co_varnames]), - space.newtext(self.co_filename), + self.w_filename, space.newtext(self.co_name), space.newint(self.co_firstlineno), space.newbytes(self.co_lnotab), @@ -451,7 +453,7 @@ space = self.space # co_name should be an identifier name = self.co_name.decode('utf-8') - fn = space.fsdecode_w(space.newbytes(self.co_filename)) + fn = space.unicode_w(self.w_filename) return space.newunicode(u'' % ( name, unicode(self.getaddrstring(space)), fn, -1 if self.co_firstlineno == 0 else self.co_firstlineno)) diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py --- a/pypy/interpreter/typedef.py +++ b/pypy/interpreter/typedef.py @@ -625,7 +625,7 @@ co_varnames = GetSetProperty(PyCode.fget_co_varnames), co_freevars = GetSetProperty(PyCode.fget_co_freevars), co_cellvars = GetSetProperty(PyCode.fget_co_cellvars), - co_filename = interp_attrproperty('co_filename', cls=PyCode, wrapfn="newfilename"), + co_filename = interp_attrproperty_w('w_filename', cls=PyCode), co_name = interp_attrproperty('co_name', cls=PyCode, wrapfn="newtext"), co_firstlineno = interp_attrproperty('co_firstlineno', cls=PyCode, wrapfn="newint"), co_lnotab = interp_attrproperty('co_lnotab', cls=PyCode, wrapfn="newbytes"), diff --git a/pypy/module/cpyext/funcobject.py b/pypy/module/cpyext/funcobject.py --- a/pypy/module/cpyext/funcobject.py +++ b/pypy/module/cpyext/funcobject.py @@ -70,7 +70,7 @@ py_code = rffi.cast(PyCodeObject, py_obj) assert isinstance(w_obj, PyCode) py_code.c_co_name = make_ref(space, space.newtext(w_obj.co_name)) - py_code.c_co_filename = make_ref(space, space.newtext(w_obj.co_filename)) + py_code.c_co_filename = make_ref(space, w_obj.w_filename) co_flags = 0 for name, value in ALL_CODE_FLAGS: if w_obj.co_flags & getattr(pycode, name): diff --git a/pypy/module/imp/importing.py b/pypy/module/imp/importing.py --- a/pypy/module/imp/importing.py +++ b/pypy/module/imp/importing.py @@ -259,7 +259,7 @@ if pathname is not None: w_pathname = get_sourcefile(space, pathname) else: - w_pathname = space.newfilename(code_w.co_filename) + w_pathname = code_w.w_filename if cpathname is not None: w_cpathname = space.newfilename(cpathname) else: @@ -353,6 +353,7 @@ return code_w.co_filename = pathname + code_w.w_filename = space.newfilename(pathname) constants = code_w.co_consts_w for const in constants: if const is not None 
and isinstance(const, PyCode): From pypy.commits at gmail.com Wed Dec 20 15:10:07 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 20 Dec 2017 12:10:07 -0800 (PST) Subject: [pypy-commit] pypy cpyext-avoid-roundtrip: quiet warnings Message-ID: <5a3ac39f.a490df0a.3202d.0fc4@mx.google.com> Author: Matti Picus Branch: cpyext-avoid-roundtrip Changeset: r93523:5db9d141c474 Date: 2017-12-20 21:42 +0200 http://bitbucket.org/pypy/pypy/changeset/5db9d141c474/ Log: quiet warnings diff --git a/pypy/module/cpyext/src/object.c b/pypy/module/cpyext/src/object.c --- a/pypy/module/cpyext/src/object.c +++ b/pypy/module/cpyext/src/object.c @@ -120,7 +120,7 @@ return (PyVarObject*)PyErr_NoMemory(); if (type->tp_itemsize == 0) - return PyObject_INIT(py_obj, type); + return (PyVarObject*)PyObject_INIT(py_obj, type); else return PyObject_INIT_VAR((PyVarObject*)py_obj, type, nitems); } @@ -138,5 +138,5 @@ PyObject_InitVar(PyVarObject *obj, PyTypeObject *type, Py_ssize_t size) { obj->ob_size = size; - return PyObject_Init((PyObject*)obj, type); + return (PyVarObject*)PyObject_Init((PyObject*)obj, type); } From pypy.commits at gmail.com Wed Dec 20 15:10:09 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 20 Dec 2017 12:10:09 -0800 (PST) Subject: [pypy-commit] pypy cpyext-avoid-roundtrip: test, fix PySequence_GetItem allows negative indices Message-ID: <5a3ac3a1.c4141c0a.5696d.29e8@mx.google.com> Author: Matti Picus Branch: cpyext-avoid-roundtrip Changeset: r93524:f428c66de212 Date: 2017-12-20 22:09 +0200 http://bitbucket.org/pypy/pypy/changeset/f428c66de212/ Log: test, fix PySequence_GetItem allows negative indices diff --git a/pypy/module/cpyext/sequence.py b/pypy/module/cpyext/sequence.py --- a/pypy/module/cpyext/sequence.py +++ b/pypy/module/cpyext/sequence.py @@ -165,6 +165,9 @@ def PySequence_GetItem(space, w_obj, i): """Return the ith element of o, or NULL on failure. 
This is the equivalent of the Python expression o[i].""" + if i < 0: + l = PySequence_Length(space, w_obj) + i += l return PySequence_ITEM(space, w_obj, i) @cpython_api([PyObject], PyObject) diff --git a/pypy/module/cpyext/test/test_sequence.py b/pypy/module/cpyext/test/test_sequence.py --- a/pypy/module/cpyext/test/test_sequence.py +++ b/pypy/module/cpyext/test/test_sequence.py @@ -184,6 +184,9 @@ p2 = api.PySequence_GetItem(w1, 1) assert p1 == p2 assert p1.c_ob_refcnt > 1 + p1 = api.PySequence_GetItem(w1, -1) + p2 = api.PySequence_GetItem(w1, 2) + assert p1 == p2 class AppTestSetObject(AppTestCpythonExtensionBase): From pypy.commits at gmail.com Wed Dec 20 17:02:36 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 20 Dec 2017 14:02:36 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix 'errors' arg in PyUnicode_Decode() Message-ID: <5a3addfc.d4e31c0a.77416.0d87@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93525:9ae4bd0c4555 Date: 2017-12-20 22:01 +0000 http://bitbucket.org/pypy/pypy/changeset/9ae4bd0c4555/ Log: Fix 'errors' arg in PyUnicode_Decode() diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -631,8 +631,9 @@ def test_decode(self, space): b_text = rffi.str2charp('caf\x82xx') b_encoding = rffi.str2charp('cp437') - assert space.unicode_w( - PyUnicode_Decode(space, b_text, 4, b_encoding, None)) == u'caf\xe9' + b_errors = rffi.str2charp('strict') + assert space.unicode_w(PyUnicode_Decode( + space, b_text, 4, b_encoding, b_errors)) == u'caf\xe9' w_text = PyUnicode_FromEncodedObject(space, space.newbytes("test"), b_encoding, None) assert space.isinstance_w(w_text, space.w_unicode) diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -492,7 +492,7 @@ w_str = space.newbytes(rffi.charpsize2str(s, size)) w_encoding = space.newtext(rffi.charp2str(encoding)) if errors: - w_errors = space.newbytes(rffi.charp2str(errors)) + w_errors = space.newtext(rffi.charp2str(errors)) else: w_errors = None return space.call_method(w_str, 'decode', w_encoding, w_errors) From pypy.commits at gmail.com Thu Dec 21 00:17:27 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 20 Dec 2017 21:17:27 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix translation? Message-ID: <5a3b43e7.5387df0a.11b78.ce73@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r93526:0fda157e0d0e Date: 2017-12-21 07:15 +0200 http://bitbucket.org/pypy/pypy/changeset/0fda157e0d0e/ Log: fix translation? 
diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py --- a/pypy/interpreter/pycode.py +++ b/pypy/interpreter/pycode.py @@ -204,7 +204,7 @@ if lastdirname: basename = '%s/%s' % (lastdirname, basename) self.co_filename = '/%s' % (basename,) - self.w_filename = space.newfilename(self.co_filename) + self.w_filename = self.space.newfilename(self.co_filename) co_names = property(lambda self: [self.space.str_w(w_name) for w_name in self.co_names_w]) # for trace From pypy.commits at gmail.com Thu Dec 21 00:22:47 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 20 Dec 2017 21:22:47 -0800 (PST) Subject: [pypy-commit] pypy default: exclude win32 Message-ID: <5a3b4527.41afdf0a.5f3b0.2ca8@mx.google.com> Author: Matti Picus Branch: Changeset: r93527:0b47a415bba3 Date: 2017-12-21 07:22 +0200 http://bitbucket.org/pypy/pypy/changeset/0b47a415bba3/ Log: exclude win32 diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -17,6 +17,8 @@ # vmprof works only on x86 for now IS_SUPPORTED = detect_cpu.autodetect().startswith('x86') +if sys.platform == 'win32': + IS_SUPPORTED = False ROOT = py.path.local(rpythonroot).join('rpython', 'rlib', 'rvmprof') SRC = ROOT.join('src') From pypy.commits at gmail.com Thu Dec 21 04:05:11 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 21 Dec 2017 01:05:11 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.5-v5.9.x: merge py3.5 Message-ID: <5a3b7947.ccc3df0a.a0129.d9f0@mx.google.com> Author: fijal Branch: release-pypy3.5-v5.9.x Changeset: r93528:067f612e97ae Date: 2017-12-21 11:04 +0200 http://bitbucket.org/pypy/pypy/changeset/067f612e97ae/ Log: merge py3.5 diff --git a/lib-python/3/ssl.py b/lib-python/3/ssl.py --- a/lib-python/3/ssl.py +++ b/lib-python/3/ssl.py @@ -140,6 +140,23 @@ except NameError: _SSLv2_IF_EXISTS = None + + + +import os +class DirEntry: + def __init__(self, path, name): + self.path = os.path.join(path, name) + self.name = name + def is_dir(self): + return os.path.isdir(self.path) +def myscandir(path='.'): + for name in os.listdir(path): + yield DirEntry(path, name) +os.scandir = myscandir + + + if sys.platform == "win32": from _ssl import enum_certificates, enum_crls diff --git a/lib-python/3/subprocess.py b/lib-python/3/subprocess.py --- a/lib-python/3/subprocess.py +++ b/lib-python/3/subprocess.py @@ -1560,7 +1560,7 @@ 'copyfile' in caller.f_globals): dest_dir = sys.pypy_resolvedirof(target_executable) src_dir = sys.pypy_resolvedirof(sys.executable) - for libname in ['libpypy3-c.so', 'libpypy3-c.dylib']: + for libname in ['libpypy3-c.so', 'libpypy3-c.dylib', 'libpypy3-c.dll']: dest_library = os.path.join(dest_dir, libname) src_library = os.path.join(src_dir, libname) if os.path.exists(src_library): diff --git a/lib_pypy/_cffi_ssl/_stdssl/__init__.py b/lib_pypy/_cffi_ssl/_stdssl/__init__.py --- a/lib_pypy/_cffi_ssl/_stdssl/__init__.py +++ b/lib_pypy/_cffi_ssl/_stdssl/__init__.py @@ -20,9 +20,15 @@ SSL_ERROR_EOF, SSL_ERROR_NO_SOCKET, SSL_ERROR_INVALID_ERROR_CODE, pyerr_write_unraisable) from _cffi_ssl._stdssl import error -from select import poll, POLLIN, POLLOUT, select +from select import select from enum import IntEnum as _IntEnum +if sys.platform == 'win32': + HAVE_POLL = False +else: + from select import poll, POLLIN, POLLOUT + HAVE_POLL = True + OPENSSL_VERSION = ffi.string(lib.OPENSSL_VERSION_TEXT).decode('utf-8') OPENSSL_VERSION_NUMBER = lib.OPENSSL_VERSION_NUMBER ver = OPENSSL_VERSION_NUMBER @@ -158,8 +164,6 @@ def 
_monotonic_clock(): return time.clock_gettime(time.CLOCK_MONOTONIC) -HAVE_POLL = True - def _ssl_select(sock, writing, timeout): if HAVE_POLL: p = poll() diff --git a/lib_pypy/_ssl/__init__.py b/lib_pypy/_ssl/__init__.py --- a/lib_pypy/_ssl/__init__.py +++ b/lib_pypy/_ssl/__init__.py @@ -14,3 +14,14 @@ # RAND_egd is optional and might not be available on e.g. libressl if hasattr(_stdssl, 'RAND_egd'): RAND_egd = builtinify(RAND_egd) + +import sys +if sys.platform == "win32" and 'enum_certificates' not in globals(): + def enum_certificates(*args, **kwds): + import warnings + warnings.warn("ssl.enum_certificates() is not implemented") + return [] + def enum_crls(*args, **kwds): + import warnings + warnings.warn("ssl.enum_crls() is not implemented") + return [] diff --git a/lib_pypy/faulthandler.py b/lib_pypy/faulthandler.py new file mode 100644 --- /dev/null +++ b/lib_pypy/faulthandler.py @@ -0,0 +1,3 @@ +# This is only imported for platforms where the built-in faulthandler module is not +# available. It provides no function at all so far, but it is enough to start the +# CPython test suite. \ No newline at end of file diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst --- a/pypy/doc/whatsnew-pypy3-head.rst +++ b/pypy/doc/whatsnew-pypy3-head.rst @@ -16,3 +16,6 @@ Download and patch dependencies when building cffi-based stdlib modules .. branch: os_lockf + +.. branch: py3.5-xattr +Add posix.*attr() functions diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py --- a/pypy/interpreter/pycode.py +++ b/pypy/interpreter/pycode.py @@ -4,7 +4,7 @@ The bytecode interpreter itself is implemented by the PyFrame class. """ -import imp, struct, types, new, sys, os +import imp, struct, types, sys, os from pypy.interpreter import eval from pypy.interpreter.signature import Signature @@ -80,7 +80,7 @@ class PyCode(eval.Code): "CPython-style code objects." 
_immutable_fields_ = ["_signature", "co_argcount", "co_kwonlyargcount", "co_cellvars[*]", - "co_code", "co_consts_w[*]", "co_filename", + "co_code", "co_consts_w[*]", "co_filename", "w_filename", "co_firstlineno", "co_flags", "co_freevars[*]", "co_lnotab", "co_names_w[*]", "co_nlocals", "co_stacksize", "co_varnames[*]", @@ -111,6 +111,7 @@ assert isinstance(filename, str) rstring.check_str0(filename) self.co_filename = filename + self.w_filename = space.newfilename(filename) self.co_name = name self.co_firstlineno = firstlineno self.co_lnotab = lnotab @@ -203,6 +204,7 @@ if lastdirname: basename = '%s/%s' % (lastdirname, basename) self.co_filename = '/%s' % (basename,) + self.w_filename = self.space.newfilename(self.co_filename) co_names = property(lambda self: [self.space.str_w(w_name) for w_name in self.co_names_w]) # for trace @@ -427,7 +429,7 @@ space.newtuple(self.co_consts_w), space.newtuple(self.co_names_w), space.newtuple([space.newtext(v) for v in self.co_varnames]), - space.newtext(self.co_filename), + self.w_filename, space.newtext(self.co_name), space.newint(self.co_firstlineno), space.newbytes(self.co_lnotab), @@ -451,7 +453,7 @@ space = self.space # co_name should be an identifier name = self.co_name.decode('utf-8') - fn = space.fsdecode_w(space.newbytes(self.co_filename)) + fn = space.unicode_w(self.w_filename) return space.newunicode(u'' % ( name, unicode(self.getaddrstring(space)), fn, -1 if self.co_firstlineno == 0 else self.co_firstlineno)) diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py --- a/pypy/interpreter/typedef.py +++ b/pypy/interpreter/typedef.py @@ -625,7 +625,7 @@ co_varnames = GetSetProperty(PyCode.fget_co_varnames), co_freevars = GetSetProperty(PyCode.fget_co_freevars), co_cellvars = GetSetProperty(PyCode.fget_co_cellvars), - co_filename = interp_attrproperty('co_filename', cls=PyCode, wrapfn="newfilename"), + co_filename = interp_attrproperty_w('w_filename', cls=PyCode), co_name = interp_attrproperty('co_name', cls=PyCode, wrapfn="newtext"), co_firstlineno = interp_attrproperty('co_firstlineno', cls=PyCode, wrapfn="newint"), co_lnotab = interp_attrproperty('co_lnotab', cls=PyCode, wrapfn="newbytes"), diff --git a/pypy/module/_io/interp_fileio.py b/pypy/module/_io/interp_fileio.py --- a/pypy/module/_io/interp_fileio.py +++ b/pypy/module/_io/interp_fileio.py @@ -8,6 +8,7 @@ from rpython.rlib.rstring import StringBuilder from rpython.rlib import rposix from rpython.rlib.rposix_stat import STAT_FIELD_TYPES +from rpython.rlib.streamio import _setfd_binary from rpython.rtyper.lltypesystem import lltype, rffi from os import O_RDONLY, O_WRONLY, O_RDWR, O_CREAT, O_TRUNC, O_EXCL import sys, os, stat, errno @@ -239,6 +240,8 @@ if HAS_BLKSIZE and st.st_blksize > 1: self.blksize = st.st_blksize + _setfd_binary(self.fd) + space.setattr(self, space.newtext("name"), w_name) if self.appending: diff --git a/pypy/module/cpyext/funcobject.py b/pypy/module/cpyext/funcobject.py --- a/pypy/module/cpyext/funcobject.py +++ b/pypy/module/cpyext/funcobject.py @@ -70,7 +70,7 @@ py_code = rffi.cast(PyCodeObject, py_obj) assert isinstance(w_obj, PyCode) py_code.c_co_name = make_ref(space, space.newtext(w_obj.co_name)) - py_code.c_co_filename = make_ref(space, space.newtext(w_obj.co_filename)) + py_code.c_co_filename = make_ref(space, w_obj.w_filename) co_flags = 0 for name, value in ALL_CODE_FLAGS: if w_obj.co_flags & getattr(pycode, name): diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- 
a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -631,8 +631,9 @@ def test_decode(self, space): b_text = rffi.str2charp('caf\x82xx') b_encoding = rffi.str2charp('cp437') - assert space.unicode_w( - PyUnicode_Decode(space, b_text, 4, b_encoding, None)) == u'caf\xe9' + b_errors = rffi.str2charp('strict') + assert space.unicode_w(PyUnicode_Decode( + space, b_text, 4, b_encoding, b_errors)) == u'caf\xe9' w_text = PyUnicode_FromEncodedObject(space, space.newbytes("test"), b_encoding, None) assert space.isinstance_w(w_text, space.w_unicode) diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -492,7 +492,7 @@ w_str = space.newbytes(rffi.charpsize2str(s, size)) w_encoding = space.newtext(rffi.charp2str(encoding)) if errors: - w_errors = space.newbytes(rffi.charp2str(errors)) + w_errors = space.newtext(rffi.charp2str(errors)) else: w_errors = None return space.call_method(w_str, 'decode', w_encoding, w_errors) diff --git a/pypy/module/imp/importing.py b/pypy/module/imp/importing.py --- a/pypy/module/imp/importing.py +++ b/pypy/module/imp/importing.py @@ -2,7 +2,7 @@ Implementation of the interpreter-level default import logic. """ -import sys, os, stat, platform +import sys, os, stat, re, platform from pypy.interpreter.module import Module, init_extra_module_attrs from pypy.interpreter.gateway import interp2app, unwrap_spec @@ -44,18 +44,20 @@ soabi += 'i' platform_name = sys.platform - if platform_name == 'linux2': - platform_name = 'linux' + if platform_name.startswith('linux'): + if re.match('(i[3-6]86|x86_64)$', platform.machine()): + if sys.maxsize < 2**32: + platform_name = 'i686-linux-gnu' + # xxx should detect if we are inside 'x32', but not for now + # because it's not supported anyway by PyPy. (Relying + # on platform.machine() does not work, it may return x86_64 + # anyway) + else: + platform_name = 'x86_64-linux-gnu' + else: + platform_name = 'linux-gnu' soabi += '-' + platform_name - # xxx used to also include platform.machine(), but this is wrong - # (might get AMD64 on a 32-bit python) and it is the source of a - # importlib bug if we get uppercase characters from there... - - if platform_name == 'linux': - soabi += '-gnu' - if sys.maxsize == (2**31 - 1) and platform.machine() == 'x86_64': - soabi += 'x32' result = '.' + soabi + SO assert result == result.lower() # this is an implicit requirement of importlib on Windows! 
@@ -257,7 +259,7 @@ if pathname is not None: w_pathname = get_sourcefile(space, pathname) else: - w_pathname = space.newfilename(code_w.co_filename) + w_pathname = code_w.w_filename if cpathname is not None: w_cpathname = space.newfilename(cpathname) else: @@ -351,6 +353,7 @@ return code_w.co_filename = pathname + code_w.w_filename = space.newfilename(pathname) constants = code_w.co_consts_w for const in constants: if const is not None and isinstance(const, PyCode): diff --git a/pypy/module/imp/test/test_import.py b/pypy/module/imp/test/test_import.py --- a/pypy/module/imp/test/test_import.py +++ b/pypy/module/imp/test/test_import.py @@ -769,9 +769,9 @@ class TestAbi: def test_abi_tag(self): - space1 = maketestobjspace(make_config(None, soabi='TEST')) + space1 = maketestobjspace(make_config(None, soabi='footest')) space2 = maketestobjspace(make_config(None, soabi='')) - assert importing.get_so_extension(space1).startswith('.TEST') + assert importing.get_so_extension(space1).startswith('.footest') if sys.platform == 'win32': assert importing.get_so_extension(space2) == '.pyd' else: diff --git a/pypy/module/posix/__init__.py b/pypy/module/posix/__init__.py --- a/pypy/module/posix/__init__.py +++ b/pypy/module/posix/__init__.py @@ -80,6 +80,7 @@ 'urandom': 'interp_posix.urandom', 'device_encoding': 'interp_posix.device_encoding', 'get_terminal_size': 'interp_posix.get_terminal_size', + 'symlink': 'interp_posix.symlink', 'scandir': 'interp_scandir.scandir', 'get_inheritable': 'interp_posix.get_inheritable', @@ -111,8 +112,6 @@ interpleveldefs['killpg'] = 'interp_posix.killpg' if hasattr(os, 'getpid'): interpleveldefs['getpid'] = 'interp_posix.getpid' - if hasattr(os, 'symlink'): - interpleveldefs['symlink'] = 'interp_posix.symlink' if hasattr(os, 'readlink'): interpleveldefs['readlink'] = 'interp_posix.readlink' if hasattr(os, 'fork'): @@ -229,7 +228,7 @@ 'POSIX_FADV_RANDOM', 'POSIX_FADV_NOREUSE', 'POSIX_FADV_DONTNEED']: assert getattr(rposix, _name) is not None, "missing %r" % (_name,) interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) - + if hasattr(rposix, 'sched_get_priority_max'): interpleveldefs['sched_get_priority_max'] = 'interp_posix.sched_get_priority_max' interpleveldefs['sched_get_priority_min'] = 'interp_posix.sched_get_priority_min' @@ -246,11 +245,21 @@ if hasattr(rposix, 'sched_yield'): interpleveldefs['sched_yield'] = 'interp_posix.sched_yield' - + for _name in ["O_CLOEXEC"]: if getattr(rposix, _name) is not None: interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) + if hasattr(rposix, 'getxattr'): + interpleveldefs['getxattr'] = 'interp_posix.getxattr' + interpleveldefs['setxattr'] = 'interp_posix.setxattr' + interpleveldefs['removexattr'] = 'interp_posix.removexattr' + interpleveldefs['listxattr'] = 'interp_posix.listxattr' + for _name in ['XATTR_SIZE_MAX', 'XATTR_CREATE', 'XATTR_REPLACE']: + if getattr(rposix, _name) is not None: + interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) + + def startup(self, space): from pypy.module.posix import interp_posix from pypy.module.imp import importing diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -122,7 +122,7 @@ else: path_b = path.as_bytes assert path_b is not None - return func(path.as_bytes, *args) + return func(path_b, *args) class Path(object): @@ -1273,6 +1273,9 @@ and path should be relative; path will then be relative to that directory. 
dir_fd may not be implemented on your platform. If it is unavailable, using it will raise a NotImplementedError.""" + if _WIN32: + raise oefmt(space.w_NotImplementedError, + "symlink() is not implemented for PyPy on Windows") try: if rposix.HAVE_SYMLINKAT and dir_fd != DEFAULT_DIR_FD: src = space.fsencode_w(w_src) @@ -2283,7 +2286,9 @@ This function will not follow symbolic links. Equivalent to chflags(path, flags, follow_symlinks=False).""" -def getxattr(): + at unwrap_spec(path=path_or_fd(), attribute=path_or_fd(allow_fd=False), + follow_symlinks=bool) +def getxattr(space, path, attribute, __kwonly__, follow_symlinks=True): """getxattr(path, attribute, *, follow_symlinks=True) -> value Return the value of extended attribute attribute on path. @@ -2292,8 +2297,27 @@ If follow_symlinks is False, and the last element of the path is a symbolic link, getxattr will examine the symbolic link itself instead of the file the link points to.""" + if path.as_fd != -1: + if not follow_symlinks: + raise oefmt(space.w_ValueError, + "getxattr: cannot use fd and follow_symlinks together") + try: + result = rposix.fgetxattr(path.as_fd, attribute.as_bytes) + except OSError as e: + raise wrap_oserror(space, e, path.as_bytes) + else: + try: + result = rposix.getxattr(path.as_bytes, attribute.as_bytes, + follow_symlinks=follow_symlinks) + except OSError as e: + raise wrap_oserror(space, e, path.as_bytes) + return space.newbytes(result) -def setxattr(): + at unwrap_spec(path=path_or_fd(), attribute=path_or_fd(allow_fd=False), + flags=c_int, + follow_symlinks=bool) +def setxattr(space, path, attribute, w_value, flags=0, + __kwonly__=None, follow_symlinks=True): """setxattr(path, attribute, value, flags=0, *, follow_symlinks=True) Set extended attribute attribute on path to value. @@ -2301,9 +2325,26 @@ If follow_symlinks is False, and the last element of the path is a symbolic link, setxattr will modify the symbolic link itself instead of the file the link points to.""" + value = space.charbuf_w(w_value) + if path.as_fd != -1: + if not follow_symlinks: + raise oefmt(space.w_ValueError, + "setxattr: cannot use fd and follow_symlinks together") + try: + rposix.fsetxattr(path.as_fd, attribute.as_bytes, value) + except OSError as e: + raise wrap_oserror(space, e, path.as_bytes) + else: + try: + rposix.setxattr(path.as_bytes, attribute.as_bytes, value, + follow_symlinks=follow_symlinks) + except OSError as e: + raise wrap_oserror(space, e, path.as_bytes) -def removexattr(): + at unwrap_spec(path=path_or_fd(), attribute=path_or_fd(allow_fd=False), + follow_symlinks=bool) +def removexattr(space, path, attribute, __kwonly__, follow_symlinks=True): """removexattr(path, attribute, *, follow_symlinks=True) Remove extended attribute attribute on path. 
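The xattr wrappers added in this changeset follow the interface CPython 3 documents for the os module. A minimal usage sketch of that interface, assuming a Linux filesystem that accepts user.* attributes (the file name is arbitrary):

    import os

    fname = 'xattr_demo.txt'
    with open(fname, 'wb'):
        pass
    # XATTR_CREATE makes setxattr fail if the attribute already exists.
    os.setxattr(fname, 'user.demo', b'hello', os.XATTR_CREATE)
    assert os.getxattr(fname, 'user.demo') == b'hello'
    assert 'user.demo' in os.listxattr(fname)
    os.removexattr(fname, 'user.demo')
    os.unlink(fname)

Every call also accepts the follow_symlinks keyword shown in the docstrings; combining follow_symlinks=False with a file descriptor raises ValueError, which is what the fd checks in these hunks enforce.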
@@ -2311,8 +2352,24 @@ If follow_symlinks is False, and the last element of the path is a symbolic link, removexattr will modify the symbolic link itself instead of the file the link points to.""" + if path.as_fd != -1: + if not follow_symlinks: + raise oefmt(space.w_ValueError, + "removexattr: cannot use fd and follow_symlinks together") + try: + rposix.fremovexattr(path.as_fd, attribute.as_bytes) + except OSError as e: + raise wrap_oserror(space, e, path.as_bytes) + else: + try: + rposix.removexattr(path.as_bytes, attribute.as_bytes, + follow_symlinks=follow_symlinks) + except OSError as e: + raise wrap_oserror(space, e, path.as_bytes) -def listxattr(): + + at unwrap_spec(path=path_or_fd(), follow_symlinks=bool) +def listxattr(space, path, __kwonly__, follow_symlinks=True): """listxattr(path='.', *, follow_symlinks=True) Return a list of extended attributes on path. @@ -2322,6 +2379,20 @@ If follow_symlinks is False, and the last element of the path is a symbolic link, listxattr will examine the symbolic link itself instead of the file the link points to.""" + if path.as_fd != -1: + if not follow_symlinks: + raise oefmt(space.w_ValueError, + "listxattr: cannot use fd and follow_symlinks together") + try: + result = rposix.flistxattr(path.as_fd) + except OSError as e: + raise wrap_oserror(space, e, eintr_retry=False) + else: + try: + result = rposix.listxattr(path.as_bytes, follow_symlinks) + except OSError as e: + raise wrap_oserror(space, e, path.as_bytes) + return space.newlist([space.newfilename(attr) for attr in result]) have_functions = [] @@ -2449,8 +2520,8 @@ @unwrap_spec(policy=int) def sched_get_priority_max(space, policy): - """returns the maximum priority value that - can be used with the scheduling algorithm + """returns the maximum priority value that + can be used with the scheduling algorithm identified by policy """ while True: @@ -2464,7 +2535,7 @@ @unwrap_spec(policy=int) def sched_get_priority_min(space, policy): """returns the minimum priority value that - can be used with the scheduling algorithm + can be used with the scheduling algorithm identified by policy """ while True: @@ -2477,7 +2548,7 @@ @unwrap_spec(fd=c_int, cmd=c_int, length=r_longlong) def lockf(space, fd, cmd, length): - """apply, test or remove a POSIX lock on an + """apply, test or remove a POSIX lock on an open file. 
""" while True: diff --git a/pypy/module/posix/interp_scandir.py b/pypy/module/posix/interp_scandir.py --- a/pypy/module/posix/interp_scandir.py +++ b/pypy/module/posix/interp_scandir.py @@ -13,28 +13,39 @@ def scandir(space, w_path=None): "scandir(path='.') -> iterator of DirEntry objects for given path" - if _WIN32: - raise NotImplementedError("XXX WIN32") - if space.is_none(w_path): w_path = space.newunicode(u".") - if space.isinstance_w(w_path, space.w_bytes): - path_bytes = space.bytes0_w(w_path) - result_is_bytes = True + + if not _WIN32: + if space.isinstance_w(w_path, space.w_bytes): + path = space.bytes0_w(w_path) + result_is_bytes = True + else: + path = space.fsencode_w(w_path) + result_is_bytes = False else: - path_bytes = space.fsencode_w(w_path) + if space.isinstance_w(w_path, space.w_bytes): + raise oefmt(space.w_TypeError, "os.scandir() doesn't support bytes path" + " on Windows, use Unicode instead") + path = space.unicode_w(w_path) result_is_bytes = False + # 'path' is always bytes on posix and always unicode on windows try: - dirp = rposix_scandir.opendir(path_bytes) + dirp = rposix_scandir.opendir(path) except OSError as e: raise wrap_oserror2(space, e, w_path, eintr_retry=False) - path_prefix = path_bytes - if len(path_prefix) > 0 and path_prefix[-1] != '/': - path_prefix += '/' - w_path_prefix = space.newbytes(path_prefix) - if not result_is_bytes: - w_path_prefix = space.fsdecode(w_path_prefix) + path_prefix = path + if not _WIN32: + if len(path_prefix) > 0 and path_prefix[-1] != '/': + path_prefix += '/' + w_path_prefix = space.newbytes(path_prefix) + if not result_is_bytes: + w_path_prefix = space.fsdecode(w_path_prefix) + else: + if len(path_prefix) > 0 and path_prefix[-1] not in (u'\\', u'/', u':'): + path_prefix += u'\\' + w_path_prefix = space.newunicode(path_prefix) if rposix.HAVE_FSTATAT: dirfd = rposix.c_dirfd(dirp) else: @@ -89,10 +100,14 @@ eintr_retry=False)) if not entry: raise self.fail() - assert rposix_scandir.has_name_bytes(entry) - name = rposix_scandir.get_name_bytes(entry) - if name != '.' and name != '..': - break + if not _WIN32: + name = rposix_scandir.get_name_bytes(entry) + if name != '.' and name != '..': + break + else: + name = rposix_scandir.get_name_unicode(entry) + if name != u'.' 
and name != u'..': + break # known_type = rposix_scandir.get_known_type(entry) inode = rposix_scandir.get_inode(entry) @@ -113,12 +128,13 @@ class FileNotFound(Exception): pass -assert 0 <= rposix_scandir.DT_UNKNOWN <= 255 -assert 0 <= rposix_scandir.DT_REG <= 255 -assert 0 <= rposix_scandir.DT_DIR <= 255 -assert 0 <= rposix_scandir.DT_LNK <= 255 -FLAG_STAT = 256 -FLAG_LSTAT = 512 +if not _WIN32: + assert 0 <= rposix_scandir.DT_UNKNOWN <= 255 + assert 0 <= rposix_scandir.DT_REG <= 255 + assert 0 <= rposix_scandir.DT_DIR <= 255 + assert 0 <= rposix_scandir.DT_LNK <= 255 + FLAG_STAT = 256 + FLAG_LSTAT = 512 class W_DirEntry(W_Root): @@ -127,14 +143,17 @@ def __init__(self, scandir_iterator, name, known_type, inode): self.space = scandir_iterator.space self.scandir_iterator = scandir_iterator - self.name = name # always bytes on Posix + self.name = name # always bytes on Posix; always unicode on Windows self.inode = inode self.flags = known_type - assert known_type == (known_type & 255) # - w_name = self.space.newbytes(name) - if not scandir_iterator.result_is_bytes: - w_name = self.space.fsdecode(w_name) + if not _WIN32: + assert known_type == (known_type & 255) + w_name = self.space.newbytes(name) + if not scandir_iterator.result_is_bytes: + w_name = self.space.fsdecode(w_name) + else: + w_name = self.space.newunicode(name) self.w_name = w_name def descr_repr(self, space): @@ -156,93 +175,109 @@ # the end of the class. Every method only calls methods *before* # it in program order, so there is no cycle. - def get_lstat(self): - """Get the lstat() of the direntry.""" - if (self.flags & FLAG_LSTAT) == 0: - # Unlike CPython, try to use fstatat() if possible - dirfd = self.scandir_iterator.dirfd - if dirfd != -1 and rposix.HAVE_FSTATAT: - st = rposix_stat.fstatat(self.name, dirfd, - follow_symlinks=False) - else: - path = self.space.fsencode_w(self.fget_path(self.space)) - st = rposix_stat.lstat(path) - self.d_lstat = st - self.flags |= FLAG_LSTAT - return self.d_lstat - - def get_stat(self): - """Get the stat() of the direntry. This is implemented in - such a way that it won't do both a stat() and a lstat(). - """ - if (self.flags & FLAG_STAT) == 0: - # We don't have the 'd_stat'. If the known_type says the - # direntry is not a DT_LNK, then try to get and cache the - # 'd_lstat' instead. Then, or if we already have a - # 'd_lstat' from before, *and* if the 'd_lstat' is not a - # S_ISLNK, we can reuse it unchanged for 'd_stat'. - # - # Note how, in the common case where the known_type says - # it is a DT_REG or DT_DIR, then we call and cache lstat() - # and that's it. Also note that in a d_type-less OS or on - # a filesystem that always answer DT_UNKNOWN, this method - # will instead only call at most stat(), but not cache it - # as 'd_lstat'. - known_type = self.flags & 255 - if (known_type != rposix_scandir.DT_UNKNOWN and - known_type != rposix_scandir.DT_LNK): - self.get_lstat() # fill the 'd_lstat' cache - have_lstat = True - else: - have_lstat = (self.flags & FLAG_LSTAT) != 0 - - if have_lstat: - # We have the lstat() but not the stat(). They are - # the same, unless the 'd_lstat' is a S_IFLNK. - must_call_stat = stat.S_ISLNK(self.d_lstat.st_mode) - else: - must_call_stat = True - - if must_call_stat: - # Must call stat(). 
Try to use fstatat() if possible + if not _WIN32: + def get_lstat(self): + """Get the lstat() of the direntry.""" + if (self.flags & FLAG_LSTAT) == 0: + # Unlike CPython, try to use fstatat() if possible dirfd = self.scandir_iterator.dirfd - if dirfd != -1 and rposix.HAVE_FSTATAT: + if rposix.HAVE_FSTATAT and dirfd != -1: st = rposix_stat.fstatat(self.name, dirfd, - follow_symlinks=True) + follow_symlinks=False) else: path = self.space.fsencode_w(self.fget_path(self.space)) - st = rposix_stat.stat(path) + st = rposix_stat.lstat(path) + self.d_lstat = st + self.flags |= FLAG_LSTAT + return self.d_lstat + + def get_stat(self): + """Get the stat() of the direntry. This is implemented in + such a way that it won't do both a stat() and a lstat(). + """ + if (self.flags & FLAG_STAT) == 0: + # We don't have the 'd_stat'. If the known_type says the + # direntry is not a DT_LNK, then try to get and cache the + # 'd_lstat' instead. Then, or if we already have a + # 'd_lstat' from before, *and* if the 'd_lstat' is not a + # S_ISLNK, we can reuse it unchanged for 'd_stat'. + # + # Note how, in the common case where the known_type says + # it is a DT_REG or DT_DIR, then we call and cache lstat() + # and that's it. Also note that in a d_type-less OS or on + # a filesystem that always answer DT_UNKNOWN, this method + # will instead only call at most stat(), but not cache it + # as 'd_lstat'. + known_type = self.flags & 255 + if (known_type != rposix_scandir.DT_UNKNOWN and + known_type != rposix_scandir.DT_LNK): + self.get_lstat() # fill the 'd_lstat' cache + have_lstat = True + else: + have_lstat = (self.flags & FLAG_LSTAT) != 0 + + if have_lstat: + # We have the lstat() but not the stat(). They are + # the same, unless the 'd_lstat' is a S_IFLNK. + must_call_stat = stat.S_ISLNK(self.d_lstat.st_mode) + else: + must_call_stat = True + + if must_call_stat: + # Must call stat(). Try to use fstatat() if possible + dirfd = self.scandir_iterator.dirfd + if dirfd != -1 and rposix.HAVE_FSTATAT: + st = rposix_stat.fstatat(self.name, dirfd, + follow_symlinks=True) + else: + path = self.space.fsencode_w(self.fget_path(self.space)) + st = rposix_stat.stat(path) + else: + st = self.d_lstat + + self.d_stat = st + self.flags |= FLAG_STAT + return self.d_stat + + def get_stat_or_lstat(self, follow_symlinks): + if follow_symlinks: + return self.get_stat() else: - st = self.d_lstat + return self.get_lstat() - self.d_stat = st - self.flags |= FLAG_STAT - return self.d_stat + def check_mode(self, follow_symlinks): + """Get the stat() or lstat() of the direntry, and return the + S_IFMT. If calling stat()/lstat() gives us ENOENT, return -1 + instead; it is better to give up and answer "no, not this type" + to requests, rather than propagate the error. + """ + try: + st = self.get_stat_or_lstat(follow_symlinks) + except OSError as e: + if e.errno == ENOENT: # not found + return -1 + raise wrap_oserror2(self.space, e, self.fget_path(self.space), + eintr_retry=False) + return stat.S_IFMT(st.st_mode) - def get_stat_or_lstat(self, follow_symlinks): - if follow_symlinks: - return self.get_stat() - else: - return self.get_lstat() + else: + # Win32 + stat_cached = False - def check_mode(self, follow_symlinks): - """Get the stat() or lstat() of the direntry, and return the - S_IFMT. If calling stat()/lstat() gives us ENOENT, return -1 - instead; it is better to give up and answer "no, not this type" - to requests, rather than propagate the error. 
- """ - try: - st = self.get_stat_or_lstat(follow_symlinks) - except OSError as e: - if e.errno == ENOENT: # not found - return -1 - raise wrap_oserror2(self.space, e, self.fget_path(self.space), - eintr_retry=False) - return stat.S_IFMT(st.st_mode) + def check_mode(self, follow_symlinks): + return self.flags + + def get_stat_or_lstat(self, follow_symlinks): # 'follow_symlinks' ignored + if not self.stat_cached: + path = self.space.unicode0_w(self.fget_path(self.space)) + self.d_stat = rposix_stat.stat(path) + self.stat_cached = True + return self.d_stat + def is_dir(self, follow_symlinks): known_type = self.flags & 255 - if known_type != rposix_scandir.DT_UNKNOWN: + if not _WIN32 and known_type != rposix_scandir.DT_UNKNOWN: if known_type == rposix_scandir.DT_DIR: return True elif follow_symlinks and known_type == rposix_scandir.DT_LNK: @@ -253,7 +288,7 @@ def is_file(self, follow_symlinks): known_type = self.flags & 255 - if known_type != rposix_scandir.DT_UNKNOWN: + if not _WIN32 and known_type != rposix_scandir.DT_UNKNOWN: if known_type == rposix_scandir.DT_REG: return True elif follow_symlinks and known_type == rposix_scandir.DT_LNK: @@ -265,7 +300,7 @@ def is_symlink(self): """Check if the direntry is a symlink. May get the lstat().""" known_type = self.flags & 255 - if known_type != rposix_scandir.DT_UNKNOWN: + if not _WIN32 and known_type != rposix_scandir.DT_UNKNOWN: return known_type == rposix_scandir.DT_LNK return self.check_mode(follow_symlinks=False) == stat.S_IFLNK @@ -294,7 +329,15 @@ return build_stat_result(space, st) def descr_inode(self, space): - return space.newint(self.inode) + inode = self.inode + if inode is None: # _WIN32 + try: + st = self.get_stat_or_lstat(follow_symlinks=False) + except OSError as e: + raise wrap_oserror2(space, e, self.fget_path(space), + eintr_retry=False) + inode = st.st_ino + return space.newint(inode) W_DirEntry.typedef = TypeDef( diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -386,8 +386,8 @@ def test_times(self): """ - posix.times() should return a posix.times_result object giving - float-representations (seconds, effectively) of the four fields from + posix.times() should return a posix.times_result object giving + float-representations (seconds, effectively) of the four fields from the underlying struct tms and the return value. 
""" result = self.posix.times() @@ -977,7 +977,7 @@ assert posix.sched_get_priority_min(posix.SCHED_OTHER) != -1 if getattr(posix, 'SCHED_BATCH', None): assert posix.sched_get_priority_min(posix.SCHED_BATCH) != -1 - + if hasattr(rposix, 'sched_get_priority_min'): def test_os_sched_priority_max_greater_than_min(self): posix, os = self.posix, self.os @@ -992,7 +992,7 @@ def test_sched_yield(self): os = self.posix #Always suceeds on Linux - os.sched_yield() + os.sched_yield() def test_write_buffer(self): os = self.posix @@ -1157,7 +1157,7 @@ expected = min(myprio + 3, 19) assert os.WEXITSTATUS(status1) == expected - if hasattr(os, 'symlink'): + if sys.platform != 'win32': def test_symlink(self): posix = self.posix bytes_dir = self.bytes_dir @@ -1187,6 +1187,10 @@ finally: posix.close(f) posix.unlink(bytes_dir + '/somelink'.encode()) + else: + def test_symlink(self): + posix = self.posix + raises(NotImplementedError, posix.symlink, 'a', 'b') if hasattr(os, 'ftruncate'): def test_truncate(self): @@ -1350,7 +1354,7 @@ posix.close(fd) s2.close() s1.close() - + def test_os_lockf(self): posix, os = self.posix, self.os fd = os.open(self.path2 + 'test_os_lockf', os.O_WRONLY | os.O_CREAT) @@ -1441,6 +1445,25 @@ e = raises(OSError, self.posix.symlink, 'bok', '/nonexistentdir/boz') assert str(e.value).endswith(": 'bok' -> '/nonexistentdir/boz'") + if hasattr(rposix, 'getxattr'): + def test_xattr_simple(self): + # Minimal testing here, lib-python has better tests. + os = self.posix + with open(self.path, 'wb'): + pass + init_names = os.listxattr(self.path) + excinfo = raises(OSError, os.getxattr, self.path, 'user.test') + assert excinfo.value.filename == self.path + os.setxattr(self.path, 'user.test', b'', os.XATTR_CREATE, follow_symlinks=False) + assert os.getxattr(self.path, 'user.test') == b'' + os.setxattr(self.path, b'user.test', b'foo', os.XATTR_REPLACE) + assert os.getxattr(self.path, 'user.test', follow_symlinks=False) == b'foo' + assert set(os.listxattr(self.path)) == set( + init_names + ['user.test']) + os.removexattr(self.path, 'user.test', follow_symlinks=False) + raises(OSError, os.getxattr, self.path, 'user.test') + assert os.listxattr(self.path, follow_symlinks=False) == init_names + class AppTestEnvironment(object): def setup_class(cls): @@ -1495,6 +1518,7 @@ res = os.system(cmd) assert res == 0 + @py.test.fixture def check_fsencoding(space, pytestconfig): if pytestconfig.getvalue('runappdirect'): diff --git a/pypy/module/posix/test/test_scandir.py b/pypy/module/posix/test/test_scandir.py --- a/pypy/module/posix/test/test_scandir.py +++ b/pypy/module/posix/test/test_scandir.py @@ -1,4 +1,5 @@ import sys, os +import py from rpython.tool.udir import udir from pypy.module.posix.test import test_posix2 @@ -31,6 +32,8 @@ def setup_class(cls): space = cls.space + cls.w_WIN32 = space.wrap(sys.platform == 'win32') + cls.w_sep = space.wrap(os.sep) cls.w_posix = space.appexec([], test_posix2.GET_POSIX) cls.w_dir_empty = space.wrap(_make_dir('empty', {})) cls.w_dir0 = space.wrap(_make_dir('dir0', {'f1': 'file', @@ -38,10 +41,11 @@ 'f3': 'file'})) cls.w_dir1 = space.wrap(_make_dir('dir1', {'file1': 'file'})) cls.w_dir2 = space.wrap(_make_dir('dir2', {'subdir2': 'dir'})) - cls.w_dir3 = space.wrap(_make_dir('dir3', {'sfile3': 'symlink-file'})) - cls.w_dir4 = space.wrap(_make_dir('dir4', {'sdir4': 'symlink-dir'})) - cls.w_dir5 = space.wrap(_make_dir('dir5', {'sbrok5': 'symlink-broken'})) - cls.w_dir6 = space.wrap(_make_dir('dir6', {'serr6': 'symlink-error'})) + if sys.platform != 'win32': + cls.w_dir3 = 
space.wrap(_make_dir('dir3', {'sfile3': 'symlink-file'})) + cls.w_dir4 = space.wrap(_make_dir('dir4', {'sdir4': 'symlink-dir'})) + cls.w_dir5 = space.wrap(_make_dir('dir5', {'sbrok5': 'symlink-broken'})) + cls.w_dir6 = space.wrap(_make_dir('dir6', {'serr6': 'symlink-error'})) def test_scandir_empty(self): posix = self.posix @@ -60,27 +64,32 @@ d = next(posix.scandir()) assert type(d.name) is str assert type(d.path) is str - assert d.path == './' + d.name + assert d.path == '.' + self.sep + d.name d = next(posix.scandir(None)) assert type(d.name) is str assert type(d.path) is str - assert d.path == './' + d.name + assert d.path == '.' + self.sep + d.name d = next(posix.scandir(u'.')) assert type(d.name) is str assert type(d.path) is str - assert d.path == './' + d.name - d = next(posix.scandir(b'.')) - assert type(d.name) is bytes - assert type(d.path) is bytes - assert d.path == b'./' + d.name - d = next(posix.scandir('/')) + assert d.path == '.' + self.sep + d.name + d = next(posix.scandir(self.sep)) assert type(d.name) is str assert type(d.path) is str - assert d.path == '/' + d.name - d = next(posix.scandir(b'/')) - assert type(d.name) is bytes - assert type(d.path) is bytes - assert d.path == b'/' + d.name + assert d.path == self.sep + d.name + if not self.WIN32: + d = next(posix.scandir(b'.')) + assert type(d.name) is bytes + assert type(d.path) is bytes + assert d.path == b'./' + d.name + d = next(posix.scandir(b'/')) + assert type(d.name) is bytes + assert type(d.path) is bytes + assert d.path == b'/' + d.name + else: + raises(TypeError, posix.scandir, b'.') + raises(TypeError, posix.scandir, b'/') + raises(TypeError, posix.scandir, b'\\') def test_stat1(self): posix = self.posix @@ -89,6 +98,8 @@ assert d.stat().st_mode & 0o170000 == 0o100000 # S_IFREG assert d.stat().st_size == 0 + @py.test.mark.skipif(sys.platform == "win32", + reason="no symlink support so far") def test_stat4(self): posix = self.posix d = next(posix.scandir(self.dir4)) @@ -118,6 +129,8 @@ assert not d.is_file(follow_symlinks=False) assert d.is_dir(follow_symlinks=False) + @py.test.mark.skipif(sys.platform == "win32", + reason="no symlink support so far") def test_dir3(self): posix = self.posix d = next(posix.scandir(self.dir3)) @@ -128,6 +141,8 @@ assert d.is_file(follow_symlinks=True) assert not d.is_file(follow_symlinks=False) + @py.test.mark.skipif(sys.platform == "win32", + reason="no symlink support so far") def test_dir4(self): posix = self.posix d = next(posix.scandir(self.dir4)) @@ -138,6 +153,8 @@ assert d.is_dir(follow_symlinks=True) assert not d.is_dir(follow_symlinks=False) + @py.test.mark.skipif(sys.platform == "win32", + reason="no symlink support so far") def test_dir5(self): posix = self.posix d = next(posix.scandir(self.dir5)) @@ -147,6 +164,8 @@ assert d.is_symlink() raises(OSError, d.stat) + @py.test.mark.skipif(sys.platform == "win32", + reason="no symlink support so far") def test_dir6(self): posix = self.posix d = next(posix.scandir(self.dir6)) diff --git a/pypy/module/zipimport/interp_zipimport.py b/pypy/module/zipimport/interp_zipimport.py --- a/pypy/module/zipimport/interp_zipimport.py +++ b/pypy/module/zipimport/interp_zipimport.py @@ -252,6 +252,8 @@ gets in code_object.co_filename. 
Something like 'myfile.zip/mymodule.py' """ + if ZIPSEP != os.path.sep: + filename = filename.replace(ZIPSEP, os.path.sep) return self.filename + os.path.sep + filename def load_module(self, space, w_fullname): diff --git a/pypy/tool/build_cffi_imports.py b/pypy/tool/build_cffi_imports.py --- a/pypy/tool/build_cffi_imports.py +++ b/pypy/tool/build_cffi_imports.py @@ -17,8 +17,8 @@ "resource": "_resource_build.py" if sys.platform != "win32" else None, "lzma": "_lzma_build.py", "_decimal": "_decimal_build.py", - "ssl": "_ssl_build.py", - # hashlib does not need to be built! It uses API calls from ssl + "_ssl": "_ssl_build.py", + # hashlib does not need to be built! It uses API calls from _ssl "xx": None, # for testing: 'None' should be completely ignored } @@ -28,7 +28,7 @@ 'lzma': ('https://tukaani.org/xz/xz-5.2.3.tar.gz', '71928b357d0a09a12a4b4c5fafca8c31c19b0e7d3b8ebb19622e96f26dbf28cb', []), - 'ssl': ('http://ftp.openbsd.org/pub/OpenBSD/LibreSSL/libressl-2.6.2.tar.gz', + '_ssl': ('http://ftp.openbsd.org/pub/OpenBSD/LibreSSL/libressl-2.6.2.tar.gz', 'b029d2492b72a9ba5b5fcd9f3d602c9fd0baa087912f2aaecc28f52f567ec478', ['--without-openssldir']), '_gdbm': ('http://ftp.gnu.org/gnu/gdbm/gdbm-1.13.tar.gz', @@ -159,6 +159,12 @@ continue if module is None or getattr(options, 'no_' + key, False): continue + # the key is the module name, has it already been built? + status, stdout, stderr = run_subprocess(str(pypy_c), ['-c', 'import %s' % key]) + if status == 0: + print('*', ' %s already built' % key, file=sys.stderr) + continue + if module.endswith('.py'): args = [module] cwd = str(join(basedir,'lib_pypy')) @@ -175,7 +181,7 @@ shutil.rmtree(destdir, ignore_errors=True) os.makedirs(destdir) - if key == 'ssl' and sys.platform == 'darwin': + if key == '_ssl' and sys.platform == 'darwin': # this patch is loosely inspired by an Apple and adds # a fallback to the OS X roots when none are available patches = [ @@ -201,7 +207,7 @@ env['LDFLAGS'] = \ '-L{}/usr/lib {}'.format(destdir, env.get('LDFLAGS', '')) - if key == 'ssl' and sys.platform == 'darwin': + if key == '_ssl' and sys.platform == 'darwin': # needed for our roots patch env['LDFLAGS'] += ' -framework CoreFoundation -framework Security' @@ -237,7 +243,7 @@ help='instead of executing sys.executable' \ ' you can specify an alternative pypy vm here') parser.add_argument('--only', dest='only', default=None, - help='Only build the modules delimited by a colon. E.g. ssl,sqlite') + help='Only build the modules delimited by a colon. E.g. 
_ssl,sqlite') parser.add_argument('--embed-dependencies', dest='embed_dependencies', action='store_true', help='embed dependencies for distribution') args = parser.parse_args() diff --git a/rpython/annotator/binaryop.py b/rpython/annotator/binaryop.py --- a/rpython/annotator/binaryop.py +++ b/rpython/annotator/binaryop.py @@ -381,16 +381,14 @@ class __extend__(pairtype(SomeChar, SomeUnicodeCodePoint), pairtype(SomeUnicodeCodePoint, SomeChar)): def union((uchr1, uchr2)): - return SomeUnicodeCodePoint() + no_nul = uchr1.no_nul and uchr2.no_nul + return SomeUnicodeCodePoint(no_nul=no_nul) class __extend__(pairtype(SomeUnicodeCodePoint, SomeUnicodeCodePoint)): def union((uchr1, uchr2)): no_nul = uchr1.no_nul and uchr2.no_nul return SomeUnicodeCodePoint(no_nul=no_nul) - def add((chr1, chr2)): - return SomeUnicodeString() - class __extend__(pairtype(SomeString, SomeUnicodeString), pairtype(SomeUnicodeString, SomeString)): def mod((str, unistring)): diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -2574,3 +2574,160 @@ """Passes offset==NULL; not support on all OSes""" res = c_sendfile(out_fd, in_fd, lltype.nullptr(_OFF_PTR_T.TO), count) return handle_posix_error('sendfile', res) + +# ____________________________________________________________ +# Support for *xattr functions + +if sys.platform.startswith('linux'): + + class CConfig: + _compilation_info_ = ExternalCompilationInfo( + includes=['sys/xattr.h', 'linux/limits.h'],) + XATTR_SIZE_MAX = rffi_platform.DefinedConstantInteger('XATTR_SIZE_MAX') + XATTR_CREATE = rffi_platform.DefinedConstantInteger('XATTR_CREATE') + XATTR_REPLACE = rffi_platform.DefinedConstantInteger('XATTR_REPLACE') + + cConfig = rffi_platform.configure(CConfig) + globals().update(cConfig) + c_fgetxattr = external('fgetxattr', + [rffi.INT, rffi.CCHARP, rffi.VOIDP, rffi.SIZE_T], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_getxattr = external('getxattr', + [rffi.CCHARP, rffi.CCHARP, rffi.VOIDP, rffi.SIZE_T], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_lgetxattr = external('lgetxattr', + [rffi.CCHARP, rffi.CCHARP, rffi.VOIDP, rffi.SIZE_T], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_fsetxattr = external('fsetxattr', + [rffi.INT, rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T, rffi.INT], + rffi.INT, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_setxattr = external('setxattr', + [rffi.CCHARP, rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T, rffi.INT], + rffi.INT, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_lsetxattr = external('lsetxattr', + [rffi.CCHARP, rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T, rffi.INT], + rffi.INT, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_fremovexattr = external('fremovexattr', + [rffi.INT, rffi.CCHARP], rffi.INT, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_removexattr = external('removexattr', + [rffi.CCHARP, rffi.CCHARP], rffi.INT, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_lremovexattr = external('lremovexattr', + [rffi.CCHARP, rffi.CCHARP], rffi.INT, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_flistxattr = external('flistxattr', + [rffi.INT, rffi.CCHARP, rffi.SIZE_T], rffi.SSIZE_T, + 
compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_listxattr = external('listxattr', + [rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + c_llistxattr = external('llistxattr', + [rffi.CCHARP, rffi.CCHARP, rffi.SIZE_T], rffi.SSIZE_T, + compilation_info=CConfig._compilation_info_, + save_err=rffi.RFFI_SAVE_ERRNO) + buf_sizes = [256, XATTR_SIZE_MAX] + + def fgetxattr(fd, name): + for size in buf_sizes: + with rffi.scoped_alloc_buffer(size) as buf: + void_buf = rffi.cast(rffi.VOIDP, buf.raw) + res = c_fgetxattr(fd, name, void_buf, size) + if res < 0: + err = get_saved_errno() + if err != errno.ERANGE: + raise OSError(err, 'fgetxattr failed') + else: + return buf.str(res) + else: + raise OSError(errno.ERANGE, 'fgetxattr failed') + + def getxattr(path, name, follow_symlinks=True): + for size in buf_sizes: + with rffi.scoped_alloc_buffer(size) as buf: + void_buf = rffi.cast(rffi.VOIDP, buf.raw) + if follow_symlinks: + res = c_getxattr(path, name, void_buf, size) + else: + res = c_lgetxattr(path, name, void_buf, size) + if res < 0: + err = get_saved_errno() + if err != errno.ERANGE: + c_name = 'getxattr' if follow_symlinks else 'lgetxattr' + raise OSError(err, c_name + 'failed') + else: + return buf.str(res) + else: + c_name = 'getxattr' if follow_symlinks else 'lgetxattr' + raise OSError(errno.ERANGE, c_name + 'failed') + + def fsetxattr(fd, name, value, flags=0): + return handle_posix_error( + 'fsetxattr', c_fsetxattr(fd, name, value, len(value), flags)) + + def setxattr(path, name, value, flags=0, follow_symlinks=True): + if follow_symlinks: + return handle_posix_error( + 'setxattr', c_setxattr(path, name, value, len(value), flags)) + else: + return handle_posix_error( + 'lsetxattr', c_lsetxattr(path, name, value, len(value), flags)) + + def fremovexattr(fd, name): + return handle_posix_error('fremovexattr', c_fremovexattr(fd, name)) + + def removexattr(path, name, follow_symlinks=True): + if follow_symlinks: + return handle_posix_error('removexattr', c_removexattr(path, name)) + else: + return handle_posix_error('lremovexattr', c_lremovexattr(path, name)) + + def _unpack_attrs(attr_string): + result = attr_string.split('\0') + del result[-1] + return result + + def flistxattr(fd): + for size in buf_sizes: + with rffi.scoped_alloc_buffer(size) as buf: + res = c_flistxattr(fd, buf.raw, size) + if res < 0: + err = get_saved_errno() + if err != errno.ERANGE: + raise OSError(err, 'flistxattr failed') + else: + return _unpack_attrs(buf.str(res)) + else: + raise OSError(errno.ERANGE, 'flistxattr failed') + + def listxattr(path, follow_symlinks=True): + for size in buf_sizes: + with rffi.scoped_alloc_buffer(size) as buf: + if follow_symlinks: + res = c_listxattr(path, buf.raw, size) + else: + res = c_llistxattr(path, buf.raw, size) + if res < 0: + err = get_saved_errno() + if err != errno.ERANGE: + c_name = 'listxattr' if follow_symlinks else 'llistxattr' + raise OSError(err, c_name + 'failed') + else: + return _unpack_attrs(buf.str(res)) + else: + c_name = 'listxattr' if follow_symlinks else 'llistxattr' + raise OSError(errno.ERANGE, c_name + 'failed') diff --git a/rpython/rlib/rposix_scandir.py b/rpython/rlib/rposix_scandir.py --- a/rpython/rlib/rposix_scandir.py +++ b/rpython/rlib/rposix_scandir.py @@ -1,56 +1,126 @@ from rpython.rlib import rposix, rwin32 from rpython.rlib.objectmodel import specialize from rpython.rtyper.lltypesystem import lltype, rffi +from 
rpython.rlib.rarithmetic import intmask - at specialize.argtype(0) -def opendir(path): - path = rposix._as_bytes0(path) - return opendir_bytes(path) - -def opendir_bytes(path): - dirp = rposix.c_opendir(path) - if not dirp: - raise OSError(rposix.get_saved_errno(), "opendir failed") - return dirp - -def closedir(dirp): - rposix.c_closedir(dirp) - if not rwin32.WIN32: + @specialize.argtype(0) + def opendir(path): + path = rposix._as_bytes0(path) + return opendir_bytes(path) + + def opendir_bytes(path): + dirp = rposix.c_opendir(path) + if not dirp: + raise OSError(rposix.get_saved_errno(), "opendir failed") + return dirp + + def closedir(dirp): + rposix.c_closedir(dirp) + NULL_DIRP = lltype.nullptr(rposix.DIRP.TO) -def nextentry(dirp): - """Read the next entry and returns an opaque object. - Use the methods has_xxx() and get_xxx() to read from that - opaque object. The opaque object is valid until the next - time nextentry() or closedir() is called. This may raise - OSError, or return a NULL pointer when exhausted. Note - that this doesn't filter out the "." and ".." entries. - """ - direntp = rposix.c_readdir(dirp) - if direntp: - error = rposix.get_saved_errno() - if error: - raise OSError(error, "readdir failed") - return direntp + def nextentry(dirp): + """Read the next entry and returns an opaque object. + Use the methods has_xxx() and get_xxx() to read from that + opaque object. The opaque object is valid until the next + time nextentry() or closedir() is called. This may raise + OSError, or return a NULL pointer when exhausted. Note + that this doesn't filter out the "." and ".." entries. + """ + direntp = rposix.c_readdir(dirp) + if direntp: + error = rposix.get_saved_errno() + if error: + raise OSError(error, "readdir failed") + return direntp -def has_name_bytes(direntp): - return True + def get_name_bytes(direntp): + namep = rffi.cast(rffi.CCHARP, direntp.c_d_name) + return rffi.charp2str(namep) -def get_name_bytes(direntp): - namep = rffi.cast(rffi.CCHARP, direntp.c_d_name) - return rffi.charp2str(namep) + DT_UNKNOWN = rposix.dirent_config.get('DT_UNKNOWN', 0) + DT_REG = rposix.dirent_config.get('DT_REG', 255) + DT_DIR = rposix.dirent_config.get('DT_DIR', 255) + DT_LNK = rposix.dirent_config.get('DT_LNK', 255) -DT_UNKNOWN = rposix.dirent_config.get('DT_UNKNOWN', 0) -DT_REG = rposix.dirent_config.get('DT_REG', 255) -DT_DIR = rposix.dirent_config.get('DT_DIR', 255) -DT_LNK = rposix.dirent_config.get('DT_LNK', 255) + def get_known_type(direntp): + if rposix.HAVE_D_TYPE: + return rffi.getintfield(direntp, 'c_d_type') + return DT_UNKNOWN -def get_known_type(direntp): - if rposix.HAVE_D_TYPE: - return rffi.getintfield(direntp, 'c_d_type') - return DT_UNKNOWN + def get_inode(direntp): + return rffi.getintfield(direntp, 'c_d_ino') -def get_inode(direntp): - return rffi.getintfield(direntp, 'c_d_ino') +else: + # ----- Win32 version ----- + import stat + from rpython.rlib._os_support import unicode_traits + from rpython.rlib.rwin32file import make_win32_traits + from rpython.rlib import rposix_stat + + win32traits = make_win32_traits(unicode_traits) + + + SCANDIRP = lltype.Ptr(lltype.Struct('SCANDIRP', + ('filedata', win32traits.WIN32_FIND_DATA), + ('hFindFile', rwin32.HANDLE), + ('first_time', lltype.Bool), + )) + NULL_DIRP = lltype.nullptr(SCANDIRP.TO) + + + # must only be called with unicode! + def opendir(path): + if len(path) == 0: + path = u'.' 
+ if path[-1] not in (u'\\', u'/', u':'): + mask = path + u'\\*.*' + else: + mask = path + u'*.*' + dirp = lltype.malloc(SCANDIRP.TO, flavor='raw') + hFindFile = win32traits.FindFirstFile(mask, dirp.filedata) + if hFindFile == rwin32.INVALID_HANDLE_VALUE: + error = rwin32.GetLastError_saved() + lltype.free(dirp, flavor='raw') + raise WindowsError(error, "FindFirstFileW failed") + dirp.hFindFile = hFindFile + dirp.first_time = True + return dirp + + def closedir(dirp): + if dirp.hFindFile != rwin32.INVALID_HANDLE_VALUE: + win32traits.FindClose(dirp.hFindFile) + lltype.free(dirp, flavor='raw') + + def nextentry(dirp): + """Read the next entry and returns an opaque object. + Use the methods has_xxx() and get_xxx() to read from that + opaque object. The opaque object is valid until the next + time nextentry() or closedir() is called. This may raise + WindowsError, or return NULL when exhausted. Note + that this doesn't filter out the "." and ".." entries. + """ + if dirp.first_time: + dirp.first_time = False + else: + if not win32traits.FindNextFile(dirp.hFindFile, dirp.filedata): + # error or no more files + error = rwin32.GetLastError_saved() + if error == win32traits.ERROR_NO_MORE_FILES: + return lltype.nullptr(win32traits.WIN32_FIND_DATA) + raise WindowsError(error, "FindNextFileW failed") + return dirp.filedata + + def get_name_unicode(filedata): + return unicode_traits.charp2str(rffi.cast(unicode_traits.CCHARP, + filedata.c_cFileName)) + + def get_known_type(filedata): + attr = filedata.c_dwFileAttributes + st_mode = rposix_stat.win32_attributes_to_mode(win32traits, attr) + return stat.S_IFMT(st_mode) + + def get_inode(filedata): + return None diff --git a/rpython/rlib/rwin32file.py b/rpython/rlib/rwin32file.py --- a/rpython/rlib/rwin32file.py +++ b/rpython/rlib/rwin32file.py @@ -148,7 +148,7 @@ save_err=rffi.RFFI_SAVE_LASTERROR) FindClose = external('FindClose', [rwin32.HANDLE], - rwin32.BOOL) + rwin32.BOOL, releasegil=False) GetFileAttributes = external( 'GetFileAttributes' + suffix, diff --git a/rpython/rlib/test/test_rposix.py b/rpython/rlib/test/test_rposix.py --- a/rpython/rlib/test/test_rposix.py +++ b/rpython/rlib/test/test_rposix.py @@ -1,3 +1,6 @@ +from hypothesis import given, strategies as st, assume +import pytest + from rpython.rtyper.test.test_llinterp import interpret from rpython.translator.c.test.test_genc import compile from rpython.tool.pytest.expecttest import ExpectTest @@ -8,10 +11,10 @@ import py def rposix_requires(funcname): - return py.test.mark.skipif(not hasattr(rposix, funcname), + return pytest.mark.skipif(not hasattr(rposix, funcname), reason="Requires rposix.%s()" % funcname) -win_only = py.test.mark.skipif("os.name != 'nt'") +win_only = pytest.mark.skipif("os.name != 'nt'") class TestPosixFunction: def test_access(self): @@ -827,3 +830,61 @@ rposix.lockf(fd, rposix.F_ULOCK, 4) finally: os.close(fd) + +def check_working_xattr(): + fname = str(udir.join('xattr_test0.txt')) + with open(fname, 'wb'): + pass + try: + rposix.setxattr(fname, 'user.foo', '') + except OSError: + return False + else: + return True + + at pytest.mark.skipif(not (hasattr(rposix, 'getxattr') and check_working_xattr()), + reason="Requires working rposix.getxattr()") + at given( + name=st.text( + alphabet=st.characters(min_codepoint=1), min_size=1, max_size=10), + value=st.binary(max_size=10), + follow_symlinks=st.booleans(), use_fd=st.booleans()) +def test_xattr(name, value, follow_symlinks, use_fd): + assume(follow_symlinks or not use_fd) + name = 'user.' 
+ name.encode('utf-8') + fname = str(udir.join('xattr_test.txt')) + try: + os.unlink(fname) + except OSError: + pass + with open(fname, 'wb'): + pass + if use_fd: + file_id = os.open(fname, os.O_CREAT, 0777) + read, write, delete = rposix.fgetxattr, rposix.fsetxattr, rposix.fremovexattr + all_names = rposix.flistxattr + else: + file_id = fname + if follow_symlinks: + read, write, delete = rposix.getxattr, rposix.setxattr, rposix.removexattr + all_names = rposix.listxattr + else: + read = lambda *args, **kwargs: rposix.getxattr(*args, follow_symlinks=False, **kwargs) + write = lambda *args, **kwargs: rposix.setxattr(*args, follow_symlinks=False, **kwargs) + delete = lambda *args, **kwargs: rposix.removexattr(*args, follow_symlinks=False, **kwargs) + all_names = lambda *args, **kwargs: rposix.listxattr(*args, follow_symlinks=False, **kwargs) + try: + init_names = all_names(file_id) + with pytest.raises(OSError): + read(file_id, name) + write(file_id, name, value) + assert read(file_id, name) == value + assert set(all_names(file_id)) == set(init_names + [name]) + assert '' not in all_names(file_id) + delete(file_id, name) + with pytest.raises(OSError): + read(file_id, name) + assert set(all_names(file_id)) == set(init_names) + finally: + if use_fd: + os.close(file_id) From pypy.commits at gmail.com Thu Dec 21 05:00:58 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 21 Dec 2017 02:00:58 -0800 (PST) Subject: [pypy-commit] pypy default: skip this test on platforms where _vmprof is not built Message-ID: <5a3b865a.e6361c0a.e898e.fbd8@mx.google.com> Author: Antonio Cuni Branch: Changeset: r93529:726f599fb32a Date: 2017-12-21 11:00 +0100 http://bitbucket.org/pypy/pypy/changeset/726f599fb32a/ Log: skip this test on platforms where _vmprof is not built diff --git a/extra_tests/test_vmprof_greenlet.py b/extra_tests/test_vmprof_greenlet.py --- a/extra_tests/test_vmprof_greenlet.py +++ b/extra_tests/test_vmprof_greenlet.py @@ -1,7 +1,7 @@ import time import pytest import greenlet -import vmprof +vmprof = pytest.importorskip('vmprof') def count_samples(filename): stats = vmprof.read_profile(filename) From pypy.commits at gmail.com Thu Dec 21 05:48:03 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 21 Dec 2017 02:48:03 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Backout lib-python/ change in 4bb7cf3 Message-ID: <5a3b9163.478edf0a.61a81.5b6d@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93531:a3e4d351acbe Date: 2017-12-21 10:47 +0000 http://bitbucket.org/pypy/pypy/changeset/a3e4d351acbe/ Log: Backout lib-python/ change in 4bb7cf3 diff --git a/lib-python/3/ssl.py b/lib-python/3/ssl.py --- a/lib-python/3/ssl.py +++ b/lib-python/3/ssl.py @@ -140,23 +140,6 @@ except NameError: _SSLv2_IF_EXISTS = None - - - -import os -class DirEntry: - def __init__(self, path, name): - self.path = os.path.join(path, name) - self.name = name - def is_dir(self): - return os.path.isdir(self.path) -def myscandir(path='.'): - for name in os.listdir(path): - yield DirEntry(path, name) -os.scandir = myscandir - - - if sys.platform == "win32": from _ssl import enum_certificates, enum_crls From pypy.commits at gmail.com Thu Dec 21 07:39:25 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 21 Dec 2017 04:39:25 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Clean up tests Message-ID: <5a3bab7d.82641c0a.788b7.ad45@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93533:6276a7287d4b Date: 2017-12-21 12:38 +0000 http://bitbucket.org/pypy/pypy/changeset/6276a7287d4b/ Log: Clean up tests diff 
--git a/pypy/module/errno/test/test_errno.py b/pypy/module/errno/test/test_errno.py --- a/pypy/module/errno/test/test_errno.py +++ b/pypy/module/errno/test/test_errno.py @@ -11,29 +11,11 @@ assert not hasattr(self.errno, '__file__') def test_constants(self): - host_errorcode = self.errorcode.copy() - # On some systems, ENOTSUP is an alias to EOPNOTSUPP. Adjust the - # host_errorcode dictionary in case the host interpreter has slightly - # different errorcodes than the interpreter under test - if ('ENOTSUP' not in host_errorcode.values() and - 'ENOTSUP' in self.errno.errorcode.values()): - host_errorcode[self.errno.ENOTSUP] = 'ENOTSUP' - if ('EOPNOTSUPP' not in host_errorcode.values() and - 'EOPNOTSUPP' in self.errno.errorcode.values()): - host_errorcode[self.errno.EOPNOTSUPP] = 'EOPNOTSUPP' - for code, name in host_errorcode.items(): + # Assumes that our constants are a superset of the host's + for code, name in self.errorcode.items(): assert getattr(self.errno, name) == code def test_errorcode(self): - host_errorcode = self.errorcode.copy() - # On some systems, ENOTSUP is an alias to EOPNOTSUPP. Adjust the - # host_errorcode dictionary in case the host interpreter has slightly - # different errorcodes than the interpreter under test - if ('ENOTSUP' not in host_errorcode.values() and - 'ENOTSUP' in self.errno.errorcode.values()): - host_errorcode[self.errno.ENOTSUP] = 'ENOTSUP' - if ('EOPNOTSUPP' not in host_errorcode.values() and - 'EOPNOTSUPP' in self.errno.errorcode.values()): - host_errorcode[self.errno.EOPNOTSUPP] = 'EOPNOTSUPP' - for value, name in host_errorcode.items(): + # Assumes that our codes are a superset of the host's + for value, name in self.errorcode.items(): assert self.errno.errorcode[value] == name From pypy.commits at gmail.com Thu Dec 21 08:51:57 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 21 Dec 2017 05:51:57 -0800 (PST) Subject: [pypy-commit] pypy py3.5: merge default Message-ID: <5a3bbc7d.8b97df0a.31008.7cf3@mx.google.com> Author: fijal Branch: py3.5 Changeset: r93534:4f88f2f2d3f2 Date: 2017-12-21 15:51 +0200 http://bitbucket.org/pypy/pypy/changeset/4f88f2f2d3f2/ Log: merge default diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt --- a/extra_tests/requirements.txt +++ b/extra_tests/requirements.txt @@ -1,2 +1,3 @@ pytest hypothesis +vmprof diff --git a/extra_tests/test_vmprof_greenlet.py b/extra_tests/test_vmprof_greenlet.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_vmprof_greenlet.py @@ -0,0 +1,28 @@ +import time +import pytest +import greenlet +vmprof = pytest.importorskip('vmprof') + +def count_samples(filename): + stats = vmprof.read_profile(filename) + return len(stats.profiles) + +def cpuburn(duration): + end = time.time() + duration + while time.time() < end: + pass + +def test_sampling_inside_callback(tmpdir): + # see also test_sampling_inside_callback inside + # pypy/module/_continuation/test/test_stacklet.py + # + G = greenlet.greenlet(cpuburn) + fname = tmpdir.join('log.vmprof') + with fname.open('w+b') as f: + vmprof.enable(f.fileno(), 1/250.0) + G.switch(0.1) + vmprof.disable() + + samples = count_samples(str(fname)) + # 0.1 seconds at 250Hz should be 25 samples + assert 23 < samples < 27 diff --git a/lib-python/2.7/subprocess.py b/lib-python/2.7/subprocess.py --- a/lib-python/2.7/subprocess.py +++ b/lib-python/2.7/subprocess.py @@ -1296,7 +1296,7 @@ 'copyfile' in caller.f_globals): dest_dir = sys.pypy_resolvedirof(target_executable) src_dir = sys.pypy_resolvedirof(sys.executable) - for libname in 
['libpypy-c.so', 'libpypy-c.dylib']: + for libname in ['libpypy-c.so', 'libpypy-c.dylib', 'libpypy-c.dll']: dest_library = os.path.join(dest_dir, libname) src_library = os.path.join(src_dir, libname) if os.path.exists(src_library): diff --git a/pypy/doc/index-of-release-notes.rst b/pypy/doc/index-of-release-notes.rst --- a/pypy/doc/index-of-release-notes.rst +++ b/pypy/doc/index-of-release-notes.rst @@ -6,6 +6,7 @@ .. toctree:: + release-v5.10.0.rst release-v5.9.0.rst release-v5.8.0.rst release-v5.7.1.rst diff --git a/pypy/doc/release-v5.10.0.rst b/pypy/doc/release-v5.10.0.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/release-v5.10.0.rst @@ -0,0 +1,94 @@ +====================================== +PyPy2.7 and PyPy3.5 v5.10 dual release +====================================== + +The PyPy team is proud to release both PyPy2.7 v5.10 (an interpreter supporting +Python 2.7 syntax), and a final PyPy3.5 v5.10 (an interpreter for Python +3.5 syntax). The two releases are both based on much the same codebase, thus +the dual release. + +This release is an incremental release with very few new features, the main +feature being the final PyPy3.5 release that works on linux and OS X with beta +windows support. It also includes fixes for `vmprof`_ cooperation with greenlets. + +Compared to 5.9, the 5.10 release contains mostly bugfixes and small improvements. +We have in the pipeline big new features coming for PyPy 6.0 that did not make +the release cut and should be available within the next couple months. + +As always, this release is 100% compatible with the previous one and fixed +several issues and bugs raised by the growing community of PyPy users. +As always, we strongly recommend updating. + +This release concludes the Mozilla Open Source `grant`_ for having a compatible +PyPy 3.5 release and we're very grateful for that. Of course, we will continue +to improve PyPy 3.5 and probably move to 3.6 during the course of 2018. + +You can download the v5.10 releases here: + + http://pypy.org/download.html + +We would like to thank our donors for the continued support of the PyPy +project. + +We would also like to thank our contributors and +encourage new people to join the project. PyPy has many +layers and we need help with all of them: `PyPy`_ and `RPython`_ documentation +improvements, tweaking popular `modules`_ to run on pypy, or general `help`_ +with making RPython's JIT even better. + +.. _vmprof: http://vmprof.readthedocs.io +.. _grant: https://morepypy.blogspot.com/2016/08/pypy-gets-funding-from-mozilla-for.html +.. _`PyPy`: index.html +.. _`RPython`: https://rpython.readthedocs.org +.. _`modules`: project-ideas.html#make-more-python-modules-pypy-friendly +.. _`help`: project-ideas.html + +What is PyPy? +============= + +PyPy is a very compliant Python interpreter, almost a drop-in replacement for +CPython 2.7 and CPython 3.5. It's fast (`PyPy and CPython 2.7.x`_ performance comparison) +due to its integrated tracing JIT compiler. + +We also welcome developers of other `dynamic languages`_ to see what RPython +can do for them. + +The PyPy 2.7 release supports: + + * **x86** machines on most common operating systems + (Linux 32/64 bits, Mac OS X 64 bits, Windows 32 bits, OpenBSD, FreeBSD) + + * newer **ARM** hardware (ARMv6 or ARMv7, with VFPv3) running Linux, + + * big- and little-endian variants of **PPC64** running Linux, + + * **s390x** running Linux + +.. _`PyPy and CPython 2.7.x`: http://speed.pypy.org +.. 
_`dynamic languages`: http://rpython.readthedocs.io/en/latest/examples.html + +Changelog +========= + +* improve ssl handling on windows for pypy3 (makes pip work) +* improve unicode handling in various error reporters +* fix vmprof cooperation with greenlets +* fix some things in cpyext +* test and document the cmp(nan, nan) == 0 behaviour +* don't crash when calling sleep with inf or nan +* fix bugs in _io module +* inspect.isbuiltin() now returns True for functions implemented in C +* allow the sequences future-import, docstring, future-import for CPython bug compatibility +* Issue #2699: non-ascii messages in warnings +* posix.lockf +* fixes for FreeBSD platform +* add .debug files, so builds contain debugging info, instead of being stripped +* improvements to cppyy +* issue #2677 copy pure c PyBuffer_{From,To}Contiguous from cpython +* issue #2682, split firstword on any whitespace in sqlite3 +* ctypes: allow ptr[0] = foo when ptr is a pointer to struct +* matplotlib will work with tkagg backend once `matplotlib pr #9356`_ is merged +* improvements to utf32 surrogate handling +* cffi version bump to 1.11.2 + +.. _`matplotlib pr #9356`: https://github.com/matplotlib/matplotlib/pull/9356 diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -31,7 +31,7 @@ Upgrade the _vmprof backend to vmprof 0.4.10 .. branch: fix-vmprof-stacklet-switch - +.. branch: fix-vmprof-stacklet-switch-2 Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) .. branch: win32-vcvars @@ -39,3 +39,4 @@ .. branch: rdict-fast-hash Make it possible to declare that the hash function of an r_dict is fast in RPython. + diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -1,5 +1,6 @@ from rpython.rlib.rstacklet import StackletThread from rpython.rlib import jit +from rpython.rlib import rvmprof from pypy.interpreter.error import OperationError, get_cleared_operation_error from pypy.interpreter.error import oefmt from pypy.interpreter.executioncontext import ExecutionContext @@ -241,12 +242,15 @@ self.h = h global_state.clear() try: + rvmprof.start_sampling() frame = self.bottomframe w_result = frame.execute_frame() except Exception as e: global_state.propagate_exception = e else: global_state.w_value = w_result + finally: + rvmprof.stop_sampling() self.sthread.ec.topframeref = jit.vref_None global_state.origin = self global_state.destination = self diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -1,7 +1,10 @@ +import pytest import os +from rpython.rlib.rvmprof.test.support import fakevmprof +from pypy.interpreter.gateway import interp2app from pypy.module._continuation.test.support import BaseAppTest - + at pytest.mark.usefixtures('app_fakevmprof') class AppTestStacklet(BaseAppTest): def setup_class(cls): BaseAppTest.setup_class.im_func(cls) @@ -34,6 +37,33 @@ return res return stack """) + cls.w_appdirect = cls.space.wrap(cls.runappdirect) + if cls.runappdirect: + # make sure that "self.stack" does not pass the self + cls.w_stack = staticmethod(cls.w_stack.im_func) + + + @pytest.fixture + def app_fakevmprof(self, fakevmprof): + """ + This is automaticaly re-initialized for every method: 
thanks to + fakevmprof's finalizer, it checks that we called {start,stop}_sampling + the in pairs + """ + w = self.space.wrap + i2a = interp2app + def is_sampling_enabled(space): + return space.wrap(fakevmprof.is_sampling_enabled) + self.w_is_sampling_enabled = w(i2a(is_sampling_enabled)) + # + def start_sampling(space): + fakevmprof.start_sampling() + self.w_start_sampling = w(i2a(start_sampling)) + # + def stop_sampling(space): + fakevmprof.stop_sampling() + self.w_stop_sampling = w(i2a(stop_sampling)) + def test_new_empty(self): from _continuation import continulet @@ -797,3 +827,25 @@ bd50 = continulet(f) main.switch(to=bd50) print(999) + + def test_sampling_inside_callback(self): + if self.appdirect: + # see also + # extra_tests.test_vmprof_greenlet.test_sampling_inside_callback + # for a "translated" version of this test + skip("we can't run this until we have _vmprof.is_sampling_enabled") + from _continuation import continulet + # + def my_callback(c1): + assert self.is_sampling_enabled() + return 42 + # + try: + self.start_sampling() + assert self.is_sampling_enabled() + c = continulet(my_callback) + res = c.switch() + assert res == 42 + assert self.is_sampling_enabled() + finally: + self.stop_sampling() diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -1,4 +1,5 @@ import py +import pytest try: import _continuation except ImportError: @@ -101,11 +102,7 @@ particular, we need to ensure that vmprof does not sample the stack in the middle of a switch, else we read nonsense. """ - try: - import _vmprof - except ImportError: - py.test.skip("no _vmprof") - # + _vmprof = pytest.importorskip('_vmprof') def switch_forever(c): while True: c.switch() diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -3,7 +3,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import fetch_translated_config from rpython.rtyper.lltypesystem import lltype, llmemory -from rpython.rlib.rvmprof import cintf +from rpython.rlib import rvmprof DEBUG = False @@ -25,12 +25,12 @@ def new(self, callback, arg=llmemory.NULL): if DEBUG: callback = _debug_wrapper(callback) - x = cintf.save_rvmprof_stack() + x = rvmprof.save_stack() try: - cintf.empty_rvmprof_stack() + rvmprof.empty_stack() h = self._gcrootfinder.new(self, callback, arg) finally: - cintf.restore_rvmprof_stack(x) + rvmprof.restore_stack(x) if DEBUG: debug.add(h) return h @@ -40,11 +40,11 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) - x = cintf.save_rvmprof_stack() + x = rvmprof.save_stack() try: h = self._gcrootfinder.switch(stacklet) finally: - cintf.restore_rvmprof_stack(x) + rvmprof.restore_stack(x) if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -56,10 +56,27 @@ return None def stop_sampling(): - from rpython.rlib.rvmprof.cintf import vmprof_stop_sampling - fd = vmprof_stop_sampling() - return rffi.cast(lltype.Signed, fd) + return _get_vmprof().stop_sampling() def start_sampling(): - from rpython.rlib.rvmprof.cintf import vmprof_start_sampling - vmprof_start_sampling() + return _get_vmprof().start_sampling() + +# ---------------- +# stacklet support +# ---------------- +# +# Ideally, vmprof_tl_stack, VMPROFSTACK etc. 
should be part of "self.cintf": +# not sure why they are a global. Eventually, we should probably fix all this +# mess. +from rpython.rlib.rvmprof.cintf import vmprof_tl_stack, VMPROFSTACK + +def save_stack(): + stop_sampling() + return vmprof_tl_stack.get_or_make_raw() + +def empty_stack(): + vmprof_tl_stack.setraw(lltype.nullptr(VMPROFSTACK)) + +def restore_stack(x): + vmprof_tl_stack.setraw(x) + start_sampling() diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -10,71 +10,82 @@ from rpython.rlib import rthread, jit from rpython.rlib.objectmodel import we_are_translated from rpython.config.translationoption import get_translation_config +from rpython.jit.backend import detect_cpu class VMProfPlatformUnsupported(Exception): pass +# vmprof works only on x86 for now +IS_SUPPORTED = detect_cpu.autodetect().startswith('x86') +if sys.platform == 'win32': + IS_SUPPORTED = False + ROOT = py.path.local(rpythonroot).join('rpython', 'rlib', 'rvmprof') SRC = ROOT.join('src') SHARED = SRC.join('shared') BACKTRACE = SHARED.join('libbacktrace') -compile_extra = ['-DRPYTHON_VMPROF'] -separate_module_files = [ - SHARED.join('symboltable.c'), - SHARED.join('vmprof_unix.c') -] -if sys.platform.startswith('linux'): - separate_module_files += [ - BACKTRACE.join('atomic.c'), - BACKTRACE.join('backtrace.c'), - BACKTRACE.join('state.c'), - BACKTRACE.join('elf.c'), - BACKTRACE.join('dwarf.c'), - BACKTRACE.join('fileline.c'), - BACKTRACE.join('mmap.c'), - BACKTRACE.join('mmapio.c'), - BACKTRACE.join('posix.c'), - BACKTRACE.join('sort.c'), +def make_eci(): + if make_eci.called: + raise ValueError("make_eci() should be called at most once") + # + compile_extra = ['-DRPYTHON_VMPROF'] + separate_module_files = [ + SHARED.join('symboltable.c'), + SHARED.join('vmprof_unix.c') ] - _libs = ['dl'] - compile_extra += ['-DVMPROF_UNIX'] - compile_extra += ['-DVMPROF_LINUX'] -elif sys.platform == 'win32': - compile_extra += ['-DVMPROF_WINDOWS'] - separate_module_files = [SHARED.join('vmprof_win.c')] - _libs = [] -else: - # Guessing a BSD-like Unix platform - compile_extra += ['-DVMPROF_UNIX'] - compile_extra += ['-DVMPROF_MAC'] - if sys.platform.startswith('freebsd'): - _libs = ['unwind'] + if sys.platform.startswith('linux'): + separate_module_files += [ + BACKTRACE.join('atomic.c'), + BACKTRACE.join('backtrace.c'), + BACKTRACE.join('state.c'), + BACKTRACE.join('elf.c'), + BACKTRACE.join('dwarf.c'), + BACKTRACE.join('fileline.c'), + BACKTRACE.join('mmap.c'), + BACKTRACE.join('mmapio.c'), + BACKTRACE.join('posix.c'), + BACKTRACE.join('sort.c'), + ] + _libs = ['dl'] + compile_extra += ['-DVMPROF_UNIX'] + compile_extra += ['-DVMPROF_LINUX'] + elif sys.platform == 'win32': + compile_extra += ['-DVMPROF_WINDOWS'] + separate_module_files = [SHARED.join('vmprof_win.c')] + _libs = [] else: - _libs = [] + # Guessing a BSD-like Unix platform + compile_extra += ['-DVMPROF_UNIX'] + compile_extra += ['-DVMPROF_MAC'] + if sys.platform.startswith('freebsd'): + _libs = ['unwind'] + else: + _libs = [] - -eci_kwds = dict( - include_dirs = [SRC, SHARED, BACKTRACE], - includes = ['rvmprof.h','vmprof_stack.h'], - libraries = _libs, - separate_module_files = [ - SRC.join('rvmprof.c'), - SHARED.join('compat.c'), - SHARED.join('machine.c'), - SHARED.join('vmp_stack.c'), - SHARED.join('vmprof_memory.c'), - SHARED.join('vmprof_common.c'), - # symbol table already in separate_module_files - ] + separate_module_files, - post_include_bits=[], - 
compile_extra=compile_extra - ) -if sys.platform != 'win32': - eci_kwds['separate_module_files'].append( - SHARED.join('vmprof_mt.c'), - ) -global_eci = ExternalCompilationInfo(**eci_kwds) + eci_kwds = dict( + include_dirs = [SRC, SHARED, BACKTRACE], + includes = ['rvmprof.h','vmprof_stack.h'], + libraries = _libs, + separate_module_files = [ + SRC.join('rvmprof.c'), + SHARED.join('compat.c'), + SHARED.join('machine.c'), + SHARED.join('vmp_stack.c'), + SHARED.join('vmprof_memory.c'), + SHARED.join('vmprof_common.c'), + # symbol table already in separate_module_files + ] + separate_module_files, + post_include_bits=[], + compile_extra=compile_extra + ) + if sys.platform != 'win32': + eci_kwds['separate_module_files'].append( + SHARED.join('vmprof_mt.c'), + ) + make_eci.called = True + return ExternalCompilationInfo(**eci_kwds), eci_kwds +make_eci.called = False def configure_libbacktrace_linux(): bits = 32 if sys.maxsize == 2**31-1 else 64 @@ -85,14 +96,17 @@ shutil.copy(str(BACKTRACE.join(specific_config)), str(config)) def setup(): + if not IS_SUPPORTED: + raise VMProfPlatformUnsupported + if sys.platform.startswith('linux'): configure_libbacktrace_linux() + eci, eci_kwds = make_eci() eci_kwds['compile_extra'].append('-DRPYTHON_LL2CTYPES') platform.verify_eci(ExternalCompilationInfo( **eci_kwds)) - eci = global_eci vmprof_init = rffi.llexternal("vmprof_init", [rffi.INT, rffi.DOUBLE, rffi.INT, rffi.INT, rffi.CCHARP, rffi.INT, rffi.INT], @@ -122,32 +136,16 @@ lltype.Signed, compilation_info=eci, _nowrapper=True) + vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], + rffi.INT, compilation_info=eci, + _nowrapper=True) + vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], + lltype.Void, compilation_info=eci, + _nowrapper=True) + return CInterface(locals()) -# this is always present, but compiles to no-op if RPYTHON_VMPROF is not -# defined (i.e. 
if we don't actually use vmprof in the generated C) -auto_eci = ExternalCompilationInfo(post_include_bits=[""" -#ifndef RPYTHON_VMPROF -# define vmprof_stop_sampling() (-1) -# define vmprof_start_sampling() ((void)0) -#endif -"""]) - -if get_translation_config() is None: - # tests need the full eci here - _eci = global_eci -else: - _eci = auto_eci - -vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], - rffi.INT, compilation_info=_eci, - _nowrapper=True) -vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], - lltype.Void, compilation_info=_eci, - _nowrapper=True) - - class CInterface(object): def __init__(self, namespace): for k, v in namespace.iteritems(): @@ -232,20 +230,6 @@ leave_code(s) # -# stacklet support - -def save_rvmprof_stack(): - vmprof_stop_sampling() - return vmprof_tl_stack.get_or_make_raw() - -def empty_rvmprof_stack(): - vmprof_tl_stack.setraw(lltype.nullptr(VMPROFSTACK)) - -def restore_rvmprof_stack(x): - vmprof_tl_stack.setraw(x) - vmprof_start_sampling() - -# # traceback support def get_rvmprof_stack(): diff --git a/rpython/rlib/rvmprof/dummy.py b/rpython/rlib/rvmprof/dummy.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/dummy.py @@ -0,0 +1,26 @@ +from rpython.rlib.objectmodel import specialize + +class DummyVMProf(object): + + def __init__(self): + self._unique_id = 0 + + def register_code_object_class(self, CodeClass, full_name_func): + CodeClass._vmprof_unique_id = self._unique_id + self._unique_id += 1 + + @specialize.argtype(1) + def register_code(self, code, full_name_func): + pass + + def enable(self, fileno, interval, memory=0, native=0, real_time=0): + pass + + def disable(self): + pass + + def start_sampling(self): + pass + + def stop_sampling(self): + pass diff --git a/rpython/rlib/rvmprof/rvmprof.py b/rpython/rlib/rvmprof/rvmprof.py --- a/rpython/rlib/rvmprof/rvmprof.py +++ b/rpython/rlib/rvmprof/rvmprof.py @@ -2,6 +2,7 @@ from rpython.rlib.objectmodel import specialize, we_are_translated, not_rpython from rpython.rlib import jit, rposix, rgc from rpython.rlib.rvmprof import cintf +from rpython.rlib.rvmprof.dummy import DummyVMProf from rpython.rtyper.annlowlevel import cast_instance_to_gcref from rpython.rtyper.annlowlevel import cast_base_ptr_to_instance from rpython.rtyper.lltypesystem import lltype, llmemory, rffi @@ -34,6 +35,9 @@ return [] class VMProf(object): + """ + NOTE: the API of this class should be kept in sync with dummy.DummyVMProf + """ _immutable_fields_ = ['is_enabled?'] @@ -168,6 +172,21 @@ if self.cintf.vmprof_register_virtual_function(name, uid, 500000) < 0: raise VMProfError("vmprof buffers full! disk full or too slow") + def stop_sampling(self): + """ + Temporarily stop the sampling of stack frames. Signals are still + delivered, but are ignored. + """ + fd = self.cintf.vmprof_stop_sampling() + return rffi.cast(lltype.Signed, fd) + + def start_sampling(self): + """ + Undo the effect of stop_sampling + """ + self.cintf.vmprof_start_sampling() + + def vmprof_execute_code(name, get_code_fn, result_class=None, _hack_update_stack_untranslated=False): """Decorator to be used on the function that interprets a code object. 
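As an illustration, not part of the changeset above: the new stop_sampling()/start_sampling() methods are meant to be used in balanced pairs, the same way rstacklet's save_stack()/restore_stack() bracket a stacklet switch. A minimal sketch of that pattern, using only the module-level rvmprof API visible in this diff (the helper name run_without_sampling is invented for the example):

    from rpython.rlib import rvmprof

    def run_without_sampling(func):
        # Suspend vmprof sampling around a critical region: signals are
        # still delivered while suspended, but they are ignored until the
        # matching start_sampling() call.
        rvmprof.stop_sampling()
        try:
            return func()
        finally:
            # Every stop_sampling() must be undone by exactly one
            # start_sampling(); the FakeVMProf.check_status() helper further
            # down verifies exactly this balance during test teardown.
            rvmprof.start_sampling()
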
@@ -240,5 +259,8 @@ def _get_vmprof(): global _vmprof_instance if _vmprof_instance is None: - _vmprof_instance = VMProf() + try: + _vmprof_instance = VMProf() + except cintf.VMProfPlatformUnsupported: + _vmprof_instance = DummyVMProf() return _vmprof_instance diff --git a/rpython/rlib/rvmprof/test/support.py b/rpython/rlib/rvmprof/test/support.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/test/support.py @@ -0,0 +1,45 @@ +import pytest +from rpython.rlib import rvmprof + +class FakeVMProf(object): + + def __init__(self): + self._enabled = False + self._ignore_signals = 1 + + # --- VMProf official API --- + # add fake methods as needed by the tests + + def stop_sampling(self): + self._ignore_signals += 1 + + def start_sampling(self): + assert self._ignore_signals > 0, ('calling start_sampling() without ' + 'the corresponding stop_sampling()?') + self._ignore_signals -= 1 + + # --- FakeVMProf specific API --- + # this API is not part of rvmprof, but available only inside tests using + # fakevmprof + + @property + def is_sampling_enabled(self): + return self._ignore_signals == 0 + + def check_status(self): + """ + To be called during test teardown + """ + if self._ignore_signals != 1: + msg = ('Invalid value for fakevmprof._ignore_signals: expected 1, ' + 'got %d. This probably means that you called ' + '{start,stop}_sampling() a wrong number of times') + raise ValueError, msg % self._ignore_signals + + + at pytest.fixture +def fakevmprof(request, monkeypatch): + fake = FakeVMProf() + monkeypatch.setattr(rvmprof.rvmprof, '_vmprof_instance', fake) + request.addfinalizer(fake.check_status) + return fake diff --git a/rpython/rlib/rvmprof/test/test_support.py b/rpython/rlib/rvmprof/test/test_support.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/test/test_support.py @@ -0,0 +1,42 @@ +import pytest +from rpython.rlib import rvmprof +from rpython.rlib.rvmprof.test.support import FakeVMProf, fakevmprof + +class TestFakeVMProf(object): + + def test_sampling(self): + fake = FakeVMProf() + assert not fake.is_sampling_enabled + # + fake.start_sampling() + assert fake.is_sampling_enabled + # + fake.stop_sampling() + fake.stop_sampling() + assert not fake.is_sampling_enabled + # + fake.start_sampling() + assert not fake.is_sampling_enabled + fake.start_sampling() + assert fake.is_sampling_enabled + # + pytest.raises(AssertionError, "fake.start_sampling()") + + def test_check_status(self): + fake = FakeVMProf() + fake.stop_sampling() + pytest.raises(ValueError, "fake.check_status()") + + +class TestFixture(object): + + def test_fixture(self, fakevmprof): + assert isinstance(fakevmprof, FakeVMProf) + assert rvmprof._get_vmprof() is fakevmprof + # + # tweak sampling using the "real" API, and check that we actually used + # the fake + rvmprof.start_sampling() + assert fakevmprof.is_sampling_enabled + rvmprof.stop_sampling() + assert not fakevmprof.is_sampling_enabled From pypy.commits at gmail.com Thu Dec 21 08:54:00 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 21 Dec 2017 05:54:00 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.5-v5.9.x: merge py3.5 Message-ID: <5a3bbcf8.99451c0a.2fb3c.25b2@mx.google.com> Author: fijal Branch: release-pypy3.5-v5.9.x Changeset: r93535:42207cd2265c Date: 2017-12-21 15:53 +0200 http://bitbucket.org/pypy/pypy/changeset/42207cd2265c/ Log: merge py3.5 diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt --- a/extra_tests/requirements.txt +++ b/extra_tests/requirements.txt @@ -1,2 +1,3 @@ pytest hypothesis 
+vmprof diff --git a/extra_tests/test_vmprof_greenlet.py b/extra_tests/test_vmprof_greenlet.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_vmprof_greenlet.py @@ -0,0 +1,28 @@ +import time +import pytest +import greenlet +vmprof = pytest.importorskip('vmprof') + +def count_samples(filename): + stats = vmprof.read_profile(filename) + return len(stats.profiles) + +def cpuburn(duration): + end = time.time() + duration + while time.time() < end: + pass + +def test_sampling_inside_callback(tmpdir): + # see also test_sampling_inside_callback inside + # pypy/module/_continuation/test/test_stacklet.py + # + G = greenlet.greenlet(cpuburn) + fname = tmpdir.join('log.vmprof') + with fname.open('w+b') as f: + vmprof.enable(f.fileno(), 1/250.0) + G.switch(0.1) + vmprof.disable() + + samples = count_samples(str(fname)) + # 0.1 seconds at 250Hz should be 25 samples + assert 23 < samples < 27 diff --git a/lib-python/2.7/subprocess.py b/lib-python/2.7/subprocess.py --- a/lib-python/2.7/subprocess.py +++ b/lib-python/2.7/subprocess.py @@ -1296,7 +1296,7 @@ 'copyfile' in caller.f_globals): dest_dir = sys.pypy_resolvedirof(target_executable) src_dir = sys.pypy_resolvedirof(sys.executable) - for libname in ['libpypy-c.so', 'libpypy-c.dylib']: + for libname in ['libpypy-c.so', 'libpypy-c.dylib', 'libpypy-c.dll']: dest_library = os.path.join(dest_dir, libname) src_library = os.path.join(src_dir, libname) if os.path.exists(src_library): diff --git a/lib-python/3/ssl.py b/lib-python/3/ssl.py --- a/lib-python/3/ssl.py +++ b/lib-python/3/ssl.py @@ -140,23 +140,6 @@ except NameError: _SSLv2_IF_EXISTS = None - - - -import os -class DirEntry: - def __init__(self, path, name): - self.path = os.path.join(path, name) - self.name = name - def is_dir(self): - return os.path.isdir(self.path) -def myscandir(path='.'): - for name in os.listdir(path): - yield DirEntry(path, name) -os.scandir = myscandir - - - if sys.platform == "win32": from _ssl import enum_certificates, enum_crls diff --git a/pypy/doc/index-of-release-notes.rst b/pypy/doc/index-of-release-notes.rst --- a/pypy/doc/index-of-release-notes.rst +++ b/pypy/doc/index-of-release-notes.rst @@ -6,6 +6,7 @@ .. toctree:: + release-v5.10.0.rst release-v5.9.0.rst release-v5.8.0.rst release-v5.7.1.rst diff --git a/pypy/doc/release-v5.10.0.rst b/pypy/doc/release-v5.10.0.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/release-v5.10.0.rst @@ -0,0 +1,94 @@ +====================================== +PyPy2.7 and PyPy3.5 v5.10 dual release +====================================== + +The PyPy team is proud to release both PyPy2.7 v5.10 (an interpreter supporting +Python 2.7 syntax), and a final PyPy3.5 v5.10 (an interpreter for Python +3.5 syntax). The two releases are both based on much the same codebase, thus +the dual release. + +This release is an incremental release with very few new features, the main +feature being the final PyPy3.5 release that works on linux and OS X with beta +windows support. It also includes fixes for `vmprof`_ cooperation with greenlets. + +Compared to 5.9, the 5.10 release contains mostly bugfixes and small improvements. +We have in the pipeline big new features coming for PyPy 6.0 that did not make +the release cut and should be available within the next couple months. + +As always, this release is 100% compatible with the previous one and fixed +several issues and bugs raised by the growing community of PyPy users. +As always, we strongly recommend updating. 
+ +This release concludes the Mozilla Open Source `grant`_ for having a compatible +PyPy 3.5 release and we're very grateful for that. Of course, we will continue +to improve PyPy 3.5 and probably move to 3.6 during the course of 2018. + +You can download the v5.10 releases here: + + http://pypy.org/download.html + +We would like to thank our donors for the continued support of the PyPy +project. + +We would also like to thank our contributors and +encourage new people to join the project. PyPy has many +layers and we need help with all of them: `PyPy`_ and `RPython`_ documentation +improvements, tweaking popular `modules`_ to run on pypy, or general `help`_ +with making RPython's JIT even better. + +.. _vmprof: http://vmprof.readthedocs.io +.. _grant: https://morepypy.blogspot.com/2016/08/pypy-gets-funding-from-mozilla-for.html +.. _`PyPy`: index.html +.. _`RPython`: https://rpython.readthedocs.org +.. _`modules`: project-ideas.html#make-more-python-modules-pypy-friendly +.. _`help`: project-ideas.html + +What is PyPy? +============= + +PyPy is a very compliant Python interpreter, almost a drop-in replacement for +CPython 2.7 and CPython 3.5. It's fast (`PyPy and CPython 2.7.x`_ performance comparison) +due to its integrated tracing JIT compiler. + +We also welcome developers of other `dynamic languages`_ to see what RPython +can do for them. + +The PyPy 2.7 release supports: + + * **x86** machines on most common operating systems + (Linux 32/64 bits, Mac OS X 64 bits, Windows 32 bits, OpenBSD, FreeBSD) + + * newer **ARM** hardware (ARMv6 or ARMv7, with VFPv3) running Linux, + + * big- and little-endian variants of **PPC64** running Linux, + + * **s390x** running Linux + +.. _`PyPy and CPython 2.7.x`: http://speed.pypy.org +.. _`dynamic languages`: http://rpython.readthedocs.io/en/latest/examples.html + +Changelog +========= + +* improve ssl handling on windows for pypy3 (makes pip work) +* improve unicode handling in various error reporters +* fix vmprof cooperation with greenlets +* fix some things in cpyext +* test and document the cmp(nan, nan) == 0 behaviour +* don't crash when calling sleep with inf or nan +* fix bugs in _io module +* inspect.isbuiltin() now returns True for functions implemented in C +* allow the sequences future-import, docstring, future-import for CPython bug compatibility +* Issue #2699: non-ascii messages in warnings +* posix.lockf +* fixes for FreeBSD platform +* add .debug files, so builds contain debugging info, instead of being stripped +* improvements to cppyy +* issue #2677 copy pure c PyBuffer_{From,To}Contiguous from cpython +* issue #2682, split firstword on any whitespace in sqlite3 +* ctypes: allow ptr[0] = foo when ptr is a pointer to struct +* matplotlib will work with tkagg backend once `matplotlib pr #9356`_ is merged +* improvements to utf32 surrogate handling +* cffi version bump to 1.11.2 + +.. _`matplotlib pr #9356`: https://github.com/matplotlib/matplotlib/pull/9356 diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -2,3 +2,41 @@ What's new in PyPy3 6.0 =========================== +.. this is a revision shortly after release-pypy2.7-v5.9.0 +.. startrev:d56dadcef996 + + +.. branch: cppyy-packaging + +Cleanup and improve cppyy packaging + +.. branch: docs-osx-brew-openssl + +.. branch: keep-debug-symbols + +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. 
branch: bsd-patches + +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests + +Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 + +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch +.. branch: fix-vmprof-stacklet-switch-2 +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + +.. branch: rdict-fast-hash + +Make it possible to declare that the hash function of an r_dict is fast in RPython. + diff --git a/pypy/doc/whatsnew-pypy3-5.10.0.rst b/pypy/doc/whatsnew-pypy3-5.10.0.rst --- a/pypy/doc/whatsnew-pypy3-5.10.0.rst +++ b/pypy/doc/whatsnew-pypy3-5.10.0.rst @@ -31,7 +31,7 @@ Upgrade the _vmprof backend to vmprof 0.4.10 .. branch: fix-vmprof-stacklet-switch - +.. branch: fix-vmprof-stacklet-switch-2 Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) .. branch: win32-vcvars @@ -39,3 +39,4 @@ .. branch: rdict-fast-hash Make it possible to declare that the hash function of an r_dict is fast in RPython. + diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -1,5 +1,6 @@ from rpython.rlib.rstacklet import StackletThread from rpython.rlib import jit +from rpython.rlib import rvmprof from pypy.interpreter.error import OperationError, get_cleared_operation_error from pypy.interpreter.error import oefmt from pypy.interpreter.executioncontext import ExecutionContext @@ -241,12 +242,15 @@ self.h = h global_state.clear() try: + rvmprof.start_sampling() frame = self.bottomframe w_result = frame.execute_frame() except Exception as e: global_state.propagate_exception = e else: global_state.w_value = w_result + finally: + rvmprof.stop_sampling() self.sthread.ec.topframeref = jit.vref_None global_state.origin = self global_state.destination = self diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -1,7 +1,10 @@ +import pytest import os +from rpython.rlib.rvmprof.test.support import fakevmprof +from pypy.interpreter.gateway import interp2app from pypy.module._continuation.test.support import BaseAppTest - + at pytest.mark.usefixtures('app_fakevmprof') class AppTestStacklet(BaseAppTest): def setup_class(cls): BaseAppTest.setup_class.im_func(cls) @@ -34,6 +37,33 @@ return res return stack """) + cls.w_appdirect = cls.space.wrap(cls.runappdirect) + if cls.runappdirect: + # make sure that "self.stack" does not pass the self + cls.w_stack = staticmethod(cls.w_stack.im_func) + + + @pytest.fixture + def app_fakevmprof(self, fakevmprof): + """ + This is automaticaly re-initialized for every method: thanks to + fakevmprof's finalizer, it checks that we called {start,stop}_sampling + the in pairs + """ + w = self.space.wrap + i2a = interp2app + def is_sampling_enabled(space): + return space.wrap(fakevmprof.is_sampling_enabled) + self.w_is_sampling_enabled = w(i2a(is_sampling_enabled)) + # + def start_sampling(space): + fakevmprof.start_sampling() + self.w_start_sampling = w(i2a(start_sampling)) + # + def stop_sampling(space): + fakevmprof.stop_sampling() + self.w_stop_sampling = w(i2a(stop_sampling)) + def test_new_empty(self): from _continuation import continulet @@ -797,3 +827,25 @@ bd50 = 
continulet(f) main.switch(to=bd50) print(999) + + def test_sampling_inside_callback(self): + if self.appdirect: + # see also + # extra_tests.test_vmprof_greenlet.test_sampling_inside_callback + # for a "translated" version of this test + skip("we can't run this until we have _vmprof.is_sampling_enabled") + from _continuation import continulet + # + def my_callback(c1): + assert self.is_sampling_enabled() + return 42 + # + try: + self.start_sampling() + assert self.is_sampling_enabled() + c = continulet(my_callback) + res = c.switch() + assert res == 42 + assert self.is_sampling_enabled() + finally: + self.stop_sampling() diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -1,4 +1,5 @@ import py +import pytest try: import _continuation except ImportError: @@ -101,11 +102,7 @@ particular, we need to ensure that vmprof does not sample the stack in the middle of a switch, else we read nonsense. """ - try: - import _vmprof - except ImportError: - py.test.skip("no _vmprof") - # + _vmprof = pytest.importorskip('_vmprof') def switch_forever(c): while True: c.switch() diff --git a/pypy/module/errno/__init__.py b/pypy/module/errno/__init__.py --- a/pypy/module/errno/__init__.py +++ b/pypy/module/errno/__init__.py @@ -1,6 +1,5 @@ -# Package initialisation from pypy.interpreter.mixedmodule import MixedModule -import errno +from pypy.module.errno.interp_errno import name2code class Module(MixedModule): """This module makes available standard errno system symbols. @@ -18,9 +17,7 @@ appleveldefs = {} interpleveldefs = {"errorcode": "interp_errno.get_errorcode(space)"} - -for name in dir(errno): - if name.startswith('__') or name in Module.interpleveldefs: - continue - Module.interpleveldefs[name] = ("space.newint(%s)" % - (getattr(errno, name), )) + +for name, code in name2code.iteritems(): + if code is not None: + Module.interpleveldefs[name] = ("space.newint(%s)" % code) diff --git a/pypy/module/errno/interp_errno.py b/pypy/module/errno/interp_errno.py --- a/pypy/module/errno/interp_errno.py +++ b/pypy/module/errno/interp_errno.py @@ -1,7 +1,95 @@ -import errno from rpython.rlib.objectmodel import not_rpython +from rpython.rtyper.tool.rffi_platform import DefinedConstantInteger, configure +from rpython.translator.tool.cbuild import ExternalCompilationInfo + +# from CPython 3.5 +errors = [ + "ENODEV", "ENOCSI", "EHOSTUNREACH", "ENOMSG", "EUCLEAN", "EL2NSYNC", + "EL2HLT", "ENODATA", "ENOTBLK", "ENOSYS", "EPIPE", "EINVAL", "EOVERFLOW", + "EADV", "EINTR", "EUSERS", "ENOTEMPTY", "ENOBUFS", "EPROTO", "EREMOTE", + "ENAVAIL", "ECHILD", "ELOOP", "EXDEV", "E2BIG", "ESRCH", "EMSGSIZE", + "EAFNOSUPPORT", "EBADR", "EHOSTDOWN", "EPFNOSUPPORT", "ENOPROTOOPT", + "EBUSY", "EWOULDBLOCK", "EBADFD", "EDOTDOT", "EISCONN", "ENOANO", + "ESHUTDOWN", "ECHRNG", "ELIBBAD", "ENONET", "EBADE", "EBADF", "EMULTIHOP", + "EIO", "EUNATCH", "EPROTOTYPE", "ENOSPC", "ENOEXEC", "EALREADY", + "ENETDOWN", "ENOTNAM", "EACCES", "ELNRNG", "EILSEQ", "ENOTDIR", "ENOTUNIQ", + "EPERM", "EDOM", "EXFULL", "ECONNREFUSED", "EISDIR", "EPROTONOSUPPORT", + "EROFS", "EADDRNOTAVAIL", "EIDRM", "ECOMM", "ESRMNT", "EREMOTEIO", + "EL3RST", "EBADMSG", "ENFILE", "ELIBMAX", "ESPIPE", "ENOLINK", "ENETRESET", + "ETIMEDOUT", "ENOENT", "EEXIST", "EDQUOT", "ENOSTR", "EBADSLT", "EBADRQC", + "ELIBACC", "EFAULT", "EFBIG", "EDEADLK", "ENOTCONN", "EDESTADDRREQ", + "ELIBSCN", "ENOLCK", "EISNAM", 
"ECONNABORTED", "ENETUNREACH", "ESTALE", + "ENOSR", "ENOMEM", "ENOTSOCK", "ESTRPIPE", "EMLINK", "ERANGE", "ELIBEXEC", + "EL3HLT", "ECONNRESET", "EADDRINUSE", "EOPNOTSUPP", "EREMCHG", "EAGAIN", + "ENAMETOOLONG", "ENOTTY", "ERESTART", "ESOCKTNOSUPPORT", "ETIME", "EBFONT", + "EDEADLOCK", "ETOOMANYREFS", "EMFILE", "ETXTBSY", "EINPROGRESS", "ENXIO", + "ENOPKG",] + +win_errors = [ + "WSASY", "WSAEHOSTDOWN", "WSAENETDOWN", "WSAENOTSOCK", "WSAEHOSTUNREACH", + "WSAELOOP", "WSAEMFILE", "WSAESTALE", "WSAVERNOTSUPPORTED", + "WSAENETUNREACH", "WSAEPROCLIM", "WSAEFAULT", "WSANOTINITIALISED", + "WSAEUSERS", "WSAMAKEASYNCREPL", "WSAENOPROTOOPT", "WSAECONNABORTED", + "WSAENAMETOOLONG", "WSAENOTEMPTY", "WSAESHUTDOWN", "WSAEAFNOSUPPORT", + "WSAETOOMANYREFS", "WSAEACCES", "WSATR", "WSABASEERR", "WSADESCRIPTIO", + "WSAEMSGSIZE", "WSAEBADF", "WSAECONNRESET", "WSAGETSELECTERRO", + "WSAETIMEDOUT", "WSAENOBUFS", "WSAEDISCON", "WSAEINTR", "WSAEPROTOTYPE", + "WSAHOS", "WSAEADDRINUSE", "WSAEADDRNOTAVAIL", "WSAEALREADY", + "WSAEPROTONOSUPPORT", "WSASYSNOTREADY", "WSAEWOULDBLOCK", + "WSAEPFNOSUPPORT", "WSAEOPNOTSUPP", "WSAEISCONN", "WSAENOTCONN", + "WSAEREMOTE", "WSAEINVAL", "WSAEINPROGRESS", "WSAGETSELECTEVEN", + "WSAESOCKTNOSUPPORT", "WSAGETASYNCERRO", "WSAMAKESELECTREPL", + "WSAGETASYNCBUFLE", "WSAEDESTADDRREQ", "WSAECONNREFUSED", "WSAENETRESET", + "WSAN",] + +more_errors = [ + "ENOMEDIUM", "EMEDIUMTYPE", "ECANCELED", "ENOKEY", "EKEYEXPIRED", + "EKEYREVOKED", "EKEYREJECTED", "EOWNERDEAD", "ENOTRECOVERABLE", "ERFKILL", + + # Solaris-specific errnos + "ECANCELED", "ENOTSUP", "EOWNERDEAD", "ENOTRECOVERABLE", "ELOCKUNMAPPED", + "ENOTACTIVE", + + # MacOSX specific errnos + "EAUTH", "EBADARCH", "EBADEXEC", "EBADMACHO", "EBADRPC", "EDEVERR", + "EFTYPE", "ENEEDAUTH", "ENOATTR", "ENOPOLICY", "EPROCLIM", "EPROCUNAVAIL", + "EPROGMISMATCH", "EPROGUNAVAIL", "EPWROFF", "ERPCMISMATCH", "ESHLIBVERS"] + + + +class CConfig: + _compilation_info_ = ExternalCompilationInfo(includes=['sys/errno.h']) + +for err_name in errors + win_errors + more_errors: + setattr(CConfig, err_name, DefinedConstantInteger(err_name)) +config = configure(CConfig) + +errorcode = {} +name2code = {} +for err_name in errors: + # Note: later names take precedence over earlier ones, if they have the + # same value + code = config[err_name] + if code is not None: + errorcode[code] = err_name + name2code[err_name] = code +for name in win_errors: + assert name.startswith('WSA') + code = config[name] + if code is not None: + if name[3:] in errors: + # errno.EFOO = + name2code[name[3:]] = code + # errno.WSABAR = + name2code[name] = code + errorcode[code] = name + +for err_name in more_errors: + code = config[err_name] + if code is not None: + errorcode[code] = err_name + name2code[err_name] = code @not_rpython def get_errorcode(space): - return space.wrap(errno.errorcode) # initializiation time - + return space.wrap(errorcode) # initialization time diff --git a/pypy/module/errno/test/test_errno.py b/pypy/module/errno/test/test_errno.py --- a/pypy/module/errno/test/test_errno.py +++ b/pypy/module/errno/test/test_errno.py @@ -3,7 +3,7 @@ class AppTestErrno: spaceconfig = dict(usemodules=['errno']) - def setup_class(cls): + def setup_class(cls): cls.w_errno = cls.space.appexec([], "(): import errno ; return errno") cls.w_errorcode = cls.space.wrap(errno.errorcode) @@ -11,28 +11,11 @@ assert not hasattr(self.errno, '__file__') def test_constants(self): - host_errorcode = self.errorcode.copy() - # On some systems, ENOTSUP is an alias to EOPNOTSUPP. 
Adjust the - # host_errorcode dictionary in case the host interpreter has slightly - # different errorcodes than the interpreter under test - if ('ENOTSUP' not in host_errorcode.values() and - 'ENOTSUP' in self.errno.errorcode.values()): - host_errorcode[self.errno.ENOTSUP] = 'ENOTSUP' - if ('EOPNOTSUPP' not in host_errorcode.values() and - 'EOPNOTSUPP' in self.errno.errorcode.values()): - host_errorcode[self.errno.EOPNOTSUPP] = 'EOPNOTSUPP' - for code, name in host_errorcode.items(): + # Assumes that our constants are a superset of the host's + for code, name in self.errorcode.items(): assert getattr(self.errno, name) == code def test_errorcode(self): - host_errorcode = self.errorcode.copy() - # On some systems, ENOTSUP is an alias to EOPNOTSUPP. Adjust the - # host_errorcode dictionary in case the host interpreter has slightly - # different errorcodes than the interpreter under test - if ('ENOTSUP' not in host_errorcode.values() and - 'ENOTSUP' in self.errno.errorcode.values()): - host_errorcode[self.errno.ENOTSUP] = 'ENOTSUP' - if ('EOPNOTSUPP' not in host_errorcode.values() and - 'EOPNOTSUPP' in self.errno.errorcode.values()): - host_errorcode[self.errno.EOPNOTSUPP] = 'EOPNOTSUPP' - assert host_errorcode == self.errno.errorcode + # Assumes that our codes are a superset of the host's + for value, name in self.errorcode.items(): + assert self.errno.errorcode[value] == name diff --git a/pypy/module/posix/test/test_scandir.py b/pypy/module/posix/test/test_scandir.py --- a/pypy/module/posix/test/test_scandir.py +++ b/pypy/module/posix/test/test_scandir.py @@ -98,8 +98,7 @@ assert d.stat().st_mode & 0o170000 == 0o100000 # S_IFREG assert d.stat().st_size == 0 - @py.test.mark.skipif(sys.platform == "win32", - reason="no symlink support so far") + @py.test.mark.skipif(sys.platform == "win32", reason="no symlink support so far") def test_stat4(self): posix = self.posix d = next(posix.scandir(self.dir4)) @@ -129,8 +128,7 @@ assert not d.is_file(follow_symlinks=False) assert d.is_dir(follow_symlinks=False) - @py.test.mark.skipif(sys.platform == "win32", - reason="no symlink support so far") + @py.test.mark.skipif(sys.platform == "win32", reason="no symlink support so far") def test_dir3(self): posix = self.posix d = next(posix.scandir(self.dir3)) @@ -141,8 +139,7 @@ assert d.is_file(follow_symlinks=True) assert not d.is_file(follow_symlinks=False) - @py.test.mark.skipif(sys.platform == "win32", - reason="no symlink support so far") + @py.test.mark.skipif(sys.platform == "win32", reason="no symlink support so far") def test_dir4(self): posix = self.posix d = next(posix.scandir(self.dir4)) @@ -153,8 +150,7 @@ assert d.is_dir(follow_symlinks=True) assert not d.is_dir(follow_symlinks=False) - @py.test.mark.skipif(sys.platform == "win32", - reason="no symlink support so far") + @py.test.mark.skipif(sys.platform == "win32", reason="no symlink support so far") def test_dir5(self): posix = self.posix d = next(posix.scandir(self.dir5)) @@ -164,8 +160,7 @@ assert d.is_symlink() raises(OSError, d.stat) - @py.test.mark.skipif(sys.platform == "win32", - reason="no symlink support so far") + @py.test.mark.skipif(sys.platform == "win32", reason="no symlink support so far") def test_dir6(self): posix = self.posix d = next(posix.scandir(self.dir6)) diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -3,7 +3,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import fetch_translated_config from rpython.rtyper.lltypesystem 
import lltype, llmemory -from rpython.rlib.rvmprof import cintf +from rpython.rlib import rvmprof DEBUG = False @@ -25,12 +25,12 @@ def new(self, callback, arg=llmemory.NULL): if DEBUG: callback = _debug_wrapper(callback) - x = cintf.save_rvmprof_stack() + x = rvmprof.save_stack() try: - cintf.empty_rvmprof_stack() + rvmprof.empty_stack() h = self._gcrootfinder.new(self, callback, arg) finally: - cintf.restore_rvmprof_stack(x) + rvmprof.restore_stack(x) if DEBUG: debug.add(h) return h @@ -40,11 +40,11 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) - x = cintf.save_rvmprof_stack() + x = rvmprof.save_stack() try: h = self._gcrootfinder.switch(stacklet) finally: - cintf.restore_rvmprof_stack(x) + rvmprof.restore_stack(x) if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -56,10 +56,27 @@ return None def stop_sampling(): - from rpython.rlib.rvmprof.cintf import vmprof_stop_sampling - fd = vmprof_stop_sampling() - return rffi.cast(lltype.Signed, fd) + return _get_vmprof().stop_sampling() def start_sampling(): - from rpython.rlib.rvmprof.cintf import vmprof_start_sampling - vmprof_start_sampling() + return _get_vmprof().start_sampling() + +# ---------------- +# stacklet support +# ---------------- +# +# Ideally, vmprof_tl_stack, VMPROFSTACK etc. should be part of "self.cintf": +# not sure why they are a global. Eventually, we should probably fix all this +# mess. +from rpython.rlib.rvmprof.cintf import vmprof_tl_stack, VMPROFSTACK + +def save_stack(): + stop_sampling() + return vmprof_tl_stack.get_or_make_raw() + +def empty_stack(): + vmprof_tl_stack.setraw(lltype.nullptr(VMPROFSTACK)) + +def restore_stack(x): + vmprof_tl_stack.setraw(x) + start_sampling() diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -10,71 +10,82 @@ from rpython.rlib import rthread, jit from rpython.rlib.objectmodel import we_are_translated from rpython.config.translationoption import get_translation_config +from rpython.jit.backend import detect_cpu class VMProfPlatformUnsupported(Exception): pass +# vmprof works only on x86 for now +IS_SUPPORTED = detect_cpu.autodetect().startswith('x86') +if sys.platform == 'win32': + IS_SUPPORTED = False + ROOT = py.path.local(rpythonroot).join('rpython', 'rlib', 'rvmprof') SRC = ROOT.join('src') SHARED = SRC.join('shared') BACKTRACE = SHARED.join('libbacktrace') -compile_extra = ['-DRPYTHON_VMPROF'] -separate_module_files = [ - SHARED.join('symboltable.c'), - SHARED.join('vmprof_unix.c') -] -if sys.platform.startswith('linux'): - separate_module_files += [ - BACKTRACE.join('atomic.c'), - BACKTRACE.join('backtrace.c'), - BACKTRACE.join('state.c'), - BACKTRACE.join('elf.c'), - BACKTRACE.join('dwarf.c'), - BACKTRACE.join('fileline.c'), - BACKTRACE.join('mmap.c'), - BACKTRACE.join('mmapio.c'), - BACKTRACE.join('posix.c'), - BACKTRACE.join('sort.c'), +def make_eci(): + if make_eci.called: + raise ValueError("make_eci() should be called at most once") + # + compile_extra = ['-DRPYTHON_VMPROF'] + separate_module_files = [ + SHARED.join('symboltable.c'), + SHARED.join('vmprof_unix.c') ] - _libs = ['dl'] - compile_extra += ['-DVMPROF_UNIX'] - compile_extra += ['-DVMPROF_LINUX'] -elif sys.platform == 'win32': - compile_extra += ['-DVMPROF_WINDOWS'] - separate_module_files = [SHARED.join('vmprof_win.c')] - _libs = [] -else: - # Guessing a 
BSD-like Unix platform - compile_extra += ['-DVMPROF_UNIX'] - compile_extra += ['-DVMPROF_MAC'] - if sys.platform.startswith('freebsd'): - _libs = ['unwind'] + if sys.platform.startswith('linux'): + separate_module_files += [ + BACKTRACE.join('atomic.c'), + BACKTRACE.join('backtrace.c'), + BACKTRACE.join('state.c'), + BACKTRACE.join('elf.c'), + BACKTRACE.join('dwarf.c'), + BACKTRACE.join('fileline.c'), + BACKTRACE.join('mmap.c'), + BACKTRACE.join('mmapio.c'), + BACKTRACE.join('posix.c'), + BACKTRACE.join('sort.c'), + ] + _libs = ['dl'] + compile_extra += ['-DVMPROF_UNIX'] + compile_extra += ['-DVMPROF_LINUX'] + elif sys.platform == 'win32': + compile_extra += ['-DVMPROF_WINDOWS'] + separate_module_files = [SHARED.join('vmprof_win.c')] + _libs = [] else: - _libs = [] + # Guessing a BSD-like Unix platform + compile_extra += ['-DVMPROF_UNIX'] + compile_extra += ['-DVMPROF_MAC'] + if sys.platform.startswith('freebsd'): + _libs = ['unwind'] + else: + _libs = [] - -eci_kwds = dict( - include_dirs = [SRC, SHARED, BACKTRACE], - includes = ['rvmprof.h','vmprof_stack.h'], - libraries = _libs, - separate_module_files = [ - SRC.join('rvmprof.c'), - SHARED.join('compat.c'), - SHARED.join('machine.c'), - SHARED.join('vmp_stack.c'), - SHARED.join('vmprof_memory.c'), - SHARED.join('vmprof_common.c'), - # symbol table already in separate_module_files - ] + separate_module_files, - post_include_bits=[], - compile_extra=compile_extra - ) -if sys.platform != 'win32': - eci_kwds['separate_module_files'].append( - SHARED.join('vmprof_mt.c'), - ) -global_eci = ExternalCompilationInfo(**eci_kwds) + eci_kwds = dict( + include_dirs = [SRC, SHARED, BACKTRACE], + includes = ['rvmprof.h','vmprof_stack.h'], + libraries = _libs, + separate_module_files = [ + SRC.join('rvmprof.c'), + SHARED.join('compat.c'), + SHARED.join('machine.c'), + SHARED.join('vmp_stack.c'), + SHARED.join('vmprof_memory.c'), + SHARED.join('vmprof_common.c'), + # symbol table already in separate_module_files + ] + separate_module_files, + post_include_bits=[], + compile_extra=compile_extra + ) + if sys.platform != 'win32': + eci_kwds['separate_module_files'].append( + SHARED.join('vmprof_mt.c'), + ) + make_eci.called = True + return ExternalCompilationInfo(**eci_kwds), eci_kwds +make_eci.called = False def configure_libbacktrace_linux(): bits = 32 if sys.maxsize == 2**31-1 else 64 @@ -85,14 +96,17 @@ shutil.copy(str(BACKTRACE.join(specific_config)), str(config)) def setup(): + if not IS_SUPPORTED: + raise VMProfPlatformUnsupported + if sys.platform.startswith('linux'): configure_libbacktrace_linux() + eci, eci_kwds = make_eci() eci_kwds['compile_extra'].append('-DRPYTHON_LL2CTYPES') platform.verify_eci(ExternalCompilationInfo( **eci_kwds)) - eci = global_eci vmprof_init = rffi.llexternal("vmprof_init", [rffi.INT, rffi.DOUBLE, rffi.INT, rffi.INT, rffi.CCHARP, rffi.INT, rffi.INT], @@ -122,32 +136,16 @@ lltype.Signed, compilation_info=eci, _nowrapper=True) + vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], + rffi.INT, compilation_info=eci, + _nowrapper=True) + vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], + lltype.Void, compilation_info=eci, + _nowrapper=True) + return CInterface(locals()) -# this is always present, but compiles to no-op if RPYTHON_VMPROF is not -# defined (i.e. 
if we don't actually use vmprof in the generated C) -auto_eci = ExternalCompilationInfo(post_include_bits=[""" -#ifndef RPYTHON_VMPROF -# define vmprof_stop_sampling() (-1) -# define vmprof_start_sampling() ((void)0) -#endif -"""]) - -if get_translation_config() is None: - # tests need the full eci here - _eci = global_eci -else: - _eci = auto_eci - -vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], - rffi.INT, compilation_info=_eci, - _nowrapper=True) -vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], - lltype.Void, compilation_info=_eci, - _nowrapper=True) - - class CInterface(object): def __init__(self, namespace): for k, v in namespace.iteritems(): @@ -232,20 +230,6 @@ leave_code(s) # -# stacklet support - -def save_rvmprof_stack(): - vmprof_stop_sampling() - return vmprof_tl_stack.get_or_make_raw() - -def empty_rvmprof_stack(): - vmprof_tl_stack.setraw(lltype.nullptr(VMPROFSTACK)) - -def restore_rvmprof_stack(x): - vmprof_tl_stack.setraw(x) - vmprof_start_sampling() - -# # traceback support def get_rvmprof_stack(): diff --git a/rpython/rlib/rvmprof/dummy.py b/rpython/rlib/rvmprof/dummy.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/dummy.py @@ -0,0 +1,26 @@ +from rpython.rlib.objectmodel import specialize + +class DummyVMProf(object): + + def __init__(self): + self._unique_id = 0 + + def register_code_object_class(self, CodeClass, full_name_func): + CodeClass._vmprof_unique_id = self._unique_id + self._unique_id += 1 + + @specialize.argtype(1) + def register_code(self, code, full_name_func): + pass + + def enable(self, fileno, interval, memory=0, native=0, real_time=0): + pass + + def disable(self): + pass + + def start_sampling(self): + pass + + def stop_sampling(self): + pass diff --git a/rpython/rlib/rvmprof/rvmprof.py b/rpython/rlib/rvmprof/rvmprof.py --- a/rpython/rlib/rvmprof/rvmprof.py +++ b/rpython/rlib/rvmprof/rvmprof.py @@ -2,6 +2,7 @@ from rpython.rlib.objectmodel import specialize, we_are_translated, not_rpython from rpython.rlib import jit, rposix, rgc from rpython.rlib.rvmprof import cintf +from rpython.rlib.rvmprof.dummy import DummyVMProf from rpython.rtyper.annlowlevel import cast_instance_to_gcref from rpython.rtyper.annlowlevel import cast_base_ptr_to_instance from rpython.rtyper.lltypesystem import lltype, llmemory, rffi @@ -34,6 +35,9 @@ return [] class VMProf(object): + """ + NOTE: the API of this class should be kept in sync with dummy.DummyVMProf + """ _immutable_fields_ = ['is_enabled?'] @@ -168,6 +172,21 @@ if self.cintf.vmprof_register_virtual_function(name, uid, 500000) < 0: raise VMProfError("vmprof buffers full! disk full or too slow") + def stop_sampling(self): + """ + Temporarily stop the sampling of stack frames. Signals are still + delivered, but are ignored. + """ + fd = self.cintf.vmprof_stop_sampling() + return rffi.cast(lltype.Signed, fd) + + def start_sampling(self): + """ + Undo the effect of stop_sampling + """ + self.cintf.vmprof_start_sampling() + + def vmprof_execute_code(name, get_code_fn, result_class=None, _hack_update_stack_untranslated=False): """Decorator to be used on the function that interprets a code object. 
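As a brief illustration, not part of the changeset: the DummyVMProf class added above is wired into _get_vmprof() in the next hunk as a fallback, so callers may use the rvmprof API unconditionally; on platforms where the C backend cannot be built, every call becomes a harmless no-op. A hypothetical helper, assuming only the names visible in these diffs:

    from rpython.rlib import rvmprof
    from rpython.rlib.rvmprof.dummy import DummyVMProf

    def vmprof_backend_available():
        # True when the real VMProf C backend was set up; False when
        # cintf.VMProfPlatformUnsupported made _get_vmprof() fall back to
        # the no-op DummyVMProf.
        return not isinstance(rvmprof._get_vmprof(), DummyVMProf)
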
@@ -240,5 +259,8 @@ def _get_vmprof(): global _vmprof_instance if _vmprof_instance is None: - _vmprof_instance = VMProf() + try: + _vmprof_instance = VMProf() + except cintf.VMProfPlatformUnsupported: + _vmprof_instance = DummyVMProf() return _vmprof_instance diff --git a/rpython/rlib/rvmprof/test/support.py b/rpython/rlib/rvmprof/test/support.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/test/support.py @@ -0,0 +1,45 @@ +import pytest +from rpython.rlib import rvmprof + +class FakeVMProf(object): + + def __init__(self): + self._enabled = False + self._ignore_signals = 1 + + # --- VMProf official API --- + # add fake methods as needed by the tests + + def stop_sampling(self): + self._ignore_signals += 1 + + def start_sampling(self): + assert self._ignore_signals > 0, ('calling start_sampling() without ' + 'the corresponding stop_sampling()?') + self._ignore_signals -= 1 + + # --- FakeVMProf specific API --- + # this API is not part of rvmprof, but available only inside tests using + # fakevmprof + + @property + def is_sampling_enabled(self): + return self._ignore_signals == 0 + + def check_status(self): + """ + To be called during test teardown + """ + if self._ignore_signals != 1: + msg = ('Invalid value for fakevmprof._ignore_signals: expected 1, ' + 'got %d. This probably means that you called ' + '{start,stop}_sampling() a wrong number of times') + raise ValueError, msg % self._ignore_signals + + + at pytest.fixture +def fakevmprof(request, monkeypatch): + fake = FakeVMProf() + monkeypatch.setattr(rvmprof.rvmprof, '_vmprof_instance', fake) + request.addfinalizer(fake.check_status) + return fake diff --git a/rpython/rlib/rvmprof/test/test_support.py b/rpython/rlib/rvmprof/test/test_support.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/test/test_support.py @@ -0,0 +1,42 @@ +import pytest +from rpython.rlib import rvmprof +from rpython.rlib.rvmprof.test.support import FakeVMProf, fakevmprof + +class TestFakeVMProf(object): + + def test_sampling(self): + fake = FakeVMProf() + assert not fake.is_sampling_enabled + # + fake.start_sampling() + assert fake.is_sampling_enabled + # + fake.stop_sampling() + fake.stop_sampling() + assert not fake.is_sampling_enabled + # + fake.start_sampling() + assert not fake.is_sampling_enabled + fake.start_sampling() + assert fake.is_sampling_enabled + # + pytest.raises(AssertionError, "fake.start_sampling()") + + def test_check_status(self): + fake = FakeVMProf() + fake.stop_sampling() + pytest.raises(ValueError, "fake.check_status()") + + +class TestFixture(object): + + def test_fixture(self, fakevmprof): + assert isinstance(fakevmprof, FakeVMProf) + assert rvmprof._get_vmprof() is fakevmprof + # + # tweak sampling using the "real" API, and check that we actually used + # the fake + rvmprof.start_sampling() + assert fakevmprof.is_sampling_enabled + rvmprof.stop_sampling() + assert not fakevmprof.is_sampling_enabled From pypy.commits at gmail.com Thu Dec 21 13:06:33 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 21 Dec 2017 10:06:33 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix for MSVC compatibility Message-ID: <5a3bf829.46101c0a.cb049.456c@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r93536:7dce5a70e8e7 Date: 2017-12-21 19:55 +0200 http://bitbucket.org/pypy/pypy/changeset/7dce5a70e8e7/ Log: fix for MSVC compatibility diff --git a/pypy/module/errno/interp_errno.py b/pypy/module/errno/interp_errno.py --- a/pypy/module/errno/interp_errno.py +++ b/pypy/module/errno/interp_errno.py @@ -58,7 
+58,7 @@ class CConfig: - _compilation_info_ = ExternalCompilationInfo(includes=['sys/errno.h']) + _compilation_info_ = ExternalCompilationInfo(includes=['errno.h']) for err_name in errors + win_errors + more_errors: setattr(CConfig, err_name, DefinedConstantInteger(err_name)) From pypy.commits at gmail.com Thu Dec 21 13:06:40 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 21 Dec 2017 10:06:40 -0800 (PST) Subject: [pypy-commit] pypy default: increment version Message-ID: <5a3bf830.50b91c0a.a2558.806c@mx.google.com> Author: Matti Picus Branch: Changeset: r93539:6b024edd9d12 Date: 2017-12-21 20:00 +0200 http://bitbucket.org/pypy/pypy/changeset/6b024edd9d12/ Log: increment version diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h --- a/pypy/module/cpyext/include/patchlevel.h +++ b/pypy/module/cpyext/include/patchlevel.h @@ -29,8 +29,8 @@ #define PY_VERSION "2.7.13" /* PyPy version as a string */ -#define PYPY_VERSION "5.10.0-alpha0" -#define PYPY_VERSION_NUM 0x050A0000 +#define PYPY_VERSION "5.11.0-alpha0" +#define PYPY_VERSION_NUM 0x050B0000 /* Defined to mean a PyPy where cpyext holds more regular references to PyObjects, e.g. staying alive as long as the internal PyPy object diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py --- a/pypy/module/sys/version.py +++ b/pypy/module/sys/version.py @@ -10,7 +10,7 @@ #XXX # sync CPYTHON_VERSION with patchlevel.h, package.py CPYTHON_API_VERSION = 1013 #XXX # sync with include/modsupport.h -PYPY_VERSION = (5, 10, 0, "alpha", 0) #XXX # sync patchlevel.h +PYPY_VERSION = (5, 11, 0, "alpha", 0) #XXX # sync patchlevel.h import pypy From pypy.commits at gmail.com Thu Dec 21 13:06:36 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 21 Dec 2017 10:06:36 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.5-v5.9.x: merge py3.5 into release Message-ID: <5a3bf82c.f4acdf0a.e88f3.be63@mx.google.com> Author: Matti Picus Branch: release-pypy3.5-v5.9.x Changeset: r93537:ad8e7f9311f5 Date: 2017-12-21 19:56 +0200 http://bitbucket.org/pypy/pypy/changeset/ad8e7f9311f5/ Log: merge py3.5 into release diff --git a/pypy/module/errno/interp_errno.py b/pypy/module/errno/interp_errno.py --- a/pypy/module/errno/interp_errno.py +++ b/pypy/module/errno/interp_errno.py @@ -58,7 +58,7 @@ class CConfig: - _compilation_info_ = ExternalCompilationInfo(includes=['sys/errno.h']) + _compilation_info_ = ExternalCompilationInfo(includes=['errno.h']) for err_name in errors + win_errors + more_errors: setattr(CConfig, err_name, DefinedConstantInteger(err_name)) From pypy.commits at gmail.com Thu Dec 21 13:06:37 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 21 Dec 2017 10:06:37 -0800 (PST) Subject: [pypy-commit] pypy release-pypy2.7-v5.9.x: make version final Message-ID: <5a3bf82d.919bdf0a.d135e.8caa@mx.google.com> Author: Matti Picus Branch: release-pypy2.7-v5.9.x Changeset: r93538:0e7ea4fe15e8 Date: 2017-12-21 20:00 +0200 http://bitbucket.org/pypy/pypy/changeset/0e7ea4fe15e8/ Log: make version final diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h --- a/pypy/module/cpyext/include/patchlevel.h +++ b/pypy/module/cpyext/include/patchlevel.h @@ -29,7 +29,7 @@ #define PY_VERSION "2.7.13" /* PyPy version as a string */ -#define PYPY_VERSION "5.10.0-alpha0" +#define PYPY_VERSION "5.10.0" #define PYPY_VERSION_NUM 0x050A0000 /* Defined to mean a PyPy where cpyext holds more regular references diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py 
--- a/pypy/module/sys/version.py +++ b/pypy/module/sys/version.py @@ -10,7 +10,7 @@ #XXX # sync CPYTHON_VERSION with patchlevel.h, package.py CPYTHON_API_VERSION = 1013 #XXX # sync with include/modsupport.h -PYPY_VERSION = (5, 10, 0, "alpha", 0) #XXX # sync patchlevel.h +PYPY_VERSION = (5, 10, 0, "final", 0) #XXX # sync patchlevel.h import pypy From pypy.commits at gmail.com Thu Dec 21 13:06:43 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 21 Dec 2017 10:06:43 -0800 (PST) Subject: [pypy-commit] pypy default: part two of restart whats-new Message-ID: <5a3bf833.94571c0a.6df4f.2b76@mx.google.com> Author: Matti Picus Branch: Changeset: r93541:58004bbab6dc Date: 2017-12-21 20:05 +0200 http://bitbucket.org/pypy/pypy/changeset/58004bbab6dc/ Log: part two of restart whats-new diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/whatsnew-head.rst @@ -0,0 +1,7 @@ +=========================== +What's new in PyPy2.7 5.10+ +=========================== + +.. this is a revision shortly after release-pypy2.7-v5.10.0 +.. startrev: 6b024edd9d12 + From pypy.commits at gmail.com Thu Dec 21 13:06:42 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 21 Dec 2017 10:06:42 -0800 (PST) Subject: [pypy-commit] pypy default: restart whats-new Message-ID: <5a3bf832.07c5df0a.1427f.34e4@mx.google.com> Author: Matti Picus Branch: Changeset: r93540:bc445709766c Date: 2017-12-21 20:05 +0200 http://bitbucket.org/pypy/pypy/changeset/bc445709766c/ Log: restart whats-new diff --git a/pypy/doc/index-of-whatsnew.rst b/pypy/doc/index-of-whatsnew.rst --- a/pypy/doc/index-of-whatsnew.rst +++ b/pypy/doc/index-of-whatsnew.rst @@ -7,6 +7,7 @@ .. toctree:: whatsnew-head.rst + whatsnew-pypy2-5.10.0.rst whatsnew-pypy2-5.9.0.rst whatsnew-pypy2-5.8.0.rst whatsnew-pypy2-5.7.0.rst diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-pypy2-5.10.0.rst rename from pypy/doc/whatsnew-head.rst rename to pypy/doc/whatsnew-pypy2-5.10.0.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-pypy2-5.10.0.rst @@ -1,6 +1,6 @@ -=========================== -What's new in PyPy2.7 5.10+ -=========================== +========================== +What's new in PyPy2.7 5.10 +========================== .. this is a revision shortly after release-pypy2.7-v5.9.0 .. 
startrev:d56dadcef996 From pypy.commits at gmail.com Fri Dec 22 02:08:18 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 21 Dec 2017 23:08:18 -0800 (PST) Subject: [pypy-commit] pypy py3.5: I think this is how it was supposed to be, unclear how to test Message-ID: <5a3caf62.19a0df0a.7a9e.e6aa@mx.google.com> Author: fijal Branch: py3.5 Changeset: r93542:34c63fba0bba Date: 2017-12-22 09:07 +0200 http://bitbucket.org/pypy/pypy/changeset/34c63fba0bba/ Log: I think this is how it was supposed to be, unclear how to test diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -38,9 +38,6 @@ return stack """) cls.w_appdirect = cls.space.wrap(cls.runappdirect) - if cls.runappdirect: - # make sure that "self.stack" does not pass the self - cls.w_stack = staticmethod(cls.w_stack.im_func) @pytest.fixture From pypy.commits at gmail.com Fri Dec 22 02:09:09 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 21 Dec 2017 23:09:09 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.5-v5.9.x: merge py3.5 Message-ID: <5a3caf95.e5b2df0a.10835.732e@mx.google.com> Author: fijal Branch: release-pypy3.5-v5.9.x Changeset: r93543:a91df6163fb7 Date: 2017-12-22 09:08 +0200 http://bitbucket.org/pypy/pypy/changeset/a91df6163fb7/ Log: merge py3.5 diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -38,9 +38,6 @@ return stack """) cls.w_appdirect = cls.space.wrap(cls.runappdirect) - if cls.runappdirect: - # make sure that "self.stack" does not pass the self - cls.w_stack = staticmethod(cls.w_stack.im_func) @pytest.fixture From pypy.commits at gmail.com Fri Dec 22 05:04:08 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 22 Dec 2017 02:04:08 -0800 (PST) Subject: [pypy-commit] pypy default: mention next milestones Message-ID: <5a3cd898.c4141c0a.5696d.1000@mx.google.com> Author: fijal Branch: Changeset: r93544:b5a861c01c71 Date: 2017-12-22 12:03 +0200 http://bitbucket.org/pypy/pypy/changeset/b5a861c01c71/ Log: mention next milestones diff --git a/pypy/doc/release-v5.10.0.rst b/pypy/doc/release-v5.10.0.rst --- a/pypy/doc/release-v5.10.0.rst +++ b/pypy/doc/release-v5.10.0.rst @@ -19,6 +19,12 @@ several issues and bugs raised by the growing community of PyPy users. As always, we strongly recommend updating. +There are quite a few important changes that are in the pipeline that did not +make it into the 5.10 release. Most important are speed improvements to cpyext +(which will make numpy and pandas a bit faster) and utf8 branch that changes +internal representation of unicode to utf8, which should help especially the +Python 3.5 version of PyPy. + This release concludes the Mozilla Open Source `grant`_ for having a compatible PyPy 3.5 release and we're very grateful for that. Of course, we will continue to improve PyPy 3.5 and probably move to 3.6 during the course of 2018. @@ -53,7 +59,7 @@ We also welcome developers of other `dynamic languages`_ to see what RPython can do for them. 
-The PyPy 2.7 release supports: +The PyPy release supports: * **x86** machines on most common operating systems (Linux 32/64 bits, Mac OS X 64 bits, Windows 32 bits, OpenBSD, FreeBSD) From pypy.commits at gmail.com Fri Dec 22 05:10:14 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 22 Dec 2017 02:10:14 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.5-v5.9.x: Added tag release-pypy3.5-5.10.0 for changeset a91df6163fb7 Message-ID: <5a3cda06.12711c0a.9d3fa.a6fb@mx.google.com> Author: fijal Branch: release-pypy3.5-v5.9.x Changeset: r93545:09f9160b643e Date: 2017-12-22 12:09 +0200 http://bitbucket.org/pypy/pypy/changeset/09f9160b643e/ Log: Added tag release-pypy3.5-5.10.0 for changeset a91df6163fb7 diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -44,3 +44,4 @@ d72f9800a42b46a8056951b1da2426d2c2d8d502 release-pypy3.5-v5.9.0 03d614975835870da65ff0481e1edad68ebbcb8d release-pypy2.7-v5.9.0 84a2f3e6a7f88f2fe698e473998755b3bd1a12e2 release-pypy2.7-v5.9.0 +a91df6163fb76df245091f741dbf6a23ddc72374 release-pypy3.5-5.10.0 From pypy.commits at gmail.com Fri Dec 22 06:03:10 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 22 Dec 2017 03:03:10 -0800 (PST) Subject: [pypy-commit] pypy default: Added tag release-pypy2.7-v5.10.0 for changeset 0e7ea4fe15e8 Message-ID: <5a3ce66e.b7a0df0a.1d24f.e5a5@mx.google.com> Author: Matti Picus Branch: Changeset: r93546:8ac1cb1e43e5 Date: 2017-12-22 12:59 +0200 http://bitbucket.org/pypy/pypy/changeset/8ac1cb1e43e5/ Log: Added tag release-pypy2.7-v5.10.0 for changeset 0e7ea4fe15e8 diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -44,3 +44,4 @@ d72f9800a42b46a8056951b1da2426d2c2d8d502 release-pypy3.5-v5.9.0 03d614975835870da65ff0481e1edad68ebbcb8d release-pypy2.7-v5.9.0 84a2f3e6a7f88f2fe698e473998755b3bd1a12e2 release-pypy2.7-v5.9.0 +0e7ea4fe15e82d5124e805e2e4a37cae1a402d4b release-pypy2.7-v5.10.0 From pypy.commits at gmail.com Fri Dec 22 06:03:12 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 22 Dec 2017 03:03:12 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.5-v5.9.x: Removed tag release-pypy3.5-5.10.0 Message-ID: <5a3ce670.87a9df0a.48e7.8af5@mx.google.com> Author: Matti Picus Branch: release-pypy3.5-v5.9.x Changeset: r93547:7a22aa3bd5bf Date: 2017-12-22 13:01 +0200 http://bitbucket.org/pypy/pypy/changeset/7a22aa3bd5bf/ Log: Removed tag release-pypy3.5-5.10.0 diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -45,3 +45,5 @@ 03d614975835870da65ff0481e1edad68ebbcb8d release-pypy2.7-v5.9.0 84a2f3e6a7f88f2fe698e473998755b3bd1a12e2 release-pypy2.7-v5.9.0 a91df6163fb76df245091f741dbf6a23ddc72374 release-pypy3.5-5.10.0 +a91df6163fb76df245091f741dbf6a23ddc72374 release-pypy3.5-5.10.0 +0000000000000000000000000000000000000000 release-pypy3.5-5.10.0 From pypy.commits at gmail.com Fri Dec 22 06:03:14 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 22 Dec 2017 03:03:14 -0800 (PST) Subject: [pypy-commit] pypy default: Added tag release-pypy3.5-v5.10.0 for changeset a91df6163fb7 Message-ID: <5a3ce672.90bf1c0a.a8530.0d8b@mx.google.com> Author: Matti Picus Branch: Changeset: r93548:fdf52f9fd458 Date: 2017-12-22 13:02 +0200 http://bitbucket.org/pypy/pypy/changeset/fdf52f9fd458/ Log: Added tag release-pypy3.5-v5.10.0 for changeset a91df6163fb7 diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -45,3 +45,4 @@ 03d614975835870da65ff0481e1edad68ebbcb8d release-pypy2.7-v5.9.0 84a2f3e6a7f88f2fe698e473998755b3bd1a12e2 release-pypy2.7-v5.9.0 0e7ea4fe15e82d5124e805e2e4a37cae1a402d4b 
release-pypy2.7-v5.10.0 +a91df6163fb76df245091f741dbf6a23ddc72374 release-pypy3.5-v5.10.0 From pypy.commits at gmail.com Fri Dec 22 07:13:05 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 22 Dec 2017 04:13:05 -0800 (PST) Subject: [pypy-commit] pypy default: Make ctypes_tests independent of rpython Message-ID: <5a3cf6d1.e5b2df0a.10835.f43c@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93549:9d239f4dad8a Date: 2017-12-22 13:08 +0100 http://bitbucket.org/pypy/pypy/changeset/9d239f4dad8a/ Log: Make ctypes_tests independent of rpython diff --git a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c b/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c --- a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c +++ b/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c @@ -3,9 +3,7 @@ #define MS_WIN32 #endif -#include "src/precommondefs.h" - -#define EXPORT(x) RPY_EXPORTED x +#define EXPORT(x) extern x #include #include @@ -272,7 +270,7 @@ { double x, sum=0.0, dx=(b-a)/(double)nstep; for(x=a+0.5*dx; (b-x)*(x-a)>0.0; x+=dx) - { + { double y = f(x); printf("f(x)=%.1f\n", y); sum += f(x); @@ -287,7 +285,7 @@ static void _xxx_init(void *(*Xalloc)(int), void (*Xfree)(void *)) { void *ptr; - + printf("_xxx_init got %p %p\n", Xalloc, Xfree); printf("calling\n"); ptr = Xalloc(32); @@ -438,7 +436,7 @@ #endif /********/ - + #ifndef MS_WIN32 typedef struct { diff --git a/pypy/module/test_lib_pypy/ctypes_tests/conftest.py b/pypy/module/test_lib_pypy/ctypes_tests/conftest.py --- a/pypy/module/test_lib_pypy/ctypes_tests/conftest.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/conftest.py @@ -1,14 +1,91 @@ -import py, pytest +import py +import pytest import sys +import os def pytest_ignore_collect(path): if '__pypy__' not in sys.builtin_module_names: return True +# XXX: copied from pypy/tool/cpyext/extbuild.py +if os.name != 'nt': + so_ext = 'so' +else: + so_ext = 'dll' + +def _build(cfilenames, outputfilename, compile_extra, link_extra, + include_dirs, libraries, library_dirs): + try: + # monkeypatch distutils for some versions of msvc compiler + import setuptools + except ImportError: + # XXX if this fails and is required, + # we must call pypy -mensurepip after translation + pass + from distutils.ccompiler import new_compiler + from distutils import sysconfig + + # XXX for Darwin running old versions of CPython 2.7.x + sysconfig.get_config_vars() + + compiler = new_compiler(force=1) + sysconfig.customize_compiler(compiler) # XXX + objects = [] + for cfile in cfilenames: + cfile = py.path.local(cfile) + old = cfile.dirpath().chdir() + try: + res = compiler.compile([cfile.basename], + include_dirs=include_dirs, extra_preargs=compile_extra) + assert len(res) == 1 + cobjfile = py.path.local(res[0]) + assert cobjfile.check() + objects.append(str(cobjfile)) + finally: + old.chdir() + + compiler.link_shared_object( + objects, str(outputfilename), + libraries=libraries, + extra_preargs=link_extra, + library_dirs=library_dirs) + +def c_compile(cfilenames, outputfilename, + compile_extra=None, link_extra=None, + include_dirs=None, libraries=None, library_dirs=None): + compile_extra = compile_extra or [] + link_extra = link_extra or [] + include_dirs = include_dirs or [] + libraries = libraries or [] + library_dirs = library_dirs or [] + if sys.platform == 'win32': + link_extra = link_extra + ['/DEBUG'] # generate .pdb file + if sys.platform == 'darwin': + # support Fink & Darwinports + for s in ('/sw/', '/opt/local/'): + if (s + 'include' not in include_dirs + and os.path.exists(s + 'include')): 
+ include_dirs.append(s + 'include') + if s + 'lib' not in library_dirs and os.path.exists(s + 'lib'): + library_dirs.append(s + 'lib') + + outputfilename = py.path.local(outputfilename).new(ext=so_ext) + saved_environ = os.environ.copy() + try: + _build( + cfilenames, outputfilename, + compile_extra, link_extra, + include_dirs, libraries, library_dirs) + finally: + # workaround for a distutils bugs where some env vars can + # become longer and longer every time it is used + for key, value in saved_environ.items(): + if os.environ.get(key) != value: + os.environ[key] = value + return outputfilename +# end copy + def compile_so_file(): - from rpython.translator.platform import platform - from rpython.translator.tool.cbuild import ExternalCompilationInfo - from rpython.translator import cdir udir = pytest.ensuretemp('_ctypes_test') cfile = py.path.local(__file__).dirpath().join("_ctypes_test.c") @@ -16,11 +93,8 @@ libraries = ['oleaut32'] else: libraries = [] - eci = ExternalCompilationInfo(libraries=libraries, - include_dirs=[cdir]) - return platform.compile([cfile], eci, str(udir.join('_ctypes_test')), - standalone=False) + return c_compile([cfile], str(udir / '_ctypes_test'), libraries=libraries) # we need to run after the "tmpdir" plugin which installs pytest.ensuretemp @pytest.mark.trylast diff --git a/pypy/tool/cpyext/extbuild.py b/pypy/tool/cpyext/extbuild.py --- a/pypy/tool/cpyext/extbuild.py +++ b/pypy/tool/cpyext/extbuild.py @@ -199,7 +199,7 @@ # monkeypatch distutils for some versions of msvc compiler import setuptools except ImportError: - # XXX if this fails and is required, + # XXX if this fails and is required, # we must call pypy -mensurepip after translation pass from distutils.ccompiler import new_compiler From pypy.commits at gmail.com Fri Dec 22 09:25:23 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 22 Dec 2017 06:25:23 -0800 (PST) Subject: [pypy-commit] pypy default: Clean up tests and don't rely on magical injection of raises() into globals Message-ID: <5a3d15d3.4fabdf0a.b5a0.d197@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93550:edd970341375 Date: 2017-12-22 15:24 +0100 http://bitbucket.org/pypy/pypy/changeset/edd970341375/ Log: Clean up tests and don't rely on magical injection of raises() into globals diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_anon.py b/pypy/module/test_lib_pypy/ctypes_tests/test_anon.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_anon.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_anon.py @@ -1,3 +1,4 @@ +import pytest from ctypes import * from support import BaseCTypesTestChecker @@ -22,18 +23,15 @@ def test_anon_nonseq(self): # TypeError: _anonymous_ must be a sequence - raises(TypeError, - lambda: type(Structure)("Name", - (Structure,), - {"_fields_": [], "_anonymous_": 42})) + with pytest.raises(TypeError): + type(Structure)( + "Name", (Structure,), {"_fields_": [], "_anonymous_": 42}) def test_anon_nonmember(self): # AttributeError: type object 'Name' has no attribute 'x' - raises(AttributeError, - lambda: type(Structure)("Name", - (Structure,), - {"_fields_": [], - "_anonymous_": ["x"]})) + with pytest.raises(AttributeError): + type(Structure)( + "Name", (Structure,), {"_fields_": [], "_anonymous_": ["x"]}) def test_nested(self): class ANON_S(Structure): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_array.py b/pypy/module/test_lib_pypy/ctypes_tests/test_array.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_array.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_array.py @@ -1,5 
+1,4 @@ - -import py +import pytest from ctypes import * from support import BaseCTypesTestChecker @@ -40,7 +39,8 @@ assert values == [0] * len(init) # Too many in itializers should be caught - py.test.raises(IndexError, int_array, *range(alen*2)) + with pytest.raises(IndexError): + int_array(*range(alen*2)) CharArray = ARRAY(c_char, 3) @@ -48,7 +48,8 @@ # Should this work? It doesn't: # CharArray("abc") - py.test.raises(TypeError, CharArray, "abc") + with pytest.raises(TypeError): + CharArray("abc") assert ca[0] == "a" assert ca[1] == "b" @@ -61,10 +62,12 @@ # slicing is now supported, but not extended slicing (3-argument)! from operator import getslice, delitem - py.test.raises(TypeError, getslice, ca, 0, 1, -1) + with pytest.raises(TypeError): + getslice(ca, 0, 1, -1) # cannot delete items - py.test.raises(TypeError, delitem, ca, 0) + with pytest.raises(TypeError): + delitem(ca, 0) def test_numeric_arrays(self): @@ -165,7 +168,8 @@ assert isinstance(Car("abcdefghi", 42.0, "12345").brand, bytes) assert Car("abcdefghi", 42.0, "12345").brand == "abcdefghi" assert Car("abcdefghio", 42.0, "12345").brand == "abcdefghio" - raises(ValueError, Car, "abcdefghiop", 42.0, "12345") + with pytest.raises(ValueError): + Car("abcdefghiop", 42.0, "12345") A = Car._fields_[2][1] TP = POINTER(A) diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_as_parameter.py b/pypy/module/test_lib_pypy/ctypes_tests/test_as_parameter.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_as_parameter.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_as_parameter.py @@ -1,5 +1,5 @@ from ctypes import * -import py +import pytest from support import BaseCTypesTestChecker def setup_module(mod): @@ -104,7 +104,8 @@ # check that the prototype works: we call f with wrong # argument types cb = AnotherCallback(callback) - raises(ArgumentError, f, self.wrap(-10), self.wrap(cb)) + with pytest.raises(ArgumentError): + f(self.wrap(-10), self.wrap(cb)) def test_callbacks_2(self): # Can also use simple datatypes as argument type specifiers @@ -213,4 +214,4 @@ wrap = AsParamPropertyWrapper #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - + diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py b/pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py @@ -1,4 +1,4 @@ -import py +import pytest from ctypes import * from support import BaseCTypesTestChecker import os @@ -245,7 +245,5 @@ def test_set_fields_cycle_fails(self): class A(Structure): pass - import pytest - pytest.raises(AttributeError, """ + with pytest.raises(AttributeError): A._fields_ = [("a", A)] - """) diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py b/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py @@ -1,5 +1,5 @@ from ctypes import * -import py +import pytest from support import BaseCTypesTestChecker class TestCallbacks(BaseCTypesTestChecker): @@ -22,7 +22,7 @@ c_uint: (int, long), c_ulong: (int, long), } - + PROTO = self.functype.im_func(typ, typ) cfunc = PROTO(self.callback) result = cfunc(arg) @@ -101,15 +101,18 @@ ## self.check_type(c_char_p, "abc") ## self.check_type(c_char_p, "def") + + @pytest.mark.xfail( + reason="we are less strict about callback return type sanity") def test_unsupported_restype_1(self): - py.test.skip("we are less strict about callback 
return type sanity") # Only "fundamental" result types are supported for callback # functions, the type must have a non-NULL stgdict->setfunc. # POINTER(c_double), for example, is not supported. prototype = self.functype.im_func(POINTER(c_double)) # The type is checked when the prototype is called - raises(TypeError, prototype, lambda: None) + with pytest.raises(TypeError): + prototype(lambda: None) try: WINFUNCTYPE @@ -193,9 +196,10 @@ class RECT(Structure): _fields_ = [("left", c_int), ("top", c_int), ("right", c_int), ("bottom", c_int)] - + proto = CFUNCTYPE(RECT, c_int) - raises(TypeError, proto, lambda r: 0) + with pytest.raises(TypeError): + proto(lambda r: 0) def test_qsort(self): @@ -210,7 +214,7 @@ a[i] = 5-i assert a[0] == 5 # sanity - + def comp(a, b): a = a.contents.value b = b.contents.value @@ -273,4 +277,5 @@ FUNC = CFUNCTYPE(None, c_void_p) cfunc = FUNC(callback) param = c_uint(42) - py.test.raises(ArgumentError, "cfunc(param)") + with pytest.raises(ArgumentError): + cfunc(param) diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_checkretval.py b/pypy/module/test_lib_pypy/ctypes_tests/test_checkretval.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_checkretval.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_checkretval.py @@ -1,4 +1,4 @@ -import py +import pytest import sys from ctypes import * @@ -33,6 +33,5 @@ pass else: def test_oledll(self): - raises(WindowsError, - oledll.oleaut32.CreateTypeLib2, - 0, 0, 0) + with pytest.raises(WindowsError): + oledll.oleaut32.CreateTypeLib2(0, 0, 0) diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_funcptr.py b/pypy/module/test_lib_pypy/ctypes_tests/test_funcptr.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_funcptr.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_funcptr.py @@ -1,4 +1,4 @@ -import py +import pytest import sys, os, unittest from ctypes import * @@ -27,6 +27,8 @@ assert sizeof(x) == sizeof(c_voidp) assert sizeof(X) == sizeof(c_voidp) + @pytest.mark.xfail( + reason="cdecl funcptrs ignoring extra args is not implemented") def test_first(self): StdCallback = WINFUNCTYPE(c_int, c_int, c_int) CdeclCallback = CFUNCTYPE(c_int, c_int, c_int) @@ -42,14 +44,13 @@ # The following no longer raises a TypeError - it is now # possible, as in C, to call cdecl functions with more parameters. 
#self.assertRaises(TypeError, c, 1, 2, 3) - py.test.skip("cdecl funcptrs ignoring extra args is not implemented") assert c(1, 2, 3, 4, 5, 6) == 3 if not WINFUNCTYPE is CFUNCTYPE and os.name != "ce": - raises(TypeError, s, 1, 2, 3) + with pytest.raises(TypeError): + s(1, 2, 3) + @pytest.mark.skipif("sys.platform != 'win32'") def test_structures(self): - if sys.platform != 'win32': - py.test.skip("win32 related") WNDPROC = WINFUNCTYPE(c_long, c_int, c_int, c_int, c_int) def wndproc(hwnd, msg, wParam, lParam): @@ -130,9 +131,10 @@ assert strtok(None, "\n") == "c" assert strtok(None, "\n") == None + @pytest.mark.xfail( + reason="This test needs mmap to make sure the code is executable, " + "please rewrite me") def test_from_address(self): - py.test.skip("This test needs mmap to make sure the" - " code is executable, please rewrite me") def make_function(): proto = CFUNCTYPE(c_int) a=create_string_buffer( diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py @@ -8,7 +8,7 @@ from __future__ import with_statement from ctypes import * import sys -import py +import pytest from support import BaseCTypesTestChecker try: @@ -140,7 +140,8 @@ assert type(result) == int # You cannot assing character format codes as restype any longer - raises(TypeError, setattr, f, "restype", "i") + with pytest.raises(TypeError): + setattr(f, "restype", "i") def test_unicode_function_name(self): f = dll[u'_testfunc_i_bhilfd'] @@ -237,7 +238,8 @@ result = f(arg) assert not result.contents == v.value - raises(ArgumentError, f, byref(c_short(22))) + with pytest.raises(ArgumentError): + f(byref(c_short(22))) # It is dangerous, however, because you don't control the lifetime # of the pointer: @@ -262,7 +264,8 @@ class X(Structure): _fields_ = [("y", c_int)] - raises(ArgumentError, f, X()) #cannot convert parameter + with pytest.raises(ArgumentError): + f(X()) #cannot convert parameter ################################################################ def test_shorts(self): @@ -310,7 +313,8 @@ # check that the prototype works: we call f with wrong # argument types cb = AnotherCallback(callback) - raises(ArgumentError, f, -10, cb) + with pytest.raises(ArgumentError): + f(-10, cb) def test_callbacks_2(self): @@ -351,8 +355,10 @@ assert 13577625587 == f(1000000000000, cb) def test_errors_2(self): - raises(AttributeError, getattr, dll, "_xxx_yyy") - raises(ValueError, c_int.in_dll, dll, "_xxx_yyy") + with pytest.raises(AttributeError): + getattr(dll, "_xxx_yyy") + with pytest.raises(ValueError): + c_int.in_dll(dll, "_xxx_yyy") def test_byval(self): # without prototype @@ -466,16 +472,16 @@ result = f("abcd", ord("b"), 42) assert result == "bcd" + @pytest.mark.xfail(reason="we are less strict in checking callback parameters") def test_sf1651235(self): - py.test.skip("we are less strict in checking callback parameters") # see http://www.python.org/sf/1651235 - proto = CFUNCTYPE(c_int, RECT, POINT) def callback(*args): return 0 callback = proto(callback) - raises(ArgumentError, lambda: callback((1, 2, 3, 4), POINT())) + with pytest.raises(ArgumentError): + callback((1, 2, 3, 4), POINT()) def test_argument_conversion_and_checks(self): #This test is designed to check for segfaults if the wrong type of argument is passed as parameter @@ -485,8 +491,10 @@ assert strlen("eggs", ord("g")) == "ggs" # Should raise ArgumentError, not segfault - 
py.test.raises(ArgumentError, strlen, 0, 0) - py.test.raises(ArgumentError, strlen, False, 0) + with pytest.raises(ArgumentError): + strlen(0, 0) + with pytest.raises(ArgumentError): + strlen(False, 0) def test_union_as_passed_value(self): class UN(Union): @@ -524,8 +532,8 @@ assert tf_b("yadda") == -42 assert seen == ["yadda"] + @pytest.mark.xfail(reason="warnings are disabled") def test_warnings(self): - py.test.skip("warnings are disabled") import warnings warnings.simplefilter("always") with warnings.catch_warnings(record=True) as w: @@ -534,8 +542,8 @@ assert issubclass(w[0].category, RuntimeWarning) assert "C function without declared arguments called" in str(w[0].message) + @pytest.mark.xfail def test_errcheck(self): - py.test.skip('fixme') def errcheck(result, func, args): assert result == -42 assert type(result) is int @@ -556,12 +564,12 @@ assert len(w) == 1 assert issubclass(w[0].category, RuntimeWarning) assert "C function without declared return type called" in str(w[0].message) - + with warnings.catch_warnings(record=True) as w: dll.get_an_integer.restype = None dll.get_an_integer() assert len(w) == 0 - + warnings.resetwarnings() diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py b/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py @@ -1,4 +1,4 @@ -import py +import pytest from ctypes import * import sys import os, StringIO @@ -27,14 +27,17 @@ def test_load(self): CDLL(libc_name) CDLL(os.path.basename(libc_name)) - raises(OSError, CDLL, self.unknowndll) + with pytest.raises(OSError): + CDLL(self.unknowndll) if libc_name is not None and os.path.basename(libc_name) == "libc.so.6": def test_load_version(self): cdll.LoadLibrary("libc.so.6") # linux uses version, libc 9 should not exist - raises(OSError, cdll.LoadLibrary, "libc.so.9") - raises(OSError, cdll.LoadLibrary, self.unknowndll) + with pytest.raises(OSError): + cdll.LoadLibrary("libc.so.9") + with pytest.raises(OSError): + cdll.LoadLibrary(self.unknowndll) def test_find(self): for name in ("c", "m"): @@ -80,7 +83,5 @@ f_name_addr = c_void_p.from_address(a_name).value assert hex(f_ord_addr) == hex(f_name_addr) - raises(AttributeError, dll.__getitem__, 1234) - -if __name__ == "__main__": - unittest.main() + with pytest.raises(AttributeError): + dll[1234] diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py @@ -1,4 +1,4 @@ -import py +import pytest from ctypes import * from support import BaseCTypesTestChecker import sys, struct @@ -65,8 +65,10 @@ # Only numbers are allowed in the contructor, # otherwise TypeError is raised for t in signed_types + unsigned_types + float_types: - raises(TypeError, t, "") - raises(TypeError, t, None) + with pytest.raises(TypeError): + t("") + with pytest.raises(TypeError): + t(None) ## def test_valid_ranges(self): ## # invalid values of the correct type @@ -75,16 +77,16 @@ ## self.assertRaises(ValueError, t, l-1) ## self.assertRaises(ValueError, t, h+1) + @pytest.mark.xfail(reason="testing implementation internals") def test_from_param(self): # the from_param class method attribute always # returns PyCArgObject instances - py.test.skip("testing implementation internals") for t in signed_types + unsigned_types + float_types: assert ArgType == type(t.from_param(0)) 
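Illustrative aside, not part of the changeset: several tests in the surrounding hunks stop calling py.test.skip() from inside the test body and are instead decorated with @pytest.mark.xfail(reason=...). Unlike a skip, the xfail marker still runs the test: a failure is reported as "xfail" and an unexpected pass as "xpass", so the marker keeps documenting behaviour that is expected to change without aborting the run. A minimal, self-contained sketch of the two mechanisms (the test names and assertions are invented for illustration, not taken from the changeset):

    import pytest

    @pytest.mark.xfail(reason="documents behaviour that is known to be missing")
    def test_expected_to_fail():
        # The body still runs; a failure is reported as xfail, a pass as
        # xpass, and neither outcome stops the rest of the test run.
        assert 1 + 1 == 3

    @pytest.mark.skip(reason="not executed at all")
    def test_skipped_entirely():
        assert False
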
+ @pytest.mark.xfail(reason="testing implementation internals") def test_byref(self): # calling byref returns also a PyCArgObject instance - py.test.skip("testing implementation internals") for t in signed_types + unsigned_types + float_types: parm = byref(t()) assert ArgType == type(parm) @@ -108,7 +110,8 @@ def test_integers(self): # integers cannot be constructed from floats for t in signed_types + unsigned_types: - raises(TypeError, t, 3.14) + with pytest.raises(TypeError): + t(3.14) def test_sizes(self): for t in signed_types + unsigned_types + float_types: @@ -185,7 +188,8 @@ # c_int() can be initialized from Python's int, and c_int. # Not from c_long or so, which seems strange, abd should # probably be changed: - raises(TypeError, c_int, c_long(42)) + with pytest.raises(TypeError): + c_int(c_long(42)) def test_subclass(self): class enum(c_int): @@ -195,14 +199,13 @@ _fields_ = [('t', enum)] assert isinstance(S().t, enum) + #@pytest.mark.xfail("'__pypy__' not in sys.builtin_module_names") + @pytest.mark.xfail def test_no_missing_shape_to_ffi_type(self): # whitebox test - import sys - if '__pypy__' not in sys.builtin_module_names: - skip("only for pypy's ctypes") - skip("re-enable after adding 'g' to _shape_to_ffi_type.typemap, " - "which I think needs fighting all the way up from " - "rpython.rlib.libffi") + "re-enable after adding 'g' to _shape_to_ffi_type.typemap, " + "which I think needs fighting all the way up from " + "rpython.rlib.libffi" from _ctypes.basics import _shape_to_ffi_type from _rawffi import Array for i in range(1, 256): @@ -213,7 +216,7 @@ else: assert chr(i) in _shape_to_ffi_type.typemap - @py.test.mark.xfail + @pytest.mark.xfail def test_pointer_to_long_double(self): import ctypes ctypes.POINTER(ctypes.c_longdouble) diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_parameters.py b/pypy/module/test_lib_pypy/ctypes_tests/test_parameters.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_parameters.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_parameters.py @@ -1,4 +1,4 @@ -import py +import pytest import sys class TestSimpleTypes: @@ -60,10 +60,10 @@ o = My_void_pp() assert Void_pp.from_param(o) is o - + # XXX Replace by c_char_p tests + @pytest.mark.xfail(reason="testing implementation internals") def test_cstrings(self): - py.test.skip("testing implementation internals") from ctypes import c_char_p, byref # c_char_p.from_param on a Python String packs the string @@ -73,17 +73,19 @@ # new in 0.9.1: convert (encode) unicode to ascii assert c_char_p.from_param(u"123")._obj == "123" - raises(UnicodeEncodeError, c_char_p.from_param, u"123\377") + with pytest.raises(UnicodeEncodeError): + c_char_p.from_param(u"123\377") - raises(TypeError, c_char_p.from_param, 42) + with pytest.raises(TypeError): + c_char_p.from_param(42) # calling c_char_p.from_param with a c_char_p instance # returns the argument itself: a = c_char_p("123") assert c_char_p.from_param(a) is a + @pytest.mark.xfail(reason="testing implementation internals") def test_cw_strings(self): - py.test.skip("testing implementation internals") from ctypes import byref try: from ctypes import c_wchar_p @@ -93,11 +95,13 @@ s = u"123" if sys.platform == "win32": assert c_wchar_p.from_param(s)._obj is s - raises(TypeError, c_wchar_p.from_param, 42) + with pytest.raises(TypeError): + c_wchar_p.from_param(42) # new in 0.9.1: convert (decode) ascii to unicode assert c_wchar_p.from_param("123")._obj == u"123" - raises(UnicodeDecodeError, c_wchar_p.from_param, "123\377") + with 
pytest.raises(UnicodeDecodeError): + c_wchar_p.from_param("123\377") pa = c_wchar_p.from_param(c_wchar_p(u"123")) assert type(pa) == c_wchar_p @@ -115,9 +119,12 @@ assert not LPINT.from_param(None) if c_int != c_long: - raises(TypeError, LPINT.from_param, pointer(c_long(42))) - raises(TypeError, LPINT.from_param, pointer(c_uint(42))) - raises(TypeError, LPINT.from_param, pointer(c_short(42))) + with pytest.raises(TypeError): + LPINT.from_param(pointer(c_long(42))) + with pytest.raises(TypeError): + LPINT.from_param(pointer(c_uint(42))) + with pytest.raises(TypeError): + LPINT.from_param(pointer(c_short(42))) def test_byref_pointer(self): # The from_param class method of POINTER(typ) classes accepts what is @@ -127,10 +134,13 @@ LPINT.from_param(byref(c_int(42))) - raises(TypeError, LPINT.from_param, byref(c_short(22))) + with pytest.raises(TypeError): + LPINT.from_param(byref(c_short(22))) if c_int != c_long: - raises(TypeError, LPINT.from_param, byref(c_long(22))) - raises(TypeError, LPINT.from_param, byref(c_uint(22))) + with pytest.raises(TypeError): + LPINT.from_param(byref(c_long(22))) + with pytest.raises(TypeError): + LPINT.from_param(byref(c_uint(22))) def test_byref_pointerpointer(self): # See above @@ -139,10 +149,13 @@ LPLPINT = POINTER(POINTER(c_int)) LPLPINT.from_param(byref(pointer(c_int(42)))) - raises(TypeError, LPLPINT.from_param, byref(pointer(c_short(22)))) + with pytest.raises(TypeError): + LPLPINT.from_param(byref(pointer(c_short(22)))) if c_int != c_long: - raises(TypeError, LPLPINT.from_param, byref(pointer(c_long(22)))) - raises(TypeError, LPLPINT.from_param, byref(pointer(c_uint(22)))) + with pytest.raises(TypeError): + LPLPINT.from_param(byref(pointer(c_long(22)))) + with pytest.raises(TypeError): + LPLPINT.from_param(byref(pointer(c_uint(22)))) def test_array_pointers(self): from ctypes import c_short, c_uint, c_int, c_long, POINTER @@ -155,15 +168,18 @@ # the same type! 
LPINT = POINTER(c_int) LPINT.from_param((c_int*3)()) - raises(TypeError, LPINT.from_param, c_short*3) - raises(TypeError, LPINT.from_param, c_long*3) - raises(TypeError, LPINT.from_param, c_uint*3) + with pytest.raises(TypeError): + LPINT.from_param(c_short*3) + with pytest.raises(TypeError): + LPINT.from_param(c_long*3) + with pytest.raises(TypeError): + LPINT.from_param(c_uint*3) ## def test_performance(self): ## check_perf() + @pytest.mark.xfail(reason="testing implementation internals") def test_noctypes_argtype(self): - py.test.skip("we implement details differently") from ctypes import CDLL, c_void_p, ArgumentError import conftest dll = CDLL(str(conftest.sofile)) @@ -171,7 +187,8 @@ func = dll._testfunc_p_p func.restype = c_void_p # TypeError: has no from_param method - raises(TypeError, setattr, func, "argtypes", (object,)) + with pytest.raises(TypeError): + setattr(func, "argtypes", (object,)) class Adapter(object): def from_param(cls, obj): @@ -187,7 +204,8 @@ func.argtypes = (Adapter(),) # don't know how to convert parameter 1 - raises(ArgumentError, func, object()) + with pytest.raises(ArgumentError): + func(object()) assert func(c_void_p(42)) == 42 class Adapter(object): @@ -196,7 +214,8 @@ func.argtypes = (Adapter(),) # ArgumentError: argument 1: ValueError: 99 - raises(ArgumentError, func, 99) + with pytest.raises(ArgumentError): + func(99) def test_multiple_signature(self): # when .argtypes is not set, calling a function with a certain @@ -212,4 +231,4 @@ # This one is normal assert func(None) == 0 - + diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_prototypes.py b/pypy/module/test_lib_pypy/ctypes_tests/test_prototypes.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_prototypes.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_prototypes.py @@ -1,4 +1,4 @@ -import py +import pytest from ctypes import * from support import BaseCTypesTestChecker @@ -31,21 +31,27 @@ def test_restype_setattr(self): func = testdll._testfunc_p_p - raises(TypeError, setattr, func, 'restype', 20) + with pytest.raises(TypeError): + setattr(func, 'restype', 20) def test_argtypes_setattr(self): func = testdll._testfunc_p_p - raises(TypeError, setattr, func, 'argtypes', 20) - raises(TypeError, setattr, func, 'argtypes', [20]) + with pytest.raises(TypeError): + setattr(func, 'argtypes', 20) + with pytest.raises(TypeError): + setattr(func, 'argtypes', [20]) func = CFUNCTYPE(c_long, c_void_p, c_long)(lambda: None) assert func.argtypes == (c_void_p, c_long) def test_paramflags_setattr(self): func = CFUNCTYPE(c_long, c_void_p, c_long)(lambda: None) - raises(TypeError, setattr, func, 'paramflags', 'spam') - raises(ValueError, setattr, func, 'paramflags', (1, 2, 3, 4)) - raises(TypeError, setattr, func, 'paramflags', ((1,), ('a',))) + with pytest.raises(TypeError): + setattr(func, 'paramflags', 'spam') + with pytest.raises(ValueError): + setattr(func, 'paramflags', (1, 2, 3, 4)) + with pytest.raises(TypeError): + setattr(func, 'paramflags', ((1,), ('a',))) func.paramflags = (1,), (1|4,) def test_kwargs(self): @@ -107,13 +113,16 @@ positive_address(func(byref(ci)))) func.argtypes = c_char_p, - raises(ArgumentError, func, byref(ci)) + with pytest.raises(ArgumentError): + func(byref(ci)) func.argtypes = POINTER(c_short), - raises(ArgumentError, func, byref(ci)) + with pytest.raises(ArgumentError): + func(byref(ci)) func.argtypes = POINTER(c_double), - raises(ArgumentError, func, byref(ci)) + with pytest.raises(ArgumentError): + func(byref(ci)) def test_POINTER_c_char_arg(self): func = 
testdll._testfunc_p_p @@ -252,7 +261,8 @@ func.restype = POINTER(c_int) func.argtypes = [c_int * 8] array = ARRAY(1, 2, 3, 4, 5, 6, 7, 8) - py.test.raises(ArgumentError, "func(array)") + with pytest.raises(ArgumentError): + func(array) ################################################################ diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_returnfuncptrs.py b/pypy/module/test_lib_pypy/ctypes_tests/test_returnfuncptrs.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_returnfuncptrs.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_returnfuncptrs.py @@ -1,4 +1,4 @@ -import py +import pytest from ctypes import * @@ -16,8 +16,10 @@ strchr = get_strchr() assert strchr("abcdef", "b") == "bcdef" assert strchr("abcdef", "x") == None - raises(ArgumentError, strchr, "abcdef", 3) - raises(TypeError, strchr, "abcdef") + with pytest.raises(ArgumentError): + strchr("abcdef", 3) + with pytest.raises(TypeError): + strchr("abcdef") def test_without_prototype(self): get_strchr = dll.get_strchr @@ -29,5 +31,7 @@ strchr = CFUNCTYPE(c_char_p, c_char_p, c_char)(addr) assert strchr("abcdef", "b"), "bcdef" assert strchr("abcdef", "x") == None - raises(ArgumentError, strchr, "abcdef", 3) - raises(TypeError, strchr, "abcdef") + with pytest.raises(ArgumentError): + strchr("abcdef", 3) + with pytest.raises(TypeError): + strchr("abcdef") diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_slicing.py b/pypy/module/test_lib_pypy/ctypes_tests/test_slicing.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_slicing.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_slicing.py @@ -1,4 +1,4 @@ -import py +import pytest from support import BaseCTypesTestChecker from ctypes import * @@ -30,13 +30,17 @@ from operator import setslice # TypeError: int expected instead of str instance - raises(TypeError, setslice, a, 0, 5, "abcde") + with pytest.raises(TypeError): + setslice(a, 0, 5, "abcde") # TypeError: int expected instead of str instance - raises(TypeError, setslice, a, 0, 5, ["a", "b", "c", "d", "e"]) + with pytest.raises(TypeError): + setslice(a, 0, 5, ["a", "b", "c", "d", "e"]) # TypeError: int expected instead of float instance - raises(TypeError, setslice, a, 0, 5, [1, 2, 3, 4, 3.14]) + with pytest.raises(TypeError): + setslice(a, 0, 5, [1, 2, 3, 4, 3.14]) # ValueError: Can only assign sequence of same size - raises(ValueError, setslice, a, 0, 5, range(32)) + with pytest.raises(ValueError): + setslice(a, 0, 5, range(32)) def test_char_ptr(self): s = "abcdefghijklmnopqrstuvwxyz" @@ -47,8 +51,8 @@ assert res[:len(s)] == s import operator - raises(TypeError, operator.setslice, - res, 0, 5, u"abcde") + with pytest.raises(TypeError): + operator.setslice(res, 0, 5, u"abcde") dll.my_free(res) dll.my_strdup.restype = POINTER(c_byte) @@ -99,8 +103,8 @@ assert res[:len(s)] == s import operator - raises(TypeError, operator.setslice, - res, 0, 5, u"abcde") + with pytest.raises(TypeError): + operator.setslice(res, 0, 5, u"abcde") dll.my_free(res) if sizeof(c_wchar) == sizeof(c_short): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_stringptr.py b/pypy/module/test_lib_pypy/ctypes_tests/test_stringptr.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_stringptr.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_stringptr.py @@ -1,4 +1,4 @@ -import py +import pytest from support import BaseCTypesTestChecker from ctypes import * @@ -16,7 +16,8 @@ x = X() # NULL pointer access - raises(ValueError, getattr, x.str, "contents") + with pytest.raises(ValueError): + x.str.contents b = c_buffer("Hello, 
World") #from sys import getrefcount as grc #assert grc(b) == 2 @@ -31,7 +32,6 @@ # XXX pypy modified: #raises(TypeError, setattr, x, "str", "Hello, World") x = b = None - py.test.skip("test passes! but modified to avoid getrefcount and detail issues") def test__c_char_p(self): class X(Structure): @@ -47,7 +47,6 @@ #b = c_buffer("Hello, World") #raises(TypeError, setattr, x, "str", b) x = None - py.test.skip("test passes! but modified to avoid detail issues") def test_functions(self): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_struct_fields.py b/pypy/module/test_lib_pypy/ctypes_tests/test_struct_fields.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_struct_fields.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_struct_fields.py @@ -1,4 +1,4 @@ -import py +import pytest from support import BaseCTypesTestChecker from ctypes import * @@ -18,31 +18,37 @@ pass assert sizeof(X) == 0 # not finalized X._fields_ = [] # finalized - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + X._fields_ = [] def test_1_B(self): class X(Structure): _fields_ = [] # finalized - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + X._fields_ = [] def test_2(self): class X(Structure): pass X() - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + X._fields_ = [] def test_3(self): class X(Structure): pass class Y(Structure): _fields_ = [("x", X)] # finalizes X - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + X._fields_ = [] def test_4(self): class X(Structure): pass class Y(X): pass - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + X._fields_ = [] Y._fields_ = [] - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + X._fields_ = [] diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py @@ -2,7 +2,7 @@ from struct import calcsize from support import BaseCTypesTestChecker -import py +import pytest class TestSubclasses(BaseCTypesTestChecker): @@ -143,8 +143,10 @@ assert X.y.size == sizeof(c_char) # readonly - raises((TypeError, AttributeError), setattr, X.x, "offset", 92) - raises((TypeError, AttributeError), setattr, X.x, "size", 92) + with pytest.raises((TypeError, AttributeError)): + X.x.offset = 92 + with pytest.raises((TypeError, AttributeError)): + X.x.size = 92 class X(Union): _fields_ = [("x", c_int), @@ -157,8 +159,10 @@ assert X.y.size == sizeof(c_char) # readonly - raises((TypeError, AttributeError), setattr, X.x, "offset", 92) - raises((TypeError, AttributeError), setattr, X.x, "size", 92) + with pytest.raises((TypeError, AttributeError)): + X.x.offset = 92 + with pytest.raises((TypeError, AttributeError)): + X.x.size = 92 # XXX Should we check nested data types also? # offset is always relative to the class... 
@@ -202,23 +206,28 @@ d = {"_fields_": [("a", "b"), ("b", "q")], "_pack_": -1} - raises(ValueError, type(Structure), "X", (Structure,), d) + with pytest.raises(ValueError): + type(Structure)("X", (Structure,), d) def test_initializers(self): class Person(Structure): _fields_ = [("name", c_char*6), ("age", c_int)] - raises(TypeError, Person, 42) - raises(ValueError, Person, "asldkjaslkdjaslkdj") - raises(TypeError, Person, "Name", "HI") + with pytest.raises(TypeError): + Person(42) + with pytest.raises(ValueError): + Person("asldkjaslkdjaslkdj") + with pytest.raises(TypeError): + Person("Name", "HI") # short enough assert Person("12345", 5).name == "12345" # exact fit assert Person("123456", 5).name == "123456" # too long - raises(ValueError, Person, "1234567", 5) + with pytest.raises(ValueError): + Person("1234567", 5) def test_keyword_initializers(self): @@ -246,7 +255,8 @@ def test_invalid_field_types(self): class POINT(Structure): pass - raises(TypeError, setattr, POINT, "_fields_", [("x", 1), ("y", 2)]) + with pytest.raises(TypeError): + POINT._fields_ = [("x", 1), ("y", 2)] def test_intarray_fields(self): class SomeInts(Structure): @@ -257,7 +267,8 @@ assert SomeInts((1, 2, 3, 4)).a[:] == [1, 2, 3, 4] # too long # XXX Should raise ValueError?, not RuntimeError - raises(RuntimeError, SomeInts, (1, 2, 3, 4, 5)) + with pytest.raises(RuntimeError): + SomeInts((1, 2, 3, 4, 5)) def test_nested_initializers(self): # test initializing nested structures @@ -278,7 +289,7 @@ assert p.age == 5 def test_structures_with_wchar(self): - py.test.skip("need unicode support on _rawffi level") + pytest.skip("need unicode support on _rawffi level") try: c_wchar except NameError: @@ -296,10 +307,11 @@ # exact fit assert PersonW(u"123456789012").name == u"123456789012" #too long - raises(ValueError, PersonW, u"1234567890123") + with pytest.raises(ValueError): + PersonW(u"1234567890123") def test_init_errors(self): - py.test.skip("not implemented error details") + pytest.skip("not implemented error details") class Phone(Structure): _fields_ = [("areacode", c_char*6), ("number", c_char*12)] @@ -347,7 +359,7 @@ ## (AttributeError, "class must define a '_fields_' attribute")) def test_abstract_class(self): - py.test.skip("_abstract_ semantics not implemented") + pytest.skip("_abstract_ semantics not implemented") class X(Structure): _abstract_ = "something" # try 'X()' @@ -373,7 +385,7 @@ assert p.age == 6 def test_subclassing_field_is_a_tuple(self): - py.test.skip("subclassing semantics not implemented") + pytest.skip("subclassing semantics not implemented") class Person(Structure): _fields_ = (("name", c_char*6), ("age", c_int)) @@ -547,7 +559,7 @@ raise AssertionError("Structure or union cannot contain itself") def test_vice_versa(self): - py.test.skip("mutually dependent lazily defined structures error semantics") + pytest.skip("mutually dependent lazily defined structures error semantics") class First(Structure): pass class Second(Structure): @@ -568,18 +580,21 @@ pass assert sizeof(X) == 0 X._fields_ = [("a", c_int),] - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + X._fields_ = [] class X(Structure): pass X() - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + X._fields_ = [] class X(Structure): pass class Y(X): pass - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + X._fields_ = [] Y.__fields__ = [] diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py 
b/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py @@ -1,6 +1,6 @@ # coding: latin-1 import ctypes -import py +import pytest from support import BaseCTypesTestChecker try: @@ -33,7 +33,7 @@ assert wcslen(u"ab\u2070") == 3 # string args are converted assert wcslen("abc") == 3 - py.test.raises(ctypes.ArgumentError, wcslen, "ab�") + pytest.raises(ctypes.ArgumentError, wcslen, "ab�") def test_ascii_replace(self): ctypes.set_conversion_mode("ascii", "replace") @@ -86,7 +86,8 @@ ctypes.set_conversion_mode("ascii", "strict") assert func("abc") == "abc" assert func(u"abc") == "abc" - raises(ctypes.ArgumentError, func, u"ab�") + with pytest.raises(ctypes.ArgumentError): + func(u"ab�") def test_ascii_ignore(self): ctypes.set_conversion_mode("ascii", "ignore") diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_values.py b/pypy/module/test_lib_pypy/ctypes_tests/test_values.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_values.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_values.py @@ -2,7 +2,7 @@ A testcase which accesses *values* in a dll. """ -import py +import pytest from ctypes import * from support import BaseCTypesTestChecker @@ -27,12 +27,13 @@ assert ctdll.get_a_string_char(15) == ord('$') def test_undefined(self): - raises(ValueError, c_int.in_dll, ctdll, "Undefined_Symbol") + with pytest.raises(ValueError): + c_int.in_dll(ctdll, "Undefined_Symbol") class TestWin_Values(BaseCTypesTestChecker): """This test only works when python itself is a dll/shared library""" def setup_class(cls): - py.test.skip("tests expect and access cpython dll") + pytest.skip("tests expect and access cpython dll") def test_optimizeflag(self): # This test accesses the Py_OptimizeFlag intger, which is @@ -86,7 +87,8 @@ del _pointer_type_cache[struct_frozen] def test_undefined(self): - raises(ValueError, c_int.in_dll, pydll, "Undefined_Symbol") + with pytest.raises(ValueError): + c_int.in_dll(pydll, "Undefined_Symbol") if __name__ == '__main__': unittest.main() diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_varsize_struct.py b/pypy/module/test_lib_pypy/ctypes_tests/test_varsize_struct.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_varsize_struct.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_varsize_struct.py @@ -1,10 +1,10 @@ -import py +import pytest from support import BaseCTypesTestChecker from ctypes import * class TestVarSize(BaseCTypesTestChecker): def test_resize(self): - py.test.skip("resizing not implemented") + pytest.skip("resizing not implemented") class X(Structure): _fields_ = [("item", c_int), ("array", c_int * 1)] @@ -35,15 +35,23 @@ def test_array_invalid_length(self): # cannot create arrays with non-positive size - raises(ValueError, lambda: c_int * -1) - raises(ValueError, lambda: c_int * -3) + with pytest.raises(ValueError): + c_int * -1 + with pytest.raises(ValueError): + c_int * -3 def test_zerosized_array(self): array = (c_int * 0)() # accessing elements of zero-sized arrays raise IndexError - raises(IndexError, array.__setitem__, 0, None) - raises(IndexError, array.__getitem__, 0) - raises(IndexError, array.__setitem__, 1, None) - raises(IndexError, array.__getitem__, 1) - raises(IndexError, array.__setitem__, -1, None) - raises(IndexError, array.__getitem__, -1) + with pytest.raises(IndexError): + array.__setitem__(0, None) + with pytest.raises(IndexError): + array.__getitem__(0) + with pytest.raises(IndexError): + 
array.__setitem__(1, None) + with pytest.raises(IndexError): + array.__getitem__(1) + with pytest.raises(IndexError): + array.__setitem__(-1, None) + with pytest.raises(IndexError): + array.__getitem__(-1) From pypy.commits at gmail.com Fri Dec 22 10:14:33 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 22 Dec 2017 07:14:33 -0800 (PST) Subject: [pypy-commit] pypy default: Compatibility with pytest 3.* Message-ID: <5a3d2159.14121c0a.a2b60.92ca@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93551:3f6df999bd4c Date: 2017-12-22 16:09 +0100 http://bitbucket.org/pypy/pypy/changeset/3f6df999bd4c/ Log: Compatibility with pytest 3.* diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_commethods.py b/pypy/module/test_lib_pypy/ctypes_tests/test_commethods.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_commethods.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_commethods.py @@ -2,10 +2,10 @@ # Can't resist from implementing some kind of mini-comtypes # theller ;-) -import py +import pytest import sys if sys.platform != "win32": - py.test.skip('windows only test') + pytest.importorskip('skip_the_whole_module') # hack! import ctypes, new, unittest from ctypes.wintypes import HRESULT @@ -27,7 +27,7 @@ if instance is None: return self return new.instancemethod(self.func, instance, owner) - + def commethod(index, restype, *argtypes): """A decorator that generates COM methods. The decorated function itself is not used except for it's name.""" @@ -72,7 +72,8 @@ assert 4 == punk.AddRef() punk.SetName("TypeLib_ByPYPY") - py.test.raises(COMError, lambda: punk.SetName(None)) + with pytest.raises(COMError): + punk.SetName(None) # This would save the typelib to disk. ## punk.SaveAllChanges() diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_init.py b/pypy/module/test_lib_pypy/ctypes_tests/test_init.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_init.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_init.py @@ -1,7 +1,6 @@ -import py +import pytest from ctypes import * -py.test.skip("subclassing semantics and implementation details not implemented") class X(Structure): _fields_ = [("a", c_int), @@ -21,19 +20,20 @@ _fields_ = [("x", X)] -class TestInit: - def test_get(self): - # make sure the only accessing a nested structure - # doesn't call the structure's __new__ and __init__ - y = Y() - assert (y.x.a, y.x.b) == (0, 0) - assert y.x.new_was_called == False + at pytest.mark.xfail( + reason="subclassing semantics and implementation details not implemented") +def test_get(): + # make sure the only accessing a nested structure + # doesn't call the structure's __new__ and __init__ + y = Y() + assert (y.x.a, y.x.b) == (0, 0) + assert y.x.new_was_called == False - # But explicitely creating an X structure calls __new__ and __init__, of course. - x = X() - assert (x.a, x.b) == (9, 12) - assert x.new_was_called == True + # But explicitely creating an X structure calls __new__ and __init__, of course. 
+ x = X() + assert (x.a, x.b) == (9, 12) + assert x.new_was_called == True - y.x = x - assert (y.x.a, y.x.b) == (9, 12) - assert y.x.new_was_called == False + y.x = x + assert (y.x.a, y.x.b) == (9, 12) + assert y.x.new_was_called == False diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_win32.py b/pypy/module/test_lib_pypy/ctypes_tests/test_win32.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_win32.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_win32.py @@ -4,11 +4,11 @@ from ctypes.test import is_resource_enabled from support import BaseCTypesTestChecker -import py +import pytest import sys if sys.platform != "win32": - py.test.skip("win32-only tests") + pytest.importorskip('skip_the_whole_module') # hack! class TestWindows(BaseCTypesTestChecker): def test_callconv_1(self): @@ -16,13 +16,15 @@ IsWindow = windll.user32.IsWindow # ValueError: Procedure probably called with not enough arguments (4 bytes missing) - py.test.raises(ValueError, IsWindow) + with pytest.raises(ValueError): + IsWindow() # This one should succeeed... assert IsWindow(0) == 0 # ValueError: Procedure probably called with too many arguments (8 bytes in excess) - py.test.raises(ValueError, IsWindow, 0, 0, 0) + with pytest.raises(ValueError): + IsWindow(0, 0, 0) def test_callconv_2(self): # Calling stdcall function as cdecl @@ -31,13 +33,15 @@ # ValueError: Procedure called with not enough arguments (4 bytes missing) # or wrong calling convention - py.test.raises(ValueError, IsWindow, None) + with pytest.raises(ValueError): + IsWindow(None) if is_resource_enabled("SEH"): def test_SEH(self): # Call functions with invalid arguments, and make sure that access violations # are trapped and raise an exception. - py.test.raises(WindowsError, windll.kernel32.GetModuleHandleA, 32) + with pytest.raises(WindowsError): + windll.kernel32.GetModuleHandleA(32) class TestWintypes(BaseCTypesTestChecker): From pypy.commits at gmail.com Fri Dec 22 10:14:35 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 22 Dec 2017 07:14:35 -0800 (PST) Subject: [pypy-commit] pypy default: Avoid dependency on rpython Message-ID: <5a3d215b.b0abdf0a.8b3f.fae8@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93552:2af71eb93682 Date: 2017-12-22 16:13 +0100 http://bitbucket.org/pypy/pypy/changeset/2af71eb93682/ Log: Avoid dependency on rpython diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py @@ -605,7 +605,7 @@ get_data.errcheck = ret_list_p(1) assert get_data('testing!') == [-1, -2, -3, -4] - def test_issue2533(self): + def test_issue2533(self, tmpdir): import cffi ffi = cffi.FFI() ffi.cdef("int **fetchme(void);") @@ -617,11 +617,10 @@ return &pa; } """) - from rpython.tool.udir import udir - ffi.compile(verbose=True, tmpdir=str(udir)) + ffi.compile(verbose=True, tmpdir=str(tmpdir)) import sys - sys.path.insert(0, str(udir)) + sys.path.insert(0, str(tmpdir)) try: from _x_cffi import ffi, lib finally: From pypy.commits at gmail.com Fri Dec 22 10:27:33 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 22 Dec 2017 07:27:33 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a3d2465.ceb51c0a.3cc20.1026@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93553:c653db1b380d Date: 2017-12-22 16:26 +0100 http://bitbucket.org/pypy/pypy/changeset/c653db1b380d/ Log: hg merge default diff --git 
a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -44,3 +44,5 @@ d72f9800a42b46a8056951b1da2426d2c2d8d502 release-pypy3.5-v5.9.0 03d614975835870da65ff0481e1edad68ebbcb8d release-pypy2.7-v5.9.0 84a2f3e6a7f88f2fe698e473998755b3bd1a12e2 release-pypy2.7-v5.9.0 +0e7ea4fe15e82d5124e805e2e4a37cae1a402d4b release-pypy2.7-v5.10.0 +a91df6163fb76df245091f741dbf6a23ddc72374 release-pypy3.5-v5.10.0 diff --git a/pypy/doc/index-of-whatsnew.rst b/pypy/doc/index-of-whatsnew.rst --- a/pypy/doc/index-of-whatsnew.rst +++ b/pypy/doc/index-of-whatsnew.rst @@ -7,6 +7,7 @@ .. toctree:: whatsnew-head.rst + whatsnew-pypy2-5.10.0.rst whatsnew-pypy2-5.9.0.rst whatsnew-pypy2-5.8.0.rst whatsnew-pypy2-5.7.0.rst diff --git a/pypy/doc/release-v5.10.0.rst b/pypy/doc/release-v5.10.0.rst --- a/pypy/doc/release-v5.10.0.rst +++ b/pypy/doc/release-v5.10.0.rst @@ -19,6 +19,12 @@ several issues and bugs raised by the growing community of PyPy users. As always, we strongly recommend updating. +There are quite a few important changes that are in the pipeline that did not +make it into the 5.10 release. Most important are speed improvements to cpyext +(which will make numpy and pandas a bit faster) and utf8 branch that changes +internal representation of unicode to utf8, which should help especially the +Python 3.5 version of PyPy. + This release concludes the Mozilla Open Source `grant`_ for having a compatible PyPy 3.5 release and we're very grateful for that. Of course, we will continue to improve PyPy 3.5 and probably move to 3.6 during the course of 2018. @@ -53,7 +59,7 @@ We also welcome developers of other `dynamic languages`_ to see what RPython can do for them. -The PyPy 2.7 release supports: +The PyPy release supports: * **x86** machines on most common operating systems (Linux 32/64 bits, Mac OS X 64 bits, Windows 32 bits, OpenBSD, FreeBSD) diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -2,41 +2,6 @@ What's new in PyPy2.7 5.10+ =========================== -.. this is a revision shortly after release-pypy2.7-v5.9.0 -.. startrev:d56dadcef996 +.. this is a revision shortly after release-pypy2.7-v5.10.0 +.. startrev: 6b024edd9d12 - -.. branch: cppyy-packaging - -Cleanup and improve cppyy packaging - -.. branch: docs-osx-brew-openssl - -.. branch: keep-debug-symbols - -Add a smartstrip tool, which can optionally keep the debug symbols in a -separate file, instead of just stripping them away. Use it in packaging - -.. branch: bsd-patches - -Fix failures on FreeBSD, contributed by David Naylor as patches on the issue -tracker (issues 2694, 2695, 2696, 2697) - -.. branch: run-extra-tests - -Run extra_tests/ in buildbot - -.. branch: vmprof-0.4.10 - -Upgrade the _vmprof backend to vmprof 0.4.10 - -.. branch: fix-vmprof-stacklet-switch -.. branch: fix-vmprof-stacklet-switch-2 -Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) - -.. branch: win32-vcvars - -.. branch: rdict-fast-hash - -Make it possible to declare that the hash function of an r_dict is fast in RPython. - diff --git a/pypy/doc/whatsnew-pypy2-5.10.0.rst b/pypy/doc/whatsnew-pypy2-5.10.0.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/whatsnew-pypy2-5.10.0.rst @@ -0,0 +1,42 @@ +========================== +What's new in PyPy2.7 5.10 +========================== + +.. this is a revision shortly after release-pypy2.7-v5.9.0 +.. startrev:d56dadcef996 + + +.. branch: cppyy-packaging + +Cleanup and improve cppyy packaging + +.. branch: docs-osx-brew-openssl + +.. 
branch: keep-debug-symbols + +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches + +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests + +Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 + +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch +.. branch: fix-vmprof-stacklet-switch-2 +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + +.. branch: rdict-fast-hash + +Make it possible to declare that the hash function of an r_dict is fast in RPython. + diff --git a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c b/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c --- a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c +++ b/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c @@ -3,9 +3,7 @@ #define MS_WIN32 #endif -#include "src/precommondefs.h" - -#define EXPORT(x) RPY_EXPORTED x +#define EXPORT(x) extern x #include #include @@ -272,7 +270,7 @@ { double x, sum=0.0, dx=(b-a)/(double)nstep; for(x=a+0.5*dx; (b-x)*(x-a)>0.0; x+=dx) - { + { double y = f(x); printf("f(x)=%.1f\n", y); sum += f(x); @@ -287,7 +285,7 @@ static void _xxx_init(void *(*Xalloc)(int), void (*Xfree)(void *)) { void *ptr; - + printf("_xxx_init got %p %p\n", Xalloc, Xfree); printf("calling\n"); ptr = Xalloc(32); @@ -438,7 +436,7 @@ #endif /********/ - + #ifndef MS_WIN32 typedef struct { diff --git a/pypy/module/test_lib_pypy/ctypes_tests/conftest.py b/pypy/module/test_lib_pypy/ctypes_tests/conftest.py --- a/pypy/module/test_lib_pypy/ctypes_tests/conftest.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/conftest.py @@ -1,14 +1,91 @@ -import py, pytest +import py +import pytest import sys +import os def pytest_ignore_collect(path): if '__pypy__' not in sys.builtin_module_names: return True +# XXX: copied from pypy/tool/cpyext/extbuild.py +if os.name != 'nt': + so_ext = 'so' +else: + so_ext = 'dll' + +def _build(cfilenames, outputfilename, compile_extra, link_extra, + include_dirs, libraries, library_dirs): + try: + # monkeypatch distutils for some versions of msvc compiler + import setuptools + except ImportError: + # XXX if this fails and is required, + # we must call pypy -mensurepip after translation + pass + from distutils.ccompiler import new_compiler + from distutils import sysconfig + + # XXX for Darwin running old versions of CPython 2.7.x + sysconfig.get_config_vars() + + compiler = new_compiler(force=1) + sysconfig.customize_compiler(compiler) # XXX + objects = [] + for cfile in cfilenames: + cfile = py.path.local(cfile) + old = cfile.dirpath().chdir() + try: + res = compiler.compile([cfile.basename], + include_dirs=include_dirs, extra_preargs=compile_extra) + assert len(res) == 1 + cobjfile = py.path.local(res[0]) + assert cobjfile.check() + objects.append(str(cobjfile)) + finally: + old.chdir() + + compiler.link_shared_object( + objects, str(outputfilename), + libraries=libraries, + extra_preargs=link_extra, + library_dirs=library_dirs) + +def c_compile(cfilenames, outputfilename, + compile_extra=None, link_extra=None, + include_dirs=None, libraries=None, library_dirs=None): + compile_extra = compile_extra or [] + link_extra = link_extra or [] + include_dirs = include_dirs or [] + libraries = libraries or [] + library_dirs = library_dirs or [] + if sys.platform == 'win32': + link_extra = link_extra + 
['/DEBUG'] # generate .pdb file + if sys.platform == 'darwin': + # support Fink & Darwinports + for s in ('/sw/', '/opt/local/'): + if (s + 'include' not in include_dirs + and os.path.exists(s + 'include')): + include_dirs.append(s + 'include') + if s + 'lib' not in library_dirs and os.path.exists(s + 'lib'): + library_dirs.append(s + 'lib') + + outputfilename = py.path.local(outputfilename).new(ext=so_ext) + saved_environ = os.environ.copy() + try: + _build( + cfilenames, outputfilename, + compile_extra, link_extra, + include_dirs, libraries, library_dirs) + finally: + # workaround for a distutils bugs where some env vars can + # become longer and longer every time it is used + for key, value in saved_environ.items(): + if os.environ.get(key) != value: + os.environ[key] = value + return outputfilename +# end copy + def compile_so_file(): - from rpython.translator.platform import platform - from rpython.translator.tool.cbuild import ExternalCompilationInfo - from rpython.translator import cdir udir = pytest.ensuretemp('_ctypes_test') cfile = py.path.local(__file__).dirpath().join("_ctypes_test.c") @@ -16,11 +93,8 @@ libraries = ['oleaut32'] else: libraries = [] - eci = ExternalCompilationInfo(libraries=libraries, - include_dirs=[cdir]) - return platform.compile([cfile], eci, str(udir.join('_ctypes_test')), - standalone=False) + return c_compile([cfile], str(udir / '_ctypes_test'), libraries=libraries) # we need to run after the "tmpdir" plugin which installs pytest.ensuretemp @pytest.mark.trylast diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_anon.py b/pypy/module/test_lib_pypy/ctypes_tests/test_anon.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_anon.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_anon.py @@ -1,3 +1,4 @@ +import pytest from ctypes import * from support import BaseCTypesTestChecker @@ -22,18 +23,15 @@ def test_anon_nonseq(self): # TypeError: _anonymous_ must be a sequence - raises(TypeError, - lambda: type(Structure)("Name", - (Structure,), - {"_fields_": [], "_anonymous_": 42})) + with pytest.raises(TypeError): + type(Structure)( + "Name", (Structure,), {"_fields_": [], "_anonymous_": 42}) def test_anon_nonmember(self): # AttributeError: type object 'Name' has no attribute 'x' - raises(AttributeError, - lambda: type(Structure)("Name", - (Structure,), - {"_fields_": [], - "_anonymous_": ["x"]})) + with pytest.raises(AttributeError): + type(Structure)( + "Name", (Structure,), {"_fields_": [], "_anonymous_": ["x"]}) def test_nested(self): class ANON_S(Structure): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_array.py b/pypy/module/test_lib_pypy/ctypes_tests/test_array.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_array.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_array.py @@ -1,5 +1,4 @@ - -import py +import pytest from ctypes import * from support import BaseCTypesTestChecker @@ -40,7 +39,8 @@ assert values == [0] * len(init) # Too many in itializers should be caught - py.test.raises(IndexError, int_array, *range(alen*2)) + with pytest.raises(IndexError): + int_array(*range(alen*2)) CharArray = ARRAY(c_char, 3) @@ -48,7 +48,8 @@ # Should this work? It doesn't: # CharArray("abc") - py.test.raises(TypeError, CharArray, "abc") + with pytest.raises(TypeError): + CharArray("abc") assert ca[0] == "a" assert ca[1] == "b" @@ -61,10 +62,12 @@ # slicing is now supported, but not extended slicing (3-argument)! 
from operator import getslice, delitem - py.test.raises(TypeError, getslice, ca, 0, 1, -1) + with pytest.raises(TypeError): + getslice(ca, 0, 1, -1) # cannot delete items - py.test.raises(TypeError, delitem, ca, 0) + with pytest.raises(TypeError): + delitem(ca, 0) def test_numeric_arrays(self): @@ -165,7 +168,8 @@ assert isinstance(Car("abcdefghi", 42.0, "12345").brand, bytes) assert Car("abcdefghi", 42.0, "12345").brand == "abcdefghi" assert Car("abcdefghio", 42.0, "12345").brand == "abcdefghio" - raises(ValueError, Car, "abcdefghiop", 42.0, "12345") + with pytest.raises(ValueError): + Car("abcdefghiop", 42.0, "12345") A = Car._fields_[2][1] TP = POINTER(A) diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_as_parameter.py b/pypy/module/test_lib_pypy/ctypes_tests/test_as_parameter.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_as_parameter.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_as_parameter.py @@ -1,5 +1,5 @@ from ctypes import * -import py +import pytest from support import BaseCTypesTestChecker def setup_module(mod): @@ -104,7 +104,8 @@ # check that the prototype works: we call f with wrong # argument types cb = AnotherCallback(callback) - raises(ArgumentError, f, self.wrap(-10), self.wrap(cb)) + with pytest.raises(ArgumentError): + f(self.wrap(-10), self.wrap(cb)) def test_callbacks_2(self): # Can also use simple datatypes as argument type specifiers @@ -213,4 +214,4 @@ wrap = AsParamPropertyWrapper #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - + diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py b/pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py @@ -1,4 +1,4 @@ -import py +import pytest from ctypes import * from support import BaseCTypesTestChecker import os @@ -245,7 +245,5 @@ def test_set_fields_cycle_fails(self): class A(Structure): pass - import pytest - pytest.raises(AttributeError, """ + with pytest.raises(AttributeError): A._fields_ = [("a", A)] - """) diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py b/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py @@ -1,5 +1,5 @@ from ctypes import * -import py +import pytest from support import BaseCTypesTestChecker class TestCallbacks(BaseCTypesTestChecker): @@ -22,7 +22,7 @@ c_uint: (int, long), c_ulong: (int, long), } - + PROTO = self.functype.im_func(typ, typ) cfunc = PROTO(self.callback) result = cfunc(arg) @@ -101,15 +101,18 @@ ## self.check_type(c_char_p, "abc") ## self.check_type(c_char_p, "def") + + @pytest.mark.xfail( + reason="we are less strict about callback return type sanity") def test_unsupported_restype_1(self): - py.test.skip("we are less strict about callback return type sanity") # Only "fundamental" result types are supported for callback # functions, the type must have a non-NULL stgdict->setfunc. # POINTER(c_double), for example, is not supported. 
prototype = self.functype.im_func(POINTER(c_double)) # The type is checked when the prototype is called - raises(TypeError, prototype, lambda: None) + with pytest.raises(TypeError): + prototype(lambda: None) try: WINFUNCTYPE @@ -193,9 +196,10 @@ class RECT(Structure): _fields_ = [("left", c_int), ("top", c_int), ("right", c_int), ("bottom", c_int)] - + proto = CFUNCTYPE(RECT, c_int) - raises(TypeError, proto, lambda r: 0) + with pytest.raises(TypeError): + proto(lambda r: 0) def test_qsort(self): @@ -210,7 +214,7 @@ a[i] = 5-i assert a[0] == 5 # sanity - + def comp(a, b): a = a.contents.value b = b.contents.value @@ -273,4 +277,5 @@ FUNC = CFUNCTYPE(None, c_void_p) cfunc = FUNC(callback) param = c_uint(42) - py.test.raises(ArgumentError, "cfunc(param)") + with pytest.raises(ArgumentError): + cfunc(param) diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_checkretval.py b/pypy/module/test_lib_pypy/ctypes_tests/test_checkretval.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_checkretval.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_checkretval.py @@ -1,4 +1,4 @@ -import py +import pytest import sys from ctypes import * @@ -33,6 +33,5 @@ pass else: def test_oledll(self): - raises(WindowsError, - oledll.oleaut32.CreateTypeLib2, - 0, 0, 0) + with pytest.raises(WindowsError): + oledll.oleaut32.CreateTypeLib2(0, 0, 0) diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_commethods.py b/pypy/module/test_lib_pypy/ctypes_tests/test_commethods.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_commethods.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_commethods.py @@ -2,10 +2,10 @@ # Can't resist from implementing some kind of mini-comtypes # theller ;-) -import py +import pytest import sys if sys.platform != "win32": - py.test.skip('windows only test') + pytest.importorskip('skip_the_whole_module') # hack! import ctypes, new, unittest from ctypes.wintypes import HRESULT @@ -27,7 +27,7 @@ if instance is None: return self return new.instancemethod(self.func, instance, owner) - + def commethod(index, restype, *argtypes): """A decorator that generates COM methods. The decorated function itself is not used except for it's name.""" @@ -72,7 +72,8 @@ assert 4 == punk.AddRef() punk.SetName("TypeLib_ByPYPY") - py.test.raises(COMError, lambda: punk.SetName(None)) + with pytest.raises(COMError): + punk.SetName(None) # This would save the typelib to disk. ## punk.SaveAllChanges() diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_funcptr.py b/pypy/module/test_lib_pypy/ctypes_tests/test_funcptr.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_funcptr.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_funcptr.py @@ -1,4 +1,4 @@ -import py +import pytest import sys, os, unittest from ctypes import * @@ -27,6 +27,8 @@ assert sizeof(x) == sizeof(c_voidp) assert sizeof(X) == sizeof(c_voidp) + @pytest.mark.xfail( + reason="cdecl funcptrs ignoring extra args is not implemented") def test_first(self): StdCallback = WINFUNCTYPE(c_int, c_int, c_int) CdeclCallback = CFUNCTYPE(c_int, c_int, c_int) @@ -42,14 +44,13 @@ # The following no longer raises a TypeError - it is now # possible, as in C, to call cdecl functions with more parameters. 
#self.assertRaises(TypeError, c, 1, 2, 3) - py.test.skip("cdecl funcptrs ignoring extra args is not implemented") assert c(1, 2, 3, 4, 5, 6) == 3 if not WINFUNCTYPE is CFUNCTYPE and os.name != "ce": - raises(TypeError, s, 1, 2, 3) + with pytest.raises(TypeError): + s(1, 2, 3) + @pytest.mark.skipif("sys.platform != 'win32'") def test_structures(self): - if sys.platform != 'win32': - py.test.skip("win32 related") WNDPROC = WINFUNCTYPE(c_long, c_int, c_int, c_int, c_int) def wndproc(hwnd, msg, wParam, lParam): @@ -130,9 +131,10 @@ assert strtok(None, "\n") == "c" assert strtok(None, "\n") == None + @pytest.mark.xfail( + reason="This test needs mmap to make sure the code is executable, " + "please rewrite me") def test_from_address(self): - py.test.skip("This test needs mmap to make sure the" - " code is executable, please rewrite me") def make_function(): proto = CFUNCTYPE(c_int) a=create_string_buffer( diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py @@ -8,7 +8,7 @@ from __future__ import with_statement from ctypes import * import sys -import py +import pytest from support import BaseCTypesTestChecker try: @@ -90,7 +90,7 @@ f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_double] f.restype = c_wchar result = f(0, 0, 0, 0, 0, 0) - assert result == '\x00' + assert result == u'\x00' def test_char_result(self): f = dll._testfunc_i_bhilfd @@ -140,7 +140,8 @@ assert type(result) == int # You cannot assing character format codes as restype any longer - raises(TypeError, setattr, f, "restype", "i") + with pytest.raises(TypeError): + setattr(f, "restype", "i") def test_unicode_function_name(self): f = dll[u'_testfunc_i_bhilfd'] @@ -237,7 +238,8 @@ result = f(arg) assert not result.contents == v.value - raises(ArgumentError, f, byref(c_short(22))) + with pytest.raises(ArgumentError): + f(byref(c_short(22))) # It is dangerous, however, because you don't control the lifetime # of the pointer: @@ -262,7 +264,8 @@ class X(Structure): _fields_ = [("y", c_int)] - raises(ArgumentError, f, X()) #cannot convert parameter + with pytest.raises(ArgumentError): + f(X()) #cannot convert parameter ################################################################ def test_shorts(self): @@ -310,7 +313,8 @@ # check that the prototype works: we call f with wrong # argument types cb = AnotherCallback(callback) - raises(ArgumentError, f, -10, cb) + with pytest.raises(ArgumentError): + f(-10, cb) def test_callbacks_2(self): @@ -351,8 +355,10 @@ assert 13577625587 == f(1000000000000, cb) def test_errors_2(self): - raises(AttributeError, getattr, dll, "_xxx_yyy") - raises(ValueError, c_int.in_dll, dll, "_xxx_yyy") + with pytest.raises(AttributeError): + getattr(dll, "_xxx_yyy") + with pytest.raises(ValueError): + c_int.in_dll(dll, "_xxx_yyy") def test_byval(self): # without prototype @@ -466,16 +472,16 @@ result = f("abcd", ord("b"), 42) assert result == "bcd" + @pytest.mark.xfail(reason="we are less strict in checking callback parameters") def test_sf1651235(self): - py.test.skip("we are less strict in checking callback parameters") # see http://www.python.org/sf/1651235 - proto = CFUNCTYPE(c_int, RECT, POINT) def callback(*args): return 0 callback = proto(callback) - raises(ArgumentError, lambda: callback((1, 2, 3, 4), POINT())) + with pytest.raises(ArgumentError): + callback((1, 2, 3, 4), POINT()) def 
test_argument_conversion_and_checks(self): #This test is designed to check for segfaults if the wrong type of argument is passed as parameter @@ -485,8 +491,10 @@ assert strlen("eggs", ord("g")) == "ggs" # Should raise ArgumentError, not segfault - py.test.raises(ArgumentError, strlen, 0, 0) - py.test.raises(ArgumentError, strlen, False, 0) + with pytest.raises(ArgumentError): + strlen(0, 0) + with pytest.raises(ArgumentError): + strlen(False, 0) def test_union_as_passed_value(self): class UN(Union): @@ -524,8 +532,8 @@ assert tf_b("yadda") == -42 assert seen == ["yadda"] + @pytest.mark.xfail(reason="warnings are disabled") def test_warnings(self): - py.test.skip("warnings are disabled") import warnings warnings.simplefilter("always") with warnings.catch_warnings(record=True) as w: @@ -534,8 +542,8 @@ assert issubclass(w[0].category, RuntimeWarning) assert "C function without declared arguments called" in str(w[0]) + @pytest.mark.xfail def test_errcheck(self): - py.test.skip('fixme') def errcheck(result, func, args): assert result == -42 assert type(result) is int @@ -556,12 +564,12 @@ assert len(w) == 1 assert issubclass(w[0].category, RuntimeWarning) assert "C function without declared return type called" in str(w[0]) - + with warnings.catch_warnings(record=True) as w: dll.get_an_integer.restype = None dll.get_an_integer() assert len(w) == 0 - + warnings.resetwarnings() @@ -597,7 +605,7 @@ get_data.errcheck = ret_list_p(1) assert get_data('testing!') == [-1, -2, -3, -4] - def test_issue2533(self): + def test_issue2533(self, tmpdir): import cffi ffi = cffi.FFI() ffi.cdef("int **fetchme(void);") @@ -609,11 +617,10 @@ return &pa; } """) - from rpython.tool.udir import udir - ffi.compile(verbose=True, tmpdir=str(udir)) + ffi.compile(verbose=True, tmpdir=str(tmpdir)) import sys - sys.path.insert(0, str(udir)) + sys.path.insert(0, str(tmpdir)) try: from _x_cffi import ffi, lib finally: diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_init.py b/pypy/module/test_lib_pypy/ctypes_tests/test_init.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_init.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_init.py @@ -1,7 +1,6 @@ -import py +import pytest from ctypes import * -py.test.skip("subclassing semantics and implementation details not implemented") class X(Structure): _fields_ = [("a", c_int), @@ -21,19 +20,20 @@ _fields_ = [("x", X)] -class TestInit: - def test_get(self): - # make sure the only accessing a nested structure - # doesn't call the structure's __new__ and __init__ - y = Y() - assert (y.x.a, y.x.b) == (0, 0) - assert y.x.new_was_called == False + at pytest.mark.xfail( + reason="subclassing semantics and implementation details not implemented") +def test_get(): + # make sure the only accessing a nested structure + # doesn't call the structure's __new__ and __init__ + y = Y() + assert (y.x.a, y.x.b) == (0, 0) + assert y.x.new_was_called == False - # But explicitely creating an X structure calls __new__ and __init__, of course. - x = X() - assert (x.a, x.b) == (9, 12) - assert x.new_was_called == True + # But explicitely creating an X structure calls __new__ and __init__, of course. 
+ x = X() + assert (x.a, x.b) == (9, 12) + assert x.new_was_called == True - y.x = x - assert (y.x.a, y.x.b) == (9, 12) - assert y.x.new_was_called == False + y.x = x + assert (y.x.a, y.x.b) == (9, 12) + assert y.x.new_was_called == False diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py b/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_loading.py @@ -1,4 +1,4 @@ -import py +import pytest from ctypes import * import sys import os @@ -27,14 +27,17 @@ def test_load(self): CDLL(libc_name) CDLL(os.path.basename(libc_name)) - raises(OSError, CDLL, self.unknowndll) + with pytest.raises(OSError): + CDLL(self.unknowndll) if libc_name is not None and os.path.basename(libc_name) == "libc.so.6": def test_load_version(self): cdll.LoadLibrary("libc.so.6") # linux uses version, libc 9 should not exist - raises(OSError, cdll.LoadLibrary, "libc.so.9") - raises(OSError, cdll.LoadLibrary, self.unknowndll) + with pytest.raises(OSError): + cdll.LoadLibrary("libc.so.9") + with pytest.raises(OSError): + cdll.LoadLibrary(self.unknowndll) def test_find(self): for name in ("c", "m"): @@ -80,7 +83,5 @@ f_name_addr = c_void_p.from_address(a_name).value assert hex(f_ord_addr) == hex(f_name_addr) - raises(AttributeError, dll.__getitem__, 1234) - -if __name__ == "__main__": - unittest.main() + with pytest.raises(AttributeError): + dll[1234] diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py @@ -1,4 +1,4 @@ -import py +import pytest from ctypes import * from support import BaseCTypesTestChecker import sys, struct @@ -65,8 +65,10 @@ # Only numbers are allowed in the contructor, # otherwise TypeError is raised for t in signed_types + unsigned_types + float_types: - raises(TypeError, t, "") - raises(TypeError, t, None) + with pytest.raises(TypeError): + t("") + with pytest.raises(TypeError): + t(None) ## def test_valid_ranges(self): ## # invalid values of the correct type @@ -75,16 +77,16 @@ ## self.assertRaises(ValueError, t, l-1) ## self.assertRaises(ValueError, t, h+1) + @pytest.mark.xfail(reason="testing implementation internals") def test_from_param(self): # the from_param class method attribute always # returns PyCArgObject instances - py.test.skip("testing implementation internals") for t in signed_types + unsigned_types + float_types: assert ArgType == type(t.from_param(0)) + @pytest.mark.xfail(reason="testing implementation internals") def test_byref(self): # calling byref returns also a PyCArgObject instance - py.test.skip("testing implementation internals") for t in signed_types + unsigned_types + float_types: parm = byref(t()) assert ArgType == type(parm) @@ -107,7 +109,8 @@ def test_integers(self): # integers cannot be constructed from floats for t in signed_types + unsigned_types: - raises(TypeError, t, 3.14) + with pytest.raises(TypeError): + t(3.14) def test_sizes(self): for t in signed_types + unsigned_types + float_types: @@ -184,7 +187,8 @@ # c_int() can be initialized from Python's int, and c_int. 
# Not from c_long or so, which seems strange, abd should # probably be changed: - raises(TypeError, c_int, c_long(42)) + with pytest.raises(TypeError): + c_int(c_long(42)) def test_subclass(self): class enum(c_int): @@ -194,14 +198,13 @@ _fields_ = [('t', enum)] assert isinstance(S().t, enum) + #@pytest.mark.xfail("'__pypy__' not in sys.builtin_module_names") + @pytest.mark.xfail def test_no_missing_shape_to_ffi_type(self): # whitebox test - import sys - if '__pypy__' not in sys.builtin_module_names: - skip("only for pypy's ctypes") - skip("re-enable after adding 'g' to _shape_to_ffi_type.typemap, " - "which I think needs fighting all the way up from " - "rpython.rlib.libffi") + "re-enable after adding 'g' to _shape_to_ffi_type.typemap, " + "which I think needs fighting all the way up from " + "rpython.rlib.libffi" from _ctypes.basics import _shape_to_ffi_type from _rawffi import Array for i in range(1, 256): @@ -212,7 +215,7 @@ else: assert chr(i) in _shape_to_ffi_type.typemap - @py.test.mark.xfail + @pytest.mark.xfail def test_pointer_to_long_double(self): import ctypes ctypes.POINTER(ctypes.c_longdouble) diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_parameters.py b/pypy/module/test_lib_pypy/ctypes_tests/test_parameters.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_parameters.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_parameters.py @@ -1,4 +1,4 @@ -import py +import pytest import sys class TestSimpleTypes: @@ -60,10 +60,10 @@ o = My_void_pp() assert Void_pp.from_param(o) is o - + # XXX Replace by c_char_p tests + @pytest.mark.xfail(reason="testing implementation internals") def test_cstrings(self): - py.test.skip("testing implementation internals") from ctypes import c_char_p, byref # c_char_p.from_param on a Python String packs the string @@ -72,18 +72,20 @@ assert c_char_p.from_param(s)._obj is s # new in 0.9.1: convert (encode) unicode to ascii - assert c_char_p.from_param("123")._obj == b"123" - raises(UnicodeEncodeError, c_char_p.from_param, "123\377") + assert c_char_p.from_param(u"123")._obj == b"123" + with pytest.raises(UnicodeEncodeError): + c_char_p.from_param(u"123\377") - raises(TypeError, c_char_p.from_param, 42) + with pytest.raises(TypeError): + c_char_p.from_param(42) # calling c_char_p.from_param with a c_char_p instance # returns the argument itself: a = c_char_p("123") assert c_char_p.from_param(a) is a + @pytest.mark.xfail(reason="testing implementation internals") def test_cw_strings(self): - py.test.skip("testing implementation internals") from ctypes import byref try: from ctypes import c_wchar_p @@ -93,11 +95,13 @@ s = "123" if sys.platform == "win32": assert c_wchar_p.from_param(s)._obj is s - raises(TypeError, c_wchar_p.from_param, 42) + with pytest.raises(TypeError): + c_wchar_p.from_param(42) # new in 0.9.1: convert (decode) ascii to unicode - assert c_wchar_p.from_param(b"123")._obj == "123" - raises(UnicodeDecodeError, c_wchar_p.from_param, b"123\377") + assert c_wchar_p.from_param(b"123")._obj == u"123" + with pytest.raises(UnicodeDecodeError): + c_wchar_p.from_param(b"123\377") pa = c_wchar_p.from_param(c_wchar_p("123")) assert type(pa) == c_wchar_p @@ -115,9 +119,12 @@ assert not LPINT.from_param(None) if c_int != c_long: - raises(TypeError, LPINT.from_param, pointer(c_long(42))) - raises(TypeError, LPINT.from_param, pointer(c_uint(42))) - raises(TypeError, LPINT.from_param, pointer(c_short(42))) + with pytest.raises(TypeError): + LPINT.from_param(pointer(c_long(42))) + with pytest.raises(TypeError): + 
LPINT.from_param(pointer(c_uint(42))) + with pytest.raises(TypeError): + LPINT.from_param(pointer(c_short(42))) def test_byref_pointer(self): # The from_param class method of POINTER(typ) classes accepts what is @@ -127,10 +134,13 @@ LPINT.from_param(byref(c_int(42))) - raises(TypeError, LPINT.from_param, byref(c_short(22))) + with pytest.raises(TypeError): + LPINT.from_param(byref(c_short(22))) if c_int != c_long: - raises(TypeError, LPINT.from_param, byref(c_long(22))) - raises(TypeError, LPINT.from_param, byref(c_uint(22))) + with pytest.raises(TypeError): + LPINT.from_param(byref(c_long(22))) + with pytest.raises(TypeError): + LPINT.from_param(byref(c_uint(22))) def test_byref_pointerpointer(self): # See above @@ -139,10 +149,13 @@ LPLPINT = POINTER(POINTER(c_int)) LPLPINT.from_param(byref(pointer(c_int(42)))) - raises(TypeError, LPLPINT.from_param, byref(pointer(c_short(22)))) + with pytest.raises(TypeError): + LPLPINT.from_param(byref(pointer(c_short(22)))) if c_int != c_long: - raises(TypeError, LPLPINT.from_param, byref(pointer(c_long(22)))) - raises(TypeError, LPLPINT.from_param, byref(pointer(c_uint(22)))) + with pytest.raises(TypeError): + LPLPINT.from_param(byref(pointer(c_long(22)))) + with pytest.raises(TypeError): + LPLPINT.from_param(byref(pointer(c_uint(22)))) def test_array_pointers(self): from ctypes import c_short, c_uint, c_int, c_long, POINTER @@ -155,15 +168,18 @@ # the same type! LPINT = POINTER(c_int) LPINT.from_param((c_int*3)()) - raises(TypeError, LPINT.from_param, c_short*3) - raises(TypeError, LPINT.from_param, c_long*3) - raises(TypeError, LPINT.from_param, c_uint*3) + with pytest.raises(TypeError): + LPINT.from_param(c_short*3) + with pytest.raises(TypeError): + LPINT.from_param(c_long*3) + with pytest.raises(TypeError): + LPINT.from_param(c_uint*3) ## def test_performance(self): ## check_perf() + @pytest.mark.xfail(reason="testing implementation internals") def test_noctypes_argtype(self): - py.test.skip("we implement details differently") from ctypes import CDLL, c_void_p, ArgumentError import conftest dll = CDLL(str(conftest.sofile)) @@ -171,7 +187,8 @@ func = dll._testfunc_p_p func.restype = c_void_p # TypeError: has no from_param method - raises(TypeError, setattr, func, "argtypes", (object,)) + with pytest.raises(TypeError): + setattr(func, "argtypes", (object,)) class Adapter(object): def from_param(cls, obj): @@ -187,7 +204,8 @@ func.argtypes = (Adapter(),) # don't know how to convert parameter 1 - raises(ArgumentError, func, object()) + with pytest.raises(ArgumentError): + func(object()) assert func(c_void_p(42)) == 42 class Adapter(object): @@ -196,7 +214,8 @@ func.argtypes = (Adapter(),) # ArgumentError: argument 1: ValueError: 99 - raises(ArgumentError, func, 99) + with pytest.raises(ArgumentError): + func(99) def test_multiple_signature(self): # when .argtypes is not set, calling a function with a certain @@ -212,4 +231,4 @@ # This one is normal assert func(None) == 0 - + diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_prototypes.py b/pypy/module/test_lib_pypy/ctypes_tests/test_prototypes.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_prototypes.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_prototypes.py @@ -1,4 +1,4 @@ -import py +import pytest from ctypes import * from support import BaseCTypesTestChecker @@ -31,21 +31,27 @@ def test_restype_setattr(self): func = testdll._testfunc_p_p - raises(TypeError, setattr, func, 'restype', 20) + with pytest.raises(TypeError): + setattr(func, 'restype', 20) def 
test_argtypes_setattr(self): func = testdll._testfunc_p_p - raises(TypeError, setattr, func, 'argtypes', 20) - raises(TypeError, setattr, func, 'argtypes', [20]) + with pytest.raises(TypeError): + setattr(func, 'argtypes', 20) + with pytest.raises(TypeError): + setattr(func, 'argtypes', [20]) func = CFUNCTYPE(c_long, c_void_p, c_long)(lambda: None) assert func.argtypes == (c_void_p, c_long) def test_paramflags_setattr(self): func = CFUNCTYPE(c_long, c_void_p, c_long)(lambda: None) - raises(TypeError, setattr, func, 'paramflags', 'spam') - raises(ValueError, setattr, func, 'paramflags', (1, 2, 3, 4)) - raises(TypeError, setattr, func, 'paramflags', ((1,), ('a',))) + with pytest.raises(TypeError): + setattr(func, 'paramflags', 'spam') + with pytest.raises(ValueError): + setattr(func, 'paramflags', (1, 2, 3, 4)) + with pytest.raises(TypeError): + setattr(func, 'paramflags', ((1,), ('a',))) func.paramflags = (1,), (1|4,) def test_kwargs(self): @@ -107,13 +113,16 @@ positive_address(func(byref(ci)))) func.argtypes = c_char_p, - raises(ArgumentError, func, byref(ci)) + with pytest.raises(ArgumentError): + func(byref(ci)) func.argtypes = POINTER(c_short), - raises(ArgumentError, func, byref(ci)) + with pytest.raises(ArgumentError): + func(byref(ci)) func.argtypes = POINTER(c_double), - raises(ArgumentError, func, byref(ci)) + with pytest.raises(ArgumentError): + func(byref(ci)) def test_POINTER_c_char_arg(self): func = testdll._testfunc_p_p @@ -252,7 +261,8 @@ func.restype = POINTER(c_int) func.argtypes = [c_int * 8] array = ARRAY(1, 2, 3, 4, 5, 6, 7, 8) - py.test.raises(ArgumentError, "func(array)") + with pytest.raises(ArgumentError): + func(array) ################################################################ diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_returnfuncptrs.py b/pypy/module/test_lib_pypy/ctypes_tests/test_returnfuncptrs.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_returnfuncptrs.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_returnfuncptrs.py @@ -1,4 +1,4 @@ -import py +import pytest from ctypes import * @@ -16,8 +16,10 @@ strchr = get_strchr() assert strchr("abcdef", "b") == "bcdef" assert strchr("abcdef", "x") == None - raises(ArgumentError, strchr, "abcdef", 3) - raises(TypeError, strchr, "abcdef") + with pytest.raises(ArgumentError): + strchr("abcdef", 3) + with pytest.raises(TypeError): + strchr("abcdef") def test_without_prototype(self): get_strchr = dll.get_strchr @@ -29,5 +31,7 @@ strchr = CFUNCTYPE(c_char_p, c_char_p, c_char)(addr) assert strchr("abcdef", "b"), "bcdef" assert strchr("abcdef", "x") == None - raises(ArgumentError, strchr, "abcdef", 3) - raises(TypeError, strchr, "abcdef") + with pytest.raises(ArgumentError): + strchr("abcdef", 3) + with pytest.raises(TypeError): + strchr("abcdef") diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_slicing.py b/pypy/module/test_lib_pypy/ctypes_tests/test_slicing.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_slicing.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_slicing.py @@ -1,4 +1,4 @@ -import py +import pytest from support import BaseCTypesTestChecker from ctypes import * @@ -30,13 +30,17 @@ from operator import setslice # TypeError: int expected instead of str instance - raises(TypeError, setslice, a, 0, 5, b"abcde") + with pytest.raises(TypeError): + setslice(a, 0, 5, b"abcde") # TypeError: int expected instead of str instance - raises(TypeError, setslice, a, 0, 5, [b"a", b"b", b"c", b"d", b"e"]) + with pytest.raises(TypeError): + setslice(a, 0, 5, [b"a", b"b", b"c", b"d", 
b"e"]) # TypeError: int expected instead of float instance - raises(TypeError, setslice, a, 0, 5, [1, 2, 3, 4, 3.14]) + with pytest.raises(TypeError): + setslice(a, 0, 5, [1, 2, 3, 4, 3.14]) # ValueError: Can only assign sequence of same size - raises(ValueError, setslice, a, 0, 5, range(32)) + with pytest.raises(ValueError): + setslice(a, 0, 5, range(32)) def test_char_ptr(self): s = "abcdefghijklmnopqrstuvwxyz" @@ -47,8 +51,8 @@ assert res[:len(s)] == s import operator - raises(TypeError, operator.setslice, - res, 0, 5, "abcde") + with pytest.raises(TypeError): + operator.setslice(res, 0, 5, u"abcde") dll.my_free(res) dll.my_strdup.restype = POINTER(c_byte) @@ -99,8 +103,8 @@ assert res[:len(s)] == s import operator - raises(TypeError, operator.setslice, - res, 0, 5, "abcde") + with pytest.raises(TypeError): + operator.setslice(res, 0, 5, u"abcde") dll.my_free(res) if sizeof(c_wchar) == sizeof(c_short): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_stringptr.py b/pypy/module/test_lib_pypy/ctypes_tests/test_stringptr.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_stringptr.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_stringptr.py @@ -1,4 +1,4 @@ -import py +import pytest from support import BaseCTypesTestChecker from ctypes import * @@ -16,7 +16,8 @@ x = X() # NULL pointer access - raises(ValueError, getattr, x.str, "contents") + with pytest.raises(ValueError): + x.str.contents b = c_buffer("Hello, World") #from sys import getrefcount as grc #assert grc(b) == 2 @@ -31,7 +32,6 @@ # XXX pypy modified: #raises(TypeError, setattr, x, "str", "Hello, World") x = b = None - py.test.skip("test passes! but modified to avoid getrefcount and detail issues") def test__c_char_p(self): class X(Structure): @@ -47,7 +47,6 @@ #b = c_buffer("Hello, World") #raises(TypeError, setattr, x, "str", b) x = None - py.test.skip("test passes! 
but modified to avoid detail issues") def test_functions(self): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_struct_fields.py b/pypy/module/test_lib_pypy/ctypes_tests/test_struct_fields.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_struct_fields.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_struct_fields.py @@ -1,4 +1,4 @@ -import py +import pytest from support import BaseCTypesTestChecker from ctypes import * @@ -18,31 +18,37 @@ pass assert sizeof(X) == 0 # not finalized X._fields_ = [] # finalized - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + X._fields_ = [] def test_1_B(self): class X(Structure): _fields_ = [] # finalized - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + X._fields_ = [] def test_2(self): class X(Structure): pass X() - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + X._fields_ = [] def test_3(self): class X(Structure): pass class Y(Structure): _fields_ = [("x", X)] # finalizes X - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + X._fields_ = [] def test_4(self): class X(Structure): pass class Y(X): pass - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + X._fields_ = [] Y._fields_ = [] - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + X._fields_ = [] diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py @@ -2,7 +2,7 @@ from struct import calcsize from support import BaseCTypesTestChecker -import py +import pytest class TestSubclasses(BaseCTypesTestChecker): @@ -143,8 +143,10 @@ assert X.y.size == sizeof(c_char) # readonly - raises((TypeError, AttributeError), setattr, X.x, "offset", 92) - raises((TypeError, AttributeError), setattr, X.x, "size", 92) + with pytest.raises((TypeError, AttributeError)): + X.x.offset = 92 + with pytest.raises((TypeError, AttributeError)): + X.x.size = 92 class X(Union): _fields_ = [("x", c_int), @@ -157,8 +159,10 @@ assert X.y.size == sizeof(c_char) # readonly - raises((TypeError, AttributeError), setattr, X.x, "offset", 92) - raises((TypeError, AttributeError), setattr, X.x, "size", 92) + with pytest.raises((TypeError, AttributeError)): + X.x.offset = 92 + with pytest.raises((TypeError, AttributeError)): + X.x.size = 92 # XXX Should we check nested data types also? # offset is always relative to the class... 
@@ -202,23 +206,28 @@ d = {"_fields_": [("a", "b"), ("b", "q")], "_pack_": -1} - raises(ValueError, type(Structure), "X", (Structure,), d) + with pytest.raises(ValueError): + type(Structure)("X", (Structure,), d) def test_initializers(self): class Person(Structure): _fields_ = [("name", c_char*6), ("age", c_int)] - raises(TypeError, Person, 42) - raises(ValueError, Person, "asldkjaslkdjaslkdj") - raises(TypeError, Person, "Name", "HI") + with pytest.raises(TypeError): + Person(42) + with pytest.raises(ValueError): + Person("asldkjaslkdjaslkdj") + with pytest.raises(TypeError): + Person("Name", "HI") # short enough assert Person("12345", 5).name == "12345" # exact fit assert Person("123456", 5).name == "123456" # too long - raises(ValueError, Person, "1234567", 5) + with pytest.raises(ValueError): + Person("1234567", 5) def test_keyword_initializers(self): @@ -246,7 +255,8 @@ def test_invalid_field_types(self): class POINT(Structure): pass - raises(TypeError, setattr, POINT, "_fields_", [("x", 1), ("y", 2)]) + with pytest.raises(TypeError): + POINT._fields_ = [("x", 1), ("y", 2)] def test_intarray_fields(self): class SomeInts(Structure): @@ -257,7 +267,8 @@ assert SomeInts((1, 2, 3, 4)).a[:] == [1, 2, 3, 4] # too long # XXX Should raise ValueError?, not RuntimeError - raises(RuntimeError, SomeInts, (1, 2, 3, 4, 5)) + with pytest.raises(RuntimeError): + SomeInts((1, 2, 3, 4, 5)) def test_nested_initializers(self): # test initializing nested structures @@ -278,7 +289,7 @@ assert p.age == 5 def test_structures_with_wchar(self): - py.test.skip("need unicode support on _rawffi level") + pytest.skip("need unicode support on _rawffi level") try: c_wchar except NameError: @@ -288,18 +299,19 @@ _fields_ = [("name", c_wchar * 12), ("age", c_int)] - p = PersonW("Someone") + p = PersonW(u"Someone") assert p.name == "Someone" - assert PersonW("1234567890").name == "1234567890" - assert PersonW("12345678901").name == "12345678901" + assert PersonW(u"1234567890").name == u"1234567890" + assert PersonW(u"12345678901").name == u"12345678901" # exact fit - assert PersonW("123456789012").name == "123456789012" + assert PersonW(u"123456789012").name == u"123456789012" #too long - raises(ValueError, PersonW, "1234567890123") + with pytest.raises(ValueError): + PersonW(u"1234567890123") def test_init_errors(self): - py.test.skip("not implemented error details") + pytest.skip("not implemented error details") class Phone(Structure): _fields_ = [("areacode", c_char*6), ("number", c_char*12)] @@ -347,7 +359,7 @@ ## (AttributeError, "class must define a '_fields_' attribute")) def test_abstract_class(self): - py.test.skip("_abstract_ semantics not implemented") + pytest.skip("_abstract_ semantics not implemented") class X(Structure): _abstract_ = "something" # try 'X()' @@ -373,7 +385,7 @@ assert p.age == 6 def test_subclassing_field_is_a_tuple(self): - py.test.skip("subclassing semantics not implemented") + pytest.skip("subclassing semantics not implemented") class Person(Structure): _fields_ = (("name", c_char*6), ("age", c_int)) @@ -542,7 +554,7 @@ raise AssertionError("Structure or union cannot contain itself") def test_vice_versa(self): - py.test.skip("mutually dependent lazily defined structures error semantics") + pytest.skip("mutually dependent lazily defined structures error semantics") class First(Structure): pass class Second(Structure): @@ -563,18 +575,21 @@ pass assert sizeof(X) == 0 X._fields_ = [("a", c_int),] - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + 
X._fields_ = [] class X(Structure): pass X() - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + X._fields_ = [] class X(Structure): pass class Y(X): pass - raises(AttributeError, setattr, X, "_fields_", []) + with pytest.raises(AttributeError): + X._fields_ = [] Y.__fields__ = [] diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py b/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py @@ -1,6 +1,6 @@ # coding: latin-1 import ctypes -import py +import pytest from support import BaseCTypesTestChecker try: @@ -29,11 +29,11 @@ def test_ascii_strict(self): ctypes.set_conversion_mode("ascii", "strict") # no conversions take place with unicode arguments - assert wcslen("abc") == 3 - assert wcslen("ab\u2070") == 3 - # string args are converted + assert wcslen(u"abc") == 3 + assert wcslen(u"ab\u2070") == 3 + # bytes args are converted assert wcslen(b"abc") == 3 - py.test.raises(ctypes.ArgumentError, wcslen, b"aba") + pytest.raises(ctypes.ArgumentError, wcslen, "ab�") def test_ascii_replace(self): ctypes.set_conversion_mode("ascii", "replace") @@ -69,7 +69,7 @@ ctypes.set_conversion_mode("ascii", "ignore") buf = ctypes.create_unicode_buffer(b"ab���") # is that correct? not sure. But with 'ignore', you get what you pay for.. - assert buf[:] == "ab\0\0\0\0" + assert buf[:] == u"ab\0\0\0\0" class TestString(TestUnicode): def setup_method(self, method): @@ -85,32 +85,33 @@ def test_ascii_replace(self): ctypes.set_conversion_mode("ascii", "strict") assert func(b"abc") == "abc" - assert func("abc") == "abc" - raises(ctypes.ArgumentError, func, "ab�") + assert func(u"abc") == "abc" + with pytest.raises(ctypes.ArgumentError): + func(u"ab�") def test_ascii_ignore(self): ctypes.set_conversion_mode("ascii", "ignore") assert func("abc") == "abc" - assert func("abc") == "abc" - assert func("����") == "" + assert func(u"abc") == "abc" + assert func(u"����") == "" def test_ascii_replace_2(self): ctypes.set_conversion_mode("ascii", "replace") assert func("abc") == "abc" - assert func("abc") == "abc" - assert func("����") == "????" + assert func(u"abc") == "abc" + assert func(u"����") == "????" def test_buffers(self): ctypes.set_conversion_mode("ascii", "strict") - buf = ctypes.create_string_buffer("abc") + buf = ctypes.create_string_buffer(u"abc") assert len(buf) == 3+1 ctypes.set_conversion_mode("ascii", "replace") - buf = ctypes.create_string_buffer("ab���") + buf = ctypes.create_string_buffer(u"ab���") assert buf[:] == "ab???\0" ctypes.set_conversion_mode("ascii", "ignore") - buf = ctypes.create_string_buffer("ab���") + buf = ctypes.create_string_buffer(u"ab���") # is that correct? not sure. But with 'ignore', you get what you pay for.. assert buf[:] == "ab\0\0\0\0" diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_values.py b/pypy/module/test_lib_pypy/ctypes_tests/test_values.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_values.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_values.py @@ -2,7 +2,7 @@ A testcase which accesses *values* in a dll. 
""" -import py +import pytest from ctypes import * from support import BaseCTypesTestChecker @@ -27,12 +27,13 @@ assert ctdll.get_a_string_char(15) == ord('$') def test_undefined(self): - raises(ValueError, c_int.in_dll, ctdll, "Undefined_Symbol") + with pytest.raises(ValueError): + c_int.in_dll(ctdll, "Undefined_Symbol") class TestWin_Values(BaseCTypesTestChecker): """This test only works when python itself is a dll/shared library""" def setup_class(cls): - py.test.skip("tests expect and access cpython dll") + pytest.skip("tests expect and access cpython dll") def test_optimizeflag(self): # This test accesses the Py_OptimizeFlag intger, which is @@ -86,7 +87,8 @@ del _pointer_type_cache[struct_frozen] def test_undefined(self): - raises(ValueError, c_int.in_dll, pydll, "Undefined_Symbol") + with pytest.raises(ValueError): + c_int.in_dll(pydll, "Undefined_Symbol") if __name__ == '__main__': unittest.main() diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_varsize_struct.py b/pypy/module/test_lib_pypy/ctypes_tests/test_varsize_struct.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_varsize_struct.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_varsize_struct.py @@ -1,10 +1,10 @@ -import py +import pytest from support import BaseCTypesTestChecker from ctypes import * class TestVarSize(BaseCTypesTestChecker): def test_resize(self): - py.test.skip("resizing not implemented") + pytest.skip("resizing not implemented") class X(Structure): _fields_ = [("item", c_int), ("array", c_int * 1)] @@ -35,15 +35,23 @@ def test_array_invalid_length(self): # cannot create arrays with non-positive size - raises(ValueError, lambda: c_int * -1) - raises(ValueError, lambda: c_int * -3) + with pytest.raises(ValueError): + c_int * -1 + with pytest.raises(ValueError): + c_int * -3 def test_zerosized_array(self): array = (c_int * 0)() # accessing elements of zero-sized arrays raise IndexError - raises(IndexError, array.__setitem__, 0, None) - raises(IndexError, array.__getitem__, 0) - raises(IndexError, array.__setitem__, 1, None) - raises(IndexError, array.__getitem__, 1) - raises(IndexError, array.__setitem__, -1, None) - raises(IndexError, array.__getitem__, -1) + with pytest.raises(IndexError): + array.__setitem__(0, None) + with pytest.raises(IndexError): + array.__getitem__(0) + with pytest.raises(IndexError): + array.__setitem__(1, None) + with pytest.raises(IndexError): + array.__getitem__(1) + with pytest.raises(IndexError): + array.__setitem__(-1, None) + with pytest.raises(IndexError): + array.__getitem__(-1) diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_win32.py b/pypy/module/test_lib_pypy/ctypes_tests/test_win32.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_win32.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_win32.py @@ -4,11 +4,11 @@ from ctypes.test import is_resource_enabled from support import BaseCTypesTestChecker -import py +import pytest import sys if sys.platform != "win32": - py.test.skip("win32-only tests") + pytest.importorskip('skip_the_whole_module') # hack! class TestWindows(BaseCTypesTestChecker): def test_callconv_1(self): @@ -16,13 +16,15 @@ IsWindow = windll.user32.IsWindow # ValueError: Procedure probably called with not enough arguments (4 bytes missing) - py.test.raises(ValueError, IsWindow) + with pytest.raises(ValueError): + IsWindow() # This one should succeeed... 
assert IsWindow(0) == 0 # ValueError: Procedure probably called with too many arguments (8 bytes in excess) - py.test.raises(ValueError, IsWindow, 0, 0, 0) + with pytest.raises(ValueError): + IsWindow(0, 0, 0) def test_callconv_2(self): # Calling stdcall function as cdecl @@ -31,13 +33,15 @@ # ValueError: Procedure called with not enough arguments (4 bytes missing) # or wrong calling convention - py.test.raises(ValueError, IsWindow, None) + with pytest.raises(ValueError): + IsWindow(None) if is_resource_enabled("SEH"): def test_SEH(self): # Call functions with invalid arguments, and make sure that access violations # are trapped and raise an exception. - py.test.raises(WindowsError, windll.kernel32.GetModuleHandleA, 32) + with pytest.raises(WindowsError): + windll.kernel32.GetModuleHandleA(32) class TestWintypes(BaseCTypesTestChecker): diff --git a/pypy/tool/cpyext/extbuild.py b/pypy/tool/cpyext/extbuild.py --- a/pypy/tool/cpyext/extbuild.py +++ b/pypy/tool/cpyext/extbuild.py @@ -201,7 +201,7 @@ # monkeypatch distutils for some versions of msvc compiler import setuptools except ImportError: - # XXX if this fails and is required, + # XXX if this fails and is required, # we must call pypy -mensurepip after translation pass from distutils.ccompiler import new_compiler From pypy.commits at gmail.com Fri Dec 22 12:29:48 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 22 Dec 2017 09:29:48 -0800 (PST) Subject: [pypy-commit] pypy default: Make relative imports explicit Message-ID: <5a3d410c.4d341c0a.2faa0.1d43@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93554:34217fe03263 Date: 2017-12-22 18:28 +0100 http://bitbucket.org/pypy/pypy/changeset/34217fe03263/ Log: Make relative imports explicit diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_anon.py b/pypy/module/test_lib_pypy/ctypes_tests/test_anon.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_anon.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_anon.py @@ -1,6 +1,6 @@ import pytest from ctypes import * -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker class TestAnon(BaseCTypesTestChecker): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_array.py b/pypy/module/test_lib_pypy/ctypes_tests/test_array.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_array.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_array.py @@ -1,6 +1,6 @@ import pytest from ctypes import * -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker formats = "bBhHiIlLqQfd" diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_array_in_pointer.py b/pypy/module/test_lib_pypy/ctypes_tests/test_array_in_pointer.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_array_in_pointer.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_array_in_pointer.py @@ -2,7 +2,7 @@ from binascii import hexlify import re import py -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker def dump(obj): # helper function to dump memory contents in hex, with a hyphen diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_as_parameter.py b/pypy/module/test_lib_pypy/ctypes_tests/test_as_parameter.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_as_parameter.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_as_parameter.py @@ -1,6 +1,6 @@ from ctypes import * import pytest -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker def setup_module(mod): import conftest diff --git 
a/pypy/module/test_lib_pypy/ctypes_tests/test_base.py b/pypy/module/test_lib_pypy/ctypes_tests/test_base.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_base.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_base.py @@ -1,4 +1,4 @@ -from support import WhiteBoxTests +from .support import WhiteBoxTests from ctypes import * @@ -18,7 +18,7 @@ x = X() assert x.y._base is x assert x.y._index == 1 - + def test_array(self): X = POINTER(c_int) * 24 x = X() diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py b/pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py @@ -1,6 +1,6 @@ import pytest from ctypes import * -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker import os import ctypes diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py @@ -1,5 +1,5 @@ from ctypes import * -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker class TestStringBuffer(BaseCTypesTestChecker): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py b/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py @@ -1,6 +1,6 @@ from ctypes import * import pytest -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker class TestCallbacks(BaseCTypesTestChecker): functype = CFUNCTYPE diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_cast.py b/pypy/module/test_lib_pypy/ctypes_tests/test_cast.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_cast.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_cast.py @@ -1,6 +1,6 @@ from ctypes import * import sys, py -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker def setup_module(mod): import conftest @@ -65,7 +65,7 @@ def test_char_p(self): # This didn't work: bad argument to internal function s = c_char_p("hiho") - + assert cast(cast(s, c_void_p), c_char_p).value == ( "hiho") diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_cfuncs.py b/pypy/module/test_lib_pypy/ctypes_tests/test_cfuncs.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_cfuncs.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_cfuncs.py @@ -3,7 +3,7 @@ from ctypes import * import py -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker def setup_module(mod): import conftest diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_extra.py b/pypy/module/test_lib_pypy/ctypes_tests/test_extra.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_extra.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_extra.py @@ -5,7 +5,7 @@ import py from ctypes import * -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker class TestExtra(BaseCTypesTestChecker): def test_primitive_pointer(self): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py @@ -9,7 +9,7 @@ from ctypes import * import sys import pytest -from support import 
BaseCTypesTestChecker +from .support import BaseCTypesTestChecker try: WINFUNCTYPE diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_keepalive.py b/pypy/module/test_lib_pypy/ctypes_tests/test_keepalive.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_keepalive.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_keepalive.py @@ -1,5 +1,4 @@ import py -import support from ctypes import * import sys @@ -96,7 +95,7 @@ assert x._objects == {'0': n._objects, '1': n._objects} assert x._objects['0'] is n._objects assert n._objects is not None - + def test_pointer_setitem(self): x = c_int(2) y = c_int(3) @@ -165,7 +164,7 @@ s.x=3 a[3].p = pointer(s) - assert a._objects['0:3']['1'] is s + assert a._objects['0:3']['1'] is s def test_struct_with_inlined_array(self): class S(Structure): @@ -190,7 +189,7 @@ def test_struct_within_struct(self): class R(Structure): _fields_ = [('p', POINTER(c_int))] - + class S(Structure): _fields_ = [('b', c_int), ('r', R)] @@ -208,7 +207,7 @@ def test_union_within_union(self): class R(Union): _fields_ = [('p', POINTER(c_int))] - + class S(Union): _fields_ = [('b', c_int), ('r', R)] @@ -217,10 +216,10 @@ stuff = c_int(2) s.r.p = pointer(stuff) assert s._objects == {'0:1': {'1': stuff}} - + r = R() s.r = r - # obscure + # obscure assert s._objects == {'1': {}, '0:1': {'1': stuff}} def test_c_char_p(self): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_memfunctions.py b/pypy/module/test_lib_pypy/ctypes_tests/test_memfunctions.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_memfunctions.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_memfunctions.py @@ -2,7 +2,7 @@ import py import sys from ctypes import * -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker class TestMemFunctions(BaseCTypesTestChecker): def test_memmove(self): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py @@ -1,6 +1,6 @@ import pytest from ctypes import * -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker import sys, struct def valid_ranges(*types): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py @@ -1,6 +1,6 @@ import py from ctypes import * -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker ctype_types = [c_byte, c_ubyte, c_short, c_ushort, c_int, c_uint, c_long, c_ulong, c_longlong, c_ulonglong, c_double, c_float] @@ -18,7 +18,7 @@ ffitype = P.get_ffi_argtype() assert P.get_ffi_argtype() is ffitype assert ffitype.deref_pointer() is c_int.get_ffi_argtype() - + def test_pointer_crash(self): class A(POINTER(c_ulong)): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_prototypes.py b/pypy/module/test_lib_pypy/ctypes_tests/test_prototypes.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_prototypes.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_prototypes.py @@ -1,6 +1,6 @@ import pytest from ctypes import * -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker # IMPORTANT INFO: # diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_repr.py b/pypy/module/test_lib_pypy/ctypes_tests/test_repr.py --- 
a/pypy/module/test_lib_pypy/ctypes_tests/test_repr.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_repr.py @@ -1,5 +1,5 @@ import py -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker from ctypes import * diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_simplesubclasses.py b/pypy/module/test_lib_pypy/ctypes_tests/test_simplesubclasses.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_simplesubclasses.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_simplesubclasses.py @@ -1,5 +1,5 @@ import py -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker from ctypes import * class MyInt(c_int): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_sizes.py b/pypy/module/test_lib_pypy/ctypes_tests/test_sizes.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_sizes.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_sizes.py @@ -1,7 +1,7 @@ # Test specifically-sized containers. from ctypes import * -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker class TestSizes(BaseCTypesTestChecker): def test_8(self): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_slicing.py b/pypy/module/test_lib_pypy/ctypes_tests/test_slicing.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_slicing.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_slicing.py @@ -1,5 +1,5 @@ import pytest -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker from ctypes import * def setup_module(mod): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_stringptr.py b/pypy/module/test_lib_pypy/ctypes_tests/test_stringptr.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_stringptr.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_stringptr.py @@ -1,5 +1,5 @@ import pytest -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker from ctypes import * def setup_module(mod): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_strings.py b/pypy/module/test_lib_pypy/ctypes_tests/test_strings.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_strings.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_strings.py @@ -1,7 +1,7 @@ import py from ctypes import * -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker class TestStringArray(BaseCTypesTestChecker): def test_one(self): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_struct_fields.py b/pypy/module/test_lib_pypy/ctypes_tests/test_struct_fields.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_struct_fields.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_struct_fields.py @@ -1,5 +1,5 @@ import pytest -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker from ctypes import * class TestStructFields(BaseCTypesTestChecker): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py @@ -1,6 +1,6 @@ from ctypes import * from struct import calcsize -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker import pytest diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py b/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py @@ -1,7 +1,7 
@@ # coding: latin-1 import ctypes import pytest -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker try: ctypes.c_wchar diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_unions.py b/pypy/module/test_lib_pypy/ctypes_tests/test_unions.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_unions.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_unions.py @@ -1,6 +1,6 @@ import sys from ctypes import * -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker class TestUnion(BaseCTypesTestChecker): def test_getattr(self): @@ -28,4 +28,4 @@ u = UnionofStuff() u.one.x = 3 assert u.two.x == 3 - + diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_values.py b/pypy/module/test_lib_pypy/ctypes_tests/test_values.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_values.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_values.py @@ -4,7 +4,7 @@ import pytest from ctypes import * -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker def setup_module(mod): import conftest diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_varsize_struct.py b/pypy/module/test_lib_pypy/ctypes_tests/test_varsize_struct.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_varsize_struct.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_varsize_struct.py @@ -1,5 +1,5 @@ import pytest -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker from ctypes import * class TestVarSize(BaseCTypesTestChecker): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_win32.py b/pypy/module/test_lib_pypy/ctypes_tests/test_win32.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_win32.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_win32.py @@ -2,7 +2,7 @@ from ctypes import * from ctypes.test import is_resource_enabled -from support import BaseCTypesTestChecker +from .support import BaseCTypesTestChecker import pytest import sys From pypy.commits at gmail.com Sat Dec 23 15:15:39 2017 From: pypy.commits at gmail.com (mattip) Date: Sat, 23 Dec 2017 12:15:39 -0800 (PST) Subject: [pypy-commit] pypy default: Removed tag release-pypy3.5-v5.10.0 Message-ID: <5a3eb96b.478f1c0a.bff99.88fb@mx.google.com> Author: Matti Picus Branch: Changeset: r93555:166696ac4db7 Date: 2017-12-23 22:12 +0200 http://bitbucket.org/pypy/pypy/changeset/166696ac4db7/ Log: Removed tag release-pypy3.5-v5.10.0 diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -46,3 +46,5 @@ 84a2f3e6a7f88f2fe698e473998755b3bd1a12e2 release-pypy2.7-v5.9.0 0e7ea4fe15e82d5124e805e2e4a37cae1a402d4b release-pypy2.7-v5.10.0 a91df6163fb76df245091f741dbf6a23ddc72374 release-pypy3.5-v5.10.0 +a91df6163fb76df245091f741dbf6a23ddc72374 release-pypy3.5-v5.10.0 +0000000000000000000000000000000000000000 release-pypy3.5-v5.10.0 From pypy.commits at gmail.com Sat Dec 23 15:15:41 2017 From: pypy.commits at gmail.com (mattip) Date: Sat, 23 Dec 2017 12:15:41 -0800 (PST) Subject: [pypy-commit] pypy default: Added tag release-pypy3.5-v5.10.0 for changeset 09f9160b643e Message-ID: <5a3eb96d.f285df0a.12099.78e9@mx.google.com> Author: Matti Picus Branch: Changeset: r93556:d996bbfdc722 Date: 2017-12-23 22:12 +0200 http://bitbucket.org/pypy/pypy/changeset/d996bbfdc722/ Log: Added tag release-pypy3.5-v5.10.0 for changeset 09f9160b643e diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -48,3 +48,5 @@ a91df6163fb76df245091f741dbf6a23ddc72374 release-pypy3.5-v5.10.0 a91df6163fb76df245091f741dbf6a23ddc72374 release-pypy3.5-v5.10.0 
0000000000000000000000000000000000000000 release-pypy3.5-v5.10.0 +0000000000000000000000000000000000000000 release-pypy3.5-v5.10.0 +09f9160b643e3f02ccb8c843b2fbb4e5cbf54082 release-pypy3.5-v5.10.0 From pypy.commits at gmail.com Sat Dec 23 15:22:51 2017 From: pypy.commits at gmail.com (mattip) Date: Sat, 23 Dec 2017 12:22:51 -0800 (PST) Subject: [pypy-commit] pypy.org extradoc: update the release to 5.10 but do not regenreate yet (ppc64 and py3.5 missing) Message-ID: <5a3ebb1b.2db9df0a.6c825.4698@mx.google.com> Author: Matti Picus Branch: extradoc Changeset: r904:bb077c085963 Date: 2017-12-23 22:22 +0200 http://bitbucket.org/pypy/pypy.org/changeset/bb077c085963/ Log: update the release to 5.10 but do not regenreate yet (ppc64 and py3.5 missing) diff --git a/source/download.txt b/source/download.txt --- a/source/download.txt +++ b/source/download.txt @@ -15,14 +15,14 @@ We provide binaries for x86, ARM, PPC and s390x running on different operating systems such as Linux, Mac OS X and Windows: -* the Python2.7 compatible release — **PyPy2.7 v5.9.0** — (`what's new in PyPy2.7?`_) +* the Python2.7 compatible release — **PyPy2.7 v5.10.0** — (`what's new in PyPy2.7?`_) -* the Python3.5 compatible beta quality release — **PyPy3.5 v5.9.0** — (`what's new in PyPy3.5?`_). +* the Python3.5 compatible beta quality release — **PyPy3.5 v5.10.0** — (`what's new in PyPy3.5?`_). * the Python2.7 Software Transactional Memory special release — **PyPy-STM 2.5.1** (Linux x86-64 only) -.. _what's new in PyPy2.7?: http://doc.pypy.org/en/latest/release-v5.9.0.html -.. _what's new in PyPy3.5?: http://doc.pypy.org/en/latest/release-v5.9.0.html +.. _what's new in PyPy2.7?: http://doc.pypy.org/en/latest/release-v5.10.0.html +.. _what's new in PyPy3.5?: http://doc.pypy.org/en/latest/release-v5.10.0.html .. class:: download_menu @@ -79,7 +79,7 @@ .. _release: -Python2.7 compatible PyPy 5.9.0 +Python2.7 compatible PyPy 5.10.0 ------------------------------- * `Linux x86 binary (32bit, tar.bz2 built on Ubuntu 12.04 - 16.04)`__ (see ``[1]`` below) @@ -98,32 +98,30 @@ * `All our downloads,`__ including previous versions. We also have a mirror_, but please use only if you have troubles accessing the links above -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.9.0-linux32.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.9.0-linux64.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.9.0-linux-armhf-raspbian.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.9.0-linux-armhf-raring.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.9.0-linux-armel.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.9.0-osx64.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.9.0-win32.zip -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.9.0-ppc64.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.9.0-ppc64le.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.9.0-s390x.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.9.0-src.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.9.0-src.zip +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-linux32.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-linux64.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-linux-armhf-raspbian.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-linux-armhf-raring.tar.bz2 +.. 
__: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-linux-armel.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-osx64.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-win32.zip +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-ppc64.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-ppc64le.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-s390x.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-src.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-src.zip .. _`vcredist_x86.exe`: http://www.microsoft.com/en-us/download/details.aspx?id=5582 .. __: https://bitbucket.org/pypy/pypy/downloads .. _mirror: http://buildbot.pypy.org/mirror/ .. _FreshPorts: http://www.freshports.org/lang/pypy -Python 3.5.3 compatible PyPy3.5 v5.9 +Python 3.5.3 compatible PyPy3.5 v5.10 ------------------------------------ .. class:: download_menu - Warning: PyPy3.5 is considered **beta software.** All binaries - are thus called "beta". It is known to be rarely much slower than - PyPy 2. You are welcome to use it anyway; if you're lucky it will - be fast in your case. + Warning: PyPy3.5 is known to be rarely much slower than + PyPy 2. You are welcome to use it anyway * `Linux x86-64 binary (64bit, tar.bz2 built on Ubuntu 12.04 - 16.04)`__ (see ``[1]`` below) * `Source (tar.bz2)`__ @@ -131,9 +129,18 @@ * `All our downloads,`__ including previous versions. We also have a mirror_, but please use only if you have troubles accessing the links above -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-v5.9.0-linux64.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-v5.9.0-src.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-v5.9.0-src.zip +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-linux32.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-linux64.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-linux-armhf-raspbian.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-linux-armhf-raring.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-linux-armel.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-osx64.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-win32.zip +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-ppc64.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-ppc64le.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-s390x.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-src.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-src.zip .. __: https://bitbucket.org/pypy/pypy/downloads If your CPU is really, really old, it may be a x86-32 without SSE2. @@ -199,7 +206,7 @@ uncompressed, they run in-place. For now you can uncompress them either somewhere in your home directory or, say, in ``/opt``, and if you want, put a symlink from somewhere like -``/usr/local/bin/pypy`` to ``/path/to/pypy2-5.9.0/bin/pypy``. Do +``/usr/local/bin/pypy`` to ``/path/to/pypy2-5.10.0/bin/pypy``. Do not move or copy the executable ``pypy`` outside the tree --- put a symlink to it, otherwise it will not find its libraries. 
@@ -295,9 +302,9 @@ Alternatively, the following smaller package contains the source at the same revision as the above binaries: - * `pypy2-v5.9.0-src.tar.bz2`__ (sources) + * `pypy2-v5.10.0-src.tar.bz2`__ (sources) - .. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.9.0-src.tar.bz2 + .. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-src.tar.bz2 2. Make sure you **installed the dependencies.** See the list here__. @@ -427,21 +434,6 @@ Here are the checksums for each of the downloads -pypy2.7-v5.8.0 sha256:: - - a0b125a5781f7e5ddfc3baca46503b14f4ee6a0e234e8d72bfcf3afdf4120bef pypy2-v5.8.0-linux32.tar.bz2 - 6274292d0e954a2609b15978cde6efa30942ba20aa5d2acbbf1c70c0a54e9b1e pypy2-v5.8.0-linux64.tar.bz2 - 28b7fd0cc7418ffc66c71520728e87941be40ebf4b82675c57e25598a2a702b0 pypy2-v5.8.0-linux-armel.tar.bz2 - ddceca9c5c9a456d4bf1beab177660adffbbdf255a922244e1cc05f20318be46 pypy2-v5.8.0-linux-armhf-raring.tar.bz2 - da58279a0e3706889fc0df06087cea08f8cfd22322139fe9bae73ef9b2d119b7 pypy2-v5.8.0-linux-armhf-raspbian.tar.bz2 - 04b61d1cf13aaca6d0420e854c820b8bd049dc88be16c02542abe8ca26eb075c pypy2-v5.8.0-osx64.tar.bz2 - 35aea25e2b9d2f7c8742c47e4e7474ef0f93ce1b5e3d4f5a99795bab23c1ad2c pypy2-v5.8.0-s390x.tar.bz2 - 504c2d522595baf8775ae1045a217a2b120732537861d31b889d47c340b58bd5 pypy2-v5.8.0-src.tar.bz2 - ec1e34cc81a7f4086135bab29dcbe61d19fcd8d9d8fc1b149bea8373f94fd958 pypy2-v5.8.0-src.zip - 43d6217653e5bdc09e3ff8cb56fb52c4eb019429063d80107be4e88eef79ea8d pypy2-v5.8.0-win32.zip - 2e464bcbc8216e55bb2433ace712130244fd1f3fa78de0c0c98745fd8ff12b03 pypy2-v5.8.0-ppc64.tar.bz2 - 5746823904df74423376e0326046e1171df9693a6d4c95e8ce14ca83534bae72 pypy2-v5.8.0-ppc64le.tar.bz2 - pypy2.7-5.9.0 sha256: a2431a9e4ef879da1a2b56b111013b4a6efb87d4173a37bf650de47834ac5fe4 pypy2-v5.9.0-linux32.tar.bz2 @@ -455,11 +447,17 @@ db42dbed029eeac2da1dfe9bc71d63c934106acbed6bfad8910d2dabb557d9c2 pypy2-v5.9.0-src.zip b61081e24e05b83d8110da1262be19f0094532c6cacc293e318a1c186d926533 pypy2-v5.9.0-win32.zip -pypy 3.5-v5.8.0 sha256:: +pypy2.7-5.10.0 sha256 - 9d090127335c3c0fd2b14c8835bf91752e62756e55ea06aad3353f24a6854223 pypy3-v5.8.0-src.tar.bz2 - 57d871a7f1135719c138cee4e3533c3275d682a76a40ff668e95150c65923035 pypy3-v5.8.0-linux64.tar.bz2 - 8c868b5c8d15ce8acdf967f3c25da44bf52f6c7aa1fd1e50ebd50590f98066a4 pypy3-v5.8.0-src.zip + ee1980467ac8cc9fa9d609f7da93c5282503e59a548781248fe1914a7199d540 pypy2-v5.10.0-linux32.tar.bz2 + da85af9240220179493ad66c857934dc7ea91aef8f168cd293a2d99af8346ee2 pypy2-v5.10.0-linux64.tar.bz2 + 6fdd55dd8f674efd06f76edb60a09a03b9b04a5fbc56741f416a94a0b9d2ff91 pypy2-v5.10.0-linux-armel.tar.bz2 + 5ec3617bb9a07a0a0b2f3c8fbe69912345da4696cdb0a2aca7889b6f1e74435c pypy2-v5.10.0-linux-armhf-raspbian.tar.bz2 + 7e4120f0a83529a6851cbae0ec107dc7085ba8a4aeff4e7bd9da9aadb1ef37a4 pypy2-v5.10.0-osx64.tar.bz2 + dab4dccfa71820c4f803f5a82e13f76517bfde5fafe1e5fba6ff58ef2ba318ab pypy2-v5.10.0-s390x.tar.bz2 + 1209f2db718e6afda17528baa5138177a14a0938588a7d3e1b7c722c483079a8 pypy2-v5.10.0-src.tar.bz2 + 89304eb886f84b5c65f3f4079445ef018cdb9a6e59ef4ed2095d37248a3fefcc pypy2-v5.10.0-src.zip + 350914f9b70404781674f2f188f84d440d9d25da46ed9733b3f98269a510e033 pypy2-v5.10.0-win32.zip pypy 3.5-v5.9.0 sha256:: From pypy.commits at gmail.com Sat Dec 23 15:39:32 2017 From: pypy.commits at gmail.com (mattip) Date: Sat, 23 Dec 2017 12:39:32 -0800 (PST) Subject: [pypy-commit] pypy default: update contributors Message-ID: <5a3ebf04.47a7df0a.5578c.b3a5@mx.google.com> Author: Matti Picus Branch: Changeset: r93557:f9e4e9cb7b19 Date: 2017-12-23 
22:38 +0200 http://bitbucket.org/pypy/pypy/changeset/f9e4e9cb7b19/ Log: update contributors diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -339,8 +339,10 @@ Stanisław Halik Julien Phalip Roman Podoliaka + Steve Papanik Eli Stevens Boglarka Vezer + gabrielg PavloKapyshin Tomer Chachamu Christopher Groskopf @@ -363,11 +365,13 @@ Konrad Delong Dinu Gherman pizi + Tomáš Pružina James Robert Armin Ronacher Diana Popa Mads Kiilerich Brett Cannon + Caleb Hattingh aliceinwire Zooko Wilcox-O Hearn James Lan @@ -388,6 +392,7 @@ Jason Madden Yaroslav Fedevych Even Wiik Thomassen + m at funkyhat.org Stefan Marr Heinrich-Heine University, Germany diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst --- a/pypy/doc/contributor.rst +++ b/pypy/doc/contributor.rst @@ -217,6 +217,7 @@ Alejandro J. Cura Vladimir Kryachko Gabriel + Thomas Hisch Mark Williams Kunal Grover Nathan Taylor @@ -306,8 +307,10 @@ Stanisław Halik Julien Phalip Roman Podoliaka + Steve Papanik Eli Stevens Boglarka Vezer + gabrielg PavloKapyshin Tomer Chachamu Christopher Groskopf @@ -330,11 +333,13 @@ Konrad Delong Dinu Gherman pizi + Tomáš Pružina James Robert Armin Ronacher Diana Popa Mads Kiilerich Brett Cannon + Caleb Hattingh aliceinwire Zooko Wilcox-O Hearn James Lan @@ -355,4 +360,5 @@ Jason Madden Yaroslav Fedevych Even Wiik Thomassen + m at funkyhat.org Stefan Marr diff --git a/pypy/doc/tool/makecontributor.py b/pypy/doc/tool/makecontributor.py --- a/pypy/doc/tool/makecontributor.py +++ b/pypy/doc/tool/makecontributor.py @@ -81,6 +81,7 @@ 'Yasir Suhail':['yasirs'], 'Squeaky': ['squeaky'], "Amaury Forgeot d'Arc": ['amauryfa at gmail.com'], + "Dodan Mihai": ['mihai.dodan at gmail.com'], } alias_map = {} From pypy.commits at gmail.com Sat Dec 23 17:12:44 2017 From: pypy.commits at gmail.com (mattip) Date: Sat, 23 Dec 2017 14:12:44 -0800 (PST) Subject: [pypy-commit] pypy.org extradoc: add sha256 for pypy 3.5 Message-ID: <5a3ed4dc.41afdf0a.5f3b0.8fa7@mx.google.com> Author: Matti Picus Branch: extradoc Changeset: r905:41427b24c739 Date: 2017-12-24 00:11 +0200 http://bitbucket.org/pypy/pypy.org/changeset/41427b24c739/ Log: add sha256 for pypy 3.5 diff --git a/source/download.txt b/source/download.txt --- a/source/download.txt +++ b/source/download.txt @@ -465,3 +465,16 @@ a014f47f50a1480f871a0b82705f904b38c93c4ca069850eb37653fedafb1b97 pypy3-v5.9.0-src.tar.bz2 c5d7fa206cdf425de3950ef8ff578deb0745a723b69b64bb121210a5b8df8c65 pypy3-v5.9.0-src.zip +pypy 3.5-v5.10.0 sha256 + + 529bc3b11edbdcdd676d90c805b8f607f6eedd5f0ec457a31bbe09c03f5bebfe pypy3-v5.10.0-linux32.tar.bz2 + aa4fb52fb858d973dd838dcf8d74f30705e5afdf1150acb8e056eb99353dfe77 pypy3-v5.10.0-linux64.tar.bz2 + c2cc529befb3e1f2ef8bd4e96af4a823c52ef2d180b0b3bd87511c5b47d59210 pypy3-v5.10.0-linux-armel.tar.bz2 + 4e902e0e79f62f2a9049c1c71310ff4fc801011bec4d25082edb5c537d3f15c9 pypy3-v5.10.0-linux-armhf-raspbian.tar.bz2 + 7e389a103f560de1eead1271ec3a2df9424c6ccffe7cbae8e95e6e81ae811a16 pypy3-v5.10.0-osx64.tar.bz2 + e0ffec9d033002eb61af488b1f66c319380da8408abd14a3bc202ded4705dc9a pypy3-v5.10.0-s390x.tar.bz2 + a6e4cffde71e3f08b6e1befa5c0352a9bcc5f4e9f5cbf395001e0763a1a0d9e3 pypy3-v5.10.0-src.tar.bz2 + 96cf354fb410599cd5acd21732855e25e742e13eac7dc079c0c02b0625908cb9 pypy3-v5.10.0-src.zip + 2d93bf2bd7b1d031b96331d3fde6cacdda95673ce6875d6d1669c4c0ea2a52bc pypy3-v5.10.0-win32.zip + + From pypy.commits at gmail.com Sun Dec 24 15:05:09 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 24 Dec 2017 12:05:09 -0800 (PST) Subject: [pypy-commit] pypy.org extradoc: 
sha256 of pypy2-ppc64* Message-ID: <5a400875.5de81c0a.66b9d.6d90@mx.google.com> Author: Armin Rigo Branch: extradoc Changeset: r906:3e1d30178f49 Date: 2017-12-24 21:05 +0100 http://bitbucket.org/pypy/pypy.org/changeset/3e1d30178f49/ Log: sha256 of pypy2-ppc64* diff --git a/source/download.txt b/source/download.txt --- a/source/download.txt +++ b/source/download.txt @@ -458,6 +458,8 @@ 1209f2db718e6afda17528baa5138177a14a0938588a7d3e1b7c722c483079a8 pypy2-v5.10.0-src.tar.bz2 89304eb886f84b5c65f3f4079445ef018cdb9a6e59ef4ed2095d37248a3fefcc pypy2-v5.10.0-src.zip 350914f9b70404781674f2f188f84d440d9d25da46ed9733b3f98269a510e033 pypy2-v5.10.0-win32.zip + 9afa1a36a5fc55ebc3e80576f05f44294f2b0de279862286fe00f5ee139965b1 pypy2-v5.10.0-ppc64.tar.bz2 + 2c32ccfa80e3e2ec56b4cc848526046d7b0de1f2f1a92b0cedeb414ec76745ab pypy2-v5.10.0-ppc64le.tar.bz2 pypy 3.5-v5.9.0 sha256:: From pypy.commits at gmail.com Sun Dec 24 17:18:16 2017 From: pypy.commits at gmail.com (mattip) Date: Sun, 24 Dec 2017 14:18:16 -0800 (PST) Subject: [pypy-commit] pypy py3.5: skip this on win32, the setup_class is run with python but the test with pypy3 Message-ID: <5a4027a8.c287df0a.387f9.9c05@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r93558:0f5ba74055d1 Date: 2017-12-24 23:37 +0200 http://bitbucket.org/pypy/pypy/changeset/0f5ba74055d1/ Log: skip this on win32, the setup_class is run with python but the test with pypy3 diff --git a/pypy/module/thread/test/test_thread.py b/pypy/module/thread/test/test_thread.py --- a/pypy/module/thread/test/test_thread.py +++ b/pypy/module/thread/test/test_thread.py @@ -194,8 +194,8 @@ assert sorted(lst) == list(range(120)) def test_many_threads(self): - import _thread, time - if self.can_start_many_threads: + import _thread, time, sys + if self.can_start_many_threads or sys.platform == 'win32': skip("this OS supports too many threads to check (> 1000)") lock = _thread.allocate_lock() lock.acquire() From pypy.commits at gmail.com Sun Dec 24 17:18:18 2017 From: pypy.commits at gmail.com (mattip) Date: Sun, 24 Dec 2017 14:18:18 -0800 (PST) Subject: [pypy-commit] pypy py3.5: make test fail not hang on win32 Message-ID: <5a4027aa.c3a6df0a.8c75.f52c@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r93559:248a5a9859ef Date: 2017-12-25 00:17 +0200 http://bitbucket.org/pypy/pypy/changeset/248a5a9859ef/ Log: make test fail not hang on win32 diff --git a/pypy/module/time/test/test_time.py b/pypy/module/time/test/test_time.py --- a/pypy/module/time/test/test_time.py +++ b/pypy/module/time/test/test_time.py @@ -13,10 +13,12 @@ assert isinstance(time._STRUCT_TM_ITEMS, int) def test_sleep(self): - import time + import time, sys raises(TypeError, time.sleep, "foo") time.sleep(0.12345) raises(ValueError, time.sleep, -1.0) + if sys.platform == 'win32': + assert False, 'hangs on win32 after translation' raises((ValueError, OverflowError), time.sleep, float('nan')) raises(OverflowError, time.sleep, float('inf')) From pypy.commits at gmail.com Mon Dec 25 03:30:53 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 25 Dec 2017 00:30:53 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix 248a5a9859ef (probably) Message-ID: <5a40b73d.83871c0a.239d.a270@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r93560:36cb02a2cd90 Date: 2017-12-25 09:30 +0100 http://bitbucket.org/pypy/pypy/changeset/36cb02a2cd90/ Log: Fix 248a5a9859ef (probably) diff --git a/pypy/interpreter/timeutils.py b/pypy/interpreter/timeutils.py --- a/pypy/interpreter/timeutils.py +++ b/pypy/interpreter/timeutils.py @@ 
-4,6 +4,7 @@ import math from rpython.rlib.rarithmetic import ( r_longlong, ovfcheck, ovfcheck_float_to_longlong) +from rpython.rlib import rfloat from pypy.interpreter.error import oefmt SECS_TO_NS = 10 ** 9 @@ -21,6 +22,8 @@ def timestamp_w(space, w_secs): if space.isinstance_w(w_secs, space.w_float): secs = space.float_w(w_secs) + if rfloat.isnan(secs): + raise oefmt(space.w_ValueError, "timestamp is nan") result_float = math.ceil(secs * SECS_TO_NS) try: return ovfcheck_float_to_longlong(result_float) diff --git a/pypy/module/time/test/test_time.py b/pypy/module/time/test/test_time.py --- a/pypy/module/time/test/test_time.py +++ b/pypy/module/time/test/test_time.py @@ -13,12 +13,10 @@ assert isinstance(time._STRUCT_TM_ITEMS, int) def test_sleep(self): - import time, sys + import time raises(TypeError, time.sleep, "foo") time.sleep(0.12345) raises(ValueError, time.sleep, -1.0) - if sys.platform == 'win32': - assert False, 'hangs on win32 after translation' raises((ValueError, OverflowError), time.sleep, float('nan')) raises(OverflowError, time.sleep, float('inf')) From pypy.commits at gmail.com Mon Dec 25 13:46:23 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 25 Dec 2017 10:46:23 -0800 (PST) Subject: [pypy-commit] pypy.org extradoc: regen and add new OS X build Message-ID: <5a41477f.04b0df0a.aab9d.faf0@mx.google.com> Author: fijal Branch: extradoc Changeset: r907:51a69a2367a0 Date: 2017-12-25 20:46 +0200 http://bitbucket.org/pypy/pypy.org/changeset/51a69a2367a0/ Log: regen and add new OS X build diff --git a/compat.html b/compat.html --- a/compat.html +++ b/compat.html @@ -111,11 +111,11 @@ not support refcounting semantics. The following code won't fill the file immediately, but only after a certain period of time, when the GC does a collection:

    [rendered compat.html diff omitted: the regenerated page only changes markup around the unchanged examples that follow this sentence (writing through an unclosed file object, the explicit f.close() fix, the "with open(...)" form, and calling close() on a non-exhausted generator so its pending finally/with clauses run immediately); the HTML rendering was stripped in extraction, so the removed and added text read identically.]
    More generally, __del__() methods are not executed as predictively as on CPython: they run “some time later” in PyPy (or not at all if the program finishes running in the meantime). See more details diff --git a/download.html b/download.html --- a/download.html +++ b/download.html @@ -73,8 +73,8 @@
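A minimal sketch of the pattern the compat page describes above (illustrative only, not taken from any changeset in this thread; the temporary path is an assumption for the example):

    import os, tempfile

    path = os.path.join(tempfile.mkdtemp(), "out.txt")

    # Relying on the GC to close the file is unreliable on PyPy (no refcounting):
    #     open(path, "w").write("stuff")
    # Deterministic on CPython and PyPy alike:
    with open(path, "w") as f:
        f.write("stuff")

    # The same applies to non-exhausted generators: close() them explicitly.
    def mygen():
        try:
            yield 42
        finally:
            print("cleanup runs now")

    gen = mygen()
    assert next(gen) == 42
    gen.close()   # without this, the cleanup would only run "some time later"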

    [rendered download.html diff omitted: the regenerated page bumps the release headings from PyPy2.7 5.9.0 / PyPy3.5 v5.9 to 5.10.0 / v5.10, rewords the PyPy3.5 "beta" warning, turns several links (RevDB documentation, "our own repository", the dependency list, the old 32bit/64bit sandbox binaries) into unresolved "__" references, picks up docutils "Title underline too short" warnings from the not-yet-fixed reST source, updates the symlink example path to pypy2-5.10.0, and drops the pypy2-v5.8.0 sha256 list; the HTML markup was lost in extraction.]

    pypy2.7-5.9.0 sha256:

    [rendered checksum section omitted: the pypy2-v5.9.0 list is kept, the pypy3-v5.8.0 list is removed, and new pypy2-v5.10.0 and pypy3-v5.10.0 sha256 lists are added, matching the values added to source/download.txt in the commits above; a "Docutils System Messages" block is also appended, reporting an anonymous hyperlink mismatch (26 references but 35 targets).]

    +Anonymous hyperlink mismatch: 26 references but 35 targets. +See “backrefs” attribute for IDs.

    [more rendered download.html hunks omitted: the release headings, the "Title underline too short" System Messages, the RevDB / sandboxing / numpy link markup, the dependency and build-from-source bullets, and the pypy3-v5.10.0 checksum lines appear again here as stripped HTML on the other side of the diff; nothing beyond what the source/download.txt diffs in this thread already show.]
    -
    -

    Docutils System Messages

    -
    -

    System Message: ERROR/3 ([dynamic-text]); backlinks: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26

    -Anonymous hyperlink mismatch: 26 references but 35 targets. -See “backrefs” attribute for IDs.
    -
    diff --git a/source/download.txt b/source/download.txt --- a/source/download.txt +++ b/source/download.txt @@ -80,15 +80,14 @@ .. _release: Python2.7 compatible PyPy 5.10.0 -------------------------------- +-------------------------------- * `Linux x86 binary (32bit, tar.bz2 built on Ubuntu 12.04 - 16.04)`__ (see ``[1]`` below) * `Linux x86-64 binary (64bit, tar.bz2 built on Ubuntu 12.04 - 16.04)`__ (see ``[1]`` below) * `ARM Hardfloat Linux binary (ARMHF/gnueabihf, tar.bz2, Raspbian)`__ (see ``[1]`` below) * `ARM Hardfloat Linux binary (ARMHF/gnueabihf, tar.bz2, Ubuntu Raring)`__ (see ``[1]`` below) * `ARM Softfloat Linux binary (ARMEL/gnueabi, tar.bz2, Ubuntu Precise)`__ (see ``[1]`` below) -* `Mac OS X binary (64bit)`__ (High Sierra) -* `Mac OS X binary (64bit) (2)`__ (Sierra and below) +* `Mac OS X binary (64bit)`__ * FreeBSD x86 and x86_64: see FreshPorts_ * `Windows binary (32bit)`__ (you might need the VS 2008 runtime library installer `vcredist_x86.exe`_.) @@ -105,7 +104,6 @@ .. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-linux-armhf-raring.tar.bz2 .. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-linux-armel.tar.bz2 .. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-osx64.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-osx64-2.tar.bz2 .. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-win32.zip .. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-ppc64.tar.bz2 .. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-ppc64le.tar.bz2 @@ -118,31 +116,36 @@ .. _FreshPorts: http://www.freshports.org/lang/pypy Python 3.5.3 compatible PyPy3.5 v5.10 ------------------------------------- +------------------------------------- .. class:: download_menu Warning: PyPy3.5 is known to be rarely much slower than PyPy 2. You are welcome to use it anyway +* `Linux x86 binary (32bit, tar.bz2 built on Ubuntu 12.04 - 16.04)`__ (see ``[1]`` below) * `Linux x86-64 binary (64bit, tar.bz2 built on Ubuntu 12.04 - 16.04)`__ (see ``[1]`` below) -* `Source (tar.bz2)`__ -* `Source (zip)`__ +* `ARM Hardfloat Linux binary (ARMHF/gnueabihf, tar.bz2, Raspbian)`__ (see ``[1]`` below) +* `ARM Softfloat Linux binary (ARMEL/gnueabi, tar.bz2, Ubuntu Precise)`__ (see ``[1]`` below) +* `Mac OS X binary (64bit)`__ (High Sierra) +* `Mac OS X binary (64bit) (2)`__ (Sierra and below) +* `Windows binary (32bit)`__ (you might need the VS 2008 runtime library + installer `vcredist_x86.exe`_.) +* `s390x Linux binary (tar.bz2 built on Redhat Linux 7.2)`__ (see ``[1]`` below) +* `Source (tar.bz2)`__; `Source (zip)`__. See below for more about the sources. * `All our downloads,`__ including previous versions. We also have a mirror_, but please use only if you have troubles accessing the links above -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-linux32.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-linux64.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-linux-armhf-raspbian.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-linux-armhf-raring.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-linux-armel.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-osx64.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-win32.zip -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-ppc64.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-ppc64le.tar.bz2 -.. 
__: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-s390x.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-src.tar.bz2 -.. __: https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.10.0-src.zip +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-v5.10.0-linux32.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-v5.10.0-linux64.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-v5.10.0-linux-armhf-raspbian.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-v5.10.0-linux-armel.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-v5.10.0-osx64.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-v5.10.0-osx64-2.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-v5.10.0-win32.zip +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-v5.10.0-s390x.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-v5.10.0-src.tar.bz2 +.. __: https://bitbucket.org/pypy/pypy/downloads/pypy3-v5.10.0-src.zip .. __: https://bitbucket.org/pypy/pypy/downloads If your CPU is really, really old, it may be a x86-32 without SSE2. From pypy.commits at gmail.com Mon Dec 25 14:59:53 2017 From: pypy.commits at gmail.com (pjenvey) Date: Mon, 25 Dec 2017 11:59:53 -0800 (PST) Subject: [pypy-commit] pypy.org extradoc: fix formatting of sha lists Message-ID: <5a4158b9.08691c0a.a8b82.6b91@mx.google.com> Author: Philip Jenvey Branch: extradoc Changeset: r909:42a37d570b66 Date: 2017-12-25 12:00 -0800 http://bitbucket.org/pypy/pypy.org/changeset/42a37d570b66/ Log: fix formatting of sha lists diff --git a/download.html b/download.html --- a/download.html +++ b/download.html @@ -387,7 +387,7 @@

    Checksums

    Here are the checksums for each of the downloads

    pypy2.7-5.9.0 sha256:

    -
    +
     a2431a9e4ef879da1a2b56b111013b4a6efb87d4173a37bf650de47834ac5fe4  pypy2-v5.9.0-linux32.tar.bz2
     790febd4f09e22d6e2f81154efc7dc4b2feec72712aaf4f82aa91b550abb4b48  pypy2-v5.9.0-linux64.tar.bz2
     ac0676d91dfb388c799ec5c2845f42018a666423376f52f3ae13d61fd2e6f87d  pypy2-v5.9.0-linux-armel.tar.bz2
    @@ -397,9 +397,10 @@
     36d6b5158801c8aa4ef5b9d8990ca0a3782a38a04916be5644a33c2a82465101  pypy2-v5.9.0-s390x.tar.bz2
     de4bf05df47f1349dbac97233d9277bbaf1ef3331663ea2557fd5da3dbcfd0a7  pypy2-v5.9.0-src.tar.bz2
     db42dbed029eeac2da1dfe9bc71d63c934106acbed6bfad8910d2dabb557d9c2  pypy2-v5.9.0-src.zip
    -b61081e24e05b83d8110da1262be19f0094532c6cacc293e318a1c186d926533  pypy2-v5.9.0-win32.zip
    -

    pypy2.7-5.10.0 sha256

    -
    +b61081e24e05b83d8110da1262be19f0094532c6cacc293e318a1c186d926533 pypy2-v5.9.0-win32.zip +
    +

    pypy2.7-5.10.0 sha256:

    +
     ee1980467ac8cc9fa9d609f7da93c5282503e59a548781248fe1914a7199d540  pypy2-v5.10.0-linux32.tar.bz2
     da85af9240220179493ad66c857934dc7ea91aef8f168cd293a2d99af8346ee2  pypy2-v5.10.0-linux64.tar.bz2
     6fdd55dd8f674efd06f76edb60a09a03b9b04a5fbc56741f416a94a0b9d2ff91  pypy2-v5.10.0-linux-armel.tar.bz2
    @@ -410,15 +411,16 @@
     89304eb886f84b5c65f3f4079445ef018cdb9a6e59ef4ed2095d37248a3fefcc  pypy2-v5.10.0-src.zip
     350914f9b70404781674f2f188f84d440d9d25da46ed9733b3f98269a510e033  pypy2-v5.10.0-win32.zip
     9afa1a36a5fc55ebc3e80576f05f44294f2b0de279862286fe00f5ee139965b1  pypy2-v5.10.0-ppc64.tar.bz2
    -2c32ccfa80e3e2ec56b4cc848526046d7b0de1f2f1a92b0cedeb414ec76745ab  pypy2-v5.10.0-ppc64le.tar.bz2
    +2c32ccfa80e3e2ec56b4cc848526046d7b0de1f2f1a92b0cedeb414ec76745ab  pypy2-v5.10.0-ppc64le.tar.bz2
    +

    pypy 3.5-v5.9.0 sha256:

     d8c41ede3758127718944cc2fd6bf78ed4303d946f85596cac91281ccce36165  pypy3-v5.9.0-linux64.tar.bz2
     a014f47f50a1480f871a0b82705f904b38c93c4ca069850eb37653fedafb1b97  pypy3-v5.9.0-src.tar.bz2
     c5d7fa206cdf425de3950ef8ff578deb0745a723b69b64bb121210a5b8df8c65  pypy3-v5.9.0-src.zip
     
    -

    pypy 3.5-v5.10.0 sha256

    -
    +

    pypy 3.5-v5.10.0 sha256:

    +
     529bc3b11edbdcdd676d90c805b8f607f6eedd5f0ec457a31bbe09c03f5bebfe  pypy3-v5.10.0-linux32.tar.bz2
     aa4fb52fb858d973dd838dcf8d74f30705e5afdf1150acb8e056eb99353dfe77  pypy3-v5.10.0-linux64.tar.bz2
     c2cc529befb3e1f2ef8bd4e96af4a823c52ef2d180b0b3bd87511c5b47d59210  pypy3-v5.10.0-linux-armel.tar.bz2
    @@ -428,7 +430,8 @@
     e0ffec9d033002eb61af488b1f66c319380da8408abd14a3bc202ded4705dc9a  pypy3-v5.10.0-s390x.tar.bz2
     a6e4cffde71e3f08b6e1befa5c0352a9bcc5f4e9f5cbf395001e0763a1a0d9e3  pypy3-v5.10.0-src.tar.bz2
     96cf354fb410599cd5acd21732855e25e742e13eac7dc079c0c02b0625908cb9  pypy3-v5.10.0-src.zip
    -2d93bf2bd7b1d031b96331d3fde6cacdda95673ce6875d6d1669c4c0ea2a52bc  pypy3-v5.10.0-win32.zip
    +2d93bf2bd7b1d031b96331d3fde6cacdda95673ce6875d6d1669c4c0ea2a52bc pypy3-v5.10.0-win32.zip +
    @@ -71,12 +68,11 @@ language, passing Python test suite (with minor modifications that were already accepted in the main python in newer versions). It supports most of the commonly used Python standard library modules; details below.

    -

    (PyPy3 implements the Python language version 3.5.3. It is beta right now, -and it is quite possible that a few things are missing. The rest of this -document only describes the situation of the 2.7.x implementation.)

    +

    PyPy3 implements the Python language version 3.5.3. It has been released, +but Python is a large language and it is quite possible that a few things are missing.

    List of installable top 1000 PyPI packages

    -

    PyPy has alpha/beta-level support for the CPython C API, however, -this feature is not yet complete. We strongly advise use of CFFI +

    PyPy has support for the CPython C API, however there are constructs +that are not compatible. We strongly advise use of CFFI instead. CFFI come builtin with PyPy. Many libraries will require a bit of effort to work, but there are known success stories. Check out PyPy blog for updates, as well as the Compatibility Wiki.

    @@ -106,16 +102,17 @@
  • pyglet
  • Pillow (the PIL fork)
  • lxml
  • +
  • NumPy
  • The main difference that is not going to be fixed is that PyPy does not support refcounting semantics. The following code won't fill the file immediately, but only after a certain period of time, when the GC does a collection:

    -
    open("filename", "w").write("stuff")
    +
    open("filename", "w").write("stuff")

    The proper fix is

    -
    f = open("filename", "w")
    f.write("stuff")
    f.close()
    +
    f = open("filename", "w")
    f.write("stuff")
    f.close()

    or using the with keyword

    -
    with open("filename", "w") as f:
    f.write("stuff")
    +
    with open("filename", "w") as f:
    f.write("stuff")

    The same problem–not closing your files–can also show up if your program opens a large number of files without closing them explicitly. In that case, you can easily hit the system limit on the number of file @@ -129,7 +126,7 @@

    Similarly, remember that you must close() a non-exhausted generator in order to have its pending finally or with clauses executed immediately:

    -
    def mygen():
    with foo:
    yield 42

    for x in mygen():
    if x == 42:
    break # foo.__exit__ is not run immediately!

    # fixed version:
    gen = mygen()
    try:
    for x in gen:
    if x == 42:
    break
    finally:
    gen.close()
    +
    def mygen():
    with foo:
    yield 42

    for x in mygen():
    if x == 42:
    break # foo.__exit__ is not run immediately!

    # fixed version:
    gen = mygen()
    try:
    for x in gen:
    if x == 42:
    break
    finally:
    gen.close()

    More generally, __del__() methods are not executed as predictively as on CPython: they run “some time later” in PyPy (or not at all if the program finishes running in the meantime). See more details diff --git a/contact.html b/contact.html --- a/contact.html +++ b/contact.html @@ -57,9 +57,6 @@ | Contact
    - Py3k donations - | - STM donations


    diff --git a/don2.html b/don2.html --- a/don2.html +++ b/don2.html @@ -1,7 +1,7 @@
    • - Donate towards STM in pypy
      - Donate towards py3k in pypy
      + + Donate towards general pypy progress
    • diff --git a/download.html b/download.html --- a/download.html +++ b/download.html @@ -57,9 +57,6 @@ | Contact
      - Py3k donations - | - STM donations
      diff --git a/features.html b/features.html --- a/features.html +++ b/features.html @@ -57,9 +57,6 @@ | Contact
      - Py3k donations - | - STM donations
      diff --git a/howtohelp.html b/howtohelp.html --- a/howtohelp.html +++ b/howtohelp.html @@ -57,9 +57,6 @@ | Contact
      - Py3k donations - | - STM donations
      diff --git a/index.html b/index.html --- a/index.html +++ b/index.html @@ -57,9 +57,6 @@ | Contact
      - Py3k donations - | - STM donations
      diff --git a/js/script2.js b/js/script2.js --- a/js/script2.js +++ b/js/script2.js @@ -21,9 +21,9 @@ /* if (location.href.indexOf("numpydonate.html") >= 0) f = numpy_donate; -*/ if (location.href.indexOf("py3donate.html") >= 0) f = py3k_donate; else +*/ f = general_donate; $(document).ready(f); diff --git a/numpydonate.html b/numpydonate.html --- a/numpydonate.html +++ b/numpydonate.html @@ -57,9 +57,6 @@ | Contact
      - Py3k donations - | - STM donations
      diff --git a/people.html b/people.html --- a/people.html +++ b/people.html @@ -57,9 +57,6 @@ | Contact
      - Py3k donations - | - STM donations
      diff --git a/performance.html b/performance.html --- a/performance.html +++ b/performance.html @@ -57,9 +57,6 @@ | Contact
      - Py3k donations - | - STM donations
      diff --git a/py3donate.html b/py3donate.html --- a/py3donate.html +++ b/py3donate.html @@ -57,9 +57,6 @@ | Contact
      - Py3k donations - | - STM donations
      diff --git a/source/_layouts/site.genshi b/source/_layouts/site.genshi --- a/source/_layouts/site.genshi +++ b/source/_layouts/site.genshi @@ -13,8 +13,6 @@ ('Blog', 'http://morepypy.blogspot.com'), ('People', 'people.html'), ('Contact', 'contact.html'), - ('Py3k donations', 'py3donate.html'), - ('STM donations', 'tmdonate2.html'), ], } diff --git a/source/compat.txt b/source/compat.txt --- a/source/compat.txt +++ b/source/compat.txt @@ -8,16 +8,15 @@ already accepted in the main python in newer versions). It supports most of the commonly used Python `standard library modules`_; details below. -(PyPy3 implements the Python language version 3.5.3. It is beta right now, -and it is quite possible that a few things are missing. The rest of this -document only describes the situation of the 2.7.x implementation.) +PyPy3 implements the Python language version 3.5.3. It has been released, +but Python is a large language and it is quite possible that a few things are missing. .. class:: download_menu `List of installable top 1000 PyPI packages`_ -PyPy has **alpha/beta-level** support for the `CPython C API`_, however, -this feature is not yet complete. We strongly advise use of `CFFI`_ +PyPy has support for the `CPython C API`_, however there are constructs +that are `not compatible`. We strongly advise use of `CFFI`_ instead. CFFI come builtin with PyPy. Many libraries will require a bit of effort to work, but there are known success stories. Check out PyPy blog for updates, as well as the `Compatibility Wiki`__. @@ -62,6 +61,8 @@ * `lxml`_ +* NumPy + The main difference that is not going to be fixed is that PyPy does not support refcounting semantics. The following code won't fill the file immediately, but only after a certain period of time, when the GC @@ -140,6 +141,7 @@ .. _`CPython C API`: http://docs.python.org/c-api/ .. _`CFFI`: http://cffi.readthedocs.org/ +.. _`not compatible`: http://doc.pypy.org/en/latest/cpython_differences.html#c-api-differences .. _`standard library modules`: http://docs.python.org/library/ .. _`our dev site`: http://pypy.readthedocs.org/en/latest/cpython_differences.html .. _`more details here`: http://pypy.readthedocs.org/en/latest/cpython_differences.html#differences-related-to-garbage-collection-strategies diff --git a/sponsor.html b/sponsor.html --- a/sponsor.html +++ b/sponsor.html @@ -57,9 +57,6 @@ | Contact
      - Py3k donations - | - STM donations
      diff --git a/success.html b/success.html --- a/success.html +++ b/success.html @@ -57,9 +57,6 @@ | Contact
      - Py3k donations - | - STM donations
      diff --git a/tmdonate.html b/tmdonate.html --- a/tmdonate.html +++ b/tmdonate.html @@ -57,9 +57,6 @@ | Contact
      - Py3k donations - | - STM donations
      diff --git a/tmdonate2.html b/tmdonate2.html --- a/tmdonate2.html +++ b/tmdonate2.html @@ -57,9 +57,6 @@ | Contact
      - Py3k donations - | - STM donations
      From pypy.commits at gmail.com Thu Dec 28 05:40:29 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 28 Dec 2017 02:40:29 -0800 (PST) Subject: [pypy-commit] pypy default: explain better what this branch does Message-ID: <5a44ca1d.8681df0a.64a80.a53d@mx.google.com> Author: Antonio Cuni Branch: Changeset: r93586:b4f48ae0cb1b Date: 2017-12-28 11:39 +0100 http://bitbucket.org/pypy/pypy/changeset/b4f48ae0cb1b/ Log: explain better what this branch does diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -7,4 +7,6 @@ .. branch: cpyext-avoid-roundtrip -Reduce conversions of c struct to rpython equivalent where possible +Big refactoring of some cpyext code, which avoids a lot of nonsense when +calling C from Python and vice-versa: the result is a big speedup in +function/method calls, up to 6 times faster. From pypy.commits at gmail.com Thu Dec 28 09:58:42 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 28 Dec 2017 06:58:42 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix issue #2717 Message-ID: <5a4506a2.ceb51c0a.3cc20.a264@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93587:f145d8504387 Date: 2017-12-28 15:57 +0100 http://bitbucket.org/pypy/pypy/changeset/f145d8504387/ Log: Fix issue #2717 diff --git a/pypy/interpreter/test/test_timeutils.py b/pypy/interpreter/test/test_timeutils.py new file mode 100644 --- /dev/null +++ b/pypy/interpreter/test/test_timeutils.py @@ -0,0 +1,13 @@ +import pytest +from rpython.rlib.rarithmetic import r_longlong +from pypy.interpreter.error import OperationError +from pypy.interpreter.timeutils import timestamp_w + +def test_timestamp_w(space): + w_1_year = space.newint(365 * 24 * 3600) + result = timestamp_w(space, w_1_year) + assert isinstance(result, r_longlong) + assert result // 10 ** 9 == space.int_w(w_1_year) + w_millenium = space.mul(w_1_year, space.newint(1000)) + with pytest.raises(OperationError): # timestamps overflow after ~300 years + timestamp_w(space, w_millenium) diff --git a/pypy/interpreter/timeutils.py b/pypy/interpreter/timeutils.py --- a/pypy/interpreter/timeutils.py +++ b/pypy/interpreter/timeutils.py @@ -3,7 +3,7 @@ """ import math from rpython.rlib.rarithmetic import ( - r_longlong, ovfcheck, ovfcheck_float_to_longlong) + r_longlong, ovfcheck_float_to_longlong) from rpython.rlib import rfloat from pypy.interpreter.error import oefmt @@ -31,10 +31,10 @@ raise oefmt(space.w_OverflowError, "timestamp %R too large to convert to C _PyTime_t", w_secs) else: - sec = space.int_w(w_secs) try: - result = ovfcheck(sec * SECS_TO_NS) + sec = space.bigint_w(w_secs).tolonglong() + result = sec * r_longlong(SECS_TO_NS) except OverflowError: raise oefmt(space.w_OverflowError, - "timestamp too large to convert to C _PyTime_t") - return r_longlong(result) + "timestamp %R too large to convert to C _PyTime_t", w_secs) + return result From pypy.commits at gmail.com Thu Dec 28 10:23:37 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 28 Dec 2017 07:23:37 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Correctly skip test Message-ID: <5a450c79.0eb7df0a.a5fea.39db@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93588:e5c66b6446c5 Date: 2017-12-28 16:22 +0100 http://bitbucket.org/pypy/pypy/changeset/e5c66b6446c5/ Log: Correctly skip test diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py --- a/pypy/module/cpyext/test/test_eval.py +++ b/pypy/module/cpyext/test/test_eval.py @@ -344,9 +344,8 
@@ return module.get_flags()""", ns) assert ns['nested_flags']() == (0, 0) + @pytest.mark.xfail("sys.platform == 'win32'", reason='Hangs the process', run=False) def test_recursive_function(self): - if sys.platform == 'win32': - assert False, 'hangs the process on win32' module = self.import_extension('foo', [ ("call_recursive", "METH_NOARGS", """ From pypy.commits at gmail.com Thu Dec 28 10:57:12 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 28 Dec 2017 07:57:12 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix: the 'flags' argument to setxattr() had no effect Message-ID: <5a451458.b0abdf0a.8b3f.109d@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93589:70dfe4f14f67 Date: 2017-12-28 16:56 +0100 http://bitbucket.org/pypy/pypy/changeset/70dfe4f14f67/ Log: Fix: the 'flags' argument to setxattr() had no effect diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -2331,12 +2331,12 @@ raise oefmt(space.w_ValueError, "setxattr: cannot use fd and follow_symlinks together") try: - rposix.fsetxattr(path.as_fd, attribute.as_bytes, value) + rposix.fsetxattr(path.as_fd, attribute.as_bytes, value, flags) except OSError as e: raise wrap_oserror(space, e, path.as_bytes) else: try: - rposix.setxattr(path.as_bytes, attribute.as_bytes, value, + rposix.setxattr(path.as_bytes, attribute.as_bytes, value, flags, follow_symlinks=follow_symlinks) except OSError as e: raise wrap_oserror(space, e, path.as_bytes) diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -1455,6 +1455,8 @@ excinfo = raises(OSError, os.getxattr, self.path, 'user.test') assert excinfo.value.filename == self.path os.setxattr(self.path, 'user.test', b'', os.XATTR_CREATE, follow_symlinks=False) + raises(OSError, + os.setxattr, self.path, 'user.test', b'', os.XATTR_CREATE) assert os.getxattr(self.path, 'user.test') == b'' os.setxattr(self.path, b'user.test', b'foo', os.XATTR_REPLACE) assert os.getxattr(self.path, 'user.test', follow_symlinks=False) == b'foo' From pypy.commits at gmail.com Thu Dec 28 14:30:49 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 28 Dec 2017 11:30:49 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.5-v5.9.x: fix for win32 Message-ID: <5a454669.478edf0a.61a81.aa39@mx.google.com> Author: Matti Picus Branch: release-pypy3.5-v5.9.x Changeset: r93591:737d3f5af2ce Date: 2017-12-25 22:02 +0200 http://bitbucket.org/pypy/pypy/changeset/737d3f5af2ce/ Log: fix for win32 (grafted from 85e44c9458db62931917a86f8614d131b136aaff) diff --git a/pypy/module/errno/interp_errno.py b/pypy/module/errno/interp_errno.py --- a/pypy/module/errno/interp_errno.py +++ b/pypy/module/errno/interp_errno.py @@ -1,6 +1,7 @@ from rpython.rlib.objectmodel import not_rpython from rpython.rtyper.tool.rffi_platform import DefinedConstantInteger, configure from rpython.translator.tool.cbuild import ExternalCompilationInfo +import sys # from CPython 3.5 errors = [ @@ -40,7 +41,7 @@ "WSAEREMOTE", "WSAEINVAL", "WSAEINPROGRESS", "WSAGETSELECTEVEN", "WSAESOCKTNOSUPPORT", "WSAGETASYNCERRO", "WSAMAKESELECTREPL", "WSAGETASYNCBUFLE", "WSAEDESTADDRREQ", "WSAECONNREFUSED", "WSAENETRESET", - "WSAN",] + "WSAN", "WSAEDQUOT"] more_errors = [ "ENOMEDIUM", "EMEDIUMTYPE", "ECANCELED", "ENOKEY", "EKEYEXPIRED", @@ -55,10 +56,12 @@ "EFTYPE", "ENEEDAUTH", "ENOATTR", "ENOPOLICY", "EPROCLIM", "EPROCUNAVAIL", 
"EPROGMISMATCH", "EPROGUNAVAIL", "EPWROFF", "ERPCMISMATCH", "ESHLIBVERS"] - +includes = ['errno.h'] +if sys.platform == 'win32': + includes.append('winsock2.h') class CConfig: - _compilation_info_ = ExternalCompilationInfo(includes=['errno.h']) + _compilation_info_ = ExternalCompilationInfo(includes=includes) for err_name in errors + win_errors + more_errors: setattr(CConfig, err_name, DefinedConstantInteger(err_name)) @@ -77,7 +80,7 @@ assert name.startswith('WSA') code = config[name] if code is not None: - if name[3:] in errors: + if name[3:] in errors and name[3:] not in name2code: # errno.EFOO = name2code[name[3:]] = code # errno.WSABAR = From pypy.commits at gmail.com Thu Dec 28 14:30:51 2017 From: pypy.commits at gmail.com (hroncok) Date: Thu, 28 Dec 2017 11:30:51 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.5-v5.9.x: Fix: TypeError: 'str' does not support the buffer interface Message-ID: <5a45466b.06abdf0a.3e050.e0b3@mx.google.com> Author: Miro Hrončok Branch: release-pypy3.5-v5.9.x Changeset: r93592:ac1ac8fceed5 Date: 2017-12-27 22:50 +0000 http://bitbucket.org/pypy/pypy/changeset/ac1ac8fceed5/ Log: Fix: TypeError: 'str' does not support the buffer interface Fixes https://bitbucket.org/pypy/pypy/issues/2718 (grafted from 0551d04959425ea4a8ff7e87a5d357d03936cde0) diff --git a/lib_pypy/pyrepl/unix_console.py b/lib_pypy/pyrepl/unix_console.py --- a/lib_pypy/pyrepl/unix_console.py +++ b/lib_pypy/pyrepl/unix_console.py @@ -500,7 +500,7 @@ os.write(self.output_fd, fmt[:x]) fmt = fmt[y:] delay = int(m.group(1)) - if '*' in m.group(2): + if b'*' in m.group(2): delay *= self.height if self._pad: nchars = (bps*delay)/1000 From pypy.commits at gmail.com Thu Dec 28 14:30:46 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 28 Dec 2017 11:30:46 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.5-v5.9.x: Fix 248a5a9859ef (probably) Message-ID: <5a454666.c3a6df0a.8c75.2400@mx.google.com> Author: Armin Rigo Branch: release-pypy3.5-v5.9.x Changeset: r93590:56f9a3f7b588 Date: 2017-12-25 09:30 +0100 http://bitbucket.org/pypy/pypy/changeset/56f9a3f7b588/ Log: Fix 248a5a9859ef (probably) (grafted from 36cb02a2cd901f153c8da24196fe3b676235c43f) diff --git a/pypy/interpreter/timeutils.py b/pypy/interpreter/timeutils.py --- a/pypy/interpreter/timeutils.py +++ b/pypy/interpreter/timeutils.py @@ -4,6 +4,7 @@ import math from rpython.rlib.rarithmetic import ( r_longlong, ovfcheck, ovfcheck_float_to_longlong) +from rpython.rlib import rfloat from pypy.interpreter.error import oefmt SECS_TO_NS = 10 ** 9 @@ -21,6 +22,8 @@ def timestamp_w(space, w_secs): if space.isinstance_w(w_secs, space.w_float): secs = space.float_w(w_secs) + if rfloat.isnan(secs): + raise oefmt(space.w_ValueError, "timestamp is nan") result_float = math.ceil(secs * SECS_TO_NS) try: return ovfcheck_float_to_longlong(result_float) From pypy.commits at gmail.com Thu Dec 28 14:30:55 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 28 Dec 2017 11:30:55 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.5-v5.9.x: Fix: the 'flags' argument to setxattr() had no effect Message-ID: <5a45466f.e5b2df0a.10835.ebd3@mx.google.com> Author: Ronan Lamy Branch: release-pypy3.5-v5.9.x Changeset: r93594:f05e6bdccc8d Date: 2017-12-28 16:56 +0100 http://bitbucket.org/pypy/pypy/changeset/f05e6bdccc8d/ Log: Fix: the 'flags' argument to setxattr() had no effect (grafted from 70dfe4f14f678cefb5bdc58ed4ac4b354c30cf8b) diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py 
+++ b/pypy/module/posix/interp_posix.py @@ -2331,12 +2331,12 @@ raise oefmt(space.w_ValueError, "setxattr: cannot use fd and follow_symlinks together") try: - rposix.fsetxattr(path.as_fd, attribute.as_bytes, value) + rposix.fsetxattr(path.as_fd, attribute.as_bytes, value, flags) except OSError as e: raise wrap_oserror(space, e, path.as_bytes) else: try: - rposix.setxattr(path.as_bytes, attribute.as_bytes, value, + rposix.setxattr(path.as_bytes, attribute.as_bytes, value, flags, follow_symlinks=follow_symlinks) except OSError as e: raise wrap_oserror(space, e, path.as_bytes) diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -1455,6 +1455,8 @@ excinfo = raises(OSError, os.getxattr, self.path, 'user.test') assert excinfo.value.filename == self.path os.setxattr(self.path, 'user.test', b'', os.XATTR_CREATE, follow_symlinks=False) + raises(OSError, + os.setxattr, self.path, 'user.test', b'', os.XATTR_CREATE) assert os.getxattr(self.path, 'user.test') == b'' os.setxattr(self.path, b'user.test', b'foo', os.XATTR_REPLACE) assert os.getxattr(self.path, 'user.test', follow_symlinks=False) == b'foo' From pypy.commits at gmail.com Thu Dec 28 14:30:57 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 28 Dec 2017 11:30:57 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.5-v5.9.x: update version numbers Message-ID: <5a454671.06c4df0a.325e8.234a@mx.google.com> Author: Matti Picus Branch: release-pypy3.5-v5.9.x Changeset: r93595:291eb92c6b5d Date: 2017-12-28 20:14 +0200 http://bitbucket.org/pypy/pypy/changeset/291eb92c6b5d/ Log: update version numbers diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h --- a/pypy/module/cpyext/include/patchlevel.h +++ b/pypy/module/cpyext/include/patchlevel.h @@ -29,8 +29,8 @@ #define PY_VERSION "3.5.3" /* PyPy version as a string */ -#define PYPY_VERSION "5.10.0" -#define PYPY_VERSION_NUM 0x050A0000 +#define PYPY_VERSION "5.10.1" +#define PYPY_VERSION_NUM 0x050A0100 /* Defined to mean a PyPy where cpyext holds more regular references to PyObjects, e.g. 
staying alive as long as the internal PyPy object diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py --- a/pypy/module/sys/version.py +++ b/pypy/module/sys/version.py @@ -10,7 +10,7 @@ #XXX # sync CPYTHON_VERSION with patchlevel.h, package.py CPYTHON_API_VERSION = 1013 #XXX # sync with include/modsupport.h -PYPY_VERSION = (5, 10, 0, "final", 0) #XXX # sync patchlevel.h +PYPY_VERSION = (5, 10, 1, "final", 0) #XXX # sync patchlevel.h import pypy From pypy.commits at gmail.com Thu Dec 28 14:30:53 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 28 Dec 2017 11:30:53 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.5-v5.9.x: Fix issue #2717 Message-ID: <5a45466d.ce99df0a.5cf70.f036@mx.google.com> Author: Ronan Lamy Branch: release-pypy3.5-v5.9.x Changeset: r93593:21617b1a2e41 Date: 2017-12-28 15:57 +0100 http://bitbucket.org/pypy/pypy/changeset/21617b1a2e41/ Log: Fix issue #2717 (grafted from f145d85043878194d7eee33b2049063843e032d8) diff --git a/pypy/interpreter/test/test_timeutils.py b/pypy/interpreter/test/test_timeutils.py new file mode 100644 --- /dev/null +++ b/pypy/interpreter/test/test_timeutils.py @@ -0,0 +1,13 @@ +import pytest +from rpython.rlib.rarithmetic import r_longlong +from pypy.interpreter.error import OperationError +from pypy.interpreter.timeutils import timestamp_w + +def test_timestamp_w(space): + w_1_year = space.newint(365 * 24 * 3600) + result = timestamp_w(space, w_1_year) + assert isinstance(result, r_longlong) + assert result // 10 ** 9 == space.int_w(w_1_year) + w_millenium = space.mul(w_1_year, space.newint(1000)) + with pytest.raises(OperationError): # timestamps overflow after ~300 years + timestamp_w(space, w_millenium) diff --git a/pypy/interpreter/timeutils.py b/pypy/interpreter/timeutils.py --- a/pypy/interpreter/timeutils.py +++ b/pypy/interpreter/timeutils.py @@ -3,7 +3,7 @@ """ import math from rpython.rlib.rarithmetic import ( - r_longlong, ovfcheck, ovfcheck_float_to_longlong) + r_longlong, ovfcheck_float_to_longlong) from rpython.rlib import rfloat from pypy.interpreter.error import oefmt @@ -31,10 +31,10 @@ raise oefmt(space.w_OverflowError, "timestamp %R too large to convert to C _PyTime_t", w_secs) else: - sec = space.int_w(w_secs) try: - result = ovfcheck(sec * SECS_TO_NS) + sec = space.bigint_w(w_secs).tolonglong() + result = sec * r_longlong(SECS_TO_NS) except OverflowError: raise oefmt(space.w_OverflowError, - "timestamp too large to convert to C _PyTime_t") - return r_longlong(result) + "timestamp %R too large to convert to C _PyTime_t", w_secs) + return result From pypy.commits at gmail.com Thu Dec 28 15:22:28 2017 From: pypy.commits at gmail.com (stefanor) Date: Thu, 28 Dec 2017 12:22:28 -0800 (PST) Subject: [pypy-commit] pypy default: rvmprof: Handle ProcessorAutodetectError Message-ID: <5a455284.f8b8df0a.228c9.d2bd@mx.google.com> Author: Stefano Rivera Branch: Changeset: r93596:9472e9d10a85 Date: 2017-12-28 21:21 +0100 http://bitbucket.org/pypy/pypy/changeset/9472e9d10a85/ Log: rvmprof: Handle ProcessorAutodetectError detect_cpu only knows about architectures supported by PyPy's JIT diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -16,9 +16,12 @@ pass # vmprof works only on x86 for now -IS_SUPPORTED = detect_cpu.autodetect().startswith('x86') -if sys.platform == 'win32' or sys.platform.startswith("openbsd"): - IS_SUPPORTED = False +IS_SUPPORTED = False +if sys.platform in ('darwin', 'linux', 'linux2'): + try: + 
IS_SUPPORTED = detect_cpu.autodetect().startswith('x86') + except detect_cpu.ProcessorAutodetectError: + pass ROOT = py.path.local(rpythonroot).join('rpython', 'rlib', 'rvmprof') SRC = ROOT.join('src') From pypy.commits at gmail.com Fri Dec 29 02:31:12 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 28 Dec 2017 23:31:12 -0800 (PST) Subject: [pypy-commit] cffi doc-set_source: try to clarify set_source() requires str() Message-ID: <5a45ef40.478f1c0a.bff99.423c@mx.google.com> Author: Matti Picus Branch: doc-set_source Changeset: r3053:889d38233ce8 Date: 2017-12-29 09:30 +0200 http://bitbucket.org/cffi/cffi/changeset/889d38233ce8/ Log: try to clarify set_source() requires str() diff --git a/doc/source/overview.rst b/doc/source/overview.rst --- a/doc/source/overview.rst +++ b/doc/source/overview.rst @@ -76,20 +76,25 @@ ffibuilder = FFI() ffibuilder.set_source("_example", - r""" // passed to the real C compiler + r""" // passed to the real C compiler, + // contains implementation of things declared in cdef() #include #include + struct passwd *mygetpwuid(int uid) { + return getpwuid(uid); + } """, libraries=[]) # or a list of libraries to link with # (more arguments like setup.py's Extension class: # include_dirs=[..], extra_objects=[..], and so on) - ffibuilder.cdef(""" // some declarations from the man page + ffibuilder.cdef(""" + // declarations that are shared between python and C struct passwd { char *pw_name; ...; // literally dot-dot-dot }; - struct passwd *getpwuid(int uid); + struct passwd *mygetpwuid(int uid); """) if __name__ == "__main__": @@ -111,14 +116,14 @@ from _example import ffi, lib - p = lib.getpwuid(0) + p = lib.mygetpwuid(0) assert ffi.string(p.pw_name) == b'root' Note that this works independently of the exact C layout of ``struct passwd`` (it is "API level", as opposed to "ABI level"). It requires a C compiler in order to run ``example_build.py``, but it is much more portable than trying to get the details of the fields of ``struct -passwd`` exactly right. Similarly, we declared ``getpwuid()`` as +passwd`` exactly right. Similarly, we declared ``mygetpwuid()`` as taking an ``int`` argument. On some platforms this might be slightly incorrect---but it does not matter. It is also faster than the ABI mode. 
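Editor's note, not part of changeset 889d38233ce8: the overview text rewritten above contrasts the API mode with the ABI mode it calls slower. For contrast, a minimal in-line ABI-mode sketch follows. It is an editorial illustration only, assuming a non-Windows platform whose C library can be loaded with ffi.dlopen(None); printf() is used instead of getpwuid() so that no guess about the platform's struct passwd layout is needed.

    from cffi import FFI

    ffi = FFI()
    ffi.cdef("""
        int printf(const char *format, ...);   // copy-pasted from the man page
    """)
    C = ffi.dlopen(None)                  # load the whole C namespace (not available on Windows)
    arg = ffi.new("char[]", b"world")     # same as the C code: char arg[] = "world";
    C.printf(b"hi there, %s.\n", arg)     # variadic arguments are passed as cdata objects

No compiler is invoked and no extension module is built, which is convenient for quick experiments, but every declaration must already match the platform ABI exactly; that is the trade-off the new overview wording spells out.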
From pypy.commits at gmail.com Fri Dec 29 03:01:58 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 29 Dec 2017 00:01:58 -0800 (PST) Subject: [pypy-commit] extradoc extradoc: start thinking about leysin sprint Message-ID: <5a45f676.af96df0a.1da9b.9eb5@mx.google.com> Author: Matti Picus Branch: extradoc Changeset: r5856:ecb8d7f22159 Date: 2017-12-29 10:01 +0200 http://bitbucket.org/pypy/extradoc/changeset/ecb8d7f22159/ Log: start thinking about leysin sprint diff --git a/planning/sprint-leysin-2018-notes.rst b/planning/sprint-leysin-2018-notes.rst new file mode 100644 --- /dev/null +++ b/planning/sprint-leysin-2018-notes.rst @@ -0,0 +1,8 @@ +Tasks +===== + +- cffi tutorial/overview rewrite +- py3 test runners are too complicated +- make win32 builds green +- make packaging more like cpython/portable builds +- get CI builders for PyPy into mainstream projects (Numpy, Scipy, lxml, uwsgi) From pypy.commits at gmail.com Fri Dec 29 03:17:29 2017 From: pypy.commits at gmail.com (arigo) Date: Fri, 29 Dec 2017 00:17:29 -0800 (PST) Subject: [pypy-commit] pypy default: Issue #2719: Keyword arguments to built-in functions Message-ID: <5a45fa19.ce99df0a.5cf70.187d@mx.google.com> Author: Armin Rigo Branch: Changeset: r93597:0eb832cbb13a Date: 2017-12-29 09:17 +0100 http://bitbucket.org/pypy/pypy/changeset/0eb832cbb13a/ Log: Issue #2719: Keyword arguments to built-in functions diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -545,6 +545,15 @@ ``del foo.bar`` where ``foo`` is a module (or class) that contains the function ``bar``, is significantly slower than CPython. +* Various built-in functions in CPython accept only positional arguments + and not keyword arguments. That can be considered a long-running + historical detail: newer functions tend to accept keyword arguments + and older function are occasionally fixed to do so as well. In PyPy, + most built-in functions accept keyword arguments (``help()`` shows the + argument names). But don't rely on it too much because future + versions of PyPy may have to rename the arguments if CPython starts + accepting them too. + .. _`is ignored in PyPy`: http://bugs.python.org/issue14621 .. _`little point`: http://events.ccc.de/congress/2012/Fahrplan/events/5152.en.html .. 
_`#2072`: https://bitbucket.org/pypy/pypy/issue/2072/ From pypy.commits at gmail.com Fri Dec 29 07:33:25 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 29 Dec 2017 04:33:25 -0800 (PST) Subject: [pypy-commit] pypy py3.5: add more info for buildbot virtualenv run with -A Message-ID: <5a463615.06811c0a.5dde2.28bb@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r93598:213db4a13e7c Date: 2017-12-29 13:19 +0200 http://bitbucket.org/pypy/pypy/changeset/213db4a13e7c/ Log: add more info for buildbot virtualenv run with -A diff --git a/testrunner/get_info.py b/testrunner/get_info.py --- a/testrunner/get_info.py +++ b/testrunner/get_info.py @@ -8,14 +8,20 @@ import json BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) -TARGET_BASENAME = 'pypy3-c' +if sys.platform.startswith('win'): + TARGET_NAME = r'Scripts\\pypy3-c.exe' + TARGET_DIR = 'Scripts' +else: + TARGET_NAME = 'pypy3-c' + TARGET_DIR = 'bin' +VENV_DIR = 'pypy-venv' def make_info_dict(): - target = TARGET_BASENAME - if sys.platform.startswith('win'): - target += '.exe' - target_path = os.path.join(BASE_DIR, 'pypy', 'goal', target) - return {'target_path': target_path} + target_path = os.path.join(BASE_DIR, 'pypy', 'goal', TARGET_NAME) + return {'target_path': target_path, + 'virt_pypy': os.path.join(VENV_DIR, TARGET_DIR, TARGET_NAME), + 'venv_dir': VENV_DIR, + } def dump_info(): return json.dumps(make_info_dict()) From pypy.commits at gmail.com Fri Dec 29 09:16:39 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 29 Dec 2017 06:16:39 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix target_name Message-ID: <5a464e47.b0abdf0a.8b3f.9fd3@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r93599:3e7cb4758b05 Date: 2017-12-29 16:15 +0200 http://bitbucket.org/pypy/pypy/changeset/3e7cb4758b05/ Log: fix target_name diff --git a/testrunner/get_info.py b/testrunner/get_info.py --- a/testrunner/get_info.py +++ b/testrunner/get_info.py @@ -9,7 +9,7 @@ BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) if sys.platform.startswith('win'): - TARGET_NAME = r'Scripts\\pypy3-c.exe' + TARGET_NAME = r'pypy3-c.exe' TARGET_DIR = 'Scripts' else: TARGET_NAME = 'pypy3-c' From pypy.commits at gmail.com Fri Dec 29 09:23:39 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 29 Dec 2017 06:23:39 -0800 (PST) Subject: [pypy-commit] buildbot default: use more Properties from testrunner/get_info, leave old defaults in place Message-ID: <5a464feb.1dbf1c0a.230b9.8abb@mx.google.com> Author: Matti Picus Branch: Changeset: r1053:424f8ecee620 Date: 2017-12-29 14:40 +0200 http://bitbucket.org/pypy/buildbot/changeset/424f8ecee620/ Log: use more Properties from testrunner/get_info, leave old defaults in place diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -468,14 +468,17 @@ env={"TMPDIR": Interpolate('%(prop:target_tmpdir)s' + pytest), })) if platform == 'win32': - virt_pypy = r'..\venv\pypy-venv\Scripts\python.exe' + virt_pypy = r'pypy-venv\Scripts\python.exe' clean = 'rmdir /s /q pypy-venv' virt_package = 'git+git://github.com/pypa/virtualenv at master' else: - virt_pypy = '../venv/pypy-venv/bin/python' + virt_pypy = 'pypy-venv/bin/python' clean = 'rm -rf pypy-venv' virt_package = 'virtualenv' + # set from testrunner/get_info.py target = Property('target_path') + venv_dir = Property('venv_dir', default = 'pypy-venv') + virt_pypy = Property('virt_pypy', default=virt_pypy) factory.addStep(ShellCmd( 
description="ensurepip", command=prefix + [target, '-mensurepip'], @@ -493,20 +496,20 @@ flunkOnFailure=True)) factory.addStep(ShellCmd( description="Create virtualenv", - command=prefix + [target, '-mvirtualenv', '--clear', 'pypy-venv'], + command=prefix + [target, '-mvirtualenv', '--clear', venv_dir], workdir='venv', flunkOnFailure=True)) factory.addStep(ShellCmd( description="Install extra tests requirements", command=prefix + [virt_pypy, '-m', 'pip', 'install', '-r', '../build/extra_tests/requirements.txt'], - workdir='testing')) + workdir='venv')) factory.addStep(PytestCmd( description="Run extra tests", command=prefix + [virt_pypy, '-m', 'pytest', '../build/extra_tests', '--resultlog=extra.log'], logfiles={'pytestLog': 'extra.log'}, - workdir='testing')) + workdir='venv')) if lib_python: factory.addStep(PytestCmd( From pypy.commits at gmail.com Fri Dec 29 14:10:13 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 29 Dec 2017 11:10:13 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix err.filename when *xattr() functions raise an OSError Message-ID: <5a469315.0eb7df0a.a5fea.db79@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93600:49d834e68cbe Date: 2017-12-29 20:06 +0100 http://bitbucket.org/pypy/pypy/changeset/49d834e68cbe/ Log: Fix err.filename when *xattr() functions raise an OSError diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -2304,13 +2304,13 @@ try: result = rposix.fgetxattr(path.as_fd, attribute.as_bytes) except OSError as e: - raise wrap_oserror(space, e, path.as_bytes) + raise wrap_oserror2(space, e, path.w_path) else: try: result = rposix.getxattr(path.as_bytes, attribute.as_bytes, follow_symlinks=follow_symlinks) except OSError as e: - raise wrap_oserror(space, e, path.as_bytes) + raise wrap_oserror2(space, e, path.w_path) return space.newbytes(result) @unwrap_spec(path=path_or_fd(), attribute=path_or_fd(allow_fd=False), @@ -2333,13 +2333,13 @@ try: rposix.fsetxattr(path.as_fd, attribute.as_bytes, value, flags) except OSError as e: - raise wrap_oserror(space, e, path.as_bytes) + raise wrap_oserror2(space, e, path.w_path) else: try: rposix.setxattr(path.as_bytes, attribute.as_bytes, value, flags, follow_symlinks=follow_symlinks) except OSError as e: - raise wrap_oserror(space, e, path.as_bytes) + raise wrap_oserror2(space, e, path.w_path) @unwrap_spec(path=path_or_fd(), attribute=path_or_fd(allow_fd=False), @@ -2359,13 +2359,13 @@ try: rposix.fremovexattr(path.as_fd, attribute.as_bytes) except OSError as e: - raise wrap_oserror(space, e, path.as_bytes) + raise wrap_oserror2(space, e, path.w_path) else: try: rposix.removexattr(path.as_bytes, attribute.as_bytes, follow_symlinks=follow_symlinks) except OSError as e: - raise wrap_oserror(space, e, path.as_bytes) + raise wrap_oserror2(space, e, path.w_path) @unwrap_spec(path=path_or_fd(), follow_symlinks=bool) @@ -2386,12 +2386,12 @@ try: result = rposix.flistxattr(path.as_fd) except OSError as e: - raise wrap_oserror(space, e, eintr_retry=False) + raise wrap_oserror2(space, e, path.w_path) else: try: result = rposix.listxattr(path.as_bytes, follow_symlinks) except OSError as e: - raise wrap_oserror(space, e, path.as_bytes) + raise wrap_oserror2(space, e, path.w_path) return space.newlist([space.newfilename(attr) for attr in result]) From pypy.commits at gmail.com Sat Dec 30 02:37:24 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 29 Dec 2017 23:37:24 -0800 (PST) Subject: 
[pypy-commit] pypy default: fix for win32 Message-ID: <5a474234.cd5c1c0a.4eca2.1945@mx.google.com> Author: Matti Picus Branch: Changeset: r93601:c6c9d44fae1c Date: 2017-12-30 07:36 +0000 http://bitbucket.org/pypy/pypy/changeset/c6c9d44fae1c/ Log: fix for win32 diff --git a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c b/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c --- a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c +++ b/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c @@ -3,7 +3,11 @@ #define MS_WIN32 #endif -#define EXPORT(x) extern x +#ifdef _WIN32 +#define EXPORT(x) __declspec(dllexport) x +#else +#define EXPORT(x) extern x +#endif #include #include From pypy.commits at gmail.com Sat Dec 30 12:25:02 2017 From: pypy.commits at gmail.com (mattip) Date: Sat, 30 Dec 2017 09:25:02 -0800 (PST) Subject: [pypy-commit] pypy default: pypy uses bin not Scripts, https://github.com/pypa/virtualenv/issues/789 Message-ID: <5a47cbee.6199df0a.63cfd.ce11@mx.google.com> Author: Matti Picus Branch: Changeset: r93602:aca4a901f9b8 Date: 2017-12-30 19:23 +0200 http://bitbucket.org/pypy/pypy/changeset/aca4a901f9b8/ Log: pypy uses bin not Scripts, https://github.com/pypa/virtualenv/issues/789 diff --git a/testrunner/get_info.py b/testrunner/get_info.py --- a/testrunner/get_info.py +++ b/testrunner/get_info.py @@ -8,14 +8,21 @@ import json BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) -TARGET_BASENAME = 'pypy-c' +if sys.platform.startswith('win'): + TARGET_NAME = r'pypy-c.exe' + # see https://github.com/pypa/virtualenv/issues/789 + TARGET_DIR = 'bin' +else: + TARGET_NAME = 'pypy3-c' + TARGET_DIR = 'bin' +VENV_DIR = 'pypy-venv' def make_info_dict(): - target = TARGET_BASENAME - if sys.platform.startswith('win'): - target += '.exe' - target_path = os.path.join(BASE_DIR, 'pypy', 'goal', target) - return {'target_path': target_path} + target_path = os.path.join(BASE_DIR, 'pypy', 'goal', TARGET_NAME) + return {'target_path': target_path, + 'virt_pypy': os.path.join(VENV_DIR, TARGET_DIR, TARGET_NAME), + 'venv_dir': VENV_DIR, + } def dump_info(): return json.dumps(make_info_dict()) From pypy.commits at gmail.com Sat Dec 30 12:25:29 2017 From: pypy.commits at gmail.com (mattip) Date: Sat, 30 Dec 2017 09:25:29 -0800 (PST) Subject: [pypy-commit] pypy py3.5: pypy uses bin not Scripts, https://github.com/pypa/virtualenv/issues/789 Message-ID: <5a47cc09.6199df0a.63cfd.ce50@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r93603:c53457980876 Date: 2017-12-30 19:19 +0200 http://bitbucket.org/pypy/pypy/changeset/c53457980876/ Log: pypy uses bin not Scripts, https://github.com/pypa/virtualenv/issues/789 diff --git a/testrunner/get_info.py b/testrunner/get_info.py --- a/testrunner/get_info.py +++ b/testrunner/get_info.py @@ -10,7 +10,8 @@ BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) if sys.platform.startswith('win'): TARGET_NAME = r'pypy3-c.exe' - TARGET_DIR = 'Scripts' + # see https://github.com/pypa/virtualenv/issues/789 + TARGET_DIR = 'bin' else: TARGET_NAME = 'pypy3-c' TARGET_DIR = 'bin' From pypy.commits at gmail.com Sat Dec 30 12:43:55 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 30 Dec 2017 09:43:55 -0800 (PST) Subject: [pypy-commit] pypy default: Fix for pypy2? Message-ID: <5a47d05b.fb91500a.48e7d.d7c4@mx.google.com> Author: Armin Rigo Branch: Changeset: r93604:97fc8ffd2f01 Date: 2017-12-30 18:43 +0100 http://bitbucket.org/pypy/pypy/changeset/97fc8ffd2f01/ Log: Fix for pypy2? 
diff --git a/testrunner/get_info.py b/testrunner/get_info.py --- a/testrunner/get_info.py +++ b/testrunner/get_info.py @@ -13,7 +13,7 @@ # see https://github.com/pypa/virtualenv/issues/789 TARGET_DIR = 'bin' else: - TARGET_NAME = 'pypy3-c' + TARGET_NAME = 'pypy-c' TARGET_DIR = 'bin' VENV_DIR = 'pypy-venv'
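Editor's note on the testrunner/get_info.py churn above (r93598, r93599, r93602, r93603, r93604) and the buildbot change r1053 that consumes the resulting Properties: the sketch below is illustrative only and belongs to no changeset. The helper virt_pypy(platform) is hypothetical; the real module branches on sys.platform at import time. It simply mirrors the final default-branch constants to show what the 'virt_pypy' entry of make_info_dict() works out to.

    # Illustrative sketch, not from any changeset above: mirrors the platform
    # branch of testrunner/get_info.py after r93604 (default branch) to show
    # the value that ends up in the 'virt_pypy' build property.
    import os

    VENV_DIR = 'pypy-venv'

    def virt_pypy(platform):
        if platform.startswith('win'):
            # pypy virtualenvs use bin/ rather than Scripts/ even on Windows,
            # see https://github.com/pypa/virtualenv/issues/789
            target_name, target_dir = 'pypy-c.exe', 'bin'
        else:
            target_name, target_dir = 'pypy-c', 'bin'
        return os.path.join(VENV_DIR, target_dir, target_name)

    assert virt_pypy('linux2') == os.path.join('pypy-venv', 'bin', 'pypy-c')
    assert virt_pypy('win32') == os.path.join('pypy-venv', 'bin', 'pypy-c.exe')

On the py3.5 branch (r93598, r93599, r93603) the same logic uses 'pypy3-c' and 'pypy3-c.exe' for the target name.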