[pypy-commit] pypy py3.6-asyncgen: hg merge py3.6

rlamy pypy.commits at gmail.com
Tue Oct 15 13:43:30 EDT 2019


Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch: py3.6-asyncgen
Changeset: r97780:3ad9df8709f9
Date: 2019-10-15 18:42 +0100
http://bitbucket.org/pypy/pypy/changeset/3ad9df8709f9/

Log:	hg merge py3.6

diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -50,4 +50,8 @@
 de061d87e39c7df4e436974096d7982c676a859d release-pypy3.6-v7.1.0
 784b254d669919c872a505b807db8462b6140973 release-pypy3.6-v7.1.1
 8cdda8b8cdb8ff29d9e620cccd6c5edd2f2a23ec release-pypy2.7-v7.1.1
-
+85dae4fd5c234b482feff834c73e089872194541 release-pypy2.7-v7.2.0rc0
+7ffb92269488f37c707ce66076f50ffd8613f8e2 release-pypy3.6-v7.2.0rc0
+4d6761df14ffd6f38450f183ac1fad32c946c21b release-pypy3.6-v7.2.0rc1
+5da45ced70e515f94686be0df47c59abd1348ebc release-pypy3.6-v7.2.0rc2
+4a68d8d3d2fc1faec2e83bcb4d28559099092574 release-pypy2.7-v7.2.0rc2
diff --git a/lib_pypy/_lzma.py b/lib_pypy/_lzma.py
--- a/lib_pypy/_lzma.py
+++ b/lib_pypy/_lzma.py
@@ -583,8 +583,16 @@
                 self.clear_input_buffer()
             elif lzs.avail_in == 0:
                 # completed successfully!
-                self.needs_input = True
                 lzs.next_in = ffi.NULL
+                if lzs.avail_out == 0:
+                    # (avail_in==0 && avail_out==0)
+                    # lzs's internal state may still hold a few bytes that
+                    # can be output; try to output them next time.
+                    self.needs_input = False
+                    assert max_length >= 0   # if < 0, lzs.avail_out always > 0
+                else:
+                    # Input buffer exhausted, output buffer has space.
+                    self.needs_input = True
                 self.clear_input_buffer()
             else:
                 self.needs_input = False
@@ -599,9 +607,6 @@
         lzs.next_in = buf
         lzs.avail_in = buf_len
 
-        if buf_len == 0:
-            return b""
-
         bufsiz = self._bufsiz
         if not (max_length < 0 or max_length > io.DEFAULT_BUFFER_SIZE):
             bufsiz = max_length
@@ -616,7 +621,8 @@
 
         try:
             while True:
-                ret = catch_lzma_error(m.lzma_code, lzs, m.LZMA_RUN)
+                ret = catch_lzma_error(m.lzma_code, lzs, m.LZMA_RUN,
+                    ignore_buf_error=(lzs.avail_in == 0 and lzs.avail_out > 0))
                 data_size = int(ffi.cast('uintptr_t', lzs.next_out)) - int(ffi.cast('uintptr_t', orig_out))
                 # data_size is the amount lzma_code has already outputted
 
@@ -626,14 +632,17 @@
                 if ret == m.LZMA_STREAM_END:
                     self.eof = True
                     break
-                elif lzs.avail_in == 0:
-                    # it ate everything
-                    break
                 elif lzs.avail_out == 0:
+                    # Need to check lzs->avail_out before lzs->avail_in.
+                    # lzs's internal state may still hold a few bytes that
+                    # can be output; grow the output buffer and continue
+                    # if max_length < 0.
                     if data_size == max_length:
                         break
                     # ran out of space in the output buffer, let's grow it
                     bufsiz += (bufsiz >> 3) + 6
+                    if max_length > 0 and bufsiz > max_length:
+                        bufsiz = max_length
                     next_out = m.realloc(orig_out, bufsiz)
                     if next_out == ffi.NULL:
                         # realloc unsuccessful
@@ -645,6 +654,9 @@
 
                     lzs.next_out = orig_out + data_size
                     lzs.avail_out = bufsiz - data_size
+                elif lzs.avail_in == 0:
+                    # it ate everything
+                    break
 
             result = ffi.buffer(orig_out, data_size)[:]
         finally:
diff --git a/pypy/doc/commandline_ref.rst b/pypy/doc/commandline_ref.rst
--- a/pypy/doc/commandline_ref.rst
+++ b/pypy/doc/commandline_ref.rst
@@ -9,3 +9,4 @@
 
    man/pypy.1.rst
    man/pypy3.1.rst
+   jit_help.rst
diff --git a/pypy/doc/index-of-whatsnew.rst b/pypy/doc/index-of-whatsnew.rst
--- a/pypy/doc/index-of-whatsnew.rst
+++ b/pypy/doc/index-of-whatsnew.rst
@@ -42,7 +42,8 @@
 -------------------------------
 
 .. toctree::
-   whatsnew-pypy3-head.rst
+    whatsnew-pypy3-head.rst
+    whatsnew-pypy3-7.1.0.rst
 
 CPython 3.5 compatible versions
 -------------------------------
@@ -50,6 +51,8 @@
 .. toctree::
 
    whatsnew-pypy3-7.0.0.rst
+   whatsnew-pypy3-6.0.0.rst
+   whatsnew-pypy3-5.10.0.rst
    whatsnew-pypy3-5.9.0.rst
    whatsnew-pypy3-5.8.0.rst
    whatsnew-pypy3-5.7.0.rst
@@ -62,10 +65,4 @@
    whatsnew-pypy3-5.5.0.rst
    whatsnew-pypy3-5.1.1-alpha1.rst
 
-CPython 3.2 compatible versions
--------------------------------
 
-.. toctree::
-
-   whatsnew-pypy3-2.4.0.rst
-   whatsnew-pypy3-2.3.1.rst
diff --git a/pypy/doc/jit_help.rst b/pypy/doc/jit_help.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/jit_help.rst
@@ -0,0 +1,78 @@
+========
+JIT help
+========
+
+.. note this is from ``pypy --jit help``
+
+Advanced JIT options
+====================
+
+``<pypy> --jit`` [*options*] where *options* is a comma-separated list of
+``OPTION=VALUE``:
+
+ decay=N
+    amount to regularly decay counters by (0=none, 1000=max) (default 40)
+
+ disable_unrolling=N
+    after how many operations we should not unroll (default 200)
+
+ enable_opts=N
+    INTERNAL USE ONLY (MAY NOT WORK OR LEAD TO CRASHES): optimizations to
+    enable, or all =
+    intbounds:rewrite:virtualize:string:pure:earlyforce:heap:unroll (default
+    all)
+
+ function_threshold=N
+    number of times a function must run for it to become traced from start
+    (default 1619)
+
+ inlining=N
+    inline python functions or not (1/0) (default 1)
+
+ loop_longevity=N
+    a parameter controlling how long loops will be kept before being freed,
+    an estimate (default 1000)
+
+ max_retrace_guards=N
+    number of extra guards a retrace can cause (default 15)
+
+ max_unroll_loops=N
+    number of extra unrollings a loop can cause (default 0)
+
+ max_unroll_recursion=N
+    how many levels deep to unroll a recursive function (default 7)
+
+ retrace_limit=N
+    how many times we can try retracing before giving up (default 0)
+
+ threshold=N
+    number of times a loop has to run for it to become hot (default 1039)
+
+ trace_eagerness=N
+    number of times a guard has to fail before we start compiling a bridge
+    (default 200)
+
+ trace_limit=N
+    number of recorded operations before we abort tracing with ABORT_TOO_LONG
+    (default 6000)
+
+ vec=N
+    turn on the vectorization optimization (vecopt). Supports x86 (SSE 4.1),
+    powerpc (SVX), s390x SIMD (default 0)
+
+ vec_all=N
+    try to vectorize trace loops that occur outside of the numpypy library
+    (default 0)
+
+ vec_cost=N
+    threshold for which traces to bail. Unpacking increases the counter,
+    vector operations decrease the cost (default 0)
+
+ off
+    turn off the JIT
+ help
+    print this page
+
+The :ref:`pypyjit<jit-hooks>` module can be used to control the JIT from inside
+pypy.
+
diff --git a/pypy/doc/release-v7.2.0.rst b/pypy/doc/release-v7.2.0.rst
--- a/pypy/doc/release-v7.2.0.rst
+++ b/pypy/doc/release-v7.2.0.rst
@@ -216,6 +216,7 @@
 * Add more constants to `sysconfig``. Set ``MACOSX_DEPLOYMENT_TARGET`` for
   darwin (`issue 2994`_)
 * fix ``CBuffer.buffer_attach``
+* Add ``_PyDict_GetItemWithError`` (``PyDict_GetItemWithError`` on Python3)
 
 Python 3.6 only
 ---------------
@@ -307,6 +308,7 @@
 .. _33786 : https://bugs.python.org/issue33786
 .. _32270 : https://bugs.python.org/issue32270
 .. _28691 : https://bugs.python.org/issue28691
+.. _33729 : https://bugs.python.org/issue33729
 
 .. _opencv2: https://github.com/skvark/opencv-python/
 .. _`issue 2617`: https://bitbucket.com/pypy/pypy/issues/2617
diff --git a/pypy/doc/whatsnew-pypy3-2.3.1.rst b/pypy/doc/whatsnew-pypy3-2.3.1.rst
deleted file mode 100644
--- a/pypy/doc/whatsnew-pypy3-2.3.1.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-=========================
-What's new in PyPy3 2.3.1
-=========================
-
-.. this is a revision shortly after pypy3-release-2.3.x
-.. startrev: 0137d8e6657d
diff --git a/pypy/doc/whatsnew-pypy3-2.4.0.rst b/pypy/doc/whatsnew-pypy3-2.4.0.rst
deleted file mode 100644
--- a/pypy/doc/whatsnew-pypy3-2.4.0.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-=========================
-What's new in PyPy3 2.4.0
-=========================
-
-.. this is a revision shortly after pypy3-release-2.4.x
-.. startrev: 12b940544622
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1790,8 +1790,7 @@
     def convert_arg_to_w_unicode(self, w_obj, strict=None):
         # XXX why convert_to_w_unicode does something slightly different?
         from pypy.objspace.std.unicodeobject import W_UnicodeObject
-        # for z_translation tests
-        if hasattr(self, 'is_fake_objspace'): return self.newtext("foobar")
+        assert not hasattr(self, 'is_fake_objspace')
         return W_UnicodeObject.convert_arg_to_w_unicode(self, w_obj, strict)
 
     def utf8_len_w(self, w_obj):
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -375,6 +375,14 @@
         return res_utf8, len(res), size
 
 def str_decode_utf8(s, errors, final, errorhandler, allow_surrogates=False):
+    try:
+        # fast version first
+        return s, rutf8.check_utf8(s, allow_surrogates=allow_surrogates), len(s)
+    except rutf8.CheckError:
+        return _str_decode_utf8_slowpath(
+            s, errors, final, errorhandler, allow_surrogates=allow_surrogates)
+
+def _str_decode_utf8_slowpath(s, errors, final, errorhandler, allow_surrogates):
     """ Same as checking for the valid utf8, but we know the utf8 is not
     valid so we're trying to either raise or pack stuff with error handler.
     The key difference is that this is call_may_force
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -737,17 +737,10 @@
         errors = 'strict'
     final = space.is_true(w_final)
     state = space.fromcache(CodecState)
-    # call the fast version for checking
-    try:
-        lgt = rutf8.check_utf8(string, allow_surrogates=False)
-    except rutf8.CheckError:
-        res, lgt, pos = unicodehelper.str_decode_utf8(string,
-            errors, final, state.decode_error_handler)
-        return space.newtuple([space.newutf8(res, lgt),
-                               space.newint(pos)])
-    else:
-        return space.newtuple([space.newutf8(string, lgt),
-                               space.newint(len(string))])
+    res, lgt, pos = unicodehelper.str_decode_utf8(string,
+        errors, final, state.decode_error_handler)
+    return space.newtuple([space.newutf8(res, lgt),
+                           space.newint(pos)])
 
 @unwrap_spec(data='bufferstr', errors='text_or_none', byteorder=int,
              w_final=WrappedDefault(False))
diff --git a/pypy/module/_csv/interp_csv.py b/pypy/module/_csv/interp_csv.py
--- a/pypy/module/_csv/interp_csv.py
+++ b/pypy/module/_csv/interp_csv.py
@@ -1,3 +1,4 @@
+from rpython.rlib import rutf8
 from pypy.interpreter.baseobjspace import W_Root
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.typedef import TypeDef, interp_attrproperty
@@ -47,24 +48,26 @@
     if w_src is None:
         return default
     try:
-        return space.realunicode_w(w_src)
+        return space.text_w(w_src)
     except OperationError as e:
         if e.match(space, space.w_TypeError):
             raise oefmt(space.w_TypeError, '"%s" must be a string', attrname)
         raise
 
-def _get_char(space, w_src, default, name):
+def _get_codepoint(space, w_src, default, name):
     if w_src is None:
         return default
     if space.is_w(w_src, space.w_None):
-        return u'\0'
+        return 0
     if not space.isinstance_w(w_src, space.w_unicode):
         raise oefmt(space.w_TypeError, '"%s" must be string, not %T', name, w_src)
-    src = space.realunicode_w(w_src)
-    if len(src) == 1:
-        return src[0]
+    src, length = space.utf8_len_w(w_src)
+    if length == 1:
+        res = rutf8.codepoint_at_pos(src, 0)
+        assert res >= 0
+        return res
     if len(src) == 0:
-        return u'\0'
+        return 0
     raise oefmt(space.w_TypeError, '"%s" must be a 1-character string', name)
 
 def _build_dialect(space, w_dialect, w_delimiter, w_doublequote,
@@ -104,11 +107,11 @@
             w_strict = _fetch(space, w_dialect, 'strict')
 
     dialect = W_Dialect()
-    dialect.delimiter = _get_char(space, w_delimiter, u',', 'delimiter')
+    dialect.delimiter = _get_codepoint(space, w_delimiter, ord(u','), 'delimiter')
     dialect.doublequote = _get_bool(space, w_doublequote, True)
-    dialect.escapechar = _get_char(space, w_escapechar, u'\0', 'escapechar')
-    dialect.lineterminator = _get_str(space, w_lineterminator, u'\r\n', 'lineterminator')
-    dialect.quotechar = _get_char(space, w_quotechar, u'"', 'quotechar')
+    dialect.escapechar = _get_codepoint(space, w_escapechar, ord(u'\0'), 'escapechar')
+    dialect.lineterminator = _get_str(space, w_lineterminator, '\r\n', 'lineterminator')
+    dialect.quotechar = _get_codepoint(space, w_quotechar, ord(u'"'), 'quotechar')
     tmp_quoting = _get_int(space, w_quoting, QUOTE_MINIMAL, 'quoting')
     dialect.skipinitialspace = _get_bool(space, w_skipinitialspace, False)
     dialect.strict = _get_bool(space, w_strict, False)
@@ -117,13 +120,13 @@
     if not (0 <= tmp_quoting < 4):
         raise oefmt(space.w_TypeError, 'bad "quoting" value')
 
-    if dialect.delimiter == u'\0':
+    if dialect.delimiter == 0:
         raise oefmt(space.w_TypeError,
                     '"delimiter" must be a 1-character string')
 
     if space.is_w(w_quotechar, space.w_None) and w_quoting is None:
         tmp_quoting = QUOTE_NONE
-    if tmp_quoting != QUOTE_NONE and dialect.quotechar == u'\0':
+    if tmp_quoting != QUOTE_NONE and dialect.quotechar == 0:
         raise oefmt(space.w_TypeError,
                     "quotechar must be set if quoting enabled")
     dialect.quoting = tmp_quoting
@@ -158,14 +161,20 @@
 
 
 def _get_escapechar(space, dialect):
-    if dialect.escapechar == u'\0':
+    if dialect.escapechar == 0:
         return space.w_None
-    return space.newtext(dialect.escapechar)
+    s = rutf8.unichr_as_utf8(dialect.escapechar)
+    return space.newutf8(s, 1)
 
 def _get_quotechar(space, dialect):
-    if dialect.quotechar == u'\0':
+    if dialect.quotechar == 0:
         return space.w_None
-    return space.newtext(dialect.quotechar)
+    s = rutf8.unichr_as_utf8(dialect.quotechar)
+    return space.newutf8(s, 1)
+
+def _get_delimiter(space, dialect):
+    s = rutf8.unichr_as_utf8(dialect.delimiter)
+    return space.newutf8(s, 1)
 
 
 W_Dialect.typedef = TypeDef(
@@ -173,8 +182,7 @@
         __new__ = interp2app(W_Dialect___new__),
         __reduce_ex__ = interp2app(W_Dialect.reduce_ex_w),
 
-        delimiter        = interp_attrproperty('delimiter', W_Dialect,
-            wrapfn='newtext'),
+        delimiter        = GetSetProperty(_get_delimiter, cls=W_Dialect),
         doublequote      = interp_attrproperty('doublequote', W_Dialect,
             wrapfn='newbool'),
         escapechar       = GetSetProperty(_get_escapechar, cls=W_Dialect),
diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py
--- a/pypy/module/_csv/interp_reader.py
+++ b/pypy/module/_csv/interp_reader.py
@@ -1,5 +1,4 @@
-from rpython.rlib.rstring import UnicodeBuilder
-from rpython.rlib.rutf8 import Utf8StringIterator
+from rpython.rlib.rutf8 import Utf8StringIterator, Utf8StringBuilder
 from rpython.rlib import objectmodel
 from pypy.interpreter.baseobjspace import W_Root
 from pypy.interpreter.error import OperationError
@@ -22,6 +21,7 @@
         self.dialect = dialect
         self.w_iter = w_iter
         self.line_num = 0
+        self.sizehint = 1  # just used for first line
 
     def iter_w(self):
         return self
@@ -38,22 +38,21 @@
         assert field_builder is not None
         if field_builder.getlength() >= field_limit.limit:
             raise self.error(u"field larger than field limit")
-        field_builder.append(c)
+        field_builder.append_code(c)
 
     def save_field(self, field_builder):
         space = self.space
         field = field_builder.build()
+        w_obj = space.newutf8(field, field_builder.getlength())
         if self.numeric_field:
             self.numeric_field = False
-            w_obj = space.call_function(space.w_float, space.newtext(field))
-        else:
-            w_obj = space.newtext(field)
+            w_obj = space.call_function(space.w_float, w_obj)
         self.fields_w.append(w_obj)
 
     def next_w(self):
         space = self.space
         dialect = self.dialect
-        self.fields_w = []
+        self.fields_w = objectmodel.newlist_hint(self.sizehint)
         self.numeric_field = False
         field_builder = None  # valid iff state not in [START_RECORD, EAT_CRNL]
         state = START_RECORD
@@ -79,13 +78,11 @@
                                  u"(did you open the file in text mode?")
             line = space.utf8_w(w_line)
             for c in Utf8StringIterator(line):
-                # XXX rewrite this to use c (as int) not unichr(c)
-                c = unichr(c)
-                if c == '\0':
+                if c == 0:
                     raise self.error(u"line contains NULL byte")
 
                 if state == START_RECORD:
-                    if c == b'\n' or c == b'\r':
+                    if c == ord(u'\n') or c == ord(u'\r'):
                         state = EAT_CRNL
                         continue
                     # normal character - handle as START_FIELD
@@ -93,9 +90,9 @@
                     # fall-through to the next case
 
                 if state == START_FIELD:
-                    field_builder = UnicodeBuilder(64)
+                    field_builder = Utf8StringBuilder(64)
                     # expecting field
-                    if c == u'\n' or c == u'\r':
+                    if c == ord(u'\n') or c == ord(u'\r'):
                         # save empty field
                         self.save_field(field_builder)
                         state = EAT_CRNL
@@ -106,7 +103,7 @@
                     elif c == dialect.escapechar:
                         # possible escaped character
                         state = ESCAPED_CHAR
-                    elif c == u' ' and dialect.skipinitialspace:
+                    elif c == ord(u' ') and dialect.skipinitialspace:
                         # ignore space at start of field
                         pass
                     elif c == dialect.delimiter:
@@ -120,7 +117,7 @@
                         state = IN_FIELD
 
                 elif state == ESCAPED_CHAR:
-                    if c in '\n\r':
+                    if c == ord(u'\n') or c == ord(u'\r'):
                         self.add_char(field_builder, c)
                         state = AFTER_ESCAPED_CRNL
                     else:
@@ -129,7 +126,7 @@
 
                 elif state == IN_FIELD or state == AFTER_ESCAPED_CRNL:
                     # in unquoted field
-                    if c == u'\n' or c == u'\r':
+                    if c == ord(u'\n') or c == ord(u'\r'):
                         # end of line
                         self.save_field(field_builder)
                         state = EAT_CRNL
@@ -176,7 +173,7 @@
                         # save field - wait for new field
                         self.save_field(field_builder)
                         state = START_FIELD
-                    elif c == u'\n' or c == u'\r':
+                    elif c == ord(u'\n') or c == ord(u'\r'):
                         # end of line
                         self.save_field(field_builder)
                         state = EAT_CRNL
@@ -186,10 +183,10 @@
                     else:
                         # illegal
                         raise self.error(u"'%s' expected after '%s'" % (
-                            dialect.delimiter, dialect.quotechar))
+                            unichr(dialect.delimiter), unichr(dialect.quotechar)))
 
                 elif state == EAT_CRNL:
-                    if not (c == u'\n' or c == u'\r'):
+                    if not (c == ord(u'\n') or c == ord(u'\r')):
                         raise self.error(u"new-line character seen in unquoted "
                                          u"field - do you need to open the file "
                                          u"in universal-newline mode?")
@@ -198,16 +195,16 @@
                 self.save_field(field_builder)
                 break
             elif state == ESCAPED_CHAR:
-                self.add_char(field_builder, u'\n')
+                self.add_char(field_builder, ord(u'\n'))
                 state = IN_FIELD
             elif state == IN_QUOTED_FIELD:
                 pass
             elif state == ESCAPE_IN_QUOTED_FIELD:
-                self.add_char(field_builder, u'\n')
+                self.add_char(field_builder, ord(u'\n'))
                 state = IN_QUOTED_FIELD
             elif state == START_FIELD:
                 # save empty field
-                field_builder = UnicodeBuilder(1)
+                field_builder = Utf8StringBuilder()
                 self.save_field(field_builder)
                 break
             elif state == AFTER_ESCAPED_CRNL:
@@ -216,6 +213,8 @@
                 break
         #
         w_result = space.newlist(self.fields_w)
+        # assume all lines have the same number of fields
+        self.sizehint = len(self.fields_w)
         self.fields_w = None
         return w_result
 
diff --git a/pypy/module/_csv/interp_writer.py b/pypy/module/_csv/interp_writer.py
--- a/pypy/module/_csv/interp_writer.py
+++ b/pypy/module/_csv/interp_writer.py
@@ -1,4 +1,4 @@
-from rpython.rlib.rstring import UnicodeBuilder
+from rpython.rlib.rutf8 import Utf8StringIterator, Utf8StringBuilder
 from rpython.rlib import objectmodel
 from pypy.interpreter.baseobjspace import W_Root
 from pypy.interpreter.error import OperationError
@@ -15,11 +15,13 @@
         self.dialect = dialect
         self.w_filewrite = space.getattr(w_fileobj, space.newtext('write'))
         # precompute this
-        special = dialect.delimiter + dialect.lineterminator
-        if dialect.escapechar != '\0':
-            special += dialect.escapechar
-        if dialect.quotechar != '\0':
-            special += dialect.quotechar
+        special = [dialect.delimiter]
+        for c in Utf8StringIterator(dialect.lineterminator):
+            special.append(c)
+        if dialect.escapechar != 0:
+            special.append(dialect.escapechar)
+        if dialect.quotechar != 0:
+            special.append(dialect.quotechar)
         self.special_characters = special
 
     @objectmodel.dont_inline
@@ -35,16 +37,17 @@
         space = self.space
         fields_w = space.listview(w_fields)
         dialect = self.dialect
-        rec = UnicodeBuilder(80)
+        rec = Utf8StringBuilder(80)
         #
         for field_index in range(len(fields_w)):
             w_field = fields_w[field_index]
             if space.is_w(w_field, space.w_None):
-                field = u""
+                field = ""
+                length = 0
             elif space.isinstance_w(w_field, space.w_float):
-                field = space.realunicode_w(space.repr(w_field))
+                field, length = space.utf8_len_w(space.repr(w_field))
             else:
-                field = space.realunicode_w(space.str(w_field))
+                field, length = space.utf8_len_w(space.str(w_field))
             #
             if dialect.quoting == QUOTE_NONNUMERIC:
                 try:
@@ -57,9 +60,9 @@
             elif dialect.quoting == QUOTE_ALL:
                 quoted = True
             elif dialect.quoting == QUOTE_MINIMAL:
-                # Find out if we really quoting
+                # Find out if we really need quoting.
                 special_characters = self.special_characters
-                for c in field:
+                for c in Utf8StringIterator(field):
                     if c in special_characters:
                         if c != dialect.quotechar or dialect.doublequote:
                             quoted = True
@@ -78,15 +81,15 @@
 
             # If this is not the first field we need a field separator
             if field_index > 0:
-                rec.append(dialect.delimiter)
+                rec.append_code(dialect.delimiter)
 
             # Handle preceding quote
             if quoted:
-                rec.append(dialect.quotechar)
+                rec.append_code(dialect.quotechar)
 
             # Copy field data
             special_characters = self.special_characters
-            for c in field:
+            for c in Utf8StringIterator(field):
                 if c in special_characters:
                     if dialect.quoting == QUOTE_NONE:
                         want_escape = True
@@ -94,28 +97,28 @@
                         want_escape = False
                         if c == dialect.quotechar:
                             if dialect.doublequote:
-                                rec.append(dialect.quotechar)
+                                rec.append_code(dialect.quotechar)
                             else:
                                 want_escape = True
                     if want_escape:
-                        if dialect.escapechar == u'\0':
+                        if dialect.escapechar == 0:
                             raise self.error("need to escape, "
                                              "but no escapechar set")
-                        rec.append(dialect.escapechar)
+                        rec.append_code(dialect.escapechar)
                     else:
                         assert quoted
                 # Copy field character into record buffer
-                rec.append(c)
+                rec.append_code(c)
 
             # Handle final quote
             if quoted:
-                rec.append(dialect.quotechar)
+                rec.append_code(dialect.quotechar)
 
         # Add line terminator
         rec.append(dialect.lineterminator)
 
         line = rec.build()
-        return space.call_function(self.w_filewrite, space.newtext(line))
+        return space.call_function(self.w_filewrite, space.newutf8(line, rec.getlength()))
 
     def writerows(self, w_seqseq):
         """Construct and write a series of sequences to a csv file.
diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -73,6 +73,9 @@
     # hit in the cache
     STRING_CACHE_USEFULNESS_FACTOR = 4
 
+    # don't make arbitrarily huge maps
+    MAX_MAP_SIZE = 100
+
 
     def __init__(self, space, s):
         self.space = space
@@ -369,7 +372,7 @@
                 return w_res
             elif ch == ',':
                 i = self.skip_whitespace(i)
-                if currmap.is_state_blocked():
+                if currmap.is_state_blocked() or nextindex > self.MAX_MAP_SIZE:
                     self.scratch.append(values_w)  # can reuse next time
                     dict_w = self._switch_to_dict(currmap, values_w, nextindex)
                     return self.decode_object_dict(i, start, dict_w)
diff --git a/pypy/module/_pypyjson/simd.py b/pypy/module/_pypyjson/simd.py
--- a/pypy/module/_pypyjson/simd.py
+++ b/pypy/module/_pypyjson/simd.py
@@ -1,7 +1,7 @@
 from rpython.rtyper.lltypesystem import lltype, rffi
 from rpython.rlib import objectmodel, unroll
 from rpython.rlib.rarithmetic import r_uint, intmask, LONG_BIT
-from rpython.jit.backend.detect_cpu import autodetect
+from rpython.jit.backend.detect_cpu import autodetect, ProcessorAutodetectError
 
 # accelerators for string operations using simd on regular word sizes (*not*
 # SSE instructions). this style is sometimes called SWAR (SIMD Within A
@@ -15,8 +15,11 @@
     WORD_SIZE = 8
     EVERY_BYTE_ONE = 0x0101010101010101
     EVERY_BYTE_HIGHEST_BIT = 0x8080808080808080
-    if autodetect() == "x86-64":
-        USE_SIMD = True
+    try:
+        if autodetect() == "x86-64":
+            USE_SIMD = True
+    except ProcessorAutodetectError:
+        pass
 else:
     WORD_SIZE = 4
     EVERY_BYTE_ONE = 0x01010101
diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py
--- a/pypy/module/_pypyjson/test/test__pypyjson.py
+++ b/pypy/module/_pypyjson/test/test__pypyjson.py
@@ -467,6 +467,14 @@
         res = _pypyjson.loads(json)
         assert res == [{u'a': 1}, {u'a': 2}]
 
+    def test_huge_map(self):
+        import _pypyjson
+        import __pypy__
+        s = '{' + ",".join('"%s": %s' % (i, i) for i in range(200)) + '}'
+        res = _pypyjson.loads(s)
+        assert len(res) == 200
+        assert __pypy__.strategy(res) == "UnicodeDictStrategy"
+
     def test_tab_in_string_should_fail(self):
         import _pypyjson
         # http://json.org/JSON_checker/test/fail25.json
diff --git a/pypy/module/cpyext/dictobject.py b/pypy/module/cpyext/dictobject.py
--- a/pypy/module/cpyext/dictobject.py
+++ b/pypy/module/cpyext/dictobject.py
@@ -80,6 +80,16 @@
     # XXX this is wrong with IntMutableCell.  Hope it works...
     return w_dict.getitem(w_key)
 
+ at cpython_api([PyObject, PyObject], PyObject, result_borrowed=True)
+def PyDict_GetItemWithError(space, w_dict, w_key):
+    """Variant of PyDict_GetItem() that does not suppress
+    exceptions. Return NULL with an exception set if an exception
+    occurred.  Return NULL without an exception set if the key
+    wasn't present."""
+    if not isinstance(w_dict, W_DictMultiObject):
+        PyErr_BadInternalCall(space)
+    return w_dict.getitem(w_key)
+
 @cpython_api([PyObject, PyObject, PyObject], rffi.INT_real, error=-1)
 def PyDict_SetItem(space, w_dict, w_key, w_obj):
     if not isinstance(w_dict, W_DictMultiObject):
@@ -298,3 +308,5 @@
             return 0
     return 1
 
+#def PyObject_GenericGetDict(space, w_obj, context):
+#    unlike CPython, you'll find this one in object.py together with ..SetDict
diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py
--- a/pypy/module/cpyext/object.py
+++ b/pypy/module/cpyext/object.py
@@ -444,3 +444,16 @@
             del d[w_obj]
         except KeyError:
             pass
+
+ at cpython_api([PyObject, rffi.VOIDP], PyObject)
+def PyObject_GenericGetDict(space, w_obj, context):
+    from pypy.interpreter.typedef import descr_get_dict
+    return descr_get_dict(space, w_obj)
+
+ at cpython_api([PyObject, PyObject, rffi.VOIDP], rffi.INT_real, error=-1)
+def PyObject_GenericSetDict(space, w_obj, w_value, context):
+    from pypy.interpreter.typedef import descr_set_dict
+    if w_value is None:
+        raise oefmt(space.w_TypeError, "cannot delete __dict__")
+    descr_set_dict(space, w_obj, w_value)
+    return 0
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -243,14 +243,6 @@
 def PyWrapper_New(space, w_d, w_self):
     raise NotImplementedError
 
-@cpython_api([PyObject, PyObject], PyObject)
-def PyDict_GetItemWithError(space, p, key):
-    """Variant of PyDict_GetItem() that does not suppress
-    exceptions. Return NULL with an exception set if an exception
-    occurred.  Return NULL without an exception set if the key
-    wasn't present."""
-    raise NotImplementedError
-
 @cpython_api([PyObject, PyObject, rffi.INT_real], rffi.INT_real, error=-1)
 def PyDict_MergeFromSeq2(space, a, seq2, override):
     """Update or merge into dictionary a, from the key-value pairs in seq2.
diff --git a/pypy/module/cpyext/test/test_dictobject.py b/pypy/module/cpyext/test/test_dictobject.py
--- a/pypy/module/cpyext/test/test_dictobject.py
+++ b/pypy/module/cpyext/test/test_dictobject.py
@@ -175,6 +175,26 @@
             ])
         assert module.dict_proxy({'a': 1, 'b': 2}) == 2
 
+    def test_getitemwitherror(self):
+        module = self.import_extension('foo', [
+            ("dict_getitem", "METH_VARARGS",
+             """
+             PyObject *d, *key, *result;
+             if (!PyArg_ParseTuple(args, "OO", &d, &key)) {
+                return NULL;
+             }
+             result = PyDict_GetItemWithError(d, key);
+             if (result == NULL && !PyErr_Occurred())
+                Py_RETURN_NONE;
+             Py_XINCREF(result);
+             return result;
+             """)])
+        d = {'foo': 'bar'}
+        assert module.dict_getitem(d, 'foo') == 'bar'
+        assert module.dict_getitem(d, 'missing') is None
+        with raises(TypeError):
+            module.dict_getitem(d, [])
+
     def test_setdefault(self):
         module = self.import_extension('foo', [
             ("setdefault", "METH_VARARGS",
diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py
--- a/pypy/module/cpyext/test/test_object.py
+++ b/pypy/module/cpyext/test/test_object.py
@@ -451,6 +451,31 @@
         assert n == 1
         module.leave(obj2)
 
+    def test_GenericGetSetDict(self):
+        module = self.import_extension('test_GenericGetSetDict', [
+            ('test1', 'METH_VARARGS',
+             """
+                 PyObject *obj = PyTuple_GET_ITEM(args, 0);
+                 PyObject *newdict = PyTuple_GET_ITEM(args, 1);
+
+                 PyObject *olddict = PyObject_GenericGetDict(obj, NULL);
+                 if (olddict == NULL)
+                    return NULL;
+                 int res = PyObject_GenericSetDict(obj, newdict, NULL);
+                 if (res != 0)
+                     return NULL;
+                 return olddict;
+             """)])
+        class A:
+            pass
+        a = A()
+        a.x = 42
+        nd = {'y': 43}
+        d = module.test1(a, nd)
+        assert d == {'x': 42}
+        assert a.y == 43
+        assert a.__dict__ is nd
+
 
 class AppTestPyBuffer_FillInfo(AppTestCpythonExtensionBase):
     """
diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py
--- a/pypy/objspace/fake/objspace.py
+++ b/pypy/objspace/fake/objspace.py
@@ -164,6 +164,9 @@
         is_root(w_obj)
         return NonConstant(False)
 
+    def utf8_len_w(self, space):
+        return NonConstant((NonConstant("utf8len_foobar"), NonConstant(14)))
+
     @not_rpython
     def unwrap(self, w_obj):
         raise NotImplementedError
diff --git a/pypy/objspace/fake/test/test_objspace.py b/pypy/objspace/fake/test/test_objspace.py
--- a/pypy/objspace/fake/test/test_objspace.py
+++ b/pypy/objspace/fake/test/test_objspace.py
@@ -1,4 +1,5 @@
-import py
+import pytest
+from rpython.rlib.nonconst import NonConstant
 from pypy.objspace.fake.objspace import FakeObjSpace, W_Root
 from pypy.interpreter.argument import Arguments
 from pypy.interpreter.typedef import TypeDef
@@ -63,8 +64,8 @@
     def test_is_true(self):
         space = self.space
         space.translates(lambda: space.is_true(W_Root()))
-        py.test.raises(AssertionError,
-                       space.translates, lambda: space.is_true(42))
+        with pytest.raises(AssertionError):
+            space.translates(lambda: space.is_true(42))
 
     def test_unpackiterable(self):
         space = self.space
@@ -79,3 +80,23 @@
         space = self.space
         space.translates(lambda: (space.get(W_Root(), W_Root()),
                                   space.get(W_Root(), W_Root(), W_Root())))
+
+    def test_bug_utf8_len_w(self):
+        space = self.space
+
+        class A(object):
+            pass
+
+        def f():
+            s = NonConstant('a')
+            w_s = space.newutf8(s, 1)
+            t, l = space.utf8_len_w(w_s)
+            a = A()
+            if l == 1:
+                a.x = 1
+            else:
+                raise Exception
+            return a.x
+        space.translates(f)
+
+
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -19,6 +19,7 @@
 
 class W_AbstractBytesObject(W_Root):
     __slots__ = ()
+    exact_class_applevel_name = 'bytes'
 
     def is_w(self, space, w_other):
         if not isinstance(w_other, W_AbstractBytesObject):
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -36,6 +36,10 @@
 def prev_codepoint_pos_dont_look_inside(utf8, p):
     return rutf8.prev_codepoint_pos(utf8, p)
 
+@jit.elidable
+def codepoint_at_pos_dont_look_inside(utf8, p):
+    return rutf8.codepoint_at_pos(utf8, p)
+
 
 class W_UnicodeObject(W_Root):
     import_from_mixin(StringMethods)
@@ -122,7 +126,7 @@
             raise oefmt(space.w_TypeError,
                          "ord() expected a character, but string of length %d "
                          "found", self._len())
-        return space.newint(rutf8.codepoint_at_pos(self._utf8, 0))
+        return space.newint(self.codepoint_at_pos_dont_look_inside(0))
 
     def _empty(self):
         return W_UnicodeObject.EMPTY
@@ -548,7 +552,7 @@
         if self._length == 0:
             return space.w_False
         if self._length == 1:
-            return space.newbool(func(rutf8.codepoint_at_pos(self._utf8, 0)))
+            return space.newbool(func(self.codepoint_at_pos_dont_look_inside(0)))
         else:
             return self._is_generic_loop(space, self._utf8, func_name)
 
@@ -1127,6 +1131,11 @@
             return pos - 1
         return prev_codepoint_pos_dont_look_inside(self._utf8, pos)
 
+    def codepoint_at_pos_dont_look_inside(self, pos):
+        if self.is_ascii():
+            return ord(self._utf8[pos])
+        return codepoint_at_pos_dont_look_inside(self._utf8, pos)
+
     @always_inline
     def _unwrap_and_search(self, space, w_sub, w_start, w_end, forward=True):
         w_sub = self.convert_arg_to_w_unicode(space, w_sub)
diff --git a/pypy/tool/release/repackage.sh b/pypy/tool/release/repackage.sh
--- a/pypy/tool/release/repackage.sh
+++ b/pypy/tool/release/repackage.sh
@@ -1,15 +1,18 @@
 # Edit these appropriately before running this script
 pmaj=2  # python main version: 2 or 3
 pmin=7  # python minor version
-exe=pypy3 # pypy3 or pypy
 maj=7
 min=2
-rev=0
+rev=0rc2
 
+case $pmaj in
+    "2") exe=pypy;;
+    "3") exe=pypy3;;
+    *) echo invalid pmaj=$pmaj; exit 1;;
+esac
 
 branchname=release-pypy$pmaj.$pmin-v$maj.x # ==OR== release-v$maj.x  # ==OR== release-v$maj.$min.x
-tagname=release-candidate-pypy$pmaj.$pmin-v$maj.$min.$rev  # ==OR== release-$maj.$min
-# tagname=release-pypy$pmaj.$pmin-v$maj.$min.$rev  # ==OR== release-$maj.$min
+tagname=release-pypy$pmaj.$pmin-v$maj.$min.$rev  # ==OR== release-$maj.$min
 
 echo checking hg log -r $branchname
 hg log -r $branchname || exit 1
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,5 +6,5 @@
 vmprof>=0.4.10; 'x86' in platform.machine #skip arm, s390x
 
 # hypothesis is used for test generation on untranslated tests
-hypothesis
+hypothesis<4.40
 enum34>=1.1.2
diff --git a/rpython/jit/metainterp/quasiimmut.py b/rpython/jit/metainterp/quasiimmut.py
--- a/rpython/jit/metainterp/quasiimmut.py
+++ b/rpython/jit/metainterp/quasiimmut.py
@@ -5,6 +5,7 @@
 from rpython.jit.metainterp.history import (
     AbstractDescr, ConstPtr, ConstInt, ConstFloat)
 from rpython.rlib.objectmodel import we_are_translated
+from rpython.rlib.debug import ll_assert, debug_print, debug_start, debug_stop
 
 
 def get_mutate_field_name(fieldname):
@@ -26,12 +27,14 @@
     return qmut
 
 def make_invalidation_function(STRUCT, mutatefieldname):
-    #
+    # fake a repr
+    descr_repr = "FieldDescr(%s, '%s')" % (STRUCT.TO, mutatefieldname)
+
     def _invalidate_now(p):
         qmut_ptr = getattr(p, mutatefieldname)
         setattr(p, mutatefieldname, lltype.nullptr(rclass.OBJECT))
         qmut = cast_base_ptr_to_instance(QuasiImmut, qmut_ptr)
-        qmut.invalidate()
+        qmut.invalidate(descr_repr)
     _invalidate_now._dont_inline_ = True
     #
     def invalidation(p):
@@ -45,7 +48,7 @@
     if qmut_ref:
         cpu.bh_setfield_gc_r(p, ConstPtr.value, mutatefielddescr)
         qmut = cast_gcref_to_instance(QuasiImmut, qmut_ref)
-        qmut.invalidate()
+        qmut.invalidate(mutatefielddescr.repr_of_descr())
 
 
 class QuasiImmut(object):
@@ -78,7 +81,8 @@
         # already invalidated; see below
         self.compress_limit = (len(self.looptokens_wrefs) + 15) * 2
 
-    def invalidate(self):
+    def invalidate(self, descr_repr=None):
+        debug_start("jit-invalidate-quasi-immutable")
         # When this is called, all the loops that we record become
         # invalid: all GUARD_NOT_INVALIDATED in these loops (and
         # in attached bridges) must now fail.
@@ -87,9 +91,11 @@
             return
         wrefs = self.looptokens_wrefs
         self.looptokens_wrefs = []
+        invalidated = 0
         for wref in wrefs:
             looptoken = wref()
             if looptoken is not None:
+                invalidated += 1
                 looptoken.invalidated = True
                 self.cpu.invalidate_loop(looptoken)
                 # NB. we must call cpu.invalidate_loop() even if
@@ -100,6 +106,8 @@
                 if not we_are_translated():
                     self.cpu.stats.invalidated_token_numbers.add(
                         looptoken.number)
+        debug_print("fieldname", descr_repr or "<unknown>", "invalidated", invalidated)
+        debug_stop("jit-invalidate-quasi-immutable")
 
 
 class QuasiImmutDescr(AbstractDescr):
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1903,7 +1903,9 @@
                 if MultiByteToWideChar(CP_ACP, flags,
                                        dataptr, size, buf.raw, usize) == 0:
                     _decode_mbcs_error(s, errorhandler)
-                return buf.str(usize), size
+                ret = buf.str(usize)
+                assert ret is not None
+                return ret, size
 
     def unicode_encode_mbcs(s, size, errors, errorhandler=None,
                             force_replace=True):
diff --git a/rpython/translator/revdb/src-revdb/revdb_include.h b/rpython/translator/revdb/src-revdb/revdb_include.h
--- a/rpython/translator/revdb/src-revdb/revdb_include.h
+++ b/rpython/translator/revdb/src-revdb/revdb_include.h
@@ -285,6 +285,8 @@
 #define OP_GC_RAWREFCOUNT_NEXT_DEAD(r)   \
     r = rpy_reverse_db_rawrefcount_next_dead()
 
+#define OP_GC_INCREASE_ROOT_STACK_DEPTH(depth, r)   /* nothing */
+
 
 RPY_EXTERN void rpy_reverse_db_flush(void);  /* must be called with the lock */
 RPY_EXTERN void rpy_reverse_db_fetch(const char *file, int line);


More information about the pypy-commit mailing list