[pypy-commit] pypy jit-duplicated_short_boxes: hg merge default

Sun Sep 4 10:59:58 CEST 2011

Author: Hakan Ardo <hakan at debian.org>
Branch: jit-duplicated_short_boxes
Changeset: r47057:9541261c8da3
Date: 2011-09-04 10:21 +0200
http://bitbucket.org/pypy/pypy/changeset/9541261c8da3/

Log:	hg merge default

diff --git a/lib-python/modified-2.7/ctypes/util.py b/lib-python/modified-2.7/ctypes/util.py
--- a/lib-python/modified-2.7/ctypes/util.py
+++ b/lib-python/modified-2.7/ctypes/util.py
@@ -72,8 +72,8 @@
         return name
 
 if os.name == "posix" and sys.platform == "darwin":
-    from ctypes.macholib.dyld import dyld_find as _dyld_find
     def find_library(name):
+        from ctypes.macholib.dyld import dyld_find as _dyld_find
         possible = ['lib%s.dylib' % name,
                     '%s.dylib' % name,
                     '%s.framework/%s' % (name, name)]
diff --git a/lib-python/modified-2.7/gzip.py b/lib-python/modified-2.7/gzip.py
new file mode 100644
--- /dev/null
+++ b/lib-python/modified-2.7/gzip.py
@@ -0,0 +1,514 @@
+"""Functions that read and write gzipped files.
+
+The user of the file doesn't have to worry about the compression,
+but random access is not allowed."""
+
+# based on Andrew Kuchling's minigzip.py distributed with the zlib module
+
+import struct, sys, time, os
+import zlib
+import io
+import __builtin__
+
+__all__ = ["GzipFile","open"]
+
+FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
+
+READ, WRITE = 1, 2
+
+def write32u(output, value):
+    # The L format writes the bit pattern correctly whether signed
+    # or unsigned.
+    output.write(struct.pack("<L", value))
+
+def read32(input):
+    return struct.unpack("<I", input.read(4))[0]
+
+def open(filename, mode="rb", compresslevel=9):
+    """Shorthand for GzipFile(filename, mode, compresslevel).
+
+    The filename argument is required; mode defaults to 'rb'
+    and compresslevel defaults to 9.
+
+    """
+    return GzipFile(filename, mode, compresslevel)
+
+class GzipFile(io.BufferedIOBase):
+    """The GzipFile class simulates most of the methods of a file object with
+    the exception of the readinto() and truncate() methods.
+
+    """
+
+    myfileobj = None
+    max_read_chunk = 10 * 1024 * 1024   # 10Mb
+
+    def __init__(self, filename=None, mode=None,
+                 compresslevel=9, fileobj=None, mtime=None):
+        """Constructor for the GzipFile class.
+
+        At least one of fileobj and filename must be given a
+        non-trivial value.
+
+        The new class instance is based on fileobj, which can be a regular
+        file, a StringIO object, or any other object which simulates a file.
+        It defaults to None, in which case filename is opened to provide
+        a file object.
+
+        When fileobj is not None, the filename argument is only used to be
+        included in the gzip file header, which may include the original
+        filename of the uncompressed file.  It defaults to the filename of
+        fileobj, if discernible; otherwise, it defaults to the empty string,
+        and in this case the original filename is not included in the header.
+
+        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
+        depending on whether the file will be read or written.  The default
+        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
+        Be aware that only the 'rb', 'ab', and 'wb' values should be used
+        for cross-platform portability.
+
+        The compresslevel argument is an integer from 1 to 9 controlling the
+        level of compression; 1 is fastest and produces the least compression,
+        and 9 is slowest and produces the most compression.  The default is 9.
+
+        The mtime argument is an optional numeric timestamp to be written
+        to the stream when compressing.  All gzip compressed streams
+        are required to contain a timestamp.  If omitted or None, the
+        current time is used.  This module ignores the timestamp when
+        decompressing; however, some programs, such as gunzip, make use
+        of it.  The format of the timestamp is the same as that of the
+        return value of time.time() and of the st_mtime member of the
+        object returned by os.stat().
+
+        """
+
+        # guarantee the file is opened in binary mode on platforms
+        # that care about that sort of thing
+        if mode and 'b' not in mode:
+            mode += 'b'
+        if fileobj is None:
+            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
+        if filename is None:
+            if hasattr(fileobj, 'name'): filename = fileobj.name
+            else: filename = ''
+        if mode is None:
+            if hasattr(fileobj, 'mode'): mode = fileobj.mode
+            else: mode = 'rb'
+
+        if mode[0:1] == 'r':
+            self.mode = READ
+            # Set flag indicating start of a new member
+            self._new_member = True
+            # Buffer data read from gzip file. extrastart is offset in
+            # stream where buffer starts. extrasize is number of
+            # bytes remaining in buffer from current stream position.
+            self.extrabuf = ""
+            self.extrasize = 0
+            self.extrastart = 0
+            self.name = filename
+            # Starts small, scales exponentially
+            self.min_readsize = 100
+
+        elif mode[0:1] == 'w' or mode[0:1] == 'a':
+            self.mode = WRITE
+            self._init_write(filename)
+            self.compress = zlib.compressobj(compresslevel,
+                                             zlib.DEFLATED,
+                                             -zlib.MAX_WBITS,
+                                             zlib.DEF_MEM_LEVEL,
+                                             0)
+        else:
+            raise IOError, "Mode " + mode + " not supported"
+
+        self.fileobj = fileobj
+        self.offset = 0
+        self.mtime = mtime
+
+        if self.mode == WRITE:
+            self._write_gzip_header()
+
+    @property
+    def filename(self):
+        import warnings
+        warnings.warn("use the name attribute", DeprecationWarning, 2)
+        if self.mode == WRITE and self.name[-3:] != ".gz":
+            return self.name + ".gz"
+        return self.name
+
+    def __repr__(self):
+        s = repr(self.fileobj)
+        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'
+
+    def _check_closed(self):
+        """Raises a ValueError if the underlying file object has been closed.
+
+        """
+        if self.closed:
+            raise ValueError('I/O operation on closed file.')
+
+    def _init_write(self, filename):
+        self.name = filename
+        self.crc = zlib.crc32("") & 0xffffffffL
+        self.size = 0
+        self.writebuf = []
+        self.bufsize = 0
+
+    def _write_gzip_header(self):
+        self.fileobj.write('\037\213')             # magic header
+        self.fileobj.write('\010')                 # compression method
+        fname = os.path.basename(self.name)
+        if fname.endswith(".gz"):
+            fname = fname[:-3]
+        flags = 0
+        if fname:
+            flags = FNAME
+        self.fileobj.write(chr(flags))
+        mtime = self.mtime
+        if mtime is None:
+            mtime = time.time()
+        write32u(self.fileobj, long(mtime))
+        self.fileobj.write('\002')
+        self.fileobj.write('\377')
+        if fname:
+            self.fileobj.write(fname + '\000')
+
+    def _init_read(self):
+        self.crc = zlib.crc32("") & 0xffffffffL
+        self.size = 0
+
+    def _read_gzip_header(self):
+        magic = self.fileobj.read(2)
+        if magic != '\037\213':
+            raise IOError, 'Not a gzipped file'
+        method = ord( self.fileobj.read(1) )
+        if method != 8:
+            raise IOError, 'Unknown compression method'
+        flag = ord( self.fileobj.read(1) )
+        self.mtime = read32(self.fileobj)
+        # extraflag = self.fileobj.read(1)
+        # os = self.fileobj.read(1)
+        self.fileobj.read(2)
+
+        if flag & FEXTRA:
+            # Read & discard the extra field, if present
+            xlen = ord(self.fileobj.read(1))
+            xlen = xlen + 256*ord(self.fileobj.read(1))
+            self.fileobj.read(xlen)
+        if flag & FNAME:
+            # Read and discard a null-terminated string containing the filename
+            while True:
+                s = self.fileobj.read(1)
+                if not s or s=='\000':
+                    break
+        if flag & FCOMMENT:
+            # Read and discard a null-terminated string containing a comment
+            while True:
+                s = self.fileobj.read(1)
+                if not s or s=='\000':
+                    break
+        if flag & FHCRC:
+            self.fileobj.read(2)     # Read & discard the 16-bit header CRC
+
+    def write(self,data):
+        self._check_closed()
+        if self.mode != WRITE:
+            import errno
+            raise IOError(errno.EBADF, "write() on read-only GzipFile object")
+
+        if self.fileobj is None:
+            raise ValueError, "write() on closed GzipFile object"
+
+        # Convert data type if called by io.BufferedWriter.
+        if isinstance(data, memoryview):
+            data = data.tobytes()
+
+        if len(data) > 0:
+            self.size = self.size + len(data)
+            self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
+            self.fileobj.write( self.compress.compress(data) )
+            self.offset += len(data)
+
+        return len(data)
+
+    def read(self, size=-1):
+        self._check_closed()
+        if self.mode != READ:
+            import errno
+            raise IOError(errno.EBADF, "read() on write-only GzipFile object")
+
+        if self.extrasize <= 0 and self.fileobj is None:
+            return ''
+
+        readsize = 1024
+        if size < 0:        # get the whole thing
+            try:
+                while True:
+                    self._read(readsize)
+                    readsize = min(self.max_read_chunk, readsize * 2)
+            except EOFError:
+                size = self.extrasize
+        elif size == 0:
+            return ""
+        else:               # just get some more of it
+            try:
+                while size > self.extrasize:
+                    self._read(readsize)
+                    readsize = min(self.max_read_chunk, readsize * 2)
+            except EOFError:
+                if size > self.extrasize:
+                    size = self.extrasize
+
+        offset = self.offset - self.extrastart
+        chunk = self.extrabuf[offset: offset + size]
+        self.extrasize = self.extrasize - size
+
+        self.offset += size
+        return chunk
+
+    def _unread(self, buf):
+        self.extrasize = len(buf) + self.extrasize
+        self.offset -= len(buf)
+
+    def _read(self, size=1024):
+        if self.fileobj is None:
+            raise EOFError, "Reached EOF"
+
+        if self._new_member:
+            # If the _new_member flag is set, we have to
+            # jump to the next member, if there is one.
+            #
+            # First, check if we're at the end of the file;
+            # if so, it's time to stop; no more members to read.
+            pos = self.fileobj.tell()   # Save current position
+            self.fileobj.seek(0, 2)     # Seek to end of file
+            if pos == self.fileobj.tell():
+                raise EOFError, "Reached EOF"
+            else:
+                self.fileobj.seek( pos ) # Return to original position
+
+            self._init_read()
+            self._read_gzip_header()
+            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
+            self._new_member = False
+
+        # Read a chunk of data from the file
+        buf = self.fileobj.read(size)
+
+        # If the EOF has been reached, flush the decompression object
+        # and mark this object as finished.
+
+        if buf == "":
+            uncompress = self.decompress.flush()
+            self._read_eof()
+            self._add_read_data( uncompress )
+            raise EOFError, 'Reached EOF'
+
+        uncompress = self.decompress.decompress(buf)
+        self._add_read_data( uncompress )
+
+        if self.decompress.unused_data != "":
+            # Ending case: we've come to the end of a member in the file,
+            # so seek back to the start of the unused data, finish up
+            # this member, and read a new gzip header.
+            # (The number of bytes to seek back is the length of the unused
+            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
+            self.fileobj.seek( -len(self.decompress.unused_data)+8, 1)
+
+            # Check the CRC and file size, and set the flag so we read
+            # a new member on the next call
+            self._read_eof()
+            self._new_member = True
+
+    def _add_read_data(self, data):
+        self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
+        offset = self.offset - self.extrastart
+        self.extrabuf = self.extrabuf[offset:] + data
+        self.extrasize = self.extrasize + len(data)
+        self.extrastart = self.offset
+        self.size = self.size + len(data)
+
+    def _read_eof(self):
+        # We've read to the end of the file, so we have to rewind in order
+        # to reread the 8 bytes containing the CRC and the file size.
+        # We check that the computed CRC and size of the
+        # uncompressed data matches the stored values.  Note that the size
+        # stored is the true file size mod 2**32.
+        self.fileobj.seek(-8, 1)
+        crc32 = read32(self.fileobj)
+        isize = read32(self.fileobj)  # may exceed 2GB
+        if crc32 != self.crc:
+            raise IOError("CRC check failed %s != %s" % (hex(crc32),
+                                                         hex(self.crc)))
+        elif isize != (self.size & 0xffffffffL):
+            raise IOError, "Incorrect length of data produced"
+
+        # Gzip files can be padded with zeroes and still have archives.
+        # Consume all zero bytes and set the file position to the first
+        # non-zero byte. See http://www.gzip.org/#faq8
+        c = "\x00"
+        while c == "\x00":
+            c = self.fileobj.read(1)
+        if c:
+            self.fileobj.seek(-1, 1)
+
+    @property
+    def closed(self):
+        return self.fileobj is None
+
+    def close(self):
+        if self.fileobj is None:
+            return
+        if self.mode == WRITE:
+            self.fileobj.write(self.compress.flush())
+            write32u(self.fileobj, self.crc)
+            # self.size may exceed 2GB, or even 4GB
+            write32u(self.fileobj, self.size & 0xffffffffL)
+            self.fileobj = None
+        elif self.mode == READ:
+            self.fileobj = None
+        if self.myfileobj:
+            self.myfileobj.close()
+            self.myfileobj = None
+
+    def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
+        self._check_closed()
+        if self.mode == WRITE:
+            # Ensure the compressor's buffer is flushed
+            self.fileobj.write(self.compress.flush(zlib_mode))
+            self.fileobj.flush()
+
+    def fileno(self):
+        """Invoke the underlying file object's fileno() method.
+
+        This will raise AttributeError if the underlying file object
+        doesn't support fileno().
+        """
+        return self.fileobj.fileno()
+
+    def rewind(self):
+        '''Return the uncompressed stream file position indicator to the
+        beginning of the file'''
+        if self.mode != READ:
+            raise IOError("Can't rewind in write mode")
+        self.fileobj.seek(0)
+        self._new_member = True
+        self.extrabuf = ""
+        self.extrasize = 0
+        self.extrastart = 0
+        self.offset = 0
+
+    def readable(self):
+        return self.mode == READ
+
+    def writable(self):
+        return self.mode == WRITE
+
+    def seekable(self):
+        return True
+
+    def seek(self, offset, whence=0):
+        if whence:
+            if whence == 1:
+                offset = self.offset + offset
+            else:
+                raise ValueError('Seek from end not supported')
+        if self.mode == WRITE:
+            if offset < self.offset:
+                raise IOError('Negative seek in write mode')
+            count = offset - self.offset
+            for i in range(count // 1024):
+                self.write(1024 * '\0')
+            self.write((count % 1024) * '\0')
+        elif self.mode == READ:
+            if offset == self.offset:
+                self.read(0) # to make sure that this file is open
+                return self.offset
+            if offset < self.offset:
+                # for negative seek, rewind and do positive seek
+                self.rewind()
+            count = offset - self.offset
+            for i in range(count // 1024):
+                self.read(1024)
+            self.read(count % 1024)
+
+        return self.offset
+
+    def readline(self, size=-1):
+        if size < 0:
+            # Shortcut common case - newline found in buffer.
+            offset = self.offset - self.extrastart
+            i = self.extrabuf.find('\n', offset) + 1
+            if i > 0:
+                self.extrasize -= i - offset
+                self.offset += i - offset
+                return self.extrabuf[offset: i]
+
+            size = sys.maxint
+            readsize = self.min_readsize
+        else:
+            readsize = size
+        bufs = []
+        while size != 0:
+            c = self.read(readsize)
+            i = c.find('\n')
+
+            # We set i=size to break out of the loop under two
+            # conditions: 1) there's no newline, and the chunk is
+            # larger than size, or 2) there is a newline, but the
+            # resulting line would be longer than 'size'.
+            if (size <= i) or (i == -1 and len(c) > size):
+                i = size - 1
+
+            if i >= 0 or c == '':
+                bufs.append(c[:i + 1])    # Add portion of last chunk
+                self._unread(c[i + 1:])   # Push back rest of chunk
+                break
+
+            # Append chunk to list and decrease 'size'.
+            bufs.append(c)
+            size = size - len(c)
+            readsize = min(size, readsize * 2)
+        if readsize > self.min_readsize:
+            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
+        return ''.join(bufs) # Return resulting line
+
+
+def _test():
+    # Act like gzip; with -d, act like gunzip.
+    # The input file is not deleted, however, nor are any other gzip
+    # options or features supported.
+    args = sys.argv[1:]
+    decompress = args and args[0] == "-d"
+    if decompress:
+        args = args[1:]
+    if not args:
+        args = ["-"]
+    for arg in args:
+        if decompress:
+            if arg == "-":
+                f = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
+                g = sys.stdout
+            else:
+                if arg[-3:] != ".gz":
+                    print "filename doesn't end in .gz:", repr(arg)
+                    continue
+                f = open(arg, "rb")
+                g = __builtin__.open(arg[:-3], "wb")
+        else:
+            if arg == "-":
+                f = sys.stdin
+                g = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
+            else:
+                f = __builtin__.open(arg, "rb")
+                g = open(arg + ".gz", "wb")
+        while True:
+            chunk = f.read(1024)
+            if not chunk:
+                break
+            g.write(chunk)
+        if g is not sys.stdout:
+            g.close()
+        if f is not sys.stdin:
+            f.close()
+
+if __name__ == '__main__':
+    _test()
diff --git a/lib-python/modified-2.7/sqlite3/test/regression.py b/lib-python/modified-2.7/sqlite3/test/regression.py
--- a/lib-python/modified-2.7/sqlite3/test/regression.py
+++ b/lib-python/modified-2.7/sqlite3/test/regression.py
@@ -274,6 +274,18 @@
         cur.execute("UPDATE foo SET id = 3 WHERE id = 1")
         self.assertEqual(cur.description, None)
 
+    def CheckStatementCache(self):
+        cur = self.con.cursor()
+        cur.execute("CREATE TABLE foo (id INTEGER)")
+        values = [(i,) for i in xrange(5)]
+        cur.executemany("INSERT INTO foo (id) VALUES (?)", values)
+
+        cur.execute("SELECT id FROM foo")
+        self.assertEqual(list(cur), values)
+        self.con.commit()
+        cur.execute("SELECT id FROM foo")
+        self.assertEqual(list(cur), values)
+
 def suite():
     regression_suite = unittest.makeSuite(RegressionTests, "Check")
     return unittest.TestSuite((regression_suite,))
diff --git a/lib-python/modified-2.7/tarfile.py b/lib-python/modified-2.7/tarfile.py
--- a/lib-python/modified-2.7/tarfile.py
+++ b/lib-python/modified-2.7/tarfile.py
@@ -252,8 +252,8 @@
        the high bit set. So we calculate two checksums, unsigned and
        signed.
     """
-    unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
-    signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
+    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", buf[:512]))
+    signed_chksum = 256 + sum(struct.unpack("148b8x356b", buf[:512]))
     return unsigned_chksum, signed_chksum
 
 def copyfileobj(src, dst, length=None):
@@ -265,7 +265,6 @@
     if length is None:
         shutil.copyfileobj(src, dst)
         return
-
     BUFSIZE = 16 * 1024
     blocks, remainder = divmod(length, BUFSIZE)
     for b in xrange(blocks):
@@ -802,19 +801,19 @@
         if self.closed:
             raise ValueError("I/O operation on closed file")
 
-        buf = ""
         if self.buffer:
             if size is None:
-                buf = self.buffer
+                buf = self.buffer + self.fileobj.read()
                 self.buffer = ""
             else:
                 buf = self.buffer[:size]
                 self.buffer = self.buffer[size:]
-
-        if size is None:
-            buf += self.fileobj.read()
+                buf += self.fileobj.read(size - len(buf))
         else:
-            buf += self.fileobj.read(size - len(buf))
+            if size is None:
+                buf = self.fileobj.read()
+            else:
+                buf = self.fileobj.read(size)
 
         self.position += len(buf)
         return buf
diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py
--- a/lib_pypy/_ctypes/basics.py
+++ b/lib_pypy/_ctypes/basics.py
@@ -54,7 +54,8 @@
     def get_ffi_argtype(self):
         if self._ffiargtype:
             return self._ffiargtype
-        return _shape_to_ffi_type(self._ffiargshape)
+        self._ffiargtype = _shape_to_ffi_type(self._ffiargshape)
+        return self._ffiargtype
 
     def _CData_output(self, resbuffer, base=None, index=-1):
         #assert isinstance(resbuffer, _rawffi.ArrayInstance)
@@ -166,7 +167,8 @@
     return tp._alignmentofinstances()
 
 def byref(cdata):
-    from ctypes import pointer
+    # "pointer" is imported at the end of this module to avoid circular
+    # imports
     return pointer(cdata)
 
 def cdata_from_address(self, address):
@@ -224,5 +226,9 @@
     'Z' : _ffi.types.void_p,
     'X' : _ffi.types.void_p,
     'v' : _ffi.types.sshort,
+    '?' : _ffi.types.ubyte,
     }
 
+
+# used by "byref"
+from _ctypes.pointer import pointer
diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py
--- a/lib_pypy/_sqlite3.py
+++ b/lib_pypy/_sqlite3.py
@@ -24,6 +24,7 @@
 from ctypes import c_void_p, c_int, c_double, c_int64, c_char_p, cdll
 from ctypes import POINTER, byref, string_at, CFUNCTYPE, cast
 from ctypes import sizeof, c_ssize_t
+from collections import OrderedDict
 import datetime
 import sys
 import time
@@ -274,6 +275,28 @@
 def unicode_text_factory(x):
     return unicode(x, 'utf-8')
 
+
+class StatementCache(object):
+    def __init__(self, connection, maxcount):
+        self.connection = connection
+        self.maxcount = maxcount
+        self.cache = OrderedDict()
+
+    def get(self, sql, cursor, row_factory):
+        try:
+            stat = self.cache[sql]
+        except KeyError:
+            stat = Statement(self.connection, sql)
+            self.cache[sql] = stat
+            if len(self.cache) > self.maxcount:
+                self.cache.popitem(0)
+        #
+        if stat.in_use:
+            stat = Statement(self.connection, sql)
+        stat.set_row_factory(row_factory)
+        return stat
+
+
 class Connection(object):
     def __init__(self, database, timeout=5.0, detect_types=0, isolation_level="",
                  check_same_thread=True, factory=None, cached_statements=100):
@@ -291,6 +314,7 @@
         self.row_factory = None
         self._isolation_level = isolation_level
         self.detect_types = detect_types
+        self.statement_cache = StatementCache(self, cached_statements)
 
         self.cursors = []
 
@@ -399,7 +423,7 @@
         cur = Cursor(self)
         if not isinstance(sql, (str, unicode)):
             raise Warning("SQL is of wrong type. Must be string or unicode.")
-        statement = Statement(cur, sql, self.row_factory)
+        statement = self.statement_cache.get(sql, cur, self.row_factory)
         return statement
 
     def _get_isolation_level(self):
@@ -681,6 +705,8 @@
         from sqlite3.dump import _iterdump
         return _iterdump(self)
 
+DML, DQL, DDL = range(3)
+
 class Cursor(object):
     def __init__(self, con):
         if not isinstance(con, Connection):
@@ -708,12 +734,12 @@
         if type(sql) is unicode:
             sql = sql.encode("utf-8")
         self._check_closed()
-        self.statement = Statement(self, sql, self.row_factory)
+        self.statement = self.connection.statement_cache.get(sql, self, self.row_factory)
 
         if self.connection._isolation_level is not None:
-            if self.statement.kind == "DDL":
+            if self.statement.kind == DDL:
                 self.connection.commit()
-            elif self.statement.kind == "DML":
+            elif self.statement.kind == DML:
                 self.connection._begin()
 
         self.statement.set_params(params)
@@ -724,18 +750,18 @@
             self.statement.reset()
             raise self.connection._get_exception(ret)
 
-        if self.statement.kind == "DQL"and ret == SQLITE_ROW:
+        if self.statement.kind == DQL and ret == SQLITE_ROW:
             self.statement._build_row_cast_map()
-            self.statement._readahead()
+            self.statement._readahead(self)
         else:
             self.statement.item = None
             self.statement.exhausted = True
 
-        if self.statement.kind in ("DML", "DDL"):
+        if self.statement.kind == DML or self.statement.kind == DDL:
             self.statement.reset()
 
         self.rowcount = -1
-        if self.statement.kind == "DML":
+        if self.statement.kind == DML:
             self.rowcount = sqlite.sqlite3_changes(self.connection.db)
 
         return self
@@ -746,8 +772,9 @@
         if type(sql) is unicode:
             sql = sql.encode("utf-8")
         self._check_closed()
-        self.statement = Statement(self, sql, self.row_factory)
-        if self.statement.kind == "DML":
+        self.statement = self.connection.statement_cache.get(sql, self, self.row_factory)
+
+        if self.statement.kind == DML:
             self.connection._begin()
         else:
             raise ProgrammingError, "executemany is only for DML statements"
@@ -799,7 +826,7 @@
         return self
 
     def __iter__(self):
-        return self.statement
+        return iter(self.fetchone, None)
 
     def _check_reset(self):
         if self.reset:
@@ -816,7 +843,7 @@
             return None
 
         try:
-            return self.statement.next()
+            return self.statement.next(self)
         except StopIteration:
             return None
 
@@ -830,7 +857,7 @@
         if size is None:
             size = self.arraysize
         lst = []
-        for row in self.statement:
+        for row in self:
             lst.append(row)
             if len(lst) == size:
                 break
@@ -841,7 +868,7 @@
         self._check_reset()
         if self.statement is None:
             return []
-        return list(self.statement)
+        return list(self)
 
     def _getdescription(self):
         if self._description is None:
@@ -871,22 +898,24 @@
     lastrowid = property(_getlastrowid)
 
 class Statement(object):
-    def __init__(self, cur, sql, row_factory):
+    def __init__(self, connection, sql):
         self.statement = None
         if not isinstance(sql, str):
             raise ValueError, "sql must be a string"
-        self.con = cur.connection
-        self.cur = weakref.ref(cur)
+        self.con = connection
         self.sql = sql # DEBUG ONLY
-        self.row_factory = row_factory
         first_word = self._statement_kind = sql.lstrip().split(" ")[0].upper()
         if first_word in ("INSERT", "UPDATE", "DELETE", "REPLACE"):
-            self.kind = "DML"
+            self.kind = DML
         elif first_word in ("SELECT", "PRAGMA"):
-            self.kind = "DQL"
+            self.kind = DQL
         else:
-            self.kind = "DDL"
+            self.kind = DDL
         self.exhausted = False
+        self.in_use = False
+        #
+        # set by set_row_factory
+        self.row_factory = None
 
         self.statement = c_void_p()
         next_char = c_char_p()
@@ -895,7 +924,7 @@
         if ret == SQLITE_OK and self.statement.value is None:
             # an empty statement, we work around that, as it's the least trouble
             ret = sqlite.sqlite3_prepare_v2(self.con.db, "select 42", -1, byref(self.statement), byref(next_char))
-            self.kind = "DQL"
+            self.kind = DQL
 
         if ret != SQLITE_OK:
             raise self.con._get_exception(ret)
@@ -907,6 +936,9 @@
 
         self._build_row_cast_map()
 
+    def set_row_factory(self, row_factory):
+        self.row_factory = row_factory
+
     def _build_row_cast_map(self):
         self.row_cast_map = []
         for i in xrange(sqlite.sqlite3_column_count(self.statement)):
@@ -976,6 +1008,7 @@
         ret = sqlite.sqlite3_reset(self.statement)
         if ret != SQLITE_OK:
             raise self.con._get_exception(ret)
+        self.mark_dirty()
 
         if params is None:
             if sqlite.sqlite3_bind_parameter_count(self.statement) != 0:
@@ -1006,10 +1039,7 @@
                     raise ProgrammingError("missing parameter '%s'" %param)
                 self.set_param(idx, param)
 
-    def __iter__(self):
-        return self
-
-    def next(self):
+    def next(self, cursor):
         self.con._check_closed()
         self.con._check_thread()
         if self.exhausted:
@@ -1025,10 +1055,10 @@
             sqlite.sqlite3_reset(self.statement)
             raise exc
 
-        self._readahead()
+        self._readahead(cursor)
         return item
 
-    def _readahead(self):
+    def _readahead(self, cursor):
         self.column_count = sqlite.sqlite3_column_count(self.statement)
         row = []
         for i in xrange(self.column_count):
@@ -1063,23 +1093,30 @@
 
         row = tuple(row)
         if self.row_factory is not None:
-            row = self.row_factory(self.cur(), row)
+            row = self.row_factory(cursor, row)
         self.item = row
 
     def reset(self):
         self.row_cast_map = None
-        return sqlite.sqlite3_reset(self.statement)
+        ret = sqlite.sqlite3_reset(self.statement)
+        self.in_use = False
+        self.exhausted = False
+        return ret
 
     def finalize(self):
         sqlite.sqlite3_finalize(self.statement)
         self.statement = None
+        self.in_use = False
+
+    def mark_dirty(self):
+        self.in_use = True
 
     def __del__(self):
         sqlite.sqlite3_finalize(self.statement)
         self.statement = None
 
     def _get_description(self):
-        if self.kind == "DML":
+        if self.kind == DML:
             return None
         desc = []
         for i in xrange(sqlite.sqlite3_column_count(self.statement)):
diff --git a/lib_pypy/greenlet.py b/lib_pypy/greenlet.py
--- a/lib_pypy/greenlet.py
+++ b/lib_pypy/greenlet.py
@@ -59,7 +59,12 @@
         #
         while not target:
             if not target.__started:
-                _continulet.__init__(target, _greenlet_start, *args)
+                if unbound_method != _continulet.throw:
+                    greenlet_func = _greenlet_start
+                else:
+                    greenlet_func = _greenlet_throw
+                _continulet.__init__(target, greenlet_func, *args)
+                unbound_method = _continulet.switch
                 args = ()
                 target.__started = True
                 break
@@ -136,3 +141,11 @@
         if greenlet.parent is not _tls.main:
             _continuation.permute(greenlet, greenlet.parent)
     return (res,)
+
+def _greenlet_throw(greenlet, exc, value, tb):
+    _tls.current = greenlet
+    try:
+        raise exc, value, tb
+    finally:
+        if greenlet.parent is not _tls.main:
+            _continuation.permute(greenlet, greenlet.parent)
diff --git a/lib_pypy/pyrepl/reader.py b/lib_pypy/pyrepl/reader.py
--- a/lib_pypy/pyrepl/reader.py
+++ b/lib_pypy/pyrepl/reader.py
@@ -401,13 +401,19 @@
             return "(arg: %s) "%self.arg
         if "\n" in self.buffer:
             if lineno == 0:
-                return self._ps2
+                res = self.ps2
             elif lineno == self.buffer.count("\n"):
-                return self._ps4
+                res = self.ps4
             else:
-                return self._ps3
+                res = self.ps3
         else:
-            return self._ps1
+            res = self.ps1
+        # Lazily call str() on self.psN, and cache the results using as key
+        # the object on which str() was called.  This ensures that even if the
+        # same object is used e.g. for ps1 and ps2, str() is called only once.
+        if res not in self._pscache:
+            self._pscache[res] = str(res)
+        return self._pscache[res]
 
     def push_input_trans(self, itrans):
         self.input_trans_stack.append(self.input_trans)
@@ -473,8 +479,7 @@
             self.pos = 0
             self.dirty = 1
             self.last_command = None
-            self._ps1, self._ps2, self._ps3, self._ps4 = \
-                           map(str, [self.ps1, self.ps2, self.ps3, self.ps4])
+            self._pscache = {}
         except:
             self.restore()
             raise
diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst
--- a/pypy/doc/faq.rst
+++ b/pypy/doc/faq.rst
@@ -315,6 +315,28 @@
 
 .. _`Andrew Brown's tutorial`: http://morepypy.blogspot.com/2011/04/tutorial-writing-interpreter-with-pypy.html
 
+---------------------------------------------------------
+Can RPython modules for PyPy be translated independently?
+---------------------------------------------------------
+
+No, you have to rebuild the entire interpreter.  This means two things:
+
+* It is imperative to use test-driven development.  You have to test
+  your module exhaustively in pure Python, before even attempting to
+  translate it.  Once you translate it, you should have only a few typing
+  issues left to fix, but otherwise the result should work out of the box.
+
+* Second, and perhaps most important: do you have a really good reason
+  for writing the module in RPython in the first place?  Nowadays you
+  should really look at alternatives, like writing it in pure Python,
+  using ctypes if it needs to call C code.  Other alternatives are being
+  developed too (as of summer 2011), like a Cython binding.
+
+In this context it is not that important to be able to translate
+RPython modules independently of translating the complete interpreter.
+(It could be done given enough effort, but it's a really serious
+undertaking.  Consider it as quite unlikely for now.)
+
 ----------------------------------------------------------
 Why does PyPy draw a Mandelbrot fractal while translating?
 ----------------------------------------------------------
diff --git a/pypy/doc/jit/pyjitpl5.rst b/pypy/doc/jit/pyjitpl5.rst
--- a/pypy/doc/jit/pyjitpl5.rst
+++ b/pypy/doc/jit/pyjitpl5.rst
@@ -103,7 +103,7 @@
 
 The meta-interpreter starts interpreting the JIT bytecode.  Each operation is
 executed and then recorded in a list of operations, called the trace.
-Operations can have a list of boxes that operate on, arguments.  Some operations
+Operations can have a list of boxes they operate on, arguments.  Some operations
 (like GETFIELD and GETARRAYITEM) also have special objects that describe how
 their arguments are laid out in memory.  All possible operations generated by
 tracing are listed in metainterp/resoperation.py.  When a (interpreter-level)
diff --git a/pypy/doc/stackless.rst b/pypy/doc/stackless.rst
--- a/pypy/doc/stackless.rst
+++ b/pypy/doc/stackless.rst
@@ -199,7 +199,11 @@
 The following features (present in some past Stackless version of PyPy)
 are for the time being not supported any more:
 
-* Tasklets and channels (needs to be rewritten at app-level)
+* Tasklets and channels (currently ``stackless.py`` seems to import,
+  but you have tasklets on top of coroutines on top of greenlets on
+  top of continulets on top of stacklets, and it's probably not too
+  hard to cut two of these levels by adapting ``stackless.py`` to
+  use continulets directly)
 
 * Coroutines (could be rewritten at app-level)
 
@@ -209,6 +213,13 @@
 
 * Automatic unlimited stack (must be emulated__ so far)
 
+* Support for other CPUs than x86 and x86-64
+
+* The app-level ``f_back`` field of frames crossing continulet boundaries
+  is None for now, unlike what I explain in the theoretical overview
+  above.  It mostly means that in a ``pdb.set_trace()`` you cannot go
+  ``up`` past continulet boundaries.  This could be fixed.
+
 .. __: `recursion depth limit`_
 
 (*) Pickling, as well as changing threads, could be implemented by using
@@ -217,9 +228,8 @@
 "hard" switch (like now) when the C stack contains non-trivial C frames
 to save, and a "soft" switch (like previously) when it contains only
 simple calls from Python to Python.  Soft-switched continulets would
-also consume a bit less RAM, at the possible expense of making the
-switch a bit slower (unsure about that; what is the Stackless Python
-experience?).
+also consume a bit less RAM, and the switch might be a bit faster too
+(unsure about that; what is the Stackless Python experience?).
 
 
 Recursion depth limit
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -626,9 +626,9 @@
             self.default_compiler = compiler
             return compiler
 
-    def createframe(self, code, w_globals, closure=None):
+    def createframe(self, code, w_globals, outer_func=None):
         "Create an empty PyFrame suitable for this code object."
-        return self.FrameClass(self, code, w_globals, closure)
+        return self.FrameClass(self, code, w_globals, outer_func)
 
     def allocate_lock(self):
         """Return an interp-level Lock object if threads are enabled,
diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py
--- a/pypy/interpreter/function.py
+++ b/pypy/interpreter/function.py
@@ -30,7 +30,7 @@
     can_change_code = True
     _immutable_fields_ = ['code?',
                           'w_func_globals?',
-                          'closure?',
+                          'closure?[*]',
                           'defs_w?[*]',
                           'name?']
 
@@ -96,7 +96,7 @@
             assert isinstance(code, PyCode)
             if nargs < 5:
                 new_frame = self.space.createframe(code, self.w_func_globals,
-                                                   self.closure)
+                                                   self)
                 for i in funccallunrolling:
                     if i < nargs:
                         new_frame.locals_stack_w[i] = args_w[i]
@@ -156,7 +156,7 @@
     def _flat_pycall(self, code, nargs, frame):
         # code is a PyCode
         new_frame = self.space.createframe(code, self.w_func_globals,
-                                                   self.closure)
+                                                   self)
         for i in xrange(nargs):
             w_arg = frame.peekvalue(nargs-1-i)
             new_frame.locals_stack_w[i] = w_arg
@@ -167,7 +167,7 @@
     def _flat_pycall_defaults(self, code, nargs, frame, defs_to_load):
         # code is a PyCode
         new_frame = self.space.createframe(code, self.w_func_globals,
-                                                   self.closure)
+                                                   self)
         for i in xrange(nargs):
             w_arg = frame.peekvalue(nargs-1-i)
             new_frame.locals_stack_w[i] = w_arg
diff --git a/pypy/interpreter/nestedscope.py b/pypy/interpreter/nestedscope.py
--- a/pypy/interpreter/nestedscope.py
+++ b/pypy/interpreter/nestedscope.py
@@ -8,7 +8,7 @@
 
 class Cell(Wrappable):
     "A simple container for a wrapped value."
-    
+
     def __init__(self, w_value=None):
         self.w_value = w_value
 
@@ -90,32 +90,33 @@
     #     variables coming from a parent function in which i'm nested
     # 'closure' is a list of Cell instances: the received free vars.
 
-    cells = None
-
     @jit.unroll_safe
-    def initialize_frame_scopes(self, closure, code):
-        super_initialize_frame_scopes(self, closure, code)
+    def initialize_frame_scopes(self, outer_func, code):
+        super_initialize_frame_scopes(self, outer_func, code)
         ncellvars = len(code.co_cellvars)
         nfreevars = len(code.co_freevars)
         if not nfreevars:
             if not ncellvars:
+                self.cells = []
                 return            # no self.cells needed - fast path
-            if closure is None:
-                closure = []
-        elif closure is None:
+        elif outer_func is None:
             space = self.space
             raise OperationError(space.w_TypeError,
                                  space.wrap("directly executed code object "
                                             "may not contain free variables"))
-        if len(closure) != nfreevars:
+        if outer_func and outer_func.closure:
+            closure_size = len(outer_func.closure)
+        else:
+            closure_size = 0
+        if closure_size != nfreevars:
             raise ValueError("code object received a closure with "
                                  "an unexpected number of free variables")
         self.cells = [None] * (ncellvars + nfreevars)
         for i in range(ncellvars):
             self.cells[i] = Cell()
         for i in range(nfreevars):
-            self.cells[i + ncellvars] = closure[i]
-    
+            self.cells[i + ncellvars] = outer_func.closure[i]
+
     def _getcells(self):
         return self.cells
 
diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py
--- a/pypy/interpreter/pycode.py
+++ b/pypy/interpreter/pycode.py
@@ -198,7 +198,7 @@
 
     def funcrun(self, func, args):
         frame = self.space.createframe(self, func.w_func_globals,
-                                  func.closure)
+                                  func)
         sig = self._signature
         # speed hack
         fresh_frame = jit.hint(frame, access_directly=True,
@@ -211,7 +211,7 @@
 
     def funcrun_obj(self, func, w_obj, args):
         frame = self.space.createframe(self, func.w_func_globals,
-                                  func.closure)
+                                  func)
         sig = self._signature
         # speed hack
         fresh_frame = jit.hint(frame, access_directly=True,
diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py
--- a/pypy/interpreter/pyframe.py
+++ b/pypy/interpreter/pyframe.py
@@ -51,7 +51,7 @@
     is_being_profiled        = False
     escaped                  = False  # see mark_as_escaped()
 
-    def __init__(self, space, code, w_globals, closure):
+    def __init__(self, space, code, w_globals, outer_func):
         if not we_are_translated():
             assert type(self) in (space.FrameClass, CPythonFrame), (
                 "use space.FrameClass(), not directly PyFrame()")
@@ -70,7 +70,7 @@
             self.builtin = space.builtin.pick_builtin(w_globals)
         # regular functions always have CO_OPTIMIZED and CO_NEWLOCALS.
         # class bodies only have CO_NEWLOCALS.
-        self.initialize_frame_scopes(closure, code)
+        self.initialize_frame_scopes(outer_func, code)
         self.f_lineno = code.co_firstlineno
 
     def mark_as_escaped(self):
@@ -117,8 +117,8 @@
             return self.builtin
         else:
             return self.space.builtin
-        
-    def initialize_frame_scopes(self, closure, code): 
+
+    def initialize_frame_scopes(self, outer_func, code):
         # regular functions always have CO_OPTIMIZED and CO_NEWLOCALS.
         # class bodies only have CO_NEWLOCALS.
         # CO_NEWLOCALS: make a locals dict unless optimized is also set
@@ -385,7 +385,11 @@
         
         # do not use the instance's __init__ but the base's, because we set
         # everything like cells from here
-        PyFrame.__init__(self, space, pycode, w_globals, closure)
+        # XXX hack
+        from pypy.interpreter.function import Function
+        outer_func = Function(space, None, closure=closure,
+                             forcename="fake")
+        PyFrame.__init__(self, space, pycode, w_globals, outer_func)
         f_back = space.interp_w(PyFrame, w_f_back, can_be_None=True)
         new_frame.f_backref = jit.non_virtual_ref(f_back)
 
diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py
--- a/pypy/interpreter/pyparser/future.py
+++ b/pypy/interpreter/pyparser/future.py
@@ -109,25 +109,19 @@
             self.getc() == self.getc(+2)):
             self.pos += 3
             while 1: # Deal with a triple quoted docstring
-                if self.getc() == '\\':
-                    self.pos += 2
+                c = self.getc()
+                if c == '\\':
+                    self.pos += 1
+                    self._skip_next_char_from_docstring()
+                elif c != endchar:
+                    self._skip_next_char_from_docstring()
                 else:
-                    c = self.getc()
-                    if c != endchar:
-                        self.pos += 1
-                        if c == '\n':
-                            self.atbol()
-                        elif c == '\r':
-                            if self.getc() == '\n':
-                                self.pos += 1
-                                self.atbol()
-                    else:
-                        self.pos += 1
-                        if (self.getc() == endchar and
-                            self.getc(+1) == endchar):
-                            self.pos += 2
-                            self.consume_empty_line()
-                            break
+                    self.pos += 1
+                    if (self.getc() == endchar and
+                        self.getc(+1) == endchar):
+                        self.pos += 2
+                        self.consume_empty_line()
+                        break
 
         else: # Deal with a single quoted docstring
             self.pos += 1
@@ -138,17 +132,21 @@
                     self.consume_empty_line()
                     return
                 elif c == '\\':
-                    # Deal with linefeeds
-                    if self.getc() != '\r':
-                        self.pos += 1
-                    else:
-                        self.pos += 1
-                        if self.getc() == '\n':
-                            self.pos += 1
+                    self._skip_next_char_from_docstring()
                 elif c in '\r\n':
                     # Syntax error
                     return
 
+    def _skip_next_char_from_docstring(self):
+        c = self.getc()
+        self.pos += 1
+        if c == '\n':
+            self.atbol()
+        elif c == '\r':
+            if self.getc() == '\n':
+                self.pos += 1
+            self.atbol()
+
     def consume_continuation(self):
         c = self.getc()
         if c in '\n\r':
diff --git a/pypy/interpreter/pyparser/test/test_futureautomaton.py b/pypy/interpreter/pyparser/test/test_futureautomaton.py
--- a/pypy/interpreter/pyparser/test/test_futureautomaton.py
+++ b/pypy/interpreter/pyparser/test/test_futureautomaton.py
@@ -221,6 +221,14 @@
     assert f.lineno == 3
     assert f.col_offset == 0
 
+def test_lots_of_continuation_lines():
+    s = "\\\n\\\n\\\n\\\n\\\n\\\n\nfrom __future__ import with_statement\n"
+    f = run(s)
+    assert f.pos == len(s)
+    assert f.flags == fut.CO_FUTURE_WITH_STATEMENT
+    assert f.lineno == 8
+    assert f.col_offset == 0
+
 # This looks like a bug in cpython parser
 # and would require extensive modifications
 # to future.py in order to emulate the same behaviour
@@ -239,3 +247,19 @@
         raise AssertionError('IndentationError not raised')
     assert f.lineno == 2
     assert f.col_offset == 0
+
+def test_continuation_lines_in_docstring_single_quoted():
+    s = '"\\\n\\\n\\\n\\\n\\\n\\\n"\nfrom  __future__ import division\n'
+    f = run(s)
+    assert f.pos == len(s)
+    assert f.flags == fut.CO_FUTURE_DIVISION
+    assert f.lineno == 8
+    assert f.col_offset == 0
+
+def test_continuation_lines_in_docstring_triple_quoted():
+    s = '"""\\\n\\\n\\\n\\\n\\\n\\\n"""\nfrom  __future__ import division\n'
+    f = run(s)
+    assert f.pos == len(s)
+    assert f.flags == fut.CO_FUTURE_DIVISION
+    assert f.lineno == 8
+    assert f.col_offset == 0
diff --git a/pypy/jit/backend/llgraph/runner.py b/pypy/jit/backend/llgraph/runner.py
--- a/pypy/jit/backend/llgraph/runner.py
+++ b/pypy/jit/backend/llgraph/runner.py
@@ -25,13 +25,14 @@
 class Descr(history.AbstractDescr):
 
     def __init__(self, ofs, typeinfo, extrainfo=None, name=None,
-                 arg_types=None, count_fields_if_immut=-1):
+                 arg_types=None, count_fields_if_immut=-1, ffi_flags=0):
         self.ofs = ofs
         self.typeinfo = typeinfo
         self.extrainfo = extrainfo
         self.name = name
         self.arg_types = arg_types
         self.count_fields_if_immut = count_fields_if_immut
+        self.ffi_flags = ffi_flags
 
     def get_arg_types(self):
         return self.arg_types
@@ -67,6 +68,9 @@
     def count_fields_if_immutable(self):
         return self.count_fields_if_immut
 
+    def get_ffi_flags(self):
+        return self.ffi_flags
+
     def __lt__(self, other):
         raise TypeError("cannot use comparison on Descrs")
     def __le__(self, other):
@@ -114,14 +118,14 @@
         return False
 
     def getdescr(self, ofs, typeinfo='?', extrainfo=None, name=None,
-                 arg_types=None, count_fields_if_immut=-1):
+                 arg_types=None, count_fields_if_immut=-1, ffi_flags=0):
         key = (ofs, typeinfo, extrainfo, name, arg_types,
-               count_fields_if_immut)
+               count_fields_if_immut, ffi_flags)
         try:
             return self._descrs[key]
         except KeyError:
             descr = Descr(ofs, typeinfo, extrainfo, name, arg_types,
-                          count_fields_if_immut)
+                          count_fields_if_immut, ffi_flags)
             self._descrs[key] = descr
             return descr
 
@@ -326,7 +330,7 @@
         return self.getdescr(0, token[0], extrainfo=extrainfo,
                              arg_types=''.join(arg_types))
 
-    def calldescrof_dynamic(self, ffi_args, ffi_result, extrainfo):
+    def calldescrof_dynamic(self, ffi_args, ffi_result, extrainfo, ffi_flags):
         from pypy.jit.backend.llsupport.ffisupport import get_ffi_type_kind
         from pypy.jit.backend.llsupport.ffisupport import UnsupportedKind
         arg_types = []
@@ -339,7 +343,8 @@
         except UnsupportedKind:
             return None
         return self.getdescr(0, reskind, extrainfo=extrainfo,
-                             arg_types=''.join(arg_types))
+                             arg_types=''.join(arg_types),
+                             ffi_flags=ffi_flags)
 
 
     def grab_exc_value(self):
diff --git a/pypy/jit/backend/llsupport/descr.py b/pypy/jit/backend/llsupport/descr.py
--- a/pypy/jit/backend/llsupport/descr.py
+++ b/pypy/jit/backend/llsupport/descr.py
@@ -260,10 +260,12 @@
     _clsname = ''
     loop_token = None
     arg_classes = ''     # <-- annotation hack
+    ffi_flags = 0
 
-    def __init__(self, arg_classes, extrainfo=None):
+    def __init__(self, arg_classes, extrainfo=None, ffi_flags=0):
         self.arg_classes = arg_classes    # string of "r" and "i" (ref/int)
         self.extrainfo = extrainfo
+        self.ffi_flags = ffi_flags
 
     def __repr__(self):
         res = '%s(%s)' % (self.__class__.__name__, self.arg_classes)
@@ -284,6 +286,13 @@
     def get_extra_info(self):
         return self.extrainfo
 
+    def get_ffi_flags(self):
+        return self.ffi_flags
+
+    def get_call_conv(self):
+        from pypy.rlib.clibffi import get_call_conv
+        return get_call_conv(self.ffi_flags, True)
+
     def get_arg_types(self):
         return self.arg_classes
 
@@ -391,8 +400,8 @@
     """
     _clsname = 'DynamicIntCallDescr'
 
-    def __init__(self, arg_classes, result_size, result_sign, extrainfo=None):
-        BaseIntCallDescr.__init__(self, arg_classes, extrainfo)
+    def __init__(self, arg_classes, result_size, result_sign, extrainfo=None, ffi_flags=0):
+        BaseIntCallDescr.__init__(self, arg_classes, extrainfo, ffi_flags)
         assert isinstance(result_sign, bool)
         self._result_size = chr(result_size)
         self._result_sign = result_sign
diff --git a/pypy/jit/backend/llsupport/ffisupport.py b/pypy/jit/backend/llsupport/ffisupport.py
--- a/pypy/jit/backend/llsupport/ffisupport.py
+++ b/pypy/jit/backend/llsupport/ffisupport.py
@@ -8,7 +8,7 @@
 class UnsupportedKind(Exception):
     pass
 
-def get_call_descr_dynamic(cpu, ffi_args, ffi_result, extrainfo=None):
+def get_call_descr_dynamic(cpu, ffi_args, ffi_result, extrainfo=None, ffi_flags=0):
     """Get a call descr: the types of result and args are represented by
     rlib.libffi.types.*"""
     try:
@@ -20,18 +20,24 @@
     if reskind == history.INT:
         size = intmask(ffi_result.c_size)
         signed = is_ffi_type_signed(ffi_result)
-        return DynamicIntCallDescr(arg_classes, size, signed, extrainfo)
+        return DynamicIntCallDescr(arg_classes, size, signed, extrainfo,
+                                   ffi_flags=ffi_flags)
     elif reskind == history.REF:
-        return  NonGcPtrCallDescr(arg_classes, extrainfo)
+        return  NonGcPtrCallDescr(arg_classes, extrainfo,
+                                  ffi_flags=ffi_flags)
     elif reskind == history.FLOAT:
-        return FloatCallDescr(arg_classes, extrainfo)
+        return FloatCallDescr(arg_classes, extrainfo,
+                              ffi_flags=ffi_flags)
     elif reskind == history.VOID:
-        return VoidCallDescr(arg_classes, extrainfo)
+        return VoidCallDescr(arg_classes, extrainfo,
+                             ffi_flags=ffi_flags)
     elif reskind == 'L':
-        return LongLongCallDescr(arg_classes, extrainfo)
+        return LongLongCallDescr(arg_classes, extrainfo,
+                                 ffi_flags=ffi_flags)
     elif reskind == 'S':
         SingleFloatCallDescr = getCallDescrClass(rffi.FLOAT)
-        return SingleFloatCallDescr(arg_classes, extrainfo)
+        return SingleFloatCallDescr(arg_classes, extrainfo,
+                                    ffi_flags=ffi_flags)
     assert False
 
 def get_ffi_type_kind(cpu, ffi_type):
diff --git a/pypy/jit/backend/llsupport/llmodel.py b/pypy/jit/backend/llsupport/llmodel.py
--- a/pypy/jit/backend/llsupport/llmodel.py
+++ b/pypy/jit/backend/llsupport/llmodel.py
@@ -257,10 +257,10 @@
     def calldescrof(self, FUNC, ARGS, RESULT, extrainfo):
         return get_call_descr(self.gc_ll_descr, ARGS, RESULT, extrainfo)
 
-    def calldescrof_dynamic(self, ffi_args, ffi_result, extrainfo):
+    def calldescrof_dynamic(self, ffi_args, ffi_result, extrainfo, ffi_flags):
         from pypy.jit.backend.llsupport import ffisupport
         return ffisupport.get_call_descr_dynamic(self, ffi_args, ffi_result,
-                                                 extrainfo)
+                                                 extrainfo, ffi_flags)
 
     def get_overflow_error(self):
         ovf_vtable = self.cast_adr_to_int(self._ovf_error_vtable)
diff --git a/pypy/jit/backend/llsupport/test/test_ffisupport.py b/pypy/jit/backend/llsupport/test/test_ffisupport.py
--- a/pypy/jit/backend/llsupport/test/test_ffisupport.py
+++ b/pypy/jit/backend/llsupport/test/test_ffisupport.py
@@ -13,17 +13,19 @@
 
 def test_call_descr_dynamic():
     args = [types.sint, types.pointer]
-    descr = get_call_descr_dynamic(FakeCPU(), args, types.sint)
+    descr = get_call_descr_dynamic(FakeCPU(), args, types.sint, ffi_flags=42)
     assert isinstance(descr, DynamicIntCallDescr)
     assert descr.arg_classes == 'ii'
+    assert descr.get_ffi_flags() == 42
 
     args = [types.sint, types.double, types.pointer]
     descr = get_call_descr_dynamic(FakeCPU(), args, types.void)
     assert descr is None    # missing floats
     descr = get_call_descr_dynamic(FakeCPU(supports_floats=True),
-                                   args, types.void)
+                                   args, types.void, ffi_flags=43)
     assert isinstance(descr, VoidCallDescr)
     assert descr.arg_classes == 'ifi'
+    assert descr.get_ffi_flags() == 43
 
     descr = get_call_descr_dynamic(FakeCPU(), [], types.sint8)
     assert isinstance(descr, DynamicIntCallDescr)
@@ -39,14 +41,16 @@
         descr = get_call_descr_dynamic(FakeCPU(), [], types.slonglong)
         assert descr is None   # missing longlongs
         descr = get_call_descr_dynamic(FakeCPU(supports_longlong=True),
-                                       [], types.slonglong)
+                                       [], types.slonglong, ffi_flags=43)
         assert isinstance(descr, LongLongCallDescr)
+        assert descr.get_ffi_flags() == 43
     else:
         assert types.slonglong is types.slong
 
     descr = get_call_descr_dynamic(FakeCPU(), [], types.float)
     assert descr is None   # missing singlefloats
     descr = get_call_descr_dynamic(FakeCPU(supports_singlefloats=True),
-                                   [], types.float)
+                                   [], types.float, ffi_flags=44)
     SingleFloatCallDescr = getCallDescrClass(rffi.FLOAT)
     assert isinstance(descr, SingleFloatCallDescr)
+    assert descr.get_ffi_flags() == 44
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -468,7 +468,7 @@
             assert longlong.getrealfloat(x) == 3.5 - 42
 
     def test_call(self):
-        from pypy.rlib.libffi import types
+        from pypy.rlib.libffi import types, FUNCFLAG_CDECL
 
         def func_int(a, b):
             return a + b
@@ -497,7 +497,8 @@
             assert res.value == 2 * num
             # then, try it with the dynamic calldescr
             dyn_calldescr = cpu.calldescrof_dynamic([ffi_type, ffi_type], ffi_type,
-                                                    EffectInfo.MOST_GENERAL)
+                                                    EffectInfo.MOST_GENERAL,
+                                                    ffi_flags=FUNCFLAG_CDECL)
             res = self.execute_operation(rop.CALL,
                                          [funcbox, BoxInt(num), BoxInt(num)],
                                          'int', descr=dyn_calldescr)
@@ -1944,7 +1945,7 @@
         assert values == [1, 10]
 
     def test_call_to_c_function(self):
-        from pypy.rlib.libffi import CDLL, types, ArgChain
+        from pypy.rlib.libffi import CDLL, types, ArgChain, FUNCFLAG_CDECL
         from pypy.rpython.lltypesystem.ll2ctypes import libc_name
         libc = CDLL(libc_name)
         c_tolower = libc.getpointer('tolower', [types.uchar], types.sint)
@@ -1955,7 +1956,8 @@
         func_adr = llmemory.cast_ptr_to_adr(c_tolower.funcsym)
         funcbox = ConstInt(heaptracker.adr2int(func_adr))
         calldescr = cpu.calldescrof_dynamic([types.uchar], types.sint,
-                                            EffectInfo.MOST_GENERAL)
+                                            EffectInfo.MOST_GENERAL,
+                                            ffi_flags=FUNCFLAG_CDECL)
         i1 = BoxInt()
         i2 = BoxInt()
         tok = BoxInt()
@@ -2012,7 +2014,8 @@
         calldescr = cpu.calldescrof_dynamic([types.pointer, types_size_t,
                                              types_size_t, types.pointer],
                                             types.void,
-                                            EffectInfo.MOST_GENERAL)
+                                            EffectInfo.MOST_GENERAL,
+                                            ffi_flags=clibffi.FUNCFLAG_CDECL)
         i0 = BoxInt()
         i1 = BoxInt()
         i2 = BoxInt()
@@ -2038,6 +2041,62 @@
         assert len(glob.lst) > 0
         lltype.free(raw, flavor='raw')
 
+    def test_call_to_winapi_function(self):
+        from pypy.rlib.clibffi import _WIN32, FUNCFLAG_STDCALL
+        if not _WIN32:
+            py.test.skip("Windows test only")
+        from pypy.rlib.libffi import CDLL, types, ArgChain
+        from pypy.rlib.rwin32 import DWORD
+        libc = CDLL('KERNEL32')
+        c_GetCurrentDir = libc.getpointer('GetCurrentDirectoryA',
+                                          [types.ulong, types.pointer],
+                                          types.ulong)
+
+        cwd = os.getcwd()
+        buflen = len(cwd) + 10
+        buffer = lltype.malloc(rffi.CCHARP.TO, buflen, flavor='raw')
+        argchain = ArgChain().arg(rffi.cast(DWORD, buflen)).arg(buffer)
+        res = c_GetCurrentDir.call(argchain, DWORD)
+        assert rffi.cast(lltype.Signed, res) == len(cwd)
+        assert rffi.charp2strn(buffer, buflen) == cwd
+        lltype.free(buffer, flavor='raw')
+
+        cpu = self.cpu
+        func_adr = llmemory.cast_ptr_to_adr(c_GetCurrentDir.funcsym)
+        funcbox = ConstInt(heaptracker.adr2int(func_adr))
+        calldescr = cpu.calldescrof_dynamic([types.ulong, types.pointer],
+                                            types.ulong,
+                                            EffectInfo.MOST_GENERAL,
+                                            ffi_flags=FUNCFLAG_STDCALL)
+        i1 = BoxInt()
+        i2 = BoxInt()
+        faildescr = BasicFailDescr(1)
+        # if the stdcall convention is ignored, then ESP is wrong after the
+        # call: 8 bytes too much.  If we repeat the call often enough, crash.
+        ops = []
+        for i in range(50):
+            i3 = BoxInt()
+            ops += [
+                ResOperation(rop.CALL_RELEASE_GIL, [funcbox, i1, i2], i3,
+                             descr=calldescr),
+                ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+                ]
+            ops[-1].setfailargs([])
+        ops += [
+            ResOperation(rop.FINISH, [i3], None, descr=BasicFailDescr(0))
+        ]
+        looptoken = LoopToken()
+        self.cpu.compile_loop([i1, i2], ops, looptoken)
+
+        buffer = lltype.malloc(rffi.CCHARP.TO, buflen, flavor='raw')
+        self.cpu.set_future_value_int(0, buflen)
+        self.cpu.set_future_value_int(1, rffi.cast(lltype.Signed, buffer))
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 0
+        assert self.cpu.get_latest_value_int(0) == len(cwd)
+        assert rffi.charp2strn(buffer, buflen) == cwd
+        lltype.free(buffer, flavor='raw')
+
     def test_guard_not_invalidated(self):
         cpu = self.cpu
         i0 = BoxInt()
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -34,6 +34,7 @@
 from pypy.rlib.debug import (debug_print, debug_start, debug_stop,
                              have_debug_prints)
 from pypy.rlib import rgc
+from pypy.rlib.clibffi import FFI_DEFAULT_ABI
 from pypy.jit.backend.x86.jump import remap_frame_layout
 from pypy.jit.metainterp.history import ConstInt, BoxInt
 from pypy.jit.codewriter.effectinfo import EffectInfo
@@ -1120,7 +1121,7 @@
         return genop_cmp_guard_float
 
     def _emit_call(self, force_index, x, arglocs, start=0, tmp=eax,
-                   argtypes=None):
+                   argtypes=None, callconv=FFI_DEFAULT_ABI):
         if IS_X86_64:
             return self._emit_call_64(force_index, x, arglocs, start, argtypes)
 
@@ -1149,6 +1150,16 @@
         # x is a location
         self.mc.CALL(x)
         self.mark_gc_roots(force_index)
+        #
+        if callconv != FFI_DEFAULT_ABI:
+            self._fix_stdcall(callconv, p)
+
+    def _fix_stdcall(self, callconv, p):
+        from pypy.rlib.clibffi import FFI_STDCALL
+        assert callconv == FFI_STDCALL
+        # it's a bit stupid, but we're just going to cancel the fact that
+        # the called function just added 'p' to ESP, by subtracting it again.
+        self.mc.SUB_ri(esp.value, p)
 
     def _emit_call_64(self, force_index, x, arglocs, start, argtypes):
         src_locs = []
@@ -2127,7 +2138,8 @@
             tmp = eax
 
         self._emit_call(force_index, x, arglocs, 3, tmp=tmp,
-                        argtypes=op.getdescr().get_arg_types())
+                        argtypes=op.getdescr().get_arg_types(),
+                        callconv=op.getdescr().get_call_conv())
 
         if IS_X86_32 and isinstance(resloc, StackLoc) and resloc.width == 8:
             # a float or a long long return
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -527,6 +527,7 @@
 
     NOP = insn('\x90')
     RET = insn('\xC3')
+    RET16_i = insn('\xC2', immediate(1, 'h'))
 
     PUSH_r = insn(rex_nw, register(1), '\x50')
     PUSH_b = insn(rex_nw, '\xFF', orbyte(6<<3), stack_bp(1))
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -433,6 +433,88 @@
                 ops_offset[operations[2]] <=
                 ops_offset[None])
 
+    def test_calling_convention(self, monkeypatch):
+        if WORD != 4:
+            py.test.skip("32-bit only test")
+        from pypy.jit.backend.x86.regloc import eax, edx
+        from pypy.jit.backend.x86 import codebuf
+        from pypy.jit.codewriter.effectinfo import EffectInfo
+        from pypy.rlib.libffi import types, clibffi
+        had_stdcall = hasattr(clibffi, 'FFI_STDCALL')
+        if not had_stdcall:    # not running on Windows, but we can still test
+            monkeypatch.setattr(clibffi, 'FFI_STDCALL', 12345, raising=False)
+        #
+        for ffi in [clibffi.FFI_DEFAULT_ABI, clibffi.FFI_STDCALL]:
+            cpu = self.cpu
+            mc = codebuf.MachineCodeBlockWrapper()
+            mc.MOV_rs(eax.value, 4)      # argument 1
+            mc.MOV_rs(edx.value, 40)     # argument 10
+            mc.SUB_rr(eax.value, edx.value)     # return arg1 - arg10
+            if ffi == clibffi.FFI_DEFAULT_ABI:
+                mc.RET()
+            else:
+                mc.RET16_i(40)
+            rawstart = mc.materialize(cpu.asmmemmgr, [])
+            #
+            calldescr = cpu.calldescrof_dynamic([types.slong] * 10,
+                                                types.slong,
+                                                EffectInfo.MOST_GENERAL,
+                                                ffi_flags=-1)
+            calldescr.get_call_conv = lambda: ffi      # <==== hack
+            funcbox = ConstInt(rawstart)
+            i1 = BoxInt()
+            i2 = BoxInt()
+            i3 = BoxInt()
+            i4 = BoxInt()
+            i5 = BoxInt()
+            i6 = BoxInt()
+            c = ConstInt(-1)
+            faildescr = BasicFailDescr(1)
+            # we must call it repeatedly: if the stack pointer gets increased
+            # by 40 bytes by the STDCALL call, and if we don't expect it,
+            # then we are going to get our stack emptied unexpectedly by
+            # several repeated calls
+            ops = [
+            ResOperation(rop.CALL_RELEASE_GIL,
+                         [funcbox, i1, c, c, c, c, c, c, c, c, i2],
+                         i3, descr=calldescr),
+            ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+
+            ResOperation(rop.CALL_RELEASE_GIL,
+                         [funcbox, i1, c, c, c, c, c, c, c, c, i2],
+                         i4, descr=calldescr),
+            ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+
+            ResOperation(rop.CALL_RELEASE_GIL,
+                         [funcbox, i1, c, c, c, c, c, c, c, c, i2],
+                         i5, descr=calldescr),
+            ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+
+            ResOperation(rop.CALL_RELEASE_GIL,
+                         [funcbox, i1, c, c, c, c, c, c, c, c, i2],
+                         i6, descr=calldescr),
+            ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+
+            ResOperation(rop.FINISH, [i3, i4, i5, i6], None,
+                         descr=BasicFailDescr(0))
+            ]
+            ops[1].setfailargs([])
+            ops[3].setfailargs([])
+            ops[5].setfailargs([])
+            ops[7].setfailargs([])
+            looptoken = LoopToken()
+            self.cpu.compile_loop([i1, i2], ops, looptoken)
+
+            self.cpu.set_future_value_int(0, 123450)
+            self.cpu.set_future_value_int(1, 123408)
+            fail = self.cpu.execute_token(looptoken)
+            assert fail.identifier == 0
+            assert self.cpu.get_latest_value_int(0) == 42
+            assert self.cpu.get_latest_value_int(1) == 42
+            assert self.cpu.get_latest_value_int(2) == 42
+            assert self.cpu.get_latest_value_int(3) == 42
+
+
 class TestDebuggingAssembler(object):
     def setup_method(self, meth):
         self.cpu = CPU(rtyper=None, stats=FakeStats())
diff --git a/pypy/jit/codewriter/support.py b/pypy/jit/codewriter/support.py
--- a/pypy/jit/codewriter/support.py
+++ b/pypy/jit/codewriter/support.py
@@ -91,9 +91,12 @@
     reds_v = op.args[2+numgreens:]
     assert len(reds_v) == numreds
     #
-    def _sort(args_v):
+    def _sort(args_v, is_green):
         from pypy.jit.metainterp.history import getkind
         lst = [v for v in args_v if v.concretetype is not lltype.Void]
+        if is_green:
+            assert len(lst) == len(args_v), (
+                "not supported so far: 'greens' variables contain Void")
         _kind2count = {'int': 1, 'ref': 2, 'float': 3}
         lst2 = sorted(lst, key=lambda v: _kind2count[getkind(v.concretetype)])
         # a crash here means that you have to reorder the variable named in
@@ -102,7 +105,7 @@
         assert lst == lst2
         return lst
     #
-    return (_sort(greens_v), _sort(reds_v))
+    return (_sort(greens_v, True), _sort(reds_v, False))
 
 def maybe_on_top_of_llinterp(rtyper, fnptr):
     # Run a generated graph on top of the llinterp for testing.
diff --git a/pypy/jit/metainterp/optimizeopt/fficall.py b/pypy/jit/metainterp/optimizeopt/fficall.py
--- a/pypy/jit/metainterp/optimizeopt/fficall.py
+++ b/pypy/jit/metainterp/optimizeopt/fficall.py
@@ -18,26 +18,27 @@
     def __init__(self, funcval, cpu, prepare_op):
         self.funcval = funcval
         self.opargs = []
-        argtypes, restype = self._get_signature(funcval)
+        argtypes, restype, flags = self._get_signature(funcval)
         self.descr = cpu.calldescrof_dynamic(argtypes, restype,
-                                             EffectInfo.MOST_GENERAL)
+                                             EffectInfo.MOST_GENERAL,
+                                             ffi_flags=flags)
         # ^^^ may be None if unsupported
         self.prepare_op = prepare_op
         self.delayed_ops = []
 
     def _get_signature(self, funcval):
         """
-        given the funcval, return a tuple (argtypes, restype), where the
-        actuall types are libffi.types.*
+        given the funcval, return a tuple (argtypes, restype, flags), where
+        the actuall types are libffi.types.*
 
         The implementation is tricky because we have three possible cases:
 
         - translated: the easiest case, we can just cast back the pointer to
-          the original Func instance and read .argtypes and .restype
+          the original Func instance and read .argtypes, .restype and .flags
 
         - completely untranslated: this is what we get from test_optimizeopt
           tests. funcval contains a FakeLLObject whose _fake_class is Func,
-          and we can just get .argtypes and .restype
+          and we can just get .argtypes, .restype and .flags
 
         - partially translated: this happens when running metainterp tests:
           funcval contains the low-level equivalent of a Func, and thus we
@@ -49,10 +50,10 @@
         llfunc = funcval.box.getref_base()
         if we_are_translated():
             func = cast_base_ptr_to_instance(Func, llfunc)
-            return func.argtypes, func.restype
+            return func.argtypes, func.restype, func.flags
         elif getattr(llfunc, '_fake_class', None) is Func:
             # untranslated
-            return llfunc.argtypes, llfunc.restype
+            return llfunc.argtypes, llfunc.restype, llfunc.flags
         else:
             # partially translated
             # llfunc contains an opaque pointer to something like the following:
@@ -63,7 +64,7 @@
             # because we don't have the exact TYPE to cast to.  Instead, we
             # just fish it manually :-(
             f = llfunc._obj.container
-            return f.inst_argtypes, f.inst_restype
+            return f.inst_argtypes, f.inst_restype, f.inst_flags
 
 
 class OptFfiCall(Optimization):
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizefficall.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizefficall.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizefficall.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizefficall.py
@@ -14,12 +14,15 @@
     can check that the signature of a call is really what you want.
     """
 
-    def __init__(self, arg_types, typeinfo):
+    def __init__(self, arg_types, typeinfo, flags):
         self.arg_types = arg_types
         self.typeinfo = typeinfo   # return type
+        self.flags = flags
 
     def __eq__(self, other):
-        return self.arg_types == other.arg_types and self.typeinfo == other.typeinfo
+        return (self.arg_types == other.arg_types and
+                self.typeinfo == other.typeinfo and
+                self.flags == other.get_ffi_flags())
 
 class FakeLLObject(object):
 
@@ -41,14 +44,17 @@
         vable_token_descr = LLtypeMixin.valuedescr
         valuedescr = LLtypeMixin.valuedescr
 
-        int_float__int = MyCallDescr('if', 'i')
+        int_float__int_42 = MyCallDescr('if', 'i', 42)
+        int_float__int_43 = MyCallDescr('if', 'i', 43)
         funcptr = FakeLLObject()
         func = FakeLLObject(_fake_class=Func,
                             argtypes=[types.sint, types.double],
-                            restype=types.sint)
+                            restype=types.sint,
+                            flags=42)
         func2 = FakeLLObject(_fake_class=Func,
                              argtypes=[types.sint, types.double],
-                             restype=types.sint)
+                             restype=types.sint,
+                             flags=43)
         #
         def calldescr(cpu, FUNC, oopspecindex, extraeffect=None):
             if extraeffect == EffectInfo.EF_RANDOM_EFFECTS:
@@ -83,7 +89,7 @@
         """
         expected = """
         [i0, f1]
-        i3 = call_release_gil(12345, i0, f1, descr=int_float__int)
+        i3 = call_release_gil(12345, i0, f1, descr=int_float__int_42)
         guard_not_forced() []
         guard_no_exception() []
         jump(i3, f1)
@@ -123,7 +129,7 @@
         [i0, f1, p2]
         i4 = force_token()
         setfield_gc(p2, i4, descr=vable_token_descr)
-        i3 = call_release_gil(12345, i0, f1, descr=int_float__int)
+        i3 = call_release_gil(12345, i0, f1, descr=int_float__int_42)
         guard_not_forced() [p2]
         guard_no_exception() [p2]
         jump(i3, f1, p2)
@@ -220,7 +226,7 @@
         call(0, ConstPtr(func),                        descr=libffi_prepare)
         #
         # this "nested" call is nicely optimized
-        i4 = call_release_gil(67890, i0, f1, descr=int_float__int)
+        i4 = call_release_gil(67890, i0, f1, descr=int_float__int_43)
         guard_not_forced() []
         guard_no_exception() []
         #
@@ -265,7 +271,7 @@
         expected = """
         [i0, f1, p2]
         setfield_gc(p2, i0, descr=valuedescr)
-        i3 = call_release_gil(12345, i0, f1, descr=int_float__int)
+        i3 = call_release_gil(12345, i0, f1, descr=int_float__int_42)
         guard_not_forced() []
         guard_no_exception() []
         jump(i3, f1, p2)
diff --git a/pypy/jit/metainterp/test/test_warmspot.py b/pypy/jit/metainterp/test/test_warmspot.py
--- a/pypy/jit/metainterp/test/test_warmspot.py
+++ b/pypy/jit/metainterp/test/test_warmspot.py
@@ -252,6 +252,41 @@
         self.check_loops({'int_sub': 1, 'int_gt': 1, 'guard_true': 1,
                           'jump': 1})
 
+    def test_void_red_variable(self):
+        mydriver = JitDriver(greens=[], reds=['a', 'm'])
+        def f1(m):
+            a = None
+            while m > 0:
+                mydriver.jit_merge_point(a=a, m=m)
+                m = m - 1
+                if m == 10:
+                    pass   # other case
+        self.meta_interp(f1, [18])
+
+    def test_bug_constant_rawptrs(self):
+        py.test.skip("crashes because a is a constant")
+        from pypy.rpython.lltypesystem import lltype, rffi
+        mydriver = JitDriver(greens=['a'], reds=['m'])
+        def f1(m):
+            a = lltype.nullptr(rffi.VOIDP.TO)
+            while m > 0:
+                mydriver.jit_merge_point(a=a, m=m)
+                m = m - 1
+        self.meta_interp(f1, [18])
+
+    def test_bug_rawptrs(self):
+        from pypy.rpython.lltypesystem import lltype, rffi
+        mydriver = JitDriver(greens=['a'], reds=['m'])
+        def f1(m):
+            a = lltype.malloc(rffi.VOIDP.TO, 5, flavor='raw')
+            while m > 0:
+                mydriver.jit_merge_point(a=a, m=m)
+                m = m - 1
+                if m == 10:
+                    pass
+            lltype.free(a, flavor='raw')
+        self.meta_interp(f1, [18])
+
 
 class TestLLWarmspot(WarmspotTests, LLJitMixin):
     CPUClass = runner.LLtypeCPU
diff --git a/pypy/jit/metainterp/warmspot.py b/pypy/jit/metainterp/warmspot.py
--- a/pypy/jit/metainterp/warmspot.py
+++ b/pypy/jit/metainterp/warmspot.py
@@ -130,8 +130,15 @@
     results = _find_jit_marker(graphs, 'jit_merge_point')
     if not results:
         raise Exception("no jit_merge_point found!")
+    seen = set([graph for graph, block, pos in results])
+    assert len(seen) == len(results), (
+        "found several jit_merge_points in the same graph")
     return results
 
+def locate_jit_merge_point(graph):
+    [(graph, block, pos)] = find_jit_merge_points([graph])
+    return block, pos, block.operations[pos]
+
 def find_set_param(graphs):
     return _find_jit_marker(graphs, 'set_param')
 
@@ -235,7 +242,7 @@
     def split_graph_and_record_jitdriver(self, graph, block, pos):
         op = block.operations[pos]
         jd = JitDriverStaticData()
-        jd._jit_merge_point_pos = (graph, op)
+        jd._jit_merge_point_in = graph
         args = op.args[2:]
         s_binding = self.translator.annotator.binding
         jd._portal_args_s = [s_binding(v) for v in args]
@@ -245,7 +252,8 @@
         graph.startblock = support.split_before_jit_merge_point(*jmpp)
         graph.startblock.isstartblock = True
         # a crash in the following checkgraph() means that you forgot
-        # to list some variable in greens=[] or reds=[] in JitDriver.
+        # to list some variable in greens=[] or reds=[] in JitDriver,
+        # or that a jit_merge_point() takes a constant as an argument.
         checkgraph(graph)
         for v in graph.getargs():
             assert isinstance(v, Variable)
@@ -503,7 +511,8 @@
             self.make_args_specification(jd)
 
     def make_args_specification(self, jd):
-        graph, op = jd._jit_merge_point_pos
+        graph = jd._jit_merge_point_in
+        _, _, op = locate_jit_merge_point(graph)
         greens_v, reds_v = support.decode_hp_hint_args(op)
         ALLARGS = [v.concretetype for v in (greens_v + reds_v)]
         jd._green_args_spec = [v.concretetype for v in greens_v]
@@ -551,7 +560,7 @@
             assert jitdriver in sublists, \
                    "can_enter_jit with no matching jit_merge_point"
             jd, sublist = sublists[jitdriver]
-            origportalgraph = jd._jit_merge_point_pos[0]
+            origportalgraph = jd._jit_merge_point_in
             if graph is not origportalgraph:
                 sublist.append((graph, block, index))
                 jd.no_loop_header = False
@@ -581,7 +590,7 @@
             can_enter_jits = [(jd.portal_graph, jd.portal_graph.startblock, 0)]
 
         for graph, block, index in can_enter_jits:
-            if graph is jd._jit_merge_point_pos[0]:
+            if graph is jd._jit_merge_point_in:
                 continue
 
             op = block.operations[index]
@@ -639,7 +648,7 @@
         #           while 1:
         #               more stuff
         #
-        origportalgraph = jd._jit_merge_point_pos[0]
+        origportalgraph = jd._jit_merge_point_in
         portalgraph = jd.portal_graph
         PORTALFUNC = jd._PORTAL_FUNCTYPE
 
@@ -655,11 +664,13 @@
         portalfunc_ARGS = []
         nums = {}
         for i, ARG in enumerate(PORTALFUNC.ARGS):
+            kind = history.getkind(ARG)
+            assert kind != 'void'
             if i < len(jd.jitdriver.greens):
                 color = 'green'
             else:
                 color = 'red'
-            attrname = '%s_%s' % (color, history.getkind(ARG))
+            attrname = '%s_%s' % (color, kind)
             count = nums.get(attrname, 0)
             nums[attrname] = count + 1
             portalfunc_ARGS.append((ARG, attrname, count))
@@ -791,14 +802,7 @@
         # ____________________________________________________________
         # Now mutate origportalgraph to end with a call to portal_runner_ptr
         #
-        _, op = jd._jit_merge_point_pos
-        for origblock in origportalgraph.iterblocks():
-            if op in origblock.operations:
-                break
-        else:
-            assert False, "lost the operation %r in the graph %r" % (
-                op, origportalgraph)
-        origindex = origblock.operations.index(op)
+        origblock, origindex, op = locate_jit_merge_point(origportalgraph)
         assert op.opname == 'jit_marker'
         assert op.args[0].value == 'jit_merge_point'
         greens_v, reds_v = support.decode_hp_hint_args(op)
diff --git a/pypy/jit/metainterp/warmstate.py b/pypy/jit/metainterp/warmstate.py
--- a/pypy/jit/metainterp/warmstate.py
+++ b/pypy/jit/metainterp/warmstate.py
@@ -124,7 +124,7 @@
     # Hash of lltype or ootype object.
     # Only supports strings, unicodes and regular instances,
     # as well as primitives that can meaningfully be cast to Signed.
-    if isinstance(TYPE, lltype.Ptr):
+    if isinstance(TYPE, lltype.Ptr) and TYPE.TO._gckind == 'gc':
         if TYPE.TO is rstr.STR or TYPE.TO is rstr.UNICODE:
             return rstr.LLHelpers.ll_strhash(x)    # assumed not null
         else:
@@ -140,7 +140,7 @@
         else:
             return 0
     else:
-        return lltype.cast_primitive(lltype.Signed, x)
+        return rffi.cast(lltype.Signed, x)
 
 @specialize.ll_and_arg(3)
 def set_future_value(cpu, j, value, typecode):
diff --git a/pypy/module/__builtin__/functional.py b/pypy/module/__builtin__/functional.py
--- a/pypy/module/__builtin__/functional.py
+++ b/pypy/module/__builtin__/functional.py
@@ -292,7 +292,7 @@
                 raise
             break
         new_frame = space.createframe(code, w_func.w_func_globals,
-                                      w_func.closure)
+                                      w_func)
         new_frame.locals_stack_w[0] = w_item
         w_res = new_frame.run()
         result_w.append(w_res)
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -687,11 +687,15 @@
 # support for the "string escape" codec
 # This is a bytes-to bytes transformation
 
-@unwrap_spec(errors='str_or_None')
-def escape_encode(space, w_string, errors='strict'):
-    w_repr = space.repr(w_string)
-    w_result = space.getslice(w_repr, space.wrap(1), space.wrap(-1))
-    return space.newtuple([w_result, space.len(w_string)])
+@unwrap_spec(data=str, errors='str_or_None')
+def escape_encode(space, data, errors='strict'):
+    from pypy.objspace.std.stringobject import string_escape_encode
+    result = string_escape_encode(data, quote="'")
+    start = 1
+    end = len(result) - 1
+    assert end >= 0
+    w_result = space.wrap(result[start:end])
+    return space.newtuple([w_result, space.wrap(len(data))])
 
 @unwrap_spec(data=str, errors='str_or_None')
 def escape_decode(space, data, errors='strict'):
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -102,7 +102,6 @@
     
     def test_indexerror(self):
         test =   "\\"     # trailing backslash
-             
         raises (ValueError, test.decode,'string-escape')
 
     def test_charmap_decode(self):
@@ -292,6 +291,10 @@
         assert '\\0f'.decode('string_escape') == chr(0) + 'f'
         assert '\\08'.decode('string_escape') == chr(0) + '8'
 
+    def test_escape_encode(self):
+        assert '"'.encode('string_escape') == '"'
+        assert "'".encode('string_escape') == "\\'"
+
     def test_decode_utf8_different_case(self):
         constant = u"a"
         assert constant.encode("utf-8") == constant.encode("UTF-8")
diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py
--- a/pypy/module/_continuation/interp_continuation.py
+++ b/pypy/module/_continuation/interp_continuation.py
@@ -43,11 +43,11 @@
     def switch(self, w_to):
         to = self.space.interp_w(W_Continulet, w_to, can_be_None=True)
         if to is not None:
-            if self is to:    # double-switch to myself: no-op
-                return get_result()
             if to.sthread is None:
                 start_state.clear()
                 raise geterror(self.space, "continulet not initialized yet")
+            if self is to:    # double-switch to myself: no-op
+                return get_result()
         if self.sthread is None:
             start_state.clear()
             raise geterror(self.space, "continulet not initialized yet")
diff --git a/pypy/module/_weakref/interp__weakref.py b/pypy/module/_weakref/interp__weakref.py
--- a/pypy/module/_weakref/interp__weakref.py
+++ b/pypy/module/_weakref/interp__weakref.py
@@ -8,24 +8,12 @@
 
 
 class WeakrefLifeline(W_Root):
+    cached_weakref_index = -1
+    cached_proxy_index = -1
+
     def __init__(self, space):
         self.space = space
         self.refs_weak = []
-        self.cached_weakref_index = -1
-        self.cached_proxy_index = -1
-
-    def __del__(self):
-        """This runs when the interp-level object goes away, and allows
-        its lifeline to go away.  The purpose of this is to activate the
-        callbacks even if there is no __del__ method on the interp-level
-        W_Root subclass implementing the object.
-        """
-        for i in range(len(self.refs_weak) - 1, -1, -1):
-            w_ref = self.refs_weak[i]()
-            if w_ref is not None and w_ref.w_callable is not None:
-                w_ref.enqueue_for_destruction(self.space,
-                                              W_WeakrefBase.activate_callback,
-                                              'weakref callback of ')
 
     def clear_all_weakrefs(self):
         """Clear all weakrefs.  This is called when an app-level object has
@@ -39,12 +27,11 @@
         # weakref callbacks are not invoked eagerly here.  They are
         # invoked by self.__del__() anyway.
 
-    @jit.dont_look_inside
-    def get_or_make_weakref(self, space, w_subtype, w_obj, w_callable):
+    def get_or_make_weakref(self, w_subtype, w_obj):
+        space = self.space
         w_weakreftype = space.gettypeobject(W_Weakref.typedef)
         is_weakreftype = space.is_w(w_weakreftype, w_subtype)
-        can_reuse = space.is_w(w_callable, space.w_None)
-        if is_weakreftype and can_reuse and self.cached_weakref_index >= 0:
+        if is_weakreftype and self.cached_weakref_index >= 0:
             w_cached = self.refs_weak[self.cached_weakref_index]()
             if w_cached is not None:
                 return w_cached
@@ -52,16 +39,15 @@
                 self.cached_weakref_index = -1
         w_ref = space.allocate_instance(W_Weakref, w_subtype)
         index = len(self.refs_weak)
-        W_Weakref.__init__(w_ref, space, w_obj, w_callable)
+        W_Weakref.__init__(w_ref, space, w_obj, None)
         self.refs_weak.append(weakref.ref(w_ref))
-        if is_weakreftype and can_reuse:
+        if is_weakreftype:
             self.cached_weakref_index = index
         return w_ref
 
-    @jit.dont_look_inside
-    def get_or_make_proxy(self, space, w_obj, w_callable):
-        can_reuse = space.is_w(w_callable, space.w_None)
-        if can_reuse and self.cached_proxy_index >= 0:
+    def get_or_make_proxy(self, w_obj):
+        space = self.space
+        if self.cached_proxy_index >= 0:
             w_cached = self.refs_weak[self.cached_proxy_index]()
             if w_cached is not None:
                 return w_cached
@@ -69,12 +55,11 @@
                 self.cached_proxy_index = -1
         index = len(self.refs_weak)
         if space.is_true(space.callable(w_obj)):
-            w_proxy = W_CallableProxy(space, w_obj, w_callable)
+            w_proxy = W_CallableProxy(space, w_obj, None)
         else:
-            w_proxy = W_Proxy(space, w_obj, w_callable)
+            w_proxy = W_Proxy(space, w_obj, None)
         self.refs_weak.append(weakref.ref(w_proxy))
-        if can_reuse:
-            self.cached_proxy_index = index
+        self.cached_proxy_index = index
         return w_proxy
 
     def get_any_weakref(self, space):
@@ -90,6 +75,45 @@
                 return w_ref
         return space.w_None
 
+
+class WeakrefLifelineWithCallbacks(WeakrefLifeline):
+
+    def __init__(self, space, oldlifeline=None):
+        self.space = space
+        if oldlifeline is None:
+            self.refs_weak = []
+        else:
+            self.refs_weak = oldlifeline.refs_weak
+
+    def __del__(self):
+        """This runs when the interp-level object goes away, and allows
+        its lifeline to go away.  The purpose of this is to activate the
+        callbacks even if there is no __del__ method on the interp-level
+        W_Root subclass implementing the object.
+        """
+        for i in range(len(self.refs_weak) - 1, -1, -1):
+            w_ref = self.refs_weak[i]()
+            if w_ref is not None and w_ref.w_callable is not None:
+                w_ref.enqueue_for_destruction(self.space,
+                                              W_WeakrefBase.activate_callback,
+                                              'weakref callback of ')
+
+    def make_weakref_with_callback(self, w_subtype, w_obj, w_callable):
+        space = self.space
+        w_ref = space.allocate_instance(W_Weakref, w_subtype)
+        W_Weakref.__init__(w_ref, space, w_obj, w_callable)
+        self.refs_weak.append(weakref.ref(w_ref))
+        return w_ref
+
+    def make_proxy_with_callback(self, w_obj, w_callable):
+        space = self.space
+        if space.is_true(space.callable(w_obj)):
+            w_proxy = W_CallableProxy(space, w_obj, w_callable)
+        else:
+            w_proxy = W_Proxy(space, w_obj, w_callable)
+        self.refs_weak.append(weakref.ref(w_proxy))
+        return w_proxy
+
 # ____________________________________________________________
 
 class Dummy:
@@ -103,8 +127,7 @@
 
 class W_WeakrefBase(Wrappable):
     def __init__(w_self, space, w_obj, w_callable):
-        if space.is_w(w_callable, space.w_None):
-            w_callable = None
+        assert w_callable is not space.w_None    # should be really None
         w_self.space = space
         assert w_obj is not None
         w_self.w_obj_weak = weakref.ref(w_obj)
@@ -177,16 +200,39 @@
     def descr__ne__(self, space, w_ref2):
         return space.not_(space.eq(self, w_ref2))
 
+def getlifeline(space, w_obj):
+    lifeline = w_obj.getweakref()
+    if lifeline is None:
+        lifeline = WeakrefLifeline(space)
+        w_obj.setweakref(space, lifeline)
+    return lifeline
+
+def getlifelinewithcallbacks(space, w_obj):
+    lifeline = w_obj.getweakref()
+    if not isinstance(lifeline, WeakrefLifelineWithCallbacks):  # or None
+        oldlifeline = lifeline
+        lifeline = WeakrefLifelineWithCallbacks(space, oldlifeline)
+        w_obj.setweakref(space, lifeline)
+    return lifeline
+
+@jit.dont_look_inside
+def get_or_make_weakref(space, w_subtype, w_obj):
+    return getlifeline(space, w_obj).get_or_make_weakref(w_subtype, w_obj)
+
+@jit.dont_look_inside
+def make_weakref_with_callback(space, w_subtype, w_obj, w_callable):
+    lifeline = getlifelinewithcallbacks(space, w_obj)
+    return lifeline.make_weakref_with_callback(w_subtype, w_obj, w_callable)
+
 def descr__new__weakref(space, w_subtype, w_obj, w_callable=None,
                         __args__=None):
     if __args__.arguments_w:
         raise OperationError(space.w_TypeError, space.wrap(
             "__new__ expected at most 2 arguments"))
-    lifeline = w_obj.getweakref()
-    if lifeline is None:
-        lifeline = WeakrefLifeline(space)
-        w_obj.setweakref(space, lifeline)
-    return lifeline.get_or_make_weakref(space, w_subtype, w_obj, w_callable)
+    if space.is_w(w_callable, space.w_None):
+        return get_or_make_weakref(space, w_subtype, w_obj)
+    else:
+        return make_weakref_with_callback(space, w_subtype, w_obj, w_callable)
 
 W_Weakref.typedef = TypeDef("weakref",
     __doc__ = """A weak reference to an object 'obj'.  A 'callback' can be given,
@@ -239,15 +285,23 @@
         w_obj = force(space, self)
         return space.call_args(w_obj, __args__)
 
+@jit.dont_look_inside
+def get_or_make_proxy(space, w_obj):
+    return getlifeline(space, w_obj).get_or_make_proxy(w_obj)
+
+@jit.dont_look_inside
+def make_proxy_with_callback(space, w_obj, w_callable):
+    lifeline = getlifelinewithcallbacks(space, w_obj)
+    return lifeline.make_proxy_with_callback(w_obj, w_callable)
+
 def proxy(space, w_obj, w_callable=None):
     """Create a proxy object that weakly references 'obj'.
 'callback', if given, is called with the proxy as an argument when 'obj'
 is about to be finalized."""
-    lifeline = w_obj.getweakref()
-    if lifeline is None:
-        lifeline = WeakrefLifeline(space)
-        w_obj.setweakref(space, lifeline)
-    return lifeline.get_or_make_proxy(space, w_obj, w_callable)
+    if space.is_w(w_callable, space.w_None):
+        return get_or_make_proxy(space, w_obj)
+    else:
+        return make_proxy_with_callback(space, w_obj, w_callable)
 
 def descr__new__proxy(space, w_subtype, w_obj, w_callable=None):
     raise OperationError(
diff --git a/pypy/module/_weakref/test/test_weakref.py b/pypy/module/_weakref/test/test_weakref.py
--- a/pypy/module/_weakref/test/test_weakref.py
+++ b/pypy/module/_weakref/test/test_weakref.py
@@ -369,6 +369,26 @@
             return A
         raises(TypeError, tryit)
 
+    def test_proxy_to_dead_object(self):
+        import _weakref, gc
+        class A(object):
+            pass
+        p = _weakref.proxy(A())
+        gc.collect()
+        raises(ReferenceError, "p + 1")
+
+    def test_proxy_with_callback(self):
+        import _weakref, gc
+        class A(object):
+            pass
+        a2 = A()
+        def callback(proxy):
+            a2.seen = proxy
+        p = _weakref.proxy(A(), callback)
+        gc.collect()
+        raises(ReferenceError, "p + 1")
+        assert a2.seen is p
+
     def test_repr(self):
         import _weakref, gc
         for kind in ('ref', 'proxy'):
diff --git a/pypy/module/bz2/interp_bz2.py b/pypy/module/bz2/interp_bz2.py
--- a/pypy/module/bz2/interp_bz2.py
+++ b/pypy/module/bz2/interp_bz2.py
@@ -446,7 +446,9 @@
             result = self.buffer[pos:pos + n]
             self.pos += n
         else:
-            result = self.buffer
+            pos = self.pos
+            assert pos >= 0
+            result = self.buffer[pos:]
             self.pos = 0
             self.buffer = ""
         self.readlength += len(result)
diff --git a/pypy/module/bz2/test/test_bz2_file.py b/pypy/module/bz2/test/test_bz2_file.py
--- a/pypy/module/bz2/test/test_bz2_file.py
+++ b/pypy/module/bz2/test/test_bz2_file.py
@@ -274,14 +274,14 @@
             pass
         del bz2f   # delete from this frame, which is captured in the traceback
 
-    def test_read_chunk10(self):
+    def test_read_chunk9(self):
         from bz2 import BZ2File
         self.create_temp_file()
         
         bz2f = BZ2File(self.temppath)
         text_read = ""
         while True:
-            data = bz2f.read(10)
+            data = bz2f.read(9) # 9 doesn't divide evenly into data length
             if not data:
                 break
             text_read = "%s%s" % (text_read, data)
diff --git a/pypy/module/cpyext/frameobject.py b/pypy/module/cpyext/frameobject.py
--- a/pypy/module/cpyext/frameobject.py
+++ b/pypy/module/cpyext/frameobject.py
@@ -57,7 +57,7 @@
     code = space.interp_w(PyCode, w_code)
     w_globals = from_ref(space, py_frame.c_f_globals)
 
-    frame = space.FrameClass(space, code, w_globals, closure=None)
+    frame = space.FrameClass(space, code, w_globals, outer_func=None)
     frame.f_lineno = py_frame.c_f_lineno
     w_obj = space.wrap(frame)
     track_reference(space, py_obj, w_obj)
diff --git a/pypy/module/micronumpy/__init__.py b/pypy/module/micronumpy/__init__.py
--- a/pypy/module/micronumpy/__init__.py
+++ b/pypy/module/micronumpy/__init__.py
@@ -1,42 +1,45 @@
-
 from pypy.interpreter.mixedmodule import MixedModule
 
+
 class Module(MixedModule):
-
     applevel_name = 'numpy'
 
     interpleveldefs = {
         'array': 'interp_numarray.SingleDimArray',
         'dtype': 'interp_dtype.W_Dtype',
+        'ufunc': 'interp_ufuncs.W_Ufunc',
 
         'zeros': 'interp_numarray.zeros',
         'empty': 'interp_numarray.zeros',
         'ones': 'interp_numarray.ones',
         'fromstring': 'interp_support.fromstring',
+    }
 
-        # ufuncs
-        'abs': 'interp_ufuncs.absolute',
-        'absolute': 'interp_ufuncs.absolute',
-        'add': 'interp_ufuncs.add',
-        'copysign': 'interp_ufuncs.copysign',
-        'divide': 'interp_ufuncs.divide',
-        'exp': 'interp_ufuncs.exp',
-        'fabs': 'interp_ufuncs.fabs',
-        'floor': 'interp_ufuncs.floor',
-        'maximum': 'interp_ufuncs.maximum',
-        'minimum': 'interp_ufuncs.minimum',
-        'multiply': 'interp_ufuncs.multiply',
-        'negative': 'interp_ufuncs.negative',
-        'reciprocal': 'interp_ufuncs.reciprocal',
-        'sign': 'interp_ufuncs.sign',
-        'subtract': 'interp_ufuncs.subtract',
-        'sin': 'interp_ufuncs.sin',
-        'cos': 'interp_ufuncs.cos',
-        'tan': 'interp_ufuncs.tan',
-        'arcsin': 'interp_ufuncs.arcsin',
-        'arccos': 'interp_ufuncs.arccos',
-        'arctan': 'interp_ufuncs.arctan',
-    }
+    # ufuncs
+    for exposed, impl in [
+        ("abs", "absolute"),
+        ("absolute", "absolute"),
+        ("add", "add"),
+        ("arccos", "arccos"),
+        ("arcsin", "arcsin"),
+        ("arctan", "arctan"),
+        ("copysign", "copysign"),
+        ("cos", "cos"),
+        ("divide", "divide"),
+        ("exp", "exp"),
+        ("fabs", "fabs"),
+        ("floor", "floor"),
+        ("maximum", "maximum"),
+        ("minimum", "minimum"),
+        ("multiply", "multiply"),
+        ("negative", "negative"),
+        ("reciprocal", "reciprocal"),
+        ("sign", "sign"),
+        ("sin", "sin"),
+        ("subtract", "subtract"),
+        ("tan", "tan"),
+    ]:
+        interpleveldefs[exposed] = "interp_ufuncs.get(space).%s" % impl
 
     appleveldefs = {
         'average': 'app_numpy.average',
diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py
--- a/pypy/module/micronumpy/compile.py
+++ b/pypy/module/micronumpy/compile.py
@@ -20,6 +20,7 @@
 
 class FakeSpace(object):
     w_ValueError = None
+    w_TypeError = None
 
     def __init__(self):
         """NOT_RPYTHON"""
diff --git a/pypy/module/micronumpy/interp_dtype.py b/pypy/module/micronumpy/interp_dtype.py
--- a/pypy/module/micronumpy/interp_dtype.py
+++ b/pypy/module/micronumpy/interp_dtype.py
@@ -53,7 +53,9 @@
 
 VOID_TP = lltype.Ptr(lltype.Array(lltype.Void, hints={'nolength': True, "uncast_on_llgraph": True}))
 
-def create_low_level_dtype(num, kind, name, aliases, applevel_types, T, valtype):
+def create_low_level_dtype(num, kind, name, aliases, applevel_types, T, valtype,
+    expected_size=None):
+
     class Box(BaseBox):
         def __init__(self, val):
             self.val = val
@@ -113,6 +115,8 @@
     W_LowLevelDtype.aliases = aliases
     W_LowLevelDtype.applevel_types = applevel_types
     W_LowLevelDtype.num_bytes = rffi.sizeof(T)
+    if expected_size is not None:
+        assert W_LowLevelDtype.num_bytes == expected_size
     return W_LowLevelDtype
 
 
@@ -282,10 +286,21 @@
     applevel_types = [],
     T = rffi.SIGNEDCHAR,
     valtype = rffi.SIGNEDCHAR._type,
+    expected_size = 1,
 )
 class W_Int8Dtype(IntegerArithmeticDtype, W_Int8Dtype):
-    def unwrap(self, space, w_item):
-        return self.adapt_val(space.int_w(space.int(w_item)))
+    pass
+
+W_Int16Dtype = create_low_level_dtype(
+    num = 3, kind = SIGNEDLTR, name = "int16",
+    aliases = ["int16"],
+    applevel_types = [],
+    T = rffi.SHORT,
+    valtype = rffi.SHORT._type,
+    expected_size = 2,
+)
+class W_Int16Dtype(IntegerArithmeticDtype, W_Int16Dtype):
+    pass
 
 W_Int32Dtype = create_low_level_dtype(
     num = 5, kind = SIGNEDLTR, name = "int32",
@@ -293,6 +308,7 @@
     applevel_types = [],
     T = rffi.INT,
     valtype = rffi.INT._type,
+    expected_size = 4,
 )
 class W_Int32Dtype(IntegerArithmeticDtype, W_Int32Dtype):
     pass
@@ -303,6 +319,7 @@
     applevel_types = ["long"],
     T = rffi.LONGLONG,
     valtype = rffi.LONGLONG._type,
+    expected_size = 8,
 )
 class W_Int64Dtype(IntegerArithmeticDtype, W_Int64Dtype):
     pass
@@ -313,6 +330,7 @@
     applevel_types = ["float"],
     T = lltype.Float,
     valtype = float,
+    expected_size = 8,
 )
 class W_Float64Dtype(FloatArithmeticDtype, W_Float64Dtype):
     def unwrap(self, space, w_item):
@@ -323,7 +341,7 @@
 
 ALL_DTYPES = [
     W_BoolDtype,
-    W_Int8Dtype, W_Int32Dtype, W_Int64Dtype,
+    W_Int8Dtype, W_Int16Dtype, W_Int32Dtype, W_Int64Dtype,
     W_Float64Dtype
 ]
 
@@ -353,4 +371,4 @@
     kind = interp_attrproperty("kind", cls=W_Dtype),
     shape = GetSetProperty(W_Dtype.descr_get_shape),
 )
-W_Dtype.typedef.acceptable_as_base_class = False
\ No newline at end of file
+W_Dtype.typedef.acceptable_as_base_class = False
diff --git a/pypy/module/micronumpy/interp_numarray.py b/pypy/module/micronumpy/interp_numarray.py
--- a/pypy/module/micronumpy/interp_numarray.py
+++ b/pypy/module/micronumpy/interp_numarray.py
@@ -53,90 +53,52 @@
             i += 1
         return arr
 
-    def _unaryop_impl(w_ufunc):
+    def _unaryop_impl(ufunc_name):
         def impl(self, space):
-            return w_ufunc(space, self)
-        return func_with_new_name(impl, "unaryop_%s_impl" % w_ufunc.__name__)
+            return getattr(interp_ufuncs.get(space), ufunc_name).call(space, [self])
+        return func_with_new_name(impl, "unaryop_%s_impl" % ufunc_name)
 
-    descr_pos = _unaryop_impl(interp_ufuncs.positive)
-    descr_neg = _unaryop_impl(interp_ufuncs.negative)
-    descr_abs = _unaryop_impl(interp_ufuncs.absolute)
+    descr_pos = _unaryop_impl("positive")
+    descr_neg = _unaryop_impl("negative")
+    descr_abs = _unaryop_impl("absolute")
 
-    def _binop_impl(w_ufunc):
+    def _binop_impl(ufunc_name):
         def impl(self, space, w_other):
-            return w_ufunc(space, self, w_other)
-        return func_with_new_name(impl, "binop_%s_impl" % w_ufunc.__name__)
+            return getattr(interp_ufuncs.get(space), ufunc_name).call(space, [self, w_other])
+        return func_with_new_name(impl, "binop_%s_impl" % ufunc_name)
 
-    descr_add = _binop_impl(interp_ufuncs.add)
-    descr_sub = _binop_impl(interp_ufuncs.subtract)
-    descr_mul = _binop_impl(interp_ufuncs.multiply)
-    descr_div = _binop_impl(interp_ufuncs.divide)
-    descr_pow = _binop_impl(interp_ufuncs.power)
-    descr_mod = _binop_impl(interp_ufuncs.mod)
+    descr_add = _binop_impl("add")
+    descr_sub = _binop_impl("subtract")
+    descr_mul = _binop_impl("multiply")
+    descr_div = _binop_impl("divide")
+    descr_pow = _binop_impl("power")
+    descr_mod = _binop_impl("mod")
 
-    def _binop_right_impl(w_ufunc):
+    def _binop_right_impl(ufunc_name):
         def impl(self, space, w_other):
             w_other = scalar_w(space,
                 interp_ufuncs.find_dtype_for_scalar(space, w_other, self.find_dtype()),
                 w_other
             )
-            return w_ufunc(space, w_other, self)
-        return func_with_new_name(impl, "binop_right_%s_impl" % w_ufunc.__name__)
+            return getattr(interp_ufuncs.get(space), ufunc_name).call(space, [w_other, self])
+        return func_with_new_name(impl, "binop_right_%s_impl" % ufunc_name)
 
-    descr_radd = _binop_right_impl(interp_ufuncs.add)
-    descr_rsub = _binop_right_impl(interp_ufuncs.subtract)
-    descr_rmul = _binop_right_impl(interp_ufuncs.multiply)
-    descr_rdiv = _binop_right_impl(interp_ufuncs.divide)
-    descr_rpow = _binop_right_impl(interp_ufuncs.power)
-    descr_rmod = _binop_right_impl(interp_ufuncs.mod)
+    descr_radd = _binop_right_impl("add")
+    descr_rsub = _binop_right_impl("subtract")
+    descr_rmul = _binop_right_impl("multiply")
+    descr_rdiv = _binop_right_impl("divide")
+    descr_rpow = _binop_right_impl("power")
+    descr_rmod = _binop_right_impl("mod")
 
-    def _reduce_sum_prod_impl(op_name, init):
-        reduce_driver = jit.JitDriver(greens=['signature'],
-                         reds = ['i', 'size', 'self', 'result', 'res_dtype'])
+    def _reduce_ufunc_impl(ufunc_name):
+        def impl(self, space):
+            return getattr(interp_ufuncs.get(space), ufunc_name).descr_reduce(space, self)
+        return func_with_new_name(impl, "reduce_%s_impl" % ufunc_name)
 
-        def loop(self, res_dtype, result, size):
-            i = 0
-            while i < size:
-                reduce_driver.jit_merge_point(signature=self.signature,
-                                              self=self, res_dtype=res_dtype,
-                                              size=size, i=i, result=result)
-                result = getattr(res_dtype, op_name)(
-                    result,
-                    self.eval(i).convert_to(res_dtype)
-                )
-                i += 1
-            return result
-
-        def impl(self, space):
-            dtype = interp_ufuncs.find_unaryop_result_dtype(
-                space, self.find_dtype(), promote_to_largest=True
-            )
-            result = dtype.adapt_val(init)
-            return loop(self, dtype, result, self.find_size()).wrap(space)
-        return func_with_new_name(impl, "reduce_%s_impl" % op_name)
-
-    def _reduce_max_min_impl(op_name):
-        reduce_driver = jit.JitDriver(greens=['signature'],
-                         reds = ['i', 'size', 'self', 'result', 'dtype'])
-        def loop(self, result, size):
-            i = 1
-            dtype = self.find_dtype()
-            while i < size:
-                reduce_driver.jit_merge_point(signature=self.signature,
-                                              self=self, dtype=dtype,
-                                              size=size, i=i, result=result)
-                result = getattr(dtype, op_name)(result, self.eval(i))
-                i += 1
-            return result
-
-        def impl(self, space):
-            size = self.find_size()
-            if size == 0:
-                raise OperationError(space.w_ValueError,
-                    space.wrap("Can't call %s on zero-size arrays" \
-                            % op_name))
-            return loop(self, self.eval(0), size).wrap(space)
-        return func_with_new_name(impl, "reduce_%s_impl" % op_name)
+    descr_sum = _reduce_ufunc_impl("add")
+    descr_prod = _reduce_ufunc_impl("multiply")
+    descr_max = _reduce_ufunc_impl("maximum")
+    descr_min = _reduce_ufunc_impl("minimum")
 
     def _reduce_argmax_argmin_impl(op_name):
         reduce_driver = jit.JitDriver(greens=['signature'],
@@ -192,10 +154,6 @@
     def descr_any(self, space):
         return space.wrap(self._any())
 
-    descr_sum = _reduce_sum_prod_impl("add", 0)
-    descr_prod = _reduce_sum_prod_impl("mul", 1)
-    descr_max = _reduce_max_min_impl("max")
-    descr_min = _reduce_max_min_impl("min")
     descr_argmax = _reduce_argmax_argmin_impl("max")
     descr_argmin = _reduce_argmax_argmin_impl("min")
 
@@ -248,7 +206,7 @@
         res = "array([" + ", ".join(concrete._getnums(False)) + "]"
         dtype = concrete.find_dtype()
         if (dtype is not space.fromcache(interp_dtype.W_Float64Dtype) and
-            dtype is not space.fromcache(interp_dtype.W_Int64Dtype)):
+            dtype is not space.fromcache(interp_dtype.W_Int64Dtype)) or not self.find_size():
             res += ", dtype=" + dtype.name
         res += ")"
         return space.wrap(res)
@@ -259,7 +217,15 @@
         return space.wrap("[" + " ".join(concrete._getnums(True)) + "]")
 
     def descr_getitem(self, space, w_idx):
-        # TODO: indexing by tuples
+        # TODO: indexing by arrays and lists
+        if space.isinstance_w(w_idx, space.w_tuple):
+            length = space.len_w(w_idx)
+            if length == 0:
+                return space.wrap(self)
+            if length > 1: # only one dimension for now.
+                raise OperationError(space.w_IndexError,
+                                     space.wrap("invalid index"))
+            w_idx = space.getitem(w_idx, space.wrap(0))
         start, stop, step, slice_length = space.decode_index4(w_idx, self.find_size())
         if step == 0:
             # Single index
@@ -273,8 +239,19 @@
             return space.wrap(res)
 
     def descr_setitem(self, space, w_idx, w_value):
-        # TODO: indexing by tuples and lists
+        # TODO: indexing by arrays and lists
         self.invalidated()
+        if space.isinstance_w(w_idx, space.w_tuple):
+            length = space.len_w(w_idx)
+            if length > 1: # only one dimension for now.
+                raise OperationError(space.w_IndexError,
+                                     space.wrap("invalid index"))
+            if length == 0:
+                w_idx = space.newslice(space.wrap(0),
+                                      space.wrap(self.find_size()),
+                                      space.wrap(1))
+            else:
+                w_idx = space.getitem(w_idx, space.wrap(0))
         start, stop, step, slice_length = space.decode_index4(w_idx,
                                                               self.find_size())
         if step == 0:
diff --git a/pypy/module/micronumpy/interp_ufuncs.py b/pypy/module/micronumpy/interp_ufuncs.py
--- a/pypy/module/micronumpy/interp_ufuncs.py
+++ b/pypy/module/micronumpy/interp_ufuncs.py
@@ -1,57 +1,160 @@
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.error import OperationError, operationerrfmt
+from pypy.interpreter.gateway import interp2app
+from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty
 from pypy.module.micronumpy import interp_dtype, signature
+from pypy.rlib import jit
 from pypy.tool.sourcetools import func_with_new_name
 
 
-def ufunc(func=None, promote_to_float=False, promote_bools=False):
-    if func is None:
-        return lambda func: ufunc(func, promote_to_float, promote_bools)
-    call_sig = signature.Call1(func)
-    def impl(space, w_obj):
+reduce_driver = jit.JitDriver(
+    greens = ["signature"],
+    reds = ["i", "size", "self", "dtype", "value", "obj"]
+)
+
+class W_Ufunc(Wrappable):
+    _attrs_ = ["name", "promote_to_float", "promote_bools", "identity"]
+
+    def __init__(self, name, promote_to_float, promote_bools, identity):
+        self.name = name
+        self.promote_to_float = promote_to_float
+        self.promote_bools = promote_bools
+
+        self.identity = identity
+
+    def descr_repr(self, space):
+        return space.wrap("<ufunc '%s'>" % self.name)
+
+    def descr_get_identity(self, space):
+        if self.identity is None:
+            return space.w_None
+        return self.identity.wrap(space)
+
+    def descr_call(self, space, __args__):
+        try:
+            args_w = __args__.fixedunpack(self.argcount)
+        except ValueError, e:
+            raise OperationError(space.w_TypeError, space.wrap(str(e)))
+        return self.call(space, args_w)
+
+    def descr_reduce(self, space, w_obj):
+        from pypy.module.micronumpy.interp_numarray import convert_to_array, Scalar
+
+        if self.argcount != 2:
+            raise OperationError(space.w_ValueError, space.wrap("reduce only "
+                "supported for binary functions"))
+
+        assert isinstance(self, W_Ufunc2)
+        obj = convert_to_array(space, w_obj)
+        if isinstance(obj, Scalar):
+            raise OperationError(space.w_TypeError, space.wrap("cannot reduce "
+                "on a scalar"))
+
+        size = obj.find_size()
+        dtype = find_unaryop_result_dtype(
+            space, obj.find_dtype(),
+            promote_to_largest=True
+        )
+        start = 0
+        if self.identity is None:
+            if size == 0:
+                raise operationerrfmt(space.w_ValueError, "zero-size array to "
+                    "%s.reduce without identity", self.name)
+            value = obj.eval(0).convert_to(dtype)
+            start += 1
+        else:
+            value = self.identity.convert_to(dtype)
+        new_sig = signature.Signature.find_sig([
+            self.reduce_signature, obj.signature
+        ])
+        return self.reduce(new_sig, start, value, obj, dtype, size).wrap(space)
+
+    def reduce(self, signature, start, value, obj, dtype, size):
+        i = start
+        while i < size:
+            reduce_driver.jit_merge_point(signature=signature, self=self,
+                                          value=value, obj=obj, i=i,
+                                          dtype=dtype, size=size)
+            value = self.func(dtype, value, obj.eval(i).convert_to(dtype))
+            i += 1
+        return value
+
+class W_Ufunc1(W_Ufunc):
+    argcount = 1
+
+    def __init__(self, func, name, promote_to_float=False, promote_bools=False,
+        identity=None):
+
+        W_Ufunc.__init__(self, name, promote_to_float, promote_bools, identity)
+        self.func = func
+        self.signature = signature.Call1(func)
+
+    def call(self, space, args_w):
         from pypy.module.micronumpy.interp_numarray import (Call1,
             convert_to_array, Scalar)
 
+        [w_obj] = args_w
         w_obj = convert_to_array(space, w_obj)
         res_dtype = find_unaryop_result_dtype(space,
             w_obj.find_dtype(),
-            promote_to_float=promote_to_float,
-            promote_bools=promote_bools,
+            promote_to_float=self.promote_to_float,
+            promote_bools=self.promote_bools,
         )
         if isinstance(w_obj, Scalar):
-            return func(res_dtype, w_obj.value.convert_to(res_dtype)).wrap(space)
+            return self.func(res_dtype, w_obj.value.convert_to(res_dtype)).wrap(space)
 
-        new_sig = signature.Signature.find_sig([call_sig, w_obj.signature])
+        new_sig = signature.Signature.find_sig([self.signature, w_obj.signature])
         w_res = Call1(new_sig, res_dtype, w_obj)
         w_obj.add_invalidates(w_res)
         return w_res
-    return func_with_new_name(impl, "%s_dispatcher" % func.__name__)
 
-def ufunc2(func=None, promote_to_float=False, promote_bools=False):
-    if func is None:
-        return lambda func: ufunc2(func, promote_to_float, promote_bools)
 
-    call_sig = signature.Call2(func)
-    def impl(space, w_lhs, w_rhs):
+class W_Ufunc2(W_Ufunc):
+    argcount = 2
+
+    def __init__(self, func, name, promote_to_float=False, promote_bools=False,
+        identity=None):
+
+        W_Ufunc.__init__(self, name, promote_to_float, promote_bools, identity)
+        self.func = func
+        self.signature = signature.Call2(func)
+        self.reduce_signature = signature.BaseSignature()
+
+    def call(self, space, args_w):
         from pypy.module.micronumpy.interp_numarray import (Call2,
             convert_to_array, Scalar)
 
+        [w_lhs, w_rhs] = args_w
         w_lhs = convert_to_array(space, w_lhs)
         w_rhs = convert_to_array(space, w_rhs)
         res_dtype = find_binop_result_dtype(space,
             w_lhs.find_dtype(), w_rhs.find_dtype(),
-            promote_to_float=promote_to_float,
-            promote_bools=promote_bools,
+            promote_to_float=self.promote_to_float,
+            promote_bools=self.promote_bools,
         )
         if isinstance(w_lhs, Scalar) and isinstance(w_rhs, Scalar):
-            return func(res_dtype, w_lhs.value, w_rhs.value).wrap(space)
+            return self.func(res_dtype, w_lhs.value, w_rhs.value).wrap(space)
 
         new_sig = signature.Signature.find_sig([
-            call_sig, w_lhs.signature, w_rhs.signature
+            self.signature, w_lhs.signature, w_rhs.signature
         ])
         w_res = Call2(new_sig, res_dtype, w_lhs, w_rhs)
         w_lhs.add_invalidates(w_res)
         w_rhs.add_invalidates(w_res)
         return w_res
-    return func_with_new_name(impl, "%s_dispatcher" % func.__name__)
+
+
+W_Ufunc.typedef = TypeDef("ufunc",
+    __module__ = "numpy",
+
+    __call__ = interp2app(W_Ufunc.descr_call),
+    __repr__ = interp2app(W_Ufunc.descr_repr),
+
+    identity = GetSetProperty(W_Ufunc.descr_get_identity),
+    nin = interp_attrproperty("argcount", cls=W_Ufunc),
+
+    reduce = interp2app(W_Ufunc.descr_reduce),
+)
 
 def find_binop_result_dtype(space, dt1, dt2, promote_to_float=False,
     promote_bools=False):
@@ -74,7 +177,7 @@
     assert False
 
 def find_unaryop_result_dtype(space, dt, promote_to_float=False,
-    promote_to_largest=False, promote_bools=False):
+    promote_bools=False, promote_to_largest=False):
     if promote_bools and (dt.kind == interp_dtype.BOOLLTR):
         return space.fromcache(interp_dtype.W_Int8Dtype)
     if promote_to_float:
@@ -106,53 +209,65 @@
     return space.fromcache(interp_dtype.W_Float64Dtype)
 
 
-def ufunc_dtype_caller(ufunc_name, op_name, argcount, **kwargs):
+def ufunc_dtype_caller(ufunc_name, op_name, argcount):
     if argcount == 1:
-        @ufunc(**kwargs)
         def impl(res_dtype, value):
             return getattr(res_dtype, op_name)(value)
     elif argcount == 2:
-        @ufunc2(**kwargs)
         def impl(res_dtype, lvalue, rvalue):
             return getattr(res_dtype, op_name)(lvalue, rvalue)
     return func_with_new_name(impl, ufunc_name)
 
-for ufunc_def in [
-    ("add", "add", 2),
-    ("subtract", "sub", 2),
-    ("multiply", "mul", 2),
-    ("divide", "div", 2, {"promote_bools": True}),
-    ("mod", "mod", 2, {"promote_bools": True}),
-    ("power", "pow", 2, {"promote_bools": True}),
+class UfuncState(object):
+    def __init__(self, space):
+        "NOT_RPYTHON"
+        for ufunc_def in [
+            ("add", "add", 2, {"identity": 0}),
+            ("subtract", "sub", 2),
+            ("multiply", "mul", 2, {"identity": 1}),
+            ("divide", "div", 2, {"promote_bools": True}),
+            ("mod", "mod", 2, {"promote_bools": True}),
+            ("power", "pow", 2, {"promote_bools": True}),
 
-    ("maximum", "max", 2),
-    ("minimum", "min", 2),
+            ("maximum", "max", 2),
+            ("minimum", "min", 2),
 
-    ("copysign", "copysign", 2, {"promote_to_float": True}),
+            ("copysign", "copysign", 2, {"promote_to_float": True}),
 
-    ("positive", "pos", 1),
-    ("negative", "neg", 1),
-    ("absolute", "abs", 1),
-    ("sign", "sign", 1, {"promote_bools": True}),
-    ("reciprocal", "reciprocal", 1),
+            ("positive", "pos", 1),
+            ("negative", "neg", 1),
+            ("absolute", "abs", 1),
+            ("sign", "sign", 1, {"promote_bools": True}),
+            ("reciprocal", "reciprocal", 1),
 
-    ("fabs", "fabs", 1, {"promote_to_float": True}),
-    ("floor", "floor", 1, {"promote_to_float": True}),
-    ("exp", "exp", 1, {"promote_to_float": True}),
+            ("fabs", "fabs", 1, {"promote_to_float": True}),
+            ("floor", "floor", 1, {"promote_to_float": True}),
+            ("exp", "exp", 1, {"promote_to_float": True}),
 
-    ("sin", "sin", 1, {"promote_to_float": True}),
-    ("cos", "cos", 1, {"promote_to_float": True}),
-    ("tan", "tan", 1, {"promote_to_float": True}),
-    ("arcsin", "arcsin", 1, {"promote_to_float": True}),
-    ("arccos", "arccos", 1, {"promote_to_float": True}),
-    ("arctan", "arctan", 1, {"promote_to_float": True}),
-]:
-    ufunc_name = ufunc_def[0]
-    op_name = ufunc_def[1]
-    argcount = ufunc_def[2]
-    try:
-        extra_kwargs = ufunc_def[3]
-    except IndexError:
-        extra_kwargs = {}
+            ("sin", "sin", 1, {"promote_to_float": True}),
+            ("cos", "cos", 1, {"promote_to_float": True}),
+            ("tan", "tan", 1, {"promote_to_float": True}),
+            ("arcsin", "arcsin", 1, {"promote_to_float": True}),
+            ("arccos", "arccos", 1, {"promote_to_float": True}),
+            ("arctan", "arctan", 1, {"promote_to_float": True}),
+        ]:
+            self.add_ufunc(space, *ufunc_def)
 
-    globals()[ufunc_name] = ufunc_dtype_caller(ufunc_name, op_name, argcount, **extra_kwargs)
+    def add_ufunc(self, space, ufunc_name, op_name, argcount, extra_kwargs=None):
+        if extra_kwargs is None:
+            extra_kwargs = {}
+
+        identity = extra_kwargs.get("identity")
+        if identity is not None:
+            identity = space.fromcache(interp_dtype.W_Int64Dtype).adapt_val(identity)
+        extra_kwargs["identity"] = identity
+
+        func = ufunc_dtype_caller(ufunc_name, op_name, argcount)
+        if argcount == 1:
+            ufunc = W_Ufunc1(func, ufunc_name, **extra_kwargs)
+        elif argcount == 2:
+            ufunc = W_Ufunc2(func, ufunc_name, **extra_kwargs)
+        setattr(self, ufunc_name, ufunc)
+
+def get(space):
+    return space.fromcache(UfuncState)
\ No newline at end of file
diff --git a/pypy/module/micronumpy/test/test_dtypes.py b/pypy/module/micronumpy/test/test_dtypes.py
--- a/pypy/module/micronumpy/test/test_dtypes.py
+++ b/pypy/module/micronumpy/test/test_dtypes.py
@@ -82,10 +82,20 @@
             assert a[1] == 1
 
     def test_add_int8(self):
-        from numpy import array
+        from numpy import array, dtype
 
         a = array(range(5), dtype="int8")
         b = a + a
+        assert b.dtype is dtype("int8")
+        for i in range(5):
+            assert b[i] == i * 2
+
+    def test_add_int16(self):
+        from numpy import array, dtype
+
+        a = array(range(5), dtype="int16")
+        b = a + a
+        assert b.dtype is dtype("int16")
         for i in range(5):
             assert b[i] == i * 2
 
@@ -98,4 +108,4 @@
         from numpy import dtype
 
         # You can't subclass dtype
-        raises(TypeError, type, "Foo", (dtype,), {})
\ No newline at end of file
+        raises(TypeError, type, "Foo", (dtype,), {})
diff --git a/pypy/module/micronumpy/test/test_numarray.py b/pypy/module/micronumpy/test/test_numarray.py
--- a/pypy/module/micronumpy/test/test_numarray.py
+++ b/pypy/module/micronumpy/test/test_numarray.py
@@ -52,10 +52,14 @@
         from numpy import array, zeros
         a = array(range(5), float)
         assert repr(a) == "array([0.0, 1.0, 2.0, 3.0, 4.0])"
+        a = array([], float)
+        assert repr(a) == "array([], dtype=float64)"
         a = zeros(1001)
         assert repr(a) == "array([0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0])"
         a = array(range(5), long)
         assert repr(a) == "array([0, 1, 2, 3, 4])"
+        a = array([], long)
+        assert repr(a) == "array([], dtype=int64)"
         a = array([True, False, True, False], "?")
         assert repr(a) == "array([True, False, True, False], dtype=bool)"
 
@@ -84,6 +88,9 @@
         a = array(range(5), dtype="int8")
         assert str(a) == "[0 1 2 3 4]"
 
+        a = array(range(5), dtype="int16")
+        assert str(a) == "[0 1 2 3 4]"
+
     def test_str_slice(self):
         from numpy import array, zeros
         a = array(range(5), float)
@@ -102,6 +109,16 @@
         assert a[-1] == 8
         raises(IndexError, "a[-6]")
 
+    def test_getitem_tuple(self):
+        from numpy import array
+        a = array(range(5))
+        raises(IndexError, "a[(1,2)]")
+        for i in xrange(5):
+            assert a[(i,)] == i
+        b = a[()]
+        for i in xrange(5):
+            assert a[i] == b[i]
+
     def test_setitem(self):
         from numpy import array
         a = array(range(5))
@@ -110,6 +127,17 @@
         raises(IndexError, "a[5] = 0.0")
         raises(IndexError, "a[-6] = 3.0")
 
+    def test_setitem_tuple(self):
+        from numpy import array
+        a = array(range(5))
+        raises(IndexError, "a[(1,2)] = [0,1]")
+        for i in xrange(5):
+            a[(i,)] = i+1
+            assert a[i] == i+1
+        a[()] = range(5)
+        for i in xrange(5):
+            assert a[i] == i
+
     def test_setslice_array(self):
         from numpy import array
         a = array(range(5))
@@ -541,4 +569,4 @@
         a = fromstring(self.data)
         for i in range(4):
             assert a[i] == i + 1
-        raises(ValueError, fromstring, "abc")
\ No newline at end of file
+        raises(ValueError, fromstring, "abc")
diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py
--- a/pypy/module/micronumpy/test/test_ufuncs.py
+++ b/pypy/module/micronumpy/test/test_ufuncs.py
@@ -3,6 +3,32 @@
 
 
 class AppTestUfuncs(BaseNumpyAppTest):
+    def test_ufunc_instance(self):
+        from numpy import add, ufunc
+
+        assert isinstance(add, ufunc)
+        assert repr(add) == "<ufunc 'add'>"
+        assert repr(ufunc) == "<type 'numpy.ufunc'>"
+
+    def test_ufunc_attrs(self):
+        from numpy import add, multiply, sin
+
+        assert add.identity == 0
+        assert multiply.identity == 1
+        assert sin.identity is None
+
+        assert add.nin == 2
+        assert multiply.nin == 2
+        assert sin.nin == 1
+
+    def test_wrong_arguments(self):
+        from numpy import add, sin
+
+        raises(TypeError, add, 1)
+        raises(TypeError, add, 1, 2, 3)
+        raises(TypeError, sin, 1, 2)
+        raises(TypeError, sin)
+
     def test_single_item(self):
         from numpy import negative, sign, minimum
 
@@ -272,3 +298,16 @@
         b = arctan(a)
         assert math.isnan(b[0])
 
+    def test_reduce_errors(self):
+        from numpy import sin, add
+
+        raises(ValueError, sin.reduce, [1, 2, 3])
+        raises(TypeError, add.reduce, 1)
+
+    def test_reduce(self):
+        from numpy import add, maximum
+
+        assert add.reduce([1, 2, 3]) == 6
+        assert maximum.reduce([1]) == 1
+        assert maximum.reduce([1, 2, 3]) == 3
+        raises(ValueError, maximum.reduce, [])
\ No newline at end of file
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -19,7 +19,7 @@
     def test_add(self):
         def f(i):
             ar = SingleDimArray(i, dtype=self.float64_dtype)
-            v = interp_ufuncs.add(self.space, ar, ar)
+            v = interp_ufuncs.get(self.space).add.call(self.space, [ar, ar])
             return v.get_concrete().eval(3).val
 
         result = self.meta_interp(f, [5], listops=True, backendopt=True)
@@ -31,9 +31,10 @@
     def test_floatadd(self):
         def f(i):
             ar = SingleDimArray(i, dtype=self.float64_dtype)
-            v = interp_ufuncs.add(self.space,
-                ar,
-                scalar_w(self.space, self.float64_dtype, self.space.wrap(4.5))
+            v = interp_ufuncs.get(self.space).add.call(self.space, [
+                    ar,
+                    scalar_w(self.space, self.float64_dtype, self.space.wrap(4.5))
+                ],
             )
             assert isinstance(v, BaseArray)
             return v.get_concrete().eval(3).val
@@ -89,14 +90,21 @@
     def test_max(self):
         space = self.space
         float64_dtype = self.float64_dtype
+        int64_dtype = self.int64_dtype
 
         def f(i):
-            ar = SingleDimArray(i, dtype=NonConstant(float64_dtype))
+            if NonConstant(False):
+                dtype = int64_dtype
+            else:
+                dtype = float64_dtype
+            ar = SingleDimArray(i, dtype=dtype)
             j = 0
             while j < i:
                 ar.get_concrete().setitem(j, float64_dtype.box(float(j)))
                 j += 1
-            return ar.descr_add(space, ar).descr_max(space).floatval
+            v = ar.descr_add(space, ar).descr_max(space)
+            assert isinstance(v, FloatObject)
+            return v.floatval
 
         result = self.meta_interp(f, [5], listops=True, backendopt=True)
         self.check_loops({"getarrayitem_raw": 2, "float_add": 1,
@@ -108,14 +116,21 @@
     def test_min(self):
         space = self.space
         float64_dtype = self.float64_dtype
+        int64_dtype = self.int64_dtype
 
         def f(i):
-            ar = SingleDimArray(i, dtype=NonConstant(float64_dtype))
+            if NonConstant(False):
+                dtype = int64_dtype
+            else:
+                dtype = float64_dtype
+            ar = SingleDimArray(i, dtype=dtype)
             j = 0
             while j < i:
                 ar.get_concrete().setitem(j, float64_dtype.box(float(j)))
                 j += 1
-            return ar.descr_add(space, ar).descr_min(space).floatval
+            v = ar.descr_add(space, ar).descr_min(space)
+            assert isinstance(v, FloatObject)
+            return v.floatval
 
         result = self.meta_interp(f, [5], listops=True, backendopt=True)
         self.check_loops({"getarrayitem_raw": 2, "float_add": 1,
@@ -180,9 +195,9 @@
 
         def f(i):
             ar = SingleDimArray(i, dtype=self.float64_dtype)
-            v1 = interp_ufuncs.add(space, ar, scalar_w(space, self.float64_dtype, space.wrap(4.5)))
+            v1 = interp_ufuncs.get(self.space).add.call(space, [ar, scalar_w(space, self.float64_dtype, space.wrap(4.5))])
             assert isinstance(v1, BaseArray)
-            v2 = interp_ufuncs.multiply(space, v1, scalar_w(space, self.float64_dtype, space.wrap(4.5)))
+            v2 = interp_ufuncs.get(self.space).multiply.call(space, [v1, scalar_w(space, self.float64_dtype, space.wrap(4.5))])
             v1.force_if_needed()
             assert isinstance(v2, BaseArray)
             return v2.get_concrete().eval(3).val
@@ -200,8 +215,8 @@
         space = self.space
         def f(i):
             ar = SingleDimArray(i, dtype=self.float64_dtype)
-            v1 = interp_ufuncs.add(space, ar, ar)
-            v2 = interp_ufuncs.negative(space, v1)
+            v1 = interp_ufuncs.get(self.space).add.call(space, [ar, ar])
+            v2 = interp_ufuncs.get(self.space).negative.call(space, [v1])
             return v2.get_concrete().eval(3).val
 
         result = self.meta_interp(f, [5], listops=True, backendopt=True)
@@ -216,13 +231,13 @@
         def f(i):
             ar = SingleDimArray(i, dtype=self.float64_dtype)
 
-            v1 = interp_ufuncs.add(space, ar, ar)
-            v2 = interp_ufuncs.negative(space, v1)
+            v1 = interp_ufuncs.get(self.space).add.call(space, [ar, ar])
+            v2 = interp_ufuncs.get(self.space).negative.call(space, [v1])
             v2.get_concrete()
 
             for i in xrange(5):
-                v1 = interp_ufuncs.multiply(space, ar, ar)
-                v2 = interp_ufuncs.negative(space, v1)
+                v1 = interp_ufuncs.get(self.space).multiply.call(space, [ar, ar])
+                v2 = interp_ufuncs.get(self.space).negative.call(space, [v1])
                 v2.get_concrete()
 
         self.meta_interp(f, [5], listops=True, backendopt=True)
@@ -237,7 +252,7 @@
                 SingleDimSlice.signature, ar.signature
             ])
             s = SingleDimSlice(0, step*i, step, i, ar, new_sig)
-            v = interp_ufuncs.add(self.space, s, s)
+            v = interp_ufuncs.get(self.space).add.call(self.space, [s, s])
             return v.get_concrete().eval(3).val
 
         result = self.meta_interp(f, [5], listops=True, backendopt=True)
@@ -259,7 +274,7 @@
                 SingleDimSlice.signature, s1.signature
             ])
             s2 = SingleDimSlice(0, step2*i, step2, i, ar, new_sig)
-            v = interp_ufuncs.add(self.space, s1, s2)
+            v = interp_ufuncs.get(self.space).add.call(self.space, [s1, s2])
             return v.get_concrete().eval(3).val
 
         result = self.meta_interp(f, [5], listops=True, backendopt=True)
diff --git a/pypy/module/pypyjit/interp_jit.py b/pypy/module/pypyjit/interp_jit.py
--- a/pypy/module/pypyjit/interp_jit.py
+++ b/pypy/module/pypyjit/interp_jit.py
@@ -21,6 +21,7 @@
 
 PyFrame._virtualizable2_ = ['last_instr', 'pycode',
                             'valuestackdepth', 'locals_stack_w[*]',
+                            'cells[*]',
                             'last_exception',
                             'lastblock',
                             'is_being_profiled',
diff --git a/pypy/module/pypyjit/policy.py b/pypy/module/pypyjit/policy.py
--- a/pypy/module/pypyjit/policy.py
+++ b/pypy/module/pypyjit/policy.py
@@ -8,7 +8,8 @@
                 modname == '__builtin__.interp_classobj' or
                 modname == '__builtin__.functional' or
                 modname == '__builtin__.descriptor' or
-                modname == 'thread.os_local'):
+                modname == 'thread.os_local' or
+                modname == 'thread.os_thread'):
             return True
         if '.' in modname:
             modname, _ = modname.split('.', 1)
diff --git a/pypy/module/pypyjit/test/test_policy.py b/pypy/module/pypyjit/test/test_policy.py
--- a/pypy/module/pypyjit/test/test_policy.py
+++ b/pypy/module/pypyjit/test/test_policy.py
@@ -34,7 +34,9 @@
 
 def test_thread_local():
     from pypy.module.thread.os_local import Local
+    from pypy.module.thread.os_thread import get_ident
     assert pypypolicy.look_inside_function(Local.getdict.im_func)
+    assert pypypolicy.look_inside_function(get_ident)
 
 def test_pypy_module():
     from pypy.module._collections.interp_deque import W_Deque
diff --git a/pypy/module/pypyjit/test_pypy_c/test_call.py b/pypy/module/pypyjit/test_pypy_c/test_call.py
--- a/pypy/module/pypyjit/test_pypy_c/test_call.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_call.py
@@ -174,7 +174,7 @@
             guard_no_overflow(descr=...)
             i18 = force_token()
             --TICK--
-            jump(p0, p1, p2, p3, p4, i8, p7, i17, p8, i9, i17, p10, p11, p12, descr=<Loop0>)
+            jump(..., descr=<Loop0>)
         """)
 
     def test_default_and_kw(self):
@@ -396,3 +396,70 @@
             --TICK--
             jump(..., descr=<Loop0>)
         """)
+
+    def test_global_closure_has_constant_cells(self):
+        log = self.run("""
+            def make_adder(n):
+                def add(x):
+                    return x + n
+                return add
+            add5 = make_adder(5)
+            def main():
+                i = 0
+                while i < 5000:
+                    i = add5(i) # ID: call
+            """, [])
+        loop, = log.loops_by_id('call', is_entry_bridge=True)
+        assert loop.match("""
+            guard_value(i6, 1, descr=...)
+            guard_nonnull_class(p8, ConstClass(W_IntObject), descr=...)
+            guard_value(i4, 0, descr=...)
+            guard_value(p3, ConstPtr(ptr14), descr=...)
+            i15 = getfield_gc_pure(p8, descr=<SignedFieldDescr pypy.objspace.std.intobject.W_IntObject.inst_intval .*>)
+            i17 = int_lt(i15, 5000)
+            guard_true(i17, descr=...)
+            p18 = getfield_gc(p0, descr=<GcPtrFieldDescr pypy.interpreter.eval.Frame.inst_w_globals .*>)
+            guard_value(p18, ConstPtr(ptr19), descr=...)
+            p20 = getfield_gc(p18, descr=<GcPtrFieldDescr pypy.objspace.std.dictmultiobject.W_DictMultiObject.inst_strategy .*>)
+            guard_value(p20, ConstPtr(ptr21), descr=...)
+            guard_not_invalidated(descr=...)
+            # most importantly, there is no getarrayitem_gc here
+            p23 = call(ConstClass(getexecutioncontext), descr=<GcPtrCallDescr>)
+            p24 = getfield_gc(p23, descr=<GcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_topframeref .*>)
+            i25 = force_token()
+            p26 = getfield_gc(p23, descr=<GcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_w_tracefunc .*>)
+            guard_isnull(p26, descr=...)
+            i27 = getfield_gc(p23, descr=<NonGcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_profilefunc .*>)
+            i28 = int_is_zero(i27)
+            guard_true(i28, descr=...)
+            p30 = getfield_gc(ConstPtr(ptr29), descr=<GcPtrFieldDescr pypy.interpreter.nestedscope.Cell.inst_w_value .*>)
+            guard_nonnull_class(p30, ConstClass(W_IntObject), descr=...)
+            i32 = getfield_gc_pure(p30, descr=<SignedFieldDescr pypy.objspace.std.intobject.W_IntObject.inst_intval .*>)
+            i33 = int_add_ovf(i15, i32)
+            guard_no_overflow(descr=...)
+            --TICK--
+            jump(p0, p1, p2, p5, i33, i32, p23, p30, p24, descr=<Loop0>)
+        """)
+
+    def test_local_closure_is_virtual(self):
+        log = self.run("""
+            def main():
+                i = 0
+                while i < 5000:
+                    def add():
+                        return i + 1
+                    i = add() # ID: call
+            """, [])
+        loop, = log.loops_by_id('call')
+        assert loop.match("""
+            i8 = getfield_gc_pure(p6, descr=<SignedFieldDescr pypy.objspace.std.intobject.W_IntObject.inst_intval .*>)
+            i10 = int_lt(i8, 5000)
+            guard_true(i10, descr=...)
+            i11 = force_token()
+            i13 = int_add(i8, 1)
+            --TICK--
+            p22 = new_with_vtable(ConstClass(W_IntObject))
+            setfield_gc(p22, i13, descr=<SignedFieldDescr pypy.objspace.std.intobject.W_IntObject.inst_intval .*>)
+            setfield_gc(p4, p22, descr=<GcPtrFieldDescr pypy.interpreter.nestedscope.Cell.inst_w_value .*>)
+            jump(p0, p1, p2, p3, p4, p7, p22, p7, descr=<Loop0>)
+        """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_globals.py b/pypy/module/pypyjit/test_pypy_c/test_globals.py
--- a/pypy/module/pypyjit/test_pypy_c/test_globals.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_globals.py
@@ -23,6 +23,4 @@
             guard_not_invalidated(descr=...)
             p19 = getfield_gc(ConstPtr(p17), descr=<GcPtrFieldDescr .*W_DictMultiObject.inst_strategy .*>)
             guard_value(p19, ConstPtr(ptr20), descr=...)
-            p22 = getfield_gc(ConstPtr(ptr21), descr=<GcPtrFieldDescr .*ModuleCell.inst_w_value .*>)
-            guard_nonnull(p22, descr=...)
-        """)
+        """)
\ No newline at end of file
diff --git a/pypy/module/pypyjit/test_pypy_c/test_instance.py b/pypy/module/pypyjit/test_pypy_c/test_instance.py
--- a/pypy/module/pypyjit/test_pypy_c/test_instance.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_instance.py
@@ -181,8 +181,7 @@
         assert loop.match_by_id("contains", """
             guard_not_invalidated(descr=...)
             i11 = force_token()
-            i12 = int_add_ovf(i5, i7)
-            guard_no_overflow(descr=...)
+            i12 = int_add(i5, 1)
         """)
 
     def test_id_compare_optimization(self):
diff --git a/pypy/module/sys/test/test_encoding.py b/pypy/module/sys/test/test_encoding.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/sys/test/test_encoding.py
@@ -0,0 +1,30 @@
+import os, py
+from pypy.rlib import rlocale
+from pypy.module.sys.interp_encoding import _getfilesystemencoding
+from pypy.module.sys.interp_encoding import base_encoding
+
+
+def test__getfilesystemencoding(space):
+    if not (rlocale.HAVE_LANGINFO and rlocale.CODESET):
+        py.test.skip("requires HAVE_LANGINFO and CODESET")
+
+    def clear():
+        for key in os.environ.keys():
+            if key == 'LANG' or key.startswith('LC_'):
+                del os.environ[key]
+
+    def get(**env):
+        original_env = os.environ.copy()
+        try:
+            clear()
+            os.environ.update(env)
+            return _getfilesystemencoding(space)
+        finally:
+            clear()
+            os.environ.update(original_env)
+
+    assert get() in (base_encoding, 'ANSI_X3.4-1968')
+    assert get(LANG='foobar') in (base_encoding, 'ANSI_X3.4-1968')
+    assert get(LANG='en_US.UTF-8') == 'UTF-8'
+    assert get(LC_ALL='en_US.UTF-8') == 'UTF-8'
+    assert get(LC_CTYPE='en_US.UTF-8') == 'UTF-8'
diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py
--- a/pypy/module/sys/version.py
+++ b/pypy/module/sys/version.py
@@ -14,7 +14,7 @@
 
 if platform.name == 'msvc':
     COMPILER_INFO = 'MSC v.%d 32 bit' % (platform.version * 10 + 600)
-elif platform.cc.startswith('gcc'):
+elif platform.cc is not None and platform.cc.startswith('gcc'):
     out = platform.execute(platform.cc, '--version').out
     match = re.search(' (\d+\.\d+(\.\d+)*)', out)
     if match:
diff --git a/pypy/module/test_lib_pypy/test_greenlet.py b/pypy/module/test_lib_pypy/test_greenlet.py
--- a/pypy/module/test_lib_pypy/test_greenlet.py
+++ b/pypy/module/test_lib_pypy/test_greenlet.py
@@ -231,3 +231,13 @@
         assert res == "next step"
         res = g2.switch("goes to f1 instead")
         assert res == "all ok"
+
+    def test_throw_in_not_started_yet(self):
+        from greenlet import greenlet
+        #
+        def f1():
+            never_reached
+        #
+        g1 = greenlet(f1)
+        raises(ValueError, g1.throw, ValueError)
+        assert g1.dead
diff --git a/pypy/module/test_lib_pypy/test_stackless.py b/pypy/module/test_lib_pypy/test_stackless_pickle.py
rename from pypy/module/test_lib_pypy/test_stackless.py
rename to pypy/module/test_lib_pypy/test_stackless_pickle.py
diff --git a/pypy/objspace/flow/flowcontext.py b/pypy/objspace/flow/flowcontext.py
--- a/pypy/objspace/flow/flowcontext.py
+++ b/pypy/objspace/flow/flowcontext.py
@@ -184,7 +184,7 @@
 
 class FlowExecutionContext(ExecutionContext):
 
-    def __init__(self, space, code, globals, constargs={}, closure=None,
+    def __init__(self, space, code, globals, constargs={}, outer_func=None,
                  name=None):
         ExecutionContext.__init__(self, space)
         self.code = code
@@ -193,11 +193,11 @@
 
         self.crnt_offset = -1
         self.crnt_frame = None
-        if closure is None:
+        if outer_func and outer_func.closure:
+            self.closure = [nestedscope.Cell(Constant(value))
+                            for value in outer_func.closure]
+        else:
             self.closure = None
-        else:
-            self.closure = [nestedscope.Cell(Constant(value))
-                            for value in closure]
         frame = self.create_frame()
         formalargcount = code.getformalargcount()
         arg_list = [Variable() for i in range(formalargcount)]
@@ -216,7 +216,7 @@
         # while ignoring any operation like the creation of the locals dict
         self.recorder = []
         frame = FlowSpaceFrame(self.space, self.code,
-                               self.w_globals, self.closure)
+                               self.w_globals, self)
         frame.last_instr = 0
         return frame
 
diff --git a/pypy/objspace/flow/objspace.py b/pypy/objspace/flow/objspace.py
--- a/pypy/objspace/flow/objspace.py
+++ b/pypy/objspace/flow/objspace.py
@@ -252,9 +252,9 @@
             raise TypeError("%r is a generator" % (func,))
         code = PyCode._from_code(self, code)
         if func.func_closure is None:
-            closure = None
+            cl = None
         else:
-            closure = [extract_cell_content(c) for c in func.func_closure]
+            cl = [extract_cell_content(c) for c in func.func_closure]
         # CallableFactory.pycall may add class_ to functions that are methods
         name = func.func_name
         class_ = getattr(func, 'class_', None)
@@ -262,8 +262,10 @@
             name = '%s.%s' % (class_.__name__, name)
         for c in "<>&!":
             name = name.replace(c, '_')
+        class outerfunc: # hack
+            closure = cl
         ec = flowcontext.FlowExecutionContext(self, code, func.func_globals,
-                                              constargs, closure, name)
+                                              constargs, outerfunc, name)
         graph = ec.graph
         graph.func = func
         # attach a signature and defaults to the graph
diff --git a/pypy/objspace/std/celldict.py b/pypy/objspace/std/celldict.py
--- a/pypy/objspace/std/celldict.py
+++ b/pypy/objspace/std/celldict.py
@@ -65,6 +65,10 @@
         if isinstance(cell, ModuleCell):
             cell.w_value = w_value
             return
+        # If the new value and the current value are the same, don't create a
+        # level of indirection, or mutate the version.
+        if self.space.is_w(w_value, cell):
+            return
         if cell is not None:
             w_value = ModuleCell(w_value)
         self.mutated()
diff --git a/pypy/objspace/std/fake.py b/pypy/objspace/std/fake.py
--- a/pypy/objspace/std/fake.py
+++ b/pypy/objspace/std/fake.py
@@ -142,7 +142,7 @@
 
     def funcrun(self, func, args):
         frame = func.space.createframe(self, func.w_func_globals,
-                                        func.closure)
+                                       func)
         sig = self.signature()
         scope_w = args.parse_obj(None, func.name, sig, func.defs_w)
         frame.setfastscope(scope_w)
diff --git a/pypy/objspace/std/floatobject.py b/pypy/objspace/std/floatobject.py
--- a/pypy/objspace/std/floatobject.py
+++ b/pypy/objspace/std/floatobject.py
@@ -355,9 +355,13 @@
     y = w_float2.floatval
     if y == 0.0:
         raise FailedToImplementArgs(space.w_ZeroDivisionError, space.wrap("float modulo"))
-    mod = math.fmod(x, y)
-    if (mod and ((y < 0.0) != (mod < 0.0))):
-        mod += y
+    try:
+        mod = math.fmod(x, y)
+    except ValueError:
+        mod = rfloat.NAN
+    else:
+        if (mod and ((y < 0.0) != (mod < 0.0))):
+            mod += y
 
     return W_FloatObject(mod)
 
@@ -366,7 +370,10 @@
     y = w_float2.floatval
     if y == 0.0:
         raise FailedToImplementArgs(space.w_ZeroDivisionError, space.wrap("float modulo"))
-    mod = math.fmod(x, y)
+    try:
+        mod = math.fmod(x, y)
+    except ValueError:
+        return [W_FloatObject(rfloat.NAN), W_FloatObject(rfloat.NAN)]
     # fmod is typically exact, so vx-mod is *mathematically* an
     # exact multiple of wx.  But this is fp arithmetic, and fp
     # vx - mod is an approximation; the result is that div may
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -129,12 +129,12 @@
         ec._py_repr = None
         return ec
 
-    def createframe(self, code, w_globals, closure=None):
+    def createframe(self, code, w_globals, outer_func=None):
         from pypy.objspace.std.fake import CPythonFakeCode, CPythonFakeFrame
         if not we_are_translated() and isinstance(code, CPythonFakeCode):
             return CPythonFakeFrame(self, code, w_globals)
         else:
-            return ObjSpace.createframe(self, code, w_globals, closure)
+            return ObjSpace.createframe(self, code, w_globals, outer_func)
 
     def gettypefor(self, cls):
         return self.gettypeobject(cls.typedef)
diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py
--- a/pypy/objspace/std/stringobject.py
+++ b/pypy/objspace/std/stringobject.py
@@ -913,12 +913,16 @@
 def repr__String(space, w_str):
     s = w_str._value
 
-    buf = StringBuilder(len(s) + 2)
-
     quote = "'"
     if quote in s and '"' not in s:
         quote = '"'
 
+    return space.wrap(string_escape_encode(s, quote))
+
+def string_escape_encode(s, quote):
+
+    buf = StringBuilder(len(s) + 2)
+
     buf.append(quote)
     startslice = 0
 
@@ -959,7 +963,7 @@
 
     buf.append(quote)
 
-    return space.wrap(buf.build())
+    return buf.build()
 
 
 DEFAULT_NOOP_TABLE = ''.join([chr(i) for i in range(256)])
diff --git a/pypy/objspace/std/test/test_celldict.py b/pypy/objspace/std/test/test_celldict.py
--- a/pypy/objspace/std/test/test_celldict.py
+++ b/pypy/objspace/std/test/test_celldict.py
@@ -39,6 +39,20 @@
         assert d.getitem("a") is None
         assert d.strategy.getdictvalue_no_unwrapping(d, "a") is None
 
+    def test_same_key_set_twice(self):
+        strategy = ModuleDictStrategy(space)
+        storage = strategy.get_empty_storage()
+        d = W_DictMultiObject(space, strategy, storage)
+
+        v1 = strategy.version
+        x = object()
+        d.setitem("a", x)
+        v2 = strategy.version
+        assert v1 is not v2
+        d.setitem("a", x)
+        v3 = strategy.version
+        assert v2 is v3
+
 class AppTestModuleDict(object):
     def setup_class(cls):
         cls.space = gettestobjspace(**{"objspace.std.withcelldict": True})
diff --git a/pypy/objspace/std/test/test_floatobject.py b/pypy/objspace/std/test/test_floatobject.py
--- a/pypy/objspace/std/test/test_floatobject.py
+++ b/pypy/objspace/std/test/test_floatobject.py
@@ -767,3 +767,19 @@
 
     def test_invalid(self):
         raises(ValueError, float.fromhex, "0P")
+
+    def test_division_edgecases(self):
+        import math
+
+        # inf
+        inf = float("inf")
+        assert math.isnan(inf % 3)
+        assert math.isnan(inf // 3)
+        x, y = divmod(inf, 3)
+        assert math.isnan(x)
+        assert math.isnan(y)
+
+        # divide by 0
+        raises(ZeroDivisionError, lambda: inf % 0)
+        raises(ZeroDivisionError, lambda: inf // 0)
+        raises(ZeroDivisionError, divmod, inf, 0)
\ No newline at end of file
diff --git a/pypy/objspace/std/test/test_methodcache.py b/pypy/objspace/std/test/test_methodcache.py
--- a/pypy/objspace/std/test/test_methodcache.py
+++ b/pypy/objspace/std/test/test_methodcache.py
@@ -134,20 +134,24 @@
 
     def test_custom_metaclass(self):
         import __pypy__
-        class MetaA(type):
-            def __getattribute__(self, x):
-                return 1
-        def f(self):
-            return 42
-        A = type.__new__(MetaA, "A", (), {"f": f})
-        l = [type.__getattribute__(A, "__new__")(A)] * 10
-        __pypy__.reset_method_cache_counter()
-        for i, a in enumerate(l):
-            assert a.f() == 42
-        cache_counter = __pypy__.method_cache_counter("f")
-        assert cache_counter[0] >= 5
-        assert cache_counter[1] >= 1 # should be (27, 3)
-        assert sum(cache_counter) == 10
+        for j in range(20):
+            class MetaA(type):
+                def __getattribute__(self, x):
+                    return 1
+            def f(self):
+                return 42
+            A = type.__new__(MetaA, "A", (), {"f": f})
+            l = [type.__getattribute__(A, "__new__")(A)] * 10
+            __pypy__.reset_method_cache_counter()
+            for i, a in enumerate(l):
+                assert a.f() == 42
+            cache_counter = __pypy__.method_cache_counter("f")
+            assert sum(cache_counter) == 10
+            if cache_counter == (9, 1):
+                break
+            #else the moon is misaligned, try again
+        else:
+            raise AssertionError("cache_counter = %r" % (cache_counter,))
 
     def test_mutate_class(self):
         import __pypy__
diff --git a/pypy/objspace/std/test/test_rangeobject.py b/pypy/objspace/std/test/test_rangeobject.py
--- a/pypy/objspace/std/test/test_rangeobject.py
+++ b/pypy/objspace/std/test/test_rangeobject.py
@@ -89,6 +89,9 @@
         assert not self.not_forced(r)
         r.sort()
         assert r == range(1, 100) + [999]
+        r = range(10)
+        r.sort(key=lambda x: -x)
+        assert r == range(9, -1, -1)
 
     def test_pop(self):
         r = range(10)
diff --git a/pypy/rlib/clibffi.py b/pypy/rlib/clibffi.py
--- a/pypy/rlib/clibffi.py
+++ b/pypy/rlib/clibffi.py
@@ -286,10 +286,10 @@
 
 FFI_OK = cConfig.FFI_OK
 FFI_BAD_TYPEDEF = cConfig.FFI_BAD_TYPEDEF
-FFI_DEFAULT_ABI = rffi.cast(rffi.USHORT, cConfig.FFI_DEFAULT_ABI)
+FFI_DEFAULT_ABI = cConfig.FFI_DEFAULT_ABI
 if _WIN32:
-    FFI_STDCALL = rffi.cast(rffi.USHORT, cConfig.FFI_STDCALL)
-FFI_TYPE_STRUCT = rffi.cast(rffi.USHORT, cConfig.FFI_TYPE_STRUCT)
+    FFI_STDCALL = cConfig.FFI_STDCALL
+FFI_TYPE_STRUCT = cConfig.FFI_TYPE_STRUCT
 FFI_CIFP = rffi.COpaquePtr('ffi_cif', compilation_info=eci)
 
 FFI_CLOSUREP = lltype.Ptr(cConfig.ffi_closure)
@@ -319,7 +319,7 @@
        which the 'ffistruct' member is a regular FFI_TYPE.
     """
     tpe = lltype.malloc(FFI_STRUCT_P.TO, len(field_types)+1, flavor='raw')
-    tpe.ffistruct.c_type = FFI_TYPE_STRUCT
+    tpe.ffistruct.c_type = rffi.cast(rffi.USHORT, FFI_TYPE_STRUCT)
     tpe.ffistruct.c_size = rffi.cast(rffi.SIZE_T, size)
     tpe.ffistruct.c_alignment = rffi.cast(rffi.USHORT, aligment)
     tpe.ffistruct.c_elements = rffi.cast(FFI_TYPE_PP,
@@ -402,12 +402,20 @@
 
 closureHeap = ClosureHeap()
 
-FUNCFLAG_STDCALL   = 0
-FUNCFLAG_CDECL     = 1  # for WINAPI calls
+FUNCFLAG_STDCALL   = 0    # on Windows: for WINAPI calls
+FUNCFLAG_CDECL     = 1    # on Windows: for __cdecl calls
 FUNCFLAG_PYTHONAPI = 4
 FUNCFLAG_USE_ERRNO = 8
 FUNCFLAG_USE_LASTERROR = 16
 
+def get_call_conv(flags, from_jit):
+    if _WIN32 and (flags & FUNCFLAG_CDECL == 0):
+        return FFI_STDCALL
+    else:
+        return FFI_DEFAULT_ABI
+get_call_conv._annspecialcase_ = 'specialize:arg(1)'     # hack :-/
+
+
 class AbstractFuncPtr(object):
     ll_cif = lltype.nullptr(FFI_CIFP.TO)
     ll_argtypes = lltype.nullptr(FFI_TYPE_PP.TO)
@@ -427,21 +435,17 @@
         self.ll_cif = lltype.malloc(FFI_CIFP.TO, flavor='raw',
                                     track_allocation=False) # freed by the __del__
 
-        if _WIN32 and (flags & FUNCFLAG_CDECL == 0):
-            cc = FFI_STDCALL
-        else:
-            cc = FFI_DEFAULT_ABI
-
         if _MSVC:
             # This little trick works correctly with MSVC.
             # It returns small structures in registers
-            if r_uint(restype.c_type) == FFI_TYPE_STRUCT:
+            if intmask(restype.c_type) == FFI_TYPE_STRUCT:
                 if restype.c_size <= 4:
                     restype = ffi_type_sint32
                 elif restype.c_size <= 8:
                     restype = ffi_type_sint64
 
-        res = c_ffi_prep_cif(self.ll_cif, cc,
+        res = c_ffi_prep_cif(self.ll_cif,
+                             rffi.cast(rffi.USHORT, get_call_conv(flags,False)),
                              rffi.cast(rffi.UINT, argnum), restype,
                              self.ll_argtypes)
         if not res == FFI_OK:
diff --git a/pypy/rlib/libffi.py b/pypy/rlib/libffi.py
--- a/pypy/rlib/libffi.py
+++ b/pypy/rlib/libffi.py
@@ -75,7 +75,7 @@
     @staticmethod
     @jit.elidable
     def is_struct(ffi_type):
-        return intmask(ffi_type.c_type) == intmask(FFI_TYPE_STRUCT)
+        return intmask(ffi_type.c_type) == FFI_TYPE_STRUCT
 
 types._import()
 
@@ -206,6 +206,7 @@
     _immutable_fields_ = ['funcsym']
     argtypes = []
     restype = lltype.nullptr(clibffi.FFI_TYPE_P.TO)
+    flags = 0
     funcsym = lltype.nullptr(rffi.VOIDP.TO)
 
     def __init__(self, name, argtypes, restype, funcsym, flags=FUNCFLAG_CDECL,
diff --git a/pypy/rlib/parsing/tree.py b/pypy/rlib/parsing/tree.py
--- a/pypy/rlib/parsing/tree.py
+++ b/pypy/rlib/parsing/tree.py
@@ -6,9 +6,16 @@
         content = ["digraph G{"]
         content.extend(self.dot())
         content.append("}")
-        p = py.test.ensuretemp("automaton").join("temp.dot")
+        try:
+            p = py.test.ensuretemp("automaton").join("temp.dot")
+            remove = False
+        except AttributeError: # pytest lacks ensuretemp, make a normal one
+            p = py.path.local.mkdtemp().join('automaton.dot')
+            remove = True
         p.write("\n".join(content))
         graphclient.display_dot_file(str(p))
+        if remove:
+            p.dirpath().remove()
 
 class Symbol(Node):
 
diff --git a/pypy/rlib/runicode.py b/pypy/rlib/runicode.py
--- a/pypy/rlib/runicode.py
+++ b/pypy/rlib/runicode.py
@@ -1403,7 +1403,7 @@
                                     s, pos, pos + unicode_bytes)
             result.append(res)
             continue
-        result.append(unichr(t))
+        result.append(UNICHR(t))
         pos += unicode_bytes
     return result.build(), pos
 
diff --git a/pypy/rpython/lltypesystem/ll2ctypes.py b/pypy/rpython/lltypesystem/ll2ctypes.py
--- a/pypy/rpython/lltypesystem/ll2ctypes.py
+++ b/pypy/rpython/lltypesystem/ll2ctypes.py
@@ -113,7 +113,7 @@
         rffi.LONGLONG:   ctypes.c_longlong,
         rffi.ULONGLONG:  ctypes.c_ulonglong,
         rffi.SIZE_T:     ctypes.c_size_t,
-        lltype.Bool:     ctypes.c_bool,
+        lltype.Bool:     getattr(ctypes, "c_bool", ctypes.c_long),
         llmemory.Address:  ctypes.c_void_p,
         llmemory.GCREF:    ctypes.c_void_p,
         llmemory.WeakRef:  ctypes.c_void_p, # XXX
@@ -1153,7 +1153,11 @@
         # an OverflowError on the following line.
         cvalue = ctypes.cast(ctypes.c_void_p(cvalue), cresulttype)
     else:
-        cvalue = cresulttype(cvalue).value   # mask high bits off if needed
+        try:
+            cvalue = cresulttype(cvalue).value   # mask high bits off if needed
+        except TypeError:
+            cvalue = int(cvalue)   # float -> int
+            cvalue = cresulttype(cvalue).value   # try again
     return ctypes2lltype(RESTYPE, cvalue)
 
 class ForceCastEntry(ExtRegistryEntry):
diff --git a/pypy/rpython/lltypesystem/lltype.py b/pypy/rpython/lltypesystem/lltype.py
--- a/pypy/rpython/lltypesystem/lltype.py
+++ b/pypy/rpython/lltypesystem/lltype.py
@@ -1283,6 +1283,8 @@
         try:
             return p._obj._hash_cache_
         except AttributeError:
+            assert self._T._gckind == 'gc'
+            assert self      # not for NULL
             result = hash(p._obj)
             if cache:
                 try:
diff --git a/pypy/rpython/lltypesystem/test/test_rffi.py b/pypy/rpython/lltypesystem/test/test_rffi.py
--- a/pypy/rpython/lltypesystem/test/test_rffi.py
+++ b/pypy/rpython/lltypesystem/test/test_rffi.py
@@ -699,7 +699,10 @@
     def test_cast(self):
         res = cast(SIZE_T, -1)
         assert type(res) is r_size_t
-        assert res == r_size_t(-1)    
+        assert res == r_size_t(-1)
+        # a float argument yields an integer result (42.5 -> 42)
+        res = cast(lltype.Signed, 42.5)
+        assert res == 42
     
     def test_rffi_sizeof(self):
         try:
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -1461,6 +1461,7 @@
         # We will fix such references to point to the copy of the young
         # objects when we walk 'old_objects_pointing_to_young'.
         self.old_objects_pointing_to_young.append(newobj)
+    _trace_drag_out._always_inline_ = True
 
     def _visit_young_rawmalloced_object(self, obj):
         # 'obj' points to a young, raw-malloced object.
diff --git a/pypy/rpython/memory/gctypelayout.py b/pypy/rpython/memory/gctypelayout.py
--- a/pypy/rpython/memory/gctypelayout.py
+++ b/pypy/rpython/memory/gctypelayout.py
@@ -459,7 +459,7 @@
             if t._hints.get('immutable'):
                 return
             if 'immutable_fields' in t._hints:
-                skip = t._hints['immutable_fields'].fields
+                skip = t._hints['immutable_fields'].all_immutable_fields()
         for n, t2 in t._flds.iteritems():
             if isinstance(t2, lltype.Ptr) and t2.TO._gckind == 'gc':
                 if n not in skip:
diff --git a/pypy/rpython/memory/test/test_gctypelayout.py b/pypy/rpython/memory/test/test_gctypelayout.py
--- a/pypy/rpython/memory/test/test_gctypelayout.py
+++ b/pypy/rpython/memory/test/test_gctypelayout.py
@@ -4,7 +4,7 @@
 from pypy.rpython.memory.gctypelayout import gc_pointers_inside
 from pypy.rpython.lltypesystem import lltype, llmemory, rclass
 from pypy.rpython.test.test_llinterp import get_interpreter
-from pypy.rpython.rclass import IR_IMMUTABLE
+from pypy.rpython.rclass import IR_IMMUTABLE, IR_QUASIIMMUTABLE
 from pypy.objspace.flow.model import Constant
 
 class FakeGC:
@@ -102,7 +102,7 @@
     accessor = rclass.FieldListAccessor()
     S3 = lltype.GcStruct('S', ('x', PT), ('y', PT),
                          hints={'immutable_fields': accessor})
-    accessor.initialize(S3, {'x': IR_IMMUTABLE})
+    accessor.initialize(S3, {'x': IR_IMMUTABLE, 'y': IR_QUASIIMMUTABLE})
     #
     s1 = lltype.malloc(S1)
     adr = llmemory.cast_ptr_to_adr(s1)
diff --git a/pypy/rpython/rclass.py b/pypy/rpython/rclass.py
--- a/pypy/rpython/rclass.py
+++ b/pypy/rpython/rclass.py
@@ -16,6 +16,13 @@
         for x in fields.itervalues():
             assert isinstance(x, ImmutableRanking)
 
+    def all_immutable_fields(self):  # set of keys ranked IR_IMMUTABLE[_ARRAY]
+        result = set()
+        for key, value in self.fields.iteritems():
+            if value in (IR_IMMUTABLE, IR_IMMUTABLE_ARRAY):
+                result.add(key)  # any other ranking is left out
+        return result
+
     def __repr__(self):
         return '<FieldListAccessor for %s>' % getattr(self, 'TYPE', '?')
 
diff --git a/pypy/tool/py.cleanup b/pypy/tool/py.cleanup
--- a/pypy/tool/py.cleanup
+++ b/pypy/tool/py.cleanup
@@ -1,16 +1,31 @@
 #!/usr/bin/env python
-import py, sys
+import sys, os, stat
 
-def shouldremove(p):
-    return p.ext == '.pyc'
+def clean(path):  # recursively delete .pyc/.pyo files below 'path'
+    global count  # module-level tally of unlinked files
+    try:
+        content = os.listdir(path)
+    except OSError:  # 'path' cannot be listed: report it and move on
+        print >> sys.stderr, "skipping", path
+        return
+    for fn in content:
+        filename = os.path.join(path, fn)
+        st = os.lstat(filename)  # lstat: a symlink is not followed
+        if stat.S_ISDIR(st.st_mode):
+            clean(filename)
+            if fn == '__pycache__':
+                try:
+                    os.rmdir(filename)  # only succeeds if now empty
+                except OSError:
+                    pass
+        elif fn.endswith('.pyc') or fn.endswith('.pyo'):
+            os.unlink(filename)
+            count += 1
 
 count = 0
 
 for arg in sys.argv[1:] or ['.']:
-    path = py.path.local(arg)
-    print "cleaning path", path, "of .pyc files"
-    for x in path.visit(shouldremove, lambda x: x.check(dotfile=0, link=0)):
-        x.remove()
-        count += 1
+    print "cleaning path", arg, "of .pyc/.pyo/__pycache__ files"
+    clean(arg)  # recursive walk; bumps the global 'count'
 
 print "%d files removed" % (count,)
diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py
--- a/pypy/tool/release/package.py
+++ b/pypy/tool/release/package.py
@@ -52,9 +52,14 @@
             pypy_c_dir = basedir.join('pypy', 'translator', 'goal')
         pypy_c = pypy_c_dir.join('pypy-c.exe')
         libpypy_c = pypy_c_dir.join('libpypy-c.dll')
+        libexpat = pypy_c_dir.join('libexpat.dll')
+        if not libexpat.check():
+            libexpat = py.path.local.sysfind('libexpat.dll')
+            assert libexpat, "libexpat.dll not found"
+            print "Picking %s" % libexpat
         binaries = [(pypy_c, pypy_c.basename),
                     (libpypy_c, libpypy_c.basename),
-                    (pypy_c_dir.join('libexpat.dll'), 'libexpat.dll')]
+                    (libexpat, libexpat.basename)]
     else:
         basename = 'pypy-c'
         if override_pypy_c is None:
diff --git a/pypy/translator/c/src/stacklet/stacklet.c b/pypy/translator/c/src/stacklet/stacklet.c
--- a/pypy/translator/c/src/stacklet/stacklet.c
+++ b/pypy/translator/c/src/stacklet/stacklet.c
@@ -319,10 +319,11 @@
 
 char **_stacklet_translate_pointer(stacklet_handle context, char **ptr)
 {
+  char *p = (char *)ptr;
+  long delta;
   if (context == NULL)
     return ptr;
-  char *p = (char *)ptr;
-  long delta = p - context->stack_start;
+  delta = p - context->stack_start;
   if (((unsigned long)delta) < ((unsigned long)context->stack_saved)) {
       /* a pointer to a saved away word */
       char *c = (char *)(context + 1);
diff --git a/pypy/translator/goal/app_main.py b/pypy/translator/goal/app_main.py
--- a/pypy/translator/goal/app_main.py
+++ b/pypy/translator/goal/app_main.py
@@ -260,6 +260,8 @@
     try:
         import _file
     except ImportError:
+        if sys.version_info < (2, 7):
+            return
         import ctypes # HACK: while running on top of CPython
         set_file_encoding = ctypes.pythonapi.PyFile_SetEncodingAndErrors
         set_file_encoding.argtypes = [ctypes.py_object, ctypes.c_char_p, ctypes.c_char_p]
@@ -479,7 +481,8 @@
             print >> sys.stderr, "'import site' failed"
 
     readenv = not ignore_environment
-    io_encoding = readenv and os.getenv("PYTHONIOENCODING")
+    io_encoding = ((readenv and os.getenv("PYTHONIOENCODING"))
+                   or sys.getfilesystemencoding())
     if io_encoding:
         set_io_encoding(io_encoding)
 
diff --git a/pypy/translator/goal/test2/test_app_main.py b/pypy/translator/goal/test2/test_app_main.py
--- a/pypy/translator/goal/test2/test_app_main.py
+++ b/pypy/translator/goal/test2/test_app_main.py
@@ -739,6 +739,19 @@
         data = self.run(p + os.sep)
         assert data == p + os.sep + '\n'
 
+    def test_getfilesystemencoding(self):  # stdout encoded per LC_CTYPE
+        if sys.version_info < (2, 7):
+            skip("test requires Python >= 2.7")
+        p = getscript_in_dir("""
+        import sys
+        sys.stdout.write(u'15\u20ac')
+        sys.stdout.flush()
+        """)
+        env = os.environ.copy()  # NOTE(review): an inherited LC_ALL would override LC_CTYPE
+        env["LC_CTYPE"] = 'en_US.UTF-8'  # ask for a UTF-8 locale
+        data = self.run(p, env=env)
+        assert data == '15\xe2\x82\xac'  # '15' + U+20AC encoded as UTF-8
+
     def test_pythonioencoding(self):
         if sys.version_info < (2, 7):
             skip("test requires Python >= 2.7")