[pypy-commit] pypy kill-unary-multimethods: merge default

Tue Sep 27 23:50:38 CEST 2011

Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: kill-unary-multimethods
Changeset: r47647:0d744fa42502
Date: 2011-09-27 18:50 -0300
http://bitbucket.org/pypy/pypy/changeset/0d744fa42502/

Log:	merge default

diff too long, truncating to 10000 out of 10353 lines

diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -1,2 +1,3 @@
 b590cf6de4190623aad9aa698694c22e614d67b9 release-1.5
 b48df0bf4e75b81d98f19ce89d4a7dc3e1dab5e5 benchmarked
+d8ac7d23d3ec5f9a0fa1264972f74a010dbfd07f release-1.6
diff --git a/dotviewer/graphparse.py b/dotviewer/graphparse.py
--- a/dotviewer/graphparse.py
+++ b/dotviewer/graphparse.py
@@ -36,48 +36,45 @@
     print >> sys.stderr, "Warning: could not guess file type, using 'dot'"
     return 'unknown'
 
-def dot2plain(content, contenttype, use_codespeak=False):
-    if contenttype == 'plain':
-        # already a .plain file
-        return content
+def dot2plain_graphviz(content, contenttype, use_codespeak=False):
+    if contenttype != 'neato':
+        cmdline = 'dot -Tplain'
+    else:
+        cmdline = 'neato -Tplain'
+    #print >> sys.stderr, '* running:', cmdline
+    close_fds = sys.platform != 'win32'
+    p = subprocess.Popen(cmdline, shell=True, close_fds=close_fds,
+                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+    (child_in, child_out) = (p.stdin, p.stdout)
+    try:
+        import thread
+    except ImportError:
+        bkgndwrite(child_in, content)
+    else:
+        thread.start_new_thread(bkgndwrite, (child_in, content))
+    plaincontent = child_out.read()
+    child_out.close()
+    if not plaincontent:    # 'dot' is likely not installed
+        raise PlainParseError("no result from running 'dot'")
+    return plaincontent
 
-    if not use_codespeak:
-        if contenttype != 'neato':
-            cmdline = 'dot -Tplain'
-        else:
-            cmdline = 'neato -Tplain'
-        #print >> sys.stderr, '* running:', cmdline
-        close_fds = sys.platform != 'win32'
-        p = subprocess.Popen(cmdline, shell=True, close_fds=close_fds,
-                             stdin=subprocess.PIPE, stdout=subprocess.PIPE)
-        (child_in, child_out) = (p.stdin, p.stdout)
-        try:
-            import thread
-        except ImportError:
-            bkgndwrite(child_in, content)
-        else:
-            thread.start_new_thread(bkgndwrite, (child_in, content))
-        plaincontent = child_out.read()
-        child_out.close()
-        if not plaincontent:    # 'dot' is likely not installed
-            raise PlainParseError("no result from running 'dot'")
-    else:
-        import urllib
-        request = urllib.urlencode({'dot': content})
-        url = 'http://codespeak.net/pypy/convertdot.cgi'
-        print >> sys.stderr, '* posting:', url
-        g = urllib.urlopen(url, data=request)
-        result = []
-        while True:
-            data = g.read(16384)
-            if not data:
-                break
-            result.append(data)
-        g.close()
-        plaincontent = ''.join(result)
-        # very simple-minded way to give a somewhat better error message
-        if plaincontent.startswith('<body'):
-            raise Exception("the dot on codespeak has very likely crashed")
+def dot2plain_codespeak(content, contenttype):
+    import urllib
+    request = urllib.urlencode({'dot': content})
+    url = 'http://codespeak.net/pypy/convertdot.cgi'
+    print >> sys.stderr, '* posting:', url
+    g = urllib.urlopen(url, data=request)
+    result = []
+    while True:
+        data = g.read(16384)
+        if not data:
+            break
+        result.append(data)
+    g.close()
+    plaincontent = ''.join(result)
+    # very simple-minded way to give a somewhat better error message
+    if plaincontent.startswith('<body'):
+        raise Exception("the dot on codespeak has very likely crashed")
     return plaincontent
 
 def bkgndwrite(f, data):
@@ -148,10 +145,13 @@
 
 def parse_dot(graph_id, content, links={}, fixedfont=False):
     contenttype = guess_type(content)
-    try:
-        plaincontent = dot2plain(content, contenttype, use_codespeak=False)
-        return list(parse_plain(graph_id, plaincontent, links, fixedfont))
-    except PlainParseError:
-        # failed, retry via codespeak
-        plaincontent = dot2plain(content, contenttype, use_codespeak=True)
-        return list(parse_plain(graph_id, plaincontent, links, fixedfont))
+    if contenttype == 'plain':
+        plaincontent = content
+    else:
+        try:
+            plaincontent = dot2plain_graphviz(content, contenttype)
+        except PlainParseError, e:
+            print e
+            # failed, retry via codespeak
+            plaincontent = dot2plain_codespeak(content, contenttype)
+    return list(parse_plain(graph_id, plaincontent, links, fixedfont))
diff --git a/lib-python/modified-2.7/gzip.py b/lib-python/modified-2.7/gzip.py
deleted file mode 100644
--- a/lib-python/modified-2.7/gzip.py
+++ /dev/null
@@ -1,514 +0,0 @@
-"""Functions that read and write gzipped files.
-
-The user of the file doesn't have to worry about the compression,
-but random access is not allowed."""
-
-# based on Andrew Kuchling's minigzip.py distributed with the zlib module
-
-import struct, sys, time, os
-import zlib
-import io
-import __builtin__
-
-__all__ = ["GzipFile","open"]
-
-FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
-
-READ, WRITE = 1, 2
-
-def write32u(output, value):
-    # The L format writes the bit pattern correctly whether signed
-    # or unsigned.
-    output.write(struct.pack("<L", value))
-
-def read32(input):
-    return struct.unpack("<I", input.read(4))[0]
-
-def open(filename, mode="rb", compresslevel=9):
-    """Shorthand for GzipFile(filename, mode, compresslevel).
-
-    The filename argument is required; mode defaults to 'rb'
-    and compresslevel defaults to 9.
-
-    """
-    return GzipFile(filename, mode, compresslevel)
-
-class GzipFile(io.BufferedIOBase):
-    """The GzipFile class simulates most of the methods of a file object with
-    the exception of the readinto() and truncate() methods.
-
-    """
-
-    myfileobj = None
-    max_read_chunk = 10 * 1024 * 1024   # 10Mb
-
-    def __init__(self, filename=None, mode=None,
-                 compresslevel=9, fileobj=None, mtime=None):
-        """Constructor for the GzipFile class.
-
-        At least one of fileobj and filename must be given a
-        non-trivial value.
-
-        The new class instance is based on fileobj, which can be a regular
-        file, a StringIO object, or any other object which simulates a file.
-        It defaults to None, in which case filename is opened to provide
-        a file object.
-
-        When fileobj is not None, the filename argument is only used to be
-        included in the gzip file header, which may includes the original
-        filename of the uncompressed file.  It defaults to the filename of
-        fileobj, if discernible; otherwise, it defaults to the empty string,
-        and in this case the original filename is not included in the header.
-
-        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
-        depending on whether the file will be read or written.  The default
-        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
-        Be aware that only the 'rb', 'ab', and 'wb' values should be used
-        for cross-platform portability.
-
-        The compresslevel argument is an integer from 1 to 9 controlling the
-        level of compression; 1 is fastest and produces the least compression,
-        and 9 is slowest and produces the most compression.  The default is 9.
-
-        The mtime argument is an optional numeric timestamp to be written
-        to the stream when compressing.  All gzip compressed streams
-        are required to contain a timestamp.  If omitted or None, the
-        current time is used.  This module ignores the timestamp when
-        decompressing; however, some programs, such as gunzip, make use
-        of it.  The format of the timestamp is the same as that of the
-        return value of time.time() and of the st_mtime member of the
-        object returned by os.stat().
-
-        """
-
-        # guarantee the file is opened in binary mode on platforms
-        # that care about that sort of thing
-        if mode and 'b' not in mode:
-            mode += 'b'
-        if fileobj is None:
-            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
-        if filename is None:
-            if hasattr(fileobj, 'name'): filename = fileobj.name
-            else: filename = ''
-        if mode is None:
-            if hasattr(fileobj, 'mode'): mode = fileobj.mode
-            else: mode = 'rb'
-
-        if mode[0:1] == 'r':
-            self.mode = READ
-            # Set flag indicating start of a new member
-            self._new_member = True
-            # Buffer data read from gzip file. extrastart is offset in
-            # stream where buffer starts. extrasize is number of
-            # bytes remaining in buffer from current stream position.
-            self.extrabuf = ""
-            self.extrasize = 0
-            self.extrastart = 0
-            self.name = filename
-            # Starts small, scales exponentially
-            self.min_readsize = 100
-
-        elif mode[0:1] == 'w' or mode[0:1] == 'a':
-            self.mode = WRITE
-            self._init_write(filename)
-            self.compress = zlib.compressobj(compresslevel,
-                                             zlib.DEFLATED,
-                                             -zlib.MAX_WBITS,
-                                             zlib.DEF_MEM_LEVEL,
-                                             0)
-        else:
-            raise IOError, "Mode " + mode + " not supported"
-
-        self.fileobj = fileobj
-        self.offset = 0
-        self.mtime = mtime
-
-        if self.mode == WRITE:
-            self._write_gzip_header()
-
-    @property
-    def filename(self):
-        import warnings
-        warnings.warn("use the name attribute", DeprecationWarning, 2)
-        if self.mode == WRITE and self.name[-3:] != ".gz":
-            return self.name + ".gz"
-        return self.name
-
-    def __repr__(self):
-        s = repr(self.fileobj)
-        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'
-
-    def _check_closed(self):
-        """Raises a ValueError if the underlying file object has been closed.
-
-        """
-        if self.closed:
-            raise ValueError('I/O operation on closed file.')
-
-    def _init_write(self, filename):
-        self.name = filename
-        self.crc = zlib.crc32("") & 0xffffffffL
-        self.size = 0
-        self.writebuf = []
-        self.bufsize = 0
-
-    def _write_gzip_header(self):
-        self.fileobj.write('\037\213')             # magic header
-        self.fileobj.write('\010')                 # compression method
-        fname = os.path.basename(self.name)
-        if fname.endswith(".gz"):
-            fname = fname[:-3]
-        flags = 0
-        if fname:
-            flags = FNAME
-        self.fileobj.write(chr(flags))
-        mtime = self.mtime
-        if mtime is None:
-            mtime = time.time()
-        write32u(self.fileobj, long(mtime))
-        self.fileobj.write('\002')
-        self.fileobj.write('\377')
-        if fname:
-            self.fileobj.write(fname + '\000')
-
-    def _init_read(self):
-        self.crc = zlib.crc32("") & 0xffffffffL
-        self.size = 0
-
-    def _read_gzip_header(self):
-        magic = self.fileobj.read(2)
-        if magic != '\037\213':
-            raise IOError, 'Not a gzipped file'
-        method = ord( self.fileobj.read(1) )
-        if method != 8:
-            raise IOError, 'Unknown compression method'
-        flag = ord( self.fileobj.read(1) )
-        self.mtime = read32(self.fileobj)
-        # extraflag = self.fileobj.read(1)
-        # os = self.fileobj.read(1)
-        self.fileobj.read(2)
-
-        if flag & FEXTRA:
-            # Read & discard the extra field, if present
-            xlen = ord(self.fileobj.read(1))
-            xlen = xlen + 256*ord(self.fileobj.read(1))
-            self.fileobj.read(xlen)
-        if flag & FNAME:
-            # Read and discard a null-terminated string containing the filename
-            while True:
-                s = self.fileobj.read(1)
-                if not s or s=='\000':
-                    break
-        if flag & FCOMMENT:
-            # Read and discard a null-terminated string containing a comment
-            while True:
-                s = self.fileobj.read(1)
-                if not s or s=='\000':
-                    break
-        if flag & FHCRC:
-            self.fileobj.read(2)     # Read & discard the 16-bit header CRC
-
-    def write(self,data):
-        self._check_closed()
-        if self.mode != WRITE:
-            import errno
-            raise IOError(errno.EBADF, "write() on read-only GzipFile object")
-
-        if self.fileobj is None:
-            raise ValueError, "write() on closed GzipFile object"
-
-        # Convert data type if called by io.BufferedWriter.
-        if isinstance(data, memoryview):
-            data = data.tobytes()
-
-        if len(data) > 0:
-            self.size = self.size + len(data)
-            self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
-            self.fileobj.write( self.compress.compress(data) )
-            self.offset += len(data)
-
-        return len(data)
-
-    def read(self, size=-1):
-        self._check_closed()
-        if self.mode != READ:
-            import errno
-            raise IOError(errno.EBADF, "read() on write-only GzipFile object")
-
-        if self.extrasize <= 0 and self.fileobj is None:
-            return ''
-
-        readsize = 1024
-        if size < 0:        # get the whole thing
-            try:
-                while True:
-                    self._read(readsize)
-                    readsize = min(self.max_read_chunk, readsize * 2)
-            except EOFError:
-                size = self.extrasize
-        elif size == 0:
-            return ""
-        else:               # just get some more of it
-            try:
-                while size > self.extrasize:
-                    self._read(readsize)
-                    readsize = min(self.max_read_chunk, readsize * 2)
-            except EOFError:
-                if size > self.extrasize:
-                    size = self.extrasize
-
-        offset = self.offset - self.extrastart
-        chunk = self.extrabuf[offset: offset + size]
-        self.extrasize = self.extrasize - size
-
-        self.offset += size
-        return chunk
-
-    def _unread(self, buf):
-        self.extrasize = len(buf) + self.extrasize
-        self.offset -= len(buf)
-
-    def _read(self, size=1024):
-        if self.fileobj is None:
-            raise EOFError, "Reached EOF"
-
-        if self._new_member:
-            # If the _new_member flag is set, we have to
-            # jump to the next member, if there is one.
-            #
-            # First, check if we're at the end of the file;
-            # if so, it's time to stop; no more members to read.
-            pos = self.fileobj.tell()   # Save current position
-            self.fileobj.seek(0, 2)     # Seek to end of file
-            if pos == self.fileobj.tell():
-                raise EOFError, "Reached EOF"
-            else:
-                self.fileobj.seek( pos ) # Return to original position
-
-            self._init_read()
-            self._read_gzip_header()
-            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
-            self._new_member = False
-
-        # Read a chunk of data from the file
-        buf = self.fileobj.read(size)
-
-        # If the EOF has been reached, flush the decompression object
-        # and mark this object as finished.
-
-        if buf == "":
-            uncompress = self.decompress.flush()
-            self._read_eof()
-            self._add_read_data( uncompress )
-            raise EOFError, 'Reached EOF'
-
-        uncompress = self.decompress.decompress(buf)
-        self._add_read_data( uncompress )
-
-        if self.decompress.unused_data != "":
-            # Ending case: we've come to the end of a member in the file,
-            # so seek back to the start of the unused data, finish up
-            # this member, and read a new gzip header.
-            # (The number of bytes to seek back is the length of the unused
-            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
-            self.fileobj.seek( -len(self.decompress.unused_data)+8, 1)
-
-            # Check the CRC and file size, and set the flag so we read
-            # a new member on the next call
-            self._read_eof()
-            self._new_member = True
-
-    def _add_read_data(self, data):
-        self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
-        offset = self.offset - self.extrastart
-        self.extrabuf = self.extrabuf[offset:] + data
-        self.extrasize = self.extrasize + len(data)
-        self.extrastart = self.offset
-        self.size = self.size + len(data)
-
-    def _read_eof(self):
-        # We've read to the end of the file, so we have to rewind in order
-        # to reread the 8 bytes containing the CRC and the file size.
-        # We check the that the computed CRC and size of the
-        # uncompressed data matches the stored values.  Note that the size
-        # stored is the true file size mod 2**32.
-        self.fileobj.seek(-8, 1)
-        crc32 = read32(self.fileobj)
-        isize = read32(self.fileobj)  # may exceed 2GB
-        if crc32 != self.crc:
-            raise IOError("CRC check failed %s != %s" % (hex(crc32),
-                                                         hex(self.crc)))
-        elif isize != (self.size & 0xffffffffL):
-            raise IOError, "Incorrect length of data produced"
-
-        # Gzip files can be padded with zeroes and still have archives.
-        # Consume all zero bytes and set the file position to the first
-        # non-zero byte. See http://www.gzip.org/#faq8
-        c = "\x00"
-        while c == "\x00":
-            c = self.fileobj.read(1)
-        if c:
-            self.fileobj.seek(-1, 1)
-
-    @property
-    def closed(self):
-        return self.fileobj is None
-
-    def close(self):
-        if self.fileobj is None:
-            return
-        if self.mode == WRITE:
-            self.fileobj.write(self.compress.flush())
-            write32u(self.fileobj, self.crc)
-            # self.size may exceed 2GB, or even 4GB
-            write32u(self.fileobj, self.size & 0xffffffffL)
-            self.fileobj = None
-        elif self.mode == READ:
-            self.fileobj = None
-        if self.myfileobj:
-            self.myfileobj.close()
-            self.myfileobj = None
-
-    def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
-        self._check_closed()
-        if self.mode == WRITE:
-            # Ensure the compressor's buffer is flushed
-            self.fileobj.write(self.compress.flush(zlib_mode))
-            self.fileobj.flush()
-
-    def fileno(self):
-        """Invoke the underlying file object's fileno() method.
-
-        This will raise AttributeError if the underlying file object
-        doesn't support fileno().
-        """
-        return self.fileobj.fileno()
-
-    def rewind(self):
-        '''Return the uncompressed stream file position indicator to the
-        beginning of the file'''
-        if self.mode != READ:
-            raise IOError("Can't rewind in write mode")
-        self.fileobj.seek(0)
-        self._new_member = True
-        self.extrabuf = ""
-        self.extrasize = 0
-        self.extrastart = 0
-        self.offset = 0
-
-    def readable(self):
-        return self.mode == READ
-
-    def writable(self):
-        return self.mode == WRITE
-
-    def seekable(self):
-        return True
-
-    def seek(self, offset, whence=0):
-        if whence:
-            if whence == 1:
-                offset = self.offset + offset
-            else:
-                raise ValueError('Seek from end not supported')
-        if self.mode == WRITE:
-            if offset < self.offset:
-                raise IOError('Negative seek in write mode')
-            count = offset - self.offset
-            for i in range(count // 1024):
-                self.write(1024 * '\0')
-            self.write((count % 1024) * '\0')
-        elif self.mode == READ:
-            if offset == self.offset:
-                self.read(0) # to make sure that this file is open
-                return self.offset
-            if offset < self.offset:
-                # for negative seek, rewind and do positive seek
-                self.rewind()
-            count = offset - self.offset
-            for i in range(count // 1024):
-                self.read(1024)
-            self.read(count % 1024)
-
-        return self.offset
-
-    def readline(self, size=-1):
-        if size < 0:
-            # Shortcut common case - newline found in buffer.
-            offset = self.offset - self.extrastart
-            i = self.extrabuf.find('\n', offset) + 1
-            if i > 0:
-                self.extrasize -= i - offset
-                self.offset += i - offset
-                return self.extrabuf[offset: i]
-
-            size = sys.maxint
-            readsize = self.min_readsize
-        else:
-            readsize = size
-        bufs = []
-        while size != 0:
-            c = self.read(readsize)
-            i = c.find('\n')
-
-            # We set i=size to break out of the loop under two
-            # conditions: 1) there's no newline, and the chunk is
-            # larger than size, or 2) there is a newline, but the
-            # resulting line would be longer than 'size'.
-            if (size <= i) or (i == -1 and len(c) > size):
-                i = size - 1
-
-            if i >= 0 or c == '':
-                bufs.append(c[:i + 1])    # Add portion of last chunk
-                self._unread(c[i + 1:])   # Push back rest of chunk
-                break
-
-            # Append chunk to list, decrease 'size',
-            bufs.append(c)
-            size = size - len(c)
-            readsize = min(size, readsize * 2)
-        if readsize > self.min_readsize:
-            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
-        return ''.join(bufs) # Return resulting line
-
-
-def _test():
-    # Act like gzip; with -d, act like gunzip.
-    # The input file is not deleted, however, nor are any other gzip
-    # options or features supported.
-    args = sys.argv[1:]
-    decompress = args and args[0] == "-d"
-    if decompress:
-        args = args[1:]
-    if not args:
-        args = ["-"]
-    for arg in args:
-        if decompress:
-            if arg == "-":
-                f = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
-                g = sys.stdout
-            else:
-                if arg[-3:] != ".gz":
-                    print "filename doesn't end in .gz:", repr(arg)
-                    continue
-                f = open(arg, "rb")
-                g = __builtin__.open(arg[:-3], "wb")
-        else:
-            if arg == "-":
-                f = sys.stdin
-                g = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
-            else:
-                f = __builtin__.open(arg, "rb")
-                g = open(arg + ".gz", "wb")
-        while True:
-            chunk = f.read(1024)
-            if not chunk:
-                break
-            g.write(chunk)
-        if g is not sys.stdout:
-            g.close()
-        if f is not sys.stdin:
-            f.close()
-
-if __name__ == '__main__':
-    _test()
diff --git a/lib-python/modified-2.7/tarfile.py b/lib-python/modified-2.7/tarfile.py
--- a/lib-python/modified-2.7/tarfile.py
+++ b/lib-python/modified-2.7/tarfile.py
@@ -252,8 +252,8 @@
        the high bit set. So we calculate two checksums, unsigned and
        signed.
     """
-    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", buf[:512]))
-    signed_chksum = 256 + sum(struct.unpack("148b8x356b", buf[:512]))
+    unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
+    signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
     return unsigned_chksum, signed_chksum
 
 def copyfileobj(src, dst, length=None):
@@ -265,6 +265,7 @@
     if length is None:
         shutil.copyfileobj(src, dst)
         return
+
     BUFSIZE = 16 * 1024
     blocks, remainder = divmod(length, BUFSIZE)
     for b in xrange(blocks):
@@ -801,19 +802,19 @@
         if self.closed:
             raise ValueError("I/O operation on closed file")
 
+        buf = ""
         if self.buffer:
             if size is None:
-                buf = self.buffer + self.fileobj.read()
+                buf = self.buffer
                 self.buffer = ""
             else:
                 buf = self.buffer[:size]
                 self.buffer = self.buffer[size:]
-                buf += self.fileobj.read(size - len(buf))
+
+        if size is None:
+            buf += self.fileobj.read()
         else:
-            if size is None:
-                buf = self.fileobj.read()
-            else:
-                buf = self.fileobj.read(size)
+            buf += self.fileobj.read(size - len(buf))
 
         self.position += len(buf)
         return buf
diff --git a/lib_pypy/_functools.py b/lib_pypy/_functools.py
--- a/lib_pypy/_functools.py
+++ b/lib_pypy/_functools.py
@@ -14,10 +14,9 @@
             raise TypeError("the first argument must be callable")
         self.func = func
         self.args = args
-        self.keywords = keywords
+        self.keywords = keywords or None
 
     def __call__(self, *fargs, **fkeywords):
-        newkeywords = self.keywords.copy()
-        newkeywords.update(fkeywords)
-        return self.func(*(self.args + fargs), **newkeywords)
-
+        if self.keywords is not None:
+            fkeywords = dict(self.keywords, **fkeywords)
+        return self.func(*(self.args + fargs), **fkeywords)
diff --git a/lib_pypy/greenlet.py b/lib_pypy/greenlet.py
--- a/lib_pypy/greenlet.py
+++ b/lib_pypy/greenlet.py
@@ -48,23 +48,23 @@
     def switch(self, *args):
         "Switch execution to this greenlet, optionally passing the values "
         "given as argument(s).  Returns the value passed when switching back."
-        return self.__switch(_continulet.switch, args)
+        return self.__switch('switch', args)
 
     def throw(self, typ=GreenletExit, val=None, tb=None):
         "raise exception in greenlet, return value passed when switching back"
-        return self.__switch(_continulet.throw, typ, val, tb)
+        return self.__switch('throw', typ, val, tb)
 
-    def __switch(target, unbound_method, *args):
+    def __switch(target, methodname, *args):
         current = getcurrent()
         #
         while not target:
             if not target.__started:
-                if unbound_method != _continulet.throw:
+                if methodname == 'switch':
                     greenlet_func = _greenlet_start
                 else:
                     greenlet_func = _greenlet_throw
                 _continulet.__init__(target, greenlet_func, *args)
-                unbound_method = _continulet.switch
+                methodname = 'switch'
                 args = ()
                 target.__started = True
                 break
@@ -75,22 +75,8 @@
             target = target.parent
         #
         try:
-            if current.__main:
-                if target.__main:
-                    # switch from main to main
-                    if unbound_method == _continulet.throw:
-                        raise args[0], args[1], args[2]
-                    (args,) = args
-                else:
-                    # enter from main to target
-                    args = unbound_method(target, *args)
-            else:
-                if target.__main:
-                    # leave to go to target=main
-                    args = unbound_method(current, *args)
-                else:
-                    # switch from non-main to non-main
-                    args = unbound_method(current, *args, to=target)
+            unbound_method = getattr(_continulet, methodname)
+            args = unbound_method(current, *args, to=target)
         except GreenletExit, e:
             args = (e,)
         finally:
@@ -110,7 +96,16 @@
 
     @property
     def gr_frame(self):
-        raise NotImplementedError("attribute 'gr_frame' of greenlet objects")
+        # xxx this doesn't work when called on either the current or
+        # the main greenlet of another thread
+        if self is getcurrent():
+            return None
+        if self.__main:
+            self = getcurrent()
+        f = _continulet.__reduce__(self)[2][0]
+        if not f:
+            return None
+        return f.f_back.f_back.f_back   # go past start(), __switch(), switch()
 
 # ____________________________________________________________
 # Internal stuff
@@ -138,8 +133,7 @@
     try:
         res = greenlet.run(*args)
     finally:
-        if greenlet.parent is not _tls.main:
-            _continuation.permute(greenlet, greenlet.parent)
+        _continuation.permute(greenlet, greenlet.parent)
     return (res,)
 
 def _greenlet_throw(greenlet, exc, value, tb):
@@ -147,5 +141,4 @@
     try:
         raise exc, value, tb
     finally:
-        if greenlet.parent is not _tls.main:
-            _continuation.permute(greenlet, greenlet.parent)
+        _continuation.permute(greenlet, greenlet.parent)
diff --git a/lib_pypy/pypy_test/test_stackless_pickling.py b/lib_pypy/pypy_test/test_stackless_pickling.py
--- a/lib_pypy/pypy_test/test_stackless_pickling.py
+++ b/lib_pypy/pypy_test/test_stackless_pickling.py
@@ -1,7 +1,3 @@
-"""
-this test should probably not run from CPython or py.py.
-I'm not entirely sure, how to do that.
-"""
 from __future__ import absolute_import
 from py.test import skip
 try:
@@ -16,11 +12,15 @@
 
 class Test_StacklessPickling:
 
+    def test_pickle_main_coroutine(self):
+        import stackless, pickle
+        s = pickle.dumps(stackless.coroutine.getcurrent())
+        print s
+        c = pickle.loads(s)
+        assert c is stackless.coroutine.getcurrent()
+
     def test_basic_tasklet_pickling(self):
-        try:
-            import stackless
-        except ImportError:
-            skip("can't load stackless and don't know why!!!")
+        import stackless
         from stackless import run, schedule, tasklet
         import pickle
 
diff --git a/lib_pypy/pyrepl/completing_reader.py b/lib_pypy/pyrepl/completing_reader.py
--- a/lib_pypy/pyrepl/completing_reader.py
+++ b/lib_pypy/pyrepl/completing_reader.py
@@ -229,7 +229,8 @@
 
     def after_command(self, cmd):
         super(CompletingReader, self).after_command(cmd)
-        if not isinstance(cmd, complete) and not isinstance(cmd, self_insert):
+        if not isinstance(cmd, self.commands['complete']) \
+           and not isinstance(cmd, self.commands['self_insert']):
             self.cmpltn_reset()
 
     def calc_screen(self):
diff --git a/lib_pypy/stackless.py b/lib_pypy/stackless.py
--- a/lib_pypy/stackless.py
+++ b/lib_pypy/stackless.py
@@ -5,7 +5,6 @@
 """
 
 
-import traceback
 import _continuation
 
 class TaskletExit(Exception):
@@ -14,34 +13,46 @@
 CoroutineExit = TaskletExit
 
 
+def _coroutine_getcurrent():
+    "Returns the current coroutine (i.e. the one which called this function)."
+    try:
+        return _tls.current_coroutine
+    except AttributeError:
+        # first call in this thread: current == main
+        return _coroutine_getmain()
+
+def _coroutine_getmain():
+    try:
+        return _tls.main_coroutine
+    except AttributeError:
+        # create the main coroutine for this thread
+        continulet = _continuation.continulet
+        main = coroutine()
+        main._frame = continulet.__new__(continulet)
+        main._is_started = -1
+        _tls.current_coroutine = _tls.main_coroutine = main
+        return _tls.main_coroutine
+
+
 class coroutine(object):
-    "we can't have continulet as a base, because continulets can't be rebound"
-    _is_main = False
+    _is_started = 0      # 0=no, 1=yes, -1=main
 
     def __init__(self):
         self._frame = None
-        self.is_zombie = False
-
-    def __getattr__(self, attr):
-        return getattr(self._frame, attr)
-
-    def __del__(self):
-        self.is_zombie = True
-        del self._frame
-        self._frame = None
 
     def bind(self, func, *argl, **argd):
         """coro.bind(f, *argl, **argd) -> None.
            binds function f to coro. f will be called with
            arguments *argl, **argd
         """
-        if self._frame is None or not self._frame.is_pending():
-            def run(c):
-                _tls.current_coroutine = self
-                return func(*argl, **argd)
-            self._frame = frame = _continuation.continulet(run)
-        else:
+        if self.is_alive:
             raise ValueError("cannot bind a bound coroutine")
+        def run(c):
+            _tls.current_coroutine = self
+            self._is_started = 1
+            return func(*argl, **argd)
+        self._is_started = 0
+        self._frame = _continuation.continulet(run)
 
     def switch(self):
         """coro.switch() -> returnvalue
@@ -49,63 +60,38 @@
            f finishes, the returnvalue is that of f, otherwise
            None is returned
         """
-        current = _getcurrent()
+        current = _coroutine_getcurrent()
         try:
-            if current._is_main:
-                if self._is_main:
-                    pass
-                else:
-                    self._frame.switch()
-            else:
-                if self._is_main:
-                    current._frame.switch()
-                else:
-                    current._frame.switch(to=self._frame)
+            current._frame.switch(to=self._frame)
         finally:
             _tls.current_coroutine = current
 
     def kill(self):
         """coro.kill() : kill coroutine coro"""
-        current = _getcurrent()
+        current = _coroutine_getcurrent()
         try:
-            if current._is_main:
-                if self._is_main:
-                    raise CoroutineExit
-                else:
-                    self._frame.throw(CoroutineExit)
-            else:
-                if self._is_main:
-                    current._frame.throw(CoroutineExit)
-                else:
-                    current._frame.throw(CoroutineExit, to=self._frame)
+            current._frame.throw(CoroutineExit, to=self._frame)
         finally:
             _tls.current_coroutine = current
 
-    def _is_alive(self):
-        if self._frame is None:
-            return False
-        return not self._frame.is_pending()
-    is_alive = property(_is_alive)
-    del _is_alive
+    @property
+    def is_alive(self):
+        return self._is_started < 0 or (
+            self._frame is not None and self._frame.is_pending())
 
-    def getcurrent():
-        """coroutine.getcurrent() -> the currently running coroutine"""
-        return _getcurrent()
-    getcurrent = staticmethod(getcurrent)
+    @property
+    def is_zombie(self):
+        return self._is_started > 0 and not self._frame.is_pending()
+
+    getcurrent = staticmethod(_coroutine_getcurrent)
 
     def __reduce__(self):
-        raise TypeError, 'pickling is not possible based upon continulets'
+        if self._is_started < 0:
+            return _coroutine_getmain, ()
+        else:
+            return type(self), (), self.__dict__
 
 
-def _getcurrent():
-    "Returns the current coroutine (i.e. the one which called this function)."
-    try:
-        return _tls.current_coroutine
-    except AttributeError:
-        # first call in this thread: current == main
-        _coroutine_create_main()
-        return _tls.current_coroutine
-
 try:
     from thread import _local
 except ImportError:
@@ -114,13 +100,8 @@
 
 _tls = _local()
 
-def _coroutine_create_main():
-    # create the main coroutine for this thread
-    _tls.current_coroutine = None
-    main_coroutine = coroutine()
-    main_coroutine._is_main = True
-    _tls.main_coroutine = main_coroutine
-    _tls.current_coroutine = main_coroutine
+
+# ____________________________________________________________
 
 
 from collections import deque
@@ -166,10 +147,7 @@
     _last_task = next
     assert not next.blocked
     if next is not current:
-        #try:
-            next.switch()
-        #except CoroutineExit:  --- they are the same anyway
-        #    raise TaskletExit
+        next.switch()
     return current
 
 def set_schedule_callback(callback):
@@ -193,34 +171,6 @@
         raise self.type, self.value, self.traceback
 
 #
-# helpers for pickling
-#
-
-_stackless_primitive_registry = {}
-
-def register_stackless_primitive(thang, retval_expr='None'):
-    import types
-    func = thang
-    if isinstance(thang, types.MethodType):
-        func = thang.im_func
-    code = func.func_code
-    _stackless_primitive_registry[code] = retval_expr
-    # It is not too nice to attach info via the code object, but
-    # I can't think of a better solution without a real transform.
-
-def rewrite_stackless_primitive(coro_state, alive, tempval):
-    flags, frame, thunk, parent = coro_state
-    while frame is not None:
-        retval_expr = _stackless_primitive_registry.get(frame.f_code)
-        if retval_expr:
-            # this tasklet needs to stop pickling here and return its value.
-            tempval = eval(retval_expr, globals(), frame.f_locals)
-            coro_state = flags, frame, thunk, parent
-            break
-        frame = frame.f_back
-    return coro_state, alive, tempval
-
-#
 #
 
 class channel(object):
@@ -372,8 +322,6 @@
         """
         return self._channel_action(None, -1)
 
-    register_stackless_primitive(receive, retval_expr='receiver.tempval')
-
     def send_exception(self, exp_type, msg):
         self.send(bomb(exp_type, exp_type(msg)))
 
@@ -390,9 +338,8 @@
         the runnables list.
         """
         return self._channel_action(msg, 1)
-            
-    register_stackless_primitive(send)
-            
+
+
 class tasklet(coroutine):
     """
     A tasklet object represents a tiny task in a Python thread.
@@ -474,7 +421,7 @@
 
         self.func = None
         coroutine.bind(self, _func)
-        back = _getcurrent()
+        back = _coroutine_getcurrent()
         coroutine.switch(self)
         self.alive = True
         _scheduler_append(self)
@@ -498,39 +445,6 @@
             raise RuntimeError, "The current tasklet cannot be removed."
             # not sure if I will revive this  " Use t=tasklet().capture()"
         _scheduler_remove(self)
-        
-    def __reduce__(self):
-        one, two, coro_state = coroutine.__reduce__(self)
-        assert one is coroutine
-        assert two == ()
-        # we want to get rid of the parent thing.
-        # for now, we just drop it
-        a, frame, c, d = coro_state
-
-        # Removing all frames related to stackless.py.
-        # They point to stuff we don't want to be pickled.
-
-        pickleframe = frame
-        while frame is not None:
-            if frame.f_code == schedule.func_code:
-                # Removing everything including and after the
-                # call to stackless.schedule()
-                pickleframe = frame.f_back
-                break
-            frame = frame.f_back
-        if d:
-            assert isinstance(d, coroutine)
-        coro_state = a, pickleframe, c, None
-        coro_state, alive, tempval = rewrite_stackless_primitive(coro_state, self.alive, self.tempval)
-        inst_dict = self.__dict__.copy()
-        inst_dict.pop('tempval', None)
-        return self.__class__, (), (coro_state, alive, tempval, inst_dict)
-
-    def __setstate__(self, (coro_state, alive, tempval, inst_dict)):
-        coroutine.__setstate__(self, coro_state)
-        self.__dict__.update(inst_dict)
-        self.alive = alive
-        self.tempval = tempval
 
 def getmain():
     """
@@ -619,30 +533,7 @@
     global _last_task
     _global_task_id = 0
     _main_tasklet = coroutine.getcurrent()
-    try:
-        _main_tasklet.__class__ = tasklet
-    except TypeError: # we are running pypy-c
-        class TaskletProxy(object):
-            """TaskletProxy is needed to give the _main_coroutine tasklet behaviour"""
-            def __init__(self, coro):
-                self._coro = coro
-
-            def __getattr__(self,attr):
-                return getattr(self._coro,attr)
-
-            def __str__(self):
-                return '<tasklet %s a:%s>' % (self._task_id, self.is_alive)
-
-            def __reduce__(self):
-                return getmain, ()
-
-            __repr__ = __str__
-
-
-        global _main_coroutine
-        _main_coroutine = _main_tasklet
-        _main_tasklet = TaskletProxy(_main_tasklet)
-        assert _main_tasklet.is_alive and not _main_tasklet.is_zombie
+    _main_tasklet.__class__ = tasklet         # XXX HAAAAAAAAAAAAAAAAAAAAACK
     _last_task = _main_tasklet
     tasklet._init.im_func(_main_tasklet, label='main')
     _squeue = deque()
diff --git a/py/_code/source.py b/py/_code/source.py
--- a/py/_code/source.py
+++ b/py/_code/source.py
@@ -139,7 +139,7 @@
                 trysource = self[start:end]
                 if trysource.isparseable():
                     return start, end
-        return start, end
+        return start, len(self)
 
     def getblockend(self, lineno):
         # XXX
diff --git a/pypy/annotation/annrpython.py b/pypy/annotation/annrpython.py
--- a/pypy/annotation/annrpython.py
+++ b/pypy/annotation/annrpython.py
@@ -149,7 +149,7 @@
         desc = olddesc.bind_self(classdef)
         args = self.bookkeeper.build_args("simple_call", args_s[:])
         desc.consider_call_site(self.bookkeeper, desc.getcallfamily(), [desc],
-            args, annmodel.s_ImpossibleValue)
+            args, annmodel.s_ImpossibleValue, None)
         result = []
         def schedule(graph, inputcells):
             result.append((graph, inputcells))
diff --git a/pypy/annotation/bookkeeper.py b/pypy/annotation/bookkeeper.py
--- a/pypy/annotation/bookkeeper.py
+++ b/pypy/annotation/bookkeeper.py
@@ -209,8 +209,8 @@
                 self.consider_call_site(call_op)
 
             for pbc, args_s in self.emulated_pbc_calls.itervalues():
-                self.consider_call_site_for_pbc(pbc, 'simple_call', 
-                                                args_s, s_ImpossibleValue)
+                self.consider_call_site_for_pbc(pbc, 'simple_call',
+                                                args_s, s_ImpossibleValue, None)
             self.emulated_pbc_calls = {}
         finally:
             self.leave()
@@ -257,18 +257,18 @@
             args_s = [lltype_to_annotation(adtmeth.ll_ptrtype)] + args_s
         if isinstance(s_callable, SomePBC):
             s_result = binding(call_op.result, s_ImpossibleValue)
-            self.consider_call_site_for_pbc(s_callable,
-                                            call_op.opname,
-                                            args_s, s_result)
+            self.consider_call_site_for_pbc(s_callable, call_op.opname, args_s,
+                                            s_result, call_op)
 
-    def consider_call_site_for_pbc(self, s_callable, opname, args_s, s_result):
+    def consider_call_site_for_pbc(self, s_callable, opname, args_s, s_result,
+                                   call_op):
         descs = list(s_callable.descriptions)
         if not descs:
             return
         family = descs[0].getcallfamily()
         args = self.build_args(opname, args_s)
         s_callable.getKind().consider_call_site(self, family, descs, args,
-                                                s_result)
+                                                s_result, call_op)
 
     def getuniqueclassdef(self, cls):
         """Get the ClassDef associated with the given user cls.
@@ -656,6 +656,7 @@
                 whence = None
             else:
                 whence = emulated # callback case
+            op = None
             s_previous_result = s_ImpossibleValue
 
         def schedule(graph, inputcells):
@@ -663,7 +664,7 @@
 
         results = []
         for desc in descs:
-            results.append(desc.pycall(schedule, args, s_previous_result))
+            results.append(desc.pycall(schedule, args, s_previous_result, op))
         s_result = unionof(*results)
         return s_result
 
diff --git a/pypy/annotation/description.py b/pypy/annotation/description.py
--- a/pypy/annotation/description.py
+++ b/pypy/annotation/description.py
@@ -255,7 +255,11 @@
             raise TypeError, "signature mismatch: %s" % e.getmsg(self.name)
         return inputcells
 
-    def specialize(self, inputcells):
+    def specialize(self, inputcells, op=None):
+        if (op is None and
+            getattr(self.bookkeeper, "position_key", None) is not None):
+            _, block, i = self.bookkeeper.position_key
+            op = block.operations[i]
         if self.specializer is None:
             # get the specializer based on the tag of the 'pyobj'
             # (if any), according to the current policy
@@ -269,11 +273,14 @@
                 enforceargs = Sig(*enforceargs)
                 self.pyobj._annenforceargs_ = enforceargs
             enforceargs(self, inputcells) # can modify inputcells in-place
-        return self.specializer(self, inputcells)
+        if getattr(self.pyobj, '_annspecialcase_', '').endswith("call_location"):
+            return self.specializer(self, inputcells, op)
+        else:
+            return self.specializer(self, inputcells)
 
-    def pycall(self, schedule, args, s_previous_result):
+    def pycall(self, schedule, args, s_previous_result, op=None):
         inputcells = self.parse_arguments(args)
-        result = self.specialize(inputcells)
+        result = self.specialize(inputcells, op)
         if isinstance(result, FunctionGraph):
             graph = result         # common case
             # if that graph has a different signature, we need to re-parse
@@ -296,17 +303,17 @@
                                              None,       # selfclassdef
                                              name)
 
-    def consider_call_site(bookkeeper, family, descs, args, s_result):
+    def consider_call_site(bookkeeper, family, descs, args, s_result, op):
         shape = rawshape(args)
-        row = FunctionDesc.row_to_consider(descs, args)
+        row = FunctionDesc.row_to_consider(descs, args, op)
         family.calltable_add_row(shape, row)
     consider_call_site = staticmethod(consider_call_site)
 
-    def variant_for_call_site(bookkeeper, family, descs, args):
+    def variant_for_call_site(bookkeeper, family, descs, args, op):
         shape = rawshape(args)
         bookkeeper.enter(None)
         try:
-            row = FunctionDesc.row_to_consider(descs, args)
+            row = FunctionDesc.row_to_consider(descs, args, op)
         finally:
             bookkeeper.leave()
         index = family.calltable_lookup_row(shape, row)
@@ -316,7 +323,7 @@
     def rowkey(self):
         return self
 
-    def row_to_consider(descs, args):
+    def row_to_consider(descs, args, op):
         # see comments in CallFamily
         from pypy.annotation.model import s_ImpossibleValue
         row = {}
@@ -324,7 +331,7 @@
             def enlist(graph, ignore):
                 row[desc.rowkey()] = graph
                 return s_ImpossibleValue   # meaningless
-            desc.pycall(enlist, args, s_ImpossibleValue)
+            desc.pycall(enlist, args, s_ImpossibleValue, op)
         return row
     row_to_consider = staticmethod(row_to_consider)
 
@@ -521,7 +528,7 @@
                             "specialization" % (self.name,))
         return self.getclassdef(None)
 
-    def pycall(self, schedule, args, s_previous_result):
+    def pycall(self, schedule, args, s_previous_result, op=None):
         from pypy.annotation.model import SomeInstance, SomeImpossibleValue
         if self.specialize:
             if self.specialize == 'specialize:ctr_location':
@@ -664,7 +671,7 @@
             cdesc = cdesc.basedesc
         return s_result     # common case
 
-    def consider_call_site(bookkeeper, family, descs, args, s_result):
+    def consider_call_site(bookkeeper, family, descs, args, s_result, op):
         from pypy.annotation.model import SomeInstance, SomePBC, s_None
         if len(descs) == 1:
             # call to a single class, look at the result annotation
@@ -709,7 +716,7 @@
             initdescs[0].mergecallfamilies(*initdescs[1:])
             initfamily = initdescs[0].getcallfamily()
             MethodDesc.consider_call_site(bookkeeper, initfamily, initdescs,
-                                          args, s_None)
+                                          args, s_None, op)
     consider_call_site = staticmethod(consider_call_site)
 
     def getallbases(self):
@@ -782,13 +789,13 @@
     def getuniquegraph(self):
         return self.funcdesc.getuniquegraph()
 
-    def pycall(self, schedule, args, s_previous_result):
+    def pycall(self, schedule, args, s_previous_result, op=None):
         from pypy.annotation.model import SomeInstance
         if self.selfclassdef is None:
             raise Exception("calling %r" % (self,))
         s_instance = SomeInstance(self.selfclassdef, flags = self.flags)
         args = args.prepend(s_instance)
-        return self.funcdesc.pycall(schedule, args, s_previous_result)
+        return self.funcdesc.pycall(schedule, args, s_previous_result, op)
 
     def bind_under(self, classdef, name):
         self.bookkeeper.warning("rebinding an already bound %r" % (self,))
@@ -801,10 +808,10 @@
                                              self.name,
                                              flags)
 
-    def consider_call_site(bookkeeper, family, descs, args, s_result):
+    def consider_call_site(bookkeeper, family, descs, args, s_result, op):
         shape = rawshape(args, nextra=1)     # account for the extra 'self'
         funcdescs = [methoddesc.funcdesc for methoddesc in descs]
-        row = FunctionDesc.row_to_consider(descs, args)
+        row = FunctionDesc.row_to_consider(descs, args, op)
         family.calltable_add_row(shape, row)
     consider_call_site = staticmethod(consider_call_site)
 
@@ -956,16 +963,16 @@
         return '<MethodOfFrozenDesc %r of %r>' % (self.funcdesc,
                                                   self.frozendesc)
 
-    def pycall(self, schedule, args, s_previous_result):
+    def pycall(self, schedule, args, s_previous_result, op=None):
         from pypy.annotation.model import SomePBC
         s_self = SomePBC([self.frozendesc])
         args = args.prepend(s_self)
-        return self.funcdesc.pycall(schedule, args, s_previous_result)
+        return self.funcdesc.pycall(schedule, args, s_previous_result, op)
 
-    def consider_call_site(bookkeeper, family, descs, args, s_result):
+    def consider_call_site(bookkeeper, family, descs, args, s_result, op):
         shape = rawshape(args, nextra=1)    # account for the extra 'self'
         funcdescs = [mofdesc.funcdesc for mofdesc in descs]
-        row = FunctionDesc.row_to_consider(descs, args)
+        row = FunctionDesc.row_to_consider(descs, args, op)
         family.calltable_add_row(shape, row)
     consider_call_site = staticmethod(consider_call_site)
 
diff --git a/pypy/annotation/policy.py b/pypy/annotation/policy.py
--- a/pypy/annotation/policy.py
+++ b/pypy/annotation/policy.py
@@ -1,7 +1,7 @@
 # base annotation policy for specialization
 from pypy.annotation.specialize import default_specialize as default
 from pypy.annotation.specialize import specialize_argvalue, specialize_argtype, specialize_arglistitemtype
-from pypy.annotation.specialize import memo
+from pypy.annotation.specialize import memo, specialize_call_location
 # for some reason, model must be imported first,
 # or we create a cycle.
 from pypy.annotation import model as annmodel
@@ -75,6 +75,7 @@
     specialize__arg = staticmethod(specialize_argvalue) # specialize:arg(N)
     specialize__argtype = staticmethod(specialize_argtype) # specialize:argtype(N)
     specialize__arglistitemtype = staticmethod(specialize_arglistitemtype)
+    specialize__call_location = staticmethod(specialize_call_location)
 
     def specialize__ll(pol, *args):
         from pypy.rpython.annlowlevel import LowLevelAnnotatorPolicy
diff --git a/pypy/annotation/specialize.py b/pypy/annotation/specialize.py
--- a/pypy/annotation/specialize.py
+++ b/pypy/annotation/specialize.py
@@ -370,3 +370,7 @@
     else:
         key = s.listdef.listitem.s_value.knowntype
     return maybe_star_args(funcdesc, key, args_s)
+
+def specialize_call_location(funcdesc, args_s, op):
+    assert op is not None
+    return maybe_star_args(funcdesc, op, args_s)
diff --git a/pypy/annotation/test/test_annrpython.py b/pypy/annotation/test/test_annrpython.py
--- a/pypy/annotation/test/test_annrpython.py
+++ b/pypy/annotation/test/test_annrpython.py
@@ -1099,8 +1099,8 @@
         allocdesc = a.bookkeeper.getdesc(alloc)
         s_C1 = a.bookkeeper.immutablevalue(C1)
         s_C2 = a.bookkeeper.immutablevalue(C2)
-        graph1 = allocdesc.specialize([s_C1])
-        graph2 = allocdesc.specialize([s_C2])
+        graph1 = allocdesc.specialize([s_C1], None)
+        graph2 = allocdesc.specialize([s_C2], None)
         assert a.binding(graph1.getreturnvar()).classdef == C1df
         assert a.binding(graph2.getreturnvar()).classdef == C2df
         assert graph1 in a.translator.graphs
@@ -1135,8 +1135,8 @@
         allocdesc = a.bookkeeper.getdesc(alloc)
         s_C1 = a.bookkeeper.immutablevalue(C1)
         s_C2 = a.bookkeeper.immutablevalue(C2)
-        graph1 = allocdesc.specialize([s_C1, s_C2])
-        graph2 = allocdesc.specialize([s_C2, s_C2])
+        graph1 = allocdesc.specialize([s_C1, s_C2], None)
+        graph2 = allocdesc.specialize([s_C2, s_C2], None)
         assert a.binding(graph1.getreturnvar()).classdef == C1df
         assert a.binding(graph2.getreturnvar()).classdef == C2df
         assert graph1 in a.translator.graphs
@@ -1194,6 +1194,19 @@
         assert len(executedesc._cache[(0, 'star', 2)].startblock.inputargs) == 4
         assert len(executedesc._cache[(1, 'star', 3)].startblock.inputargs) == 5
 
+    def test_specialize_call_location(self):
+        def g(a):
+            return a
+        g._annspecialcase_ = "specialize:call_location"
+        def f(x):
+            return g(x)
+        f._annspecialcase_ = "specialize:argtype(0)"
+        def h(y):
+            w = f(y)
+            return int(f(str(y))) + w
+        a = self.RPythonAnnotator()
+        assert a.build_types(h, [int]) == annmodel.SomeInteger()
+
     def test_assert_list_doesnt_lose_info(self):
         class T(object):
             pass
diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py
--- a/pypy/interpreter/argument.py
+++ b/pypy/interpreter/argument.py
@@ -125,6 +125,7 @@
 
     ###  Manipulation  ###
 
+    @jit.look_inside_iff(lambda self: not self._dont_jit)
     def unpack(self): # slowish
         "Return a ([w1,w2...], {'kw':w3...}) pair."
         kwds_w = {}
@@ -245,6 +246,8 @@
 
     ###  Parsing for function calls  ###
 
+    # XXX: this should be @jit.look_inside_iff, but we need key word arguments,
+    # and it doesn't support them for now.
     def _match_signature(self, w_firstarg, scope_w, signature, defaults_w=None,
                          blindargs=0):
         """Parse args and kwargs according to the signature of a code object,
diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py
--- a/pypy/interpreter/executioncontext.py
+++ b/pypy/interpreter/executioncontext.py
@@ -1,5 +1,4 @@
 import sys
-from pypy.interpreter.miscutils import Stack
 from pypy.interpreter.error import OperationError
 from pypy.rlib.rarithmetic import LONG_BIT
 from pypy.rlib.unroll import unrolling_iterable
diff --git a/pypy/interpreter/miscutils.py b/pypy/interpreter/miscutils.py
--- a/pypy/interpreter/miscutils.py
+++ b/pypy/interpreter/miscutils.py
@@ -2,154 +2,6 @@
 Miscellaneous utilities.
 """
 
-import types
-
-from pypy.rlib.rarithmetic import r_uint
-
-class RootStack:
-    pass
-
-class Stack(RootStack):
-    """Utility class implementing a stack."""
-
-    _annspecialcase_ = "specialize:ctr_location" # polymorphic
-
-    def __init__(self):
-        self.items = []
-
-    def clone(self):
-        s = self.__class__()
-        for item in self.items:
-            try:
-                item = item.clone()
-            except AttributeError:
-                pass
-            s.push(item)
-        return s
-
-    def push(self, item):
-        self.items.append(item)
-
-    def pop(self):
-        return self.items.pop()
-
-    def drop(self, n):
-        if n > 0:
-            del self.items[-n:]
-
-    def top(self, position=0):
-        """'position' is 0 for the top of the stack, 1 for the item below,
-        and so on.  It must not be negative."""
-        if position < 0:
-            raise ValueError, 'negative stack position'
-        if position >= len(self.items):
-            raise IndexError, 'not enough entries in stack'
-        return self.items[~position]
-
-    def set_top(self, value, position=0):
-        """'position' is 0 for the top of the stack, 1 for the item below,
-        and so on.  It must not be negative."""
-        if position < 0:
-            raise ValueError, 'negative stack position'
-        if position >= len(self.items):
-            raise IndexError, 'not enough entries in stack'
-        self.items[~position] = value
-
-    def depth(self):
-        return len(self.items)
-
-    def empty(self):
-        return len(self.items) == 0
-
-
-class FixedStack(RootStack):
-    _annspecialcase_ = "specialize:ctr_location" # polymorphic
-
-    # unfortunately, we have to re-do everything
-    def __init__(self):
-        pass
-
-    def setup(self, stacksize):
-        self.ptr = r_uint(0) # we point after the last element
-        self.items = [None] * stacksize
-
-    def clone(self):
-        # this is only needed if we support flow space
-        s = self.__class__()
-        s.setup(len(self.items))
-        for item in self.items[:self.ptr]:
-            try:
-                item = item.clone()
-            except AttributeError:
-                pass
-            s.push(item)
-        return s
-
-    def push(self, item):
-        ptr = self.ptr
-        self.items[ptr] = item
-        self.ptr = ptr + 1
-
-    def pop(self):
-        ptr = self.ptr - 1
-        ret = self.items[ptr]   # you get OverflowError if the stack is empty
-        self.items[ptr] = None
-        self.ptr = ptr
-        return ret
-
-    def drop(self, n):
-        while n > 0:
-            n -= 1
-            self.ptr -= 1
-            self.items[self.ptr] = None
-
-    def top(self, position=0):
-        # for a fixed stack, we assume correct indices
-        return self.items[self.ptr + ~position]
-
-    def set_top(self, value, position=0):
-        # for a fixed stack, we assume correct indices
-        self.items[self.ptr + ~position] = value
-
-    def depth(self):
-        return self.ptr
-
-    def empty(self):
-        return not self.ptr
-
-
-class InitializedClass(type):
-    """NOT_RPYTHON.  A meta-class that allows a class to initialize itself (or
-    its subclasses) by calling __initclass__() as a class method."""
-    def __init__(self, name, bases, dict):
-        super(InitializedClass, self).__init__(name, bases, dict)
-        for basecls in self.__mro__:
-            raw = basecls.__dict__.get('__initclass__')
-            if isinstance(raw, types.FunctionType):
-                raw(self)   # call it as a class method
-
-
-class RwDictProxy(object):
-    """NOT_RPYTHON.  A dict-like class standing for 'cls.__dict__', to work
-    around the fact that the latter is a read-only proxy for new-style
-    classes."""
-    
-    def __init__(self, cls):
-        self.cls = cls
-
-    def __getitem__(self, attr):
-        return self.cls.__dict__[attr]
-
-    def __setitem__(self, attr, value):
-        setattr(self.cls, attr, value)
-
-    def __contains__(self, value):
-        return value in self.cls.__dict__
-
-    def items(self):
-        return self.cls.__dict__.items()
-
-
 class ThreadLocals:
     """Pseudo thread-local storage, for 'space.threadlocals'.
     This is not really thread-local at all; the intention is that the PyPy
diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py
--- a/pypy/interpreter/pyframe.py
+++ b/pypy/interpreter/pyframe.py
@@ -66,7 +66,7 @@
         make_sure_not_resized(self.locals_stack_w)
         check_nonneg(self.nlocals)
         #
-        if space.config.objspace.honor__builtins__ and w_globals is not None:
+        if space.config.objspace.honor__builtins__:
             self.builtin = space.builtin.pick_builtin(w_globals)
         # regular functions always have CO_OPTIMIZED and CO_NEWLOCALS.
         # class bodies only have CO_NEWLOCALS.
diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py
--- a/pypy/interpreter/pyparser/future.py
+++ b/pypy/interpreter/pyparser/future.py
@@ -225,14 +225,16 @@
             raise DoneException
         self.consume_whitespace()
 
-    def consume_whitespace(self):
+    def consume_whitespace(self, newline_ok=False):
         while 1:
             c = self.getc()
             if c in whitespace:
                 self.pos += 1
                 continue
-            elif c == '\\':
-                self.pos += 1
+            elif c == '\\' or newline_ok:
+                slash = c == '\\'
+                if slash:
+                    self.pos += 1
                 c = self.getc()
                 if c == '\n':
                     self.pos += 1
@@ -243,8 +245,10 @@
                     if self.getc() == '\n':
                         self.pos += 1
                         self.atbol()
+                elif slash:
+                    raise DoneException
                 else:
-                    raise DoneException
+                    return
             else:
                 return
 
@@ -281,7 +285,7 @@
             return
         else:
             self.pos += 1
-            self.consume_whitespace()
+            self.consume_whitespace(paren_list)
             if paren_list and self.getc() == ')':
                 self.pos += 1
                 return # Handles trailing comma inside parenthesis
diff --git a/pypy/interpreter/pyparser/test/test_futureautomaton.py b/pypy/interpreter/pyparser/test/test_futureautomaton.py
--- a/pypy/interpreter/pyparser/test/test_futureautomaton.py
+++ b/pypy/interpreter/pyparser/test/test_futureautomaton.py
@@ -3,7 +3,7 @@
 from pypy.tool import stdlib___future__ as fut
 
 def run(s):
-    f = future.FutureAutomaton(future.futureFlags_2_5, s)
+    f = future.FutureAutomaton(future.futureFlags_2_7, s)
     try:
         f.start()
     except future.DoneException:
@@ -113,6 +113,14 @@
     assert f.lineno == 1
     assert f.col_offset == 0
 
+def test_paren_with_newline():
+    s = 'from __future__ import (division,\nabsolute_import)\n'
+    f = run(s)
+    assert f.pos == len(s)
+    assert f.flags == (fut.CO_FUTURE_DIVISION | fut.CO_FUTURE_ABSOLUTE_IMPORT)
+    assert f.lineno == 1
+    assert f.col_offset == 0
+
 def test_multiline():
     s = '"abc" #def\n  #ghi\nfrom  __future__ import (division as b, generators,)\nfrom __future__ import with_statement\n'
     f = run(s)
diff --git a/pypy/jit/backend/llsupport/llmodel.py b/pypy/jit/backend/llsupport/llmodel.py
--- a/pypy/jit/backend/llsupport/llmodel.py
+++ b/pypy/jit/backend/llsupport/llmodel.py
@@ -496,6 +496,16 @@
         u = lltype.cast_opaque_ptr(lltype.Ptr(rstr.UNICODE), string)
         u.chars[index] = unichr(newvalue)
 
+    def bh_copystrcontent(self, src, dst, srcstart, dststart, length):
+        src = lltype.cast_opaque_ptr(lltype.Ptr(rstr.STR), src)
+        dst = lltype.cast_opaque_ptr(lltype.Ptr(rstr.STR), dst)
+        rstr.copy_string_contents(src, dst, srcstart, dststart, length)
+
+    def bh_copyunicodecontent(self, src, dst, srcstart, dststart, length):
+        src = lltype.cast_opaque_ptr(lltype.Ptr(rstr.UNICODE), src)
+        dst = lltype.cast_opaque_ptr(lltype.Ptr(rstr.UNICODE), dst)
+        rstr.copy_unicode_contents(src, dst, srcstart, dststart, length)
+
     def bh_call_i(self, func, calldescr, args_i, args_r, args_f):
         assert isinstance(calldescr, BaseIntCallDescr)
         if not we_are_translated():
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -78,7 +78,7 @@
         Optionally, return a ``ops_offset`` dictionary.  See the docstring of
         ``compiled_loop`` for more informations about it.
         """
-        raise NotImplementedError    
+        raise NotImplementedError
 
     def dump_loop_token(self, looptoken):
         """Print a disassembled version of looptoken to stdout"""
@@ -298,6 +298,10 @@
         raise NotImplementedError
     def bh_unicodesetitem(self, string, index, newvalue):
         raise NotImplementedError
+    def bh_copystrcontent(self, src, dst, srcstart, dststart, length):
+        raise NotImplementedError
+    def bh_copyunicodecontent(self, src, dst, srcstart, dststart, length):
+        raise NotImplementedError
 
     def force(self, force_token):
         raise NotImplementedError
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -1158,6 +1158,12 @@
             return SpaceOperation('%s_assert_green' % kind, args, None)
         elif oopspec_name == 'jit.current_trace_length':
             return SpaceOperation('current_trace_length', [], op.result)
+        elif oopspec_name == 'jit.isconstant':
+            kind = getkind(args[0].concretetype)
+            return SpaceOperation('%s_isconstant' % kind, args, op.result)
+        elif oopspec_name == 'jit.isvirtual':
+            kind = getkind(args[0].concretetype)
+            return SpaceOperation('%s_isvirtual' % kind, args, op.result)
         else:
             raise AssertionError("missing support for %r" % oopspec_name)
 
@@ -1415,6 +1421,14 @@
         else:
             assert 0, "args[0].concretetype must be STR or UNICODE"
         #
+        if oopspec_name == 'stroruni.copy_contents':
+            if SoU.TO == rstr.STR:
+                new_op = 'copystrcontent'
+            elif SoU.TO == rstr.UNICODE:
+                new_op = 'copyunicodecontent'
+            else:
+                assert 0
+            return SpaceOperation(new_op, args, op.result)
         if oopspec_name == "stroruni.equal":
             for otherindex, othername, argtypes, resulttype in [
                 (EffectInfo.OS_STREQ_SLICE_CHECKNULL,
diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -835,6 +835,18 @@
     def bhimpl_current_trace_length():
         return -1
 
+    @arguments("i", returns="i")
+    def bhimpl_int_isconstant(x):
+        return False
+
+    @arguments("r", returns="i")
+    def bhimpl_ref_isconstant(x):
+        return False
+
+    @arguments("r", returns="i")
+    def bhimpl_ref_isvirtual(x):
+        return False
+
     # ----------
     # the main hints and recursive calls
 
@@ -1224,6 +1236,9 @@
     @arguments("cpu", "r", "i", "i")
     def bhimpl_strsetitem(cpu, string, index, newchr):
         cpu.bh_strsetitem(string, index, newchr)
+    @arguments("cpu", "r", "r", "i", "i", "i")
+    def bhimpl_copystrcontent(cpu, src, dst, srcstart, dststart, length):
+        cpu.bh_copystrcontent(src, dst, srcstart, dststart, length)
 
     @arguments("cpu", "i", returns="r")
     def bhimpl_newunicode(cpu, length):
@@ -1237,6 +1252,9 @@
     @arguments("cpu", "r", "i", "i")
     def bhimpl_unicodesetitem(cpu, unicode, index, newchr):
         cpu.bh_unicodesetitem(unicode, index, newchr)
+    @arguments("cpu", "r", "r", "i", "i", "i")
+    def bhimpl_copyunicodecontent(cpu, src, dst, srcstart, dststart, length):
+        cpu.bh_copyunicodecontent(src, dst, srcstart, dststart, length)
 
     @arguments(returns=(longlong.is_64_bit and "i" or "f"))
     def bhimpl_ll_read_timestamp():
@@ -1441,7 +1459,7 @@
 def resume_in_blackhole(metainterp_sd, jitdriver_sd, resumedescr,
                         all_virtuals=None):
     from pypy.jit.metainterp.resume import blackhole_from_resumedata
-    debug_start('jit-blackhole')
+    #debug_start('jit-blackhole')
     metainterp_sd.profiler.start_blackhole()
     blackholeinterp = blackhole_from_resumedata(
         metainterp_sd.blackholeinterpbuilder,
@@ -1460,12 +1478,12 @@
         _run_forever(blackholeinterp, current_exc)
     finally:
         metainterp_sd.profiler.end_blackhole()
-        debug_stop('jit-blackhole')
+        #debug_stop('jit-blackhole')
 
 def convert_and_run_from_pyjitpl(metainterp, raising_exception=False):
     # Get a chain of blackhole interpreters and fill them by copying
     # 'metainterp.framestack'.
-    debug_start('jit-blackhole')
+    #debug_start('jit-blackhole')
     metainterp_sd = metainterp.staticdata
     metainterp_sd.profiler.start_blackhole()
     nextbh = None
@@ -1488,4 +1506,4 @@
         _run_forever(firstbh, current_exc)
     finally:
         metainterp_sd.profiler.end_blackhole()
-        debug_stop('jit-blackhole')
+        #debug_stop('jit-blackhole')
diff --git a/pypy/jit/metainterp/heapcache.py b/pypy/jit/metainterp/heapcache.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/metainterp/heapcache.py
@@ -0,0 +1,210 @@
+from pypy.jit.metainterp.history import ConstInt
+from pypy.jit.metainterp.resoperation import rop
+
+
+class HeapCache(object):
+    def __init__(self):
+        self.reset()
+
+    def reset(self):
+        # contains boxes where the class is already known
+        self.known_class_boxes = {}
+        # stores the boxes that contain newly allocated objects; this maps the
+        # boxes to a bool, which indicates whether the object has escaped the
+        # trace (True means the box never escaped, False means it did
+        # escape); its presence in the mapping shows that it was allocated
+        # inside the trace
+        self.new_boxes = {}
+        # Tracks which boxes should be marked as escaped when the key box
+        # escapes.
+        self.dependencies = {}
+        # contains frame boxes that are not virtualizables
+        self.nonstandard_virtualizables = {}
+        # heap cache
+        # maps descrs to {from_box: to_box} dicts
+        self.heap_cache = {}
+        # heap array cache
+        # maps descrs to {index: {from_box: to_box}} dicts
+        self.heap_array_cache = {}
+        # cache the length of arrays
+        self.length_cache = {}
+
+    def invalidate_caches(self, opnum, descr, argboxes):
+        self.mark_escaped(opnum, argboxes)
+        self.clear_caches(opnum, descr, argboxes)
+
+    def mark_escaped(self, opnum, argboxes):
+        idx = 0
+        if opnum == rop.SETFIELD_GC:
+            assert len(argboxes) == 2
+            box, valuebox = argboxes
+            if self.is_unescaped(box) and self.is_unescaped(valuebox):
+                self.dependencies.setdefault(box, []).append(valuebox)
+            else:
+                self._escape(valuebox)
+        # GETFIELD_GC doesn't escape its argument
+        elif opnum != rop.GETFIELD_GC:
+            for box in argboxes:
+                # setarrayitem_gc doesn't escape its first argument
+                if not (idx == 0 and opnum in [rop.SETARRAYITEM_GC]):
+                    self._escape(box)
+                idx += 1
+
+    def _escape(self, box):
+        if box in self.new_boxes:
+            self.new_boxes[box] = False
+        if box in self.dependencies:
+            for dep in self.dependencies[box]:
+                self._escape(dep)
+            del self.dependencies[box]
+
+    def clear_caches(self, opnum, descr, argboxes):
+        if opnum == rop.SETFIELD_GC:
+            return
+        if opnum == rop.SETARRAYITEM_GC:
+            return
+        if opnum == rop.SETFIELD_RAW:
+            return
+        if opnum == rop.SETARRAYITEM_RAW:
+            return
+        if rop._OVF_FIRST <= opnum <= rop._OVF_LAST:
+            return
+        if rop._NOSIDEEFFECT_FIRST <= opnum <= rop._NOSIDEEFFECT_LAST:
+            return
+        if opnum == rop.CALL or opnum == rop.CALL_LOOPINVARIANT:
+            effectinfo = descr.get_extra_info()
+            ef = effectinfo.extraeffect
+            if ef == effectinfo.EF_LOOPINVARIANT or \
+               ef == effectinfo.EF_ELIDABLE_CANNOT_RAISE or \
+               ef == effectinfo.EF_ELIDABLE_CAN_RAISE:
+                return
+            # A special case for ll_arraycopy, because it is so common, and its
+            # effects are so well defined.
+            elif effectinfo.oopspecindex == effectinfo.OS_ARRAYCOPY:
+                # The destination box
+                if argboxes[2] in self.new_boxes:
+                    # XXX: no descr here so we invalidate any of them, not just
+                    # of the correct type
+                    # XXX: in theory the indices of the copy could be looked at
+                    # as well
+                    for descr, cache in self.heap_array_cache.iteritems():
+                        for idx, cache in cache.iteritems():
+                            for frombox in cache.keys():
+                                if frombox not in self.new_boxes:
+                                    del cache[frombox]
+                    return
+
+        self.heap_cache.clear()
+        self.heap_array_cache.clear()
+
+    def is_class_known(self, box):
+        return box in self.known_class_boxes
+
+    def class_now_known(self, box):
+        self.known_class_boxes[box] = None
+
+    def is_nonstandard_virtualizable(self, box):
+        return box in self.nonstandard_virtualizables
+
+    def nonstandard_virtualizables_now_known(self, box):
+        self.nonstandard_virtualizables[box] = None
+
+    def is_unescaped(self, box):
+        return self.new_boxes.get(box, False)
+
+    def new(self, box):
+        self.new_boxes[box] = True
+
+    def new_array(self, box, lengthbox):
+        self.new(box)
+        self.arraylen_now_known(box, lengthbox)
+
+    def getfield(self, box, descr):
+        d = self.heap_cache.get(descr, None)
+        if d:
+            tobox = d.get(box, None)
+            if tobox:
+                return tobox
+        return None
+
+    def getfield_now_known(self, box, descr, fieldbox):
+        self.heap_cache.setdefault(descr, {})[box] = fieldbox
+
+    def setfield(self, box, descr, fieldbox):
+        d = self.heap_cache.get(descr, None)
+        new_d = self._do_write_with_aliasing(d, box, fieldbox)
+        self.heap_cache[descr] = new_d
+
+    def _do_write_with_aliasing(self, d, box, fieldbox):
+        # slightly subtle logic here
+        # a write to an arbitrary box, all other boxes can alias this one
+        if not d or box not in self.new_boxes:
+            # therefore we throw away the cache
+            return {box: fieldbox}
+        # the object we are writing to is freshly allocated
+        # only remove some boxes from the cache
+        new_d = {}
+        for frombox, tobox in d.iteritems():
+            # the other box is *also* freshly allocated
+            # therefore frombox and box *must* contain different objects
+            # thus we can keep it in the cache
+            if frombox in self.new_boxes:
+                new_d[frombox] = tobox
+        new_d[box] = fieldbox
+        return new_d
+
+    def getarrayitem(self, box, descr, indexbox):
+        if not isinstance(indexbox, ConstInt):
+            return
+        index = indexbox.getint()
+        cache = self.heap_array_cache.get(descr, None)
+        if cache:
+            indexcache = cache.get(index, None)
+            if indexcache is not None:
+                return indexcache.get(box, None)
+
+    def getarrayitem_now_known(self, box, descr, indexbox, valuebox):
+        if not isinstance(indexbox, ConstInt):
+            return
+        index = indexbox.getint()
+        cache = self.heap_array_cache.setdefault(descr, {})
+        indexcache = cache.get(index, None)
+        if indexcache is not None:
+            indexcache[box] = valuebox
+        else:
+            cache[index] = {box: valuebox}
+
+    def setarrayitem(self, box, descr, indexbox, valuebox):
+        if not isinstance(indexbox, ConstInt):
+            cache = self.heap_array_cache.get(descr, None)
+            if cache is not None:
+                cache.clear()
+            return
+        index = indexbox.getint()
+        cache = self.heap_array_cache.setdefault(descr, {})
+        indexcache = cache.get(index, None)
+        cache[index] = self._do_write_with_aliasing(indexcache, box, valuebox)
+
+    def arraylen(self, box):
+        return self.length_cache.get(box, None)
+
+    def arraylen_now_known(self, box, lengthbox):
+        self.length_cache[box] = lengthbox
+
+    def _replace_box(self, d, oldbox, newbox):
+        new_d = {}
+        for frombox, tobox in d.iteritems():
+            if frombox is oldbox:
+                frombox = newbox
+            if tobox is oldbox:
+                tobox = newbox
+            new_d[frombox] = tobox
+        return new_d
+
+    def replace_box(self, oldbox, newbox):
+        for descr, d in self.heap_cache.iteritems():
+            self.heap_cache[descr] = self._replace_box(d, oldbox, newbox)
+        for descr, d in self.heap_array_cache.iteritems():
+            for index, cache in d.iteritems():
+                d[index] = self._replace_box(cache, oldbox, newbox)
+        self.length_cache = self._replace_box(self.length_cache, oldbox, newbox)
diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -71,7 +71,7 @@
             guards.append(op)
         elif self.level == LEVEL_KNOWNCLASS:
             op = ResOperation(rop.GUARD_NONNULL, [box], None)
-            guards.append(op)            
+            guards.append(op)
             op = ResOperation(rop.GUARD_CLASS, [box, self.known_class], None)
             guards.append(op)
         else:
@@ -112,7 +112,7 @@
                     self.lenbound.bound.intersect(other.lenbound.bound)
                 else:
                     self.lenbound = other.lenbound.clone()
-                    
+
 
     def force_box(self):
         return self.box
@@ -146,7 +146,7 @@
         assert isinstance(constbox, Const)
         self.box = constbox
         self.level = LEVEL_CONSTANT
-        
+
         if isinstance(constbox, ConstInt):
             val = constbox.getint()
             self.intbound = IntBound(val, val)
@@ -223,6 +223,9 @@
     def __init__(self, box):
         self.make_constant(box)
 
+    def __repr__(self):
+        return 'Constant(%r)' % (self.box,)
+
 CONST_0      = ConstInt(0)
 CONST_1      = ConstInt(1)
 CVAL_ZERO    = ConstantValue(CONST_0)
@@ -378,7 +381,7 @@
         new.set_optimizations(optimizations)
         new.quasi_immutable_deps = self.quasi_immutable_deps
         return new
-        
+
     def produce_potential_short_preamble_ops(self, sb):
         raise NotImplementedError('This is implemented in unroll.UnrollableOptimizer')
 
@@ -505,9 +508,9 @@
         if op.returns_bool_result():
             self.bool_boxes[self.getvalue(op.result)] = None
         self._emit_operation(op)
-        
+
     @specialize.argtype(0)
-    def _emit_operation(self, op):        
+    def _emit_operation(self, op):
         for i in range(op.numargs()):
             arg = op.getarg(i)
             try:
@@ -568,7 +571,7 @@
                 arg = value.get_key_box()
             args[i] = arg
         args[n] = ConstInt(op.getopnum())
-        args[n+1] = op.getdescr()
+        args[n + 1] = op.getdescr()
         return args
 
     @specialize.argtype(0)
@@ -616,7 +619,7 @@
 
     def remember_emitting_pure(self, op):
         pass
-    
+
     def constant_fold(self, op):
         argboxes = [self.get_constant_box(op.getarg(i))
                     for i in range(op.numargs())]
@@ -658,9 +661,9 @@
             arrayvalue = self.getvalue(op.getarg(0))
             arrayvalue.make_len_gt(MODE_UNICODE, op.getdescr(), indexvalue.box.getint())
         self.optimize_default(op)
-        
 
-    
+
+
 
 dispatch_opt = make_dispatcher_method(Optimizer, 'optimize_',
         default=Optimizer.optimize_default)
diff --git a/pypy/jit/metainterp/optimizeopt/rewrite.py b/pypy/jit/metainterp/optimizeopt/rewrite.py
--- a/pypy/jit/metainterp/optimizeopt/rewrite.py
+++ b/pypy/jit/metainterp/optimizeopt/rewrite.py
@@ -19,7 +19,7 @@
 
     def new(self):
         return OptRewrite()
-        
+
     def produce_potential_short_preamble_ops(self, sb):
         for op in self.loop_invariant_producer.values():
             sb.add_potential(op)
@@ -231,6 +231,17 @@
             else:
                 self.make_constant(op.result, result)
                 return
+
+        args = self.optimizer.make_args_key(op)
+        oldop = self.optimizer.pure_operations.get(args, None)
+        if oldop is not None and oldop.getdescr() is op.getdescr():
+            assert oldop.getopnum() == op.getopnum()
+            self.make_equal_to(op.result, self.getvalue(oldop.result))
+            return
+        else:
+            self.optimizer.pure_operations[args] = op
+            self.optimizer.remember_emitting_pure(op)
+
         # replace CALL_PURE with just CALL
         args = op.getarglist()
         self.emit_operation(ResOperation(rop.CALL, args, op.result,
@@ -351,7 +362,7 @@
         # expects a compile-time constant
         assert isinstance(arg, Const)
         key = make_hashable_int(arg.getint())
-        
+
         resvalue = self.loop_invariant_results.get(key, None)
         if resvalue is not None:
             self.make_equal_to(op.result, resvalue)
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -4711,6 +4711,35 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_empty_copystrunicontent(self):
+        ops = """
+        [p0, p1, i0, i2, i3]
+        i4 = int_eq(i3, 0)
+        guard_true(i4) []
+        copystrcontent(p0, p1, i0, i2, i3)
+        jump(p0, p1, i0, i2, i3)
+        """
+        expected = """
+        [p0, p1, i0, i2, i3]
+        i4 = int_eq(i3, 0)
+        guard_true(i4) []
+        jump(p0, p1, i0, i2, 0)
+        """
+        self.optimize_strunicode_loop(ops, expected)
+
+    def test_empty_copystrunicontent_virtual(self):
+        ops = """
+        [p0]
+        p1 = newstr(23)
+        copystrcontent(p0, p1, 0, 0, 0)
+        jump(p0)
+        """
+        expected = """
+        [p0]
+        jump(p0)
+        """
+        self.optimize_strunicode_loop(ops, expected)
+
     def test_forced_virtuals_aliasing(self):
         ops = """
         [i0, i1]
@@ -4738,6 +4767,27 @@
         # other
         self.optimize_loop(ops, expected)
 
+    def test_plain_virtual_string_copy_content(self):
+        ops = """
+        []
+        p0 = newstr(6)
+        copystrcontent(s"hello!", p0, 0, 0, 6)
+        p1 = call(0, p0, s"abc123", descr=strconcatdescr)
+        i0 = strgetitem(p1, 0)
+        finish(i0)
+        """
+        expected = """
+        []
+        p0 = newstr(6)
+        copystrcontent(s"hello!", p0, 0, 0, 6)
+        p1 = newstr(12)
+        copystrcontent(p0, p1, 0, 0, 6)
+        copystrcontent(s"abc123", p1, 0, 6, 6)
+        i0 = strgetitem(p1, 0)
+        finish(i0)
+        """
+        self.optimize_strunicode_loop(ops, expected)
+
 
 class TestLLtype(BaseTestOptimizeBasic, LLtypeMixin):
     pass
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -102,9 +102,9 @@
             print "Short Preamble:"
             short = loop.preamble.token.short_preamble[0]
             print short.inputargs
-            print '\n'.join([str(o) for o in short.operations])        
+            print '\n'.join([str(o) for o in short.operations])
             print
-        
+
         assert expected != "crash!", "should have raised an exception"
         self.assert_equal(loop, expected)
         if expected_preamble:
@@ -113,7 +113,7 @@
         if expected_short:
             self.assert_equal(short, expected_short,
                               text_right='expected short preamble')
-            
+
         return loop
 
 class OptimizeOptTest(BaseTestWithUnroll):
@@ -866,10 +866,10 @@
         setfield_gc(p3sub, i1, descr=valuedescr)
         setfield_gc(p1, p3sub, descr=nextdescr)
         # XXX: We get two extra operations here because the setfield
-        #      above is the result of forcing p1 and thus not 
+        #      above is the result of forcing p1 and thus not
         #      registered with the heap optimizer. I've makred tests
         #      below with VIRTUALHEAP if they suffer from this issue
-        p3sub2 = getfield_gc(p1, descr=nextdescr) 
+        p3sub2 = getfield_gc(p1, descr=nextdescr)
         guard_nonnull_class(p3sub2, ConstClass(node_vtable2)) []
         jump(i1, p1, p3sub2)
         """
@@ -1411,7 +1411,7 @@
         guard_isnull(p18) [p0, p8]
         p31 = new(descr=ssize)
         p35 = new_with_vtable(ConstClass(node_vtable))
-        setfield_gc(p35, p31, descr=valuedescr)        
+        setfield_gc(p35, p31, descr=valuedescr)
         jump(p0, p35)
         """
         expected = """
@@ -1426,7 +1426,7 @@
         guard_isnull(p18) [p0, p8]
         p31 = new(descr=ssize)
         p35 = new_with_vtable(ConstClass(node_vtable))
-        setfield_gc(p35, p31, descr=valuedescr)        
+        setfield_gc(p35, p31, descr=valuedescr)
         jump(p0, p35, p19, p18)
         """
         expected = """
@@ -1435,7 +1435,7 @@
         jump(p0, NULL)
         """
         self.optimize_loop(ops, expected)
-        
+
     def test_varray_1(self):
         ops = """
         [i1]
@@ -2181,7 +2181,7 @@
         jump(p1)
         """
         self.optimize_loop(ops, expected)
-        
+
     def test_duplicate_getarrayitem_2(self):
         ops = """
         [p1, i0]
@@ -2199,7 +2199,7 @@
         jump(p1, i7, i6)
         """
         self.optimize_loop(ops, expected)
-        
+
     def test_duplicate_getarrayitem_after_setarrayitem_1(self):
         ops = """
         [p1, p2]
@@ -2812,14 +2812,14 @@
         guard_no_overflow() []
         i3b = int_is_true(i3)
         guard_true(i3b) []
-        setfield_gc(p1, i1, descr=valuedescr)        
+        setfield_gc(p1, i1, descr=valuedescr)
         escape(i3)
         escape(i3)
         jump(i1, p1, i3)
         """
         expected = """
         [i1, p1, i3]
-        setfield_gc(p1, i1, descr=valuedescr)        
+        setfield_gc(p1, i1, descr=valuedescr)
         escape(i3)
         escape(i3)
         jump(i1, p1, i3)
@@ -2830,7 +2830,7 @@
         ops = """
         [p8, p11, i24]
         p26 = new_with_vtable(ConstClass(node_vtable))
-        setfield_gc(p26, i24, descr=adescr)        
+        setfield_gc(p26, i24, descr=adescr)
         i34 = getfield_gc_pure(p11, descr=valuedescr)
         i35 = getfield_gc_pure(p26, descr=adescr)
         i36 = int_add_ovf(i34, i35)
@@ -2839,10 +2839,10 @@
         """
         expected = """
         [p8, p11, i26]
-        jump(p8, p11, i26)        
-        """
-        self.optimize_loop(ops, expected)
-        
+        jump(p8, p11, i26)
+        """
+        self.optimize_loop(ops, expected)
+
     def test_ovf_guard_in_short_preamble2(self):
         ops = """
         [p8, p11, p12]
@@ -3191,13 +3191,18 @@
         jump(p1, i4, i3)
         '''
         expected = '''
+        [p1, i4, i3, i5]
+        setfield_gc(p1, i5, descr=valuedescr)
+        jump(p1, i3, i5, i5)
+        '''
+        preamble = '''
         [p1, i1, i4]
         setfield_gc(p1, i1, descr=valuedescr)
         i3 = call(p1, descr=plaincalldescr)
         setfield_gc(p1, i3, descr=valuedescr)
-        jump(p1, i4, i3)
+        jump(p1, i4, i3, i3)
         '''
-        self.optimize_loop(ops, expected, expected)
+        self.optimize_loop(ops, expected, preamble)
 
     def test_call_pure_invalidates_heap_knowledge(self):
         # CALL_PURE should still force the setfield_gc() to occur before it
@@ -3209,21 +3214,20 @@
         jump(p1, i4, i3)
         '''
         expected = '''
+        [p1, i4, i3, i5]
+        setfield_gc(p1, i4, descr=valuedescr)
+        jump(p1, i3, i5, i5)
+        '''
+        preamble = '''
         [p1, i1, i4]
         setfield_gc(p1, i1, descr=valuedescr)
         i3 = call(p1, descr=plaincalldescr)
         setfield_gc(p1, i1, descr=valuedescr)
-        jump(p1, i4, i3)
+        jump(p1, i4, i3, i3)
         '''
-        self.optimize_loop(ops, expected, expected)
+        self.optimize_loop(ops, expected, preamble)
 
     def test_call_pure_constant_folding(self):
-        # CALL_PURE is not marked as is_always_pure(), because it is wrong
-        # to call the function arbitrary many times at arbitrary points in
-        # time.  Check that it is either constant-folded (and replaced by
-        # the result of the call, recorded as the first arg), or turned into
-        # a regular CALL.
-        # XXX can this test be improved with unrolling?
         arg_consts = [ConstInt(i) for i in (123456, 4, 5, 6)]
         call_pure_results = {tuple(arg_consts): ConstInt(42)}
         ops = '''
@@ -3239,14 +3243,13 @@
         escape(i1)
         escape(i2)
         i4 = call(123456, 4, i0, 6, descr=plaincalldescr)
-        jump(i0, i4)
+        jump(i0, i4, i4)
         '''
         expected = '''
-        [i0, i2]
+        [i0, i4, i5]
         escape(42)
-        escape(i2)
-        i4 = call(123456, 4, i0, 6, descr=plaincalldescr)
-        jump(i0, i4)
+        escape(i4)
+        jump(i0, i5, i5)
         '''
         self.optimize_loop(ops, expected, preamble, call_pure_results)
 
@@ -3270,18 +3273,43 @@
         escape(i2)
         i4 = call(123456, 4, i0, 6, descr=plaincalldescr)
         guard_no_exception() []
-        jump(i0, i4)
+        jump(i0, i4, i4)
         '''
         expected = '''
-        [i0, i2]
+        [i0, i2, i3]
         escape(42)
         escape(i2)
-        i4 = call(123456, 4, i0, 6, descr=plaincalldescr)
-        guard_no_exception() []
-        jump(i0, i4)
+        jump(i0, i3, i3)
         '''
         self.optimize_loop(ops, expected, preamble, call_pure_results)
 
+    def test_call_pure_returning_virtual(self):
+        # XXX: This kind of loop invariant call_pure will be forced
+        #      both in the preamble and in the peeled loop
+        ops = '''
+        [p1, i1, i2]
+        p2 = call_pure(0, p1, i1, i2, descr=strslicedescr)
+        escape(p2)
+        jump(p1, i1, i2)
+        '''
+        preamble = '''
+        [p1, i1, i2]
+        i6 = int_sub(i2, i1)
+        p2 = newstr(i6)
+        copystrcontent(p1, p2, i1, 0, i6)
+        escape(p2)
+        jump(p1, i1, i2, i6)
+        '''
+        expected = '''
+        [p1, i1, i2, i6]
+        p2 = newstr(i6)
+        copystrcontent(p1, p2, i1, 0, i6)
+        escape(p2)
+        jump(p1, i1, i2, i6)
+        '''
+        self.optimize_loop(ops, expected, preamble)
+        
+
     # ----------
 
     def test_vref_nonvirtual_nonescape(self):
@@ -5150,14 +5178,14 @@
         [i0, i1, i10, i11, i2, i3, i4]
         escape(i2)
         escape(i3)
-        escape(i4)        
+        escape(i4)
         i24 = int_mul_ovf(i10, i11)
         guard_no_overflow() []
         i23 = int_sub_ovf(i10, i11)
         guard_no_overflow() []
         i22 = int_add_ovf(i10, i11)
         guard_no_overflow() []
-        jump(i0, i1, i10, i11, i2, i3, i4) 
+        jump(i0, i1, i10, i11, i2, i3, i4)
         """
         self.optimize_loop(ops, expected)
 
@@ -5366,6 +5394,8 @@
         """
         self.optimize_strunicode_loop(ops, expected, expected)
 
+    # XXX Should some of the calls below now be call_pure?
+
     def test_str_concat_1(self):
         ops = """
         [p1, p2]
@@ -5699,14 +5729,14 @@
         ops = """
         [p0, i0]
         i1 = unicodegetitem(p0, i0)
-        i10 = unicodegetitem(p0, i0)        
+        i10 = unicodegetitem(p0, i0)
         i2 = int_lt(i1, 0)
         guard_false(i2) []
         jump(p0, i0)
         """
         expected = """
         [p0, i0]
-        i1 = unicodegetitem(p0, i0)        
+        i1 = unicodegetitem(p0, i0)
         jump(p0, i0)
         """
         self.optimize_loop(ops, expected)
@@ -5865,7 +5895,7 @@
         """
         preamble = """
         [p1, i1, i2, p3]
-        guard_nonnull(p3) []        
+        guard_nonnull(p3) []
         i4 = int_sub(i2, i1)
         i0 = call(0, p1, i1, i4, p3, descr=streq_slice_nonnull_descr)
         escape(i0)
@@ -6474,7 +6504,7 @@
         setfield_gc(p3, i1, descr=adescr)
         setfield_gc(p3, i2, descr=bdescr)
         i5 = int_gt(ii, 42)
-        guard_true(i5) []        
+        guard_true(i5) []
         jump(p0, p1, p3, ii2, ii, i1, i2)
         """
         self.optimize_loop(ops, expected)
@@ -6500,7 +6530,7 @@
         p1 = getfield_gc(p0, descr=nextdescr)
         guard_nonnull_class(p1, ConstClass(node_vtable)) []
         p2 = getfield_gc(p1, descr=nextdescr)
-        guard_nonnull_class(p2, ConstClass(node_vtable)) []        
+        guard_nonnull_class(p2, ConstClass(node_vtable)) []
         jump(p0)
         """
         expected = """
@@ -6514,11 +6544,11 @@
         guard_class(p1, ConstClass(node_vtable)) []
         p2 = getfield_gc(p1, descr=nextdescr)
         guard_nonnull(p2) []
-        guard_class(p2, ConstClass(node_vtable)) []        
+        guard_class(p2, ConstClass(node_vtable)) []
         jump(p0)
         """
         self.optimize_loop(ops, expected, expected_short=short)
-        
+
     def test_forced_virtual_pure_getfield(self):
         ops = """
         [p0]
@@ -6582,7 +6612,7 @@
         jump(p1, i2)
         """
         self.optimize_loop(ops, expected)
-        
+
     def test_loopinvariant_strlen(self):
         ops = """
         [p9]
@@ -6715,7 +6745,7 @@
         [p0, p1]
         p2 = new_with_vtable(ConstClass(node_vtable))
         p3 = new_with_vtable(ConstClass(node_vtable))
-        setfield_gc(p2, p3, descr=nextdescr) 
+        setfield_gc(p2, p3, descr=nextdescr)
         jump(p2, p3)
         """
         expected = """
@@ -6734,7 +6764,7 @@
         jump(p2, i2)
         """
         expected = """
-        [p1]        
+        [p1]
         p2 = getarrayitem_gc(p1, 7, descr=<GcPtrArrayDescr>)
         i1 = arraylen_gc(p1)
         jump(p2)
@@ -6775,8 +6805,8 @@
         jump(p0, p2, p1)
         """
         self.optimize_loop(ops, expected, expected_short=short)
-        
-        
+
+
     def test_loopinvariant_constant_strgetitem(self):
         ops = """
         [p0]
@@ -6830,11 +6860,11 @@
         expected = """
         [p0, i22, p1]
         call(i22, descr=nonwritedescr)
-        i3 = unicodelen(p1) # Should be killed by backend        
+        i3 = unicodelen(p1) # Should be killed by backend
         jump(p0, i22, p1)
         """
         self.optimize_loop(ops, expected, expected_short=short)
-        
+
     def test_propagate_virtual_arryalen(self):
         ops = """
         [p0]
@@ -6903,7 +6933,7 @@
         [p0, p1, p10, p11]
         i1 = arraylen_gc(p10, descr=arraydescr)
         getarrayitem_gc(p11, 1, descr=arraydescr)
-        call(i1, descr=nonwritedescr)        
+        call(i1, descr=nonwritedescr)
         jump(p1, p0, p11, p10)
         """
         self.optimize_loop(ops, expected)
@@ -6912,20 +6942,20 @@
         ops = """
         [p5]
         i10 = getfield_gc(p5, descr=valuedescr)
-        call(i10, descr=nonwritedescr) 
+        call(i10, descr=nonwritedescr)
         setfield_gc(p5, 1, descr=valuedescr)
         jump(p5)
         """
         preamble = """
         [p5]
         i10 = getfield_gc(p5, descr=valuedescr)
-        call(i10, descr=nonwritedescr) 
+        call(i10, descr=nonwritedescr)
         setfield_gc(p5, 1, descr=valuedescr)
         jump(p5)
         """
         expected = """
         [p5]
-        call(1, descr=nonwritedescr) 
+        call(1, descr=nonwritedescr)
         jump(p5)
         """
         self.optimize_loop(ops, expected, preamble)
@@ -6963,7 +6993,7 @@
         [p9]
         call_assembler(0, descr=asmdescr)
         i18 = getfield_gc(p9, descr=valuedescr)
-        guard_value(i18, 0) []        
+        guard_value(i18, 0) []
         jump(p9)
         """
         self.optimize_loop(ops, expected)
@@ -6992,17 +7022,17 @@
         i10 = getfield_gc(p5, descr=valuedescr)
         i11 = getfield_gc(p6, descr=nextdescr)
         call(i10, i11, descr=nonwritedescr)
-        setfield_gc(p6, i10, descr=nextdescr)        
+        setfield_gc(p6, i10, descr=nextdescr)
         jump(p5, p6)
         """
         expected = """
         [p5, p6, i10, i11]
         call(i10, i11, descr=nonwritedescr)
-        setfield_gc(p6, i10, descr=nextdescr)        
+        setfield_gc(p6, i10, descr=nextdescr)
         jump(p5, p6, i10, i10)
         """
         self.optimize_loop(ops, expected)
-        
+
     def test_cached_pure_func_of_equal_fields(self):
         ops = """
         [p5, p6]
@@ -7011,18 +7041,18 @@
         i12 = int_add(i10, 7)
         i13 = int_add(i11, 7)
         call(i12, i13, descr=nonwritedescr)
-        setfield_gc(p6, i10, descr=nextdescr)        
+        setfield_gc(p6, i10, descr=nextdescr)
         jump(p5, p6)
         """
         expected = """
         [p5, p6, i14, i12, i10]
         i13 = int_add(i14, 7)
         call(i12, i13, descr=nonwritedescr)
-        setfield_gc(p6, i10, descr=nextdescr)        
+        setfield_gc(p6, i10, descr=nextdescr)
         jump(p5, p6, i10, i12, i10)
         """
         self.optimize_loop(ops, expected)
-        
+
     def test_forced_counter(self):
         # XXX: VIRTUALHEAP (see above)
         py.test.skip("would be fixed by make heap optimizer aware of virtual setfields")
@@ -7165,7 +7195,7 @@
         expected = """
         [p1, p2, i2, i1]
         call(i2, descr=nonwritedescr)
-        setfield_gc(p2, i1, descr=nextdescr)        
+        setfield_gc(p2, i1, descr=nextdescr)
         jump(p1, p2, i2, i1)
         """
         self.optimize_loop(ops, expected)
@@ -7185,11 +7215,11 @@
         expected = """
         [p1, p2, i2, i1]
         call(i2, descr=nonwritedescr)
-        setfield_gc(p2, i1, descr=valuedescr)        
+        setfield_gc(p2, i1, descr=valuedescr)
         jump(p1, p2, i2, i1)
         """
         self.optimize_loop(ops, expected)
-        
+
 class TestLLtype(OptimizeOptTest, LLtypeMixin):
     pass
-        
+
diff --git a/pypy/jit/metainterp/optimizeopt/unroll.py b/pypy/jit/metainterp/optimizeopt/unroll.py
--- a/pypy/jit/metainterp/optimizeopt/unroll.py
+++ b/pypy/jit/metainterp/optimizeopt/unroll.py
@@ -226,8 +226,9 @@
                 if op and op.result:
                     preamble_value = preamble_optimizer.getvalue(op.result)
                     value = self.optimizer.getvalue(op.result)
-                    imp = ValueImporter(self, preamble_value, op)
-                    self.optimizer.importable_values[value] = imp
+                    if not value.is_virtual():
+                        imp = ValueImporter(self, preamble_value, op)
+                        self.optimizer.importable_values[value] = imp
                     newresult = self.optimizer.getvalue(op.result).get_key_box()
                     if newresult is not op.result:
                         self.short_boxes.alias(newresult, op.result)
diff --git a/pypy/jit/metainterp/optimizeopt/vstring.py b/pypy/jit/metainterp/optimizeopt/vstring.py
--- a/pypy/jit/metainterp/optimizeopt/vstring.py
+++ b/pypy/jit/metainterp/optimizeopt/vstring.py
@@ -141,6 +141,11 @@
                                    for c in self._chars])
 
     def string_copy_parts(self, optimizer, targetbox, offsetbox, mode):
+        if not self.is_virtual() and targetbox is not self.box:
+            lengthbox = self.getstrlen(optimizer, mode)
+            srcbox = self.force_box()
+            return copy_str_content(optimizer, srcbox, targetbox,
+                                CONST_0, offsetbox, lengthbox, mode)
         for i in range(len(self._chars)):
             charbox = self._chars[i].force_box()
             if not (isinstance(charbox, Const) and charbox.same_constant(CONST_0)):
@@ -296,7 +301,7 @@
 
 
 def copy_str_content(optimizer, srcbox, targetbox,
-                     srcoffsetbox, offsetbox, lengthbox, mode):
+                     srcoffsetbox, offsetbox, lengthbox, mode, need_next_offset=True):
     if isinstance(srcbox, ConstPtr) and isinstance(srcoffsetbox, Const):
         M = 5
     else:
@@ -313,7 +318,10 @@
                                               None))
             offsetbox = _int_add(optimizer, offsetbox, CONST_1)
     else:
-        nextoffsetbox = _int_add(optimizer, offsetbox, lengthbox)
+        if need_next_offset:
+            nextoffsetbox = _int_add(optimizer, offsetbox, lengthbox)
+        else:
+            nextoffsetbox = None
         op = ResOperation(mode.COPYSTRCONTENT, [srcbox, targetbox,
                                                 srcoffsetbox, offsetbox,
                                                 lengthbox], None)
@@ -365,7 +373,7 @@
 
     def new(self):
         return OptString()
-    
+
     def make_vstring_plain(self, box, source_op, mode):
         vvalue = VStringPlainValue(self.optimizer, box, source_op, mode)
         self.make_equal_to(box, vvalue)
@@ -435,7 +443,11 @@
         #
         if isinstance(value, VStringPlainValue):  # even if no longer virtual
             if vindex.is_constant():
-                return value.getitem(vindex.box.getint())
+                res = value.getitem(vindex.box.getint())
+                # If it is uninitialized we can't return it, it was set by a
+                # COPYSTRCONTENT, not a STRSETITEM
+                if res is not optimizer.CVAL_UNINITIALIZED_ZERO:
+                    return res
         #
         resbox = _strgetitem(self.optimizer, value.force_box(), vindex.force_box(), mode)
         return self.getvalue(resbox)
@@ -450,6 +462,30 @@
         lengthbox = value.getstrlen(self.optimizer, mode)
         self.make_equal_to(op.result, self.getvalue(lengthbox))
 
+    def optimize_COPYSTRCONTENT(self, op):
+        self._optimize_COPYSTRCONTENT(op, mode_string)
+    def optimize_COPYUNICODECONTENT(self, op):
+        self._optimize_COPYSTRCONTENT(op, mode_unicode)
+
+    def _optimize_COPYSTRCONTENT(self, op, mode):
+        # args: src dst srcstart dststart length
+        src = self.getvalue(op.getarg(0))
+        dst = self.getvalue(op.getarg(1))
+        srcstart = self.getvalue(op.getarg(2))
+        dststart = self.getvalue(op.getarg(3))
+        length = self.getvalue(op.getarg(4))
+
+        if length.is_constant() and length.box.getint() == 0:
+            return
+        copy_str_content(self.optimizer,
+            src.force_box(),
+            dst.force_box(),
+            srcstart.force_box(),
+            dststart.force_box(),
+            length.force_box(),
+            mode, need_next_offset=False
+        )
+
     def optimize_CALL(self, op):
         # dispatch based on 'oopspecindex' to a method that handles
         # specifically the given oopspec call.  For non-oopspec calls,
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -17,6 +17,7 @@
 from pypy.jit.metainterp.jitprof import ABORT_TOO_LONG, ABORT_BRIDGE, \
                                         ABORT_FORCE_QUASIIMMUT, ABORT_BAD_LOOP
 from pypy.jit.metainterp.jitexc import JitException, get_llexception
+from pypy.jit.metainterp.heapcache import HeapCache
 from pypy.rlib.objectmodel import specialize
 from pypy.jit.codewriter.jitcode import JitCode, SwitchDictDescr
 from pypy.jit.codewriter import heaptracker
@@ -209,7 +210,8 @@
                 self.metainterp.clear_exception()
                 resbox = self.execute(rop.%s, b1, b2)
                 self.make_result_of_lastop(resbox)  # same as execute_varargs()
-                self.metainterp.handle_possible_overflow_error()
+                if not isinstance(resbox, Const):
+                    self.metainterp.handle_possible_overflow_error()
                 return resbox
         ''' % (_opimpl, _opimpl.upper())).compile()
 
@@ -321,7 +323,7 @@
     def _establish_nullity(self, box, orgpc):
         value = box.nonnull()
         if value:
-            if box not in self.metainterp.known_class_boxes:
+            if not self.metainterp.heapcache.is_class_known(box):
                 self.generate_guard(rop.GUARD_NONNULL, box, resumepc=orgpc)
         else:
             if not isinstance(box, Const):
@@ -366,14 +368,17 @@
 
     @arguments("descr")
     def opimpl_new(self, sizedescr):
-        return self.execute_with_descr(rop.NEW, sizedescr)
+        resbox = self.execute_with_descr(rop.NEW, sizedescr)
+        self.metainterp.heapcache.new(resbox)
+        return resbox
 
     @arguments("descr")
     def opimpl_new_with_vtable(self, sizedescr):
         cpu = self.metainterp.cpu
         cls = heaptracker.descr2vtable(cpu, sizedescr)
         resbox = self.execute(rop.NEW_WITH_VTABLE, ConstInt(cls))
-        self.metainterp.known_class_boxes[resbox] = None
+        self.metainterp.heapcache.new(resbox)
+        self.metainterp.heapcache.class_now_known(resbox)
         return resbox
 
 ##    @FixME  #arguments("box")
@@ -392,26 +397,30 @@
 ##        self.execute(rop.SUBCLASSOF, box1, box2)
 
     @arguments("descr", "box")
-    def opimpl_new_array(self, itemsizedescr, countbox):
-        return self.execute_with_descr(rop.NEW_ARRAY, itemsizedescr, countbox)
+    def opimpl_new_array(self, itemsizedescr, lengthbox):
+        resbox = self.execute_with_descr(rop.NEW_ARRAY, itemsizedescr, lengthbox)
+        self.metainterp.heapcache.new_array(resbox, lengthbox)
+        return resbox
+
+    @specialize.arg(1)
+    def _do_getarrayitem_gc_any(self, op, arraybox, arraydescr, indexbox):
+        tobox = self.metainterp.heapcache.getarrayitem(
+                arraybox, arraydescr, indexbox)
+        if tobox:
+            # sanity check: see whether the current array value
+            # corresponds to what the cache thinks the value is
+            resbox = executor.execute(self.metainterp.cpu, self.metainterp, op,
+                                      arraydescr, arraybox, indexbox)
+            assert resbox.constbox().same_constant(tobox.constbox())
+            return tobox
+        resbox = self.execute_with_descr(op, arraydescr, arraybox, indexbox)
+        self.metainterp.heapcache.getarrayitem_now_known(
+                arraybox, arraydescr, indexbox, resbox)
+        return resbox
 
     @arguments("box", "descr", "box")
     def _opimpl_getarrayitem_gc_any(self, arraybox, arraydescr, indexbox):
-        cache = self.metainterp.heap_array_cache.get(arraydescr, None)
-        if cache and isinstance(indexbox, ConstInt):
-            index = indexbox.getint()
-            frombox, tobox = cache.get(index, (None, None))
-            if frombox is arraybox:
-                return tobox
-        resbox = self.execute_with_descr(rop.GETARRAYITEM_GC,
-                                         arraydescr, arraybox, indexbox)
-        if isinstance(indexbox, ConstInt):
-            if not cache:
-                cache = self.metainterp.heap_array_cache[arraydescr] = {}
-            index = indexbox.getint()
-            cache[index] = arraybox, resbox
-        return resbox
-
+        return self._do_getarrayitem_gc_any(rop.GETARRAYITEM_GC, arraybox, arraydescr, indexbox)
 
     opimpl_getarrayitem_gc_i = _opimpl_getarrayitem_gc_any
     opimpl_getarrayitem_gc_r = _opimpl_getarrayitem_gc_any
@@ -427,8 +436,7 @@
 
     @arguments("box", "descr", "box")
     def _opimpl_getarrayitem_gc_pure_any(self, arraybox, arraydescr, indexbox):
-        return self.execute_with_descr(rop.GETARRAYITEM_GC_PURE,
-                                       arraydescr, arraybox, indexbox)
+        return self._do_getarrayitem_gc_any(rop.GETARRAYITEM_GC_PURE, arraybox, arraydescr, indexbox)
 
     opimpl_getarrayitem_gc_pure_i = _opimpl_getarrayitem_gc_pure_any
     opimpl_getarrayitem_gc_pure_r = _opimpl_getarrayitem_gc_pure_any
@@ -439,13 +447,8 @@
                                     indexbox, itembox):
         self.execute_with_descr(rop.SETARRAYITEM_GC, arraydescr, arraybox,
                                 indexbox, itembox)
-        if isinstance(indexbox, ConstInt):
-            cache = self.metainterp.heap_array_cache.setdefault(arraydescr, {})
-            cache[indexbox.getint()] = arraybox, itembox
-        else:
-            cache = self.metainterp.heap_array_cache.get(arraydescr, None)
-            if cache:
-                cache.clear()
+        self.metainterp.heapcache.setarrayitem(
+                arraybox, arraydescr, indexbox, itembox)
 
     opimpl_setarrayitem_gc_i = _opimpl_setarrayitem_gc_any
     opimpl_setarrayitem_gc_r = _opimpl_setarrayitem_gc_any
@@ -462,7 +465,12 @@
 
     @arguments("box", "descr")
     def opimpl_arraylen_gc(self, arraybox, arraydescr):
-        return self.execute_with_descr(rop.ARRAYLEN_GC, arraydescr, arraybox)
+        lengthbox = self.metainterp.heapcache.arraylen(arraybox)
+        if lengthbox is None:
+            lengthbox = self.execute_with_descr(
+                    rop.ARRAYLEN_GC, arraydescr, arraybox)
+            self.metainterp.heapcache.arraylen_now_known(arraybox, lengthbox)
+        return lengthbox
 
     @arguments("orgpc", "box", "descr", "box")
     def opimpl_check_neg_index(self, orgpc, arraybox, arraydescr, indexbox):
@@ -471,19 +479,17 @@
         negbox = self.implement_guard_value(orgpc, negbox)
         if negbox.getint():
             # the index is < 0; add the array length to it
-            lenbox = self.metainterp.execute_and_record(
-                rop.ARRAYLEN_GC, arraydescr, arraybox)
+            lengthbox = self.opimpl_arraylen_gc(arraybox, arraydescr)
             indexbox = self.metainterp.execute_and_record(
-                rop.INT_ADD, None, indexbox, lenbox)
+                rop.INT_ADD, None, indexbox, lengthbox)
         return indexbox
 
     @arguments("descr", "descr", "descr", "descr", "box")
     def opimpl_newlist(self, structdescr, lengthdescr, itemsdescr, arraydescr,
                        sizebox):
-        sbox = self.metainterp.execute_and_record(rop.NEW, structdescr)
+        sbox = self.opimpl_new(structdescr)
         self._opimpl_setfield_gc_any(sbox, lengthdescr, sizebox)
-        abox = self.metainterp.execute_and_record(rop.NEW_ARRAY, arraydescr,
-                                                  sizebox)
+        abox = self.opimpl_new_array(arraydescr, sizebox)
         self._opimpl_setfield_gc_any(sbox, itemsdescr, abox)
         return sbox
 
@@ -540,11 +546,15 @@
 
     @specialize.arg(1)
     def _opimpl_getfield_gc_any_pureornot(self, opnum, box, fielddescr):
-        frombox, tobox = self.metainterp.heap_cache.get(fielddescr, (None, None))
-        if frombox is box:
+        tobox = self.metainterp.heapcache.getfield(box, fielddescr)
+        if tobox is not None:
+            # sanity check: see whether the current struct value
+            # corresponds to what the cache thinks the value is
+            resbox = executor.execute(self.metainterp.cpu, self.metainterp,
+                                      rop.GETFIELD_GC, fielddescr, box)
             return tobox
         resbox = self.execute_with_descr(opnum, fielddescr, box)
-        self.metainterp.heap_cache[fielddescr] = (box, resbox)
+        self.metainterp.heapcache.getfield_now_known(box, fielddescr, resbox)
         return resbox
 
     @arguments("orgpc", "box", "descr")
@@ -565,11 +575,11 @@
 
     @arguments("box", "descr", "box")
     def _opimpl_setfield_gc_any(self, box, fielddescr, valuebox):
-        frombox, tobox = self.metainterp.heap_cache.get(fielddescr, (None, None))
-        if frombox is box and tobox is valuebox:
+        tobox = self.metainterp.heapcache.getfield(box, fielddescr)
+        if tobox is valuebox:
             return
         self.execute_with_descr(rop.SETFIELD_GC, fielddescr, box, valuebox)
-        self.metainterp.heap_cache[fielddescr] = (box, valuebox)
+        self.metainterp.heapcache.setfield(box, fielddescr, valuebox)
     opimpl_setfield_gc_i = _opimpl_setfield_gc_any
     opimpl_setfield_gc_r = _opimpl_setfield_gc_any
     opimpl_setfield_gc_f = _opimpl_setfield_gc_any
@@ -633,7 +643,7 @@
         standard_box = self.metainterp.virtualizable_boxes[-1]
         if standard_box is box:
             return False
-        if box in self.metainterp.nonstandard_virtualizables:
+        if self.metainterp.heapcache.is_nonstandard_virtualizable(box):
             return True
         eqbox = self.metainterp.execute_and_record(rop.PTR_EQ, None,
                                                    box, standard_box)
@@ -642,7 +652,7 @@
         if isstandard:
             self.metainterp.replace_box(box, standard_box)
         else:
-            self.metainterp.nonstandard_virtualizables[box] = None
+            self.metainterp.heapcache.nonstandard_virtualizables_now_known(box)
         return not isstandard
 
     def _get_virtualizable_field_index(self, fielddescr):
@@ -727,7 +737,7 @@
     def opimpl_arraylen_vable(self, pc, box, fdescr, adescr):
         if self._nonstandard_virtualizable(pc, box):
             arraybox = self._opimpl_getfield_gc_any(box, fdescr)
-            return self.execute_with_descr(rop.ARRAYLEN_GC, adescr, arraybox)
+            return self.opimpl_arraylen_gc(arraybox, adescr)
         vinfo = self.metainterp.jitdriver_sd.virtualizable_info
         virtualizable_box = self.metainterp.virtualizable_boxes[-1]
         virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
@@ -858,6 +868,14 @@
     def opimpl_newunicode(self, lengthbox):
         return self.execute(rop.NEWUNICODE, lengthbox)
 
+    @arguments("box", "box", "box", "box", "box")
+    def opimpl_copystrcontent(self, srcbox, dstbox, srcstartbox, dststartbox, lengthbox):
+        return self.execute(rop.COPYSTRCONTENT, srcbox, dstbox, srcstartbox, dststartbox, lengthbox)
+
+    @arguments("box", "box", "box", "box", "box")
+    def opimpl_copyunicodecontent(self, srcbox, dstbox, srcstartbox, dststartbox, lengthbox):
+        return self.execute(rop.COPYUNICODECONTENT, srcbox, dstbox, srcstartbox, dststartbox, lengthbox)
+
 ##    @FixME  #arguments("descr", "varargs")
 ##    def opimpl_residual_oosend_canraise(self, methdescr, varargs):
 ##        return self.execute_varargs(rop.OOSEND, varargs, descr=methdescr,
@@ -884,9 +902,9 @@
     @arguments("orgpc", "box")
     def opimpl_guard_class(self, orgpc, box):
         clsbox = self.cls_of_box(box)
-        if box not in self.metainterp.known_class_boxes:
+        if not self.metainterp.heapcache.is_class_known(box):
             self.generate_guard(rop.GUARD_CLASS, box, [clsbox], resumepc=orgpc)
-            self.metainterp.known_class_boxes[box] = None
+            self.metainterp.heapcache.class_now_known(box)
         return clsbox
 
     @arguments("int", "orgpc")
@@ -1052,6 +1070,18 @@
         return ConstInt(trace_length)
 
     @arguments("box")
+    def _opimpl_isconstant(self, box):
+        return ConstInt(isinstance(box, Const))
+
+    opimpl_int_isconstant = opimpl_ref_isconstant = _opimpl_isconstant
+
+    @arguments("box")
+    def _opimpl_isvirtual(self, box):
+        return ConstInt(self.metainterp.heapcache.is_unescaped(box))
+
+    opimpl_ref_isvirtual = _opimpl_isvirtual
+
+    @arguments("box")
     def opimpl_virtual_ref(self, box):
         # Details on the content of metainterp.virtualref_boxes:
         #
@@ -1492,16 +1522,7 @@
         self.last_exc_value_box = None
         self.retracing_loop_from = None
         self.call_pure_results = args_dict_box()
-        # contains boxes where the class is already known
-        self.known_class_boxes = {}
-        # contains frame boxes that are not virtualizables
-        self.nonstandard_virtualizables = {}
-        # heap cache
-        # maps descrs to (from_box, to_box) tuples
-        self.heap_cache = {}
-        # heap array cache
-        # maps descrs to {index: (from_box, to_box)} dicts
-        self.heap_array_cache = {}
+        self.heapcache = HeapCache()
 
     def perform_call(self, jitcode, boxes, greenkey=None):
         # causes the metainterp to enter the given subfunction
@@ -1674,32 +1695,18 @@
 
     def _record_helper_nonpure_varargs(self, opnum, resbox, descr, argboxes):
         assert resbox is None or isinstance(resbox, Box)
+        if (rop._OVF_FIRST <= opnum <= rop._OVF_LAST and
+            self.last_exc_value_box is None and
+            self._all_constants_varargs(argboxes)):
+            return resbox.constbox()
         # record the operation
         profiler = self.staticdata.profiler
         profiler.count_ops(opnum, RECORDED_OPS)
-        self._invalidate_caches(opnum, descr)
+        self.heapcache.invalidate_caches(opnum, descr, argboxes)
         op = self.history.record(opnum, argboxes, resbox, descr)
         self.attach_debug_info(op)
         return resbox
 
-    def _invalidate_caches(self, opnum, descr):
-        if opnum == rop.SETFIELD_GC:
-            return
-        if opnum == rop.SETARRAYITEM_GC:
-            return
-        if rop._NOSIDEEFFECT_FIRST <= opnum <= rop._NOSIDEEFFECT_LAST:
-            return
-        if opnum == rop.CALL:
-            effectinfo = descr.get_extra_info()
-            ef = effectinfo.extraeffect
-            if ef == effectinfo.EF_LOOPINVARIANT or \
-               ef == effectinfo.EF_ELIDABLE_CANNOT_RAISE or \
-               ef == effectinfo.EF_ELIDABLE_CAN_RAISE:
-                return
-        if self.heap_cache:
-            self.heap_cache.clear()
-        if self.heap_array_cache:
-            self.heap_array_cache.clear()
 
     def attach_debug_info(self, op):
         if (not we_are_translated() and op is not None
@@ -1862,10 +1869,7 @@
                 duplicates[box] = None
 
     def reached_loop_header(self, greenboxes, redboxes, resumedescr):
-        self.known_class_boxes = {}
-        self.nonstandard_virtualizables = {} # XXX maybe not needed?
-        self.heap_cache = {}
-        self.heap_array_cache = {}
+        self.heapcache.reset()
 
         duplicates = {}
         self.remove_consts_and_duplicates(redboxes, len(redboxes),
@@ -2373,17 +2377,7 @@
             for i in range(len(boxes)):
                 if boxes[i] is oldbox:
                     boxes[i] = newbox
-        for descr, (frombox, tobox) in self.heap_cache.iteritems():
-            change = False
-            if frombox is oldbox:
-                change = True
-                frombox = newbox
-            if tobox is oldbox:
-                change = True
-                tobox = newbox
-            if change:
-                self.heap_cache[descr] = frombox, tobox
-        # XXX what about self.heap_array_cache?
+        self.heapcache.replace_box(oldbox, newbox)
 
     def find_biggest_function(self):
         start_stack = []
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -1,23 +1,25 @@
+import sys
+
 import py
-import sys
-from pypy.rlib.jit import JitDriver, we_are_jitted, hint, dont_look_inside
-from pypy.rlib.jit import loop_invariant, elidable, promote
-from pypy.rlib.jit import jit_debug, assert_green, AssertGreenFailed
-from pypy.rlib.jit import unroll_safe, current_trace_length
+
+from pypy import conftest
+from pypy.jit.codewriter.policy import JitPolicy, StopAtXPolicy
 from pypy.jit.metainterp import pyjitpl, history
+from pypy.jit.metainterp.optimizeopt import ALL_OPTS_DICT
+from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin, noConst
+from pypy.jit.metainterp.typesystem import LLTypeHelper, OOTypeHelper
+from pypy.jit.metainterp.warmspot import get_stats
 from pypy.jit.metainterp.warmstate import set_future_value
-from pypy.jit.metainterp.warmspot import get_stats
-from pypy.jit.codewriter.policy import JitPolicy, StopAtXPolicy
-from pypy import conftest
+from pypy.rlib.jit import (JitDriver, we_are_jitted, hint, dont_look_inside,
+    loop_invariant, elidable, promote, jit_debug, assert_green,
+    AssertGreenFailed, unroll_safe, current_trace_length, look_inside_iff,
+    isconstant, isvirtual)
 from pypy.rlib.rarithmetic import ovfcheck
-from pypy.jit.metainterp.typesystem import LLTypeHelper, OOTypeHelper
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rpython.ootypesystem import ootype
-from pypy.jit.metainterp.optimizeopt import ALL_OPTS_DICT
-from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin, noConst
+
 
 class BasicTests:
-
     def test_basic(self):
         def f(x, y):
             return x + y
@@ -99,14 +101,14 @@
                 myjitdriver.jit_merge_point(x=x, y=y, res=res)
                 res += x * x
                 x += 1
-                res += x * x                
+                res += x * x
                 y -= 1
             return res
         res = self.meta_interp(f, [6, 7])
         assert res == 1323
         self.check_loop_count(1)
         self.check_loops(int_mul=1)
-        
+
     def test_loop_variant_mul_ovf(self):
         myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
         def f(x, y):
@@ -1372,7 +1374,7 @@
             return x
         res = self.meta_interp(f, [299], listops=True)
         assert res == f(299)
-        self.check_loops(guard_class=0, guard_value=3)        
+        self.check_loops(guard_class=0, guard_value=3)
         self.check_loops(guard_class=0, guard_value=6, everywhere=True)
 
     def test_merge_guardnonnull_guardclass(self):
@@ -2118,7 +2120,7 @@
             return sa
         res = self.meta_interp(f, [32, 7])
         assert res == f(32, 7)
-        
+
     def test_caching_setarrayitem_fixed(self):
         myjitdriver = JitDriver(greens = [], reds = ['sa', 'i', 'n', 'a', 'node'])
         def f(n, a):
@@ -2138,7 +2140,7 @@
             return sa
         res = self.meta_interp(f, [32, 7])
         assert res == f(32, 7)
-        
+
     def test_caching_setarrayitem_var(self):
         myjitdriver = JitDriver(greens = [], reds = ['sa', 'i', 'n', 'a', 'b', 'node'])
         def f(n, a, b):
@@ -2668,7 +2670,7 @@
             myjitdriver.set_param('threshold', 3)
             myjitdriver.set_param('trace_eagerness', 1)
             myjitdriver.set_param('retrace_limit', 5)
-            myjitdriver.set_param('function_threshold', -1)            
+            myjitdriver.set_param('function_threshold', -1)
             pc = sa = i = 0
             while pc < len(bytecode):
                 myjitdriver.jit_merge_point(pc=pc, n=n, sa=sa, i=i)
@@ -2693,12 +2695,12 @@
         def g(n1, n2):
             for i in range(10):
                 f(n1)
-            for i in range(10):                
+            for i in range(10):
                 f(n2)
 
         nn = [10, 3]
         assert self.meta_interp(g, nn) == g(*nn)
-        
+
         # The attempts of retracing first loop will end up retracing the
         # second and thus fail 5 times, saturating the retrace_count. Instead a
         # bridge back to the preamble of the first loop is produced. A guard in
@@ -2709,7 +2711,7 @@
         self.check_tree_loop_count(2 + 3)
 
         # FIXME: Add a gloabl retrace counter and test that we are not trying more than 5 times.
-        
+
         def g(n):
             for i in range(n):
                 for j in range(10):
@@ -2945,15 +2947,15 @@
             a = [0, 1, 2, 3, 4]
             while i < n:
                 myjitdriver.jit_merge_point(sa=sa, n=n, a=a, i=i)
-                if i < n/2:
+                if i < n / 2:
                     sa += a[4]
-                elif i == n/2:
+                elif i == n / 2:
                     a.pop()
                 i += 1
         res = self.meta_interp(f, [32])
         assert res == f(32)
         self.check_loops(arraylen_gc=2)
-        
+
 class TestOOtype(BasicTests, OOJitMixin):
 
     def test_oohash(self):
@@ -3173,7 +3175,7 @@
         res = self.meta_interp(f, [32])
         assert res == f(32)
         self.check_tree_loop_count(3)
-        
+
     def test_two_loopinvariant_arrays3(self):
         from pypy.rpython.lltypesystem import lltype, llmemory, rffi
         myjitdriver = JitDriver(greens = [], reds = ['sa', 'n', 'i', 'a'])
@@ -3197,7 +3199,7 @@
         res = self.meta_interp(f, [32])
         assert res == f(32)
         self.check_tree_loop_count(2)
-        
+
     def test_two_loopinvariant_arrays_boxed(self):
         class A(object):
             def __init__(self, a):
@@ -3222,7 +3224,7 @@
         res = self.meta_interp(f, [32])
         assert res == f(32)
         self.check_loops(arraylen_gc=2, everywhere=True)
-        
+
     def test_release_gil_flush_heap_cache(self):
         if sys.platform == "win32":
             py.test.skip("needs 'time'")
@@ -3298,5 +3300,114 @@
 
         self.meta_interp(main, [10])
 
+    def test_look_inside_iff_const(self):
+        @look_inside_iff(lambda arg: isconstant(arg))
+        def f(arg):
+            s = 0
+            while arg > 0:
+                s += arg
+                arg -= 1
+            return s
+
+        driver = JitDriver(greens = ['code'], reds = ['n', 'arg', 's'])
+
+        def main(code, n, arg):
+            s = 0
+            while n > 0:
+                driver.jit_merge_point(code=code, n=n, arg=arg, s=s)
+                if code == 0:
+                    s += f(arg)
+                else:
+                    s += f(1)
+                n -= 1
+            return s
+
+        res = self.meta_interp(main, [0, 10, 2], enable_opts='')
+        assert res == main(0, 10, 2)
+        self.check_loops(call=1)
+        res = self.meta_interp(main, [1, 10, 2], enable_opts='')
+        assert res == main(1, 10, 2)
+        self.check_loops(call=0)
+
+    def test_look_inside_iff_virtual(self):
+        # There's no good reason for this to be look_inside_iff, but it's a test!
+        @look_inside_iff(lambda arg, n: isvirtual(arg))
+        def f(arg, n):
+            if n == 100:
+                for i in xrange(n):
+                    n += i
+            return arg.x
+        class A(object):
+            def __init__(self, x):
+                self.x = x
+        driver = JitDriver(greens=['n'], reds=['i', 'a'])
+        def main(n):
+            i = 0
+            a = A(3)
+            while i < 20:
+                driver.jit_merge_point(i=i, n=n, a=a)
+                if n == 0:
+                    i += f(a, n)
+                else:
+                    i += f(A(2), n)
+        res = self.meta_interp(main, [0], enable_opts='')
+        assert res == main(0)
+        self.check_loops(call=1, getfield_gc=0)
+        res = self.meta_interp(main, [1], enable_opts='')
+        assert res == main(1)
+        self.check_loops(call=0, getfield_gc=0)
+
+    def test_reuse_elidable_result(self):
+        driver = JitDriver(reds=['n', 's'], greens = [])
+        def main(n):
+            s = 0
+            while n > 0:
+                driver.jit_merge_point(s=s, n=n)
+                s += len(str(n)) + len(str(n))
+                n -= 1
+            return s
+        res = self.meta_interp(main, [10])
+        assert res == main(10)
+        self.check_loops({
+            'call': 1, 'guard_no_exception': 1, 'guard_true': 1, 'int_add': 2,
+            'int_gt': 1, 'int_sub': 1, 'strlen': 1, 'jump': 1,
+        })
+
+    def test_look_inside_iff_const_getarrayitem_gc_pure(self):
+        driver = JitDriver(greens=['unroll'], reds=['s', 'n'])
+
+        class A(object):
+            _immutable_fields_ = ["x[*]"]
+            def __init__(self, x):
+                self.x = [x]
+
+        @look_inside_iff(lambda x: isconstant(x))
+        def f(x):
+            i = 0
+            for c in x:
+                i += 1
+            return i
+
+        def main(unroll, n):
+            s = 0
+            while n > 0:
+                driver.jit_merge_point(s=s, n=n, unroll=unroll)
+                if unroll:
+                    x = A("xx")
+                else:
+                    x = A("x" * n)
+                s += f(x.x[0])
+                n -= 1
+            return s
+
+        res = self.meta_interp(main, [0, 10])
+        assert res == main(0, 10)
+        # 2 calls, one for f() and one for char_mul
+        self.check_loops(call=2)
+        res = self.meta_interp(main, [1, 10])
+        assert res == main(1, 10)
+        self.check_loops(call=0)
+
+
 class TestLLtype(BaseLLtypeTests, LLJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_dict.py b/pypy/jit/metainterp/test/test_dict.py
--- a/pypy/jit/metainterp/test/test_dict.py
+++ b/pypy/jit/metainterp/test/test_dict.py
@@ -153,11 +153,7 @@
 
         res = self.meta_interp(f, [100], listops=True)
         assert res == f(50)
-        # XXX: ideally there would be 7 calls here, but repeated CALL_PURE with
-        # the same arguments are not folded, because we have conflicting
-        # definitions of pure, once strhash can be appropriately folded
-        # this should be decreased to seven.
-        self.check_loops({"call": 8, "guard_false": 1, "guard_no_exception": 6,
+        self.check_loops({"call": 7, "guard_false": 1, "guard_no_exception": 6,
                           "guard_true": 1, "int_and": 1, "int_gt": 1,
                           "int_is_true": 1, "int_sub": 1, "jump": 1,
                           "new_with_vtable": 1, "setfield_gc": 1})
diff --git a/pypy/jit/metainterp/test/test_heapcache.py b/pypy/jit/metainterp/test/test_heapcache.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/metainterp/test/test_heapcache.py
@@ -0,0 +1,365 @@
+from pypy.jit.metainterp.heapcache import HeapCache
+from pypy.jit.metainterp.resoperation import rop
+from pypy.jit.metainterp.history import ConstInt
+
+box1 = object()
+box2 = object()
+box3 = object()
+box4 = object()
+lengthbox1 = object()
+lengthbox2 = object()
+descr1 = object()
+descr2 = object()
+descr3 = object()
+
+index1 = ConstInt(0)
+index2 = ConstInt(1)
+
+
+class FakeEffektinfo(object):
+    EF_ELIDABLE_CANNOT_RAISE           = 0 #elidable function (and cannot raise)
+    EF_LOOPINVARIANT                   = 1 #special: call it only once per loop
+    EF_CANNOT_RAISE                    = 2 #a function which cannot raise
+    EF_ELIDABLE_CAN_RAISE              = 3 #elidable function (but can raise)
+    EF_CAN_RAISE                       = 4 #normal function (can raise)
+    EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE = 5 #can raise and force virtualizables
+    EF_RANDOM_EFFECTS                  = 6 #can do whatever
+
+    OS_ARRAYCOPY = 0
+
+    def __init__(self, extraeffect, oopspecindex):
+        self.extraeffect = extraeffect
+        self.oopspecindex = oopspecindex
+
+class FakeCallDescr(object):
+    def __init__(self, extraeffect, oopspecindex=None):
+        self.extraeffect = extraeffect
+        self.oopspecindex = oopspecindex
+
+    def get_extra_info(self):
+        return FakeEffektinfo(self.extraeffect, self.oopspecindex)
+
+class TestHeapCache(object):
+    def test_known_class_box(self):
+        h = HeapCache()
+        assert not h.is_class_known(1)
+        assert not h.is_class_known(2)
+        h.class_now_known(1)
+        assert h.is_class_known(1)
+        assert not h.is_class_known(2)
+
+        h.reset()
+        assert not h.is_class_known(1)
+        assert not h.is_class_known(2)
+
+    def test_nonstandard_virtualizable(self):
+        h = HeapCache()
+        assert not h.is_nonstandard_virtualizable(1)
+        assert not h.is_nonstandard_virtualizable(2)
+        h.nonstandard_virtualizables_now_known(1)
+        assert h.is_nonstandard_virtualizable(1)
+        assert not h.is_nonstandard_virtualizable(2)
+
+        h.reset()
+        assert not h.is_nonstandard_virtualizable(1)
+        assert not h.is_nonstandard_virtualizable(2)
+
+
+    def test_heapcache_fields(self):
+        h = HeapCache()
+        assert h.getfield(box1, descr1) is None
+        assert h.getfield(box1, descr2) is None
+        h.setfield(box1, descr1, box2)
+        assert h.getfield(box1, descr1) is box2
+        assert h.getfield(box1, descr2) is None
+        h.setfield(box1, descr2, box3)
+        assert h.getfield(box1, descr1) is box2
+        assert h.getfield(box1, descr2) is box3
+        h.setfield(box1, descr1, box3)
+        assert h.getfield(box1, descr1) is box3
+        assert h.getfield(box1, descr2) is box3
+        h.setfield(box3, descr1, box1)
+        assert h.getfield(box3, descr1) is box1
+        assert h.getfield(box1, descr1) is None
+        assert h.getfield(box1, descr2) is box3
+
+        h.reset()
+        assert h.getfield(box1, descr1) is None
+        assert h.getfield(box1, descr2) is None
+        assert h.getfield(box3, descr1) is None
+
+    def test_heapcache_read_fields_multiple(self):
+        h = HeapCache()
+        h.getfield_now_known(box1, descr1, box2)
+        h.getfield_now_known(box3, descr1, box4)
+        assert h.getfield(box1, descr1) is box2
+        assert h.getfield(box1, descr2) is None
+        assert h.getfield(box3, descr1) is box4
+        assert h.getfield(box3, descr2) is None
+
+        h.reset()
+        assert h.getfield(box1, descr1) is None
+        assert h.getfield(box1, descr2) is None
+        assert h.getfield(box3, descr1) is None
+        assert h.getfield(box3, descr2) is None
+
+    def test_heapcache_write_fields_multiple(self):
+        h = HeapCache()
+        h.setfield(box1, descr1, box2)
+        assert h.getfield(box1, descr1) is box2
+        h.setfield(box3, descr1, box4)
+        assert h.getfield(box3, descr1) is box4
+        assert h.getfield(box1, descr1) is None # box1 and box3 can alias
+
+        h = HeapCache()
+        h.new(box1)
+        h.setfield(box1, descr1, box2)
+        assert h.getfield(box1, descr1) is box2
+        h.setfield(box3, descr1, box4)
+        assert h.getfield(box3, descr1) is box4
+        assert h.getfield(box1, descr1) is None # box1 and box3 can alias
+
+        h = HeapCache()
+        h.new(box1)
+        h.new(box3)
+        h.setfield(box1, descr1, box2)
+        assert h.getfield(box1, descr1) is box2
+        h.setfield(box3, descr1, box4)
+        assert h.getfield(box3, descr1) is box4
+        assert h.getfield(box1, descr1) is box2 # box1 and box3 cannot alias
+        h.setfield(box1, descr1, box3)
+        assert h.getfield(box1, descr1) is box3
+
+
+    def test_heapcache_arrays(self):
+        h = HeapCache()
+        assert h.getarrayitem(box1, descr1, index1) is None
+        assert h.getarrayitem(box1, descr2, index1) is None
+        assert h.getarrayitem(box1, descr1, index2) is None
+        assert h.getarrayitem(box1, descr2, index2) is None
+
+        h.setarrayitem(box1, descr1, index1, box2)
+        assert h.getarrayitem(box1, descr1, index1) is box2
+        assert h.getarrayitem(box1, descr2, index1) is None
+        assert h.getarrayitem(box1, descr1, index2) is None
+        assert h.getarrayitem(box1, descr2, index2) is None
+        h.setarrayitem(box1, descr1, index2, box4)
+        assert h.getarrayitem(box1, descr1, index1) is box2
+        assert h.getarrayitem(box1, descr2, index1) is None
+        assert h.getarrayitem(box1, descr1, index2) is box4
+        assert h.getarrayitem(box1, descr2, index2) is None
+
+        h.setarrayitem(box1, descr2, index1, box3)
+        assert h.getarrayitem(box1, descr1, index1) is box2
+        assert h.getarrayitem(box1, descr2, index1) is box3
+        assert h.getarrayitem(box1, descr1, index2) is box4
+        assert h.getarrayitem(box1, descr2, index2) is None
+
+        h.setarrayitem(box1, descr1, index1, box3)
+        assert h.getarrayitem(box1, descr1, index1) is box3
+        assert h.getarrayitem(box1, descr2, index1) is box3
+        assert h.getarrayitem(box1, descr1, index2) is box4
+        assert h.getarrayitem(box1, descr2, index2) is None
+
+        h.setarrayitem(box3, descr1, index1, box1)
+        assert h.getarrayitem(box3, descr1, index1) is box1
+        assert h.getarrayitem(box1, descr1, index1) is None
+        assert h.getarrayitem(box1, descr2, index1) is box3
+        assert h.getarrayitem(box1, descr1, index2) is box4
+        assert h.getarrayitem(box1, descr2, index2) is None
+
+        h.reset()
+        assert h.getarrayitem(box1, descr1, index1) is None
+        assert h.getarrayitem(box1, descr2, index1) is None
+        assert h.getarrayitem(box3, descr1, index1) is None
+
+    def test_heapcache_array_nonconst_index(self):
+        h = HeapCache()
+        h.setarrayitem(box1, descr1, index1, box2)
+        h.setarrayitem(box1, descr1, index2, box4)
+        assert h.getarrayitem(box1, descr1, index1) is box2
+        assert h.getarrayitem(box1, descr1, index2) is box4
+        h.setarrayitem(box1, descr1, box2, box3)
+        assert h.getarrayitem(box1, descr1, index1) is None
+        assert h.getarrayitem(box1, descr1, index2) is None
+
+    def test_heapcache_read_fields_multiple_array(self):
+        h = HeapCache()
+        h.getarrayitem_now_known(box1, descr1, index1, box2)
+        h.getarrayitem_now_known(box3, descr1, index1, box4)
+        assert h.getarrayitem(box1, descr1, index1) is box2
+        assert h.getarrayitem(box1, descr2, index1) is None
+        assert h.getarrayitem(box3, descr1, index1) is box4
+        assert h.getarrayitem(box3, descr2, index1) is None
+
+        h.reset()
+        assert h.getarrayitem(box1, descr1, index1) is None
+        assert h.getarrayitem(box1, descr2, index1) is None
+        assert h.getarrayitem(box3, descr1, index1) is None
+        assert h.getarrayitem(box3, descr2, index1) is None
+
+    def test_heapcache_write_fields_multiple_array(self):
+        h = HeapCache()
+        h.setarrayitem(box1, descr1, index1, box2)
+        assert h.getarrayitem(box1, descr1, index1) is box2
+        h.setarrayitem(box3, descr1, index1, box4)
+        assert h.getarrayitem(box3, descr1, index1) is box4
+        assert h.getarrayitem(box1, descr1, index1) is None # box1 and box3 can alias
+
+        h = HeapCache()
+        h.new(box1)
+        h.setarrayitem(box1, descr1, index1, box2)
+        assert h.getarrayitem(box1, descr1, index1) is box2
+        h.setarrayitem(box3, descr1, index1, box4)
+        assert h.getarrayitem(box3, descr1, index1) is box4
+        assert h.getarrayitem(box1, descr1, index1) is None # box1 and box3 can alias
+
+        h = HeapCache()
+        h.new(box1)
+        h.new(box3)
+        h.setarrayitem(box1, descr1, index1, box2)
+        assert h.getarrayitem(box1, descr1, index1) is box2
+        h.setarrayitem(box3, descr1, index1, box4)
+        assert h.getarrayitem(box3, descr1, index1) is box4
+        assert h.getarrayitem(box1, descr1, index1) is box2 # box1 and box3 cannot alias
+        h.setarrayitem(box1, descr1, index1, box3)
+        assert h.getarrayitem(box3, descr1, index1) is box4
+        assert h.getarrayitem(box1, descr1, index1) is box3 # box1 and box3 cannot alias
+
+    def test_length_cache(self):
+        h = HeapCache()
+        h.new_array(box1, lengthbox1)
+        assert h.arraylen(box1) is lengthbox1
+
+        assert h.arraylen(box2) is None
+        h.arraylen_now_known(box2, lengthbox2)
+        assert h.arraylen(box2) is lengthbox2
+
+
+    def test_invalidate_cache(self):
+        h = HeapCache()
+        h.setfield(box1, descr1, box2)
+        h.setarrayitem(box1, descr1, index1, box2)
+        h.setarrayitem(box1, descr1, index2, box4)
+        h.invalidate_caches(rop.INT_ADD, None, [])
+        h.invalidate_caches(rop.INT_ADD_OVF, None, [])
+        h.invalidate_caches(rop.SETFIELD_RAW, None, [])
+        h.invalidate_caches(rop.SETARRAYITEM_RAW, None, [])
+        assert h.getfield(box1, descr1) is box2
+        assert h.getarrayitem(box1, descr1, index1) is box2
+        assert h.getarrayitem(box1, descr1, index2) is box4
+
+        h.invalidate_caches(
+            rop.CALL, FakeCallDescr(FakeEffektinfo.EF_ELIDABLE_CANNOT_RAISE), [])
+        assert h.getfield(box1, descr1) is box2
+        assert h.getarrayitem(box1, descr1, index1) is box2
+        assert h.getarrayitem(box1, descr1, index2) is box4
+
+        h.invalidate_caches(
+            rop.CALL_LOOPINVARIANT, FakeCallDescr(FakeEffektinfo.EF_LOOPINVARIANT), [])
+
+        h.invalidate_caches(
+            rop.CALL, FakeCallDescr(FakeEffektinfo.EF_RANDOM_EFFECTS), [])
+        assert h.getfield(box1, descr1) is None
+        assert h.getarrayitem(box1, descr1, index1) is None
+        assert h.getarrayitem(box1, descr1, index2) is None
+
+
+    def test_replace_box(self):
+        h = HeapCache()
+        h.setfield(box1, descr1, box2)
+        h.setfield(box1, descr2, box3)
+        h.setfield(box2, descr3, box3)
+        h.replace_box(box1, box4)
+        assert h.getfield(box1, descr1) is None
+        assert h.getfield(box1, descr2) is None
+        assert h.getfield(box4, descr1) is box2
+        assert h.getfield(box4, descr2) is box3
+        assert h.getfield(box2, descr3) is box3
+
+    def test_replace_box_array(self):
+        h = HeapCache()
+        h.setarrayitem(box1, descr1, index1, box2)
+        h.setarrayitem(box1, descr2, index1, box3)
+        h.arraylen_now_known(box1, lengthbox1)
+        h.setarrayitem(box2, descr1, index2, box1)
+        h.setarrayitem(box3, descr2, index2, box1)
+        h.setarrayitem(box2, descr3, index2, box3)
+        h.replace_box(box1, box4)
+        assert h.getarrayitem(box1, descr1, index1) is None
+        assert h.getarrayitem(box1, descr2, index1) is None
+        assert h.arraylen(box1) is None
+        assert h.arraylen(box4) is lengthbox1
+        assert h.getarrayitem(box4, descr1, index1) is box2
+        assert h.getarrayitem(box4, descr2, index1) is box3
+        assert h.getarrayitem(box2, descr1, index2) is box4
+        assert h.getarrayitem(box3, descr2, index2) is box4
+        assert h.getarrayitem(box2, descr3, index2) is box3
+
+        h.replace_box(lengthbox1, lengthbox2)
+        assert h.arraylen(box4) is lengthbox2
+
+    def test_ll_arraycopy(self):
+        h = HeapCache()
+        h.new_array(box1, lengthbox1)
+        h.setarrayitem(box1, descr1, index1, box2)
+        h.new_array(box2, lengthbox1)
+        # Just need the destination box for this call
+        h.invalidate_caches(
+            rop.CALL,
+            FakeCallDescr(FakeEffektinfo.EF_CANNOT_RAISE, FakeEffektinfo.OS_ARRAYCOPY),
+            [None, None, box2, None, None]
+        )
+        assert h.getarrayitem(box1, descr1, index1) is box2
+        h.invalidate_caches(
+            rop.CALL,
+            FakeCallDescr(FakeEffektinfo.EF_CANNOT_RAISE, FakeEffektinfo.OS_ARRAYCOPY),
+            [None, None, box3, None, None]
+        )
+        assert h.getarrayitem(box1, descr1, index1) is None
+
+        h.setarrayitem(box4, descr1, index1, box2)
+        assert h.getarrayitem(box4, descr1, index1) is box2
+        h.invalidate_caches(
+            rop.CALL,
+            FakeCallDescr(FakeEffektinfo.EF_CANNOT_RAISE, FakeEffektinfo.OS_ARRAYCOPY),
+            [None, None, box2, None, None]
+        )
+        assert h.getarrayitem(box4, descr1, index1) is None
+
+    def test_unescaped(self):
+        h = HeapCache()
+        assert not h.is_unescaped(box1)
+        h.new(box2)
+        assert h.is_unescaped(box2)
+        h.invalidate_caches(rop.SETFIELD_GC, None, [box2, box1])
+        assert h.is_unescaped(box2)
+        h.invalidate_caches(rop.SETFIELD_GC, None, [box1, box2])
+        assert not h.is_unescaped(box2)
+
+    def test_unescaped_testing(self):
+        h = HeapCache()
+        h.new(box1)
+        h.new(box2)
+        assert h.is_unescaped(box1)
+        assert h.is_unescaped(box2)
+        # Putting a virtual inside of another virtual doesn't escape it.
+        h.invalidate_caches(rop.SETFIELD_GC, None, [box1, box2])
+        assert h.is_unescaped(box2)
+        # Reading a field from a virtual doesn't escape it.
+        h.invalidate_caches(rop.GETFIELD_GC, None, [box1])
+        assert h.is_unescaped(box1)
+        # Escaping a virtual transitively escapes anything inside of it.
+        assert not h.is_unescaped(box3)
+        h.invalidate_caches(rop.SETFIELD_GC, None, [box3, box1])
+        assert not h.is_unescaped(box1)
+        assert not h.is_unescaped(box2)
+
+    def test_unescaped_array(self):
+        h = HeapCache()
+        h.new_array(box1, lengthbox1)
+        assert h.is_unescaped(box1)
+        h.invalidate_caches(rop.SETARRAYITEM_GC, None, [box1, index1, box2])
+        assert h.is_unescaped(box1)
+        h.invalidate_caches(rop.SETARRAYITEM_GC, None, [box2, index1, box1])
+        assert not h.is_unescaped(box1)
\ No newline at end of file
diff --git a/pypy/jit/metainterp/test/test_list.py b/pypy/jit/metainterp/test/test_list.py
--- a/pypy/jit/metainterp/test/test_list.py
+++ b/pypy/jit/metainterp/test/test_list.py
@@ -34,7 +34,7 @@
                 l = [x + 1]
                 n -= 1
             return l[0]
-        
+
         res = self.meta_interp(f, [10], listops=True)
         assert res == f(10)
         self.check_all_virtualized()
@@ -60,7 +60,7 @@
 
     def test_ll_fixed_setitem_fast(self):
         jitdriver = JitDriver(greens = [], reds = ['n', 'l'])
-        
+
         def f(n):
             l = [1, 2, 3]
 
@@ -116,7 +116,7 @@
         assert res == f(10)
         py.test.skip("'[non-null] * n' gives a residual call so far")
         self.check_loops(setarrayitem_gc=0, getarrayitem_gc=0, call=0)
-    
+
     def test_arraycopy_simpleoptimize(self):
         def f():
             l = [1, 2, 3, 4]
@@ -208,6 +208,26 @@
         assert res == f(15)
         self.check_loops(guard_exception=0)
 
+    def test_virtual_resize(self):
+        jitdriver = JitDriver(greens = [], reds = ['n', 's'])
+        def f(n):
+            s = 0
+            while n > 0:
+                jitdriver.jit_merge_point(n=n, s=s)
+                lst = []
+                lst += [1]
+                n -= len(lst)
+                s += lst[0]
+                lst.pop()
+                lst.append(1)
+                s /= lst.pop()
+            return s
+        res = self.meta_interp(f, [15], listops=True)
+        assert res == f(15)
+        self.check_loops({"int_add": 1, "int_sub": 1, "int_gt": 1,
+                          "guard_true": 1, "jump": 1})
+
+
 class TestOOtype(ListTests, OOJitMixin):
     pass
 
@@ -236,8 +256,6 @@
             return a * b
         res = self.meta_interp(f, [37])
         assert res == f(37)
-        # There is the one actual field on a, plus 2 getfield's from the list
-        # itself, 1 to get the length (which is then incremented and passed to
-        # the resize func), and then a read of the items field to actually
-        # perform the setarrayitem on
-        self.check_loops(getfield_gc=5, everywhere=True)
+        # There is the one actual field on a, plus several fields on the list
+        # itself
+        self.check_loops(getfield_gc=10, everywhere=True)
diff --git a/pypy/jit/metainterp/test/test_slist.py b/pypy/jit/metainterp/test/test_slist.py
--- a/pypy/jit/metainterp/test/test_slist.py
+++ b/pypy/jit/metainterp/test/test_slist.py
@@ -5,7 +5,6 @@
 class ListTests(object):
 
     def test_basic_list(self):
-        py.test.skip("not yet")
         myjitdriver = JitDriver(greens = [], reds = ['n', 'lst'])
         def f(n):
             lst = []
@@ -34,7 +33,7 @@
             return m
         res = self.interp_operations(f, [11], listops=True)
         assert res == 49
-        self.check_operations_history(call=5)
+        self.check_operations_history(call=3)
 
     def test_list_of_voids(self):
         myjitdriver = JitDriver(greens = [], reds = ['n', 'lst'])
@@ -93,7 +92,7 @@
             return x
         res = self.meta_interp(f, [-2], listops=True)
         assert res == 41
-        self.check_loops(call=1, guard_value=0)
+        self.check_loops(call=0, guard_value=0)
 
 # we don't support resizable lists on ootype
 #class TestOOtype(ListTests, OOJitMixin):
diff --git a/pypy/jit/metainterp/test/test_string.py b/pypy/jit/metainterp/test/test_string.py
--- a/pypy/jit/metainterp/test/test_string.py
+++ b/pypy/jit/metainterp/test/test_string.py
@@ -1,9 +1,11 @@
 import py
+
+from pypy.jit.codewriter.policy import StopAtXPolicy
+from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin
+from pypy.rlib.debug import debug_print
 from pypy.rlib.jit import JitDriver, dont_look_inside, we_are_jitted
-from pypy.rlib.debug import debug_print
-from pypy.jit.codewriter.policy import StopAtXPolicy
+from pypy.rlib.rstring import StringBuilder
 from pypy.rpython.ootypesystem import ootype
-from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin
 
 
 class StringTests:
@@ -27,7 +29,7 @@
             return i
         res = self.meta_interp(f, [10, True, _str('h')], listops=True)
         assert res == 5
-        self.check_loops(**{self.CALL: 1, self.CALL_PURE: 0})
+        self.check_loops(**{self.CALL: 1, self.CALL_PURE: 0, 'everywhere': True})
 
     def test_eq_folded(self):
         _str = self._str
@@ -327,7 +329,7 @@
     def test_str_slice_len_surviving(self):
         _str = self._str
         longstring = _str("Unrolling Trouble")
-        mydriver = JitDriver(reds = ['i', 'a', 'sa'], greens = []) 
+        mydriver = JitDriver(reds = ['i', 'a', 'sa'], greens = [])
         def f(a):
             i = sa = a
             while i < len(longstring):
@@ -343,7 +345,7 @@
         fillers = _str("abcdefghijklmnopqrstuvwxyz")
         data = _str("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
 
-        mydriver = JitDriver(reds = ['line', 'noise', 'res'], greens = []) 
+        mydriver = JitDriver(reds = ['line', 'noise', 'res'], greens = [])
         def f():
             line = data
             noise = fillers
@@ -370,7 +372,7 @@
             def __init__(self, value):
                 self.value = value
         mydriver = JitDriver(reds = ['ratio', 'line', 'noise', 'res'],
-                             greens = []) 
+                             greens = [])
         def f():
             line = Str(data)
             noise = Str(fillers)
@@ -408,7 +410,7 @@
             return len(sa)
         assert self.meta_interp(f, [16]) == f(16)
 
-    def test_loop_invariant_string_slize(self):
+    def test_loop_invariant_string_slice(self):
         _str = self._str
         mydriver = JitDriver(reds = ['i', 'n', 'sa', 's', 's1'], greens = [])
         def f(n, c):
@@ -425,7 +427,7 @@
             return sa
         assert self.meta_interp(f, [16, 'a']) == f(16, 'a')
 
-    def test_loop_invariant_string_slize_boxed(self):
+    def test_loop_invariant_string_slice_boxed(self):
         class Str(object):
             def __init__(self, value):
                 self.value = value
@@ -445,7 +447,7 @@
             return sa
         assert self.meta_interp(f, [16, 'a']) == f(16, 'a')
 
-    def test_loop_invariant_string_slize_in_array(self):
+    def test_loop_invariant_string_slice_in_array(self):
         _str = self._str
         mydriver = JitDriver(reds = ['i', 'n', 'sa', 's', 's1'], greens = [])
         def f(n, c):
@@ -513,7 +515,7 @@
                 m -= 1
             return 42
         self.meta_interp(f, [6, 7])
-        self.check_loops(call=3,    # str(), _str(), escape()
+        self.check_loops(call=1,    # escape()
                          newunicode=1, unicodegetitem=0,
                          unicodesetitem=1, copyunicodecontent=1)
 
@@ -536,3 +538,55 @@
         self.check_loops(call_pure=0, call=1,
                          newunicode=0, unicodegetitem=0,
                          unicodesetitem=0, copyunicodecontent=0)
+
+    def test_join_chars(self):
+        jitdriver = JitDriver(reds=['a', 'b', 'c', 'i'], greens=[])
+        def f(a, b, c):
+            i = 0
+            while i < 10:
+                jitdriver.jit_merge_point(a=a, b=b, c=c, i=i)
+                x = []
+                if a:
+                    x.append("a")
+                if b:
+                    x.append("b")
+                if c:
+                    x.append("c")
+                i += len("".join(x))
+            return i
+        res = self.meta_interp(f, [1, 1, 1])
+        assert res == f(True, True, True)
+        # The "".join should be unrolled, since the length of x is known since
+        # it is virtual, ensure there are no calls to ll_join_chars, or
+        # allocations.
+        self.check_loops({
+            "guard_true": 5, "int_is_true": 3, "int_lt": 2, "int_add": 2, "jump": 2,
+        }, everywhere=True)
+
+    def test_virtual_copystringcontent(self):
+        jitdriver = JitDriver(reds=['n', 'result'], greens=[])
+        def main(n):
+            result = 0
+            while n >= 0:
+                jitdriver.jit_merge_point(n=n, result=result)
+                b = StringBuilder(6)
+                b.append("Hello!")
+                result += ord(b.build()[0])
+                n -= 1
+            return result
+        res = self.meta_interp(main, [9])
+        assert res == main(9)
+
+    def test_virtual_copystringcontent2(self):
+        jitdriver = JitDriver(reds=['n', 'result'], greens=[])
+        def main(n):
+            result = 0
+            while n >= 0:
+                jitdriver.jit_merge_point(n=n, result=result)
+                b = StringBuilder(6)
+                b.append("Hello!")
+                result += ord((b.build() + "xyz")[0])
+                n -= 1
+            return result
+        res = self.meta_interp(main, [9])
+        assert res == main(9)
diff --git a/pypy/jit/metainterp/test/test_tracingopts.py b/pypy/jit/metainterp/test/test_tracingopts.py
--- a/pypy/jit/metainterp/test/test_tracingopts.py
+++ b/pypy/jit/metainterp/test/test_tracingopts.py
@@ -1,7 +1,10 @@
+import sys
+
+from pypy.jit.metainterp.test.support import LLJitMixin
+from pypy.rlib import jit
+from pypy.rlib.rarithmetic import ovfcheck
+
 import py
-import sys
-from pypy.rlib import jit
-from pypy.jit.metainterp.test.support import LLJitMixin
 
 
 class TestLLtype(LLJitMixin):
@@ -257,6 +260,28 @@
         self.check_operations_history(setarrayitem_gc=2, setfield_gc=2,
                                       getarrayitem_gc=0, getfield_gc=2)
 
+    def test_promote_changes_array_cache(self):
+        a1 = [0, 0]
+        a2 = [0, 0]
+        def fn(n):
+            if n > 0:
+                a = a1
+            else:
+                a = a2
+            a[0] = n
+            jit.hint(n, promote=True)
+            x1 = a[0]
+            jit.hint(x1, promote=True)
+            a[n - n] = n + 1
+            return a[0] + x1
+        res = self.interp_operations(fn, [7])
+        assert res == 7 + 7 + 1
+        self.check_operations_history(getarrayitem_gc=0, guard_value=1)
+        res = self.interp_operations(fn, [-7])
+        assert res == -7 - 7 + 1
+        self.check_operations_history(getarrayitem_gc=0, guard_value=1)
+
+
     def test_list_caching(self):
         a1 = [0, 0]
         a2 = [0, 0]
@@ -357,7 +382,7 @@
         assert res == f(10, 1, 1)
         self.check_history(getarrayitem_gc=0, getfield_gc=0)
 
-    def test_heap_caching_pure(self):
+    def test_heap_caching_array_pure(self):
         class A(object):
             pass
         p1 = A()
@@ -405,3 +430,164 @@
         assert res == -7 + 7
         self.check_operations_history(getfield_gc=0)
         return
+
+    def test_heap_caching_multiple_objects(self):
+        class Gbl(object):
+            pass
+        g = Gbl()
+        class A(object):
+            pass
+        a1 = A()
+        g.a1 = a1
+        a1.x = 7
+        a2 = A()
+        g.a2 = a2
+        a2.x = 7
+        def gn(a1, a2):
+            return a1.x + a2.x
+        def fn(n):
+            if n < 0:
+                a1 = A()          # fresh objects: every field is written here
+                g.a1 = a1
+                a1.x = n
+                a2 = A()
+                g.a2 = a2
+                a2.x = n - 1
+            else:
+                a1 = g.a1         # pre-existing objects, fetched from g
+                a2 = g.a2
+            return a1.x + a2.x + gn(a1, a2)   # each x is read twice (here, gn)
+        res = self.interp_operations(fn, [-7])
+        assert res == 2 * -7 + 2 * -8
+        self.check_operations_history(setfield_gc=4, getfield_gc=0)
+        res = self.interp_operations(fn, [7])
+        assert res == 4 * 7
+        self.check_operations_history(getfield_gc=4)
+
+    def test_heap_caching_multiple_tuples(self):
+        class Gbl(object):
+            pass
+        g = Gbl()
+        def gn(a1, a2):
+            return a1[0] + a2[0]
+        def fn(n):
+            a1 = (n, )
+            g.a = a1              # store the tuple on a global-like object
+            a2 = (n - 1, )
+            g.a = a2              # overwrites g.a; a1 is still used below
+            jit.promote(n)
+            return a1[0] + a2[0] + gn(a1, a2)   # each item is read twice
+        res = self.interp_operations(fn, [7])
+        assert res == 2 * 7 + 2 * 6
+        self.check_operations_history(getfield_gc_pure=0)
+        res = self.interp_operations(fn, [-7])
+        assert res == 2 * -7 + 2 * -8
+        self.check_operations_history(getfield_gc_pure=0)
+
+    def test_heap_caching_multiple_arrays(self):
+        class Gbl(object):
+            pass
+        g = Gbl()
+        def fn(n):
+            a1 = [n, n, n]
+            g.a = a1
+            a1[0] = n
+            a2 = [n, n, n]
+            g.a = a2              # overwrites g.a; a1 is still used below
+            a2[0] = n - 1
+            return a1[0] + a2[0] + a1[0] + a2[0]   # two reads per list
+        res = self.interp_operations(fn, [7])
+        assert res == 2 * 7 + 2 * 6
+        self.check_operations_history(getarrayitem_gc=0)
+        res = self.interp_operations(fn, [-7])
+        assert res == 2 * -7 + 2 * -8
+        self.check_operations_history(getarrayitem_gc=0)
+
+    def test_heap_caching_multiple_arrays_getarrayitem(self):
+        class Gbl(object):
+            pass
+        g = Gbl()
+        g.a1 = [7, 8, 9]
+        g.a2 = [8, 9, 10, 11]
+
+        def fn(i):
+            if i < 0:             # not taken for the i == 0 run below
+                g.a1 = [7, 8, 9]
+                g.a2 = [7, 8, 9, 10]
+            jit.promote(i)
+            a1 = g.a1
+            a1[i + 1] = 15 # make lists mutable
+            a2 = g.a2
+            a2[i + 1] = 19        # writes index i + 1, not the index read below
+            return a1[i] + a2[i] + a1[i] + a2[i]   # two reads per list
+        res = self.interp_operations(fn, [0])
+        assert res == 2 * 7 + 2 * 8
+        self.check_operations_history(getarrayitem_gc=2)
+
+
+    def test_heap_caching_multiple_lists(self):
+        class Gbl(object):
+            pass
+        g = Gbl()
+        g.l = []
+        def fn(n):
+            if n < -100:          # not taken for the inputs used below
+                g.l.append(1)
+            a1 = [n, n, n]
+            g.l = a1
+            a1[0] = n
+            a2 = [n, n, n]
+            g.l = a2              # overwrites g.l; a1 is still used below
+            a2[0] = n - 1
+            return a1[0] + a2[0] + a1[0] + a2[0]   # two reads per list
+        res = self.interp_operations(fn, [7])
+        assert res == 2 * 7 + 2 * 6
+        self.check_operations_history(getarrayitem_gc=0, getfield_gc=0)
+        res = self.interp_operations(fn, [-7])
+        assert res == 2 * -7 + 2 * -8
+        self.check_operations_history(getarrayitem_gc=0, getfield_gc=0)
+
+    def test_length_caching(self):
+        class Gbl(object):
+            pass
+        g = Gbl()
+        g.a = [0] * 7
+        def fn(n):
+            a = g.a
+            res = len(a) + len(a)     # same list measured twice
+            a1 = [0] * n              # new list whose length is n
+            g.a = a1
+            return len(a1) + res
+        res = self.interp_operations(fn, [7])
+        assert res == 7 * 3           # three lengths of 7 each
+        self.check_operations_history(arraylen_gc=1)
+
+    def test_arraycopy(self):
+        class Gbl(object):
+            pass
+        g = Gbl()
+        g.a = [0] * 7
+        def fn(n):
+            assert n >= 0
+            a = g.a
+            x = [0] * n
+            x[2] = 21
+            return len(a[:n]) + x[2]  # the slice a[:n] copies part of a
+        res = self.interp_operations(fn, [3])
+        assert res == 24              # len(a[:3]) == 3, plus x[2] == 21
+        self.check_operations_history(getarrayitem_gc=0)
+
+    def test_fold_int_add_ovf(self):
+        def fn(n):
+            jit.promote(n)
+            try:
+                n = ovfcheck(n + 1)
+            except OverflowError:
+                return 12             # sentinel for the overflow case
+            else:
+                return n
+        res = self.interp_operations(fn, [3])
+        assert res == 4
+        self.check_operations_history(int_add_ovf=0)
+        res = self.interp_operations(fn, [sys.maxint])   # n + 1 overflows
+        assert res == 12
\ No newline at end of file
diff --git a/pypy/jit/metainterp/warmstate.py b/pypy/jit/metainterp/warmstate.py
--- a/pypy/jit/metainterp/warmstate.py
+++ b/pypy/jit/metainterp/warmstate.py
@@ -367,9 +367,9 @@
             # ---------- execute assembler ----------
             while True:     # until interrupted by an exception
                 metainterp_sd.profiler.start_running()
-                debug_start("jit-running")
+                #debug_start("jit-running")
                 fail_descr = warmrunnerdesc.execute_token(loop_token)
-                debug_stop("jit-running")
+                #debug_stop("jit-running")
                 metainterp_sd.profiler.end_running()
                 loop_token = None     # for test_memmgr
                 if vinfo is not None:
diff --git a/pypy/jit/tl/pypyjit.py b/pypy/jit/tl/pypyjit.py
--- a/pypy/jit/tl/pypyjit.py
+++ b/pypy/jit/tl/pypyjit.py
@@ -40,7 +40,7 @@
 config.objspace.usemodules.array = False
 config.objspace.usemodules._weakref = True
 config.objspace.usemodules._sre = False
-config.objspace.usemodules._lsprof = True
+config.objspace.usemodules._lsprof = False
 #
 config.objspace.usemodules._ffi = True
 config.objspace.usemodules.micronumpy = False
@@ -77,7 +77,7 @@
 
 def read_code():
     from pypy.module.marshal.interp_marshal import dumps
-    
+
     filename = 'pypyjit_demo.py'
     source = readfile(filename)
     ec = space.getexecutioncontext()
diff --git a/pypy/module/__builtin__/functional.py b/pypy/module/__builtin__/functional.py
--- a/pypy/module/__builtin__/functional.py
+++ b/pypy/module/__builtin__/functional.py
@@ -3,13 +3,13 @@
 
 """
 
+from pypy.interpreter.baseobjspace import Wrappable
 from pypy.interpreter.error import OperationError
-from pypy.interpreter.gateway import NoneNotWrapped
-from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.interpreter.gateway import NoneNotWrapped, interp2app, unwrap_spec
 from pypy.interpreter.typedef import TypeDef
-from pypy.interpreter.baseobjspace import Wrappable
+from pypy.rlib import jit
+from pypy.rlib.objectmodel import specialize
 from pypy.rlib.rarithmetic import r_uint, intmask
-from pypy.rlib.objectmodel import specialize
 from pypy.rlib.rbigint import rbigint
 
 
@@ -134,29 +134,15 @@
 
 
 @specialize.arg(2)
+@jit.look_inside_iff(lambda space, args, implementation_of:
+    jit.isconstant(len(args.arguments_w)) and
+    len(args.arguments_w) == 2
+)
 def min_max(space, args, implementation_of):
     if implementation_of == "max":
         compare = space.gt
     else:
         compare = space.lt
-
-    args_w = args.arguments_w
-    if len(args_w) == 2 and not args.keywords:
-        # simple case, suitable for the JIT
-        w_arg0, w_arg1 = args_w
-        if space.is_true(compare(w_arg0, w_arg1)):
-            return w_arg0
-        else:
-            return w_arg1
-    else:
-        return min_max_loop(space, args, implementation_of)
-
-@specialize.arg(2)
-def min_max_loop(space, args, implementation_of):
-    if implementation_of == "max":
-        compare = space.gt
-    else:
-        compare = space.lt
     args_w = args.arguments_w
     if len(args_w) > 1:
         w_sequence = space.newtuple(args_w)
diff --git a/pypy/module/_continuation/__init__.py b/pypy/module/_continuation/__init__.py
--- a/pypy/module/_continuation/__init__.py
+++ b/pypy/module/_continuation/__init__.py
@@ -37,4 +37,5 @@
     interpleveldefs = {
         'continulet': 'interp_continuation.W_Continulet',
         'permute': 'interp_continuation.permute',
+        '_p': 'interp_continuation.unpickle',      # pickle support
     }
diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py
--- a/pypy/module/_continuation/interp_continuation.py
+++ b/pypy/module/_continuation/interp_continuation.py
@@ -6,6 +6,7 @@
 from pypy.interpreter.typedef import TypeDef
 from pypy.interpreter.gateway import interp2app
 from pypy.interpreter.pycode import PyCode
+from pypy.interpreter.pyframe import PyFrame
 
 
 class W_Continulet(Wrappable):
@@ -21,69 +22,69 @@
     def check_sthread(self):
         ec = self.space.getexecutioncontext()
         if ec.stacklet_thread is not self.sthread:
-            start_state.clear()
+            global_state.clear()
             raise geterror(self.space, "inter-thread support is missing")
-        return ec
 
     def descr_init(self, w_callable, __args__):
         if self.sthread is not None:
             raise geterror(self.space, "continulet already __init__ialized")
-        start_state.origin = self
-        start_state.w_callable = w_callable
-        start_state.args = __args__
-        self.bottomframe = make_fresh_frame(self.space)
-        self.sthread = build_sthread(self.space)
-        try:
-            self.h = self.sthread.new(new_stacklet_callback)
-            if self.sthread.is_empty_handle(self.h):    # early return
-                raise MemoryError
-        except MemoryError:
-            self.sthread = None
-            start_state.clear()
-            raise getmemoryerror(self.space)
+        #
+        # hackish: build the frame "by hand", passing it the correct arguments
+        space = self.space
+        w_args, w_kwds = __args__.topacked()
+        bottomframe = space.createframe(get_entrypoint_pycode(space),
+                                        get_w_module_dict(space), None)
+        bottomframe.locals_stack_w[0] = space.wrap(self)
+        bottomframe.locals_stack_w[1] = w_callable
+        bottomframe.locals_stack_w[2] = w_args
+        bottomframe.locals_stack_w[3] = w_kwds
+        self.bottomframe = bottomframe
+        #
+        global_state.origin = self
+        sthread = build_sthread(self.space)
+        self.sthread = sthread
+        h = sthread.new(new_stacklet_callback)
+        post_switch(sthread, h)
 
     def switch(self, w_to):
         sthread = self.sthread
-        if sthread is None:
-            start_state.clear()
-            raise geterror(self.space, "continulet not initialized yet")
-        if sthread.is_empty_handle(self.h):
-            start_state.clear()
+        if sthread is not None and sthread.is_empty_handle(self.h):
+            global_state.clear()
             raise geterror(self.space, "continulet already finished")
         to = self.space.interp_w(W_Continulet, w_to, can_be_None=True)
+        if to is not None and to.sthread is None:
+            to = None
+        if sthread is None:      # if self is non-initialized:
+            if to is not None:   #     if we are given a 'to'
+                self = to        #         then just use it and ignore 'self'
+                sthread = self.sthread
+                to = None
+            else:
+                return get_result()  # else: no-op
         if to is not None:
             if to.sthread is not sthread:
-                start_state.clear()
-                if to.sthread is None:
-                    msg = "continulet not initialized yet"
-                else:
-                    msg = "cross-thread double switch"
-                raise geterror(self.space, msg)
+                global_state.clear()
+                raise geterror(self.space, "cross-thread double switch")
             if self is to:    # double-switch to myself: no-op
                 return get_result()
             if sthread.is_empty_handle(to.h):
-                start_state.clear()
+                global_state.clear()
                 raise geterror(self.space, "continulet already finished")
-        ec = self.check_sthread()
+        self.check_sthread()
         #
-        start_state.origin = self
+        global_state.origin = self
         if to is None:
             # simple switch: going to self.h
-            start_state.destination = self
+            global_state.destination = self
         else:
             # double switch: the final destination is to.h
-            start_state.destination = to
+            global_state.destination = to
         #
-        try:
-            do_switch(sthread, start_state.destination.h)
-        except MemoryError:
-            start_state.clear()
-            raise getmemoryerror(self.space)
-        #
-        return get_result()
+        h = sthread.switch(global_state.destination.h)
+        return post_switch(sthread, h)
 
     def descr_switch(self, w_value=None, w_to=None):
-        start_state.w_value = w_value
+        global_state.w_value = w_value
         return self.switch(w_to)
 
     def descr_throw(self, w_type, w_val=None, w_tb=None, w_to=None):
@@ -98,8 +99,8 @@
         #
         operr = OperationError(w_type, w_val, tb)
         operr.normalize_exception(space)
-        start_state.w_value = None
-        start_state.propagate_exception = operr
+        global_state.w_value = None
+        global_state.propagate_exception = operr
         return self.switch(w_to)
 
     def descr_is_pending(self):
@@ -107,12 +108,26 @@
                  and not self.sthread.is_empty_handle(self.h))
         return self.space.newbool(valid)
 
+    def descr__reduce__(self):    # exposed as continulet.__reduce__
+        from pypy.module._continuation import interp_pickle  # local: interp_pickle imports this module
+        return interp_pickle.reduce(self)
+
+    def descr__setstate__(self, w_args):    # exposed as continulet.__setstate__
+        from pypy.module._continuation import interp_pickle  # local: interp_pickle imports this module
+        interp_pickle.setstate(self, w_args)
+
 
 def W_Continulet___new__(space, w_subtype, __args__):
     r = space.allocate_instance(W_Continulet, w_subtype)
     r.__init__(space)
     return space.wrap(r)
 
+def unpickle(space, w_subtype):    # exported by the module as '_p'
+    """Pickle support: build a fresh W_Continulet of type w_subtype."""
+    r = space.allocate_instance(W_Continulet, w_subtype)
+    r.__init__(space)    # plain constructor only; descr_init() is not called
+    return space.wrap(r)
+
 
 W_Continulet.typedef = TypeDef(
     'continulet',
@@ -122,9 +137,10 @@
     switch      = interp2app(W_Continulet.descr_switch),
     throw       = interp2app(W_Continulet.descr_throw),
     is_pending  = interp2app(W_Continulet.descr_is_pending),
+    __reduce__  = interp2app(W_Continulet.descr__reduce__),
+    __setstate__= interp2app(W_Continulet.descr__setstate__),
     )
 
-
 # ____________________________________________________________
 
 # Continulet objects maintain a dummy frame object in order to ensure
@@ -133,27 +149,40 @@
 
 class State:
     def __init__(self, space):
-        from pypy.interpreter.astcompiler.consts import CO_OPTIMIZED
-        self.space = space 
+        self.space = space
         w_module = space.getbuiltinmodule('_continuation')
         self.w_error = space.getattr(w_module, space.wrap('error'))
-        self.w_memoryerror = OperationError(space.w_MemoryError, space.w_None)
-        self.dummy_pycode = PyCode(space, 0, 0, 0, CO_OPTIMIZED,
-                                   '', [], [], [], '',
-                                   '<bottom of continulet>', 0, '', [], [],
-                                   hidden_applevel=True)
+        # the following function switches away immediately, so that
+        # continulet.__init__() doesn't immediately run func(), but it
+        # also has the hidden purpose of making sure we have a single
+        # bottomframe for the whole duration of the continulet's run.
+        # Hackish: only the func_code is used, and used in the context
+        # of w_globals == this module, so we can access the name
+        # 'continulet' directly.
+        w_code = space.appexec([], '''():
+            def start(c, func, args, kwds):
+                if continulet.switch(c) is not None:
+                    raise TypeError(
+                     "can\'t send non-None value to a just-started continulet")
+                return func(c, *args, **kwds)
+            return start.func_code
+        ''')
+        self.entrypoint_pycode = space.interp_w(PyCode, w_code)
+        self.entrypoint_pycode.hidden_applevel = True
+        self.w_unpickle = w_module.get('_p')
+        self.w_module_dict = w_module.getdict(space)
 
 def geterror(space, message):
     cs = space.fromcache(State)
     return OperationError(cs.w_error, space.wrap(message))
 
-def getmemoryerror(space):
+def get_entrypoint_pycode(space):
     cs = space.fromcache(State)
-    return cs.w_memoryerror
+    return cs.entrypoint_pycode
 
-def make_fresh_frame(space):
+def get_w_module_dict(space):
     cs = space.fromcache(State)
-    return space.FrameClass(space, cs.dummy_pycode, None, None)
+    return cs.w_module_dict
 
 # ____________________________________________________________
 
@@ -164,76 +193,63 @@
         StackletThread.__init__(self, space.config)
         self.space = space
         self.ec = ec
+        # for unpickling
+        from pypy.rlib.rweakref import RWeakKeyDictionary
+        self.frame2continulet = RWeakKeyDictionary(PyFrame, W_Continulet)
 
 ExecutionContext.stacklet_thread = None
 
 # ____________________________________________________________
 
 
-class StartState:   # xxx a single global to pass around the function to start
+class GlobalState:
     def clear(self):
         self.origin = None
         self.destination = None
-        self.w_callable = None
-        self.args = None
         self.w_value = None
         self.propagate_exception = None
-start_state = StartState()
-start_state.clear()
+global_state = GlobalState()
+global_state.clear()
 
 
 def new_stacklet_callback(h, arg):
-    self       = start_state.origin
-    w_callable = start_state.w_callable
-    args       = start_state.args
-    start_state.clear()
-    try:
-        do_switch(self.sthread, h)
-    except MemoryError:
-        return h       # oups!  do an early return in this case
-    #
+    self = global_state.origin
+    self.h = h
+    global_state.clear()
     space = self.space
     try:
-        assert self.sthread.ec.topframeref() is None
-        self.sthread.ec.topframeref = jit.non_virtual_ref(self.bottomframe)
-        if start_state.propagate_exception is not None:
-            raise start_state.propagate_exception   # just propagate it further
-        if start_state.w_value is not space.w_None:
-            raise OperationError(space.w_TypeError, space.wrap(
-                "can't send non-None value to a just-started continulet"))
-
-        args = args.prepend(self.space.wrap(self))
-        w_result = space.call_args(w_callable, args)
+        frame = self.bottomframe
+        w_result = frame.execute_frame()
     except Exception, e:
-        start_state.propagate_exception = e
+        global_state.propagate_exception = e
     else:
-        start_state.w_value = w_result
+        global_state.w_value = w_result
     self.sthread.ec.topframeref = jit.vref_None
-    start_state.origin = self
-    start_state.destination = self
+    global_state.origin = self
+    global_state.destination = self
     return self.h
 
-
-def do_switch(sthread, h):
-    h = sthread.switch(h)
-    origin = start_state.origin
-    self = start_state.destination
-    start_state.origin = None
-    start_state.destination = None
+def post_switch(sthread, h):
+    origin = global_state.origin
+    self = global_state.destination
+    global_state.origin = None
+    global_state.destination = None
     self.h, origin.h = origin.h, h
     #
     current = sthread.ec.topframeref
     sthread.ec.topframeref = self.bottomframe.f_backref
     self.bottomframe.f_backref = origin.bottomframe.f_backref
     origin.bottomframe.f_backref = current
+    #
+    return get_result()
 
 def get_result():
-    if start_state.propagate_exception:
-        e = start_state.propagate_exception
-        start_state.propagate_exception = None
+    if global_state.propagate_exception:
+        e = global_state.propagate_exception
+        global_state.propagate_exception = None
         raise e
-    w_value = start_state.w_value
-    start_state.w_value = None
+    w_value = global_state.w_value
+    global_state.w_value = None
     return w_value
 
 def build_sthread(space):
@@ -253,7 +269,7 @@
         cont = space.interp_w(W_Continulet, w_cont)
         if cont.sthread is not sthread:
             if cont.sthread is None:
-                raise geterror(space, "got a non-initialized continulet")
+                continue   # ignore non-initialized continulets
             else:
                 raise geterror(space, "inter-thread support is missing")
         elif sthread.is_empty_handle(cont.h):
diff --git a/pypy/module/_continuation/interp_pickle.py b/pypy/module/_continuation/interp_pickle.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_continuation/interp_pickle.py
@@ -0,0 +1,128 @@
+from pypy.tool import stdlib_opcode as pythonopcode
+from pypy.rlib import jit
+from pypy.interpreter.error import OperationError
+from pypy.interpreter.pyframe import PyFrame
+from pypy.module._continuation.interp_continuation import State, global_state
+from pypy.module._continuation.interp_continuation import build_sthread
+from pypy.module._continuation.interp_continuation import post_switch
+from pypy.module._continuation.interp_continuation import get_result, geterror
+
+
+def getunpickle(space):    # return the unpickle helper stored on State
+    cs = space.fromcache(State)    # State object obtained from the space's cache
+    return cs.w_unpickle    # State sets this from the module's '_p' entry
+
+
+def reduce(self):    # build the 3-tuple returned by continulet.__reduce__()
+    # xxx this is known to be not completely correct with respect
+    # to subclasses, e.g. no __slots__ support, no looking for a
+    # __getnewargs__ or __getstate__ defined in the subclass, etc.
+    # Doing the right thing looks involved, though...
+    space = self.space
+    if self.sthread is None:
+        w_frame = space.w_False    # no sthread yet: encoded as False
+    elif self.sthread.is_empty_handle(self.h):
+        w_frame = space.w_None     # empty handle: encoded as None
+    else:
+        w_frame = space.wrap(self.bottomframe)    # otherwise: the bottom frame
+    w_continulet_type = space.type(space.wrap(self))
+    w_dict = self.getdict(space) or space.w_None
+    args = [getunpickle(space),                      # callable
+            space.newtuple([w_continulet_type]),     # its arguments
+            space.newtuple([w_frame, w_dict]),       # unpacked by setstate()
+            ]
+    return space.newtuple(args)
+
+def setstate(self, w_args):    # restore the (w_frame, w_dict) pair built by reduce()
+    space = self.space
+    if self.sthread is not None:
+        raise geterror(space, "continulet.__setstate__() on an already-"
+                              "initialized continulet")
+    w_frame, w_dict = space.fixedview(w_args, expected_length=2)
+    if not space.is_w(w_dict, space.w_None):
+        self.setdict(space, w_dict)
+    if space.is_w(w_frame, space.w_False):
+        return    # False is what reduce() stores when sthread is None
+    sthread = build_sthread(self.space)
+    self.sthread = sthread
+    self.bottomframe = space.interp_w(PyFrame, w_frame, can_be_None=True)
+    # (bottomframe is None when reduce() saw an empty handle)
+    global_state.origin = self    # read back by resume_trampoline_callback
+    if self.bottomframe is not None:
+        sthread.frame2continulet.set(self.bottomframe, self)
+    self.h = sthread.new(resume_trampoline_callback)
+    get_result()    # re-raises a pending exception, if any (e.g. MemoryError)
+
+# ____________________________________________________________
+
+def resume_trampoline_callback(h, arg):    # passed to sthread.new() by setstate()
+    self = global_state.origin    # the continulet stored by setstate()
+    self.h = h
+    space = self.space
+    sthread = self.sthread
+    try:
+        global_state.clear()
+        if self.bottomframe is None:
+            w_result = space.w_None    # no frame to resume
+        else:
+            h = sthread.switch(self.h)    # NOTE(review): presumably switches back to setstate() -- confirm
+            try:
+                w_result = post_switch(sthread, h)
+                operr = None
+            except OperationError, e:
+                w_result = None
+                operr = e
+            # re-run suspended frames, feeding each the previous result/error
+            while True:
+                ec = sthread.ec
+                frame = ec.topframeref()
+                assert frame is not None     # XXX better error message
+                exit_continulet = sthread.frame2continulet.get(frame)
+                # (setstate() registers each restored bottomframe there)
+                continue_after_call(frame)
+                #
+                # small hack: unlink frame out of the execution context,
+                # because execute_frame will add it there again
+                ec.topframeref = frame.f_backref
+                #
+                try:
+                    w_result = frame.execute_frame(w_result, operr)
+                    operr = None
+                except OperationError, e:
+                    w_result = None
+                    operr = e
+                if exit_continulet is not None:
+                    self = exit_continulet    # finish on behalf of that continulet
+                    break
+            sthread.ec.topframeref = jit.vref_None
+            if operr:
+                raise operr    # caught just below and stored in global_state
+    except Exception, e:
+        global_state.propagate_exception = e    # re-raised later by get_result()
+    else:
+        global_state.w_value = w_result
+    global_state.origin = self
+    global_state.destination = self
+    return self.h
+
+def continue_after_call(frame):    # make 'frame' resume just after its pending call
+    code = frame.pycode.co_code
+    instr = frame.last_instr    # must point at a call opcode (asserted below)
+    opcode = ord(code[instr])
+    map = pythonopcode.opmap
+    call_ops = [map['CALL_FUNCTION'], map['CALL_FUNCTION_KW'],
+                map['CALL_FUNCTION_VAR'], map['CALL_FUNCTION_VAR_KW'],
+                map['CALL_METHOD']]
+    assert opcode in call_ops   # XXX check better, and complain better
+    instr += 1    # step over the opcode byte
+    oparg = ord(code[instr]) | ord(code[instr + 1]) << 8    # 16-bit, low byte first
+    nargs = oparg & 0xff           # low byte
+    nkwds = (oparg >> 8) & 0xff    # high byte
+    if nkwds == 0:     # only positional arguments
+        # the fast paths leave things on the stack; pop them
+        if (frame.space.config.objspace.opcodes.CALL_METHOD and
+            opcode == map['CALL_METHOD']):
+            frame.dropvalues(nargs + 2)    # the arguments plus two more slots
+        elif opcode == map['CALL_FUNCTION']:
+            frame.dropvalues(nargs + 1)    # the arguments plus one more slot
+    frame.last_instr = instr + 1    # continue after the call
diff --git a/pypy/module/_continuation/test/support.py b/pypy/module/_continuation/test/support.py
--- a/pypy/module/_continuation/test/support.py
+++ b/pypy/module/_continuation/test/support.py
@@ -9,4 +9,4 @@
             import pypy.rlib.rstacklet
         except CompilationError, e:
             py.test.skip("cannot import rstacklet: %s" % e)
-        cls.space = gettestobjspace(usemodules=['_continuation'])
+        cls.space = gettestobjspace(usemodules=['_continuation'], continuation=True)
diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py
--- a/pypy/module/_continuation/test/test_stacklet.py
+++ b/pypy/module/_continuation/test/test_stacklet.py
@@ -13,7 +13,7 @@
         from _continuation import continulet
         #
         def empty_callback(c):
-            pass
+            never_called
         #
         c = continulet(empty_callback)
         assert type(c) is continulet
@@ -36,7 +36,7 @@
         from _continuation import continulet, error
         #
         def empty_callback(c1):
-            pass
+            never_called
         #
         c = continulet(empty_callback)
         raises(error, c.__init__, empty_callback)
@@ -135,12 +135,6 @@
         e = raises(error, c.switch)
         assert str(e.value) == "continulet already finished"
 
-    def test_not_initialized_yet(self):
-        from _continuation import continulet, error
-        c = continulet.__new__(continulet)
-        e = raises(error, c.switch)
-        assert str(e.value) == "continulet not initialized yet"
-
     def test_go_depth2(self):
         from _continuation import continulet
         #
@@ -254,6 +248,15 @@
         res = c_upper.switch('D')
         assert res == 'E'
 
+    def test_switch_not_initialized(self):
+        from _continuation import continulet
+        c0 = continulet.__new__(continulet)    # __init__ is never called
+        res = c0.switch()                      # no-op: returns None
+        assert res is None
+        res = c0.switch(123)                   # the value comes straight back
+        assert res == 123
+        raises(ValueError, c0.throw, ValueError)   # raised in the caller
+
     def test_exception_with_switch_depth2(self):
         from _continuation import continulet
         #
@@ -499,16 +502,31 @@
         assert res == 'z'
         raises(TypeError, c1.switch, to=c2)  # "can't send non-None value"
 
-    def test_switch2_not_initialized_yet(self):
-        from _continuation import continulet, error
+    def test_switch2_not_initialized(self):
+        from _continuation import continulet
+        c0 = continulet.__new__(continulet)
+        c0bis = continulet.__new__(continulet)
+        res = c0.switch(123, to=c0)
+        assert res == 123
+        res = c0.switch(123, to=c0bis)
+        assert res == 123
+        raises(ValueError, c0.throw, ValueError, to=c0)
+        raises(ValueError, c0.throw, ValueError, to=c0bis)
         #
         def f1(c1):
-            not_reachable
-        #
+            c1.switch('a')
+            raises(ValueError, c1.switch, 'b')
+            raises(KeyError, c1.switch, 'c')
+            return 'd'
         c1 = continulet(f1)
-        c2 = continulet.__new__(continulet)
-        e = raises(error, c1.switch, to=c2)
-        assert str(e.value) == "continulet not initialized yet"
+        res = c0.switch(to=c1)
+        assert res == 'a'
+        res = c1.switch(to=c0)
+        assert res == 'b'
+        res = c1.throw(ValueError, to=c0)
+        assert res == 'c'
+        res = c0.throw(KeyError, to=c1)
+        assert res == 'd'
 
     def test_switch2_already_finished(self):
         from _continuation import continulet, error
@@ -643,6 +661,12 @@
             assert res == "done"
         main()
 
+    def test_permute_noninitialized(self):
+        from _continuation import continulet, permute
+        # passes if neither call raises; each continulet skipped __init__
+        permute(continulet.__new__(continulet))    # ignored
+        permute(continulet.__new__(continulet),    # ignored
+                continulet.__new__(continulet))
+
     def test_bug_finish_with_already_finished_stacklet(self):
         from _continuation import continulet, error
         # make an already-finished continulet
diff --git a/pypy/module/_continuation/test/test_zpickle.py b/pypy/module/_continuation/test/test_zpickle.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_continuation/test/test_zpickle.py
@@ -0,0 +1,262 @@
+from pypy.conftest import gettestobjspace
+
+
+class AppTestCopy:
+    def setup_class(cls):
+        cls.space = gettestobjspace(usemodules=('_continuation',),
+                                    CALL_METHOD=True)
+        cls.space.config.translation.continuation = True
+
+    def test_basic_setup(self):
+        from _continuation import continulet
+        lst = [4]
+        co = continulet(lst.append)
+        assert lst == [4]
+        res = co.switch()
+        assert res is None
+        assert lst == [4, co]
+
+    def test_copy_continulet_not_started(self):
+        from _continuation import continulet, error
+        import copy
+        lst = []
+        co = continulet(lst.append)
+        co2, lst2 = copy.deepcopy((co, lst))
+        #
+        assert lst == []
+        co.switch()
+        assert lst == [co]
+        #
+        assert lst2 == []
+        co2.switch()
+        assert lst2 == [co2]
+
+    def test_copy_continulet_not_started_multiple(self):
+        from _continuation import continulet, error
+        import copy
+        lst = []
+        co = continulet(lst.append)
+        co2, lst2 = copy.deepcopy((co, lst))
+        co3, lst3 = copy.deepcopy((co, lst))
+        co4, lst4 = copy.deepcopy((co, lst))
+        #
+        assert lst == []
+        co.switch()
+        assert lst == [co]
+        #
+        assert lst2 == []
+        co2.switch()
+        assert lst2 == [co2]
+        #
+        assert lst3 == []
+        co3.switch()
+        assert lst3 == [co3]
+        #
+        assert lst4 == []
+        co4.switch()
+        assert lst4 == [co4]
+
+    def test_copy_continulet_real(self):
+        import new, sys
+        mod = new.module('test_copy_continulet_real')
+        sys.modules['test_copy_continulet_real'] = mod
+        exec '''if 1:
+            from _continuation import continulet
+            import copy
+            def f(co, x):
+                co.switch(x + 1)
+                co.switch(x + 2)
+                return x + 3
+            co = continulet(f, 40)
+            res = co.switch()
+            assert res == 41
+            co2 = copy.deepcopy(co)
+            #
+            res = co2.switch()
+            assert res == 42
+            assert co2.is_pending()
+            res = co2.switch()
+            assert res == 43
+            assert not co2.is_pending()
+            #
+            res = co.switch()
+            assert res == 42
+            assert co.is_pending()
+            res = co.switch()
+            assert res == 43
+            assert not co.is_pending()
+        ''' in mod.__dict__
+
+    def test_copy_continulet_already_finished(self):
+        from _continuation import continulet, error
+        import copy
+        lst = []
+        co = continulet(lst.append)
+        co.switch()
+        co2 = copy.deepcopy(co)
+        assert not co.is_pending()
+        assert not co2.is_pending()
+        raises(error, co.__init__, lst.append)
+        raises(error, co2.__init__, lst.append)
+        raises(error, co.switch)
+        raises(error, co2.switch)
+
+
+class AppTestPickle:
+    version = 0
+
+    def setup_class(cls):
+        cls.space = gettestobjspace(usemodules=('_continuation',),
+                                    CALL_METHOD=True)
+        cls.space.appexec([], """():
+            global continulet, A, __name__
+
+            import sys
+            __name__ = 'test_pickle_continulet'
+            thismodule = type(sys)(__name__)
+            sys.modules[__name__] = thismodule
+
+            from _continuation import continulet
+            class A(continulet):
+                pass
+
+            thismodule.__dict__.update(globals())
+        """)
+        cls.w_version = cls.space.wrap(cls.version)
+
+    def test_pickle_continulet_empty(self):
+        from _continuation import continulet
+        lst = [4]
+        co = continulet.__new__(continulet)
+        import pickle
+        pckl = pickle.dumps(co, self.version)
+        print repr(pckl)
+        co2 = pickle.loads(pckl)
+        assert co2 is not co
+        assert not co.is_pending()
+        assert not co2.is_pending()
+        # the empty unpickled coroutine can still be used:
+        result = [5]
+        co2.__init__(result.append)
+        res = co2.switch()
+        assert res is None
+        assert result == [5, co2]
+
+    def test_pickle_continulet_empty_subclass(self):
+        from test_pickle_continulet import continulet, A
+        lst = [4]
+        co = continulet.__new__(A)
+        co.foo = 'bar'
+        co.bar = 'baz'
+        import pickle
+        pckl = pickle.dumps(co, self.version)
+        print repr(pckl)
+        co2 = pickle.loads(pckl)
+        assert co2 is not co
+        assert not co.is_pending()
+        assert not co2.is_pending()
+        assert type(co) is type(co2) is A
+        assert co.foo == co2.foo == 'bar'
+        assert co.bar == co2.bar == 'baz'
+        # the empty unpickled coroutine can still be used:
+        result = [5]
+        co2.__init__(result.append)
+        res = co2.switch()
+        assert res is None
+        assert result == [5, co2]
+
+    def test_pickle_continulet_not_started(self):
+        from _continuation import continulet, error
+        import pickle
+        lst = []
+        co = continulet(lst.append)
+        pckl = pickle.dumps((co, lst))
+        print pckl
+        del co, lst
+        for i in range(2):
+            print 'resume...'
+            co2, lst2 = pickle.loads(pckl)
+            assert lst2 == []
+            co2.switch()
+            assert lst2 == [co2]
+
+    def test_pickle_continulet_real(self):
+        import new, sys
+        mod = new.module('test_pickle_continulet_real')
+        sys.modules['test_pickle_continulet_real'] = mod
+        mod.version = self.version
+        exec '''if 1:
+            from _continuation import continulet
+            import pickle
+            def f(co, x):
+                co.switch(x + 1)
+                co.switch(x + 2)
+                return x + 3
+            co = continulet(f, 40)
+            res = co.switch()
+            assert res == 41
+            pckl = pickle.dumps(co, version)
+            print repr(pckl)
+            co2 = pickle.loads(pckl)
+            #
+            res = co2.switch()
+            assert res == 42
+            assert co2.is_pending()
+            res = co2.switch()
+            assert res == 43
+            assert not co2.is_pending()
+            #
+            res = co.switch()
+            assert res == 42
+            assert co.is_pending()
+            res = co.switch()
+            assert res == 43
+            assert not co.is_pending()
+        ''' in mod.__dict__
+
+    def test_pickle_continulet_real_subclass(self):
+        import new, sys
+        mod = new.module('test_pickle_continulet_real_subclass')
+        sys.modules['test_pickle_continulet_real_subclass'] = mod
+        mod.version = self.version
+        exec '''if 1:
+            from _continuation import continulet
+            import pickle
+            class A(continulet):
+                def __init__(self):
+                    crash
+            def f(co):
+                co.switch(co.x + 1)
+                co.switch(co.x + 2)
+                return co.x + 3
+            co = A.__new__(A)
+            continulet.__init__(co, f)
+            co.x = 40
+            res = co.switch()
+            assert res == 41
+            pckl = pickle.dumps(co, version)
+            print repr(pckl)
+            co2 = pickle.loads(pckl)
+            #
+            assert type(co2) is A
+            res = co2.switch()
+            assert res == 42
+            assert co2.is_pending()
+            res = co2.switch()
+            assert res == 43
+            assert not co2.is_pending()
+            #
+            res = co.switch()
+            assert res == 42
+            assert co.is_pending()
+            res = co.switch()
+            assert res == 43
+            assert not co.is_pending()
+        ''' in mod.__dict__
+
+
+class AppTestPickle_v1(AppTestPickle):
+    version = 1
+
+class AppTestPickle_v2(AppTestPickle):
+    version = 2
diff --git a/pypy/module/_multiprocessing/interp_semaphore.py b/pypy/module/_multiprocessing/interp_semaphore.py
--- a/pypy/module/_multiprocessing/interp_semaphore.py
+++ b/pypy/module/_multiprocessing/interp_semaphore.py
@@ -468,6 +468,9 @@
 
         self.count -= 1
 
+    def after_fork(self):
+        self.count = 0
+
     @unwrap_spec(kind=int, maxvalue=int)
     def rebuild(space, w_cls, w_handle, kind, maxvalue):
         self = space.allocate_instance(W_SemLock, w_cls)
@@ -512,6 +515,7 @@
     acquire = interp2app(W_SemLock.acquire),
     release = interp2app(W_SemLock.release),
     _rebuild = interp2app(W_SemLock.rebuild.im_func, as_classmethod=True),
+    _after_fork = interp2app(W_SemLock.after_fork),
     __enter__=interp2app(W_SemLock.enter),
     __exit__=interp2app(W_SemLock.exit),
     SEM_VALUE_MAX=SEM_VALUE_MAX,
diff --git a/pypy/module/_multiprocessing/test/test_semaphore.py b/pypy/module/_multiprocessing/test/test_semaphore.py
--- a/pypy/module/_multiprocessing/test/test_semaphore.py
+++ b/pypy/module/_multiprocessing/test/test_semaphore.py
@@ -39,6 +39,10 @@
         sem.release()
         assert sem._count() == 0
 
+        sem.acquire()
+        sem._after_fork()
+        assert sem._count() == 0
+
     def test_recursive(self):
         from _multiprocessing import SemLock
         kind = self.RECURSIVE
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -57,6 +57,9 @@
         compile_extra=['-DPy_BUILD_CORE'],
         )
 
+class CConfig2:
+    _compilation_info_ = CConfig._compilation_info_
+
 class CConfig_constants:
     _compilation_info_ = CConfig._compilation_info_
 
@@ -300,9 +303,13 @@
         return unwrapper_raise # used in 'normal' RPython code.
     return decorate
 
-def cpython_struct(name, fields, forward=None):
+def cpython_struct(name, fields, forward=None, level=1):
     configname = name.replace(' ', '__')
-    setattr(CConfig, configname, rffi_platform.Struct(name, fields))
+    if level == 1:
+        config = CConfig
+    else:
+        config = CConfig2
+    setattr(config, configname, rffi_platform.Struct(name, fields))
     if forward is None:
         forward = lltype.ForwardReference()
     TYPES[configname] = forward
@@ -445,9 +452,10 @@
 #              'int*': rffi.INTP}
 
 def configure_types():
-    for name, TYPE in rffi_platform.configure(CConfig).iteritems():
-        if name in TYPES:
-            TYPES[name].become(TYPE)
+    for config in (CConfig, CConfig2):
+        for name, TYPE in rffi_platform.configure(config).iteritems():
+            if name in TYPES:
+                TYPES[name].become(TYPE)
 
 def build_type_checkers(type_name, cls=None):
     """
diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h
--- a/pypy/module/cpyext/include/object.h
+++ b/pypy/module/cpyext/include/object.h
@@ -321,6 +321,15 @@
 
 } PyTypeObject;
 
+typedef struct {
+    PyTypeObject ht_type;
+    PyNumberMethods as_number;
+    PyMappingMethods as_mapping;
+    PySequenceMethods as_sequence;
+    PyBufferProcs as_buffer;
+    PyObject *ht_name, *ht_slots;
+} PyHeapTypeObject;
+
 /* Flag bits for printing: */
 #define Py_PRINT_RAW	1	/* No string quotes etc. */
 
diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h
--- a/pypy/module/cpyext/include/patchlevel.h
+++ b/pypy/module/cpyext/include/patchlevel.h
@@ -29,7 +29,7 @@
 #define PY_VERSION		"2.7.1"
 
 /* PyPy version as a string */
-#define PYPY_VERSION "1.6.0"
+#define PYPY_VERSION "1.6.1"
 
 /* Subversion Revision number of this file (not of the repository).
  * Empty since Mercurial migration. */
diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py
--- a/pypy/module/cpyext/pyobject.py
+++ b/pypy/module/cpyext/pyobject.py
@@ -19,13 +19,42 @@
     basestruct = PyObject.TO
 
     def get_dealloc(self, space):
-        raise NotImplementedError
+        from pypy.module.cpyext.typeobject import subtype_dealloc
+        return llhelper(
+            subtype_dealloc.api_func.functype,
+            subtype_dealloc.api_func.get_wrapper(space))
+
     def allocate(self, space, w_type, itemcount=0):
-        raise NotImplementedError
+        # similar to PyType_GenericAlloc?
+        # except that it's not related to any pypy object.
+
+        pytype = rffi.cast(PyTypeObjectPtr, make_ref(space, w_type))
+        # Don't increase refcount for non-heaptypes
+        if pytype:
+            flags = rffi.cast(lltype.Signed, pytype.c_tp_flags)
+            if not flags & Py_TPFLAGS_HEAPTYPE:
+                Py_DecRef(space, w_type)
+
+        if pytype:
+            size = pytype.c_tp_basicsize
+        else:
+            size = rffi.sizeof(self.basestruct)
+        if itemcount:
+            size += itemcount * pytype.c_tp_itemsize
+        buf = lltype.malloc(rffi.VOIDP.TO, size,
+                            flavor='raw', zero=True)
+        pyobj = rffi.cast(PyObject, buf)
+        pyobj.c_ob_refcnt = 1
+        pyobj.c_ob_type = pytype
+        return pyobj
+
     def attach(self, space, pyobj, w_obj):
-        raise NotImplementedError
+        pass
+
     def realize(self, space, ref):
-        raise NotImplementedError
+        # For most types, a reference cannot exist without
+        # a real interpreter object
+        raise InvalidPointerException(str(ref))
 
 typedescr_cache = {}
 
@@ -40,6 +69,7 @@
     """
 
     tp_basestruct = kw.pop('basestruct', PyObject.TO)
+    tp_alloc      = kw.pop('alloc', None)
     tp_attach     = kw.pop('attach', None)
     tp_realize    = kw.pop('realize', None)
     tp_dealloc    = kw.pop('dealloc', None)
@@ -49,58 +79,24 @@
 
     class CpyTypedescr(BaseCpyTypedescr):
         basestruct = tp_basestruct
-        realize = tp_realize
 
-        def get_dealloc(self, space):
-            if tp_dealloc:
+        if tp_alloc:
+            def allocate(self, space, w_type, itemcount=0):
+                return tp_alloc(space, w_type)
+
+        if tp_dealloc:
+            def get_dealloc(self, space):
                 return llhelper(
                     tp_dealloc.api_func.functype,
                     tp_dealloc.api_func.get_wrapper(space))
-            else:
-                from pypy.module.cpyext.typeobject import subtype_dealloc
-                return llhelper(
-                    subtype_dealloc.api_func.functype,
-                    subtype_dealloc.api_func.get_wrapper(space))
-
-        def allocate(self, space, w_type, itemcount=0):
-            # similar to PyType_GenericAlloc?
-            # except that it's not related to any pypy object.
-
-            pytype = rffi.cast(PyTypeObjectPtr, make_ref(space, w_type))
-            # Don't increase refcount for non-heaptypes
-            if pytype:
-                flags = rffi.cast(lltype.Signed, pytype.c_tp_flags)
-                if not flags & Py_TPFLAGS_HEAPTYPE:
-                    Py_DecRef(space, w_type)
-
-            if pytype:
-                size = pytype.c_tp_basicsize
-            else:
-                size = rffi.sizeof(tp_basestruct)
-            if itemcount:
-                size += itemcount * pytype.c_tp_itemsize
-            buf = lltype.malloc(rffi.VOIDP.TO, size,
-                                flavor='raw', zero=True)
-            pyobj = rffi.cast(PyObject, buf)
-            pyobj.c_ob_refcnt = 1
-            pyobj.c_ob_type = pytype
-            return pyobj
 
         if tp_attach:
             def attach(self, space, pyobj, w_obj):
                 tp_attach(space, pyobj, w_obj)
-        else:
-            def attach(self, space, pyobj, w_obj):
-                pass
 
         if tp_realize:
             def realize(self, space, ref):
                 return tp_realize(space, ref)
-        else:
-            def realize(self, space, ref):
-                # For most types, a reference cannot exist without
-                # a real interpreter object
-                raise InvalidPointerException(str(ref))
     if typedef:
         CpyTypedescr.__name__ = "CpyTypedescr_%s" % (typedef.name,)
 
diff --git a/pypy/module/cpyext/test/test_tupleobject.py b/pypy/module/cpyext/test/test_tupleobject.py
--- a/pypy/module/cpyext/test/test_tupleobject.py
+++ b/pypy/module/cpyext/test/test_tupleobject.py
@@ -48,3 +48,4 @@
         w_slice = api.PyTuple_GetSlice(w_tuple, 3, -3)
         assert space.eq_w(w_slice,
                           space.newtuple([space.wrap(i) for i in range(3, 7)]))
+
diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py
--- a/pypy/module/cpyext/test/test_typeobject.py
+++ b/pypy/module/cpyext/test/test_typeobject.py
@@ -268,6 +268,21 @@
         assert type(obj) is foo.Custom
         assert type(foo.Custom) is foo.MetaType
 
+    def test_heaptype(self):
+        module = self.import_extension('foo', [
+           ("name_by_heaptype", "METH_O",
+            '''
+                 PyHeapTypeObject *heaptype = (PyHeapTypeObject *)args;
+                 Py_INCREF(heaptype->ht_name);
+                 return heaptype->ht_name;
+             '''
+             )
+            ])
+        class C(object):
+            pass
+        assert module.name_by_heaptype(C) == "C"
+        
+
 class TestTypes(BaseApiTest):
     def test_type_attributes(self, space, api):
         w_class = space.appexec([], """():
diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py
--- a/pypy/module/cpyext/typeobject.py
+++ b/pypy/module/cpyext/typeobject.py
@@ -11,7 +11,7 @@
     generic_cpy_call, Py_TPFLAGS_READY, Py_TPFLAGS_READYING,
     Py_TPFLAGS_HEAPTYPE, METH_VARARGS, METH_KEYWORDS, CANNOT_FAIL,
     Py_TPFLAGS_HAVE_GETCHARBUFFER,
-    build_type_checkers)
+    build_type_checkers, PyObjectFields)
 from pypy.module.cpyext.pyobject import (
     PyObject, make_ref, create_ref, from_ref, get_typedescr, make_typedescr,
     track_reference, RefcountState, borrow_from)
@@ -25,7 +25,7 @@
 from pypy.module.cpyext.structmember import PyMember_GetOne, PyMember_SetOne
 from pypy.module.cpyext.typeobjectdefs import (
     PyTypeObjectPtr, PyTypeObject, PyGetSetDef, PyMemberDef, newfunc,
-    PyNumberMethods, PySequenceMethods, PyBufferProcs)
+    PyNumberMethods, PyMappingMethods, PySequenceMethods, PyBufferProcs)
 from pypy.module.cpyext.slotdefs import (
     slotdefs_for_tp_slots, slotdefs_for_wrappers, get_slot_tp_function)
 from pypy.interpreter.error import OperationError
@@ -39,6 +39,19 @@
 
 PyType_Check, PyType_CheckExact = build_type_checkers("Type", "w_type")
 
+PyHeapTypeObjectStruct = lltype.ForwardReference()
+PyHeapTypeObject = lltype.Ptr(PyHeapTypeObjectStruct)
+PyHeapTypeObjectFields = (
+    ("ht_type", PyTypeObject),
+    ("ht_name", PyObject),
+    ("as_number", PyNumberMethods),
+    ("as_mapping", PyMappingMethods),
+    ("as_sequence", PySequenceMethods),
+    ("as_buffer", PyBufferProcs),
+    )
+cpython_struct("PyHeapTypeObject", PyHeapTypeObjectFields, PyHeapTypeObjectStruct,
+               level=2)
+
 class W_GetSetPropertyEx(GetSetProperty):
     def __init__(self, getset, w_type):
         self.getset = getset
@@ -136,6 +149,8 @@
             assert len(slot_names) == 2
             struct = getattr(pto, slot_names[0])
             if not struct:
+                assert not space.config.translating
+                assert not pto.c_tp_flags & Py_TPFLAGS_HEAPTYPE
                 if slot_names[0] == 'c_tp_as_number':
                     STRUCT_TYPE = PyNumberMethods
                 elif slot_names[0] == 'c_tp_as_sequence':
@@ -301,6 +316,7 @@
 
     make_typedescr(space.w_type.instancetypedef,
                    basestruct=PyTypeObject,
+                   alloc=type_alloc,
                    attach=type_attach,
                    realize=type_realize,
                    dealloc=type_dealloc)
@@ -319,11 +335,13 @@
     track_reference(space, lltype.nullptr(PyObject.TO), space.w_type)
     track_reference(space, lltype.nullptr(PyObject.TO), space.w_object)
     track_reference(space, lltype.nullptr(PyObject.TO), space.w_tuple)
+    track_reference(space, lltype.nullptr(PyObject.TO), space.w_str)
 
     # create the objects
     py_type = create_ref(space, space.w_type)
     py_object = create_ref(space, space.w_object)
     py_tuple = create_ref(space, space.w_tuple)
+    py_str = create_ref(space, space.w_str)
 
     # form cycles
     pto_type = rffi.cast(PyTypeObjectPtr, py_type)
@@ -340,10 +358,15 @@
     pto_object.c_tp_bases.c_ob_type = pto_tuple
     pto_tuple.c_tp_bases.c_ob_type = pto_tuple
 
+    for typ in (py_type, py_object, py_tuple, py_str):
+        heaptype = rffi.cast(PyHeapTypeObject, typ)
+        heaptype.c_ht_name.c_ob_type = pto_type
+
     # Restore the mapping
     track_reference(space, py_type, space.w_type, replace=True)
     track_reference(space, py_object, space.w_object, replace=True)
     track_reference(space, py_tuple, space.w_tuple, replace=True)
+    track_reference(space, py_str, space.w_str, replace=True)
 
 
 @cpython_api([PyObject], lltype.Void, external=False)
@@ -416,17 +439,34 @@
     Py_DecRef(space, obj_pto.c_tp_cache) # let's do it like cpython
     Py_DecRef(space, obj_pto.c_tp_dict)
     if obj_pto.c_tp_flags & Py_TPFLAGS_HEAPTYPE:
-        if obj_pto.c_tp_as_buffer:
-            lltype.free(obj_pto.c_tp_as_buffer, flavor='raw')
-        if obj_pto.c_tp_as_number:
-            lltype.free(obj_pto.c_tp_as_number, flavor='raw')
-        if obj_pto.c_tp_as_sequence:
-            lltype.free(obj_pto.c_tp_as_sequence, flavor='raw')
+        heaptype = rffi.cast(PyHeapTypeObject, obj)
+        Py_DecRef(space, heaptype.c_ht_name)
         Py_DecRef(space, base_pyo)
-        rffi.free_charp(obj_pto.c_tp_name)
         PyObject_dealloc(space, obj)
 
 
+def type_alloc(space, w_metatype):
+    size = rffi.sizeof(PyHeapTypeObject)
+    metatype = rffi.cast(PyTypeObjectPtr, make_ref(space, w_metatype))
+    # Don't increase refcount for non-heaptypes
+    if metatype:
+        flags = rffi.cast(lltype.Signed, metatype.c_tp_flags)
+        if not flags & Py_TPFLAGS_HEAPTYPE:
+            Py_DecRef(space, w_metatype)
+
+    heaptype = lltype.malloc(PyHeapTypeObject.TO,
+                             flavor='raw', zero=True)
+    pto = heaptype.c_ht_type
+    pto.c_ob_refcnt = 1
+    pto.c_ob_type = metatype
+    pto.c_tp_flags |= Py_TPFLAGS_HEAPTYPE
+    pto.c_tp_as_number = heaptype.c_as_number
+    pto.c_tp_as_sequence = heaptype.c_as_sequence
+    pto.c_tp_as_mapping = heaptype.c_as_mapping
+    pto.c_tp_as_buffer = heaptype.c_as_buffer
+    
+    return rffi.cast(PyObject, heaptype)
+
 def type_attach(space, py_obj, w_type):
     """
     Fills a newly allocated PyTypeObject from an existing type.
@@ -445,12 +485,18 @@
     if space.is_w(w_type, space.w_str):
         setup_string_buffer_procs(space, pto)
 
-    pto.c_tp_flags |= Py_TPFLAGS_HEAPTYPE
     pto.c_tp_free = llhelper(PyObject_Del.api_func.functype,
             PyObject_Del.api_func.get_wrapper(space))
     pto.c_tp_alloc = llhelper(PyType_GenericAlloc.api_func.functype,
             PyType_GenericAlloc.api_func.get_wrapper(space))
-    pto.c_tp_name = rffi.str2charp(w_type.getname(space))
+    if pto.c_tp_flags & Py_TPFLAGS_HEAPTYPE:
+        w_typename = space.getattr(w_type, space.wrap('__name__'))
+        heaptype = rffi.cast(PyHeapTypeObject, pto)
+        heaptype.c_ht_name = make_ref(space, w_typename)
+        from pypy.module.cpyext.stringobject import PyString_AsString
+        pto.c_tp_name = PyString_AsString(space, heaptype.c_ht_name)
+    else:
+        pto.c_tp_name = rffi.str2charp(w_type.getname(space))
     pto.c_tp_basicsize = -1 # hopefully this makes malloc bail out
     pto.c_tp_itemsize = 0
     # uninitialized fields:
diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py
--- a/pypy/module/pyexpat/interp_pyexpat.py
+++ b/pypy/module/pyexpat/interp_pyexpat.py
@@ -12,6 +12,7 @@
 from pypy.translator.platform import platform
 
 import sys
+import weakref
 import py
 
 if sys.platform == "win32":
@@ -180,7 +181,7 @@
 class CallbackData(Wrappable):
     def __init__(self, space, parser):
         self.space = space
-        self.parser = parser
+        self.parser = weakref.ref(parser)
 
 SETTERS = {}
 for index, (name, params) in enumerate(HANDLERS.items()):
@@ -257,7 +258,7 @@
         id = rffi.cast(lltype.Signed, %(ll_id)s)
         userdata = global_storage.get_object(id)
         space = userdata.space
-        parser = userdata.parser
+        parser = userdata.parser()
 
         handler = parser.handlers[%(index)s]
         if not handler:
@@ -292,7 +293,7 @@
     id = rffi.cast(lltype.Signed, ll_userdata)
     userdata = global_storage.get_object(id)
     space = userdata.space
-    parser = userdata.parser
+    parser = userdata.parser()
 
     name = rffi.charp2str(name)
 
@@ -409,8 +410,7 @@
         if XML_ParserFree: # careful with CPython interpreter shutdown
             XML_ParserFree(self.itself)
         if global_storage:
-            global_storage.free_nonmoving_id(
-                rffi.cast(lltype.Signed, self.itself))
+            global_storage.free_nonmoving_id(self.id)
 
     @unwrap_spec(flag=int)
     def SetParamEntityParsing(self, space, flag):
diff --git a/pypy/module/pypyjit/interp_jit.py b/pypy/module/pypyjit/interp_jit.py
--- a/pypy/module/pypyjit/interp_jit.py
+++ b/pypy/module/pypyjit/interp_jit.py
@@ -13,7 +13,6 @@
 from pypy.interpreter.pyframe import PyFrame
 from pypy.interpreter.pyopcode import ExitFrame
 from pypy.interpreter.gateway import unwrap_spec
-from pypy.interpreter.baseobjspace import ObjSpace, W_Root
 from opcode import opmap
 from pypy.rlib.nonconst import NonConstant
 from pypy.jit.metainterp.resoperation import rop
@@ -221,7 +220,6 @@
     def __init__(self, space):
         self.w_compile_hook = space.w_None
 
-@unwrap_spec(ObjSpace, W_Root)
 def set_compile_hook(space, w_hook):
     """ set_compile_hook(hook)
 
diff --git a/pypy/module/pypyjit/policy.py b/pypy/module/pypyjit/policy.py
--- a/pypy/module/pypyjit/policy.py
+++ b/pypy/module/pypyjit/policy.py
@@ -16,7 +16,7 @@
         if modname in ['pypyjit', 'signal', 'micronumpy', 'math', 'exceptions',
                        'imp', 'sys', 'array', '_ffi', 'itertools', 'operator',
                        'posix', '_socket', '_sre', '_lsprof', '_weakref',
-                       '__pypy__', 'cStringIO', '_collections']:
+                       '__pypy__', 'cStringIO', '_collections', 'struct']:
             return True
         return False
 
diff --git a/pypy/module/pypyjit/test_pypy_c/test_call.py b/pypy/module/pypyjit/test_pypy_c/test_call.py
--- a/pypy/module/pypyjit/test_pypy_c/test_call.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_call.py
@@ -337,7 +337,9 @@
         assert loop.match_by_id('append', """
             i13 = getfield_gc(p8, descr=<SignedFieldDescr list.length .*>)
             i15 = int_add(i13, 1)
-            call(ConstClass(_ll_list_resize_ge__listPtr_Signed), p8, i15, descr=<VoidCallDescr>)
+            # Will be killed by the backend
+            i17 = arraylen_gc(p7, descr=<GcPtrArrayDescr>)
+            call(ConstClass(_ll_list_resize_ge), p8, i15, descr=<VoidCallDescr>)
             guard_no_exception(descr=...)
             p17 = getfield_gc(p8, descr=<GcPtrFieldDescr list.items .*>)
             p19 = new_with_vtable(ConstClass(W_IntObject))
diff --git a/pypy/module/pypyjit/test_pypy_c/test_containers.py b/pypy/module/pypyjit/test_pypy_c/test_containers.py
--- a/pypy/module/pypyjit/test_pypy_c/test_containers.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_containers.py
@@ -40,12 +40,33 @@
         log = self.run(fn, [1000])
         assert log.result == 300
         loop, = log.loops_by_filename(self.filepath)
-        # check that the call to ll_dict_lookup is not a call_may_force
+        # check that the call to ll_dict_lookup is not a call_may_force, the
+        # gc_id call is hoisted out of the loop, the id of a value obviously
+        # can't change ;)
         assert loop.match_by_id("getitem", """
-            i25 = call(ConstClass(_ll_1_gc_identityhash__objectPtr), p6, descr=...)
-            ...
             i28 = call(ConstClass(ll_dict_lookup__dicttablePtr_objectPtr_Signed), p18, p6, i25, descr=...)
             ...
             p33 = call(ConstClass(ll_get_value__dicttablePtr_Signed), p18, i28, descr=...)
             ...
         """)
+
+    def test_list(self):
+        def main(n):
+            i = 0
+            while i < n:
+                z = list(())
+                z.append(1)
+                i += z[-1] / len(z)
+            return i
+
+        log = self.run(main, [1000])
+        assert log.result == main(1000)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i5, i6)
+            guard_true(i7, descr=...)
+            guard_not_invalidated(descr=...)
+            i9 = int_add(i5, 1)
+            --TICK--
+            jump(..., descr=...)
+        """)
\ No newline at end of file
diff --git a/pypy/module/pypyjit/test_pypy_c/test_misc.py b/pypy/module/pypyjit/test_pypy_c/test_misc.py
--- a/pypy/module/pypyjit/test_pypy_c/test_misc.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_misc.py
@@ -92,7 +92,7 @@
         """)
 
 
-    def test_cached_pure_func_of_equal_fields(self):            
+    def test_cached_pure_func_of_equal_fields(self):
         def main(n):
             class A(object):
                 def __init__(self, val):
@@ -285,3 +285,48 @@
 
         loop, = log.loops_by_id("globalread", is_entry_bridge=True)
         assert len(loop.ops_by_id("globalread")) == 0
+
+    def test_struct_module(self):
+        def main():
+            import struct
+            i = 1
+            while i < 1000:
+                x = struct.unpack("i", struct.pack("i", i))[0] # ID: struct
+                i += x / i
+            return i
+
+        log = self.run(main)
+        assert log.result == main()
+
+        loop, = log.loops_by_id("struct")
+        if sys.maxint == 2 ** 63 - 1:
+            extra = """
+            i8 = int_lt(i4, -2147483648)
+            guard_false(i8, descr=...)
+            """
+        else:
+            extra = ""
+        # This could, of course, stand some improvement to remove all these
+        # arithmetic ops, but we've removed all the core overhead.
+        assert loop.match_by_id("struct", """
+            guard_not_invalidated(descr=...)
+            # struct.pack
+            %(32_bit_only)s
+            i11 = int_and(i4, 255)
+            i13 = int_rshift(i4, 8)
+            i14 = int_and(i13, 255)
+            i16 = int_rshift(i13, 8)
+            i17 = int_and(i16, 255)
+            i19 = int_rshift(i16, 8)
+            i20 = int_and(i19, 255)
+
+            # struct.unpack
+            i22 = int_lshift(i14, 8)
+            i23 = int_or(i11, i22)
+            i25 = int_lshift(i17, 16)
+            i26 = int_or(i23, i25)
+            i28 = int_ge(i20, 128)
+            guard_false(i28, descr=...)
+            i30 = int_lshift(i20, 24)
+            i31 = int_or(i26, i30)
+        """ % {"32_bit_only": extra})
\ No newline at end of file
diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py
--- a/pypy/module/pypyjit/test_pypy_c/test_string.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_string.py
@@ -1,5 +1,6 @@
 from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
 
+
 class TestString(BaseTestPyPyC):
     def test_lookup_default_encoding(self):
         def main(n):
@@ -107,3 +108,52 @@
             --TICK--
             jump(p0, p1, p2, p3, p4, p5, i58, i7, descr=<Loop4>)
         """)
+
+    def test_str_mod(self):
+        def main(n):
+            s = 0
+            while n > 0:
+                s += len('%d %d' % (n, n))
+                n -= 1
+            return s
+
+        log = self.run(main, [1000])
+        assert log.result == main(1000)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_gt(i4, 0)
+            guard_true(i7, descr=...)
+            guard_not_invalidated(descr=...)
+            p9 = call(ConstClass(ll_int2dec__Signed), i4, descr=<GcPtrCallDescr>)
+            guard_no_exception(descr=...)
+            i10 = strlen(p9)
+            i11 = int_is_true(i10)
+            guard_true(i11, descr=...)
+            i13 = strgetitem(p9, 0)
+            i15 = int_eq(i13, 45)
+            guard_false(i15, descr=...)
+            i17 = int_sub(0, i10)
+            i19 = int_gt(i10, 23)
+            guard_false(i19, descr=...)
+            p21 = newstr(23)
+            copystrcontent(p9, p21, 0, 0, i10)
+            i25 = int_add(1, i10)
+            i26 = int_gt(i25, 23)
+            guard_false(i26, descr=...)
+            strsetitem(p21, i10, 32)
+            i29 = int_add(i10, 1)
+            i30 = int_add(i10, i25)
+            i31 = int_gt(i30, 23)
+            guard_false(i31, descr=...)
+            copystrcontent(p9, p21, 0, i25, i10)
+            i33 = int_eq(i30, 23)
+            guard_false(i33, descr=...)
+            p35 = call(ConstClass(ll_shrink_array__rpy_stringPtr_Signed), p21, i30, descr=<GcPtrCallDescr>)
+            guard_no_exception(descr=...)
+            i37 = strlen(p35)
+            i38 = int_add_ovf(i5, i37)
+            guard_no_overflow(descr=...)
+            i40 = int_sub(i4, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, i40, i38, descr=<Loop0>)
+        """)
\ No newline at end of file
diff --git a/pypy/module/struct/formatiterator.py b/pypy/module/struct/formatiterator.py
--- a/pypy/module/struct/formatiterator.py
+++ b/pypy/module/struct/formatiterator.py
@@ -1,9 +1,9 @@
-from pypy.interpreter.error import OperationError
-
+from pypy.rlib import jit
 from pypy.rlib.objectmodel import specialize
 from pypy.rlib.rstruct.error import StructError
+from pypy.rlib.rstruct.formatiterator import FormatIterator
 from pypy.rlib.rstruct.standardfmttable import PACK_ACCEPTS_BROKEN_INPUT
-from pypy.rlib.rstruct.formatiterator import FormatIterator
+from pypy.interpreter.error import OperationError
 
 
 class PackFormatIterator(FormatIterator):
@@ -14,15 +14,20 @@
         self.args_index = 0
         self.result = []      # list of characters
 
+    # This *should* always be unroll safe; the only way to get here is by
+    # unrolling the interpret function, which means the fmt is const, and thus
+    # this should be const (in theory ;)
+    @jit.unroll_safe
+    @specialize.arg(1)
     def operate(self, fmtdesc, repetitions):
         if fmtdesc.needcount:
             fmtdesc.pack(self, repetitions)
         else:
             for i in range(repetitions):
                 fmtdesc.pack(self)
-    operate._annspecialcase_ = 'specialize:arg(1)'
     _operate_is_specialized_ = True
 
+    @jit.unroll_safe
     def align(self, mask):
         pad = (-len(self.result)) & mask
         for i in range(pad):
@@ -130,13 +135,15 @@
         self.inputpos = 0
         self.result_w = []     # list of wrapped objects
 
+    # See above comment on operate.
+    @jit.unroll_safe
+    @specialize.arg(1)
     def operate(self, fmtdesc, repetitions):
         if fmtdesc.needcount:
             fmtdesc.unpack(self, repetitions)
         else:
             for i in range(repetitions):
                 fmtdesc.unpack(self)
-    operate._annspecialcase_ = 'specialize:arg(1)'
     _operate_is_specialized_ = True
 
     def align(self, mask):
@@ -154,7 +161,6 @@
         self.inputpos = end
         return s
 
+    @specialize.argtype(1)
     def appendobj(self, value):
         self.result_w.append(self.space.wrap(value))
-    appendobj._annspecialcase_ = 'specialize:argtype(1)'
-
diff --git a/pypy/module/struct/interp_struct.py b/pypy/module/struct/interp_struct.py
--- a/pypy/module/struct/interp_struct.py
+++ b/pypy/module/struct/interp_struct.py
@@ -3,6 +3,7 @@
 from pypy.rlib.rstruct.error import StructError
 from pypy.rlib.rstruct.formatiterator import CalcSizeFormatIterator
 
+
 @unwrap_spec(format=str)
 def calcsize(space, format):
     fmtiter = CalcSizeFormatIterator()
diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py
--- a/pypy/module/sys/version.py
+++ b/pypy/module/sys/version.py
@@ -10,7 +10,7 @@
 CPYTHON_VERSION            = (2, 7, 1, "final", 42)   #XXX # sync patchlevel.h
 CPYTHON_API_VERSION        = 1013   #XXX # sync with include/modsupport.h
 
-PYPY_VERSION               = (1, 6, 0, "dev", 1)    #XXX # sync patchlevel.h
+PYPY_VERSION               = (1, 6, 1, "dev", 0)    #XXX # sync patchlevel.h
 
 if platform.name == 'msvc':
     COMPILER_INFO = 'MSC v.%d 32 bit' % (platform.version * 10 + 600)
diff --git a/pypy/module/test_lib_pypy/test_greenlet.py b/pypy/module/test_lib_pypy/test_greenlet.py
--- a/pypy/module/test_lib_pypy/test_greenlet.py
+++ b/pypy/module/test_lib_pypy/test_greenlet.py
@@ -3,7 +3,7 @@
 
 class AppTestGreenlet:
     def setup_class(cls):
-        cls.space = gettestobjspace(usemodules=['_continuation'])
+        cls.space = gettestobjspace(usemodules=['_continuation'], continuation=True)
 
     def test_simple(self):
         from greenlet import greenlet
@@ -258,3 +258,25 @@
             assert sys.exc_info() == (None, None, None)
 
         greenlet(f).switch()
+
+    def test_gr_frame(self):
+        from greenlet import greenlet
+        import sys
+        def f2():
+            assert g.gr_frame is None
+            gmain.switch()
+            assert g.gr_frame is None
+        def f1():
+            assert gmain.gr_frame is gmain_frame
+            assert g.gr_frame is None
+            f2()
+            assert g.gr_frame is None
+        gmain = greenlet.getcurrent()
+        assert gmain.gr_frame is None
+        gmain_frame = sys._getframe()
+        g = greenlet(f1)
+        assert g.gr_frame is None
+        g.switch()
+        assert g.gr_frame.f_code.co_name == 'f2'
+        g.switch()
+        assert g.gr_frame is None
diff --git a/pypy/module/test_lib_pypy/test_stackless_pickle.py b/pypy/module/test_lib_pypy/test_stackless_pickle.py
--- a/pypy/module/test_lib_pypy/test_stackless_pickle.py
+++ b/pypy/module/test_lib_pypy/test_stackless_pickle.py
@@ -1,25 +1,27 @@
-import py; py.test.skip("XXX port me")
+import py
+py.test.skip("in-progress, maybe")
 from pypy.conftest import gettestobjspace, option
 
 class AppTest_Stackless:
 
     def setup_class(cls):
-        import py.test
-        py.test.importorskip('greenlet')
-        space = gettestobjspace(usemodules=('_stackless', '_socket'))
+        space = gettestobjspace(usemodules=('_continuation', '_socket'))
         cls.space = space
-        # cannot test the unpickle part on top of py.py
+        if option.runappdirect:
+            cls.w_lev = space.wrap(14)
+        else:
+            cls.w_lev = space.wrap(2)
 
     def test_pickle(self):
         import new, sys
 
         mod = new.module('mod')
         sys.modules['mod'] = mod
+        mod.lev = self.lev
         try:
             exec '''
 import pickle, sys
 import stackless
-lev = 14
 
 ch = stackless.channel()
 seen = []
diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py
--- a/pypy/objspace/std/bytearrayobject.py
+++ b/pypy/objspace/std/bytearrayobject.py
@@ -250,7 +250,8 @@
 def repr__Bytearray(space, w_bytearray):
     s = w_bytearray.data
 
-    buf = StringBuilder(50)
+    # Good default if there are no replacements.
+    buf = StringBuilder(len("bytearray(b'')") + len(s))
 
     buf.append("bytearray(b'")
 
diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py
--- a/pypy/objspace/std/formatting.py
+++ b/pypy/objspace/std/formatting.py
@@ -1,13 +1,15 @@
 """
 String formatting routines.
 """
-from pypy.rlib.unroll import unrolling_iterable
+from pypy.interpreter.error import OperationError
+from pypy.objspace.std.unicodetype import unicode_from_object
+from pypy.rlib import jit
 from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rlib.rfloat import formatd, DTSF_ALT, isnan, isinf
-from pypy.interpreter.error import OperationError
+from pypy.rlib.rstring import StringBuilder, UnicodeBuilder
+from pypy.rlib.unroll import unrolling_iterable
 from pypy.tool.sourcetools import func_with_new_name
-from pypy.rlib.rstring import StringBuilder, UnicodeBuilder
-from pypy.objspace.std.unicodetype import unicode_from_object
+
 
 class BaseStringFormatter(object):
     def __init__(self, space, values_w, w_valuedict):
@@ -173,6 +175,9 @@
                 raise OperationError(space.w_ValueError,
                                      space.wrap("incomplete format"))
 
+        # Only shows up if we've already started inlining format(), so just
+        # unconditionally unroll this.
+        @jit.unroll_safe
         def getmappingkey(self):
             # return the mapping key in a '%(key)s' specifier
             fmt = self.fmt
@@ -233,6 +238,8 @@
 
             return w_value
 
+        # Same as getmappingkey
+        @jit.unroll_safe
         def peel_flags(self):
             self.f_ljust = False
             self.f_sign  = False
@@ -255,6 +262,8 @@
                     break
                 self.forward()
 
+        # Same as getmappingkey
+        @jit.unroll_safe
         def peel_num(self):
             space = self.space
             c = self.peekchr()
@@ -276,6 +285,7 @@
                 c = self.peekchr()
             return result
 
+        @jit.look_inside_iff(lambda self: jit.isconstant(self.fmt))
         def format(self):
             lgt = len(self.fmt) + 4 * len(self.values_w) + 10
             if do_unicode:
diff --git a/pypy/objspace/std/mapdict.py b/pypy/objspace/std/mapdict.py
--- a/pypy/objspace/std/mapdict.py
+++ b/pypy/objspace/std/mapdict.py
@@ -132,7 +132,10 @@
             cache[selector] = attr
         return attr
 
-    @jit.unroll_safe
+    @jit.look_inside_iff(lambda self, obj, selector, w_value:
+            jit.isconstant(self) and
+            jit.isconstant(selector[0]) and
+            jit.isconstant(selector[1]))
     def add_attr(self, obj, selector, w_value):
         # grumble, jit needs this
         attr = self._get_new_attr(selector[0], selector[1])
@@ -347,7 +350,7 @@
 SLOTS_STARTING_FROM = 3
 
 
-class BaseMapdictObject: # slightly evil to make it inherit from W_Root
+class BaseMapdictObject:
     _mixin_ = True
 
     def _init_empty(self, map):
diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py
--- a/pypy/objspace/std/newformat.py
+++ b/pypy/objspace/std/newformat.py
@@ -3,9 +3,10 @@
 import string
 
 from pypy.interpreter.error import OperationError
-from pypy.rlib import rstring, runicode, rlocale, rarithmetic, rfloat
+from pypy.rlib import rstring, runicode, rlocale, rarithmetic, rfloat, jit
 from pypy.rlib.objectmodel import specialize
 from pypy.rlib.rfloat import copysign, formatd
+from pypy.tool import sourcetools
 
 
 @specialize.argtype(1)
@@ -36,314 +37,321 @@
 ANS_MANUAL = 3
 
 
-class TemplateFormatter(object):
+def make_template_formatting_class():
+    class TemplateFormatter(object):
 
-    _annspecialcase_ = "specialize:ctr_location"
+        parser_list_w = None
 
-    parser_list_w = None
+        def __init__(self, space, is_unicode, template):
+            self.space = space
+            self.is_unicode = is_unicode
+            self.empty = u"" if is_unicode else ""
+            self.template = template
 
-    def __init__(self, space, is_unicode, template):
-        self.space = space
-        self.is_unicode = is_unicode
-        self.empty = u"" if is_unicode else ""
-        self.template = template
+        def build(self, args):
+            self.args, self.kwargs = args.unpack()
+            self.auto_numbering = 0
+            self.auto_numbering_state = ANS_INIT
+            return self._build_string(0, len(self.template), 2)
 
-    def build(self, args):
-        self.args, self.kwargs = args.unpack()
-        self.auto_numbering = 0
-        self.auto_numbering_state = ANS_INIT
-        return self._build_string(0, len(self.template), 2)
+        def _build_string(self, start, end, level):
+            space = self.space
+            if self.is_unicode:
+                out = rstring.UnicodeBuilder()
+            else:
+                out = rstring.StringBuilder()
+            if not level:
+                raise OperationError(space.w_ValueError,
+                                     space.wrap("Recursion depth exceeded"))
+            level -= 1
+            s = self.template
+            return self._do_build_string(start, end, level, out, s)
 
-    def _build_string(self, start, end, level):
-        space = self.space
-        if self.is_unicode:
-            out = rstring.UnicodeBuilder()
-        else:
-            out = rstring.StringBuilder()
-        if not level:
-            raise OperationError(space.w_ValueError,
-                                 space.wrap("Recursion depth exceeded"))
-        level -= 1
-        s = self.template
-        last_literal = i = start
-        while i < end:
-            c = s[i]
-            i += 1
-            if c == "{" or c == "}":
-                at_end = i == end
-                # Find escaped "{" and "}"
-                markup_follows = True
-                if c == "}":
-                    if at_end or s[i] != "}":
-                        raise OperationError(space.w_ValueError,
-                                             space.wrap("Single '}'"))
-                    i += 1
-                    markup_follows = False
-                if c == "{":
-                    if at_end:
-                        raise OperationError(space.w_ValueError,
-                                             space.wrap("Single '{'"))
-                    if s[i] == "{":
+        @jit.look_inside_iff(lambda self, start, end, level, out, s: jit.isconstant(s))
+        def _do_build_string(self, start, end, level, out, s):
+            space = self.space
+            last_literal = i = start
+            while i < end:
+                c = s[i]
+                i += 1
+                if c == "{" or c == "}":
+                    at_end = i == end
+                    # Find escaped "{" and "}"
+                    markup_follows = True
+                    if c == "}":
+                        if at_end or s[i] != "}":
+                            raise OperationError(space.w_ValueError,
+                                                 space.wrap("Single '}'"))
                         i += 1
                         markup_follows = False
-                # Attach literal data
-                out.append_slice(s, last_literal, i - 1)
-                if not markup_follows:
+                    if c == "{":
+                        if at_end:
+                            raise OperationError(space.w_ValueError,
+                                                 space.wrap("Single '{'"))
+                        if s[i] == "{":
+                            i += 1
+                            markup_follows = False
+                    # Attach literal data
+                    out.append_slice(s, last_literal, i - 1)
+                    if not markup_follows:
+                        last_literal = i
+                        continue
+                    nested = 1
+                    field_start = i
+                    recursive = False
+                    while i < end:
+                        c = s[i]
+                        if c == "{":
+                            recursive = True
+                            nested += 1
+                        elif c == "}":
+                            nested -= 1
+                            if not nested:
+                                break
+                        i += 1
+                    if nested:
+                        raise OperationError(space.w_ValueError,
+                                             space.wrap("Unmatched '{'"))
+                    rendered = self._render_field(field_start, i, recursive, level)
+                    out.append(rendered)
+                    i += 1
                     last_literal = i
-                    continue
-                nested = 1
-                field_start = i
-                recursive = False
-                while i < end:
-                    c = s[i]
-                    if c == "{":
-                        recursive = True
-                        nested += 1
-                    elif c == "}":
-                        nested -= 1
-                        if not nested:
-                            break
-                    i += 1
-                if nested:
-                    raise OperationError(space.w_ValueError,
-                                         space.wrap("Unmatched '{'"))
-                rendered = self._render_field(field_start, i, recursive, level)
-                out.append(rendered)
+
+            out.append_slice(s, last_literal, end)
+            return out.build()
+
+        def _parse_field(self, start, end):
+            s = self.template
+            # Find ":" or "!"
+            i = start
+            while i < end:
+                c = s[i]
+                if c == ":" or c == "!":
+                    end_name = i
+                    if c == "!":
+                        i += 1
+                        if i == end:
+                            w_msg = self.space.wrap("expected conversion")
+                            raise OperationError(self.space.w_ValueError, w_msg)
+                        conversion = s[i]
+                        i += 1
+                        if i < end:
+                            if s[i] != ':':
+                                w_msg = self.space.wrap("expected ':' after"
+                                                        " format specifier")
+                                raise OperationError(self.space.w_ValueError,
+                                                     w_msg)
+                            i += 1
+                    else:
+                        conversion = None
+                        i += 1
+                    return s[start:end_name], conversion, i
                 i += 1
-                last_literal = i
+            return s[start:end], None, end
 
-        out.append_slice(s, last_literal, end)
-        return out.build()
-
-    def _parse_field(self, start, end):
-        s = self.template
-        # Find ":" or "!"
-        i = start
-        while i < end:
-            c = s[i]
-            if c == ":" or c == "!":
-                end_name = i
-                if c == "!":
-                    i += 1
-                    if i == end:
-                        w_msg = self.space.wrap("expected conversion")
-                        raise OperationError(self.space.w_ValueError, w_msg)
-                    conversion = s[i]
-                    i += 1
-                    if i < end:
-                        if s[i] != ':':
-                            w_msg = self.space.wrap("expected ':' after"
-                                                    " format specifier")
-                            raise OperationError(self.space.w_ValueError,
-                                                 w_msg)
-                        i += 1
+        def _get_argument(self, name):
+            # First, find the argument.
+            space = self.space
+            i = 0
+            end = len(name)
+            while i < end:
+                c = name[i]
+                if c == "[" or c == ".":
+                    break
+                i += 1
+            empty = not i
+            if empty:
+                index = -1
+            else:
+                index, stop = _parse_int(self.space, name, 0, i)
+                if stop != i:
+                    index = -1
+            use_numeric = empty or index != -1
+            if self.auto_numbering_state == ANS_INIT and use_numeric:
+                if empty:
+                    self.auto_numbering_state = ANS_AUTO
                 else:
-                    conversion = None
-                    i += 1
-                return s[start:end_name], conversion, i
-            i += 1
-        return s[start:end], None, end
-
-    def _get_argument(self, name):
-        # First, find the argument.
-        space = self.space
-        i = 0
-        end = len(name)
-        while i < end:
-            c = name[i]
-            if c == "[" or c == ".":
-                break
-            i += 1
-        empty = not i
-        if empty:
-            index = -1
-        else:
-            index, stop = _parse_int(self.space, name, 0, i)
-            if stop != i:
-                index = -1
-        use_numeric = empty or index != -1
-        if self.auto_numbering_state == ANS_INIT and use_numeric:
-            if empty:
-                self.auto_numbering_state = ANS_AUTO
-            else:
-                self.auto_numbering_state = ANS_MANUAL
-        if use_numeric:
-            if self.auto_numbering_state == ANS_MANUAL:
-                if empty:
-                    msg = "switching from manual to automatic numbering"
+                    self.auto_numbering_state = ANS_MANUAL
+            if use_numeric:
+                if self.auto_numbering_state == ANS_MANUAL:
+                    if empty:
+                        msg = "switching from manual to automatic numbering"
+                        raise OperationError(space.w_ValueError,
+                                             space.wrap(msg))
+                elif not empty:
+                    msg = "switching from automatic to manual numbering"
                     raise OperationError(space.w_ValueError,
                                          space.wrap(msg))
-            elif not empty:
-                msg = "switching from automatic to manual numbering"
-                raise OperationError(space.w_ValueError,
-                                     space.wrap(msg))
-        if empty:
-            index = self.auto_numbering
-            self.auto_numbering += 1
-        if index == -1:
-            kwarg = name[:i]
-            if self.is_unicode:
+            if empty:
+                index = self.auto_numbering
+                self.auto_numbering += 1
+            if index == -1:
+                kwarg = name[:i]
+                if self.is_unicode:
+                    try:
+                        arg_key = kwarg.encode("latin-1")
+                    except UnicodeEncodeError:
+                        # Not going to be found in a dict of strings.
+                        raise OperationError(space.w_KeyError, space.wrap(kwarg))
+                else:
+                    arg_key = kwarg
                 try:
-                    arg_key = kwarg.encode("latin-1")
-                except UnicodeEncodeError:
-                    # Not going to be found in a dict of strings.
-                    raise OperationError(space.w_KeyError, space.wrap(kwarg))
+                    w_arg = self.kwargs[arg_key]
+                except KeyError:
+                    raise OperationError(space.w_KeyError, space.wrap(arg_key))
             else:
-                arg_key = kwarg
-            try:
-                w_arg = self.kwargs[arg_key]
-            except KeyError:
-                raise OperationError(space.w_KeyError, space.wrap(arg_key))
-        else:
-            try:
-                w_arg = self.args[index]
-            except IndexError:
-                w_msg = space.wrap("index out of range")
-                raise OperationError(space.w_IndexError, w_msg)
-        return self._resolve_lookups(w_arg, name, i, end)
+                try:
+                    w_arg = self.args[index]
+                except IndexError:
+                    w_msg = space.wrap("index out of range")
+                    raise OperationError(space.w_IndexError, w_msg)
+            return self._resolve_lookups(w_arg, name, i, end)
 
-    def _resolve_lookups(self, w_obj, name, start, end):
-        # Resolve attribute and item lookups.
-        space = self.space
-        i = start
-        while i < end:
-            c = name[i]
-            if c == ".":
+        def _resolve_lookups(self, w_obj, name, start, end):
+            # Resolve attribute and item lookups.
+            space = self.space
+            i = start
+            while i < end:
+                c = name[i]
+                if c == ".":
+                    i += 1
+                    start = i
+                    while i < end:
+                        c = name[i]
+                        if c == "[" or c == ".":
+                            break
+                        i += 1
+                    if start == i:
+                        w_msg = space.wrap("Empty attribute in format string")
+                        raise OperationError(space.w_ValueError, w_msg)
+                    w_attr = space.wrap(name[start:i])
+                    if w_obj is not None:
+                        w_obj = space.getattr(w_obj, w_attr)
+                    else:
+                        self.parser_list_w.append(space.newtuple([
+                            space.w_True, w_attr]))
+                elif c == "[":
+                    got_bracket = False
+                    i += 1
+                    start = i
+                    while i < end:
+                        c = name[i]
+                        if c == "]":
+                            got_bracket = True
+                            break
+                        i += 1
+                    if not got_bracket:
+                        raise OperationError(space.w_ValueError,
+                                             space.wrap("Missing ']'"))
+                    index, reached = _parse_int(self.space, name, start, i)
+                    if index != -1 and reached == i:
+                        w_item = space.wrap(index)
+                    else:
+                        w_item = space.wrap(name[start:i])
+                    i += 1 # Skip "]"
+                    if w_obj is not None:
+                        w_obj = space.getitem(w_obj, w_item)
+                    else:
+                        self.parser_list_w.append(space.newtuple([
+                            space.w_False, w_item]))
+                else:
+                    msg = "Only '[' and '.' may follow ']'"
+                    raise OperationError(space.w_ValueError, space.wrap(msg))
+            return w_obj
+
+        def formatter_field_name_split(self):
+            space = self.space
+            name = self.template
+            i = 0
+            end = len(name)
+            while i < end:
+                c = name[i]
+                if c == "[" or c == ".":
+                    break
                 i += 1
-                start = i
-                while i < end:
-                    c = name[i]
-                    if c == "[" or c == ".":
-                        break
-                    i += 1
-                if start == i:
-                    w_msg = space.wrap("Empty attribute in format string")
-                    raise OperationError(space.w_ValueError, w_msg)
-                w_attr = space.wrap(name[start:i])
-                if w_obj is not None:
-                    w_obj = space.getattr(w_obj, w_attr)
-                else:
-                    self.parser_list_w.append(space.newtuple([
-                        space.w_True, w_attr]))
-            elif c == "[":
-                got_bracket = False
-                i += 1
-                start = i
-                while i < end:
-                    c = name[i]
-                    if c == "]":
-                        got_bracket = True
-                        break
-                    i += 1
-                if not got_bracket:
-                    raise OperationError(space.w_ValueError,
-                                         space.wrap("Missing ']'"))
-                index, reached = _parse_int(self.space, name, start, i)
-                if index != -1 and reached == i:
-                    w_item = space.wrap(index)
-                else:
-                    w_item = space.wrap(name[start:i])
-                i += 1 # Skip "]"
-                if w_obj is not None:
-                    w_obj = space.getitem(w_obj, w_item)
-                else:
-                    self.parser_list_w.append(space.newtuple([
-                        space.w_False, w_item]))
+            if i == 0:
+                index = -1
             else:
-                msg = "Only '[' and '.' may follow ']'"
-                raise OperationError(space.w_ValueError, space.wrap(msg))
-        return w_obj
+                index, stop = _parse_int(self.space, name, 0, i)
+                if stop != i:
+                    index = -1
+            if index >= 0:
+                w_first = space.wrap(index)
+            else:
+                w_first = space.wrap(name[:i])
+            #
+            self.parser_list_w = []
+            self._resolve_lookups(None, name, i, end)
+            #
+            return space.newtuple([w_first,
+                                   space.iter(space.newlist(self.parser_list_w))])
 
-    def formatter_field_name_split(self):
-        space = self.space
-        name = self.template
-        i = 0
-        end = len(name)
-        while i < end:
-            c = name[i]
-            if c == "[" or c == ".":
-                break
-            i += 1
-        if i == 0:
-            index = -1
-        else:
-            index, stop = _parse_int(self.space, name, 0, i)
-            if stop != i:
-                index = -1
-        if index >= 0:
-            w_first = space.wrap(index)
-        else:
-            w_first = space.wrap(name[:i])
-        #
-        self.parser_list_w = []
-        self._resolve_lookups(None, name, i, end)
-        #
-        return space.newtuple([w_first,
-                               space.iter(space.newlist(self.parser_list_w))])
+        def _convert(self, w_obj, conversion):
+            space = self.space
+            conv = conversion[0]
+            if conv == "r":
+                return space.repr(w_obj)
+            elif conv == "s":
+                if self.is_unicode:
+                    return space.call_function(space.w_unicode, w_obj)
+                return space.str(w_obj)
+            else:
+                raise OperationError(self.space.w_ValueError,
+                                     self.space.wrap("invalid conversion"))
 
-    def _convert(self, w_obj, conversion):
-        space = self.space
-        conv = conversion[0]
-        if conv == "r":
-            return space.repr(w_obj)
-        elif conv == "s":
-            if self.is_unicode:
-                return space.call_function(space.w_unicode, w_obj)
-            return space.str(w_obj)
-        else:
-            raise OperationError(self.space.w_ValueError,
-                                 self.space.wrap("invalid conversion"))
+        def _render_field(self, start, end, recursive, level):
+            name, conversion, spec_start = self._parse_field(start, end)
+            spec = self.template[spec_start:end]
+            #
+            if self.parser_list_w is not None:
+                # used from formatter_parser()
+                if level == 1:    # ignore recursive calls
+                    space = self.space
+                    startm1 = start - 1
+                    assert startm1 >= self.last_end
+                    w_entry = space.newtuple([
+                        space.wrap(self.template[self.last_end:startm1]),
+                        space.wrap(name),
+                        space.wrap(spec),
+                        space.wrap(conversion)])
+                    self.parser_list_w.append(w_entry)
+                    self.last_end = end + 1
+                return self.empty
+            #
+            w_obj = self._get_argument(name)
+            if conversion is not None:
+                w_obj = self._convert(w_obj, conversion)
+            if recursive:
+                spec = self._build_string(spec_start, end, level)
+            w_rendered = self.space.format(w_obj, self.space.wrap(spec))
+            unwrapper = "unicode_w" if self.is_unicode else "str_w"
+            to_interp = getattr(self.space, unwrapper)
+            return to_interp(w_rendered)
 
-    def _render_field(self, start, end, recursive, level):
-        name, conversion, spec_start = self._parse_field(start, end)
-        spec = self.template[spec_start:end]
-        #
-        if self.parser_list_w is not None:
-            # used from formatter_parser()
-            if level == 1:    # ignore recursive calls
-                space = self.space
-                startm1 = start - 1
-                assert startm1 >= self.last_end
-                w_entry = space.newtuple([
-                    space.wrap(self.template[self.last_end:startm1]),
-                    space.wrap(name),
-                    space.wrap(spec),
-                    space.wrap(conversion)])
-                self.parser_list_w.append(w_entry)
-                self.last_end = end + 1
-            return self.empty
-        #
-        w_obj = self._get_argument(name)
-        if conversion is not None:
-            w_obj = self._convert(w_obj, conversion)
-        if recursive:
-            spec = self._build_string(spec_start, end, level)
-        w_rendered = self.space.format(w_obj, self.space.wrap(spec))
-        unwrapper = "unicode_w" if self.is_unicode else "str_w"
-        to_interp = getattr(self.space, unwrapper)
-        return to_interp(w_rendered)
+        def formatter_parser(self):
+            self.parser_list_w = []
+            self.last_end = 0
+            self._build_string(0, len(self.template), 2)
+            #
+            space = self.space
+            if self.last_end < len(self.template):
+                w_lastentry = space.newtuple([
+                    space.wrap(self.template[self.last_end:]),
+                    space.w_None,
+                    space.w_None,
+                    space.w_None])
+                self.parser_list_w.append(w_lastentry)
+            return space.iter(space.newlist(self.parser_list_w))
+    return TemplateFormatter
 
-    def formatter_parser(self):
-        self.parser_list_w = []
-        self.last_end = 0
-        self._build_string(0, len(self.template), 2)
-        #
-        space = self.space
-        if self.last_end < len(self.template):
-            w_lastentry = space.newtuple([
-                space.wrap(self.template[self.last_end:]),
-                space.w_None,
-                space.w_None,
-                space.w_None])
-            self.parser_list_w.append(w_lastentry)
-        return space.iter(space.newlist(self.parser_list_w))
-
+StrTemplateFormatter = make_template_formatting_class()
+UnicodeTemplateFormatter = make_template_formatting_class()
 
 def str_template_formatter(space, template):
-    return TemplateFormatter(space, False, template)
+    return StrTemplateFormatter(space, False, template)
 
 def unicode_template_formatter(space, template):
-    return TemplateFormatter(space, True, template)
+    return UnicodeTemplateFormatter(space, True, template)
 
 
 def format_method(space, w_string, args, is_unicode):
@@ -380,756 +388,759 @@
 
 LONG_DIGITS = string.digits + string.ascii_lowercase
 
-class Formatter(BaseFormatter):
-    """__format__ implementation for builtin types."""
+def make_formatting_class():
+    class Formatter(BaseFormatter):
+        """__format__ implementation for builtin types."""
 
-    _annspecialcase_ = "specialize:ctr_location"
-    _grouped_digits = None
+        _grouped_digits = None
 
-    def __init__(self, space, is_unicode, spec):
-        self.space = space
-        self.is_unicode = is_unicode
-        self.empty = u"" if is_unicode else ""
-        self.spec = spec
+        def __init__(self, space, is_unicode, spec):
+            self.space = space
+            self.is_unicode = is_unicode
+            self.empty = u"" if is_unicode else ""
+            self.spec = spec
 
-    def _is_alignment(self, c):
-        return (c == "<" or
-                c == ">" or
-                c == "=" or
-                c == "^")
+        def _is_alignment(self, c):
+            return (c == "<" or
+                    c == ">" or
+                    c == "=" or
+                    c == "^")
 
-    def _is_sign(self, c):
-        return (c == " " or
-                c == "+" or
-                c == "-")
+        def _is_sign(self, c):
+            return (c == " " or
+                    c == "+" or
+                    c == "-")
 
-    def _parse_spec(self, default_type, default_align):
-        space = self.space
-        self._fill_char = self._lit("\0")[0]
-        self._align = default_align
-        self._alternate = False
-        self._sign = "\0"
-        self._thousands_sep = False
-        self._precision = -1
-        the_type = default_type
-        spec = self.spec
-        if not spec:
-            return True
-        length = len(spec)
-        i = 0
-        got_align = True
-        if length - i >= 2 and self._is_alignment(spec[i + 1]):
-            self._align = spec[i + 1]
-            self._fill_char = spec[i]
-            i += 2
-        elif length - i >= 1 and self._is_alignment(spec[i]):
-            self._align = spec[i]
-            i += 1
-        else:
-            got_align = False
-        if length - i >= 1 and self._is_sign(spec[i]):
-            self._sign = spec[i]
-            i += 1
-        if length - i >= 1 and spec[i] == "#":
-            self._alternate = True
-            i += 1
-        if self._fill_char == "\0" and length - i >= 1 and spec[i] == "0":
-            self._fill_char = self._lit("0")[0]
-            if not got_align:
-                self._align = "="
-            i += 1
-        start_i = i
-        self._width, i = _parse_int(self.space, spec, i, length)
-        if length != i and spec[i] == ",":
-            self._thousands_sep = True
-            i += 1
-        if length != i and spec[i] == ".":
-            i += 1
-            self._precision, i = _parse_int(self.space, spec, i, length)
-            if self._precision == -1:
+        def _parse_spec(self, default_type, default_align):
+            space = self.space
+            self._fill_char = self._lit("\0")[0]
+            self._align = default_align
+            self._alternate = False
+            self._sign = "\0"
+            self._thousands_sep = False
+            self._precision = -1
+            the_type = default_type
+            spec = self.spec
+            if not spec:
+                return True
+            length = len(spec)
+            i = 0
+            got_align = True
+            if length - i >= 2 and self._is_alignment(spec[i + 1]):
+                self._align = spec[i + 1]
+                self._fill_char = spec[i]
+                i += 2
+            elif length - i >= 1 and self._is_alignment(spec[i]):
+                self._align = spec[i]
+                i += 1
+            else:
+                got_align = False
+            if length - i >= 1 and self._is_sign(spec[i]):
+                self._sign = spec[i]
+                i += 1
+            if length - i >= 1 and spec[i] == "#":
+                self._alternate = True
+                i += 1
+            if self._fill_char == "\0" and length - i >= 1 and spec[i] == "0":
+                self._fill_char = self._lit("0")[0]
+                if not got_align:
+                    self._align = "="
+                i += 1
+            start_i = i
+            self._width, i = _parse_int(self.space, spec, i, length)
+            if length != i and spec[i] == ",":
+                self._thousands_sep = True
+                i += 1
+            if length != i and spec[i] == ".":
+                i += 1
+                self._precision, i = _parse_int(self.space, spec, i, length)
+                if self._precision == -1:
+                    raise OperationError(space.w_ValueError,
+                                         space.wrap("no precision given"))
+            if length - i > 1:
                 raise OperationError(space.w_ValueError,
-                                     space.wrap("no precision given"))
-        if length - i > 1:
-            raise OperationError(space.w_ValueError,
-                                 space.wrap("invalid format spec"))
-        if length - i == 1:
-            presentation_type = spec[i]
-            if self.is_unicode:
-                try:
-                    the_type = spec[i].encode("ascii")[0]
-                except UnicodeEncodeError:
+                                     space.wrap("invalid format spec"))
+            if length - i == 1:
+                presentation_type = spec[i]
+                if self.is_unicode:
+                    try:
+                        the_type = spec[i].encode("ascii")[0]
+                    except UnicodeEncodeError:
+                        raise OperationError(space.w_ValueError,
+                                             space.wrap("invalid presentation type"))
+                else:
+                    the_type = presentation_type
+                i += 1
+            self._type = the_type
+            if self._thousands_sep:
+                tp = self._type
+                if (tp == "d" or
+                    tp == "e" or
+                    tp == "f" or
+                    tp == "g" or
+                    tp == "E" or
+                    tp == "G" or
+                    tp == "%" or
+                    tp == "F" or
+                    tp == "\0"):
+                    # ok
+                    pass
+                else:
                     raise OperationError(space.w_ValueError,
-                                         space.wrap("invalid presentation type"))
+                                         space.wrap("invalid type with ','"))
+            return False
+
+        def _calc_padding(self, string, length):
+            """compute left and right padding, return total width of string"""
+            if self._width != -1 and length < self._width:
+                total = self._width
             else:
-                the_type = presentation_type
-            i += 1
-        self._type = the_type
-        if self._thousands_sep:
-            tp = self._type
-            if (tp == "d" or
-                tp == "e" or
-                tp == "f" or
-                tp == "g" or
-                tp == "E" or
-                tp == "G" or
-                tp == "%" or
-                tp == "F" or
-                tp == "\0"):
-                # ok
-                pass
+                total = length
+            align = self._align
+            if align == ">":
+                left = total - length
+            elif align == "^":
+                left = (total - length) / 2
+            elif align == "<" or align == "=":
+                left = 0
             else:
-                raise OperationError(space.w_ValueError,
-                                     space.wrap("invalid type with ','"))
-        return False
+                raise AssertionError("shouldn't be here")
+            right = total - length - left
+            self._left_pad = left
+            self._right_pad = right
+            return total
 
-    def _calc_padding(self, string, length):
-        """compute left and right padding, return total width of string"""
-        if self._width != -1 and length < self._width:
-            total = self._width
-        else:
-            total = length
-        align = self._align
-        if align == ">":
-            left = total - length
-        elif align == "^":
-            left = (total - length) / 2
-        elif align == "<" or align == "=":
-            left = 0
-        else:
-            raise AssertionError("shouldn't be here")
-        right = total - length - left
-        self._left_pad = left
-        self._right_pad = right
-        return total
-
-    def _lit(self, s):
-        if self.is_unicode:
-            return s.decode("ascii")
-        else:
-            return s
-
-    def _pad(self, string):
-        builder = self._builder()
-        builder.append_multiple_char(self._fill_char, self._left_pad)
-        builder.append(string)
-        builder.append_multiple_char(self._fill_char, self._right_pad)
-        return builder.build()
-
-    def _builder(self):
-        if self.is_unicode:
-            return rstring.UnicodeBuilder()
-        else:
-            return rstring.StringBuilder()
-
-    def _unknown_presentation(self, tp):
-        msg = "unknown presentation for %s: '%s'"
-        w_msg = self.space.wrap(msg  % (tp, self._type))
-        raise OperationError(self.space.w_ValueError, w_msg)
-
-    def format_string(self, string):
-        space = self.space
-        if self._parse_spec("s", "<"):
-            return space.wrap(string)
-        if self._type != "s":
-            self._unknown_presentation("string")
-        if self._sign != "\0":
-            msg = "Sign not allowed in string format specifier"
-            raise OperationError(space.w_ValueError, space.wrap(msg))
-        if self._alternate:
-            msg = "Alternate form not allowed in string format specifier"
-            raise OperationError(space.w_ValueError, space.wrap(msg))
-        if self._align == "=":
-            msg = "'=' alignment not allowed in string format specifier"
-            raise OperationError(space.w_ValueError, space.wrap(msg))
-        length = len(string)
-        precision = self._precision
-        if precision != -1 and length >= precision:
-            assert precision >= 0
-            length = precision
-            string = string[:precision]
-        if self._fill_char == "\0":
-            self._fill_char = self._lit(" ")[0]
-        self._calc_padding(string, length)
-        return space.wrap(self._pad(string))
-
-    def _get_locale(self, tp):
-        space = self.space
-        if tp == "n":
-            dec, thousands, grouping = rlocale.numeric_formatting()
-        elif self._thousands_sep:
-            dec = "."
-            thousands = ","
-            grouping = "\3\0"
-        else:
-            dec = "."
-            thousands = ""
-            grouping = "\256"
-        if self.is_unicode:
-            self._loc_dec = dec.decode("ascii")
-            self._loc_thousands = thousands.decode("ascii")
-        else:
-            self._loc_dec = dec
-            self._loc_thousands = thousands
-        self._loc_grouping = grouping
-
-    def _calc_num_width(self, n_prefix, sign_char, to_number, n_number,
-                        n_remainder, has_dec, digits):
-        """Calculate widths of all parts of formatted number.
-
-        Output will look like:
-
-            <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal>
-            <remainder> <rpadding>
-
-        sign is computed from self._sign, and the sign of the number
-        prefix is given
-        digits is known
-        """
-        spec = NumberSpec()
-        spec.n_digits = n_number - n_remainder - has_dec
-        spec.n_prefix = n_prefix
-        spec.n_lpadding = 0
-        spec.n_decimal = int(has_dec)
-        spec.n_remainder = n_remainder
-        spec.n_spadding = 0
-        spec.n_rpadding = 0
-        spec.n_min_width = 0
-        spec.n_total = 0
-        spec.sign = "\0"
-        spec.n_sign = 0
-        sign = self._sign
-        if sign == "+":
-            spec.n_sign = 1
-            spec.sign = "-" if sign_char == "-" else "+"
-        elif sign == " ":
-            spec.n_sign = 1
-            spec.sign = "-" if sign_char == "-" else " "
-        elif sign_char == "-":
-            spec.n_sign = 1
-            spec.sign = "-"
-        extra_length = (spec.n_sign + spec.n_prefix + spec.n_decimal +
-                        spec.n_remainder) # Not padding or digits
-        if self._fill_char == "0" and self._align == "=":
-            spec.n_min_width = self._width - extra_length
-        if self._loc_thousands:
-            self._group_digits(spec, digits[to_number:])
-            n_grouped_digits = len(self._grouped_digits)
-        else:
-            n_grouped_digits = spec.n_digits
-        n_padding = self._width - (extra_length + n_grouped_digits)
-        if n_padding > 0:
-            align = self._align
-            if align == "<":
-                spec.n_rpadding = n_padding
-            elif align == ">":
-                spec.n_lpadding = n_padding
-            elif align == "^":
-                spec.n_lpadding = n_padding // 2
-                spec.n_rpadding = n_padding - spec.n_lpadding
-            elif align == "=":
-                spec.n_spadding = n_padding
-            else:
-                raise AssertionError("shouldn't reach")
-        spec.n_total = spec.n_lpadding + spec.n_sign + spec.n_prefix + \
-                       spec.n_spadding + n_grouped_digits + \
-                       spec.n_decimal + spec.n_remainder + spec.n_rpadding
-        return spec
-
-    def _fill_digits(self, buf, digits, d_state, n_chars, n_zeros,
-                     thousands_sep):
-        if thousands_sep:
-            for c in thousands_sep:
-                buf.append(c)
-        for i in range(d_state - 1, d_state - n_chars - 1, -1):
-            buf.append(digits[i])
-        for i in range(n_zeros):
-            buf.append("0")
-
-    def _group_digits(self, spec, digits):
-        buf = []
-        grouping = self._loc_grouping
-        min_width = spec.n_min_width
-        grouping_state = 0
-        count = 0
-        left = spec.n_digits
-        n_ts = len(self._loc_thousands)
-        need_separator = False
-        done = False
-        groupings = len(grouping)
-        previous = 0
-        while True:
-            group = ord(grouping[grouping_state])
-            if group > 0:
-                if group == 256:
-                    break
-                grouping_state += 1
-                previous = group
-            else:
-                group = previous
-            final_grouping = min(group, max(left, max(min_width, 1)))
-            n_zeros = max(0, final_grouping - left)
-            n_chars = max(0, min(left, final_grouping))
-            ts = self._loc_thousands if need_separator else None
-            self._fill_digits(buf, digits, left, n_chars, n_zeros, ts)
-            need_separator = True
-            left -= n_chars
-            min_width -= final_grouping
-            if left <= 0 and min_width <= 0:
-                done = True
-                break
-            min_width -= n_ts
-        if not done:
-            group = max(max(left, min_width), 1)
-            n_zeros = max(0, group - left)
-            n_chars = max(0, min(left, group))
-            ts = self._loc_thousands if need_separator else None
-            self._fill_digits(buf, digits, left, n_chars, n_zeros, ts)
-        buf.reverse()
-        self._grouped_digits = self.empty.join(buf)
-
-    def _upcase_string(self, s):
-        buf = []
-        for c in s:
-            index = ord(c)
-            if ord("a") <= index <= ord("z"):
-                c = chr(index - 32)
-            buf.append(c)
-        return self.empty.join(buf)
-
-
-    def _fill_number(self, spec, num, to_digits, to_prefix, fill_char,
-                     to_remainder, upper, grouped_digits=None):
-        out = self._builder()
-        if spec.n_lpadding:
-            out.append_multiple_char(fill_char[0], spec.n_lpadding)
-        if spec.n_sign:
-            if self.is_unicode:
-                sign = spec.sign.decode("ascii")
-            else:
-                sign = spec.sign
-            out.append(sign)
-        if spec.n_prefix:
-            pref = num[to_prefix:to_prefix + spec.n_prefix]
-            if upper:
-                pref = self._upcase_string(pref)
-            out.append(pref)
-        if spec.n_spadding:
-            out.append_multiple_char(fill_char[0], spec.n_spadding)
-        if spec.n_digits != 0:
-            if self._loc_thousands:
-                if grouped_digits is not None:
-                    digits = grouped_digits
-                else:
-                    digits = self._grouped_digits
-                    assert digits is not None
-            else:
-                stop = to_digits + spec.n_digits
-                assert stop >= 0
-                digits = num[to_digits:stop]
-            if upper:
-                digits = self._upcase_string(digits)
-            out.append(digits)
-        if spec.n_decimal:
-            out.append(self._lit(".")[0])
-        if spec.n_remainder:
-            out.append(num[to_remainder:])
-        if spec.n_rpadding:
-            out.append_multiple_char(fill_char[0], spec.n_rpadding)
-        #if complex, need to call twice - just retun the buffer
-        return out.build()
-
-    def _format_int_or_long(self, w_num, kind):
-        space = self.space
-        if self._precision != -1:
-            msg = "precision not allowed in integer type"
-            raise OperationError(space.w_ValueError, space.wrap(msg))
-        sign_char = "\0"
-        tp = self._type
-        if tp == "c":
-            if self._sign != "\0":
-                msg = "sign not allowed with 'c' presentation type"
-                raise OperationError(space.w_ValueError, space.wrap(msg))
-            value = space.int_w(w_num)
-            if self.is_unicode:
-                result = runicode.UNICHR(value)
-            else:
-                result = chr(value)
-            n_digits = 1
-            n_remainder = 1
-            to_remainder = 0
-            n_prefix = 0
-            to_prefix = 0
-            to_numeric = 0
-        else:
-            if tp == "b":
-                base = 2
-                skip_leading = 2
-            elif tp == "o":
-                base = 8
-                skip_leading = 2
-            elif tp == "x" or tp == "X":
-                base = 16
-                skip_leading = 2
-            elif tp == "n" or tp == "d":
-                base = 10
-                skip_leading = 0
-            else:
-                raise AssertionError("shouldn't reach")
-            if kind == INT_KIND:
-                result = self._int_to_base(base, space.int_w(w_num))
-            else:
-                result = self._long_to_base(base, space.bigint_w(w_num))
-            n_prefix = skip_leading if self._alternate else 0
-            to_prefix = 0
-            if result[0] == "-":
-                sign_char = "-"
-                skip_leading += 1
-                to_prefix += 1
-            n_digits = len(result) - skip_leading
-            n_remainder = 0
-            to_remainder = 0
-            to_numeric = skip_leading
-        self._get_locale(tp)
-        spec = self._calc_num_width(n_prefix, sign_char, to_numeric, n_digits,
-                                    n_remainder, False, result)
-        fill = self._lit(" ") if self._fill_char == "\0" else self._fill_char
-        upper = self._type == "X"
-        return self.space.wrap(self._fill_number(spec, result, to_numeric,
-                                 to_prefix, fill, to_remainder, upper))
-
-    def _long_to_base(self, base, value):
-        prefix = ""
-        if base == 2:
-            prefix = "0b"
-        elif base == 8:
-            prefix = "0o"
-        elif base == 16:
-            prefix = "0x"
-        as_str = value.format(LONG_DIGITS[:base], prefix)
-        if self.is_unicode:
-            return as_str.decode("ascii")
-        return as_str
-
-    def _int_to_base(self, base, value):
-        if base == 10:
-            s = str(value)
+        def _lit(self, s):
             if self.is_unicode:
                 return s.decode("ascii")
-            return s
-        # This part is slow.
-        negative = value < 0
-        value = abs(value)
-        buf = ["\0"] * (8 * 8 + 6) # Too much on 32 bit, but who cares?
-        i = len(buf) - 1
-        while True:
-            div = value // base
-            mod = value - div * base
-            digit = abs(mod)
-            digit += ord("0") if digit < 10 else ord("a") - 10
-            buf[i] = chr(digit)
-            value = div
+            else:
+                return s
+
+        def _pad(self, string):
+            builder = self._builder()
+            builder.append_multiple_char(self._fill_char, self._left_pad)
+            builder.append(string)
+            builder.append_multiple_char(self._fill_char, self._right_pad)
+            return builder.build()
+
+        def _builder(self):
+            if self.is_unicode:
+                return rstring.UnicodeBuilder()
+            else:
+                return rstring.StringBuilder()
+
+        def _unknown_presentation(self, tp):
+            msg = "unknown presentation for %s: '%s'"
+            w_msg = self.space.wrap(msg  % (tp, self._type))
+            raise OperationError(self.space.w_ValueError, w_msg)
+
+        def format_string(self, string):
+            space = self.space
+            if self._parse_spec("s", "<"):
+                return space.wrap(string)
+            if self._type != "s":
+                self._unknown_presentation("string")
+            if self._sign != "\0":
+                msg = "Sign not allowed in string format specifier"
+                raise OperationError(space.w_ValueError, space.wrap(msg))
+            if self._alternate:
+                msg = "Alternate form not allowed in string format specifier"
+                raise OperationError(space.w_ValueError, space.wrap(msg))
+            if self._align == "=":
+                msg = "'=' alignment not allowed in string format specifier"
+                raise OperationError(space.w_ValueError, space.wrap(msg))
+            length = len(string)
+            precision = self._precision
+            if precision != -1 and length >= precision:
+                assert precision >= 0
+                length = precision
+                string = string[:precision]
+            if self._fill_char == "\0":
+                self._fill_char = self._lit(" ")[0]
+            self._calc_padding(string, length)
+            return space.wrap(self._pad(string))
+
+        def _get_locale(self, tp):
+            space = self.space
+            if tp == "n":
+                dec, thousands, grouping = rlocale.numeric_formatting()
+            elif self._thousands_sep:
+                dec = "."
+                thousands = ","
+                grouping = "\3\0"
+            else:
+                dec = "."
+                thousands = ""
+                grouping = "\256"
+            if self.is_unicode:
+                self._loc_dec = dec.decode("ascii")
+                self._loc_thousands = thousands.decode("ascii")
+            else:
+                self._loc_dec = dec
+                self._loc_thousands = thousands
+            self._loc_grouping = grouping
+
+        def _calc_num_width(self, n_prefix, sign_char, to_number, n_number,
+                            n_remainder, has_dec, digits):
+            """Calculate widths of all parts of formatted number.
+
+            Output will look like:
+
+                <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal>
+                <remainder> <rpadding>
+
+            sign is computed from self._sign, and the sign of the number
+            prefix is given
+            digits is known
+            """
+            spec = NumberSpec()
+            spec.n_digits = n_number - n_remainder - has_dec
+            spec.n_prefix = n_prefix
+            spec.n_lpadding = 0
+            spec.n_decimal = int(has_dec)
+            spec.n_remainder = n_remainder
+            spec.n_spadding = 0
+            spec.n_rpadding = 0
+            spec.n_min_width = 0
+            spec.n_total = 0
+            spec.sign = "\0"
+            spec.n_sign = 0
+            sign = self._sign
+            if sign == "+":
+                spec.n_sign = 1
+                spec.sign = "-" if sign_char == "-" else "+"
+            elif sign == " ":
+                spec.n_sign = 1
+                spec.sign = "-" if sign_char == "-" else " "
+            elif sign_char == "-":
+                spec.n_sign = 1
+                spec.sign = "-"
+            extra_length = (spec.n_sign + spec.n_prefix + spec.n_decimal +
+                            spec.n_remainder) # Not padding or digits
+            if self._fill_char == "0" and self._align == "=":
+                spec.n_min_width = self._width - extra_length
+            if self._loc_thousands:
+                self._group_digits(spec, digits[to_number:])
+                n_grouped_digits = len(self._grouped_digits)
+            else:
+                n_grouped_digits = spec.n_digits
+            n_padding = self._width - (extra_length + n_grouped_digits)
+            if n_padding > 0:
+                align = self._align
+                if align == "<":
+                    spec.n_rpadding = n_padding
+                elif align == ">":
+                    spec.n_lpadding = n_padding
+                elif align == "^":
+                    spec.n_lpadding = n_padding // 2
+                    spec.n_rpadding = n_padding - spec.n_lpadding
+                elif align == "=":
+                    spec.n_spadding = n_padding
+                else:
+                    raise AssertionError("shouldn't reach")
+            spec.n_total = spec.n_lpadding + spec.n_sign + spec.n_prefix + \
+                           spec.n_spadding + n_grouped_digits + \
+                           spec.n_decimal + spec.n_remainder + spec.n_rpadding
+            return spec
+
+        def _fill_digits(self, buf, digits, d_state, n_chars, n_zeros,
+                         thousands_sep):
+            if thousands_sep:
+                for c in thousands_sep:
+                    buf.append(c)
+            for i in range(d_state - 1, d_state - n_chars - 1, -1):
+                buf.append(digits[i])
+            for i in range(n_zeros):
+                buf.append("0")
+
+        def _group_digits(self, spec, digits):
+            buf = []
+            grouping = self._loc_grouping
+            min_width = spec.n_min_width
+            grouping_state = 0
+            count = 0
+            left = spec.n_digits
+            n_ts = len(self._loc_thousands)
+            need_separator = False
+            done = False
+            groupings = len(grouping)
+            previous = 0
+            while True:
+                group = ord(grouping[grouping_state])
+                if group > 0:
+                    if group == 256:
+                        break
+                    grouping_state += 1
+                    previous = group
+                else:
+                    group = previous
+                final_grouping = min(group, max(left, max(min_width, 1)))
+                n_zeros = max(0, final_grouping - left)
+                n_chars = max(0, min(left, final_grouping))
+                ts = self._loc_thousands if need_separator else None
+                self._fill_digits(buf, digits, left, n_chars, n_zeros, ts)
+                need_separator = True
+                left -= n_chars
+                min_width -= final_grouping
+                if left <= 0 and min_width <= 0:
+                    done = True
+                    break
+                min_width -= n_ts
+            if not done:
+                group = max(max(left, min_width), 1)
+                n_zeros = max(0, group - left)
+                n_chars = max(0, min(left, group))
+                ts = self._loc_thousands if need_separator else None
+                self._fill_digits(buf, digits, left, n_chars, n_zeros, ts)
+            buf.reverse()
+            self._grouped_digits = self.empty.join(buf)
+
+        def _upcase_string(self, s):
+            buf = []
+            for c in s:
+                index = ord(c)
+                if ord("a") <= index <= ord("z"):
+                    c = chr(index - 32)
+                buf.append(c)
+            return self.empty.join(buf)
+
+
+        def _fill_number(self, spec, num, to_digits, to_prefix, fill_char,
+                         to_remainder, upper, grouped_digits=None):
+            out = self._builder()
+            if spec.n_lpadding:
+                out.append_multiple_char(fill_char[0], spec.n_lpadding)
+            if spec.n_sign:
+                if self.is_unicode:
+                    sign = spec.sign.decode("ascii")
+                else:
+                    sign = spec.sign
+                out.append(sign)
+            if spec.n_prefix:
+                pref = num[to_prefix:to_prefix + spec.n_prefix]
+                if upper:
+                    pref = self._upcase_string(pref)
+                out.append(pref)
+            if spec.n_spadding:
+                out.append_multiple_char(fill_char[0], spec.n_spadding)
+            if spec.n_digits != 0:
+                if self._loc_thousands:
+                    if grouped_digits is not None:
+                        digits = grouped_digits
+                    else:
+                        digits = self._grouped_digits
+                        assert digits is not None
+                else:
+                    stop = to_digits + spec.n_digits
+                    assert stop >= 0
+                    digits = num[to_digits:stop]
+                if upper:
+                    digits = self._upcase_string(digits)
+                out.append(digits)
+            if spec.n_decimal:
+                out.append(self._lit(".")[0])
+            if spec.n_remainder:
+                out.append(num[to_remainder:])
+            if spec.n_rpadding:
+                out.append_multiple_char(fill_char[0], spec.n_rpadding)
+            #if complex, need to call twice - just retun the buffer
+            return out.build()
+
+        def _format_int_or_long(self, w_num, kind):
+            space = self.space
+            if self._precision != -1:
+                msg = "precision not allowed in integer type"
+                raise OperationError(space.w_ValueError, space.wrap(msg))
+            sign_char = "\0"
+            tp = self._type
+            if tp == "c":
+                if self._sign != "\0":
+                    msg = "sign not allowed with 'c' presentation type"
+                    raise OperationError(space.w_ValueError, space.wrap(msg))
+                value = space.int_w(w_num)
+                if self.is_unicode:
+                    result = runicode.UNICHR(value)
+                else:
+                    result = chr(value)
+                n_digits = 1
+                n_remainder = 1
+                to_remainder = 0
+                n_prefix = 0
+                to_prefix = 0
+                to_numeric = 0
+            else:
+                if tp == "b":
+                    base = 2
+                    skip_leading = 2
+                elif tp == "o":
+                    base = 8
+                    skip_leading = 2
+                elif tp == "x" or tp == "X":
+                    base = 16
+                    skip_leading = 2
+                elif tp == "n" or tp == "d":
+                    base = 10
+                    skip_leading = 0
+                else:
+                    raise AssertionError("shouldn't reach")
+                if kind == INT_KIND:
+                    result = self._int_to_base(base, space.int_w(w_num))
+                else:
+                    result = self._long_to_base(base, space.bigint_w(w_num))
+                n_prefix = skip_leading if self._alternate else 0
+                to_prefix = 0
+                if result[0] == "-":
+                    sign_char = "-"
+                    skip_leading += 1
+                    to_prefix += 1
+                n_digits = len(result) - skip_leading
+                n_remainder = 0
+                to_remainder = 0
+                to_numeric = skip_leading
+            self._get_locale(tp)
+            spec = self._calc_num_width(n_prefix, sign_char, to_numeric, n_digits,
+                                        n_remainder, False, result)
+            fill = self._lit(" ") if self._fill_char == "\0" else self._fill_char
+            upper = self._type == "X"
+            return self.space.wrap(self._fill_number(spec, result, to_numeric,
+                                     to_prefix, fill, to_remainder, upper))
+
+        def _long_to_base(self, base, value):
+            prefix = ""
+            if base == 2:
+                prefix = "0b"
+            elif base == 8:
+                prefix = "0o"
+            elif base == 16:
+                prefix = "0x"
+            as_str = value.format(LONG_DIGITS[:base], prefix)
+            if self.is_unicode:
+                return as_str.decode("ascii")
+            return as_str
+
+        def _int_to_base(self, base, value):
+            if base == 10:
+                s = str(value)
+                if self.is_unicode:
+                    return s.decode("ascii")
+                return s
+            # This part is slow.
+            negative = value < 0
+            value = abs(value)
+            buf = ["\0"] * (8 * 8 + 6) # Too much on 32 bit, but who cares?
+            i = len(buf) - 1
+            while True:
+                div = value // base
+                mod = value - div * base
+                digit = abs(mod)
+                digit += ord("0") if digit < 10 else ord("a") - 10
+                buf[i] = chr(digit)
+                value = div
+                i -= 1
+                if not value:
+                    break
+            if base == 2:
+                buf[i] = "b"
+                buf[i - 1] = "0"
+            elif base == 8:
+                buf[i] = "o"
+                buf[i - 1] = "0"
+            elif base == 16:
+                buf[i] = "x"
+                buf[i - 1] = "0"
+            else:
+                buf[i] = "#"
+                buf[i - 1] = chr(ord("0") + base % 10)
+                if base > 10:
+                    buf[i - 2] = chr(ord("0") + base // 10)
+                    i -= 1
             i -= 1
-            if not value:
-                break
-        if base == 2:
-            buf[i] = "b"
-            buf[i - 1] = "0"
-        elif base == 8:
-            buf[i] = "o"
-            buf[i - 1] = "0"
-        elif base == 16:
-            buf[i] = "x"
-            buf[i - 1] = "0"
-        else:
-            buf[i] = "#"
-            buf[i - 1] = chr(ord("0") + base % 10)
-            if base > 10:
-                buf[i - 2] = chr(ord("0") + base // 10)
+            if negative:
                 i -= 1
-        i -= 1
-        if negative:
-            i -= 1
-            buf[i] = "-"
-        assert i >= 0
-        return self.empty.join(buf[i:])
+                buf[i] = "-"
+            assert i >= 0
+            return self.empty.join(buf[i:])
 
-    def format_int_or_long(self, w_num, kind):
-        space = self.space
-        if self._parse_spec("d", ">"):
+        def format_int_or_long(self, w_num, kind):
+            space = self.space
+            if self._parse_spec("d", ">"):
+                if self.is_unicode:
+                    return space.call_function(space.w_unicode, w_num)
+                return self.space.str(w_num)
+            tp = self._type
+            if (tp == "b" or
+                tp == "c" or
+                tp == "d" or
+                tp == "o" or
+                tp == "x" or
+                tp == "X" or
+                tp == "n"):
+                return self._format_int_or_long(w_num, kind)
+            elif (tp == "e" or
+                  tp == "E" or
+                  tp == "f" or
+                  tp == "F" or
+                  tp == "g" or
+                  tp == "G" or
+                  tp == "%"):
+                w_float = space.float(w_num)
+                return self._format_float(w_float)
+            else:
+                self._unknown_presentation("int" if kind == INT_KIND else "long")
+
+        def _parse_number(self, s, i):
+            """Determine if s has a decimal point, and the index of the first #
+            after the decimal, or the end of the number."""
+            length = len(s)
+            while i < length and "0" <= s[i] <= "9":
+                i += 1
+            rest = i
+            dec_point = i < length and s[i] == "."
+            if dec_point:
+                rest += 1
+            #differs from CPython method - CPython sets n_remainder
+            return dec_point, rest
+
+        def _format_float(self, w_float):
+            """helper for format_float"""
+            space = self.space
+            flags = 0
+            default_precision = 6
+            if self._alternate:
+                msg = "alternate form not allowed in float formats"
+                raise OperationError(space.w_ValueError, space.wrap(msg))
+            tp = self._type
+            self._get_locale(tp)
+            if tp == "\0":
+                tp = "g"
+                default_precision = 12
+                flags |= rfloat.DTSF_ADD_DOT_0
+            elif tp == "n":
+                tp = "g"
+            value = space.float_w(w_float)
+            if tp == "%":
+                tp = "f"
+                value *= 100
+                add_pct = True
+            else:
+                add_pct = False
+            if self._precision == -1:
+                self._precision = default_precision
+            result, special = rfloat.double_to_string(value, tp,
+                                                      self._precision, flags)
+            if add_pct:
+                result += "%"
+            n_digits = len(result)
+            if result[0] == "-":
+                sign = "-"
+                to_number = 1
+                n_digits -= 1
+            else:
+                sign = "\0"
+                to_number = 0
+            have_dec_point, to_remainder = self._parse_number(result, to_number)
+            n_remainder = len(result) - to_remainder
             if self.is_unicode:
-                return space.call_function(space.w_unicode, w_num)
-            return self.space.str(w_num)
-        tp = self._type
-        if (tp == "b" or
-            tp == "c" or
-            tp == "d" or
-            tp == "o" or
-            tp == "x" or
-            tp == "X" or
-            tp == "n"):
-            return self._format_int_or_long(w_num, kind)
-        elif (tp == "e" or
-              tp == "E" or
-              tp == "f" or
-              tp == "F" or
-              tp == "g" or
-              tp == "G" or
-              tp == "%"):
-            w_float = space.float(w_num)
-            return self._format_float(w_float)
-        else:
-            self._unknown_presentation("int" if kind == INT_KIND else "long")
+                digits = result.decode("ascii")
+            else:
+                digits = result
+            spec = self._calc_num_width(0, sign, to_number, n_digits,
+                                        n_remainder, have_dec_point, digits)
+            fill = self._lit(" ") if self._fill_char == "\0" else self._fill_char
+            return self.space.wrap(self._fill_number(spec, digits, to_number, 0,
+                                      fill, to_remainder, False))
 
-    def _parse_number(self, s, i):
-        """Determine if s has a decimal point, and the index of the first #
-        after the decimal, or the end of the number."""
-        length = len(s)
-        while i < length and "0" <= s[i] <= "9":
-            i += 1
-        rest = i
-        dec_point = i < length and s[i] == "."
-        if dec_point:
-            rest += 1
-        #differs from CPython method - CPython sets n_remainder
-        return dec_point, rest
+        def format_float(self, w_float):
+            space = self.space
+            if self._parse_spec("\0", ">"):
+                if self.is_unicode:
+                    return space.call_function(space.w_unicode, w_float)
+                return space.str(w_float)
+            tp = self._type
+            if (tp == "\0" or
+                tp == "e" or
+                tp == "E" or
+                tp == "f" or
+                tp == "F" or
+                tp == "g" or
+                tp == "G" or
+                tp == "n" or
+                tp == "%"):
+                return self._format_float(w_float)
+            self._unknown_presentation("float")
 
-    def _format_float(self, w_float):
-        """helper for format_float"""
-        space = self.space
-        flags = 0
-        default_precision = 6
-        if self._alternate:
-            msg = "alternate form not allowed in float formats"
-            raise OperationError(space.w_ValueError, space.wrap(msg))
-        tp = self._type
-        self._get_locale(tp)
-        if tp == "\0":
-            tp = "g"
-            default_precision = 12
-            flags |= rfloat.DTSF_ADD_DOT_0
-        elif tp == "n":
-            tp = "g"
-        value = space.float_w(w_float)
-        if tp == "%":
-            tp = "f"
-            value *= 100
-            add_pct = True
-        else:
-            add_pct = False
-        if self._precision == -1:
-            self._precision = default_precision
-        result, special = rfloat.double_to_string(value, tp,
-                                                  self._precision, flags)
-        if add_pct:
-            result += "%"
-        n_digits = len(result)
-        if result[0] == "-":
-            sign = "-"
-            to_number = 1
-            n_digits -= 1
-        else:
-            sign = "\0"
-            to_number = 0
-        have_dec_point, to_remainder = self._parse_number(result, to_number)
-        n_remainder = len(result) - to_remainder
-        if self.is_unicode:
-            digits = result.decode("ascii")
-        else:
-            digits = result
-        spec = self._calc_num_width(0, sign, to_number, n_digits,
-                                    n_remainder, have_dec_point, digits)
-        fill = self._lit(" ") if self._fill_char == "\0" else self._fill_char
-        return self.space.wrap(self._fill_number(spec, digits, to_number, 0,
-                                  fill, to_remainder, False))
+        def _format_complex(self, w_complex):
+            space = self.space
+            tp = self._type
+            self._get_locale(tp)
+            default_precision = 6
+            if self._align == "=":
+                # '=' alignment is invalid
+                msg = ("'=' alignment flag is not allowed in"
+                       " complex format specifier")
+                raise OperationError(space.w_ValueError, space.wrap(msg))
+            if self._fill_char == "0":
+                #zero padding is invalid
+                msg = "Zero padding is not allowed in complex format specifier"
+                raise OperationError(space.w_ValueError, space.wrap(msg))
+            if self._alternate:
+                #alternate is invalid
+                msg = "Alternate form %s not allowed in complex format specifier"
+                raise OperationError(space.w_ValueError,
+                                     space.wrap(msg % (self._alternate)))
+            skip_re = 0
+            add_parens = 0
+            if tp == "\0":
+                #should mirror str() output
+                tp = "g"
+                default_precision = 12
+                #test if real part is non-zero
+                if (w_complex.realval == 0 and
+                    copysign(1., w_complex.realval) == 1.):
+                    skip_re = 1
+                else:
+                    add_parens = 1
 
-    def format_float(self, w_float):
-        space = self.space
-        if self._parse_spec("\0", ">"):
+            if tp == "n":
+                #same as 'g' except for locale, taken care of later
+                tp = "g"
+
+            #check if precision not set
+            if self._precision == -1:
+                self._precision = default_precision
+
+            #might want to switch to double_to_string from formatd
+            #in CPython it's named 're' - clashes with re module
+            re_num = formatd(w_complex.realval, tp, self._precision)
+            im_num = formatd(w_complex.imagval, tp, self._precision)
+            n_re_digits = len(re_num)
+            n_im_digits = len(im_num)
+
+            to_real_number = 0
+            to_imag_number = 0
+            re_sign = im_sign = ''
+            #if a sign character is in the output, remember it and skip
+            if re_num[0] == "-":
+                re_sign = "-"
+                to_real_number = 1
+                n_re_digits -= 1
+            if im_num[0] == "-":
+                im_sign = "-"
+                to_imag_number = 1
+                n_im_digits -= 1
+
+            #turn off padding - do it after number composition
+            #calc_num_width uses self._width, so assign to temporary variable,
+            #calculate width of real and imag parts, then reassign padding, align
+            tmp_fill_char = self._fill_char
+            tmp_align = self._align
+            tmp_width = self._width
+            self._fill_char = "\0"
+            self._align = "<"
+            self._width = -1
+
+            #determine if we have remainder, might include dec or exponent or both
+            re_have_dec, re_remainder_ptr = self._parse_number(re_num,
+                                                               to_real_number)
+            im_have_dec, im_remainder_ptr = self._parse_number(im_num,
+                                                               to_imag_number)
+
             if self.is_unicode:
-                return space.call_function(space.w_unicode, w_float)
-            return space.str(w_float)
-        tp = self._type
-        if (tp == "\0" or
-            tp == "e" or
-            tp == "E" or
-            tp == "f" or
-            tp == "F" or
-            tp == "g" or
-            tp == "G" or
-            tp == "n" or
-            tp == "%"):
-            return self._format_float(w_float)
-        self._unknown_presentation("float")
+                re_num = re_num.decode("ascii")
+                im_num = im_num.decode("ascii")
 
-    def _format_complex(self, w_complex):
-        space = self.space
-        tp = self._type
-        self._get_locale(tp)
-        default_precision = 6
-        if self._align == "=":
-            # '=' alignment is invalid
-            msg = ("'=' alignment flag is not allowed in"
-                   " complex format specifier")
-            raise OperationError(space.w_ValueError, space.wrap(msg))
-        if self._fill_char == "0":
-            #zero padding is invalid
-            msg = "Zero padding is not allowed in complex format specifier"
-            raise OperationError(space.w_ValueError, space.wrap(msg))
-        if self._alternate:
-            #alternate is invalid
-            msg = "Alternate form %s not allowed in complex format specifier"
-            raise OperationError(space.w_ValueError,
-                                 space.wrap(msg % (self._alternate)))
-        skip_re = 0
-        add_parens = 0
-        if tp == "\0":
-            #should mirror str() output
-            tp = "g"
-            default_precision = 12
-            #test if real part is non-zero
-            if (w_complex.realval == 0 and
-                copysign(1., w_complex.realval) == 1.):
-                skip_re = 1
-            else:
-                add_parens = 1
+            #set remainder, in CPython _parse_number sets this
+            #using n_re_digits causes tests to fail
+            re_n_remainder = len(re_num) - re_remainder_ptr
+            im_n_remainder = len(im_num) - im_remainder_ptr
+            re_spec = self._calc_num_width(0, re_sign, to_real_number, n_re_digits,
+                                           re_n_remainder, re_have_dec,
+                                           re_num)
 
-        if tp == "n":
-            #same as 'g' except for locale, taken care of later
-            tp = "g"
+            #capture grouped digits b/c _fill_number reads from self._grouped_digits
+            #self._grouped_digits will get overwritten in imaginary calc_num_width
+            re_grouped_digits = self._grouped_digits
+            if not skip_re:
+                self._sign = "+"
+            im_spec = self._calc_num_width(0, im_sign, to_imag_number, n_im_digits,
+                                           im_n_remainder, im_have_dec,
+                                           im_num)
 
-        #check if precision not set
-        if self._precision == -1:
-            self._precision = default_precision
+            im_grouped_digits = self._grouped_digits
+            if skip_re:
+                re_spec.n_total = 0
 
-        #might want to switch to double_to_string from formatd
-        #in CPython it's named 're' - clashes with re module
-        re_num = formatd(w_complex.realval, tp, self._precision)
-        im_num = formatd(w_complex.imagval, tp, self._precision)
-        n_re_digits = len(re_num)
-        n_im_digits = len(im_num)
+            #reassign width, alignment, fill character
+            self._align = tmp_align
+            self._width = tmp_width
+            self._fill_char = tmp_fill_char
 
-        to_real_number = 0
-        to_imag_number = 0
-        re_sign = im_sign = ''
-        #if a sign character is in the output, remember it and skip
-        if re_num[0] == "-":
-            re_sign = "-"
-            to_real_number = 1
-            n_re_digits -= 1
-        if im_num[0] == "-":
-            im_sign = "-"
-            to_imag_number = 1
-            n_im_digits -= 1
+            #compute L and R padding - stored in self._left_pad and self._right_pad
+            self._calc_padding(self.empty, re_spec.n_total + im_spec.n_total + 1 +
+                                           add_parens * 2)
 
-        #turn off padding - do it after number composition
-        #calc_num_width uses self._width, so assign to temporary variable,
-        #calculate width of real and imag parts, then reassign padding, align
-        tmp_fill_char = self._fill_char
-        tmp_align = self._align
-        tmp_width = self._width
-        self._fill_char = "\0"
-        self._align = "<"
-        self._width = -1
+            out = self._builder()
+            fill = self._fill_char
+            if fill == "\0":
+                fill = self._lit(" ")[0]
 
-        #determine if we have remainder, might include dec or exponent or both
-        re_have_dec, re_remainder_ptr = self._parse_number(re_num,
-                                                           to_real_number)
-        im_have_dec, im_remainder_ptr = self._parse_number(im_num,
-                                                           to_imag_number)
+            #compose the string
+            #add left padding
+            out.append_multiple_char(fill, self._left_pad)
+            if add_parens:
+                out.append(self._lit('(')[0])
 
-        if self.is_unicode:
-            re_num = re_num.decode("ascii")
-            im_num = im_num.decode("ascii")
+            #if the no. has a real component, add it
+            if not skip_re:
+                out.append(self._fill_number(re_spec, re_num, to_real_number, 0,
+                                             fill, re_remainder_ptr, False,
+                                             re_grouped_digits))
 
-        #set remainder, in CPython _parse_number sets this
-        #using n_re_digits causes tests to fail
-        re_n_remainder = len(re_num) - re_remainder_ptr
-        im_n_remainder = len(im_num) - im_remainder_ptr
-        re_spec = self._calc_num_width(0, re_sign, to_real_number, n_re_digits,
-                                       re_n_remainder, re_have_dec,
-                                       re_num)
+            #add imaginary component
+            out.append(self._fill_number(im_spec, im_num, to_imag_number, 0,
+                                         fill, im_remainder_ptr, False,
+                                         im_grouped_digits))
 
-        #capture grouped digits b/c _fill_number reads from self._grouped_digits
-        #self._grouped_digits will get overwritten in imaginary calc_num_width
-        re_grouped_digits = self._grouped_digits
-        if not skip_re:
-            self._sign = "+"
-        im_spec = self._calc_num_width(0, im_sign, to_imag_number, n_im_digits,
-                                       im_n_remainder, im_have_dec,
-                                       im_num)
+            #add 'j' character
+            out.append(self._lit('j')[0])
 
-        im_grouped_digits = self._grouped_digits
-        if skip_re:
-            re_spec.n_total = 0
+            if add_parens:
+                out.append(self._lit(')')[0])
 
-        #reassign width, alignment, fill character
-        self._align = tmp_align
-        self._width = tmp_width
-        self._fill_char = tmp_fill_char
+            #add right padding
+            out.append_multiple_char(fill, self._right_pad)
 
-        #compute L and R padding - stored in self._left_pad and self._right_pad
-        self._calc_padding(self.empty, re_spec.n_total + im_spec.n_total + 1 +
-                                       add_parens * 2)
+            return self.space.wrap(out.build())
 
-        out = self._builder()
-        fill = self._fill_char
-        if fill == "\0":
-            fill = self._lit(" ")[0]
 
-        #compose the string
-        #add left padding
-        out.append_multiple_char(fill, self._left_pad)
-        if add_parens:
-            out.append(self._lit('(')[0])
+        def format_complex(self, w_complex):
+            """return the string representation of a complex number"""
+            space = self.space
+            #parse format specification, set associated variables
+            if self._parse_spec("\0", ">"):
+                return space.str(w_complex)
+            tp = self._type
+            if (tp == "\0" or
+                tp == "e" or
+                tp == "E" or
+                tp == "f" or
+                tp == "F" or
+                tp == "g" or
+                tp == "G" or
+                tp == "n"):
+                return self._format_complex(w_complex)
+            self._unknown_presentation("complex")
+    return Formatter
 
-        #if the no. has a real component, add it
-        if not skip_re:
-            out.append(self._fill_number(re_spec, re_num, to_real_number, 0,
-                                         fill, re_remainder_ptr, False,
-                                         re_grouped_digits))
-
-        #add imaginary component
-        out.append(self._fill_number(im_spec, im_num, to_imag_number, 0,
-                                     fill, im_remainder_ptr, False,
-                                     im_grouped_digits))
-
-        #add 'j' character
-        out.append(self._lit('j')[0])
-
-        if add_parens:
-            out.append(self._lit(')')[0])
-
-        #add right padding
-        out.append_multiple_char(fill, self._right_pad)
-
-        return self.space.wrap(out.build())
-
-
-    def format_complex(self, w_complex):
-        """return the string representation of a complex number"""
-        space = self.space
-        #parse format specification, set associated variables
-        if self._parse_spec("\0", ">"):
-            return space.str(w_complex)
-        tp = self._type
-        if (tp == "\0" or
-            tp == "e" or
-            tp == "E" or
-            tp == "f" or
-            tp == "F" or
-            tp == "g" or
-            tp == "G" or
-            tp == "n"):
-            return self._format_complex(w_complex)
-        self._unknown_presentation("complex")
+StrFormatter = make_formatting_class()
+UnicodeFormatter = make_formatting_class()
 
 
 def unicode_formatter(space, spec):
-    return Formatter(space, True, spec)
-
+    return UnicodeFormatter(space, True, spec)
 
 def str_formatter(space, spec):
-    return Formatter(space, False, spec)
+    return StrFormatter(space, False, spec)
 
 
 @specialize.arg(2)
diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py
--- a/pypy/objspace/std/stringobject.py
+++ b/pypy/objspace/std/stringobject.py
@@ -362,43 +362,26 @@
 
     return _str_join_many_items(space, w_self, list_w, size)
 
-from pypy.rlib.jit import JitDriver
-
-one = JitDriver(greens = [], reds = ['size', 'reslen', 'self', 'list_w'])
-two = JitDriver(greens = [], reds = ['i', 'size', 'list_w', 'sb', 'self',
-                                     'w_self'])
-
-def _str_join_compute_reslen(space, self, list_w, size):
+def _str_join_many_items(space, w_self, list_w, size):
+    self = w_self._value
     reslen = len(self) * (size - 1)
     for i in range(size):
-        one.jit_merge_point(size = size, reslen = reslen,
-                            self = self, list_w = list_w)
         w_s = list_w[i]
         if not space.isinstance_w(w_s, space.w_str):
             if space.isinstance_w(w_s, space.w_unicode):
-                return -1
+                # we need to rebuild w_list here, because the original
+                # w_list might be an iterable which we already consumed
+                w_list = space.newlist(list_w)
+                w_u = space.call_function(space.w_unicode, w_self)
+                return space.call_method(w_u, "join", w_list)
             raise operationerrfmt(
                 space.w_TypeError,
                 "sequence item %d: expected string, %s "
                 "found", i, space.type(w_s).getname(space))
         reslen += len(space.str_w(w_s))
-    return reslen
-
-def _str_join_many_items(space, w_self, list_w, size):
-    self = w_self._value
-
-    reslen = _str_join_compute_reslen(space, self, list_w, size)
-    if reslen == -1:
-        # we need to rebuild w_list here, because the original
-        # w_list might be an iterable which we already consumed
-        w_list = space.newlist(list_w)
-        w_u = space.call_function(space.w_unicode, w_self)
-        return space.call_method(w_u, "join", w_list)
 
     sb = StringBuilder(reslen)
     for i in range(size):
-        two.jit_merge_point(size=size, i=i, sb=sb, list_w=list_w, self=self,
-                            w_self=w_self)
         if self and i != 0:
             sb.append(self)
         sb.append(space.str_w(list_w[i]))
diff --git a/pypy/rlib/_rweakkeydict.py b/pypy/rlib/_rweakkeydict.py
--- a/pypy/rlib/_rweakkeydict.py
+++ b/pypy/rlib/_rweakkeydict.py
@@ -117,7 +117,7 @@
     d = lltype.malloc(WEAKDICT)
     d.entries = WEAKDICT.entries.TO.allocate(rdict.DICT_INITSIZE)
     d.num_items = 0
-    d.num_pristine_entries = rdict.DICT_INITSIZE
+    d.resize_counter = rdict.DICT_INITSIZE * 2
     return d
 
 @jit.dont_look_inside
@@ -152,8 +152,8 @@
     #                 ll_debugrepr(llkey),
     #                 ll_debugrepr(llvalue))
     if not everused:
-        d.num_pristine_entries -= 1
-        if d.num_pristine_entries * 3 <= len(d.entries):
+        d.resize_counter -= 3
+        if d.resize_counter <= 0:
             #llop.debug_print(lltype.Void, 'RESIZE')
             ll_weakdict_resize(d)
 
@@ -206,6 +206,6 @@
 
 WEAKDICT = lltype.GcStruct("weakkeydict",
                            ("num_items", lltype.Signed),
-                           ("num_pristine_entries", lltype.Signed),
+                           ("resize_counter", lltype.Signed),
                            ("entries", lltype.Ptr(WEAKDICTENTRYARRAY)),
                            adtmeths=dictmeths)
diff --git a/pypy/rlib/_rweakvaldict.py b/pypy/rlib/_rweakvaldict.py
--- a/pypy/rlib/_rweakvaldict.py
+++ b/pypy/rlib/_rweakvaldict.py
@@ -53,7 +53,7 @@
         self.WEAKDICT = lltype.GcStruct(
             "weakvaldict",
             ("num_items", lltype.Signed),
-            ("num_pristine_entries", lltype.Signed),
+            ("resize_counter", lltype.Signed),
             ("entries", lltype.Ptr(WEAKDICTENTRYARRAY)),
             adtmeths=dictmeths)
 
@@ -107,7 +107,7 @@
         d = lltype.malloc(self.WEAKDICT)
         d.entries = self.WEAKDICT.entries.TO.allocate(rdict.DICT_INITSIZE)
         d.num_items = 0
-        d.num_pristine_entries = rdict.DICT_INITSIZE
+        d.resize_counter = rdict.DICT_INITSIZE * 2
         return d
 
     @jit.dont_look_inside
@@ -138,8 +138,8 @@
         d.entries[i].value = valueref
         #llop.debug_print(lltype.Void, i, 'stored')
         if not everused:
-            d.num_pristine_entries -= 1
-            if d.num_pristine_entries * 3 <= len(d.entries):
+            d.resize_counter -= 3
+            if d.resize_counter <= 0:
                 #llop.debug_print(lltype.Void, 'RESIZE')
                 self.ll_weakdict_resize(d)
 
diff --git a/pypy/rlib/jit.py b/pypy/rlib/jit.py
--- a/pypy/rlib/jit.py
+++ b/pypy/rlib/jit.py
@@ -1,10 +1,13 @@
+import sys
+
 import py
-import sys
+
+from pypy.rlib.nonconst import NonConstant
+from pypy.rlib.objectmodel import CDefinedIntSymbolic, keepalive_until_here, specialize
+from pypy.rlib.unroll import unrolling_iterable
 from pypy.rpython.extregistry import ExtRegistryEntry
-from pypy.rlib.objectmodel import CDefinedIntSymbolic
-from pypy.rlib.objectmodel import keepalive_until_here, specialize
-from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.nonconst import NonConstant
+from pypy.tool.sourcetools import func_with_new_name
+
 
 def elidable(func):
     """ Decorate a function as "trace-elidable". This means precisely that:
@@ -72,17 +75,22 @@
     func._jit_loop_invariant_ = True
     return func
 
+def _get_args(func):
+    import inspect
+
+    args, varargs, varkw, defaults = inspect.getargspec(func)
+    args = ["v%s" % (i, ) for i in range(len(args))]
+    assert varargs is None and varkw is None
+    assert not defaults
+    return args
+
 def elidable_promote(promote_args='all'):
     """ A decorator that promotes all arguments and then calls the supplied
     function
     """
     def decorator(func):
-        import inspect
         elidable(func)
-        args, varargs, varkw, defaults = inspect.getargspec(func)
-        args = ["v%s" % (i, ) for i in range(len(args))]
-        assert varargs is None and varkw is None
-        assert not defaults
+        args = _get_args(func)
         argstring = ", ".join(args)
         code = ["def f(%s):\n" % (argstring, )]
         if promote_args != 'all':
@@ -102,6 +110,46 @@
     warnings.warn("purefunction_promote is deprecated, use elidable_promote instead", DeprecationWarning)
     return elidable_promote(*args, **kwargs)
 
+def look_inside_iff(predicate):
+    """
+    look inside (including unrolling loops) the target function, if and only if
+    predicate(*args) returns True
+    """
+    def inner(func):
+        func = unroll_safe(func)
+        # When we return the new function, it might be specialized in some
+        # way. We "propogate" this specialization by using
+        # specialize:call_location on relevant functions.
+        for thing in [func, predicate]:
+            thing._annspecialcase_ = "specialize:call_location"
+
+        args = _get_args(func)
+        d = {
+            "dont_look_inside": dont_look_inside,
+            "predicate": predicate,
+            "func": func,
+            "we_are_jitted": we_are_jitted,
+        }
+        exec py.code.Source("""
+            @dont_look_inside
+            def trampoline(%(arguments)s):
+                return func(%(arguments)s)
+            if hasattr(func, "oopspec"):
+                # XXX: This seems like it should be here, but it causes errors.
+                # trampoline.oopspec = func.oopspec
+                del func.oopspec
+            trampoline.__name__ = func.__name__ + "_trampoline"
+            trampoline._annspecialcase_ = "specialize:call_location"
+
+            def f(%(arguments)s):
+                if not we_are_jitted() or predicate(%(arguments)s):
+                    return func(%(arguments)s)
+                else:
+                    return trampoline(%(arguments)s)
+            f.__name__ = func.__name__ + "_look_inside_iff"
+        """ % {"arguments": ", ".join(args)}).compile() in d
+        return d["f"]
+    return inner
 
 def oopspec(spec):
     def decorator(func):
@@ -109,6 +157,34 @@
         return func
     return decorator
 
+@oopspec("jit.isconstant(value)")
+@specialize.argtype(0)
+def isconstant(value):
+    """
+    While tracing, returns whether or not the value is currently known to be
+    constant. This is not perfect, values can become constant later. Mostly for
+    use with @look_inside_iff.
+
+    This is for advanced usage only.
+    """
+    # I hate the annotator so much.
+    if NonConstant(False):
+        return True
+    return False
+
+@oopspec("jit.isvirtual(value)")
+@specialize.ll()
+def isvirtual(value):
+    """
+    Returns if this value is virtual, while tracing, it's relatively
+    conservative and will miss some cases.
+
+    This is for advanced usage only.
+    """
+    if NonConstant(False):
+        return True
+    return False
+
 class Entry(ExtRegistryEntry):
     _about_ = hint
 
@@ -291,10 +367,10 @@
 class JitHintError(Exception):
     """Inconsistency in the JIT hints."""
 
-PARAMETERS = {'threshold': 1032, # just above 1024
-              'function_threshold': 1617, # slightly more than one above
+PARAMETERS = {'threshold': 1039, # just above 1024, prime
+              'function_threshold': 1619, # slightly more than one above, also prime
               'trace_eagerness': 200,
-              'trace_limit': 12000,
+              'trace_limit': 6000,
               'inlining': 1,
               'loop_longevity': 1000,
               'retrace_limit': 5,
diff --git a/pypy/rlib/objectmodel.py b/pypy/rlib/objectmodel.py
--- a/pypy/rlib/objectmodel.py
+++ b/pypy/rlib/objectmodel.py
@@ -70,11 +70,12 @@
 
         return decorated_func
 
-    def ll_and_arg(self, arg):
-        """ XXX what does that do?
+    def ll_and_arg(self, *args):
+        """ This is like ll(), but instead of specializing on all arguments,
+        specializes on only the arguments at the given positions
         """
         def decorated_func(func):
-            func._annspecialcase_ = 'specialize:ll_and_arg(%d)' % arg
+            func._annspecialcase_ = 'specialize:ll_and_arg' + self._wrap(args)
             return func
 
         return decorated_func
diff --git a/pypy/rlib/parsing/codebuilder.py b/pypy/rlib/parsing/codebuilder.py
--- a/pypy/rlib/parsing/codebuilder.py
+++ b/pypy/rlib/parsing/codebuilder.py
@@ -1,3 +1,5 @@
+import contextlib
+
 class Codebuilder(object):
     def __init__(self):
         self.blocks = []
@@ -27,10 +29,12 @@
         assert blockstarter.endswith(":")
         self.emit(blockstarter)
         self.blocks.append(blockstarter)
-        def BlockEnder():
-            yield None
-            self.end_block(blockstarter)
-        return BlockEnder()
+
+    @contextlib.contextmanager
+    def block(self, blockstarter):
+        self.start_block(blockstarter)
+        yield None
+        self.end_block(blockstarter)
 
     def end_block(self, starterpart=""):
         block = self.blocks.pop()
diff --git a/pypy/rlib/parsing/deterministic.py b/pypy/rlib/parsing/deterministic.py
--- a/pypy/rlib/parsing/deterministic.py
+++ b/pypy/rlib/parsing/deterministic.py
@@ -1,3 +1,4 @@
+from __future__ import with_statement
 import py
 
 try:
@@ -228,11 +229,11 @@
         above = set()
         for state, nextstates in state_to_chars.iteritems():
             above.add(state)
-            for _ in result.start_block("if state == %s:" % (state, )):
-                for _ in result.start_block("if i < len(input):"):
+            with result.block("if state == %s:" % (state, )):
+                with result.block("if i < len(input):"):
                     result.emit("char = input[i]")
                     result.emit("i += 1")
-                for _ in result.start_block("else:"):
+                with result.block("else:"):
                     if state in self.final_states:
                         result.emit("return True")
                     else:
@@ -248,7 +249,7 @@
                     for i, (a, num) in enumerate(compressed):
                         if num < 5:
                             for charord in range(ord(a), ord(a) + num):
-                                for _ in result.start_block(
+                                with result.block(
                                     "%sif char == %r:" % (
                                         elif_prefix, chr(charord))):
                                     result.emit("state = %s" % (nextstate, ))
@@ -256,23 +257,23 @@
                                 if not elif_prefix:
                                     elif_prefix = "el"
                         else:
-                            for _ in result.start_block(
+                            with result.block(
                                 "%sif %r <= char <= %r:" % (
                                     elif_prefix, a, chr(ord(a) + num - 1))):
                                 result.emit("state = %s""" % (nextstate, ))
                                 result.emit(continue_prefix)
                             if not elif_prefix:
                                 elif_prefix = "el"
-                for _ in result.start_block("else:"):
+                with result.block("else:"):
                     result.emit("break") 
         #print state_to_chars.keys()
         for state in range(self.num_states):
             if state in state_to_chars:
                 continue
-            for _ in result.start_block("if state == %s:" % (state, )):
-                for _ in result.start_block("if i == len(input):"):
+            with result.block("if state == %s:" % (state, )):
+                with result.block("if i == len(input):"):
                     result.emit("return True")
-                for _ in result.start_block("else:"):
+                with result.block("else:"):
                     result.emit("break")
         result.emit("break")
         result.end_block("while")
@@ -303,14 +304,14 @@
         above = set()
         for state, nextstates in state_to_chars_sorted:
             above.add(state)
-            for _ in result.start_block("if state == %s:" % (state, )):
+            with result.block("if state == %s:" % (state, )):
                 if state in self.final_states:
                     result.emit("runner.last_matched_index = i - 1")
                     result.emit("runner.last_matched_state = state")
-                for _ in result.start_block("try:"):
+                with result.block("try:"):
                     result.emit("char = input[i]")
                     result.emit("i += 1")
-                for _ in result.start_block("except IndexError:"):
+                with result.block("except IndexError:"):
                     result.emit("runner.state = %s" % (state, ))
                     if state in self.final_states:
                         result.emit("return i")
@@ -327,21 +328,21 @@
                     for i, (a, num) in enumerate(compressed):
                         if num < 3:
                             for charord in range(ord(a), ord(a) + num):
-                                for _ in result.start_block("%sif char == %r:"
+                                with result.block("%sif char == %r:"
                                         % (elif_prefix, chr(charord))):
                                     result.emit("state = %s" % (nextstate, ))
                                     result.emit(continue_prefix)
                                 if not elif_prefix:
                                     elif_prefix = "el"
                         else:
-                            for _ in result.start_block(
+                            with result.block(
                                 "%sif %r <= char <= %r:" % (
                                     elif_prefix, a, chr(ord(a) + num - 1))):
                                     result.emit("state = %s" % (nextstate, ))
                                     result.emit(continue_prefix)
                             if not elif_prefix:
                                 elif_prefix = "el"
-                for _ in result.start_block("else:"):
+                with result.block("else:"):
                     result.emit("break")
         #print state_to_chars.keys()
         for state in range(self.num_states):
diff --git a/pypy/rlib/parsing/makepackrat.py b/pypy/rlib/parsing/makepackrat.py
--- a/pypy/rlib/parsing/makepackrat.py
+++ b/pypy/rlib/parsing/makepackrat.py
@@ -1,3 +1,4 @@
+from __future__ import with_statement
 import py
 import sys
 from pypy.rlib.parsing.tree import Nonterminal, Symbol, RPythonVisitor
@@ -321,27 +322,27 @@
         else:
             self.emit("_key = self._pos")
         self.emit("_status = self.%s.get(_key, None)" % (dictname, ))
-        for _ in self.start_block("if _status is None:"):
+        with self.block("if _status is None:"):
             self.emit("_status = self.%s[_key] = Status()" % (
                 dictname, ))
-        for _ in self.start_block("else:"):
+        with self.block("else:"):
             self.emit("_statusstatus = _status.status")
-            for _ in self.start_block("if _statusstatus == _status.NORMAL:"):
+            with self.block("if _statusstatus == _status.NORMAL:"):
                 self.emit("self._pos = _status.pos")
                 self.emit("return _status")
-            for _ in self.start_block("elif _statusstatus == _status.ERROR:"):
+            with self.block("elif _statusstatus == _status.ERROR:"):
                 self.emit("raise BacktrackException(_status.error)")
             if self.have_call:
-                for _ in self.start_block(
+                with self.block(
                     "elif (_statusstatus == _status.INPROGRESS or\n"
                     "      _statusstatus == _status.LEFTRECURSION):"):
                     self.emit("_status.status = _status.LEFTRECURSION")
-                    for _ in self.start_block("if _status.result is not None:"):
+                    with self.block("if _status.result is not None:"):
                         self.emit("self._pos = _status.pos")
                         self.emit("return _status")
-                    for _ in self.start_block("else:"):
+                    with self.block("else:"):
                         self.emit("raise BacktrackException(None)")
-                for _ in self.start_block(
+                with self.block(
                     "elif _statusstatus == _status.SOMESOLUTIONS:"):
                     self.emit("_status.status = _status.INPROGRESS")
         self.emit("_startingpos = self._pos")
@@ -352,10 +353,10 @@
     def memoize_footer(self, name, args):
         dictname = "_dict_%s" % (name, )
         if self.have_call:
-            for _ in self.start_block(
+            with self.block(
                 "if _status.status == _status.LEFTRECURSION:"):
-                for _ in self.start_block("if _status.result is not None:"):
-                    for _ in self.start_block("if _status.pos >= self._pos:"):
+                with self.block("if _status.result is not None:"):
+                    with self.block("if _status.pos >= self._pos:"):
                         self.emit("_status.status = _status.NORMAL")
                         self.emit("self._pos = _status.pos")
                         self.emit("return _status")
@@ -373,7 +374,7 @@
         self.emit("_status.error = _error")
         self.emit("return _status")
         self.end_block("try")
-        for _ in self.start_block("except BacktrackException, _exc:"):
+        with self.block("except BacktrackException, _exc:"):
             self.emit("_status.pos = -1")
             self.emit("_status.result = None")
             self.combine_error('_exc.error')
@@ -394,7 +395,7 @@
         self.start_block("class Parser(object):")
         for elt in t.children:
             self.dispatch(elt)
-        for _ in self.start_block("def __init__(self, inputstream):"):
+        with self.block("def __init__(self, inputstream):"):
             for line in self.initcode:
                 self.emit(line)
             self.emit("self._pos = 0")
@@ -405,7 +406,7 @@
 
     def emit_regex_code(self):
         for regex, matcher in self.matchers.iteritems():
-            for _ in  self.start_block(
+            with  self.block(
                     "def _regex%s(self):" % (abs(hash(regex)), )):
                 c = self.choice_point()
                 self.emit("_runner = self._Runner(self._inputstream, self._pos)")
@@ -423,8 +424,8 @@
                 self.emit("self._pos = _upto")
                 self.emit("return _result")
 
-        for _ in self.start_block("class _Runner(object):"):
-            for _ in self.start_block("def __init__(self, text, pos):"):
+        with self.block("class _Runner(object):"):
+            with self.block("def __init__(self, text, pos):"):
                 self.emit("self.text = text")
                 self.emit("self.pos = pos")
                 self.emit("self.last_matched_state = -1")
@@ -444,7 +445,7 @@
         otherargs = t.children[1].children
         argswithself = ", ".join(["self"] + otherargs)
         argswithoutself = ", ".join(otherargs)
-        for _ in self.start_block("def %s(%s):" % (name, argswithself)):
+        with self.block("def %s(%s):" % (name, argswithself)):
             self.emit("return self._%s(%s).result" % (name, argswithoutself))
         self.start_block("def _%s(%s):" % (name, argswithself, ))
         self.namecount = 0
@@ -465,10 +466,10 @@
             self.start_block("while 1:")
         for i, p in enumerate(possibilities):
             c = self.choice_point()
-            for _ in self.start_block("try:"):
+            with self.block("try:"):
                 self.dispatch(p)
                 self.emit("break")
-            for _ in self.start_block("except BacktrackException, _exc:"):
+            with self.block("except BacktrackException, _exc:"):
                 self.combine_error('_exc.error')
                 self.revert(c)
                 if i == len(possibilities) - 1:
@@ -484,9 +485,9 @@
 
     def visit_maybe(self, t):
         c = self.choice_point()
-        for _ in self.start_block("try:"):
+        with self.block("try:"):
             self.dispatch(t.children[0])
-        for _ in self.start_block("except BacktrackException:"):
+        with self.block("except BacktrackException:"):
             self.revert(c)
 
     def visit_repetition(self, t):
@@ -496,12 +497,12 @@
         if t.children[0] == '+':
             self.dispatch(t.children[1])
             self.emit("%s.append(_result)"  % (name, ))
-        for _ in self.start_block("while 1:"):
+        with self.block("while 1:"):
             c = self.choice_point()
-            for _ in self.start_block("try:"):
+            with self.block("try:"):
                 self.dispatch(t.children[1])
                 self.emit("%s.append(_result)" % (name, ))
-            for _ in self.start_block("except BacktrackException, _exc:"):
+            with self.block("except BacktrackException, _exc:"):
                 self.combine_error('_exc.error')
                 self.revert(c)
                 self.emit("break")
@@ -525,12 +526,12 @@
         self.namecount += 1
         child = t.children[0]
         self.emit("%s = _result" % (resultname, ))
-        for _ in self.start_block("try:"):
+        with self.block("try:"):
             self.dispatch(child)
-        for _ in self.start_block("except BacktrackException:"):
+        with self.block("except BacktrackException:"):
             self.revert(c)
             self.emit("_result = %s" % (resultname, ))
-        for _ in self.start_block("else:"):
+        with self.block("else:"):
             # heuristic to get nice error messages sometimes
             if isinstance(child, Symbol) and child.symbol == "QUOTE":
 
@@ -559,21 +560,21 @@
     def visit_if(self, t):
         if len(t.children) == 2:
             self.dispatch(t.children[0])
-        for _ in self.start_block("if not (%s):" % (
+        with self.block("if not (%s):" % (
             t.children[-1].additional_info[1:-1], )):
             self.emit("raise BacktrackException(")
             self.emit("    self._ErrorInformation(")
             self.emit("         _startingpos, ['condition not met']))")
-    
+
     def visit_choose(self, t):
-        for _ in self.start_block("for %s in (%s):" % (
+        with self.block("for %s in (%s):" % (
             t.children[0], t.children[1].additional_info[1:-1], )):
-            for _ in self.start_block("try:"):
+            with self.block("try:"):
                 self.dispatch(t.children[2])
                 self.emit("break")
-            for _ in self.start_block("except BacktrackException, _exc:"):
+            with self.block("except BacktrackException, _exc:"):
                 self.combine_error('_exc.error')
-        for _ in self.start_block("else:"):
+        with self.block("else:"):
             self.emit("raise BacktrackException(_error)")
 
     def visit_call(self, t):
diff --git a/pypy/rlib/rStringIO.py b/pypy/rlib/rStringIO.py
--- a/pypy/rlib/rStringIO.py
+++ b/pypy/rlib/rStringIO.py
@@ -104,7 +104,7 @@
             if len(self.bigbuffer) >= endp:
                 # semi-fast path: the write is entirely inside self.bigbuffer
                 for i in range(len(buffer)):
-                    self.bigbuffer[p+i] = buffer[i]
+                    self.bigbuffer[p + i] = buffer[i]
                 self.pos = endp
                 return
             else:
diff --git a/pypy/rlib/rgc.py b/pypy/rlib/rgc.py
--- a/pypy/rlib/rgc.py
+++ b/pypy/rlib/rgc.py
@@ -1,6 +1,9 @@
-import gc, types
+import gc
+import types
+
+from pypy.rlib import jit
+from pypy.rlib.objectmodel import we_are_translated, enforceargs, specialize
 from pypy.rpython.extregistry import ExtRegistryEntry
-from pypy.rlib.objectmodel import we_are_translated
 from pypy.rpython.lltypesystem import lltype, llmemory
 
 # ____________________________________________________________
@@ -32,7 +35,7 @@
         if len(hop.args_s) == 1:
             args_v = hop.inputargs(lltype.Signed)
         return hop.genop('gc__collect', args_v, resulttype=hop.r_result)
-    
+
 class SetMaxHeapSizeEntry(ExtRegistryEntry):
     _about_ = set_max_heap_size
 
@@ -133,6 +136,9 @@
         hop.exception_cannot_occur()
         return hop.genop(opname, vlist, resulttype = hop.r_result.lowleveltype)
 
+@jit.oopspec('list.ll_arraycopy(source, dest, source_start, dest_start, length)')
+@specialize.ll()
+@enforceargs(None, None, int, int, int)
 def ll_arraycopy(source, dest, source_start, dest_start, length):
     from pypy.rpython.lltypesystem.lloperation import llop
     from pypy.rlib.objectmodel import keepalive_until_here
@@ -161,14 +167,11 @@
                       llmemory.sizeof(TP.OF) * source_start)
     cp_dest_addr = (dest_addr + llmemory.itemoffsetof(TP, 0) +
                     llmemory.sizeof(TP.OF) * dest_start)
-    
+
     llmemory.raw_memcopy(cp_source_addr, cp_dest_addr,
                          llmemory.sizeof(TP.OF) * length)
     keepalive_until_here(source)
     keepalive_until_here(dest)
-ll_arraycopy._annenforceargs_ = [None, None, int, int, int]
-ll_arraycopy._annspecialcase_ = 'specialize:ll'
-ll_arraycopy.oopspec = 'list.ll_arraycopy(source, dest, source_start, dest_start, length)'
 
 def ll_shrink_array(p, smallerlength):
     from pypy.rpython.lltypesystem.lloperation import llop
@@ -192,7 +195,7 @@
               llmemory.itemoffsetof(ARRAY, 0))
     source_addr = llmemory.cast_ptr_to_adr(p)    + offset
     dest_addr   = llmemory.cast_ptr_to_adr(newp) + offset
-    llmemory.raw_memcopy(source_addr, dest_addr, 
+    llmemory.raw_memcopy(source_addr, dest_addr,
                          llmemory.sizeof(ARRAY.OF) * smallerlength)
 
     keepalive_until_here(p)
diff --git a/pypy/rlib/ropenssl.py b/pypy/rlib/ropenssl.py
--- a/pypy/rlib/ropenssl.py
+++ b/pypy/rlib/ropenssl.py
@@ -62,8 +62,7 @@
         "OPENSSL_VERSION_NUMBER")
     SSLEAY_VERSION = rffi_platform.DefinedConstantString(
         "SSLEAY_VERSION", "SSLeay_version(SSLEAY_VERSION)")
-    OPENSSL_NO_SSL2 = rffi_platform.DefinedConstantInteger(
-        "OPENSSL_NO_SSL2")
+    OPENSSL_NO_SSL2 = rffi_platform.Defined("OPENSSL_NO_SSL2")
     SSL_FILETYPE_PEM = rffi_platform.ConstantInteger("SSL_FILETYPE_PEM")
     SSL_OP_ALL = rffi_platform.ConstantInteger("SSL_OP_ALL")
     SSL_VERIFY_NONE = rffi_platform.ConstantInteger("SSL_VERIFY_NONE")
diff --git a/pypy/rlib/rstacklet.py b/pypy/rlib/rstacklet.py
--- a/pypy/rlib/rstacklet.py
+++ b/pypy/rlib/rstacklet.py
@@ -1,24 +1,47 @@
 from pypy.rlib import _rffi_stacklet as _c
+from pypy.rlib import jit
+from pypy.rlib.objectmodel import we_are_translated
 from pypy.rpython.lltypesystem import lltype, llmemory
 
+DEBUG = False
+
 
 class StackletThread(object):
 
+    @jit.dont_look_inside
     def __init__(self, config):
-        self._gcrootfinder = _getgcrootfinder(config)
+        self._gcrootfinder = _getgcrootfinder(config, we_are_translated())
         self._thrd = _c.newthread()
         if not self._thrd:
             raise MemoryError
         self._thrd_deleter = StackletThreadDeleter(self._thrd)
+        if DEBUG:
+            assert debug.sthread is None, "multithread debug support missing"
+            debug.sthread = self
 
+    @jit.dont_look_inside
     def new(self, callback, arg=llmemory.NULL):
-        return self._gcrootfinder.new(self, callback, arg)
+        if DEBUG:
+            callback = _debug_wrapper(callback)
+        h = self._gcrootfinder.new(self, callback, arg)
+        if DEBUG:
+            debug.add(h)
+        return h
     new._annspecialcase_ = 'specialize:arg(1)'
 
+    @jit.dont_look_inside
     def switch(self, stacklet):
-        return self._gcrootfinder.switch(self, stacklet)
+        if DEBUG:
+            debug.remove(stacklet)
+        h = self._gcrootfinder.switch(self, stacklet)
+        if DEBUG:
+            debug.add(h)
+        return h
 
+    @jit.dont_look_inside
     def destroy(self, stacklet):
+        if DEBUG:
+            debug.remove(stacklet)
         self._gcrootfinder.destroy(self, stacklet)
 
     def is_empty_handle(self, stacklet):
@@ -45,7 +68,13 @@
 
 # ____________________________________________________________
 
-def _getgcrootfinder(config):
+def _getgcrootfinder(config, translated):
+    if translated:
+        assert config is not None, ("you have to pass a valid config, "
+                                    "e.g. from 'driver.config'")
+    if config is not None:
+        assert config.translation.continuation, (
+            "stacklet: you have to translate with --continuation")
     if (config is None or
         config.translation.gc in ('ref', 'boehm', 'none')):   # for tests
         gcrootfinder = 'n/a'
@@ -56,3 +85,42 @@
                         None, None, ['__doc__'])
     return module.gcrootfinder
 _getgcrootfinder._annspecialcase_ = 'specialize:memo'
+
+
+class StackletDebugError(Exception):
+    pass
+
+class Debug(object):
+    def __init__(self):
+        self.sthread = None
+        self.active = []
+    def _freeze_(self):
+        self.__init__()
+        return False
+    def add(self, h):
+        if not self.sthread.is_empty_handle(h):
+            if h == self.sthread.get_null_handle():
+                raise StackletDebugError("unexpected null handle")
+            self.active.append(h)
+    def remove(self, h):
+        try:
+            i = self.active.index(h)
+        except ValueError:
+            if self.sthread.is_empty_handle(h):
+                msg = "empty stacklet handle"
+            elif h == self.sthread.get_null_handle():
+                msg = "unexpected null handle"
+            else:
+                msg = "double usage of handle %r" % (h,)
+            raise StackletDebugError(msg)
+        del self.active[i]
+debug = Debug()
+
+def _debug_wrapper(callback):
+    def wrapper(h, arg):
+        debug.add(h)
+        h = callback(h, arg)
+        debug.remove(h)
+        return h
+    return wrapper
+_debug_wrapper._annspecialcase_ = 'specialize:memo'
diff --git a/pypy/rlib/rstruct/formatiterator.py b/pypy/rlib/rstruct/formatiterator.py
--- a/pypy/rlib/rstruct/formatiterator.py
+++ b/pypy/rlib/rstruct/formatiterator.py
@@ -1,10 +1,10 @@
-
-from pypy.rlib.rstruct.nativefmttable import native_is_bigendian
-from pypy.rlib.unroll import unrolling_iterable
+from pypy.rlib import jit
 from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rlib.rstruct.error import StructError
+from pypy.rlib.rstruct.nativefmttable import native_is_bigendian, native_fmttable
 from pypy.rlib.rstruct.standardfmttable import standard_fmttable
-from pypy.rlib.rstruct.nativefmttable import native_fmttable
+from pypy.rlib.unroll import unrolling_iterable
+
 
 class FormatIterator(object):
     """
@@ -16,6 +16,7 @@
     _mixin_ = True
     _operate_is_specialized_ = False
 
+    @jit.look_inside_iff(lambda self, fmt: jit.isconstant(fmt))
     def interpret(self, fmt):
         # decode the byte order, size and alignment based on the 1st char
         table = unroll_native_fmtdescs
diff --git a/pypy/rlib/test/test_rstacklet.py b/pypy/rlib/test/test_rstacklet.py
--- a/pypy/rlib/test/test_rstacklet.py
+++ b/pypy/rlib/test/test_rstacklet.py
@@ -264,6 +264,10 @@
     gcrootfinder = 'shadowstack'
 
 
+def test_dont_keep_debug_to_true():
+    assert not rstacklet.DEBUG
+
+
 def target(*args):
     return entry_point, None
 
diff --git a/pypy/rpython/lltypesystem/ll2ctypes.py b/pypy/rpython/lltypesystem/ll2ctypes.py
--- a/pypy/rpython/lltypesystem/ll2ctypes.py
+++ b/pypy/rpython/lltypesystem/ll2ctypes.py
@@ -140,7 +140,8 @@
                 if isinstance(FIELDTYPE, lltype.Ptr):
                     cls = get_ctypes_type(FIELDTYPE, delayed_builders)
                 else:
-                    cls = get_ctypes_type(FIELDTYPE)
+                    cls = get_ctypes_type(FIELDTYPE, delayed_builders,
+                                          cannot_delay=True)
             fields.append((fieldname, cls))
         CStruct._fields_ = fields
 
@@ -169,7 +170,7 @@
         CStruct._normalized_ctype = get_ctypes_type(S)
         builder()    # no need to be lazy here
     else:
-        delayed_builders.append(builder)
+        delayed_builders.append((S, builder))
     return CStruct
 
 def build_ctypes_array(A, delayed_builders, max_n=0):
@@ -252,11 +253,19 @@
     else:
         return get_ctypes_type(FIELDTYPE)
 
-def get_ctypes_type(T, delayed_builders=None):
+def get_ctypes_type(T, delayed_builders=None, cannot_delay=False):
+    # Check delayed builders
+    if cannot_delay and delayed_builders:
+        for T2, builder in delayed_builders:
+            if T2 is T:
+                builder()
+                delayed_builders.remove((T2, builder))
+                return _ctypes_cache[T]
+
     try:
         return _ctypes_cache[T]
     except KeyError:
-        toplevel = delayed_builders is None
+        toplevel = cannot_delay or delayed_builders is None
         if toplevel:
             delayed_builders = []
         cls = build_new_ctypes_type(T, delayed_builders)
@@ -306,9 +315,11 @@
 
 def complete_builders(delayed_builders):
     while delayed_builders:
-        delayed_builders.pop()()
+        T, builder = delayed_builders[0]
+        builder()
+        delayed_builders.pop(0)
 
-def convert_struct(container, cstruct=None):
+def convert_struct(container, cstruct=None, delayed_converters=None):
     STRUCT = container._TYPE
     if cstruct is None:
         # if 'container' is an inlined substructure, convert the whole
@@ -325,23 +336,38 @@
             n = None
         cstruct = cls._malloc(n)
     add_storage(container, _struct_mixin, ctypes.pointer(cstruct))
+
+    if delayed_converters is None:
+        delayed_converters_was_None = True
+        delayed_converters = []
+    else:
+        delayed_converters_was_None = False
     for field_name in STRUCT._names:
         FIELDTYPE = getattr(STRUCT, field_name)
         field_value = getattr(container, field_name)
         if not isinstance(FIELDTYPE, lltype.ContainerType):
             # regular field
             if FIELDTYPE != lltype.Void:
-                setattr(cstruct, field_name, lltype2ctypes(field_value))
+                def convert(field_name=field_name, field_value=field_value):
+                    setattr(cstruct, field_name, lltype2ctypes(field_value))
+                if isinstance(FIELDTYPE, lltype.Ptr):
+                    delayed_converters.append(convert)
+                else:
+                    convert()
         else:
             # inlined substructure/subarray
             if isinstance(FIELDTYPE, lltype.Struct):
                 csubstruct = getattr(cstruct, field_name)
-                convert_struct(field_value, csubstruct)
+                convert_struct(field_value, csubstruct,
+                               delayed_converters=delayed_converters)
             elif field_name == STRUCT._arrayfld:    # inlined var-sized part
                 csubarray = getattr(cstruct, field_name)
                 convert_array(field_value, csubarray)
             else:
                 raise NotImplementedError('inlined field', FIELDTYPE)
+    if delayed_converters_was_None:
+        for converter in delayed_converters:
+            converter()
     remove_regular_struct_content(container)
 
 def remove_regular_struct_content(container):
@@ -358,7 +384,8 @@
         # bigger structure at once
         parent, parentindex = lltype.parentlink(container)
         if parent is not None:
-            convert_struct(parent)
+            if not isinstance(parent, _parentable_mixin):
+                convert_struct(parent)
             return
         # regular case: allocate a new ctypes array of the proper type
         cls = get_ctypes_type(ARRAY)
diff --git a/pypy/rpython/lltypesystem/ll_str.py b/pypy/rpython/lltypesystem/ll_str.py
--- a/pypy/rpython/lltypesystem/ll_str.py
+++ b/pypy/rpython/lltypesystem/ll_str.py
@@ -16,34 +16,31 @@
         return r_uint(i)
 
 @jit.elidable
-def ll_int2dec(i):
+def ll_int2dec(val):
     from pypy.rpython.lltypesystem.rstr import mallocstr
-    temp = malloc(CHAR_ARRAY, 20)
+
+    sign = int(val < 0)
+    if sign:
+        val = ll_unsigned(-val)
+    else:
+        val = ll_unsigned(val)
     len = 0
-    sign = 0
-    if i < 0:
-        sign = 1
-        i = ll_unsigned(-i)
-    else:
-        i = ll_unsigned(i)
-    if i == 0:
-        len = 1
-        temp[0] = '0'
-    else:
-        while i:
-            temp[len] = chr(i%10+ord('0'))
-            i //= 10
-            len += 1
-    len += sign
-    result = mallocstr(len)
-    result.hash = 0
+    i = val
+    while i:
+        len += 1
+        i //= 10
+
+    total_len = sign + len + int(val == 0)
+    result = mallocstr(total_len)
     if sign:
         result.chars[0] = '-'
-        j = 1
-    else:
-        j = 0
+    elif val == 0:
+        result.chars[0] = '0'
+
+    j = 0
     while j < len:
-        result.chars[j] = temp[len-j-1]
+        result.chars[total_len - j - 1] = chr(val % 10 + ord('0'))
+        val //= 10
         j += 1
     return result
 
diff --git a/pypy/rpython/lltypesystem/rbuilder.py b/pypy/rpython/lltypesystem/rbuilder.py
--- a/pypy/rpython/lltypesystem/rbuilder.py
+++ b/pypy/rpython/lltypesystem/rbuilder.py
@@ -1,4 +1,4 @@
-from pypy.rlib import rgc
+from pypy.rlib import rgc, jit
 from pypy.rlib.objectmodel import enforceargs
 from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rpython.annlowlevel import llstr
@@ -95,6 +95,7 @@
         ll_builder.used = needed + used
 
     @staticmethod
+    @jit.look_inside_iff(lambda ll_builder, char, times: jit.isconstant(times) and times <= 4)
     def ll_append_multiple_char(ll_builder, char, times):
         used = ll_builder.used
         if times + used > ll_builder.allocated:
diff --git a/pypy/rpython/lltypesystem/rdict.py b/pypy/rpython/lltypesystem/rdict.py
--- a/pypy/rpython/lltypesystem/rdict.py
+++ b/pypy/rpython/lltypesystem/rdict.py
@@ -20,7 +20,7 @@
 #  DICTVALUE types.
 #
 #  XXX for immutable dicts, the array should be inlined and
-#      num_pristine_entries and everused are not needed.
+#      resize_counter and everused are not needed.
 #
 #    struct dictentry {
 #        DICTKEY key;
@@ -32,7 +32,7 @@
 #
 #    struct dicttable {
 #        int num_items;
-#        int num_pristine_entries;  # never used entries
+#        int resize_counter;
 #        Array *entries;
 #        (Function DICTKEY, DICTKEY -> bool) *fnkeyeq;
 #        (Function DICTKEY -> int) *fnkeyhash;
@@ -176,7 +176,7 @@
             self.DICTENTRYARRAY = lltype.GcArray(self.DICTENTRY,
                                                  adtmeths=entrymeths)
             fields =          [ ("num_items", lltype.Signed),
-                                ("num_pristine_entries", lltype.Signed),
+                                ("resize_counter", lltype.Signed),
                                 ("entries", lltype.Ptr(self.DICTENTRYARRAY)) ]
             if self.custom_eq_hash:
                 self.r_rdict_eqfn, self.r_rdict_hashfn = self._custom_eq_hash_repr()
@@ -465,8 +465,8 @@
     d.num_items += 1
     if not everused:
         if hasattr(ENTRY, 'f_everused'): entry.f_everused = True
-        d.num_pristine_entries -= 1
-        if d.num_pristine_entries <= len(d.entries) / 3:
+        d.resize_counter -= 3
+        if d.resize_counter <= 0:
             ll_dict_resize(d)
 
 def ll_dict_insertclean(d, key, value, hash):
@@ -484,7 +484,7 @@
     if hasattr(ENTRY, 'f_valid'):    entry.f_valid = True
     if hasattr(ENTRY, 'f_everused'): entry.f_everused = True
     d.num_items += 1
-    d.num_pristine_entries -= 1
+    d.resize_counter -= 3
 
 def ll_dict_delitem(d, key):
     i = ll_dict_lookup(d, key, d.keyhash(key))
@@ -518,7 +518,7 @@
         new_size /= 2
     d.entries = lltype.typeOf(old_entries).TO.allocate(new_size)
     d.num_items = 0
-    d.num_pristine_entries = new_size
+    d.resize_counter = new_size * 2
     i = 0
     while i < old_size:
         if old_entries.valid(i):
@@ -619,7 +619,7 @@
     d = DICT.allocate()
     d.entries = DICT.entries.TO.allocate(DICT_INITSIZE)
     d.num_items = 0
-    d.num_pristine_entries = DICT_INITSIZE
+    d.resize_counter = DICT_INITSIZE * 2
     return d
 ll_newdict.oopspec = 'newdict()'
 
@@ -631,7 +631,7 @@
     d = DICT.allocate()
     d.entries = DICT.entries.TO.allocate(n)
     d.num_items = 0
-    d.num_pristine_entries = n
+    d.resize_counter = n * 2
     return d
 ll_newdict_size.oopspec = 'newdict()'
 
@@ -749,7 +749,7 @@
     d = DICT.allocate()
     d.entries = DICT.entries.TO.allocate(dictsize)
     d.num_items = dict.num_items
-    d.num_pristine_entries = dict.num_pristine_entries
+    d.resize_counter = dict.resize_counter
     if hasattr(DICT, 'fnkeyeq'):   d.fnkeyeq   = dict.fnkeyeq
     if hasattr(DICT, 'fnkeyhash'): d.fnkeyhash = dict.fnkeyhash
     i = 0
@@ -767,12 +767,13 @@
 ll_copy.oopspec = 'dict.copy(dict)'
 
 def ll_clear(d):
-    if len(d.entries) == d.num_pristine_entries == DICT_INITSIZE:
+    if (len(d.entries) == DICT_INITSIZE and
+        d.resize_counter == DICT_INITSIZE * 2):
         return
     old_entries = d.entries
     d.entries = lltype.typeOf(old_entries).TO.allocate(DICT_INITSIZE)
     d.num_items = 0
-    d.num_pristine_entries = DICT_INITSIZE
+    d.resize_counter = DICT_INITSIZE * 2
     old_entries.delete()
 ll_clear.oopspec = 'dict.clear(d)'
 
diff --git a/pypy/rpython/lltypesystem/rlist.py b/pypy/rpython/lltypesystem/rlist.py
--- a/pypy/rpython/lltypesystem/rlist.py
+++ b/pypy/rpython/lltypesystem/rlist.py
@@ -9,7 +9,7 @@
      GcStruct, Void, Signed, malloc, typeOf, nullptr, typeMethod
 from pypy.rpython.lltypesystem import rstr
 from pypy.rlib.debug import ll_assert
-from pypy.rlib import rgc
+from pypy.rlib import rgc, jit
 
 # ____________________________________________________________
 #
@@ -225,20 +225,22 @@
     else:
         _ll_list_resize_really(l, newsize)
 
+@jit.look_inside_iff(lambda l, newsize: jit.isconstant(len(l.items)) and jit.isconstant(newsize))
+@jit.oopspec("list._resize_ge(l, newsize)")
 def _ll_list_resize_ge(l, newsize):
     if len(l.items) >= newsize:
         l.length = newsize
     else:
         _ll_list_resize_really(l, newsize)
-_ll_list_resize_ge.oopspec = 'list._resize_ge(l, newsize)'
 
+@jit.look_inside_iff(lambda l, newsize: jit.isconstant(len(l.items)) and jit.isconstant(newsize))
+@jit.oopspec("list._resize_le(l, newsize)")
 def _ll_list_resize_le(l, newsize):
     if newsize >= (len(l.items) >> 1) - 5:
         l.length = newsize
     else:
         _ll_list_resize_really(l, newsize)
 
-
 def ll_append_noresize(l, newitem):
     length = l.length
     l.length = length + 1
diff --git a/pypy/rpython/lltypesystem/rpbc.py b/pypy/rpython/lltypesystem/rpbc.py
--- a/pypy/rpython/lltypesystem/rpbc.py
+++ b/pypy/rpython/lltypesystem/rpbc.py
@@ -230,7 +230,8 @@
         args = bk.build_args(opname, hop.args_s[1:])
         s_pbc = hop.args_s[0]   # possibly more precise than self.s_pbc
         descs = list(s_pbc.descriptions)
-        shape, index = description.FunctionDesc.variant_for_call_site(bk, self.callfamily, descs, args)
+        vfcs = description.FunctionDesc.variant_for_call_site
+        shape, index = vfcs(bk, self.callfamily, descs, args, hop.spaceop)
         row_of_graphs = self.callfamily.calltables[shape][index]
         anygraph = row_of_graphs.itervalues().next()  # pick any witness
         vlist = [hop.inputarg(self, arg=0)]
diff --git a/pypy/rpython/lltypesystem/rstr.py b/pypy/rpython/lltypesystem/rstr.py
--- a/pypy/rpython/lltypesystem/rstr.py
+++ b/pypy/rpython/lltypesystem/rstr.py
@@ -5,7 +5,7 @@
 from pypy.rlib.objectmodel import _hash_string, enforceargs
 from pypy.rlib.objectmodel import keepalive_until_here
 from pypy.rlib.debug import ll_assert
-from pypy.rlib.jit import elidable, we_are_jitted, dont_look_inside
+from pypy.rlib import jit
 from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rpython.robject import PyObjRepr, pyobj_repr
 from pypy.rpython.rmodel import inputconst, IntegerRepr
@@ -58,8 +58,7 @@
                 llmemory.itemoffsetof(TP.chars, 0) +
                 llmemory.sizeof(CHAR_TP) * item)
 
-    # It'd be nice to be able to look inside this function.
-    @dont_look_inside
+    @jit.oopspec('stroruni.copy_contents(src, dst, srcstart, dststart, length)')
     @enforceargs(None, None, int, int, int)
     def copy_string_contents(src, dst, srcstart, dststart, length):
         assert srcstart >= 0
@@ -71,8 +70,6 @@
         keepalive_until_here(src)
         keepalive_until_here(dst)
     copy_string_contents._always_inline_ = True
-    #copy_string_contents.oopspec = (
-    #    '%s.copy_contents(src, dst, srcstart, dststart, length)' % name)
     return func_with_new_name(copy_string_contents, 'copy_%s_contents' % name)
 
 copy_string_contents = _new_copy_contents_fun(STR, Char, 'string')
@@ -147,7 +144,7 @@
         self.ll = LLHelpers
         self.malloc = mallocunicode
 
-    @elidable
+    @jit.elidable
     def ll_str(self, s):
         # XXX crazy that this is here, but I don't want to break
         #     rmodel logic
@@ -162,7 +159,7 @@
             result.chars[i] = cast_primitive(Char, c)
         return result
 
-    @elidable
+    @jit.elidable
     def ll_encode_latin1(self, s):
         length = len(s.chars)
         result = mallocstr(length)
@@ -261,7 +258,7 @@
 
 
 class LLHelpers(AbstractLLHelpers):
-    @elidable
+    @jit.elidable
     def ll_str_mul(s, times):
         if times < 0:
             times = 0
@@ -283,7 +280,7 @@
             i += j
         return newstr
 
-    @elidable
+    @jit.elidable
     def ll_char_mul(ch, times):
         if typeOf(ch) is Char:
             malloc = mallocstr
@@ -328,7 +325,7 @@
         return s
     ll_str2unicode.oopspec = 'str.str2unicode(str)'
 
-    @elidable
+    @jit.elidable
     def ll_strhash(s):
         # unlike CPython, there is no reason to avoid to return -1
         # but our malloc initializes the memory to zero, so we use zero as the
@@ -344,7 +341,7 @@
     def ll_strfasthash(s):
         return s.hash     # assumes that the hash is already computed
 
-    @elidable
+    @jit.elidable
     def ll_strconcat(s1, s2):
         len1 = len(s1.chars)
         len2 = len(s2.chars)
@@ -356,7 +353,7 @@
         return newstr
     ll_strconcat.oopspec = 'stroruni.concat(s1, s2)'
 
-    @elidable
+    @jit.elidable
     def ll_strip(s, ch, left, right):
         s_len = len(s.chars)
         if s_len == 0:
@@ -374,7 +371,7 @@
         s.copy_contents(s, result, lpos, 0, r_len)
         return result
 
-    @elidable
+    @jit.elidable
     def ll_upper(s):
         s_chars = s.chars
         s_len = len(s_chars)
@@ -391,7 +388,7 @@
             i += 1
         return result
 
-    @elidable
+    @jit.elidable
     def ll_lower(s):
         s_chars = s.chars
         s_len = len(s_chars)
@@ -441,7 +438,7 @@
             i += 1
         return result
 
-    @elidable
+    @jit.elidable
     def ll_strcmp(s1, s2):
         if not s1 and not s2:
             return True
@@ -464,7 +461,7 @@
             i += 1
         return len1 - len2
 
-    @elidable
+    @jit.elidable
     def ll_streq(s1, s2):
         if s1 == s2:       # also if both are NULLs
             return True
@@ -484,7 +481,7 @@
         return True
     ll_streq.oopspec = 'stroruni.equal(s1, s2)'
 
-    @elidable
+    @jit.elidable
     def ll_startswith(s1, s2):
         len1 = len(s1.chars)
         len2 = len(s2.chars)
@@ -505,7 +502,7 @@
             return False
         return s.chars[0] == ch
 
-    @elidable
+    @jit.elidable
     def ll_endswith(s1, s2):
         len1 = len(s1.chars)
         len2 = len(s2.chars)
@@ -527,7 +524,7 @@
             return False
         return s.chars[len(s.chars) - 1] == ch
 
-    @elidable
+    @jit.elidable
     def ll_find_char(s, ch, start, end):
         i = start
         if end > len(s.chars):
@@ -539,7 +536,7 @@
         return -1
     ll_find_char._annenforceargs_ = [None, None, int, int]
 
-    @elidable
+    @jit.elidable
     def ll_rfind_char(s, ch, start, end):
         if end > len(s.chars):
             end = len(s.chars)
@@ -550,7 +547,7 @@
                 return i
         return -1
 
-    @elidable
+    @jit.elidable
     def ll_count_char(s, ch, start, end):
         count = 0
         i = start
@@ -618,7 +615,7 @@
             res = 0
         return res
 
-    @elidable
+    @jit.elidable
     def ll_search(s1, s2, start, end, mode):
         count = 0
         n = end - start
@@ -697,7 +694,13 @@
             return -1
         return count
 
+    @jit.look_inside_iff(lambda length, items: jit.isconstant(length) and length <= 2)
+    @enforceargs(int, None)
     def ll_join_strs(length, items):
+        # Special case for length 1 items, helps both the JIT and other code
+        if length == 1:
+            return items[0]
+
         num_items = length
         itemslen = 0
         i = 0
@@ -724,8 +727,8 @@
             res_index += item_len
             i += 1
         return result
-    ll_join_strs._annenforceargs_ = [int, None]
 
+    @jit.look_inside_iff(lambda length, chars, RES: jit.isconstant(length) and jit.isvirtual(chars))
     def ll_join_chars(length, chars, RES):
         # no need to optimize this, will be replaced by string builder
         # at some point soon
@@ -744,7 +747,7 @@
             i += 1
         return result
 
-    @elidable
+    @jit.elidable
     def _ll_stringslice(s1, start, stop):
         lgt = stop - start
         assert start >= 0
@@ -759,7 +762,7 @@
         return LLHelpers._ll_stringslice(s1, start, len(s1.chars))
 
     def ll_stringslice_startstop(s1, start, stop):
-        if we_are_jitted():
+        if jit.we_are_jitted():
             if stop > len(s1.chars):
                 stop = len(s1.chars)
         else:
@@ -842,7 +845,7 @@
         item.copy_contents(s, item, j, 0, i - j)
         return res
 
-    @elidable
+    @jit.elidable
     def ll_replace_chr_chr(s, c1, c2):
         length = len(s.chars)
         newstr = s.malloc(length)
@@ -857,7 +860,7 @@
             j += 1
         return newstr
 
-    @elidable
+    @jit.elidable
     def ll_contains(s, c):
         chars = s.chars
         strlen = len(chars)
@@ -868,7 +871,7 @@
             i += 1
         return False
 
-    @elidable
+    @jit.elidable
     def ll_int(s, base):
         if not 2 <= base <= 36:
             raise ValueError
diff --git a/pypy/rpython/lltypesystem/test/test_ll2ctypes.py b/pypy/rpython/lltypesystem/test/test_ll2ctypes.py
--- a/pypy/rpython/lltypesystem/test/test_ll2ctypes.py
+++ b/pypy/rpython/lltypesystem/test/test_ll2ctypes.py
@@ -82,7 +82,6 @@
         assert not ALLOCATED     # detects memory leaks in the test
 
     def test_get_pointer(self):
-        py.test.skip("FIXME")
         # Equivalent of the C code::
         #     struct S1 { struct S2 *ptr; struct S2 buf; };
         #     struct S1 s1;
diff --git a/pypy/rpython/memory/gctransform/asmgcroot.py b/pypy/rpython/memory/gctransform/asmgcroot.py
--- a/pypy/rpython/memory/gctransform/asmgcroot.py
+++ b/pypy/rpython/memory/gctransform/asmgcroot.py
@@ -636,7 +636,8 @@
                                       ASM_FRAMEDATA_HEAD_PTR],
                                      lltype.Signed,
                                      sandboxsafe=True,
-                                     _nowrapper=True)
+                                     _nowrapper=True,
+                                     random_effects_on_gcobjs=True)
 c_asm_stackwalk = Constant(pypy_asm_stackwalk,
                            lltype.typeOf(pypy_asm_stackwalk))
 
@@ -662,4 +663,5 @@
                          QSORT_CALLBACK_PTR],
                         lltype.Void,
                         sandboxsafe=True,
+                        random_effects_on_gcobjs=False,  # but has a callback
                         _nowrapper=True)
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -41,7 +41,7 @@
                                                               seen)
     def analyze_external_call(self, op, seen=None):
         funcobj = op.args[0].value._obj
-        if funcobj._name == 'pypy_asm_stackwalk':
+        if getattr(funcobj, 'random_effects_on_gcobjs', False):
             return True
         return graphanalyze.GraphAnalyzer.analyze_external_call(self, op,
                                                                 seen)
@@ -626,8 +626,8 @@
         func = getattr(graph, 'func', None)
         if func and getattr(func, '_gc_no_collect_', False):
             if self.collect_analyzer.analyze_direct_call(graph):
-                raise Exception("no_collect function can trigger collection: %s"
-                                % func.__name__)
+                raise Exception("'no_collect' function can trigger collection:"
+                                " %s" % func)
             
         if self.write_barrier_ptr:
             self.clean_sets = (
@@ -812,6 +812,7 @@
                   resultvar=op.result)
 
     def gct_gc_shadowstackref_destroy(self, hop):
+        op = hop.spaceop
         hop.genop("direct_call",
                   [self.root_walker.gc_shadowstackref_destroy_ptr, op.args[0]])
 
diff --git a/pypy/rpython/memory/gctransform/test/test_framework.py b/pypy/rpython/memory/gctransform/test/test_framework.py
--- a/pypy/rpython/memory/gctransform/test/test_framework.py
+++ b/pypy/rpython/memory/gctransform/test/test_framework.py
@@ -139,7 +139,8 @@
     cbuild = CStandaloneBuilder(t, entrypoint, t.config,
                                 gcpolicy=FrameworkGcPolicy2)
     f = py.test.raises(Exception, cbuild.generate_graphs_for_llinterp)
-    assert str(f.value) == 'no_collect function can trigger collection: g'
+    expected = "'no_collect' function can trigger collection: <function g at "
+    assert str(f.value).startswith(expected)
 
 class WriteBarrierTransformer(FrameworkGCTransformer):
     clean_sets = {}
diff --git a/pypy/rpython/memory/lldict.py b/pypy/rpython/memory/lldict.py
--- a/pypy/rpython/memory/lldict.py
+++ b/pypy/rpython/memory/lldict.py
@@ -83,7 +83,7 @@
                        })
 DICT = lltype.Struct('DICT', ('entries', lltype.Ptr(ENTRIES)),
                              ('num_items', lltype.Signed),
-                             ('num_pristine_entries', lltype.Signed),
+                             ('resize_counter', lltype.Signed),
                      adtmeths = {
                          'allocate': dict_allocate,
                          'delete': dict_delete,
diff --git a/pypy/rpython/module/ll_os_stat.py b/pypy/rpython/module/ll_os_stat.py
--- a/pypy/rpython/module/ll_os_stat.py
+++ b/pypy/rpython/module/ll_os_stat.py
@@ -173,7 +173,8 @@
         _compilation_info_ = compilation_info
         STAT_STRUCT = platform.Struct('struct %s' % _name_struct_stat, LL_STAT_FIELDS)
     try:
-        config = platform.configure(CConfig)
+        config = platform.configure(CConfig, ignore_errors=
+                                    try_to_add is not None)
     except platform.CompilationError:
         if try_to_add:
             return    # failed to add this field, give up
diff --git a/pypy/rpython/ootypesystem/rdict.py b/pypy/rpython/ootypesystem/rdict.py
--- a/pypy/rpython/ootypesystem/rdict.py
+++ b/pypy/rpython/ootypesystem/rdict.py
@@ -247,7 +247,7 @@
         fn = None
         v_obj = hop.inputarg(r_func, arg=arg)
         s_pbc_fn = hop.args_s[arg]
-        methodname = r_func._get_method_name("simple_call", s_pbc_fn, params_annotation)
+        methodname = r_func._get_method_name("simple_call", s_pbc_fn, params_annotation, hop)
     elif isinstance(r_func, MethodOfFrozenPBCRepr):
         r_impl, nimplicitarg = r_func.get_r_implfunc()
         fn = r_impl.get_unique_llfn().value
diff --git a/pypy/rpython/ootypesystem/rpbc.py b/pypy/rpython/ootypesystem/rpbc.py
--- a/pypy/rpython/ootypesystem/rpbc.py
+++ b/pypy/rpython/ootypesystem/rpbc.py
@@ -130,14 +130,14 @@
     def call(self, opname, hop):
         s_pbc = hop.args_s[0]   # possibly more precise than self.s_pbc        
         args_s = hop.args_s[1:]
-        shape, index, callfamily = self._get_shape_index_callfamily(opname, s_pbc, args_s)
+        shape, index, callfamily = self._get_shape_index_callfamily(opname, s_pbc, args_s, hop)
         row_of_graphs = callfamily.calltables[shape][index]
         anygraph = row_of_graphs.itervalues().next()  # pick any witness
         hop2 = self.add_instance_arg_to_hop(hop, opname == "call_args")
         vlist = callparse.callparse(self.rtyper, anygraph, hop2, opname,
                                     r_self = self.r_im_self)
         rresult = callparse.getrresult(self.rtyper, anygraph)
-        derived_mangled = self._get_method_name(opname, s_pbc, args_s)
+        derived_mangled = self._get_method_name(opname, s_pbc, args_s, hop)
         cname = hop.inputconst(ootype.Void, derived_mangled)
         hop.exception_is_here()
         # sanity check: make sure that INSTANCE has the method
@@ -151,18 +151,18 @@
         else:
             return hop.llops.convertvar(v, rresult, hop.r_result)
 
-    def _get_shape_index_callfamily(self, opname, s_pbc, args_s):
+    def _get_shape_index_callfamily(self, opname, s_pbc, args_s, hop):
         bk = self.rtyper.annotator.bookkeeper
         args = bk.build_args(opname, args_s)
         args = args.prepend(self.s_im_self)
         descs = [desc.funcdesc for desc in s_pbc.descriptions]
         callfamily = descs[0].getcallfamily()
         shape, index = description.FunctionDesc.variant_for_call_site(
-                bk, callfamily, descs, args)
+                bk, callfamily, descs, args, hop.spaceop)
         return shape, index, callfamily
 
-    def _get_method_name(self, opname, s_pbc, args_s):
-        shape, index, callfamily = self._get_shape_index_callfamily(opname, s_pbc, args_s)
+    def _get_method_name(self, opname, s_pbc, args_s, hop):
+        shape, index, callfamily = self._get_shape_index_callfamily(opname, s_pbc, args_s, hop)
         mangled = mangle(self.methodname, self.rtyper.getconfig())
         row = self.concretetable[shape, index]
         derived_mangled = row_method_name(mangled, row.attrname)
diff --git a/pypy/rpython/rlist.py b/pypy/rpython/rlist.py
--- a/pypy/rpython/rlist.py
+++ b/pypy/rpython/rlist.py
@@ -11,7 +11,7 @@
 from pypy.rlib.debug import ll_assert
 from pypy.rlib.rarithmetic import ovfcheck, widen, r_uint, intmask
 from pypy.rpython.annlowlevel import ADTInterface
-from pypy.rlib import rgc
+from pypy.rlib import rgc, jit
 
 ADTIFixedList = ADTInterface(None, {
     'll_newlist':      (['SELF', Signed        ], 'self'),
@@ -116,7 +116,7 @@
         v_lst = hop.inputarg(self, 0)
         cRESLIST = hop.inputconst(Void, hop.r_result.LIST)
         return hop.gendirectcall(ll_copy, cRESLIST, v_lst)
-    
+
     def rtype_len(self, hop):
         v_lst, = hop.inputargs(self)
         if hop.args_s[0].listdef.listitem.resized:
@@ -132,7 +132,7 @@
         else:
             ll_func = ll_list_is_true_foldable
         return hop.gendirectcall(ll_func, v_lst)
-    
+
     def rtype_method_reverse(self, hop):
         v_lst, = hop.inputargs(self)
         hop.exception_cannot_occur()
@@ -273,7 +273,7 @@
         return pair(r_lst, r_int).rtype_getitem(hop, checkidx=True)
 
     rtype_getitem_idx_key = rtype_getitem_idx
-    
+
     def rtype_setitem((r_lst, r_int), hop):
         if hop.has_implicit_exception(IndexError):
             spec = dum_checkidx
@@ -331,7 +331,7 @@
 ##            return hop.gendirectcall(ll_both_none, v_lst1, v_lst2)
 
 ##        return pairtype(Repr, Repr).rtype_is_(pair(r_lst1, r_lst2), hop)
- 
+
     def rtype_eq((r_lst1, r_lst2), hop):
         assert r_lst1.item_repr == r_lst2.item_repr
         v_lst1, v_lst2 = hop.inputargs(r_lst1, r_lst2)
@@ -499,7 +499,7 @@
     else:
         check = item
     if (not malloc_zero_filled) or check: # as long as malloc it is known to zero the allocated memory avoid zeroing twice
-    
+
         i = 0
         while i < count:
             l.ll_setitem_fast(i, item)
@@ -633,7 +633,6 @@
         l.ll_setitem_fast(index, null)
     l._ll_resize_le(newlength)
     return res
-ll_pop_default.oopspec = 'list.pop(l)'
 
 def ll_pop_zero(func, l):
     length = l.ll_length()
@@ -913,6 +912,8 @@
     return l
 # no oopspec -- the function is inlined by the JIT
 
+@jit.look_inside_iff(lambda l, start: jit.isconstant(start) and jit.isvirtual(l))
+@jit.oopspec('list.delslice_startonly(l, start)')
 def ll_listdelslice_startonly(l, start):
     ll_assert(start >= 0, "del l[start:] with unexpectedly negative start")
     ll_assert(start <= l.ll_length(), "del l[start:] with start > len(l)")
@@ -924,7 +925,6 @@
             l.ll_setitem_fast(j, null)
             j -= 1
     l._ll_resize_le(newlength)
-ll_listdelslice_startonly.oopspec = 'list.delslice_startonly(l, start)'