[pypy-commit] pypy jit-simplify-backendintf: hg merge jit-targets

arigo noreply at buildbot.pypy.org
Sun Dec 11 17:33:47 CET 2011


Author: Armin Rigo <arigo at tunes.org>
Branch: jit-simplify-backendintf
Changeset: r50387:11ca1186b82f
Date: 2011-12-11 17:33 +0100
http://bitbucket.org/pypy/pypy/changeset/11ca1186b82f/

Log:	hg merge jit-targets

diff too long, truncating to 10000 out of 40345 lines

diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -1,3 +1,4 @@
 b590cf6de4190623aad9aa698694c22e614d67b9 release-1.5
 b48df0bf4e75b81d98f19ce89d4a7dc3e1dab5e5 benchmarked
 d8ac7d23d3ec5f9a0fa1264972f74a010dbfd07f release-1.6
+ff4af8f318821f7f5ca998613a60fca09aa137da release-1.7
diff --git a/lib-python/2.7/test/test_os.py b/lib-python/2.7/test/test_os.py
--- a/lib-python/2.7/test/test_os.py
+++ b/lib-python/2.7/test/test_os.py
@@ -74,7 +74,8 @@
         self.assertFalse(os.path.exists(name),
                     "file already exists for temporary file")
         # make sure we can create the file
-        open(name, "w")
+        f = open(name, "w")
+        f.close()
         self.files.append(name)
 
     def test_tempnam(self):
diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -201,7 +201,7 @@
     RegrTest('test_difflib.py'),
     RegrTest('test_dircache.py', core=True),
     RegrTest('test_dis.py'),
-    RegrTest('test_distutils.py'),
+    RegrTest('test_distutils.py', skip=True),
     RegrTest('test_dl.py', skip=True),
     RegrTest('test_doctest.py', usemodules="thread"),
     RegrTest('test_doctest2.py'),
diff --git a/lib-python/modified-2.7/ctypes/__init__.py b/lib-python/modified-2.7/ctypes/__init__.py
--- a/lib-python/modified-2.7/ctypes/__init__.py
+++ b/lib-python/modified-2.7/ctypes/__init__.py
@@ -351,7 +351,7 @@
         self._FuncPtr = _FuncPtr
 
         if handle is None:
-            self._handle = _ffi.CDLL(name)
+            self._handle = _ffi.CDLL(name, mode)
         else:
             self._handle = handle
 
diff --git a/lib-python/modified-2.7/ctypes/test/test_simplesubclasses.py b/lib-python/modified-2.7/ctypes/test/test_simplesubclasses.py
--- a/lib-python/modified-2.7/ctypes/test/test_simplesubclasses.py
+++ b/lib-python/modified-2.7/ctypes/test/test_simplesubclasses.py
@@ -1,6 +1,5 @@
 import unittest
 from ctypes import *
-from ctypes.test import xfail
 
 class MyInt(c_int):
     def __cmp__(self, other):
@@ -27,7 +26,6 @@
         self.assertEqual(None, cb())
 
 
-    @xfail
     def test_int_callback(self):
         args = []
         def func(arg):
diff --git a/lib-python/modified-2.7/heapq.py b/lib-python/modified-2.7/heapq.py
new file mode 100644
--- /dev/null
+++ b/lib-python/modified-2.7/heapq.py
@@ -0,0 +1,442 @@
+# -*- coding: latin-1 -*-
+
+"""Heap queue algorithm (a.k.a. priority queue).
+
+Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for
+all k, counting elements from 0.  For the sake of comparison,
+non-existing elements are considered to be infinite.  The interesting
+property of a heap is that a[0] is always its smallest element.
+
+Usage:
+
+heap = []            # creates an empty heap
+heappush(heap, item) # pushes a new item on the heap
+item = heappop(heap) # pops the smallest item from the heap
+item = heap[0]       # smallest item on the heap without popping it
+heapify(x)           # transforms list into a heap, in-place, in linear time
+item = heapreplace(heap, item) # pops and returns smallest item, and adds
+                               # new item; the heap size is unchanged
+
+Our API differs from textbook heap algorithms as follows:
+
+- We use 0-based indexing.  This makes the relationship between the
+  index for a node and the indexes for its children slightly less
+  obvious, but is more suitable since Python uses 0-based indexing.
+
+- Our heappop() method returns the smallest item, not the largest.
+
+These two make it possible to view the heap as a regular Python list
+without surprises: heap[0] is the smallest item, and heap.sort()
+maintains the heap invariant!
+"""
+
+# Original code by Kevin O'Connor, augmented by Tim Peters and Raymond Hettinger
+
+__about__ = """Heap queues
+
+[explanation by François Pinard]
+
+Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for
+all k, counting elements from 0.  For the sake of comparison,
+non-existing elements are considered to be infinite.  The interesting
+property of a heap is that a[0] is always its smallest element.
+
+The strange invariant above is meant to be an efficient memory
+representation for a tournament.  The numbers below are `k', not a[k]:
+
+                                   0
+
+                  1                                 2
+
+          3               4                5               6
+
+      7       8       9       10      11      12      13      14
+
+    15 16   17 18   19 20   21 22   23 24   25 26   27 28   29 30
+
+
+In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'.  In
+a usual binary tournament we see in sports, each cell is the winner
+over the two cells it tops, and we can trace the winner down the tree
+to see all opponents s/he had.  However, in many computer applications
+of such tournaments, we do not need to trace the history of a winner.
+To be more memory efficient, when a winner is promoted, we try to
+replace it by something else at a lower level, and the rule becomes
+that a cell and the two cells it tops contain three different items,
+but the top cell "wins" over the two topped cells.
+
+If this heap invariant is protected at all times, index 0 is clearly
+the overall winner.  The simplest algorithmic way to remove it and
+find the "next" winner is to move some loser (let's say cell 30 in the
+diagram above) into the 0 position, and then percolate this new 0 down
+the tree, exchanging values, until the invariant is re-established.
+This is clearly logarithmic on the total number of items in the tree.
+By iterating over all items, you get an O(n ln n) sort.
+
+A nice feature of this sort is that you can efficiently insert new
+items while the sort is going on, provided that the inserted items are
+not "better" than the last 0'th element you extracted.  This is
+especially useful in simulation contexts, where the tree holds all
+incoming events, and the "win" condition means the smallest scheduled
+time.  When an event schedules other events for execution, they are
+scheduled into the future, so they can easily go into the heap.  So, a
+heap is a good structure for implementing schedulers (this is what I
+used for my MIDI sequencer :-).
+
+Various structures for implementing schedulers have been extensively
+studied, and heaps are good for this, as they are reasonably speedy,
+the speed is almost constant, and the worst case is not much different
+than the average case.  However, there are other representations which
+are more efficient overall, yet the worst cases might be terrible.
+
+Heaps are also very useful in big disk sorts.  You most probably all
+know that a big sort implies producing "runs" (which are pre-sorted
+sequences, whose size is usually related to the amount of CPU memory),
+followed by merging passes for these runs, which merging is often
+very cleverly organised[1].  It is very important that the initial
+sort produces the longest runs possible.  Tournaments are a good way
+to achieve that.  If, using all the memory available to hold a tournament, you
+replace and percolate items that happen to fit the current run, you'll
+produce runs which are twice the size of the memory for random input,
+and much better for input fuzzily ordered.
+
+Moreover, if you output the 0'th item on disk and get an input which
+may not fit in the current tournament (because the value "wins" over
+the last output value), it cannot fit in the heap, so the size of the
+heap decreases.  The freed memory could be cleverly reused immediately
+for progressively building a second heap, which grows at exactly the
+same rate the first heap is melting.  When the first heap completely
+vanishes, you switch heaps and start a new run.  Clever and quite
+effective!
+
+In a word, heaps are useful memory structures to know.  I use them in
+a few applications, and I think it is good to keep a `heap' module
+around. :-)
+
+--------------------
+[1] The disk balancing algorithms which are current, nowadays, are
+more annoying than clever, and this is a consequence of the seeking
+capabilities of the disks.  On devices which cannot seek, like big
+tape drives, the story was quite different, and one had to be very
+clever to ensure (far in advance) that each tape movement will be the
+most effective possible (that is, will best participate at
+"progressing" the merge).  Some tapes were even able to read
+backwards, and this was also used to avoid the rewinding time.
+Believe me, real good tape sorts were quite spectacular to watch!
+From all times, sorting has always been a Great Art! :-)
+"""
+
+__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
+           'nlargest', 'nsmallest', 'heappushpop']
+
+from itertools import islice, repeat, count, imap, izip, tee, chain
+from operator import itemgetter
+import bisect
+
+def heappush(heap, item):
+    """Push item onto heap, maintaining the heap invariant."""
+    heap.append(item)
+    _siftdown(heap, 0, len(heap)-1)
+
+def heappop(heap):
+    """Pop the smallest item off the heap, maintaining the heap invariant."""
+    lastelt = heap.pop()    # raises appropriate IndexError if heap is empty
+    if heap:
+        returnitem = heap[0]
+        heap[0] = lastelt
+        _siftup(heap, 0)
+    else:
+        returnitem = lastelt
+    return returnitem
+
+def heapreplace(heap, item):
+    """Pop and return the current smallest value, and add the new item.
+
+    This is more efficient than heappop() followed by heappush(), and can be
+    more appropriate when using a fixed-size heap.  Note that the value
+    returned may be larger than item!  That constrains reasonable uses of
+    this routine unless written as part of a conditional replacement:
+
+        if item > heap[0]:
+            item = heapreplace(heap, item)
+    """
+    returnitem = heap[0]    # raises appropriate IndexError if heap is empty
+    heap[0] = item
+    _siftup(heap, 0)
+    return returnitem
+
+def heappushpop(heap, item):
+    """Fast version of a heappush followed by a heappop."""
+    if heap and heap[0] < item:
+        item, heap[0] = heap[0], item
+        _siftup(heap, 0)
+    return item
+
+def heapify(x):
+    """Transform list into a heap, in-place, in O(len(heap)) time."""
+    n = len(x)
+    # Transform bottom-up.  The largest index there's any point to looking at
+    # is the largest with a child index in-range, so must have 2*i + 1 < n,
+    # or i < (n-1)/2.  If n is even = 2*j, this is (2*j-1)/2 = j-1/2 so
+    # j-1 is the largest, which is n//2 - 1.  If n is odd = 2*j+1, this is
+    # (2*j+1-1)/2 = j so j-1 is the largest, and that's again n//2-1.
+    for i in reversed(xrange(n//2)):
+        _siftup(x, i)
+
+def nlargest(n, iterable):
+    """Find the n largest elements in a dataset.
+
+    Equivalent to:  sorted(iterable, reverse=True)[:n]
+    """
+    if n < 0: # for consistency with the c impl
+        return []
+    it = iter(iterable)
+    result = list(islice(it, n))
+    if not result:
+        return result
+    heapify(result)
+    _heappushpop = heappushpop
+    for elem in it:
+        _heappushpop(result, elem)
+    result.sort(reverse=True)
+    return result
+
+def nsmallest(n, iterable):
+    """Find the n smallest elements in a dataset.
+
+    Equivalent to:  sorted(iterable)[:n]
+    """
+    if n < 0: # for consistency with the c impl
+        return []
+    if hasattr(iterable, '__len__') and n * 10 <= len(iterable):
+        # For smaller values of n, the bisect method is faster than a minheap.
+        # It is also memory efficient, consuming only n elements of space.
+        it = iter(iterable)
+        result = sorted(islice(it, 0, n))
+        if not result:
+            return result
+        insort = bisect.insort
+        pop = result.pop
+        los = result[-1]    # los --> Largest of the nsmallest
+        for elem in it:
+            if los <= elem:
+                continue
+            insort(result, elem)
+            pop()
+            los = result[-1]
+        return result
+    # An alternative approach manifests the whole iterable in memory but
+    # saves comparisons by heapifying all at once.  Also, saves time
+    # over bisect.insort() which has O(n) data movement time for every
+    # insertion.  Finding the n smallest of an m length iterable requires
+    #    O(m) + O(n log m) comparisons.
+    h = list(iterable)
+    heapify(h)
+    return map(heappop, repeat(h, min(n, len(h))))
+
+# 'heap' is a heap at all indices >= startpos, except possibly for pos.  pos
+# is the index of a leaf with a possibly out-of-order value.  Restore the
+# heap invariant.
+def _siftdown(heap, startpos, pos):
+    newitem = heap[pos]
+    # Follow the path to the root, moving parents down until finding a place
+    # newitem fits.
+    while pos > startpos:
+        parentpos = (pos - 1) >> 1
+        parent = heap[parentpos]
+        if newitem < parent:
+            heap[pos] = parent
+            pos = parentpos
+            continue
+        break
+    heap[pos] = newitem
+
+# The child indices of heap index pos are already heaps, and we want to make
+# a heap at index pos too.  We do this by bubbling the smaller child of
+# pos up (and so on with that child's children, etc) until hitting a leaf,
+# then using _siftdown to move the oddball originally at index pos into place.
+#
+# We *could* break out of the loop as soon as we find a pos where newitem <=
+# both its children, but turns out that's not a good idea, and despite that
+# many books write the algorithm that way.  During a heap pop, the last array
+# element is sifted in, and that tends to be large, so that comparing it
+# against values starting from the root usually doesn't pay (= usually doesn't
+# get us out of the loop early).  See Knuth, Volume 3, where this is
+# explained and quantified in an exercise.
+#
+# Cutting the # of comparisons is important, since these routines have no
+# way to extract "the priority" from an array element, so that intelligence
+# is likely to be hiding in custom __cmp__ methods, or in array elements
+# storing (priority, record) tuples.  Comparisons are thus potentially
+# expensive.
+#
+# On random arrays of length 1000, making this change cut the number of
+# comparisons made by heapify() a little, and those made by exhaustive
+# heappop() a lot, in accord with theory.  Here are typical results from 3
+# runs (3 just to demonstrate how small the variance is):
+#
+# Compares needed by heapify     Compares needed by 1000 heappops
+# --------------------------     --------------------------------
+# 1837 cut to 1663               14996 cut to 8680
+# 1855 cut to 1659               14966 cut to 8678
+# 1847 cut to 1660               15024 cut to 8703
+#
+# Building the heap by using heappush() 1000 times instead required
+# 2198, 2148, and 2219 compares:  heapify() is more efficient, when
+# you can use it.
+#
+# The total compares needed by list.sort() on the same lists were 8627,
+# 8627, and 8632 (this should be compared to the sum of heapify() and
+# heappop() compares):  list.sort() is (unsurprisingly!) more efficient
+# for sorting.
+
+def _siftup(heap, pos):
+    endpos = len(heap)
+    startpos = pos
+    newitem = heap[pos]
+    # Bubble up the smaller child until hitting a leaf.
+    childpos = 2*pos + 1    # leftmost child position
+    while childpos < endpos:
+        # Set childpos to index of smaller child.
+        rightpos = childpos + 1
+        if rightpos < endpos and not heap[childpos] < heap[rightpos]:
+            childpos = rightpos
+        # Move the smaller child up.
+        heap[pos] = heap[childpos]
+        pos = childpos
+        childpos = 2*pos + 1
+    # The leaf at pos is empty now.  Put newitem there, and bubble it up
+    # to its final resting place (by sifting its parents down).
+    heap[pos] = newitem
+    _siftdown(heap, startpos, pos)
+
+# If available, use C implementation
+try:
+    from _heapq import *
+except ImportError:
+    pass
+
+def merge(*iterables):
+    '''Merge multiple sorted inputs into a single sorted output.
+
+    Similar to sorted(itertools.chain(*iterables)) but returns a generator,
+    does not pull the data into memory all at once, and assumes that each of
+    the input streams is already sorted (smallest to largest).
+
+    >>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
+    [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]
+
+    '''
+    _heappop, _heapreplace, _StopIteration = heappop, heapreplace, StopIteration
+
+    h = []
+    h_append = h.append
+    for itnum, it in enumerate(map(iter, iterables)):
+        try:
+            next = it.next
+            h_append([next(), itnum, next])
+        except _StopIteration:
+            pass
+    heapify(h)
+
+    while 1:
+        try:
+            while 1:
+                v, itnum, next = s = h[0]   # raises IndexError when h is empty
+                yield v
+                s[0] = next()               # raises StopIteration when exhausted
+                _heapreplace(h, s)          # restore heap condition
+        except _StopIteration:
+            _heappop(h)                     # remove empty iterator
+        except IndexError:
+            return
+
+# Extend the implementations of nsmallest and nlargest to use a key= argument
+_nsmallest = nsmallest
+def nsmallest(n, iterable, key=None):
+    """Find the n smallest elements in a dataset.
+
+    Equivalent to:  sorted(iterable, key=key)[:n]
+    """
+    # Short-cut for n==1 is to use min() when len(iterable)>0
+    if n == 1:
+        it = iter(iterable)
+        head = list(islice(it, 1))
+        if not head:
+            return []
+        if key is None:
+            return [min(chain(head, it))]
+        return [min(chain(head, it), key=key)]
+
+    # When n>=size, it's faster to use sort()
+    try:
+        size = len(iterable)
+    except (TypeError, AttributeError):
+        pass
+    else:
+        if n >= size:
+            return sorted(iterable, key=key)[:n]
+
+    # When key is none, use simpler decoration
+    if key is None:
+        it = izip(iterable, count())                        # decorate
+        result = _nsmallest(n, it)
+        return map(itemgetter(0), result)                   # undecorate
+
+    # General case, slowest method
+    in1, in2 = tee(iterable)
+    it = izip(imap(key, in1), count(), in2)                 # decorate
+    result = _nsmallest(n, it)
+    return map(itemgetter(2), result)                       # undecorate
+
+_nlargest = nlargest
+def nlargest(n, iterable, key=None):
+    """Find the n largest elements in a dataset.
+
+    Equivalent to:  sorted(iterable, key=key, reverse=True)[:n]
+    """
+
+    # Short-cut for n==1 is to use max() when len(iterable)>0
+    if n == 1:
+        it = iter(iterable)
+        head = list(islice(it, 1))
+        if not head:
+            return []
+        if key is None:
+            return [max(chain(head, it))]
+        return [max(chain(head, it), key=key)]
+
+    # When n>=size, it's faster to use sort()
+    try:
+        size = len(iterable)
+    except (TypeError, AttributeError):
+        pass
+    else:
+        if n >= size:
+            return sorted(iterable, key=key, reverse=True)[:n]
+
+    # When key is none, use simpler decoration
+    if key is None:
+        it = izip(iterable, count(0,-1))                    # decorate
+        result = _nlargest(n, it)
+        return map(itemgetter(0), result)                   # undecorate
+
+    # General case, slowest method
+    in1, in2 = tee(iterable)
+    it = izip(imap(key, in1), count(0,-1), in2)             # decorate
+    result = _nlargest(n, it)
+    return map(itemgetter(2), result)                       # undecorate
+
+if __name__ == "__main__":
+    # Simple sanity test
+    heap = []
+    data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 0]
+    for item in data:
+        heappush(heap, item)
+    sort = []
+    while heap:
+        sort.append(heappop(heap))
+    print sort
+
+    import doctest
+    doctest.testmod()
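
A short usage sketch of the pure-Python heapq API added above (plain Python 2, assuming the module is importable as heapq; the heapreplace caveat and the key= decoration are the ones documented in the file itself):

import heapq

heap = []
for item in [5, 1, 9, 3]:
    heapq.heappush(heap, item)           # append + _siftdown keeps heap[0] smallest
assert heap[0] == 1
assert heapq.heappop(heap) == 1          # _siftup restores the invariant afterwards

# heapreplace() pops the current smallest *before* considering the new item,
# so the value it returns may be larger than the value pushed in:
heapq.heappush(heap, 2)                  # root is now 2
assert heapq.heapreplace(heap, 0) == 2   # 0 becomes the new root

# nsmallest()/nlargest() with key= decorate each element as (key, index, value),
# which is why the undecorate step uses itemgetter(2):
rows = [('b', 3), ('a', 1), ('c', 2)]
assert heapq.nsmallest(2, rows, key=lambda r: r[1]) == [('a', 1), ('c', 2)]
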
diff --git a/lib-python/2.7/pkgutil.py b/lib-python/modified-2.7/pkgutil.py
copy from lib-python/2.7/pkgutil.py
copy to lib-python/modified-2.7/pkgutil.py
--- a/lib-python/2.7/pkgutil.py
+++ b/lib-python/modified-2.7/pkgutil.py
@@ -244,7 +244,8 @@
         return mod
 
     def get_data(self, pathname):
-        return open(pathname, "rb").read()
+        with open(pathname, "rb") as f:
+            return f.read()
 
     def _reopen(self):
         if self.file and self.file.closed:
diff --git a/lib-python/modified-2.7/test/test_heapq.py b/lib-python/modified-2.7/test/test_heapq.py
--- a/lib-python/modified-2.7/test/test_heapq.py
+++ b/lib-python/modified-2.7/test/test_heapq.py
@@ -186,6 +186,11 @@
         self.assertFalse(sys.modules['heapq'] is self.module)
         self.assertTrue(hasattr(self.module.heapify, 'func_code'))
 
+    def test_islice_protection(self):
+        m = self.module
+        self.assertFalse(m.nsmallest(-1, [1]))
+        self.assertFalse(m.nlargest(-1, [1]))
+
 
 class TestHeapC(TestHeap):
     module = c_heapq
diff --git a/lib-python/modified-2.7/test/test_import.py b/lib-python/modified-2.7/test/test_import.py
--- a/lib-python/modified-2.7/test/test_import.py
+++ b/lib-python/modified-2.7/test/test_import.py
@@ -64,6 +64,7 @@
             except ImportError, err:
                 self.fail("import from %s failed: %s" % (ext, err))
             else:
+                # XXX importing .pyw is missing on Windows
                 self.assertEqual(mod.a, a,
                     "module loaded (%s) but contents invalid" % mod)
                 self.assertEqual(mod.b, b,
diff --git a/lib-python/modified-2.7/test/test_repr.py b/lib-python/modified-2.7/test/test_repr.py
--- a/lib-python/modified-2.7/test/test_repr.py
+++ b/lib-python/modified-2.7/test/test_repr.py
@@ -254,8 +254,14 @@
         eq = self.assertEqual
         touch(os.path.join(self.subpkgname, self.pkgname + os.extsep + 'py'))
         from areallylongpackageandmodulenametotestreprtruncation.areallylongpackageandmodulenametotestreprtruncation import areallylongpackageandmodulenametotestreprtruncation
-        eq(repr(areallylongpackageandmodulenametotestreprtruncation),
-           "<module '%s' from '%s'>" % (areallylongpackageandmodulenametotestreprtruncation.__name__, areallylongpackageandmodulenametotestreprtruncation.__file__))
+        # On PyPy, we use %r to format the file name; on CPython it is done
+        # with '%s'.  It seems to me that %r is safer <arigo>.
+        if '__pypy__' in sys.builtin_module_names:
+            eq(repr(areallylongpackageandmodulenametotestreprtruncation),
+               "<module %r from %r>" % (areallylongpackageandmodulenametotestreprtruncation.__name__, areallylongpackageandmodulenametotestreprtruncation.__file__))
+        else:
+            eq(repr(areallylongpackageandmodulenametotestreprtruncation),
+               "<module '%s' from '%s'>" % (areallylongpackageandmodulenametotestreprtruncation.__name__, areallylongpackageandmodulenametotestreprtruncation.__file__))
         eq(repr(sys), "<module 'sys' (built-in)>")
 
     def test_type(self):
diff --git a/lib-python/2.7/test/test_subprocess.py b/lib-python/modified-2.7/test/test_subprocess.py
copy from lib-python/2.7/test/test_subprocess.py
copy to lib-python/modified-2.7/test/test_subprocess.py
--- a/lib-python/2.7/test/test_subprocess.py
+++ b/lib-python/modified-2.7/test/test_subprocess.py
@@ -16,11 +16,11 @@
 # Depends on the following external programs: Python
 #
 
-if mswindows:
-    SETBINARY = ('import msvcrt; msvcrt.setmode(sys.stdout.fileno(), '
-                                                'os.O_BINARY);')
-else:
-    SETBINARY = ''
+#if mswindows:
+#    SETBINARY = ('import msvcrt; msvcrt.setmode(sys.stdout.fileno(), '
+#                                                'os.O_BINARY);')
+#else:
+#    SETBINARY = ''
 
 
 try:
@@ -420,8 +420,9 @@
         self.assertStderrEqual(stderr, "")
 
     def test_universal_newlines(self):
-        p = subprocess.Popen([sys.executable, "-c",
-                          'import sys,os;' + SETBINARY +
+        # NB. replaced SETBINARY with the -u flag
+        p = subprocess.Popen([sys.executable, "-u", "-c",
+                          'import sys,os;' + #SETBINARY +
                           'sys.stdout.write("line1\\n");'
                           'sys.stdout.flush();'
                           'sys.stdout.write("line2\\r");'
@@ -448,8 +449,9 @@
 
     def test_universal_newlines_communicate(self):
         # universal newlines through communicate()
-        p = subprocess.Popen([sys.executable, "-c",
-                          'import sys,os;' + SETBINARY +
+        # NB. replaced SETBINARY with the -u flag
+        p = subprocess.Popen([sys.executable, "-u", "-c",
+                          'import sys,os;' + #SETBINARY +
                           'sys.stdout.write("line1\\n");'
                           'sys.stdout.flush();'
                           'sys.stdout.write("line2\\r");'
diff --git a/lib-python/modified-2.7/urllib2.py b/lib-python/modified-2.7/urllib2.py
--- a/lib-python/modified-2.7/urllib2.py
+++ b/lib-python/modified-2.7/urllib2.py
@@ -395,11 +395,7 @@
         meth_name = protocol+"_response"
         for processor in self.process_response.get(protocol, []):
             meth = getattr(processor, meth_name)
-            try:
-                response = meth(req, response)
-            except:
-                response.close()
-                raise
+            response = meth(req, response)
 
         return response
 
diff --git a/lib_pypy/_collections.py b/lib_pypy/_collections.py
--- a/lib_pypy/_collections.py
+++ b/lib_pypy/_collections.py
@@ -379,12 +379,14 @@
 class defaultdict(dict):
     
     def __init__(self, *args, **kwds):
-        self.default_factory = None
-        if 'default_factory' in kwds:
-            self.default_factory = kwds.pop('default_factory')
-        elif len(args) > 0 and (callable(args[0]) or args[0] is None):
-            self.default_factory = args[0]
+        if len(args) > 0:
+            default_factory = args[0]
             args = args[1:]
+            if not callable(default_factory) and default_factory is not None:
+                raise TypeError("first argument must be callable")
+        else:
+            default_factory = None
+        self.default_factory = default_factory
         super(defaultdict, self).__init__(*args, **kwds)
  
     def __missing__(self, key):
@@ -404,7 +406,7 @@
             recurse.remove(id(self))
 
     def copy(self):
-        return type(self)(self, default_factory=self.default_factory)
+        return type(self)(self.default_factory, self)
     
     def __copy__(self):
         return self.copy()
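
A small sketch of the defaultdict behaviour the change above targets: the first positional argument is the default_factory (callable or None), and copy() forwards it positionally so the factory survives copying (standard collections.defaultdict semantics, illustrative only):

from collections import defaultdict

d = defaultdict(list, {'a': [1]})    # first positional argument is default_factory
d['b'].append(2)                     # __missing__ calls the factory for absent keys
assert d['a'] == [1] and d['b'] == [2]

c = d.copy()                         # copy() must pass the factory through
assert c.default_factory is list
c['new'].append(3)
assert c['new'] == [3]

try:
    defaultdict(42)                  # non-callable, non-None factory is rejected
except TypeError:
    pass
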
diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py
--- a/lib_pypy/_ctypes/pointer.py
+++ b/lib_pypy/_ctypes/pointer.py
@@ -124,7 +124,8 @@
     # for now, we always allow types.pointer, else a lot of tests
     # break. We need to rethink how pointers are represented, though
     if my_ffitype is not ffitype and ffitype is not _ffi.types.void_p:
-        raise ArgumentError, "expected %s instance, got %s" % (type(value), ffitype)
+        raise ArgumentError("expected %s instance, got %s" % (type(value),
+                                                              ffitype))
     return value._get_buffer_value()
 
 def _cast_addr(obj, _, tp):
diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py
--- a/lib_pypy/_ctypes/structure.py
+++ b/lib_pypy/_ctypes/structure.py
@@ -17,7 +17,7 @@
         if len(f) == 3:
             if (not hasattr(tp, '_type_')
                 or not isinstance(tp._type_, str)
-                or tp._type_ not in "iIhHbBlL"):
+                or tp._type_ not in "iIhHbBlLqQ"):
                 #XXX: are those all types?
                 #     we just dont get the type name
                 #     in the interp levle thrown TypeError
diff --git a/lib_pypy/_pypy_irc_topic.py b/lib_pypy/_pypy_irc_topic.py
--- a/lib_pypy/_pypy_irc_topic.py
+++ b/lib_pypy/_pypy_irc_topic.py
@@ -1,117 +1,6 @@
-"""qvfgbcvna naq hgbcvna punvef
-qlfgbcvna naq hgbcvna punvef
-V'z fbeel, pbhyq lbh cyrnfr abg nterr jvgu gur png nf jryy?
-V'z fbeel, pbhyq lbh cyrnfr abg nterr jvgu gur punve nf jryy?
-jr cnffrq gur RH erivrj
-cbfg RhebClguba fcevag fgnegf 12.IVV.2007, 10nz
-RhebClguba raqrq
-n Pyrna Ragrecevfrf cebqhpgvba
-npnqrzl vf n pbzcyvpngrq ebyr tnzr
-npnqrzvn vf n pbzcyvpngrq ebyr tnzr
-jbexvat pbqr vf crn fbhc
-abg lbhe snhyg, zber yvxr vg'f n zbivat gnetrg
-guvf fragrapr vf snyfr
-abguvat vf gehr
-Yncfnat Fbhpubat
-Oenpunzhgnaqn
-fbeel, V'yy grnpu gur pnpghf ubj gb fjvz yngre
-Jul fb znal znal znal znal znal ivbyvaf?
-Jul fb znal znal znal znal znal bowrpgf?
-"eha njnl naq yvir ba n snez" nccebnpu gb fbsgjner qrirybczrag
-"va snpg, lbh zvtug xabj zber nobhg gur genafyngvba gbbypunva nsgre znfgrevat eclguba guna fbzr angvir fcrnxre xabjf nobhg uvf zbgure gbathr" - kbeNkNk
-"jurer qvq nyy gur ivbyvaf tb?"
-- ClCl fgnghf oybt: uggc://zberclcl.oybtfcbg.pbz/
-uggc://kxpq.pbz/353/
-pnfhnyvgl ivbyngvbaf naq sylvat
-wrgmg abpu fpubxbynqvtre
-R09 2X @PNN:85?
-vs lbh'er gelvat gb oybj hc fghss, jub pnerf?
-vs fghss oybjf hc, lbh pner
-2008 jvyy or gur lrne bs clcl ba gur qrfxgbc
-2008 jvyy or gur lrne bs gur qrfxgbc ba #clcl
-2008 jvyy or gur lrne bs gur qrfxgbc ba #clcl, Wnahnel jvyy or gur zbagu bs gur nyc gbcf
-lrf, ohg jung'g gur frafr bs 0 < "qhena qhena"
-eclguba: flagnk naq frznagvpf bs clguba, fcrrq bs p, erfgevpgvbaf bs wnin naq pbzcvyre reebe zrffntrf nf crargenoyr nf ZHZCF
+"""eclguba: flagnk naq frznagvpf bs clguba, fcrrq bs p, erfgevpgvbaf bs wnin naq pbzcvyre reebe zrffntrf nf crargenoyr nf ZHZCF
 pglcrf unf n fcva bs 1/3
 ' ' vf n fcnpr gbb
-2009 jvyy or gur lrne bs WVG ba gur qrfxgbc
-N ynathntr vf n qvnyrpg jvgu na nezl naq anil
-gbcvpf ner sbe gur srroyr zvaqrq
-2009 vf gur lrne bs ersyrpgvba ba gur qrfxgbc
-gur tybor vf bhe cbal, gur pbfzbf bhe erny ubefr
-jub nz V naq vs lrf, ubj znal?
-cebtenzzvat va orq vf n cresrpgyl svar npgvivgl
-zbber'f ynj vf n qeht jvgu gur jbefg pbzr qbja
-EClguba: jr hfr vg fb lbh qba'g unir gb
-Zbber'f ynj vf n qeht jvgu gur jbefg pbzr qbja. EClguba: haqrpvqrq.
-guvatf jvyy or avpr naq fghss
-qba'g cbfg yvaxf gb cngragf urer
-Abg lbhe hfhny nanylfrf.
-Gur Neg bs gur Punaary
-Clguba 300
-V fhccbfr ZO bs UGZY cre frpbaq vf abg gur hfhny fcrrq zrnfher crbcyr jbhyq rkcrpg sbe n wvg
-gur fha arire frgf ba gur ClCl rzcver
-ghegyrf ner snfgre guna lbh guvax
-cebtenzzvat vf na nrfgrguvp raqrnibhe
-P vf tbbq sbe fbzrguvat, whfg abg sbe jevgvat fbsgjner
-trezna vf tbbq sbe fbzrguvat, whfg abg sbe jevgvat fbsgjner
-trezna vf tbbq sbe artngvbaf, whfg abg sbe jevgvat fbsgjner
-# nffreg qvq abg penfu
-lbh fubhyq fgneg n cresrpg fbsgjner zbirzrag
-lbh fubhyq fgneg n cresrpg punaary gbcvp zbirzrag
-guvf vf n cresrpg punaary gbcvp
-guvf vf n frys-ersreragvny punaary gbcvp
-crrcubcr bcgvzvmngvbaf ner jung n Fhssvpvragyl Fzneg Pbzcvyre hfrf
-"crrcubcr" bcgvzvmngvbaf ner jung na bcgvzvfgvp Pbzcvyre hfrf
-pubbfr lbhe unpx
-gur 'fhcre' xrljbeq vf abg gung uhttnoyr
-wlguba cngpurf ner abg rabhtu sbe clcl
-- qb lbh xabj oreyva? - nyy bs vg? - jryy, whfg oreyva
-- ubj jvyy gur snpg gung gurl ner hfrq va bhe ercy punatr bhe gbcvpf?
-- ubj pna vg rire unir jbexrq?
-- jurer fubhyq gur unpx or fgberq?
-- Vg'f uneq gb fnl rknpgyl jung pbafgvghgrf erfrnepu va gur pbzchgre jbeyq, ohg nf n svefg nccebkvzngvba, vg'f fbsgjner gung qbrfa'g unir hfref.
-- Cebtenzzvat vf nyy nobhg xabjvat jura gb obvy gur benatr fcbatr qbaxrl npebff gur cuvyyvcvarf
-- Jul fb znal, znal, znal, znal, znal, znal qhpxyvatf?
-- ab qrgnvy vf bofpher rabhtu gb abg unir fbzr pbqr qrcraqvat ba vg.
-- jung V trarenyyl jnag vf serr fcrrqhcf
-- nyy bs ClCl vf kv-dhnyvgl
-"lbh pna nyjnlf xvyy -9 be bf._rkvg() vs lbh'er va n uheel"
-Ohernhpengf ohvyq npnqrzvp rzcverf juvpu puhea bhg zrnavatyrff fbyhgvbaf gb veeryrinag ceboyrzf.
-vg'f abg n unpx, vg'f n jbexnebhaq
-ClCl qbrfa'g unir pbcbylinevnqvp qrcraqragyl-zbabzbecurq ulcresyhknqf
-ClCl qbrfa'g punatr gur shaqnzragny culfvpf pbafgnagf
-Qnapr bs gur Fhtnecyhz Snvel
-Wnin vf whfg tbbq rabhtu gb or cenpgvpny, ohg abg tbbq rabhtu gb or hfnoyr.
-RhebClguba vf unccravat, qba'g rkcrpg nal dhvpx erfcbafr gvzrf.
-"V jbhyq yvxr gb fgnl njnl sebz ernyvgl gura"
-"gung'f jul gur 'be' vf ernyyl na 'naq' "
-jvgu nyy nccebcevngr pbagrkghnyvfngvbavat
-qba'g gevc ba gur cbjre pbeq
-vzcyrzragvat YBTB va YBTB: "ghegyrf nyy gur jnl qbja"
-gur ohooyrfbeg jbhyq or gur jebat jnl gb tb
-gur cevapvcyr bs pbafreingvba bs zrff
-gb fnir n gerr, rng n ornire
-Qre Ovore znpugf evpugvt: Antg nyyrf xnchgg.
-"Nal jbeyqivrj gung vfag jenpxrq ol frys-qbhog naq pbashfvba bire vgf bja vqragvgl vf abg n jbeyqivrj sbe zr." - Fpbgg Nnebafba
-jr oryvrir va cnapnxrf, znlor
-jr oryvrir va ghegyrf, znlor
-jr qrsvavgryl oryvrir va zrgn
-gur zngevk unf lbh
-"Yvsr vf uneq, gura lbh anc" - n png
-Vf Nezva ubzr jura gur havirefr prnfrf gb rkvfg?
-Qhrffryqbes fcevag fgnegrq
-frys.nobeeg("pnaabg ybnq negvpyrf")
-QRAGVFGEL FLZOBY YVTUG IREGVPNY NAQ JNIR
-"Gur UUH pnzchf vf n tbbq Dhnxr yriry" - Nezva
-"Gur UUH pnzchf jbhyq or n greevoyr dhnxr yriry - lbh'q arire unir n pyhr jurer lbh ner" - zvpunry
-N enqvbnpgvir png unf 18 unys-yvirf.
-<rfp> : j  [fvtu] <onpxfcnpr> <onpxfcnpr> <pgey>-f
-pbybe-pbqrq oyhrf
-"Neebtnapr va pbzchgre fpvrapr vf zrnfherq va anab-Qvwxfgenf."
-ClCl arrqf n Whfg-va-Gvzr WVG
-"Lbh pna'g gvzr geniry whfg ol frggvat lbhe pybpxf jebat"
-Gjb guernqf jnyx vagb n one. Gur onexrrcre ybbxf hc naq lryyf, "url, V jnag qba'g nal pbaqvgvbaf enpr yvxr gvzr ynfg!"
 Clguba 2.k rfg cerfdhr zbeg, ivir Clguba!
 Clguba 2.k vf abg qrnq
 Riregvzr fbzrbar nethrf jvgu "Fznyygnyx unf nyjnlf qbar K", vg vf  nyjnlf n tbbq uvag gung fbzrguvat arrqf gb or punatrq snfg. - Znephf Qraxre
@@ -119,7 +8,6 @@
 __kkk__ naq __ekkk__ if bcrengvba fybgf: cnegvpyr dhnaghz fhcrecbfvgvba xvaq bs sha
 ClCl vf na rkpvgvat grpuabybtl gung yrgf lbh gb jevgr snfg, cbegnoyr, zhygv-cyngsbez vagrecergref jvgu yrff rssbeg
 Nezva: "Cebybt vf n zrff.", PS: "Ab, vg'f irel pbby!", Nezva: "Vfa'g guvf jung V fnvq?"
-<nevtngb> tbbq, grfgf ner hfrshy fbzrgvzrf :-)
 ClCl vf yvxr nofheq gurngre
 jr unir ab nagv-vzcbffvoyr fgvpx gung znxrf fher gung nyy lbhe cebtenzf unyg
 clcl vf n enpr orgjrra crbcyr funivat lnxf naq gur havirefr cebqhpvat zber orneqrq lnxf. Fb sne, gur havirefr vf jvaavat
@@ -136,14 +24,14 @@
 ClCl 1.1.0orgn eryrnfrq: uggc://pbqrfcrnx.arg/clcl/qvfg/clcl/qbp/eryrnfr-1.1.0.ugzy
 "gurer fubhyq or bar naq bayl bar boivbhf jnl gb qb vg". ClCl inevnag: "gurer pna or A unys-ohttl jnlf gb qb vg"
 1.1 svany eryrnfrq: uggc://pbqrfcrnx.arg/clcl/qvfg/clcl/qbp/eryrnfr-1.1.0.ugzy
-1.1 svany eryrnfrq | <svwny> nzq64 naq ccp ner bayl ninvynoyr va ragrecevfr irefvba
+<svwny> nzq64 naq ccp ner bayl ninvynoyr va ragrecevfr irefvba
 Vf gurer n clcl gvzr? - vs lbh pna srry vg (?) gura gurer vf
 <nevtngb> ab, abezny jbex vf fhpu zhpu yrff gvevat guna inpngvbaf
 <nevtngb> ab, abezny jbex vf fb zhpu yrff gvevat guna inpngvbaf
-SVEFG gurl vtaber lbh, gura gurl ynhtu ng lbh, gura gurl svtug lbh, gura lbh jva.
+-SVEFG gurl vtaber lbh, gura gurl ynhtu ng lbh, gura gurl svtug lbh, gura lbh jva.-
 vg'f Fhaqnl, znlor
 vg'f Fhaqnl, ntnva
-"3 + 3 = 8"  Nagb va gur WVG gnyx
+"3 + 3 = 8" - Nagb va gur WVG gnyx
 RPBBC vf unccravat
 RPBBC vf svavfurq
 cflpb rngf bar oenva cre vapu bs cebterff
@@ -175,10 +63,108 @@
 "nu, whfg va gvzr qbphzragngvba" (__nc__)
 ClCl vf abg n erny IZ: ab frtsnhyg unaqyref gb qb gur ener pnfrf
 lbh pna'g unir obgu pbairavrapr naq fcrrq
-gur WVG qbrfa'g jbex ba BF/K (abi'09)
-ab fhccbeg sbe BF/K evtug abj! (abi'09)
 fyvccref urvtug pna or zrnfherq va k86 ertvfgref
 clcl vf n enpr orgjrra gur vaqhfgel gelvat gb ohvyq znpuvarf jvgu zber naq zber erfbheprf, naq gur clcl qrirybcref gelvat gb rng nyy bs gurz. Fb sne, gur jvaare vf fgvyy hapyrne
+"znl pbagnva ahgf naq/be lbhat cbvagref"
+vg'f nyy irel fvzcyr, yvxr gur ubyvqnlf
+unccl ClCl'f lrne 2010!
+fnzhryr fnlf gung jr ybfg n enmbe. fb jr pna'g funir lnxf
+"yrg'f abg or bofpher, hayrff jr ernyyl arrq gb"
+<nevtngb> (abg guernq-fnsr, ohg jryy, abguvat vf)
+clcl unf znal ceboyrzf, ohg rnpu bar unf znal fbyhgvbaf
+whfg nabgure vgrz (1.333...) ba bhe erny-ahzorerq gbqb yvfg
+ClCl vf Fuveg Bevtnzv erfrnepu
+<svwny> nafjrevat n dhrfgvba: "ab -- sbe ng yrnfg bar cbffvoyr vagrecergngvba bs lbhe fragrapr"
+eryrnfr 1.2 hcpbzvat
+ClCl 1.2 eryrnfrq - uggc://clcl.bet/
+AB IPF QVFPHFFVBAF
+EClguba vf n svar pnzry unve oehfu
+ClCl vf n npghnyyl n ivfhnyvmngvba cebwrpg, jr whfg ohvyq vagrecergref gb unir vagrerfgvat qngn gb ivfhnyvmr
+clcl vf yvxr fnhfntrf
+naq abj sbe fbzrguvat pbzcyrgryl qvssrerag
+n 10gu bs sberire vf 1u45
+pbeerpg pbqr qbrfag arrq nal grfgf <qhpx>
+cbfgfgehpghenyvfz rgp.
+clcl UVG trarengbe
+gur arj clcl fcbeg vf gb cnff clcl ohtf nf pclguba ohtf
+jr unir zhpu zber vagrecergref guna hfref
+ClCl 1.3 njnvgvat eryrnfr
+ClCl 1.3 eryrnfrq
+vg frrzf gb zr gung bapr lbh frggyr ba na rkrphgvba / bowrpg zbqry naq / be olgrpbqr sbezng, lbh'ir nyernql qrpvqrq jung ynathntrf (jurer gur 'f' frrzf fhcresyhbhf) fhccbeg vf tbvat gb or svefg pynff sbe
+"Nyy ceboyrzf va ClCl pna or fbyirq ol nabgure yriry bs vagrecergngvba"
+ClCl 1.3 eryrnfrq (jvaqbjf ovanevrf vapyhqrq)
+jul qvq lbh thlf unir gb znxr gur ohvygva sbeghar zber vagrerfgvat guna npghny jbex? v whfg pngpurq zlfrys erfgnegvat clcl 20 gvzrf
+"jr hfrq gb unir n zrff jvgu na bofpher vagresnpr, abj jr unir zrff urer naq bofpher vagresnpr gurer. cebterff" crqebavf ba n clcl fcevag
+"phcf bs pbssrr ner yvxr nanybtvrf va gung V'z znxvat bar evtug abj"
+"vg'f nyjnlf hc gb hf, va n jnl be gur bgure"
+ClCl vf infg, naq pbagnvaf zhygvghqrf
+qravny vf eneryl n tbbq qrohttvat grpuavdhr
+"Yrg'f tb." - "Jr pna'g" - "Jul abg?" - "Jr'er jnvgvat sbe n Genafyngvba." - (qrfcnvevatyl) "Nu!"
+'gung'f qrsvavgryl n pnfr bs "hu????"'
+va gurbel gurer vf gur Ybbc, va cenpgvpr gurer ner oevqtrf
+gur uneqqevir - pbafgnag qngn cvytevzntr
+ClCl vf n gbby gb xrrc bgurejvfr qnatrebhf zvaqf fnsryl bpphcvrq.
+jr ner n trareny senzrjbex ohvyg ba pbafvfgrag nccyvpngvba bs nqubp-arff
+gur jnl gb nibvq n jbexnebhaq vf gb vagebqhpr n fgebatre jbexnebhaq fbzrjurer ryfr
+pnyyvat gur genafyngvba gbby punva n 'fpevcg' vf xvaq bs bssrafvir
+ehaavat clcl-p genafyngr.cl vf n ovg yvxr jngpuvat n guevyyre zbivr, vg pbhyq pbafhzr nyy gur zrzbel ng nal gvzr
+ehaavat clcl-p genafyngr.cl vf n ovg yvxr jngpuvat n guevyyre zbivr, vg pbhyq qvr ng nal gvzr orpnhfr bs gur 32-ovg 4TO yvzvg bs ENZ
+Qh jvefg rora tranh qnf reervpura, jbena xrvare tynhog
+vs fjvgmreynaq jrer jurer terrpr vf (ba vfynaqf) jbhyq gurl nyy or pbaarpgrq ol oevqtrf?
+genafyngvat clcl jvgu pclguba vf fbbbbbb fybj
+ClCl 1.4 eryrnfrq!
+Jr ner abg urebrf, whfg irel cngvrag.
+QBAR zrnaf vg'f qbar
+jul gurer vf ab "ClCl 1.4 eryrnfrq" va gbcvp nal zber?
+fabj! fabj!
+svanyyl, zrephevny zvtengvba vf unccravat!
+Gur zvtengvba gb zrephevny vf pbzcyrgrq! uggc://ovgohpxrg.bet/clcl/clcl
+fabj! fabj! (gre)
+unccl arj lrne
+naq anaanaw gb lbh nf jryy
+Frrvat nf gur ynjf bs culfvpf ner ntnvafg lbh, lbh unir gb pnershyyl pbafvqre lbhe fpbcr fb gung lbhe tbnyf ner ernfbanoyr.
+nf hfhny va clcl, gur fbyhgvba nccrnef pbzcyrgryl qvfcebcbegvbangr gb gur ceboyrz naq vafgrnq jr'yy tb sbe n pbzcyrgryl qvssrerag fvzcyre nccebnpu gb gur bevtvany ceboyrz
+fabj, fabj!
+va clcl lbh ner nyjnlf ng gur jebat yriry, va bar jnl be gur bgure
+jryy, vg'f jebat ohg abg fb "irel jebat" nf vg ybbxrq
+<svwny> V ybir clcl
+ynmvarff vzcngvrapr naq uhoevf
+fabj, fabj
+EClguba: guvatf lbh jbhyqa'g qb va Clguba, naq pna'g qb va P.
+vg vf gur rkcrpgrq orunivbe, rkprcg jura lbh qba'g rkcrpg vg
+erqrsvavat lryybj frrzf yvxr n orggre vqrn
+"gung'f ubjrire whfg ratvarrevat" (svwny)
+"[vg] whfg fubjf ntnva gung eclguba vf bofpher" (psobym)
+"naljnl, clguba vf n infg ynathntr" (svwny)
+bhg-bs-yvr-thneqf
+"gurer ner qnlf ba juvpu lbh ybbx nebhaq naq abguvat fubhyq unir rire jbexrq" (svwny)
+clcl vf n orggre xvaq bs sbbyvfuarff - ynp
+ehaavat grfgf vf rffragvny sbe qrirybcvat clcl -- hu? qvq V oernx gur grfg? (svwny)
+V'ir tbg guvf sybbe jnk gung'f nyfb n TERNG qrffreg gbccvat!!
+rknexha: "gur cneg gung V gubhtug jnf tbvat gb or uneq jnf gevivny, fb abj V whfg unir guvf cneg gung V qvqa'g rira guvax bs gung vf uneq"
+V fhccbfr jr pna yvir jvgu gur bofphevgl, nf ybat nf gurer vf n pbzzrag znxvat vg yvtugre
+V nz n ovt oryvrire va ernfbaf. ohg gur nccnerag xvaq ner zl snibevgr.
+clcl: trg n WVG sbe serr (jryy gur svefg qnl lbh jba'g znantr naq vg jvyy or irel sehfgengvat)
+<Nyrk_Tnlabe> thgjbegu: bu, jr fubhyq znxr gur WVG zntvpnyyl orggre, jvgu qrpbengbef naq fghss
+vg'f n pbzcyrgr unpx, ohg n irel zvavzny bar (nevtngb)
+svefg gurl ynhtu ng lbh, gura gurl vtaber lbh, gura gurl svtug lbh, gura lbh jva
+ClCl vf snzvyl sevraqyl
+jr yvxr pbzcynvagf
+gbqnl jr'er snfgre guna lrfgreqnl (hfhnyyl)
+ClCl naq PClguba: gurl ner zbegny rarzvrf vagrag ba xvyyvat rnpu bgure
+nethnoyl, rirelguvat vf n avpur
+clcl unf ynlref yvxr bavbaf: crryvat gurz onpx jvyy znxr lbh pel
+EClguba zntvpnyyl znxrf lbh evpu naq snzbhf (fnlf fb ba gur gva)
+Vf evtbobg nebhaq jura gur havirefr prnfrf gb rkvfg?
+ClCl vf gbb pbby sbe dhrelfgevatf.
+< nevtngb> gura jung bpphef? < svwny> tbbq fghss V oryvrir
+ClCl 1.6 eryrnfrq!
+<psobym> jurer ner gur grfgf?
+uggc://gjvgcvp.pbz/52nr8s
+N enaqbz dhbgr
+Nyy rkprcgoybpxf frrz fnar.
+N cvax tyvggrel ebgngvat ynzoqn
+"vg'f yvxryl grzcbenel hagvy sberire" nevtb
 """
 
 def some_topic():
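
The block above is ROT13-encoded IRC topics (the module's own decoding helper is outside this hunk); a self-contained way to decode one of them, for illustration:

import string
# Build a ROT13 translation table by hand (Python 2 string.maketrans).
_rot13 = string.maketrans(
    string.ascii_letters,
    string.ascii_lowercase[13:] + string.ascii_lowercase[:13] +
    string.ascii_uppercase[13:] + string.ascii_uppercase[:13])
assert "Clguba 2.k vf abg qrnq".translate(_rot13) == "Python 2.x is not dead"
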
diff --git a/lib_pypy/_sha.py b/lib_pypy/_sha.py
--- a/lib_pypy/_sha.py
+++ b/lib_pypy/_sha.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# -*- coding: iso-8859-1
+# -*- coding: iso-8859-1 -*-
 
 # Note that PyPy contains also a built-in module 'sha' which will hide
 # this one if compiled in.
diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py
--- a/lib_pypy/_sqlite3.py
+++ b/lib_pypy/_sqlite3.py
@@ -231,6 +231,9 @@
 sqlite.sqlite3_result_text.argtypes = [c_void_p, c_char_p, c_int, c_void_p]
 sqlite.sqlite3_result_text.restype = None
 
+sqlite.sqlite3_enable_load_extension.argtypes = [c_void_p, c_int]
+sqlite.sqlite3_enable_load_extension.restype = c_int
+
 ##########################################
 # END Wrapped SQLite C API and constants
 ##########################################
@@ -705,6 +708,14 @@
         from sqlite3.dump import _iterdump
         return _iterdump(self)
 
+    def enable_load_extension(self, enabled):
+        self._check_thread()
+        self._check_closed()
+
+        rc = sqlite.sqlite3_enable_load_extension(self.db, int(enabled))
+        if rc != SQLITE_OK:
+            raise OperationalError("Error enabling load extension")
+
 DML, DQL, DDL = range(3)
 
 class Cursor(object):
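
A usage sketch for the enable_load_extension() method added above; it mirrors the standard sqlite3 API, only works against an SQLite build compiled with loadable-extension support, and the extension path below is hypothetical:

import sqlite3                         # on PyPy, sqlite3 is backed by _sqlite3.py

con = sqlite3.connect(":memory:")
con.enable_load_extension(True)        # calls sqlite3_enable_load_extension(db, 1)
# con.load_extension("./myextension")  # hypothetical extension shared library
con.enable_load_extension(False)       # switch it back off when done
con.close()
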
diff --git a/lib_pypy/distributed/socklayer.py b/lib_pypy/distributed/socklayer.py
--- a/lib_pypy/distributed/socklayer.py
+++ b/lib_pypy/distributed/socklayer.py
@@ -2,7 +2,7 @@
 import py
 from socket import socket
 
-XXX needs import adaptation as 'green' is removed from py lib for years 
+raise ImportError("XXX needs import adaptation as 'green' is removed from py lib for years")
 from py.impl.green.msgstruct import decodemessage, message
 from socket import socket, AF_INET, SOCK_STREAM
 import marshal
diff --git a/lib_pypy/itertools.py b/lib_pypy/itertools.py
--- a/lib_pypy/itertools.py
+++ b/lib_pypy/itertools.py
@@ -25,7 +25,7 @@
 
 __all__ = ['chain', 'count', 'cycle', 'dropwhile', 'groupby', 'ifilter',
            'ifilterfalse', 'imap', 'islice', 'izip', 'repeat', 'starmap',
-           'takewhile', 'tee']
+           'takewhile', 'tee', 'compress', 'product']
 
 try: from __pypy__ import builtinify
 except ImportError: builtinify = lambda f: f
diff --git a/lib_pypy/pyrepl/commands.py b/lib_pypy/pyrepl/commands.py
--- a/lib_pypy/pyrepl/commands.py
+++ b/lib_pypy/pyrepl/commands.py
@@ -33,10 +33,9 @@
 class Command(object):
     finish = 0
     kills_digit_arg = 1
-    def __init__(self, reader, (event_name, event)):
+    def __init__(self, reader, cmd):
         self.reader = reader
-        self.event = event
-        self.event_name = event_name
+        self.event_name, self.event = cmd
     def do(self):
         pass
 
diff --git a/lib_pypy/pyrepl/pygame_console.py b/lib_pypy/pyrepl/pygame_console.py
--- a/lib_pypy/pyrepl/pygame_console.py
+++ b/lib_pypy/pyrepl/pygame_console.py
@@ -130,7 +130,7 @@
         s.fill(c, [0, 600 - bmargin, 800, bmargin])
         s.fill(c, [800 - rmargin, 0, lmargin, 600])
 
-    def refresh(self, screen, (cx, cy)):
+    def refresh(self, screen, cxy):
         self.screen = screen
         self.pygame_screen.fill(colors.bg,
                                 [0, tmargin + self.cur_top + self.scroll,
@@ -139,8 +139,8 @@
 
         line_top = self.cur_top
         width, height = self.fontsize
-        self.cxy = (cx, cy)
-        cp = self.char_pos(cx, cy)
+        self.cxy = cxy
+        cp = self.char_pos(*cxy)
         if cp[1] < tmargin:
             self.scroll = - (cy*self.fh + self.cur_top)
             self.repaint()
@@ -148,7 +148,7 @@
             self.scroll += (600 - bmargin) - (cp[1] + self.fh)
             self.repaint()
         if self.curs_vis:
-            self.pygame_screen.blit(self.cursor, self.char_pos(cx, cy))
+            self.pygame_screen.blit(self.cursor, self.char_pos(*cxy))
         for line in screen:
             if 0 <= line_top + self.scroll <= (600 - bmargin - tmargin - self.fh):
                 if line:
diff --git a/lib_pypy/pyrepl/readline.py b/lib_pypy/pyrepl/readline.py
--- a/lib_pypy/pyrepl/readline.py
+++ b/lib_pypy/pyrepl/readline.py
@@ -231,7 +231,11 @@
         return ''.join(chars)
 
     def _histline(self, line):
-        return unicode(line.rstrip('\n'), ENCODING)
+        line = line.rstrip('\n')
+        try:
+            return unicode(line, ENCODING)
+        except UnicodeDecodeError:   # bah, silently fall back...
+            return unicode(line, 'utf-8')
 
     def get_history_length(self):
         return self.saved_history_length
@@ -268,7 +272,10 @@
         f = open(os.path.expanduser(filename), 'w')
         for entry in history:
             if isinstance(entry, unicode):
-                entry = entry.encode(ENCODING)
+                try:
+                    entry = entry.encode(ENCODING)
+                except UnicodeEncodeError:   # bah, silently fall back...
+                    entry = entry.encode('utf-8')
             entry = entry.replace('\n', '\r\n')   # multiline history support
             f.write(entry + '\n')
         f.close()
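
A sketch of the encode-with-fallback pattern the hunk above applies to history entries (Python 2; ENCODING stands in for whatever encoding the surrounding module selected):

ENCODING = 'ascii'                      # assumption for this example

def _encode_history_entry(entry):
    if isinstance(entry, unicode):
        try:
            entry = entry.encode(ENCODING)
        except UnicodeEncodeError:      # silently fall back, as in the patch
            entry = entry.encode('utf-8')
    return entry.replace('\n', '\r\n')  # multiline history support

assert _encode_history_entry(u'caf\xe9') == 'caf\xc3\xa9'
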
diff --git a/lib_pypy/pyrepl/unix_console.py b/lib_pypy/pyrepl/unix_console.py
--- a/lib_pypy/pyrepl/unix_console.py
+++ b/lib_pypy/pyrepl/unix_console.py
@@ -163,7 +163,7 @@
     def change_encoding(self, encoding):
         self.encoding = encoding
     
-    def refresh(self, screen, (cx, cy)):
+    def refresh(self, screen, cxy):
         # this function is still too long (over 90 lines)
 
         if not self.__gone_tall:
@@ -198,6 +198,7 @@
 
         # we make sure the cursor is on the screen, and that we're
         # using all of the screen if we can
+        cx, cy = cxy
         if cy < offset:
             offset = cy
         elif cy >= offset + height:
@@ -411,7 +412,12 @@
                    e.args[4] == 'unexpected end of data':
                 pass
             else:
-                raise
+                # was: "raise".  But it crashes pyrepl, and by extension the
+                # pypy currently running, in which we are e.g. in the middle
+                # of some debugging session.  Argh.  Instead just print an
+                # error message to stderr and continue running, for now.
+                self.partial_char = ''
+                sys.stderr.write('\n%s: %s\n' % (e.__class__.__name__, e))
         else:
             self.partial_char = ''
             self.event_queue.push(c)
diff --git a/lib_pypy/syslog.py b/lib_pypy/syslog.py
--- a/lib_pypy/syslog.py
+++ b/lib_pypy/syslog.py
@@ -38,9 +38,27 @@
 _setlogmask.argtypes = (c_int,)
 _setlogmask.restype = c_int
 
+_S_log_open = False
+_S_ident_o = None
+
+def _get_argv():
+    try:
+        import sys
+        script = sys.argv[0]
+        if isinstance(script, str):
+            return script[script.rfind('/')+1:] or None
+    except Exception:
+        pass
+    return None
+
 @builtinify
-def openlog(ident, option, facility):
-    _openlog(ident, option, facility)
+def openlog(ident=None, logoption=0, facility=LOG_USER):
+    global _S_ident_o, _S_log_open
+    if ident is None:
+        ident = _get_argv()
+    _S_ident_o = c_char_p(ident)    # keepalive
+    _openlog(_S_ident_o, logoption, facility)
+    _S_log_open = True
 
 @builtinify
 def syslog(arg1, arg2=None):
@@ -48,11 +66,18 @@
         priority, message = arg1, arg2
     else:
         priority, message = LOG_INFO, arg1
+    # if log is not opened, open it now
+    if not _S_log_open:
+        openlog()
     _syslog(priority, "%s", message)
 
 @builtinify
 def closelog():
-    _closelog()
+    global _S_log_open, S_ident_o
+    if _S_log_open:
+        _closelog()
+        _S_log_open = False
+        _S_ident_o = None
 
 @builtinify
 def setlogmask(mask):
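
The syslog change above makes openlog() optional by opening lazily on the first syslog() call and deriving a default ident from sys.argv[0]; a minimal pure-Python sketch of that open-on-first-use flow (the _openlog/_syslog stubs stand in for the real ctypes calls):

import sys

_log_open = False
_ident = None

def _openlog(ident, option, facility):  # stand-in for the ctypes call
    pass

def _syslog(priority, fmt, message):    # stand-in for the ctypes call
    pass

def openlog(ident=None, logoption=0, facility=0):
    global _log_open, _ident
    if ident is None:                   # default ident: basename of sys.argv[0]
        script = sys.argv[0]
        ident = script[script.rfind('/') + 1:] or None
    _ident = ident                      # keep a reference alive for the C side
    _openlog(_ident, logoption, facility)
    _log_open = True

def syslog(message, priority=6):        # 6 == LOG_INFO
    if not _log_open:                   # open lazily with defaults, as in the patch
        openlog()
    _syslog(priority, "%s", message)

syslog("hello")                         # works without an explicit openlog() call
assert _log_open
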
diff --git a/py/_code/code.py b/py/_code/code.py
--- a/py/_code/code.py
+++ b/py/_code/code.py
@@ -164,6 +164,7 @@
         #   if something:  # assume this causes a NameError
         #      # _this_ lines and the one
                #        below we don't want from entry.getsource()
+        end = min(end, len(source))
         for i in range(self.lineno, end):
             if source[i].rstrip().endswith(':'):
                 end = i + 1
@@ -307,7 +308,7 @@
                     self._striptext = 'AssertionError: '
         self._excinfo = tup
         self.type, self.value, tb = self._excinfo
-        self.typename = self.type.__name__
+        self.typename = getattr(self.type, "__name__", "???")
         self.traceback = py.code.Traceback(tb)
 
     def __repr__(self):
diff --git a/pypy/annotation/binaryop.py b/pypy/annotation/binaryop.py
--- a/pypy/annotation/binaryop.py
+++ b/pypy/annotation/binaryop.py
@@ -252,7 +252,26 @@
     # unsignedness is considered a rare and contagious disease
 
     def union((int1, int2)):
-        knowntype = rarithmetic.compute_restype(int1.knowntype, int2.knowntype)
+        if int1.unsigned == int2.unsigned:
+            knowntype = rarithmetic.compute_restype(int1.knowntype, int2.knowntype)
+        else:
+            t1 = int1.knowntype
+            if t1 is bool:
+                t1 = int
+            t2 = int2.knowntype
+            if t2 is bool:
+                t2 = int
+
+            if t2 is int:
+                if int2.nonneg == False:
+                    raise UnionError, "Merging %s and a possibly negative int is not allowed" % t1
+                knowntype = t1
+            elif t1 is int:
+                if int1.nonneg == False:
+                    raise UnionError, "Merging %s and a possibly negative int is not allowed" % t2
+                knowntype = t2
+            else:
+                raise UnionError, "Merging these types (%s, %s) is not supported" % (t1, t2)
         return SomeInteger(nonneg=int1.nonneg and int2.nonneg,
                            knowntype=knowntype)
 
diff --git a/pypy/annotation/model.py b/pypy/annotation/model.py
--- a/pypy/annotation/model.py
+++ b/pypy/annotation/model.py
@@ -591,13 +591,11 @@
     immutable = True
     def __init__(self, method):
         self.method = method
-        
-NUMBER = object()
+
 annotation_to_ll_map = [
     (SomeSingleFloat(), lltype.SingleFloat),
     (s_None, lltype.Void),   # also matches SomeImpossibleValue()
     (s_Bool, lltype.Bool),
-    (SomeInteger(knowntype=r_ulonglong), NUMBER),    
     (SomeFloat(), lltype.Float),
     (SomeLongFloat(), lltype.LongFloat),
     (SomeChar(), lltype.Char),
@@ -623,10 +621,11 @@
             return lltype.Ptr(p.PARENTTYPE)
     if isinstance(s_val, SomePtr):
         return s_val.ll_ptrtype
+    if type(s_val) is SomeInteger:
+        return lltype.build_number(None, s_val.knowntype)
+
     for witness, T in annotation_to_ll_map:
         if witness.contains(s_val):
-            if T is NUMBER:
-                return lltype.build_number(None, s_val.knowntype)
             return T
     if info is None:
         info = ''
@@ -635,7 +634,7 @@
     raise ValueError("%sshould return a low-level type,\ngot instead %r" % (
         info, s_val))
 
-ll_to_annotation_map = dict([(ll, ann) for ann, ll in annotation_to_ll_map if ll is not NUMBER])
+ll_to_annotation_map = dict([(ll, ann) for ann, ll in annotation_to_ll_map])
 
 def lltype_to_annotation(T):
     try:
diff --git a/pypy/annotation/specialize.py b/pypy/annotation/specialize.py
--- a/pypy/annotation/specialize.py
+++ b/pypy/annotation/specialize.py
@@ -36,9 +36,7 @@
             newtup = SpaceOperation('newtuple', starargs, argscopy[-1])
             newstartblock.operations.append(newtup)
             newstartblock.closeblock(Link(argscopy, graph.startblock))
-            graph.startblock.isstartblock = False
             graph.startblock = newstartblock
-            newstartblock.isstartblock = True
             argnames = argnames + ['.star%d' % i for i in range(nb_extra_args)]
             graph.signature = Signature(argnames)
             # note that we can mostly ignore defaults: if nb_extra_args > 0, 
diff --git a/pypy/annotation/test/test_annrpython.py b/pypy/annotation/test/test_annrpython.py
--- a/pypy/annotation/test/test_annrpython.py
+++ b/pypy/annotation/test/test_annrpython.py
@@ -856,6 +856,46 @@
         py.test.raises(Exception, a.build_types, f, [])
         # if you want to get a r_uint, you have to be explicit about it
 
+    def test_add_different_ints(self):
+        def f(a, b):
+            return a + b
+        a = self.RPythonAnnotator()
+        py.test.raises(Exception, a.build_types, f, [r_uint, int])
+
+    def test_merge_different_ints(self):
+        def f(a, b):
+            if a:
+                c = a
+            else:
+                c = b
+            return c
+        a = self.RPythonAnnotator()
+        py.test.raises(Exception, a.build_types, f, [r_uint, int])
+
+    def test_merge_ruint_zero(self):
+        def f(a):
+            if a:
+                c = a
+            else:
+                c = 0
+            return c
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [r_uint])
+        assert s == annmodel.SomeInteger(nonneg = True, unsigned = True)
+
+    def test_merge_ruint_nonneg_signed(self):
+        def f(a, b):
+            if a:
+                c = a
+            else:
+                assert b >= 0
+                c = b
+            return c
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [r_uint, int])
+        assert s == annmodel.SomeInteger(nonneg = True, unsigned = True)
+
+
     def test_prebuilt_long_that_is_not_too_long(self):
         small_constant = 12L
         def f():
@@ -3029,7 +3069,7 @@
             if g(x, y):
                 g(x, r_uint(y))
         a = self.RPythonAnnotator()
-        a.build_types(f, [int, int])
+        py.test.raises(Exception, a.build_types, f, [int, int])
 
     def test_compare_with_zero(self):
         def g():
diff --git a/pypy/bin/checkmodule.py b/pypy/bin/checkmodule.py
--- a/pypy/bin/checkmodule.py
+++ b/pypy/bin/checkmodule.py
@@ -1,43 +1,45 @@
 #! /usr/bin/env python
 """
-Usage:  checkmodule.py [-b backend] <module-name>
+Usage:  checkmodule.py <module-name>
 
-Compiles the PyPy extension module from pypy/module/<module-name>/
-into a fake program which does nothing. Useful for testing whether a
-modules compiles without doing a full translation. Default backend is cli.
-
-WARNING: this is still incomplete: there are chances that the
-compilation fails with strange errors not due to the module. If a
-module is known to compile during a translation but don't pass
-checkmodule.py, please report the bug (or, better, correct it :-).
+Check annotation and rtyping of the PyPy extension module from
+pypy/module/<module-name>/.  Useful for testing whether a
+module compiles without doing a full translation.
 """
 import autopath
-import sys
+import sys, os
 
 from pypy.objspace.fake.checkmodule import checkmodule
 
 def main(argv):
-    try:
-        assert len(argv) in (2, 4)
-        if len(argv) == 2:
-            backend = 'cli'
-            modname = argv[1]
-            if modname in ('-h', '--help'):
-                print >> sys.stderr, __doc__
-                sys.exit(0)
-            if modname.startswith('-'):
-                print >> sys.stderr, "Bad command line"
-                print >> sys.stderr, __doc__
-                sys.exit(1)
-        else:
-            _, b, backend, modname = argv
-            assert b == '-b'
-    except AssertionError:
+    if len(argv) != 2:
         print >> sys.stderr, __doc__
         sys.exit(2)
+    modname = argv[1]
+    if modname in ('-h', '--help'):
+        print >> sys.stderr, __doc__
+        sys.exit(0)
+    if modname.startswith('-'):
+        print >> sys.stderr, "Bad command line"
+        print >> sys.stderr, __doc__
+        sys.exit(1)
+    if os.path.sep in modname:
+        if os.path.basename(modname) == '':
+            modname = os.path.dirname(modname)
+        if os.path.basename(os.path.dirname(modname)) != 'module':
+            print >> sys.stderr, "Must give '../module/xxx', or just 'xxx'."
+            sys.exit(1)
+        modname = os.path.basename(modname)
+    try:
+        checkmodule(modname)
+    except Exception, e:
+        import traceback, pdb
+        traceback.print_exc()
+        pdb.post_mortem(sys.exc_info()[2])
+        return 1
     else:
-        checkmodule(modname, backend, interactive=True)
-        print 'Module compiled succesfully'
+        print 'Passed.'
+        return 0
 
 if __name__ == '__main__':
-    main(sys.argv)
+    sys.exit(main(sys.argv))
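
A hypothetical invocation of the simplified script, assuming a PyPy source
checkout ('binascii' is only an example module name):

    # from the shell:  python pypy/bin/checkmodule.py binascii
    # or, equivalently, what the script does internally:
    from pypy.objspace.fake.checkmodule import checkmodule
    checkmodule('binascii')   # annotates and rtypes pypy/module/binascii/
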
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -92,7 +92,7 @@
 
 module_import_dependencies = {
     # no _rawffi if importing pypy.rlib.clibffi raises ImportError
-    # or CompilationError
+    # or CompilationError or py.test.skip.Exception
     "_rawffi"   : ["pypy.rlib.clibffi"],
     "_ffi"      : ["pypy.rlib.clibffi"],
 
@@ -113,7 +113,7 @@
             try:
                 for name in modlist:
                     __import__(name)
-            except (ImportError, CompilationError), e:
+            except (ImportError, CompilationError, py.test.skip.Exception), e:
                 errcls = e.__class__.__name__
                 config.add_warning(
                     "The module %r is disabled\n" % (modname,) +
@@ -281,6 +281,9 @@
                    "actually create the full list until the resulting "
                    "list is mutated",
                    default=False),
+        BoolOption("withliststrategies",
+                   "enable optimized ways to store lists of primitives ",
+                   default=True),
 
         BoolOption("withtypeversion",
                    "version type objects when changing them",
diff --git a/pypy/config/test/test_translationoption.py b/pypy/config/test/test_translationoption.py
new file mode 100644
--- /dev/null
+++ b/pypy/config/test/test_translationoption.py
@@ -0,0 +1,10 @@
+import py
+from pypy.config.translationoption import get_combined_translation_config
+from pypy.config.translationoption import set_opt_level
+from pypy.config.config import ConflictConfigError
+
+
+def test_no_gcrootfinder_with_boehm():
+    config = get_combined_translation_config()
+    config.translation.gcrootfinder = "shadowstack"
+    py.test.raises(ConflictConfigError, set_opt_level, config, '0')
diff --git a/pypy/config/translationoption.py b/pypy/config/translationoption.py
--- a/pypy/config/translationoption.py
+++ b/pypy/config/translationoption.py
@@ -69,8 +69,8 @@
                      "statistics": [("translation.gctransformer", "framework")],
                      "generation": [("translation.gctransformer", "framework")],
                      "hybrid": [("translation.gctransformer", "framework")],
-                     "boehm": [("translation.gctransformer", "boehm"),
-                               ("translation.continuation", False)],  # breaks
+                     "boehm": [("translation.continuation", False),  # breaks
+                               ("translation.gctransformer", "boehm")],
                      "markcompact": [("translation.gctransformer", "framework")],
                      "minimark": [("translation.gctransformer", "framework")],
                      },
@@ -398,6 +398,10 @@
     # make_sure_not_resized often relies on it, so we always enable them
     config.translation.suggest(list_comprehension_operations=True)
 
+    # finally, make the choice of the gc definitive.  This will fail
+    # if we have specified strange inconsistent settings.
+    config.translation.gc = config.translation.gc
+
 # ----------------------------------------------------------------
 
 def set_platform(config):
diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst
--- a/pypy/doc/coding-guide.rst
+++ b/pypy/doc/coding-guide.rst
@@ -270,7 +270,12 @@
   - *slicing*:
     the slice start must be within bounds. The stop doesn't need to, but it must
     not be smaller than the start.  All negative indexes are disallowed, except for
-    the [:-1] special case.  No step.
+    the [:-1] special case.  No step.  Slice deletion follows the same rules.
+    
+  - *slice assignment*:
+    only supports ``lst[x:y] = sublist`` when ``len(sublist) == y - x``.
+    In other words, slice assignment cannot change the total length of the list;
+    it can only replace existing items.
 
   - *other operators*:
     ``+``, ``+=``, ``in``, ``*``, ``*=``, ``==``, ``!=`` work as expected.
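
To illustrate the slicing rules documented above, a small sketch of what is
and is not accepted (plain Python, mirroring the RPython restrictions):

    lst = [1, 2, 3, 4, 5]
    lst[1:4] = [20, 30, 40]    # ok: len(sublist) == 4 - 1 == 3, length unchanged
    # lst[1:4] = [20, 30]      # rejected: would change the list's length
    del lst[:-1]               # slice deletion follows the same rules
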
diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py
--- a/pypy/doc/conf.py
+++ b/pypy/doc/conf.py
@@ -45,9 +45,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '1.6'
+version = '1.7'
 # The full version, including alpha/beta/rc tags.
-release = '1.6'
+release = '1.7'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/pypy/doc/config/objspace.std.withliststrategies.txt b/pypy/doc/config/objspace.std.withliststrategies.txt
new file mode 100644
--- /dev/null
+++ b/pypy/doc/config/objspace.std.withliststrategies.txt
@@ -0,0 +1,2 @@
+Enable list strategies: Use specialized representations for lists of primitive
+objects, such as ints.
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -262,6 +262,26 @@
 documented as such (as e.g. for hasattr()), in most cases PyPy
 lets the exception propagate instead.
 
+Object Identity of Primitive Values, ``is`` and ``id``
+-------------------------------------------------------
+
+Object identity of primitive values works by value equality, not by identity of
+the wrapper. This means that ``x + 1 is x + 1`` is always true for arbitrary
+integers ``x``. The rule applies to the following types:
+
+ - ``int``
+
+ - ``float``
+
+ - ``long``
+
+ - ``complex``
+
+This rule requires corresponding changes to ``id`` as well. ``id`` fulfills the
+following condition: ``x is y <=> id(x) == id(y)``. Therefore ``id`` of the
+above types will return a value that is computed from the argument, and can
+thus be larger than ``sys.maxint`` (i.e. it can be an arbitrary long).
+
 
 Miscellaneous
 -------------
@@ -284,14 +304,14 @@
   never a dictionary as it sometimes is in CPython. Assigning to
   ``__builtins__`` has no effect.
 
-* Do not compare immutable objects with ``is``.  For example on CPython
-  it is true that ``x is 0`` works, i.e. does the same as ``type(x) is
-  int and x == 0``, but it is so by accident.  If you do instead
-  ``x is 1000``, then it stops working, because 1000 is too large and
-  doesn't come from the internal cache.  In PyPy it fails to work in
-  both cases, because we have no need for a cache at all.
+* Directly calling the internal magic methods of a few built-in types
+  with invalid arguments may have a slightly different result.  For
+  example, ``[].__add__(None)`` and ``(2).__add__(None)`` both return
+  ``NotImplemented`` on PyPy; on CPython, only the latter does, and the
+  former raises ``TypeError``.  (Of course, ``[]+None`` and ``2+None``
+  both raise ``TypeError`` everywhere.)  This difference is an
+  implementation detail that shows up because of internal C-level slots
+  that PyPy does not have.
 
-* Also, object identity of immutable keys in dictionaries is not necessarily
-  preserved.
 
 .. include:: _ref.txt
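
As an illustration of the identity-by-value rule described in this file, a
hypothetical PyPy 1.7 session:

    >>>> x = 42
    >>>> x + 1 is x + 1        # identity of ints works by value
    True
    >>>> id(x + 1) == id(x + 1)
    True
    >>>> # id() of an int/float/long/complex is computed from the value,
    >>>> # so it may be larger than sys.maxint (an arbitrary long).
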
diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst
--- a/pypy/doc/how-to-release.rst
+++ b/pypy/doc/how-to-release.rst
@@ -1,6 +1,3 @@
-.. include:: needswork.txt
-
-.. needs work, it talks about svn. also, it is not really user documentation
 
 Making a PyPy Release
 =======================
@@ -12,11 +9,8 @@
 forgetting things. A set of todo files may also work.
 
 Check and prioritize all issues for the release, postpone some if necessary,
-create new  issues also as necessary. A meeting (or meetings) should be
-organized to decide what things are priorities, should go in and work for
-the release. 
-
-An important thing is to get the documentation into an up-to-date state!
+create new issues as necessary. An important thing is to get
+the documentation into an up-to-date state!
 
 Release Steps
 ----------------
diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst
--- a/pypy/doc/index.rst
+++ b/pypy/doc/index.rst
@@ -15,7 +15,7 @@
 
 * `FAQ`_: some frequently asked questions.
 
-* `Release 1.6`_: the latest official release
+* `Release 1.7`_: the latest official release
 
 * `PyPy Blog`_: news and status info about PyPy 
 
@@ -75,7 +75,7 @@
 .. _`Getting Started`: getting-started.html
 .. _`Papers`: extradoc.html
 .. _`Videos`: video-index.html
-.. _`Release 1.6`: http://pypy.org/download.html
+.. _`Release 1.7`: http://pypy.org/download.html
 .. _`speed.pypy.org`: http://speed.pypy.org
 .. _`RPython toolchain`: translation.html
 .. _`potential project ideas`: project-ideas.html
@@ -120,9 +120,9 @@
 Windows, on top of .NET, and on top of Java.
 To dig into PyPy it is recommended to try out the current
 Mercurial default branch, which is always working or mostly working,
-instead of the latest release, which is `1.6`__.
+instead of the latest release, which is `1.7`__.
 
-.. __: release-1.6.0.html
+.. __: release-1.7.0.html
 
 PyPy is mainly developed on Linux and Mac OS X.  Windows is supported,
 but platform-specific bugs tend to take longer before we notice and fix
diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst
--- a/pypy/doc/project-ideas.rst
+++ b/pypy/doc/project-ideas.rst
@@ -17,17 +17,26 @@
 projects, or anything else in PyPy, pop up on IRC or write to us on the
 `mailing list`_.
 
+Make big integers faster
+-------------------------
+
+PyPy's implementation of the Python ``long`` type is slower than CPython's.
+Find out why and optimize it.
+
+Make bytearray type fast
+------------------------
+
+PyPy's bytearray type is very inefficient. It would be an interesting
+task to look into possible optimizations for it.
+
 Numpy improvements
 ------------------
 
-This is more of a project-container than a single project. Possible ideas:
+NumPy support is progressing rapidly in PyPy, so feel free to come to IRC and
+ask for a proposed topic. A not necessarily up-to-date `list of topics`_
+is also available.
 
-* experiment with auto-vectorization using SSE or implement vectorization
-  without automatically detecting it for array operations.
-
-* improve numpy, for example implement memory views.
-
-* interface with fortran/C libraries.
+.. _`list of topics`: https://bitbucket.org/pypy/extradoc/src/extradoc/planning/micronumpy.txt
 
 Improving the jitviewer
 ------------------------
diff --git a/pypy/doc/release-1.7.0.rst b/pypy/doc/release-1.7.0.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/release-1.7.0.rst
@@ -0,0 +1,94 @@
+==================================
+PyPy 1.7 - widening the sweet spot
+==================================
+
+We're pleased to announce the 1.7 release of PyPy. As has become a habit, this
+release brings a lot of bugfixes and performance improvements over the 1.6
+release. However, unlike the previous releases, the focus has been on widening
+the "sweet spot" of PyPy. That is, the classes of Python code that PyPy can
+greatly speed up have been vastly broadened with this release. You can
+download the 1.7 release here:
+
+    http://pypy.org/download.html
+
+What is PyPy?
+=============
+
+PyPy is a very compliant Python interpreter, almost a drop-in replacement for
+CPython 2.7. It's fast (`pypy 1.7 and cpython 2.7.1`_ performance comparison)
+due to its integrated tracing JIT compiler.
+
+This release supports x86 machines running Linux 32/64, Mac OS X 32/64 or
+Windows 32. Windows 64 work is ongoing, but not yet natively supported.
+
+The main topic of this release is widening the range of code which PyPy
+can greatly speed up. On average on
+our benchmark suite, PyPy 1.7 is around **30%** faster than PyPy 1.6 and up
+to **20 times** faster on some benchmarks.
+
+.. _`pypy 1.7 and cpython 2.7.1`: http://speed.pypy.org
+
+
+Highlights
+==========
+
+* Numerous performance improvements. There are too many Python constructs
+  that now behave faster to list them all.
+
+* Bugfixes and compatibility fixes with CPython.
+
+* Windows fixes.
+
+* PyPy now comes with stackless features enabled by default. However,
+  any loop using stackless features will interrupt the JIT for now, so no real
+  performance improvement for stackless-based programs. Contact pypy-dev for
+  information on how to help remove this restriction.
+
+* The NumPy effort in PyPy was renamed to numpypy. In order to try it, simply
+  write::
+
+    import numpypy as numpy
+
+  at the beginning of your program. There has been huge progress on numpy in
+  PyPy since 1.6, the main new feature being the implementation of dtypes.
+
+* The JSON encoder (but not the decoder) has been replaced with a new one.
+  It is written in pure Python, but is known to outperform CPython's C
+  extension by up to **2 times** in some cases. It's about **20 times**
+  faster than the one that we had in 1.6.
+
+* The memory footprint of some of our RPython modules has been drastically
+  improved. This should benefit any application that uses, for example,
+  cryptography, such as tornado.
+
+* There has been some progress in exposing more of the CPython C API via cpyext.
+
+Things that didn't make it, expect in 1.8 soon
+==============================================
+
+There is ongoing work which, while it didn't make it into this release, is
+probably worth mentioning here. This is what you should probably expect in
+1.8 some time soon:
+
+* Specialized list implementation. There is a branch that implements lists of
+  integers/floats/strings as compactly as array.array. This should drastically
+  improve the performance and memory usage of some applications.
+
+* The NumPy effort is progressing, with multi-dimensional arrays coming
+  soon.
+
+* There are two brand new JIT assembler backends, notably for the PowerPC and
+  ARM processors.
+
+Fundraising
+===========
+
+It's perhaps worth mentioning that we're running fundraising campaigns for
+NumPy effort in PyPy and for Python 3 in PyPy. In case you want to see any
+of those happen faster, we urge you to donate to `numpy proposal`_ or
+`py3k proposal`_. In case you want PyPy to progress, but you trust us with
+the general direction, you can always donate to the `general pot`_.
+
+.. _`numpy proposal`: http://pypy.org/numpydonate.html
+.. _`py3k proposal`: http://pypy.org/py3donate.html
+.. _`general pot`: http://pypy.org
diff --git a/pypy/interpreter/astcompiler/optimize.py b/pypy/interpreter/astcompiler/optimize.py
--- a/pypy/interpreter/astcompiler/optimize.py
+++ b/pypy/interpreter/astcompiler/optimize.py
@@ -1,6 +1,5 @@
 """codegen helpers and AST constant folding."""
 import sys
-import itertools
 
 from pypy.interpreter.astcompiler import ast, consts, misc
 from pypy.tool import stdlib_opcode as ops
@@ -146,8 +145,7 @@
 }
 unrolling_unary_folders = unrolling_iterable(unary_folders.items())
 
-for folder in itertools.chain(binary_folders.itervalues(),
-                              unary_folders.itervalues()):
+for folder in binary_folders.values() + unary_folders.values():
     folder._always_inline_ = True
 del folder
 
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1,4 +1,3 @@
-import itertools
 import pypy
 from pypy.interpreter.executioncontext import ExecutionContext, ActionFlag
 from pypy.interpreter.executioncontext import UserDelAction, FrameTraceAction
@@ -188,6 +187,12 @@
 
     # -------------------------------------------------------------------
 
+    def is_w(self, space, w_other):
+        return self is w_other
+
+    def unique_id(self, space):
+        return space.wrap(compute_unique_id(self))
+
     def str_w(self, space):
         w_msg = typed_unwrap_error_msg(space, "string", self)
         raise OperationError(space.w_TypeError, w_msg)
@@ -482,6 +487,16 @@
         'parser', 'fcntl', '_codecs', 'binascii'
     ]
 
+    # These modules are treated like CPython treats built-in modules,
+    # i.e. they always shadow any xx.py.  The other modules are treated
+    # like CPython treats extension modules, and are loaded in sys.path
+    # order by the fake entry '.../lib_pypy/__extensions__'.
+    MODULES_THAT_ALWAYS_SHADOW = dict.fromkeys([
+        '__builtin__', '__pypy__', '_ast', '_codecs', '_sre', '_warnings',
+        '_weakref', 'errno', 'exceptions', 'gc', 'imp', 'marshal',
+        'posix', 'nt', 'pwd', 'signal', 'sys', 'thread', 'zipimport',
+    ], None)
+
     def make_builtins(self):
         "NOT_RPYTHON: only for initializing the space."
 
@@ -513,8 +528,8 @@
         exception_types_w = self.export_builtin_exceptions()
 
         # initialize with "bootstrap types" from objspace  (e.g. w_None)
-        types_w = itertools.chain(self.get_builtin_types().iteritems(),
-                                  exception_types_w.iteritems())
+        types_w = (self.get_builtin_types().items() +
+                   exception_types_w.items())
         for name, w_type in types_w:
             self.setitem(self.builtin.w_dict, self.wrap(name), w_type)
 
@@ -681,9 +696,17 @@
         """shortcut for space.is_true(space.eq(w_obj1, w_obj2))"""
         return self.is_w(w_obj1, w_obj2) or self.is_true(self.eq(w_obj1, w_obj2))
 
-    def is_w(self, w_obj1, w_obj2):
-        """shortcut for space.is_true(space.is_(w_obj1, w_obj2))"""
-        return self.is_true(self.is_(w_obj1, w_obj2))
+    def is_(self, w_one, w_two):
+        return self.newbool(self.is_w(w_one, w_two))
+
+    def is_w(self, w_one, w_two):
+        # done by a method call on w_two (and not on w_one, because of the
+        # expected programming style where we say "if x is None" or
+        # "if x is object").
+        return w_two.is_w(self, w_one)
+
+    def id(self, w_obj):
+        return w_obj.unique_id(self)
 
     def hash_w(self, w_obj):
         """shortcut for space.int_w(space.hash(w_obj))"""
@@ -777,22 +800,63 @@
         """Unpack an iterable object into a real (interpreter-level) list.
         Raise an OperationError(w_ValueError) if the length is wrong."""
         w_iterator = self.iter(w_iterable)
-        # If we know the expected length we can preallocate.
         if expected_length == -1:
+            # xxx special hack for speed
+            from pypy.interpreter.generator import GeneratorIterator
+            if isinstance(w_iterator, GeneratorIterator):
+                lst_w = []
+                w_iterator.unpack_into(lst_w)
+                return lst_w
+            # /xxx
+            return self._unpackiterable_unknown_length(w_iterator, w_iterable)
+        else:
+            lst_w = self._unpackiterable_known_length(w_iterator,
+                                                      expected_length)
+            return lst_w[:]     # make the resulting list resizable
+
+    @jit.dont_look_inside
+    def _unpackiterable_unknown_length(self, w_iterator, w_iterable):
+        # Unpack a variable-size list of unknown length.
+        # The JIT does not look inside this function because it
+        # contains a loop (made explicit with the decorator above).
+        #
+        # If we can guess the expected length we can preallocate.
+        try:
+            lgt_estimate = self.len_w(w_iterable)
+        except OperationError, o:
+            if (not o.match(self, self.w_AttributeError) and
+                not o.match(self, self.w_TypeError)):
+                raise
+            items = []
+        else:
             try:
-                lgt_estimate = self.len_w(w_iterable)
-            except OperationError, o:
-                if (not o.match(self, self.w_AttributeError) and
-                    not o.match(self, self.w_TypeError)):
+                items = newlist(lgt_estimate)
+            except MemoryError:
+                items = [] # it might have lied
+        #
+        while True:
+            try:
+                w_item = self.next(w_iterator)
+            except OperationError, e:
+                if not e.match(self, self.w_StopIteration):
                     raise
-                items = []
-            else:
-                try:
-                    items = newlist(lgt_estimate)
-                except MemoryError:
-                    items = [] # it might have lied
-        else:
-            items = [None] * expected_length
+                break  # done
+            items.append(w_item)
+        #
+        return items
+
+    @jit.dont_look_inside
+    def _unpackiterable_known_length(self, w_iterator, expected_length):
+        # Unpack a known length list, without letting the JIT look inside.
+        # Implemented by just calling the @jit.unroll_safe version, but
+        # by this point the JIT has already stopped looking inside.
+        return self._unpackiterable_known_length_jitlook(w_iterator,
+                                                         expected_length)
+
+    @jit.unroll_safe
+    def _unpackiterable_known_length_jitlook(self, w_iterator,
+                                             expected_length):
+        items = [None] * expected_length
         idx = 0
         while True:
             try:
@@ -801,26 +865,29 @@
                 if not e.match(self, self.w_StopIteration):
                     raise
                 break  # done
-            if expected_length != -1 and idx == expected_length:
+            if idx == expected_length:
                 raise OperationError(self.w_ValueError,
-                                     self.wrap("too many values to unpack"))
-            if expected_length == -1:
-                items.append(w_item)
-            else:
-                items[idx] = w_item
+                                    self.wrap("too many values to unpack"))
+            items[idx] = w_item
             idx += 1
-        if expected_length != -1 and idx < expected_length:
+        if idx < expected_length:
             if idx == 1:
                 plural = ""
             else:
                 plural = "s"
-            raise OperationError(self.w_ValueError,
-                      self.wrap("need more than %d value%s to unpack" %
-                                (idx, plural)))
+            raise operationerrfmt(self.w_ValueError,
+                                  "need more than %d value%s to unpack",
+                                  idx, plural)
         return items
 
-    unpackiterable_unroll = jit.unroll_safe(func_with_new_name(unpackiterable,
-                                            'unpackiterable_unroll'))
+    def unpackiterable_unroll(self, w_iterable, expected_length):
+        # Like unpackiterable(), but for the cases where we have
+        # an expected_length and want to unroll when JITted.
+        # Returns a fixed-size list.
+        w_iterator = self.iter(w_iterable)
+        assert expected_length != -1
+        return self._unpackiterable_known_length_jitlook(w_iterator,
+                                                         expected_length)
 
     def fixedview(self, w_iterable, expected_length=-1):
         """ A fixed list view of w_iterable. Don't modify the result
@@ -835,6 +902,16 @@
         """
         return self.unpackiterable(w_iterable, expected_length)
 
+    def listview_str(self, w_list):
+        """ Return a list of unwrapped strings out of a list of strings. If the
+        argument is not a list or does not contain only strings, return None.
+        May return None anyway (this default implementation always does).
+        """
+        return None
+
+    def newlist_str(self, list_s):
+        return self.newlist([self.wrap(s) for s in list_s])
+
     @jit.unroll_safe
     def exception_match(self, w_exc_type, w_check_class):
         """Checks if the given exception type matches 'w_check_class'."""
@@ -969,9 +1046,6 @@
     def isinstance_w(self, w_obj, w_type):
         return self.is_true(self.isinstance(w_obj, w_type))
 
-    def id(self, w_obj):
-        return self.wrap(compute_unique_id(w_obj))
-
     # The code below only works
     # for the simple case (new-style instance).
     # These methods are patched with the full logic by the __builtin__
@@ -1543,6 +1617,8 @@
     'UnicodeError',
     'ValueError',
     'ZeroDivisionError',
+    'UnicodeEncodeError',
+    'UnicodeDecodeError',
     ]
 
 ## Irregular part of the interface:
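
A standalone sketch of the double-dispatch pattern introduced by the new
is_w()/is_()/id() methods above (simplified stand-in classes, not the actual
PyPy ones):

    class W_Root(object):
        def is_w(self, space, w_other):
            # default: identity of the wrapper itself
            return self is w_other

    class W_SmallInt(W_Root):
        # a "value" object: identity is defined by the unwrapped value
        def __init__(self, intval):
            self.intval = intval
        def is_w(self, space, w_other):
            return (isinstance(w_other, W_SmallInt) and
                    self.intval == w_other.intval)

    class Space(object):
        def is_w(self, w_one, w_two):
            # dispatch on w_two, matching the "if x is None" idiom
            return w_two.is_w(self, w_one)

    space = Space()
    assert space.is_w(W_SmallInt(3), W_SmallInt(3))   # value identity
    assert not space.is_w(W_Root(), W_Root())         # wrapper identity
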
diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py
--- a/pypy/interpreter/generator.py
+++ b/pypy/interpreter/generator.py
@@ -1,14 +1,15 @@
+from pypy.interpreter.baseobjspace import Wrappable
 from pypy.interpreter.error import OperationError
-from pypy.interpreter.baseobjspace import Wrappable
 from pypy.interpreter.gateway import NoneNotWrapped
+from pypy.interpreter.pyopcode import LoopBlock
 from pypy.rlib import jit
-from pypy.interpreter.pyopcode import LoopBlock
+from pypy.rlib.objectmodel import specialize
 
 
 class GeneratorIterator(Wrappable):
     "An iterator created by a generator."
     _immutable_fields_ = ['pycode']
-    
+
     def __init__(self, frame):
         self.space = frame.space
         self.frame = frame     # turned into None when frame_finished_execution
@@ -81,7 +82,7 @@
             # if the frame is now marked as finished, it was RETURNed from
             if frame.frame_finished_execution:
                 self.frame = None
-                raise OperationError(space.w_StopIteration, space.w_None) 
+                raise OperationError(space.w_StopIteration, space.w_None)
             else:
                 return w_result     # YIELDed
         finally:
@@ -97,21 +98,21 @@
     def throw(self, w_type, w_val, w_tb):
         from pypy.interpreter.pytraceback import check_traceback
         space = self.space
-        
+
         msg = "throw() third argument must be a traceback object"
         if space.is_w(w_tb, space.w_None):
             tb = None
         else:
             tb = check_traceback(space, w_tb, msg)
-       
+
         operr = OperationError(w_type, w_val, tb)
         operr.normalize_exception(space)
         return self.send_ex(space.w_None, operr)
-             
+
     def descr_next(self):
         """x.next() -> the next value, or raise StopIteration"""
         return self.send_ex(self.space.w_None)
- 
+
     def descr_close(self):
         """x.close(arg) -> raise GeneratorExit inside generator."""
         assert isinstance(self, GeneratorIterator)
@@ -124,7 +125,7 @@
                     e.match(space, space.w_GeneratorExit):
                 return space.w_None
             raise
-        
+
         if w_retval is not None:
             msg = "generator ignored GeneratorExit"
             raise OperationError(space.w_RuntimeError, space.wrap(msg))
@@ -155,3 +156,44 @@
                                                  "interrupting generator of ")
                     break
                 block = block.previous
+
+    # 'results' can be either an RPython list of W_Root or an app-level
+    # W_ListObject, which also has an append() method; that's why we
+    # generate 2 versions of the function and 2 jit drivers.
+    def _create_unpack_into():
+        jitdriver = jit.JitDriver(greens=['pycode'],
+                                  reds=['self', 'frame', 'results'])
+        def unpack_into(self, results):
+            """This is a hack for performance: runs the generator and collects
+            all produced items in a list."""
+            # XXX copied and simplified version of send_ex()
+            space = self.space
+            if self.running:
+                raise OperationError(space.w_ValueError,
+                                     space.wrap('generator already executing'))
+            frame = self.frame
+            if frame is None:    # already finished
+                return
+            self.running = True
+            try:
+                pycode = self.pycode
+                while True:
+                    jitdriver.jit_merge_point(self=self, frame=frame,
+                                              results=results, pycode=pycode)
+                    try:
+                        w_result = frame.execute_frame(space.w_None)
+                    except OperationError, e:
+                        if not e.match(space, space.w_StopIteration):
+                            raise
+                        break
+                    # if the frame is now marked as finished, it was RETURNed from
+                    if frame.frame_finished_execution:
+                        break
+                    results.append(w_result)     # YIELDed
+            finally:
+                frame.f_backref = jit.vref_None
+                self.running = False
+                self.frame = None
+        return unpack_into
+    unpack_into = _create_unpack_into()
+    unpack_into_w = _create_unpack_into()
\ No newline at end of file
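
A standalone sketch of the trick used by _create_unpack_into() above: the
same function body is instantiated twice, so each copy can later be
specialized independently (toy stand-ins, not the real jit module):

    def _create_collector():
        # each call creates a fresh, independent function object, standing
        # in for the per-copy JitDriver of the real code
        def collect_into(iterator, results):
            for item in iterator:
                results.append(item)
        return collect_into

    collect_into_list = _create_collector()   # for RPython-level lists
    collect_into_w = _create_collector()      # for W_ListObject-like objects

    out = []
    collect_into_list(iter([1, 2, 3]), out)
    assert out == [1, 2, 3]
    assert collect_into_list is not collect_into_w
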
diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py
--- a/pypy/interpreter/pyframe.py
+++ b/pypy/interpreter/pyframe.py
@@ -10,7 +10,7 @@
 from pypy.rlib.objectmodel import we_are_translated, instantiate
 from pypy.rlib.jit import hint
 from pypy.rlib.debug import make_sure_not_resized, check_nonneg
-from pypy.rlib.rarithmetic import intmask
+from pypy.rlib.rarithmetic import intmask, r_uint
 from pypy.rlib import jit
 from pypy.tool import stdlib_opcode
 from pypy.tool.stdlib_opcode import host_bytecode_spec
@@ -167,7 +167,7 @@
                 # Execution starts just after the last_instr.  Initially,
                 # last_instr is -1.  After a generator suspends it points to
                 # the YIELD_VALUE instruction.
-                next_instr = self.last_instr + 1
+                next_instr = r_uint(self.last_instr + 1)
                 if next_instr != 0:
                     self.pushvalue(w_inputvalue)
             #
@@ -691,6 +691,7 @@
     handlerposition = space.int_w(w_handlerposition)
     valuestackdepth = space.int_w(w_valuestackdepth)
     assert valuestackdepth >= 0
+    assert handlerposition >= 0
     blk = instantiate(get_block_class(opname))
     blk.handlerposition = handlerposition
     blk.valuestackdepth = valuestackdepth
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -837,6 +837,7 @@
         raise Yield
 
     def jump_absolute(self, jumpto, next_instr, ec):
+        check_nonneg(jumpto)
         return jumpto
 
     def JUMP_FORWARD(self, jumpby, next_instr):
@@ -1278,7 +1279,7 @@
 
     def handle(self, frame, unroller):
         next_instr = self.really_handle(frame, unroller)   # JIT hack
-        return next_instr
+        return r_uint(next_instr)
 
     def really_handle(self, frame, unroller):
         """ Purely abstract method
diff --git a/pypy/interpreter/test/test_executioncontext.py b/pypy/interpreter/test/test_executioncontext.py
--- a/pypy/interpreter/test/test_executioncontext.py
+++ b/pypy/interpreter/test/test_executioncontext.py
@@ -292,7 +292,7 @@
         import os, sys
         print sys.executable, self.tmpfile
         if sys.platform == "win32":
-            cmdformat = '""%s" "%s""'    # excellent! tons of "!
+            cmdformat = '"%s" "%s"'
         else:
             cmdformat = "'%s' '%s'"
         g = os.popen(cmdformat % (sys.executable, self.tmpfile), 'r')
diff --git a/pypy/interpreter/test/test_function.py b/pypy/interpreter/test/test_function.py
--- a/pypy/interpreter/test/test_function.py
+++ b/pypy/interpreter/test/test_function.py
@@ -587,7 +587,7 @@
         assert isinstance(meth2, Method)
         assert meth2.call_args(args) == obj1
         # Check method returned from unbound_method.__get__()
-        w_meth3 = descr_function_get(space, func, None, space.type(obj2))
+        w_meth3 = descr_function_get(space, func, space.w_None, space.type(obj2))
         meth3 = space.unwrap(w_meth3)
         w_meth4 = meth3.descr_method_get(obj2, space.w_None)
         meth4 = space.unwrap(w_meth4)
diff --git a/pypy/interpreter/test/test_generator.py b/pypy/interpreter/test/test_generator.py
--- a/pypy/interpreter/test/test_generator.py
+++ b/pypy/interpreter/test/test_generator.py
@@ -117,7 +117,7 @@
         g = f()
         raises(NameError, g.throw, NameError, "Error", None)
 
-    
+
     def test_throw_fail(self):
         def f():
             yield 1
@@ -129,7 +129,7 @@
             yield 1
         g = f()
         raises(TypeError, g.throw, list())
- 
+
     def test_throw_fail3(self):
         def f():
             yield 1
@@ -188,7 +188,7 @@
         g = f()
         g.next()
         raises(NameError, g.close)
- 
+
     def test_close_fail(self):
         def f():
             try:
@@ -267,3 +267,15 @@
         assert r.startswith("<generator object myFunc at 0x")
         assert list(g) == [1]
         assert repr(g) == r
+
+    def test_unpackiterable_gen(self):
+        g = (i*i for i in range(-5, 3))
+        assert set(g) == set([0, 1, 4, 9, 16, 25])
+        assert set(g) == set()
+        assert set(i for i in range(0)) == set()
+
+    def test_explicit_stop_iteration_unpackiterable(self):
+        def f():
+            yield 1
+            raise StopIteration
+        assert tuple(f()) == (1,)
\ No newline at end of file
diff --git a/pypy/interpreter/test/test_objspace.py b/pypy/interpreter/test/test_objspace.py
--- a/pypy/interpreter/test/test_objspace.py
+++ b/pypy/interpreter/test/test_objspace.py
@@ -63,10 +63,13 @@
     def test_unpackiterable(self):
         space = self.space
         w = space.wrap
-        l = [w(1), w(2), w(3), w(4)]
+        l = [space.newlist([]) for l in range(4)]
         w_l = space.newlist(l)
-        assert space.unpackiterable(w_l) == l
-        assert space.unpackiterable(w_l, 4) == l
+        l1 = space.unpackiterable(w_l)
+        l2 = space.unpackiterable(w_l, 4)
+        for i in range(4):
+            assert space.is_w(l1[i], l[i])
+            assert space.is_w(l2[i], l[i])
         err = raises(OperationError, space.unpackiterable, w_l, 3)
         assert err.value.match(space, space.w_ValueError)
         err = raises(OperationError, space.unpackiterable, w_l, 5)
diff --git a/pypy/interpreter/test/test_typedef.py b/pypy/interpreter/test/test_typedef.py
--- a/pypy/interpreter/test/test_typedef.py
+++ b/pypy/interpreter/test/test_typedef.py
@@ -2,7 +2,7 @@
 from pypy.interpreter import typedef
 from pypy.tool.udir import udir
 from pypy.interpreter.baseobjspace import Wrappable
-from pypy.interpreter.gateway import ObjSpace
+from pypy.interpreter.gateway import ObjSpace, interp2app
 
 # this test isn't so much to test that the objspace interface *works*
 # -- it's more to test that it's *there*
@@ -260,6 +260,50 @@
         gc.collect(); gc.collect()
         assert space.unwrap(w_seen) == [6, 2]
 
+    def test_multiple_inheritance(self):
+        class W_A(Wrappable):
+            a = 1
+            b = 2
+        class W_C(W_A):
+            b = 3
+        W_A.typedef = typedef.TypeDef("A",
+            a = typedef.interp_attrproperty("a", cls=W_A),
+            b = typedef.interp_attrproperty("b", cls=W_A),
+        )
+        class W_B(Wrappable):
+            pass
+        def standalone_method(space, w_obj):
+            if isinstance(w_obj, W_A):
+                return space.w_True
+            else:
+                return space.w_False
+        W_B.typedef = typedef.TypeDef("B",
+            c = interp2app(standalone_method)
+        )
+        W_C.typedef = typedef.TypeDef("C", (W_A.typedef, W_B.typedef,))
+
+        w_o1 = self.space.wrap(W_C())
+        w_o2 = self.space.wrap(W_B())
+        w_c = self.space.gettypefor(W_C)
+        w_b = self.space.gettypefor(W_B)
+        w_a = self.space.gettypefor(W_A)
+        assert w_c.mro_w == [
+            w_c,
+            w_a,
+            w_b,
+            self.space.w_object,
+        ]
+        for w_tp in w_c.mro_w:
+            assert self.space.isinstance_w(w_o1, w_tp)
+        def assert_attr(w_obj, name, value):
+            assert self.space.unwrap(self.space.getattr(w_obj, self.space.wrap(name))) == value
+        def assert_method(w_obj, name, value):
+            assert self.space.unwrap(self.space.call_method(w_obj, name)) == value
+        assert_attr(w_o1, "a", 1)
+        assert_attr(w_o1, "b", 3)
+        assert_method(w_o1, "c", True)
+        assert_method(w_o2, "c", False)
+
 
 class AppTestTypeDef:
 
diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py
--- a/pypy/interpreter/typedef.py
+++ b/pypy/interpreter/typedef.py
@@ -15,13 +15,19 @@
     def __init__(self, __name, __base=None, **rawdict):
         "NOT_RPYTHON: initialization-time only"
         self.name = __name
-        self.base = __base
+        if __base is None:
+            bases = []
+        elif isinstance(__base, tuple):
+            bases = list(__base)
+        else:
+            bases = [__base]
+        self.bases = bases
         self.hasdict = '__dict__' in rawdict
         self.weakrefable = '__weakref__' in rawdict
         self.doc = rawdict.pop('__doc__', None)
-        if __base is not None:
-            self.hasdict     |= __base.hasdict
-            self.weakrefable |= __base.weakrefable
+        for base in bases:
+            self.hasdict     |= base.hasdict
+            self.weakrefable |= base.weakrefable
         self.rawdict = {}
         self.acceptable_as_base_class = '__new__' in rawdict
         self.applevel_subclasses_base = None
diff --git a/pypy/jit/backend/conftest.py b/pypy/jit/backend/conftest.py
--- a/pypy/jit/backend/conftest.py
+++ b/pypy/jit/backend/conftest.py
@@ -12,7 +12,7 @@
                     help="choose a fixed random seed")
     group.addoption('--backend', action="store",
                     default='llgraph',
-                    choices=['llgraph', 'x86'],
+                    choices=['llgraph', 'cpu'],
                     dest="backend",
                     help="select the backend to run the functions with")
     group.addoption('--block-length', action="store", type="int",
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -8,6 +8,7 @@
 from pypy.objspace.flow.model import Variable, Constant
 from pypy.annotation import model as annmodel
 from pypy.jit.metainterp.history import REF, INT, FLOAT
+from pypy.jit.metainterp import history
 from pypy.jit.codewriter import heaptracker
 from pypy.rpython.lltypesystem import lltype, llmemory, rclass, rstr, rffi
 from pypy.rpython.ootypesystem import ootype
@@ -20,6 +21,7 @@
 from pypy.jit.backend.llgraph import symbolic
 from pypy.jit.codewriter import longlong
 
+from pypy.rlib import libffi, clibffi
 from pypy.rlib.objectmodel import ComputedIntSymbolic, we_are_translated
 from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rlib.rarithmetic import r_longlong, r_ulonglong, r_uint
@@ -47,6 +49,11 @@
         value._the_opaque_pointer = op
         return op
 
+def _normalize(value):
+    if isinstance(value, lltype._ptr):
+        value = lltype.top_container(value._obj)
+    return value
+
 def from_opaque_string(s):
     if isinstance(s, str):
         return s
@@ -325,12 +332,12 @@
     loop = _from_opaque(loop)
     loop.operations.append(Operation(opnum))
 
-def compile_add_descr(loop, ofs, type, arg_types):
+def compile_add_descr(loop, ofs, type, arg_types, extrainfo, width):
     from pypy.jit.backend.llgraph.runner import Descr
     loop = _from_opaque(loop)
     op = loop.operations[-1]
     assert isinstance(type, str) and len(type) == 1
-    op.descr = Descr(ofs, type, arg_types=arg_types)
+    op.descr = Descr(ofs, type, arg_types=arg_types, extrainfo=extrainfo, width=width)
 
 def compile_add_descr_arg(loop, ofs, type, arg_types):
     from pypy.jit.backend.llgraph.runner import Descr
@@ -346,6 +353,14 @@
     op = loop.operations[-1]
     op.descr = weakref.ref(descr)
 
+TARGET_TOKENS = weakref.WeakKeyDictionary()
+
+def compile_add_target_token(loop, descr):
+    loop = _from_opaque(loop)
+    op = loop.operations[-1]
+    descrobj = _normalize(descr)
+    TARGET_TOKENS[descrobj] = loop, len(loop.operations), op.args
+
 def compile_add_var(loop, intvar):
     loop = _from_opaque(loop)
     op = loop.operations[-1]
@@ -380,13 +395,17 @@
     _variables.append(v)
     return r
 
-def compile_add_jump_target(loop, loop_target):
+def compile_add_jump_target(loop, targettoken):
     loop = _from_opaque(loop)
-    loop_target = _from_opaque(loop_target)
+    descrobj = _normalize(targettoken)
+    loop_target, target_opindex, target_inputargs = TARGET_TOKENS[descrobj]
+    #
     op = loop.operations[-1]
     op.jump_target = loop_target
+    op.jump_target_opindex = target_opindex
+    op.jump_target_inputargs = target_inputargs
     assert op.opnum == rop.JUMP
-    assert len(op.args) == len(loop_target.inputargs)
+    assert len(op.args) == len(target_inputargs)
     if loop_target == loop:
         log.info("compiling new loop")
     else:
@@ -520,10 +539,11 @@
                 self.opindex += 1
                 continue
             if op.opnum == rop.JUMP:
-                assert len(op.jump_target.inputargs) == len(args)
-                self.env = dict(zip(op.jump_target.inputargs, args))
+                inputargs = op.jump_target_inputargs
+                assert len(inputargs) == len(args)
+                self.env = dict(zip(inputargs, args))
                 self.loop = op.jump_target
-                self.opindex = 0
+                self.opindex = op.jump_target_opindex
                 _stats.exec_jumps += 1
             elif op.opnum == rop.FINISH:
                 if self.verbose:
@@ -616,6 +636,15 @@
         #
         return _op_default_implementation
 
+    def op_label(self, _, *args):
+        op = self.loop.operations[self.opindex]
+        assert op.opnum == rop.LABEL
+        assert len(op.args) == len(args)
+        newenv = {}
+        for v, value in zip(op.args, args):
+            newenv[v] = value
+        self.env = newenv
+
     def op_debug_merge_point(self, _, *args):
         from pypy.jit.metainterp.warmspot import get_stats
         try:
@@ -825,6 +854,16 @@
         else:
             raise NotImplementedError
 
+    def op_getinteriorfield_raw(self, descr, array, index):
+        if descr.typeinfo == REF:
+            return do_getinteriorfield_raw_ptr(array, index, descr.width, descr.ofs)
+        elif descr.typeinfo == INT:
+            return do_getinteriorfield_raw_int(array, index, descr.width, descr.ofs)
+        elif descr.typeinfo == FLOAT:
+            return do_getinteriorfield_raw_float(array, index, descr.width, descr.ofs)
+        else:
+            raise NotImplementedError
+
     def op_setinteriorfield_gc(self, descr, array, index, newvalue):
         if descr.typeinfo == REF:
             return do_setinteriorfield_gc_ptr(array, index, descr.ofs,
@@ -838,6 +877,16 @@
         else:
             raise NotImplementedError
 
+    def op_setinteriorfield_raw(self, descr, array, index, newvalue):
+        if descr.typeinfo == REF:
+            return do_setinteriorfield_raw_ptr(array, index, newvalue, descr.width, descr.ofs)
+        elif descr.typeinfo == INT:
+            return do_setinteriorfield_raw_int(array, index, newvalue, descr.width, descr.ofs)
+        elif descr.typeinfo == FLOAT:
+            return do_setinteriorfield_raw_float(array, index, newvalue, descr.width, descr.ofs)
+        else:
+            raise NotImplementedError
+
     def op_setfield_gc(self, fielddescr, struct, newvalue):
         if fielddescr.typeinfo == REF:
             do_setfield_gc_ptr(struct, fielddescr.ofs, newvalue)
@@ -1114,7 +1163,9 @@
     if isinstance(TYPE, lltype.Ptr):
         if isinstance(x, (int, long, llmemory.AddressAsInt)):
             x = llmemory.cast_int_to_adr(x)
-        if TYPE is rffi.VOIDP or TYPE.TO._hints.get("uncast_on_llgraph"):
+        if TYPE is rffi.VOIDP or (
+                hasattr(TYPE.TO, '_hints') and
+                TYPE.TO._hints.get("uncast_on_llgraph")):
             # assume that we want a "C-style" cast, without typechecking the value
             return rffi.cast(TYPE, x)
         return llmemory.cast_adr_to_ptr(x, TYPE)
@@ -1401,6 +1452,18 @@
     struct = array._obj.container.getitem(index)
     return cast_to_ptr(_getinteriorfield_gc(struct, fieldnum))
 
+def _getinteriorfield_raw(ffitype, array, index, width, ofs):
+    addr = rffi.cast(rffi.VOIDP, array)
+    return libffi.array_getitem(ffitype, width, addr, index, ofs)
+
+def do_getinteriorfield_raw_int(array, index, width, ofs):
+    res = _getinteriorfield_raw(libffi.types.slong, array, index, width, ofs)
+    return res
+
+def do_getinteriorfield_raw_float(array, index, width, ofs):
+    res = _getinteriorfield_raw(libffi.types.double, array, index, width, ofs)
+    return res
+
 def _getfield_raw(struct, fieldnum):
     STRUCT, fieldname = symbolic.TokenToField[fieldnum]
     ptr = cast_from_int(lltype.Ptr(STRUCT), struct)
@@ -1477,7 +1540,19 @@
     return do_setinteriorfield_gc
 do_setinteriorfield_gc_int = new_setinteriorfield_gc(cast_from_int)
 do_setinteriorfield_gc_float = new_setinteriorfield_gc(cast_from_floatstorage)
-do_setinteriorfield_gc_ptr = new_setinteriorfield_gc(cast_from_ptr)        
+do_setinteriorfield_gc_ptr = new_setinteriorfield_gc(cast_from_ptr)
+
+def new_setinteriorfield_raw(cast_func, ffitype):
+    def do_setinteriorfield_raw(array, index, newvalue, width, ofs):
+        addr = rffi.cast(rffi.VOIDP, array)
+        for TYPE, ffitype2 in clibffi.ffitype_map:
+            if ffitype2 is ffitype:
+                newvalue = cast_func(TYPE, newvalue)
+                break
+        return libffi.array_setitem(ffitype, width, addr, index, ofs, newvalue)
+    return do_setinteriorfield_raw
+do_setinteriorfield_raw_int = new_setinteriorfield_raw(cast_from_int, libffi.types.slong)
+do_setinteriorfield_raw_float = new_setinteriorfield_raw(cast_from_floatstorage, libffi.types.double)
 
 def do_setfield_raw_int(struct, fieldnum, newvalue):
     STRUCT, fieldname = symbolic.TokenToField[fieldnum]
@@ -1744,6 +1819,7 @@
 setannotation(compile_add, annmodel.s_None)
 setannotation(compile_add_descr, annmodel.s_None)
 setannotation(compile_add_descr_arg, annmodel.s_None)
+setannotation(compile_add_target_token, annmodel.s_None)
 setannotation(compile_add_var, annmodel.s_None)
 setannotation(compile_add_int_const, annmodel.s_None)
 setannotation(compile_add_ref_const, annmodel.s_None)
diff --git a/pypy/jit/backend/llgraph/runner.py b/pypy/jit/backend/llgraph/runner.py
--- a/pypy/jit/backend/llgraph/runner.py
+++ b/pypy/jit/backend/llgraph/runner.py
@@ -23,8 +23,10 @@
 class Descr(history.AbstractDescr):
 
     def __init__(self, ofs, typeinfo, extrainfo=None, name=None,
-                 arg_types=None, count_fields_if_immut=-1, ffi_flags=0):
+                 arg_types=None, count_fields_if_immut=-1, ffi_flags=0, width=-1):
+
         self.ofs = ofs
+        self.width = width
         self.typeinfo = typeinfo
         self.extrainfo = extrainfo
         self.name = name
@@ -119,14 +121,14 @@
         return False
 
     def getdescr(self, ofs, typeinfo='?', extrainfo=None, name=None,
-                 arg_types=None, count_fields_if_immut=-1, ffi_flags=0):
+                 arg_types=None, count_fields_if_immut=-1, ffi_flags=0, width=-1):
         key = (ofs, typeinfo, extrainfo, name, arg_types,
-               count_fields_if_immut, ffi_flags)
+               count_fields_if_immut, ffi_flags, width)
         try:
             return self._descrs[key]
         except KeyError:
             descr = Descr(ofs, typeinfo, extrainfo, name, arg_types,
-                          count_fields_if_immut, ffi_flags)
+                          count_fields_if_immut, ffi_flags, width)
             self._descrs[key] = descr
             return descr
 
@@ -140,17 +142,17 @@
         old, oldindex = faildescr._compiled_fail
         llimpl.compile_redirect_fail(old, oldindex, c)
 
-    def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
+    def compile_loop(self, inputargs, operations, jitcell_token, log=True, name=''):
         """In a real assembler backend, this should assemble the given
         list of operations.  Here we just generate a similar CompiledLoop
         instance.  The code here is RPython, whereas the code in llimpl
         is not.
         """
         c = llimpl.compile_start()
-        clt = model.CompiledLoopToken(self, looptoken.number)
+        clt = model.CompiledLoopToken(self, jitcell_token.number)
         clt.loop_and_bridges = [c]
         clt.compiled_version = c
-        looptoken.compiled_loop_token = clt
+        jitcell_token.compiled_loop_token = clt
         self._compile_loop_or_bridge(c, inputargs, operations)
 
     def free_loop_and_bridges(self, compiled_loop_token):
@@ -179,10 +181,13 @@
             descr = op.getdescr()
             if isinstance(descr, Descr):
                 llimpl.compile_add_descr(c, descr.ofs, descr.typeinfo,
-                                         descr.arg_types)
-            if (isinstance(descr, history.LoopToken) and
-                op.getopnum() != rop.JUMP):
+                                         descr.arg_types, descr.extrainfo,
+                                         descr.width)
+            if isinstance(descr, history.JitCellToken):
+                assert op.getopnum() != rop.JUMP
                 llimpl.compile_add_loop_token(c, descr)
+            if isinstance(descr, history.TargetToken) and op.getopnum() == rop.LABEL:
+                llimpl.compile_add_target_token(c, descr)
             if self.is_oo and isinstance(descr, (OODescr, MethDescr)):
                 # hack hack, not rpython
                 c._obj.externalobj.operations[-1].setdescr(descr)
@@ -236,9 +241,7 @@
         assert op.is_final()
         if op.getopnum() == rop.JUMP:
             targettoken = op.getdescr()
-            assert isinstance(targettoken, history.LoopToken)
-            compiled_version = targettoken.compiled_loop_token.compiled_version
-            llimpl.compile_add_jump_target(c, compiled_version)
+            llimpl.compile_add_jump_target(c, targettoken)
         elif op.getopnum() == rop.FINISH:
             faildescr = op.getdescr()
             index = self.get_fail_descr_number(faildescr)
@@ -327,10 +330,22 @@
 
     def interiorfielddescrof(self, A, fieldname):
         S = A.OF
-        ofs2 = symbolic.get_size(A)
+        width = symbolic.get_size(A)
         ofs, size = symbolic.get_field_token(S, fieldname)
         token = history.getkind(getattr(S, fieldname))
-        return self.getdescr(ofs, token[0], name=fieldname, extrainfo=ofs2)
+        return self.getdescr(ofs, token[0], name=fieldname, width=width)
+
+    def interiorfielddescrof_dynamic(self, offset, width, fieldsize,
+        is_pointer, is_float, is_signed):
+
+        if is_pointer:
+            typeinfo = REF
+        elif is_float:
+            typeinfo = FLOAT
+        else:
+            typeinfo = INT
+        # we abuse the arg_types field to distinguish dynamic and static descrs
+        return Descr(offset, typeinfo, arg_types='dynamic', name='<dynamic interior field>', width=width)
 
     def calldescrof(self, FUNC, ARGS, RESULT, extrainfo):
         arg_types = []
@@ -368,6 +383,7 @@
 
     def arraydescrof(self, A):
         assert A.OF != lltype.Void
+        assert isinstance(A, lltype.GcArray) or A._hints.get('nolength', False)
         size = symbolic.get_size(A)
         if isinstance(A.OF, lltype.Ptr) or isinstance(A.OF, lltype.Primitive):
             token = history.getkind(A.OF)[0]
diff --git a/pypy/jit/backend/llsupport/asmmemmgr.py b/pypy/jit/backend/llsupport/asmmemmgr.py
--- a/pypy/jit/backend/llsupport/asmmemmgr.py
+++ b/pypy/jit/backend/llsupport/asmmemmgr.py
@@ -37,25 +37,25 @@
             self._add_free_block(smaller_stop, stop)
             stop = smaller_stop
             result = (start, stop)
-        self.total_mallocs += stop - start
+        self.total_mallocs += r_uint(stop - start)
         return result   # pair (start, stop)
 
     def free(self, start, stop):
         """Free a block (start, stop) returned by a previous malloc()."""
-        self.total_mallocs -= (stop - start)
+        self.total_mallocs -= r_uint(stop - start)
         self._add_free_block(start, stop)
 
     def open_malloc(self, minsize):
         """Allocate at least minsize bytes.  Returns (start, stop)."""
         result = self._allocate_block(minsize)
         (start, stop) = result
-        self.total_mallocs += stop - start
+        self.total_mallocs += r_uint(stop - start)
         return result
 
     def open_free(self, middle, stop):
         """Used for freeing the end of an open-allocated block of memory."""
         if stop - middle >= self.min_fragment:
-            self.total_mallocs -= (stop - middle)
+            self.total_mallocs -= r_uint(stop - middle)
             self._add_free_block(middle, stop)
             return True
         else:
@@ -77,7 +77,7 @@
                 # Hack to make sure that mcs are not within 32-bits of one
                 # another for testing purposes
                 rmmap.hint.pos += 0x80000000 - size
-        self.total_memory_allocated += size
+        self.total_memory_allocated += r_uint(size)
         data = rffi.cast(lltype.Signed, data)
         return self._add_free_block(data, data + size)
 
diff --git a/pypy/jit/backend/llsupport/descr.py b/pypy/jit/backend/llsupport/descr.py
--- a/pypy/jit/backend/llsupport/descr.py
+++ b/pypy/jit/backend/llsupport/descr.py
@@ -111,6 +111,16 @@
     def repr_of_descr(self):
         return '<%s %s %s>' % (self._clsname, self.name, self.offset)
 
+class DynamicFieldDescr(BaseFieldDescr):
+    def __init__(self, offset, fieldsize, is_pointer, is_float, is_signed):
+        self.offset = offset
+        self._fieldsize = fieldsize
+        self._is_pointer_field = is_pointer
+        self._is_float_field = is_float
+        self._is_field_signed = is_signed
+
+    def get_field_size(self, translate_support_code):
+        return self._fieldsize
 
 class NonGcPtrFieldDescr(BaseFieldDescr):
     _clsname = 'NonGcPtrFieldDescr'
@@ -182,6 +192,7 @@
     def repr_of_descr(self):
         return '<%s>' % self._clsname
 
+
 class NonGcPtrArrayDescr(BaseArrayDescr):
     _clsname = 'NonGcPtrArrayDescr'
     def get_item_size(self, translate_support_code):
@@ -211,6 +222,13 @@
     def get_ofs_length(self, translate_support_code):
         return -1
 
+class DynamicArrayNoLengthDescr(BaseArrayNoLengthDescr):
+    def __init__(self, itemsize):
+        self.itemsize = itemsize
+
+    def get_item_size(self, translate_support_code):
+        return self.itemsize
+
 class NonGcPtrArrayNoLengthDescr(BaseArrayNoLengthDescr):
     _clsname = 'NonGcPtrArrayNoLengthDescr'
     def get_item_size(self, translate_support_code):
@@ -281,6 +299,9 @@
     def is_float_field(self):
         return self.fielddescr.is_float_field()
 
+    def sort_key(self):
+        return self.fielddescr.sort_key()
+
     def repr_of_descr(self):
         return '<InteriorFieldDescr %s>' % self.fielddescr.repr_of_descr()
 
@@ -302,12 +323,16 @@
     _clsname = ''
     loop_token = None
     arg_classes = ''     # <-- annotation hack
-    ffi_flags = 0
+    ffi_flags = 1
 
-    def __init__(self, arg_classes, extrainfo=None, ffi_flags=0):
+    def __init__(self, arg_classes, extrainfo=None, ffi_flags=1):
         self.arg_classes = arg_classes    # string of "r" and "i" (ref/int)
         self.extrainfo = extrainfo
         self.ffi_flags = ffi_flags
+        # NB. the default ffi_flags is 1, meaning FUNCFLAG_CDECL, which
+        # makes sense on Windows as it's the one for all the C functions
+        # we are compiling together with the JIT.  On non-Windows platforms
+        # it is just ignored anyway.
 
     def __repr__(self):
         res = '%s(%s)' % (self.__class__.__name__, self.arg_classes)
@@ -348,6 +373,10 @@
         return False    # unless overridden
 
     def create_call_stub(self, rtyper, RESULT):
+        from pypy.rlib.clibffi import FFI_DEFAULT_ABI
+        assert self.get_call_conv() == FFI_DEFAULT_ABI, (
+            "%r: create_call_stub() with a non-default call ABI" % (self,))
+
         def process(c):
             if c == 'L':
                 assert longlong.supports_longlong
@@ -442,7 +471,7 @@
     """
     _clsname = 'DynamicIntCallDescr'
 
-    def __init__(self, arg_classes, result_size, result_sign, extrainfo=None, ffi_flags=0):
+    def __init__(self, arg_classes, result_size, result_sign, extrainfo, ffi_flags):
         BaseIntCallDescr.__init__(self, arg_classes, extrainfo, ffi_flags)
         assert isinstance(result_sign, bool)
         self._result_size = chr(result_size)
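
Two related tightenings in descr.py: the default ffi_flags becomes 1 (FUNCFLAG_CDECL, per the comment above), and DynamicIntCallDescr / get_call_descr_dynamic now take extrainfo and ffi_flags as mandatory arguments, so callers can no longer omit them silently. A hedged usage sketch, with made-up values but the constructor and accessor shown in the hunks above:

    from pypy.jit.backend.llsupport.descr import DynamicIntCallDescr

    # 'ii' -> two int-class arguments; result is a signed 4-byte integer
    descr = DynamicIntCallDescr('ii', 4, True, None, 1)   # extrainfo=None, ffi_flags=1
    assert descr.get_ffi_flags() == 1    # 1 == FUNCFLAG_CDECL, the new default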
diff --git a/pypy/jit/backend/llsupport/ffisupport.py b/pypy/jit/backend/llsupport/ffisupport.py
--- a/pypy/jit/backend/llsupport/ffisupport.py
+++ b/pypy/jit/backend/llsupport/ffisupport.py
@@ -8,7 +8,7 @@
 class UnsupportedKind(Exception):
     pass
 
-def get_call_descr_dynamic(cpu, ffi_args, ffi_result, extrainfo=None, ffi_flags=0):
+def get_call_descr_dynamic(cpu, ffi_args, ffi_result, extrainfo, ffi_flags):
     """Get a call descr: the types of result and args are represented by
     rlib.libffi.types.*"""
     try:
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -648,11 +648,10 @@
         # make a malloc function, with two arguments
         def malloc_basic(size, tid):
             type_id = llop.extract_ushort(llgroup.HALFWORD, tid)
-            has_finalizer = bool(tid & (1<<llgroup.HALFSHIFT))
             check_typeid(type_id)
             res = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
                                                   type_id, size,
-                                                  has_finalizer, False)
+                                                  False, False, False)
             # In case the operation above failed, we are returning NULL
             # from this function to assembler.  There is also an RPython
             # exception set, typically MemoryError; but it's easier and
@@ -723,7 +722,7 @@
             # also use it to allocate varsized objects.  The tid
             # and possibly the length are both set afterward.
             gcref = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
-                                        0, size, False, False)
+                                        0, size, False, False, False)
             return rffi.cast(lltype.Signed, gcref)
         self.malloc_slowpath = malloc_slowpath
         self.MALLOC_SLOWPATH = lltype.FuncType([lltype.Signed], lltype.Signed)
@@ -746,9 +745,8 @@
     def init_size_descr(self, S, descr):
         type_id = self.layoutbuilder.get_type_id(S)
         assert not self.layoutbuilder.is_weakref_type(S)
-        has_finalizer = bool(self.layoutbuilder.has_finalizer(S))
-        flags = int(has_finalizer) << llgroup.HALFSHIFT
-        descr.tid = llop.combine_ushort(lltype.Signed, type_id, flags)
+        assert not self.layoutbuilder.has_finalizer(S)
+        descr.tid = llop.combine_ushort(lltype.Signed, type_id, 0)
 
     def init_array_descr(self, A, descr):
         type_id = self.layoutbuilder.get_type_id(A)
@@ -825,6 +823,15 @@
                                             bool(v.value)): # store a non-NULL
                         self._gen_write_barrier(newops, op.getarg(0), v)
                         op = op.copy_and_change(rop.SETFIELD_RAW)
+            # ---------- write barrier for SETINTERIORFIELD_GC ------
+            if op.getopnum() == rop.SETINTERIORFIELD_GC:
+                val = op.getarg(0)
+                if val is not last_malloc:
+                    v = op.getarg(2)
+                    if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
+                                            bool(v.value)): # store a non-NULL
+                        self._gen_write_barrier(newops, op.getarg(0), v)
+                        op = op.copy_and_change(rop.SETINTERIORFIELD_RAW)
             # ---------- write barrier for SETARRAYITEM_GC ----------
             if op.getopnum() == rop.SETARRAYITEM_GC:
                 val = op.getarg(0)
diff --git a/pypy/jit/backend/llsupport/llmodel.py b/pypy/jit/backend/llsupport/llmodel.py
--- a/pypy/jit/backend/llsupport/llmodel.py
+++ b/pypy/jit/backend/llsupport/llmodel.py
@@ -9,9 +9,10 @@
 from pypy.jit.backend.llsupport import symbolic
 from pypy.jit.backend.llsupport.symbolic import WORD, unroll_basic_sizes
 from pypy.jit.backend.llsupport.descr import (get_size_descr,
-     get_field_descr, BaseFieldDescr, get_array_descr, BaseArrayDescr,
-     get_call_descr, BaseIntCallDescr, GcPtrCallDescr, FloatCallDescr,
-     VoidCallDescr, InteriorFieldDescr, get_interiorfield_descr)
+     get_field_descr, BaseFieldDescr, DynamicFieldDescr, get_array_descr,
+     BaseArrayDescr, DynamicArrayNoLengthDescr, get_call_descr,
+     BaseIntCallDescr, GcPtrCallDescr, FloatCallDescr, VoidCallDescr,
+     InteriorFieldDescr, get_interiorfield_descr)
 from pypy.jit.backend.llsupport.asmmemmgr import AsmMemoryManager
 
 
@@ -238,6 +239,12 @@
     def interiorfielddescrof(self, A, fieldname):
         return get_interiorfield_descr(self.gc_ll_descr, A, A.OF, fieldname)
 
+    def interiorfielddescrof_dynamic(self, offset, width, fieldsize,
+        is_pointer, is_float, is_signed):
+        arraydescr = DynamicArrayNoLengthDescr(width)
+        fielddescr = DynamicFieldDescr(offset, fieldsize, is_pointer, is_float, is_signed)
+        return InteriorFieldDescr(arraydescr, fielddescr)
+
     def unpack_arraydescr(self, arraydescr):
         assert isinstance(arraydescr, BaseArrayDescr)
         return arraydescr.get_base_size(self.translate_support_code)
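
The new interiorfielddescrof_dynamic hook simply wraps the two Dynamic* descr classes added above in an InteriorFieldDescr, for arrays of structs whose layout is only known at runtime. A small sketch of what the hook builds (the field layout values are made up; the constructors are the ones defined in descr.py above):

    from pypy.jit.backend.llsupport.descr import (DynamicArrayNoLengthDescr,
        DynamicFieldDescr, InteriorFieldDescr)

    # e.g. a float field at byte offset 8 inside 16-byte array items
    arraydescr = DynamicArrayNoLengthDescr(16)    # itemsize in bytes
    # DynamicFieldDescr(offset, fieldsize, is_pointer, is_float, is_signed)
    fielddescr = DynamicFieldDescr(8, 8, False, True, False)
    descr = InteriorFieldDescr(arraydescr, fielddescr)
    assert descr.is_float_field()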
diff --git a/pypy/jit/backend/llsupport/regalloc.py b/pypy/jit/backend/llsupport/regalloc.py
--- a/pypy/jit/backend/llsupport/regalloc.py
+++ b/pypy/jit/backend/llsupport/regalloc.py
@@ -16,32 +16,101 @@
     """ Manage frame positions
     """
     def __init__(self):
-        self.frame_bindings = {}
-        self.frame_depth    = 0
+        self.bindings = {}
+        self.used = []      # list of bools
+        self.hint_frame_locations = {}
+
+    frame_depth = property(lambda:xxx, lambda:xxx)   # XXX kill me
+
+    def get_frame_depth(self):
+        return len(self.used)
 
     def get(self, box):
-        return self.frame_bindings.get(box, None)
+        return self.bindings.get(box, None)
 
     def loc(self, box):
-        res = self.get(box)
-        if res is not None:
-            return res
+        """Return or create the frame location associated with 'box'."""
+        # first check if it's already in the frame_manager
+        try:
+            return self.bindings[box]
+        except KeyError:
+            pass
+        # check if we have a hint for this box
+        if box in self.hint_frame_locations:
+            # if we do, try to reuse the location for this box
+            loc = self.hint_frame_locations[box]
+            if self.try_to_reuse_location(box, loc):
+                return loc
+        # no valid hint.  make up a new free location
+        return self.get_new_loc(box)
+
+    def get_new_loc(self, box):
         size = self.frame_size(box.type)
-        self.frame_depth += ((-self.frame_depth) & (size-1))
-        # ^^^ frame_depth is rounded up to a multiple of 'size', assuming
+        # frame_depth is rounded up to a multiple of 'size', assuming
         # that 'size' is a power of two.  The reason for doing so is to
         # avoid obscure issues in jump.py with stack locations that try
         # to move from position (6,7) to position (7,8).
-        newloc = self.frame_pos(self.frame_depth, box.type)
-        self.frame_bindings[box] = newloc
-        self.frame_depth += size
+        while self.get_frame_depth() & (size - 1):
+            self.used.append(False)
+        #
+        index = self.get_frame_depth()
+        newloc = self.frame_pos(index, box.type)
+        for i in range(size):
+            self.used.append(True)
+        #
+        if not we_are_translated():    # extra testing
+            testindex = self.get_loc_index(newloc)
+            assert testindex == index
+        #
+        self.bindings[box] = newloc
         return newloc
 
+    def set_binding(self, box, loc):
+        self.bindings[box] = loc
+        #
+        index = self.get_loc_index(loc)
+        endindex = index + self.frame_size(box.type)
+        while len(self.used) < endindex:
+            self.used.append(False)
+        while index < endindex:
+            self.used[index] = True
+            index += 1
+
     def reserve_location_in_frame(self, size):
-        frame_depth = self.frame_depth
-        self.frame_depth += size
+        frame_depth = self.get_frame_depth()
+        for i in range(size):
+            self.used.append(True)
         return frame_depth
 
+    def mark_as_free(self, box):
+        try:
+            loc = self.bindings[box]
+        except KeyError:
+            return    # already gone
+        del self.bindings[box]
+        #
+        size = self.frame_size(box.type)
+        baseindex = self.get_loc_index(loc)
+        for i in range(size):
+            index = baseindex + i
+            assert 0 <= index < len(self.used)
+            self.used[index] = False
+
+    def try_to_reuse_location(self, box, loc):
+        index = self.get_loc_index(loc)
+        assert index >= 0
+        size = self.frame_size(box.type)
+        for i in range(size):
+            while (index + i) >= len(self.used):
+                self.used.append(False)
+            if self.used[index + i]:
+                return False    # already in use
+        # good, we can reuse the location
+        for i in range(size):
+            self.used[index + i] = True
+        self.bindings[box] = loc
+        return True
+
     # abstract methods that need to be overwritten for specific assemblers
     @staticmethod
     def frame_pos(loc, type):
@@ -49,6 +118,10 @@
     @staticmethod
     def frame_size(type):
         return 1
+    @staticmethod
+    def get_loc_index(loc):
+        raise NotImplementedError("Purely abstract")
+
 
 class RegisterManager(object):
     """ Class that keeps track of register allocations
@@ -68,7 +141,14 @@
         self.frame_manager = frame_manager
         self.assembler = assembler
 
+    def is_still_alive(self, v):
+        # Check if 'v' is alive at the current position.
+        # Return False if the last usage is strictly before.
+        return self.longevity[v][1] >= self.position
+
     def stays_alive(self, v):
+        # Check if 'v' stays alive after the current position.
+        # Return False if the last usage is before or at position.
         return self.longevity[v][1] > self.position
 
     def next_instruction(self, incr=1):
@@ -84,11 +164,14 @@
             point for all variables that might be in registers.
         """
         self._check_type(v)
-        if isinstance(v, Const) or v not in self.reg_bindings:
+        if isinstance(v, Const):
             return
         if v not in self.longevity or self.longevity[v][1] <= self.position:
-            self.free_regs.append(self.reg_bindings[v])
-            del self.reg_bindings[v]
+            if v in self.reg_bindings:
+                self.free_regs.append(self.reg_bindings[v])
+                del self.reg_bindings[v]
+            if self.frame_manager is not None:
+                self.frame_manager.mark_as_free(v)
 
     def possibly_free_vars(self, vars):
         """ Same as 'possibly_free_var', but for all v in vars.
diff --git a/pypy/jit/backend/llsupport/test/test_ffisupport.py b/pypy/jit/backend/llsupport/test/test_ffisupport.py
--- a/pypy/jit/backend/llsupport/test/test_ffisupport.py
+++ b/pypy/jit/backend/llsupport/test/test_ffisupport.py
@@ -13,44 +13,46 @@
 
 def test_call_descr_dynamic():
     args = [types.sint, types.pointer]
-    descr = get_call_descr_dynamic(FakeCPU(), args, types.sint, ffi_flags=42)
+    descr = get_call_descr_dynamic(FakeCPU(), args, types.sint, None,
+                                   ffi_flags=42)
     assert isinstance(descr, DynamicIntCallDescr)
     assert descr.arg_classes == 'ii'
     assert descr.get_ffi_flags() == 42
 
     args = [types.sint, types.double, types.pointer]
-    descr = get_call_descr_dynamic(FakeCPU(), args, types.void)
+    descr = get_call_descr_dynamic(FakeCPU(), args, types.void, None, 42)
     assert descr is None    # missing floats
     descr = get_call_descr_dynamic(FakeCPU(supports_floats=True),
-                                   args, types.void, ffi_flags=43)
+                                   args, types.void, None, ffi_flags=43)
     assert isinstance(descr, VoidCallDescr)
     assert descr.arg_classes == 'ifi'
     assert descr.get_ffi_flags() == 43
 
-    descr = get_call_descr_dynamic(FakeCPU(), [], types.sint8)
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.sint8, None, 42)
     assert isinstance(descr, DynamicIntCallDescr)
     assert descr.get_result_size(False) == 1
     assert descr.is_result_signed() == True
 
-    descr = get_call_descr_dynamic(FakeCPU(), [], types.uint8)
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.uint8, None, 42)
     assert isinstance(descr, DynamicIntCallDescr)
     assert descr.get_result_size(False) == 1
     assert descr.is_result_signed() == False
 
     if not is_64_bit:
-        descr = get_call_descr_dynamic(FakeCPU(), [], types.slonglong)
+        descr = get_call_descr_dynamic(FakeCPU(), [], types.slonglong,
+                                       None, 42)
         assert descr is None   # missing longlongs
         descr = get_call_descr_dynamic(FakeCPU(supports_longlong=True),
-                                       [], types.slonglong, ffi_flags=43)
+                                       [], types.slonglong, None, ffi_flags=43)
         assert isinstance(descr, LongLongCallDescr)
         assert descr.get_ffi_flags() == 43
     else:
         assert types.slonglong is types.slong
 
-    descr = get_call_descr_dynamic(FakeCPU(), [], types.float)
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.float, None, 42)
     assert descr is None   # missing singlefloats
     descr = get_call_descr_dynamic(FakeCPU(supports_singlefloats=True),
-                                   [], types.float, ffi_flags=44)
+                                   [], types.float, None, ffi_flags=44)
     SingleFloatCallDescr = getCallDescrClass(rffi.FLOAT)
     assert isinstance(descr, SingleFloatCallDescr)
     assert descr.get_ffi_flags() == 44
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -247,12 +247,14 @@
         self.record = []
 
     def do_malloc_fixedsize_clear(self, RESTYPE, type_id, size,
-                                  has_finalizer, contains_weakptr):
+                                  has_finalizer, has_light_finalizer,
+                                  contains_weakptr):
         assert not contains_weakptr
+        assert not has_finalizer           # in these tests
+        assert not has_light_finalizer     # in these tests
         p = llmemory.raw_malloc(size)
         p = llmemory.cast_adr_to_ptr(p, RESTYPE)
-        flags = int(has_finalizer) << 16
-        tid = llop.combine_ushort(lltype.Signed, type_id, flags)
+        tid = llop.combine_ushort(lltype.Signed, type_id, 0)
         self.record.append(("fixedsize", repr(size), tid, p))
         return p
 
@@ -568,6 +570,28 @@
             assert operations[1].getarg(2) == v_value
             assert operations[1].getdescr() == array_descr
 
+    def test_rewrite_assembler_5(self):
+        S = lltype.GcStruct('S')
+        A = lltype.GcArray(lltype.Struct('A', ('x', lltype.Ptr(S))))
+        interiordescr = get_interiorfield_descr(self.gc_ll_descr, A,
+                                                A.OF, 'x')
+        wbdescr = self.gc_ll_descr.write_barrier_descr
+        ops = parse("""
+        [p1, p2]
+        setinteriorfield_gc(p1, 0, p2, descr=interiordescr)
+        jump(p1, p2)
+        """, namespace=locals())
+        expected = parse(""" 
+        [p1, p2]
+        cond_call_gc_wb(p1, p2, descr=wbdescr)
+        setinteriorfield_raw(p1, 0, p2, descr=interiordescr)
+        jump(p1, p2)
+        """, namespace=locals())
+        operations = get_deep_immutable_oplist(ops.operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
+        equaloplists(operations, expected.operations)
+
     def test_rewrite_assembler_initialization_store(self):
         S = lltype.GcStruct('S', ('parent', OBJECT),
                             ('x', lltype.Signed))
diff --git a/pypy/jit/backend/llsupport/test/test_regalloc.py b/pypy/jit/backend/llsupport/test/test_regalloc.py
--- a/pypy/jit/backend/llsupport/test/test_regalloc.py
+++ b/pypy/jit/backend/llsupport/test/test_regalloc.py
@@ -2,6 +2,8 @@
 from pypy.jit.metainterp.history import BoxInt, ConstInt, BoxFloat, INT, FLOAT
 from pypy.jit.backend.llsupport.regalloc import FrameManager
 from pypy.jit.backend.llsupport.regalloc import RegisterManager as BaseRegMan
+from pypy.jit.tool.oparser import parse
+from pypy.jit.backend.detect_cpu import getcpuclass
 
 def newboxes(*values):
     return [BoxInt(v) for v in values]
@@ -40,8 +42,13 @@
     def frame_size(self, box_type):
         if box_type == FLOAT:
             return 2
+        elif box_type == INT:
+            return 1
         else:
-            return 1
+            raise ValueError(box_type)
+    def get_loc_index(self, loc):
+        assert isinstance(loc, FakeFramePos)
+        return loc.pos
 
 class MockAsm(object):
     def __init__(self):
@@ -280,7 +287,7 @@
             rm.force_allocate_reg(b)
         rm.before_call()
         assert len(rm.reg_bindings) == 2
-        assert fm.frame_depth == 2
+        assert fm.get_frame_depth() == 2
         assert len(asm.moves) == 2
         rm._check_invariants()
         rm.after_call(boxes[-1])
@@ -303,7 +310,7 @@
             rm.force_allocate_reg(b)
         rm.before_call(save_all_regs=True)
         assert len(rm.reg_bindings) == 0
-        assert fm.frame_depth == 4
+        assert fm.get_frame_depth() == 4
         assert len(asm.moves) == 4
         rm._check_invariants()
         rm.after_call(boxes[-1])
@@ -325,7 +332,7 @@
         xrm = XRegisterManager(longevity, frame_manager=fm, assembler=asm)
         xrm.loc(f0)
         rm.loc(b0)
-        assert fm.frame_depth == 3
+        assert fm.get_frame_depth() == 3
         
         
 
@@ -346,3 +353,123 @@
         spilled2 = rm.force_allocate_reg(b5)
         assert spilled2 is loc
         rm._check_invariants()
+
+
+    def test_hint_frame_locations_1(self):
+        b0, = newboxes(0)
+        fm = TFrameManager()
+        loc123 = FakeFramePos(123, INT)
+        fm.hint_frame_locations[b0] = loc123
+        assert fm.get_frame_depth() == 0
+        loc = fm.loc(b0)
+        assert loc == loc123
+        assert fm.get_frame_depth() == 124
+
+    def test_hint_frame_locations_2(self):
+        b0, b1, b2 = newboxes(0, 1, 2)
+        longevity = {b0: (0, 1), b1: (0, 2), b2: (0, 2)}
+        fm = TFrameManager()
+        asm = MockAsm()
+        rm = RegisterManager(longevity, frame_manager=fm, assembler=asm)
+        rm.force_allocate_reg(b0)
+        rm.force_allocate_reg(b1)
+        rm.force_allocate_reg(b2)
+        rm.force_spill_var(b0)
+        loc = rm.loc(b0)
+        assert isinstance(loc, FakeFramePos)
+        assert fm.get_loc_index(loc) == 0
+        rm.position = 1
+        assert fm.used == [True]
+        rm.possibly_free_var(b0)
+        assert fm.used == [False]
+        #
+        fm.hint_frame_locations[b1] = loc
+        rm.force_spill_var(b1)
+        loc1 = rm.loc(b1)
+        assert loc1 == loc
+        assert fm.used == [True]
+        #
+        fm.hint_frame_locations[b2] = loc
+        rm.force_spill_var(b2)
+        loc2 = rm.loc(b2)
+        assert loc2 != loc1     # because it was not free
+        assert fm.used == [True, True]
+        #
+        rm._check_invariants()
+
+    def test_frame_manager_basic(self):
+        b0, b1 = newboxes(0, 1)
+        fm = TFrameManager()
+        loc0 = fm.loc(b0)
+        assert fm.get_loc_index(loc0) == 0
+        #
+        assert fm.get(b1) is None
+        loc1 = fm.loc(b1)
+        assert fm.get_loc_index(loc1) == 1
+        assert fm.get(b1) == loc1
+        #
+        loc0b = fm.loc(b0)
+        assert loc0b == loc0
+        #
+        fm.loc(BoxInt())
+        assert fm.get_frame_depth() == 3
+        #
+        f0 = BoxFloat()
+        locf0 = fm.loc(f0)
+        assert fm.get_loc_index(locf0) == 4
+        assert fm.get_frame_depth() == 6
+        #
+        f1 = BoxFloat()
+        locf1 = fm.loc(f1)
+        assert fm.get_loc_index(locf1) == 6
+        assert fm.get_frame_depth() == 8
+        assert fm.used == [True, True, True, False, True, True, True, True]
+        #
+        fm.mark_as_free(b0)
+        assert fm.used == [False, True, True, False, True, True, True, True]
+        fm.mark_as_free(b0)
+        assert fm.used == [False, True, True, False, True, True, True, True]
+        fm.mark_as_free(f1)
+        assert fm.used == [False, True, True, False, True, True, False, False]
+        #
+        fm.reserve_location_in_frame(1)
+        assert fm.get_frame_depth() == 9
+        assert fm.used == [False, True, True, False, True, True, False, False, True]
+        #
+        assert b0 not in fm.bindings
+        fm.set_binding(b0, loc0)
+        assert b0 in fm.bindings
+        assert fm.used == [True, True, True, False, True, True, False, False, True]
+        #
+        b3 = BoxInt()
+        assert not fm.try_to_reuse_location(b3, loc0)
+        assert fm.used == [True, True, True, False, True, True, False, False, True]
+        #
+        fm.mark_as_free(b0)
+        assert fm.used == [False, True, True, False, True, True, False, False, True]
+        assert fm.try_to_reuse_location(b3, loc0)
+        assert fm.used == [True, True, True, False, True, True, False, False, True]
+        #
+        fm.mark_as_free(b0)   # already free
+        assert fm.used == [True, True, True, False, True, True, False, False, True]
+        #
+        fm.mark_as_free(b3)
+        assert fm.used == [False, True, True, False, True, True, False, False, True]
+        f3 = BoxFloat()
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(0, FLOAT))
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(1, FLOAT))
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(2, FLOAT))
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(3, FLOAT))
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(4, FLOAT))
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(5, FLOAT))
+        assert fm.used == [False, True, True, False, True, True, False, False, True]
+        assert fm.try_to_reuse_location(f3, fm.frame_pos(6, FLOAT))
+        assert fm.used == [False, True, True, False, True, True, True, True, True]
+        #
+        fm.used = [False]
+        assert fm.try_to_reuse_location(BoxFloat(), fm.frame_pos(0, FLOAT))
+        assert fm.used == [True, True]
+        #
+        fm.used = [True]
+        assert not fm.try_to_reuse_location(BoxFloat(), fm.frame_pos(0, FLOAT))
+        assert fm.used == [True]
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -173,38 +173,35 @@
             lst[n] = None
         self.fail_descr_free_list.extend(faildescr_indices)
 
-    @staticmethod
-    def sizeof(S):
+    def sizeof(self, S):
         raise NotImplementedError
 
-    @staticmethod
-    def fielddescrof(S, fieldname):
+    def fielddescrof(self, S, fieldname):
         """Return the Descr corresponding to field 'fieldname' on the
         structure 'S'.  It is important that this function (at least)
         caches the results."""
         raise NotImplementedError
 
-    @staticmethod
-    def arraydescrof(A):
+    def interiorfielddescrof(self, A, fieldname):
         raise NotImplementedError
 
-    @staticmethod
-    def calldescrof(FUNC, ARGS, RESULT):
+    def interiorfielddescrof_dynamic(self, offset, width, fieldsize, is_pointer,
+        is_float, is_signed):
+        raise NotImplementedError
+
+    def arraydescrof(self, A):
+        raise NotImplementedError
+
+    def calldescrof(self, FUNC, ARGS, RESULT):
         # FUNC is the original function type, but ARGS is a list of types
         # with Voids removed
         raise NotImplementedError
 
-    @staticmethod
-    def methdescrof(SELFTYPE, methname):
+    def methdescrof(self, SELFTYPE, methname):
         # must return a subclass of history.AbstractMethDescr
         raise NotImplementedError
 
-    @staticmethod
-    def typedescrof(TYPE):
-        raise NotImplementedError
-
-    @staticmethod
-    def interiorfielddescrof(A, fieldname):
+    def typedescrof(self, TYPE):
         raise NotImplementedError
 
     # ---------- the backend-dependent operations ----------
diff --git a/pypy/jit/backend/test/calling_convention_test.py b/pypy/jit/backend/test/calling_convention_test.py
--- a/pypy/jit/backend/test/calling_convention_test.py
+++ b/pypy/jit/backend/test/calling_convention_test.py
@@ -2,7 +2,7 @@
                                          AbstractDescr,
                                          BasicFailDescr,
                                          BoxInt, Box, BoxPtr,
-                                         LoopToken,
+                                         JitCellToken,
                                          ConstInt, ConstPtr,
                                          BoxObj, Const,
                                          ConstObj, BoxFloat, ConstFloat)
@@ -107,7 +107,7 @@
             ops += 'finish(f99, %s)\n' % arguments
 
             loop = parse(ops, namespace=locals())
-            looptoken = LoopToken()
+            looptoken = JitCellToken()
             done_number = self.cpu.get_fail_descr_number(loop.operations[-1].getdescr())
             self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
             expected_result = self._prepare_args(args, floats, ints)
@@ -253,7 +253,7 @@
             called_ops += 'finish(f%d, descr=fdescr3)\n' % total_index
             # compile called loop
             called_loop = parse(called_ops, namespace=locals())
-            called_looptoken = LoopToken()
+            called_looptoken = JitCellToken()
             called_looptoken.outermost_jitdriver_sd = FakeJitDriverSD()
             done_number = self.cpu.get_fail_descr_number(called_loop.operations[-1].getdescr())
             self.cpu.compile_loop(called_loop.inputargs, called_loop.operations, called_looptoken)
@@ -284,7 +284,7 @@
             # we want to take the fast path
             self.cpu.done_with_this_frame_float_v = done_number
             try:
-                othertoken = LoopToken()
+                othertoken = JitCellToken()
                 self.cpu.compile_loop(loop.inputargs, loop.operations, othertoken)
 
                 # prepare call to called_loop
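
From here on the tests replace LoopToken with JitCellToken and, wherever the trace loops, add an explicit LABEL operation carrying a TargetToken; the backward JUMP now names that TargetToken rather than the loop token itself. A condensed sketch of the shape the updated runner tests build, using the same classes as in the diffs (`cpu` stands for the backend instance under test and is assumed to exist):

    from pypy.jit.metainterp.history import (BoxInt, ConstInt, BasicFailDescr,
                                             JitCellToken, TargetToken)
    from pypy.jit.metainterp.resoperation import ResOperation, rop

    i0, i1, i2 = BoxInt(), BoxInt(), BoxInt()
    looptoken = JitCellToken()           # names the compiled unit as a whole
    targettoken = TargetToken()          # names the LABEL inside it
    operations = [
        ResOperation(rop.LABEL, [i0], None, descr=targettoken),
        ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
        ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
        ResOperation(rop.GUARD_TRUE, [i2], None, descr=BasicFailDescr(2)),
        ResOperation(rop.JUMP, [i1], None, descr=targettoken),  # back to the LABEL
    ]
    operations[3].setfailargs([i1])
    cpu.compile_loop([i0], operations, looptoken)
    fail = cpu.execute_token(looptoken, 2)   # iterates until the guard fails
    assert fail.identifier == 2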
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -3,7 +3,7 @@
                                          AbstractDescr,
                                          BasicFailDescr,
                                          BoxInt, Box, BoxPtr,
-                                         LoopToken,
+                                         JitCellToken, TargetToken,
                                          ConstInt, ConstPtr,
                                          BoxObj,
                                          ConstObj, BoxFloat, ConstFloat)
@@ -32,7 +32,7 @@
                                                                 result_type,
                                                                 valueboxes,
                                                                 descr)
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop(inputargs, operations, looptoken)
         args = []
         for box in inputargs:
@@ -103,7 +103,7 @@
             ResOperation(rop.FINISH, [i1], None, descr=BasicFailDescr(1))
             ]
         inputargs = [i0]
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop(inputargs, operations, looptoken)
         fail = self.cpu.execute_token(looptoken, 2)
         res = self.cpu.get_latest_value_int(0)
@@ -114,15 +114,17 @@
         i0 = BoxInt()
         i1 = BoxInt()
         i2 = BoxInt()
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
+        targettoken = TargetToken()
         operations = [
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=BasicFailDescr(2)),
-            ResOperation(rop.JUMP, [i1], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken),
             ]
         inputargs = [i0]
-        operations[2].setfailargs([i1])
+        operations[3].setfailargs([i1])
 
         self.cpu.compile_loop(inputargs, operations, looptoken)
         fail = self.cpu.execute_token(looptoken, 2)
@@ -134,18 +136,22 @@
         i0 = BoxInt()
         i1 = BoxInt()
         i2 = BoxInt()
-        looptoken = LoopToken()
+        i3 = BoxInt()
+        looptoken = JitCellToken()
+        targettoken = TargetToken()
         operations = [
+            ResOperation(rop.INT_SUB, [i3, ConstInt(42)], i0),
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=BasicFailDescr(2)),
-            ResOperation(rop.JUMP, [i1], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken),
             ]
-        inputargs = [i0]
-        operations[2].setfailargs([None, None, i1, None])
+        inputargs = [i3]
+        operations[4].setfailargs([None, None, i1, None])
 
         self.cpu.compile_loop(inputargs, operations, looptoken)
-        fail = self.cpu.execute_token(looptoken, 2)
+        fail = self.cpu.execute_token(looptoken, 44)
         assert fail.identifier == 2
         res = self.cpu.get_latest_value_int(2)
         assert res == 10
@@ -156,15 +162,17 @@
         i0 = BoxInt()
         i1 = BoxInt()
         i2 = BoxInt()
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
+        targettoken = TargetToken()
         operations = [
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=BasicFailDescr()),
-            ResOperation(rop.JUMP, [i1], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken),
             ]
         inputargs = [i0]
-        operations[2].setfailargs([i1])
+        operations[3].setfailargs([i1])
         wr_i1 = weakref.ref(i1)
         wr_guard = weakref.ref(operations[2])
         self.cpu.compile_loop(inputargs, operations, looptoken)
@@ -184,15 +192,17 @@
         i2 = BoxInt()
         faildescr1 = BasicFailDescr(1)
         faildescr2 = BasicFailDescr(2)
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
+        targettoken = TargetToken()
         operations = [
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=faildescr1),
-            ResOperation(rop.JUMP, [i1], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken),
             ]
         inputargs = [i0]
-        operations[2].setfailargs([i1])
+        operations[3].setfailargs([i1])
         self.cpu.compile_loop(inputargs, operations, looptoken)
 
         i1b = BoxInt()
@@ -200,7 +210,7 @@
         bridge = [
             ResOperation(rop.INT_LE, [i1b, ConstInt(19)], i3),
             ResOperation(rop.GUARD_TRUE, [i3], None, descr=faildescr2),
-            ResOperation(rop.JUMP, [i1b], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1b], None, descr=targettoken),
         ]
         bridge[1].setfailargs([i1b])
 
@@ -219,17 +229,21 @@
         i0 = BoxInt()
         i1 = BoxInt()
         i2 = BoxInt()
+        i3 = BoxInt()
         faildescr1 = BasicFailDescr(1)
         faildescr2 = BasicFailDescr(2)
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
+        targettoken = TargetToken()
         operations = [
+            ResOperation(rop.INT_SUB, [i3, ConstInt(42)], i0),
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=faildescr1),
-            ResOperation(rop.JUMP, [i1], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken),
             ]
-        inputargs = [i0]
-        operations[2].setfailargs([None, i1, None])
+        inputargs = [i3]
+        operations[4].setfailargs([None, i1, None])
         self.cpu.compile_loop(inputargs, operations, looptoken)
 
         i1b = BoxInt()
@@ -237,7 +251,7 @@
         bridge = [
             ResOperation(rop.INT_LE, [i1b, ConstInt(19)], i3),
             ResOperation(rop.GUARD_TRUE, [i3], None, descr=faildescr2),
-            ResOperation(rop.JUMP, [i1b], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1b], None, descr=targettoken),
         ]
         bridge[1].setfailargs([i1b])
 
@@ -253,15 +267,17 @@
         i1 = BoxInt()
         i2 = BoxInt()
         faildescr1 = BasicFailDescr(1)
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
+        targettoken = TargetToken()
         operations = [
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=faildescr1),
-            ResOperation(rop.JUMP, [i1], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken),
             ]
         inputargs = [i0]
-        operations[2].setfailargs([None, i1, None])
+        operations[3].setfailargs([None, i1, None])
         self.cpu.compile_loop(inputargs, operations, looptoken)
 
         fail = self.cpu.execute_token(looptoken, 2)
@@ -281,7 +297,7 @@
                     return AbstractFailDescr.__setattr__(self, name, value)
                 py.test.fail("finish descrs should not be touched")
         faildescr = UntouchableFailDescr() # to check that is not touched
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         operations = [
             ResOperation(rop.FINISH, [i0], None, descr=faildescr)
             ]
@@ -291,7 +307,7 @@
         res = self.cpu.get_latest_value_int(0)
         assert res == 99
 
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         operations = [
             ResOperation(rop.FINISH, [ConstInt(42)], None, descr=faildescr)
             ]
@@ -301,7 +317,7 @@
         res = self.cpu.get_latest_value_int(0)
         assert res == 42
 
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         operations = [
             ResOperation(rop.FINISH, [], None, descr=faildescr)
             ]
@@ -310,7 +326,7 @@
         assert fail is faildescr
 
         if self.cpu.supports_floats:
-            looptoken = LoopToken()
+            looptoken = JitCellToken()
             f0 = BoxFloat()
             operations = [
                 ResOperation(rop.FINISH, [f0], None, descr=faildescr)
@@ -322,7 +338,7 @@
             res = self.cpu.get_latest_value_float(0)
             assert longlong.getrealfloat(res) == -61.25
 
-            looptoken = LoopToken()
+            looptoken = JitCellToken()
             operations = [
                 ResOperation(rop.FINISH, [constfloat(42.5)], None, descr=faildescr)
                 ]
@@ -339,14 +355,16 @@
         z = BoxInt(579)
         t = BoxInt(455)
         u = BoxInt(0)    # False
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
+        targettoken = TargetToken()
         operations = [
+            ResOperation(rop.LABEL, [y, x], None, descr=targettoken),
             ResOperation(rop.INT_ADD, [x, y], z),
             ResOperation(rop.INT_SUB, [y, ConstInt(1)], t),
             ResOperation(rop.INT_EQ, [t, ConstInt(0)], u),
             ResOperation(rop.GUARD_FALSE, [u], None,
                          descr=BasicFailDescr()),
-            ResOperation(rop.JUMP, [z, t], None, descr=looptoken),
+            ResOperation(rop.JUMP, [t, z], None, descr=targettoken),
             ]
         operations[-2].setfailargs([t, z])
         cpu.compile_loop([x, y], operations, looptoken)
@@ -406,7 +424,7 @@
                     ]
                 ops[1].setfailargs([v_res])
             #
-            looptoken = LoopToken()
+            looptoken = JitCellToken()
             self.cpu.compile_loop([v1, v2], ops, looptoken)
             for x, y, z in testcases:
                 excvalue = self.cpu.grab_exc_value()
@@ -1067,16 +1085,18 @@
             inputargs.insert(index_counter, i0)
             jumpargs.insert(index_counter, i1)
             #
-            looptoken = LoopToken()
+            looptoken = JitCellToken()
+            targettoken = TargetToken()
             faildescr = BasicFailDescr(15)
             operations = [
+                ResOperation(rop.LABEL, inputargs, None, descr=targettoken),
                 ResOperation(rop.INT_SUB, [i0, ConstInt(1)], i1),
                 ResOperation(rop.INT_GE, [i1, ConstInt(0)], i2),
                 ResOperation(rop.GUARD_TRUE, [i2], None),
-                ResOperation(rop.JUMP, jumpargs, None, descr=looptoken),
+                ResOperation(rop.JUMP, jumpargs, None, descr=targettoken),
                 ]
-            operations[2].setfailargs(inputargs[:])
-            operations[2].setdescr(faildescr)
+            operations[3].setfailargs(inputargs[:])
+            operations[3].setdescr(faildescr)
             #
             self.cpu.compile_loop(inputargs, operations, looptoken)
             #
@@ -1124,22 +1144,24 @@
             py.test.skip("requires floats")
         fboxes = [BoxFloat() for i in range(12)]
         i2 = BoxInt()
+        targettoken = TargetToken()
         faildescr1 = BasicFailDescr(1)
         faildescr2 = BasicFailDescr(2)
         operations = [
+            ResOperation(rop.LABEL, fboxes, None, descr=targettoken),
             ResOperation(rop.FLOAT_LE, [fboxes[0], constfloat(9.2)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=faildescr1),
             ResOperation(rop.FINISH, fboxes, None, descr=faildescr2),
             ]
         operations[-2].setfailargs(fboxes)
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop(fboxes, operations, looptoken)
 
         fboxes2 = [BoxFloat() for i in range(12)]
         f3 = BoxFloat()
         bridge = [
             ResOperation(rop.FLOAT_SUB, [fboxes2[0], constfloat(1.0)], f3),
-            ResOperation(rop.JUMP, [f3] + fboxes2[1:], None, descr=looptoken),
+            ResOperation(rop.JUMP, [f3]+fboxes2[1:], None, descr=targettoken),
         ]
 
         self.cpu.compile_bridge(faildescr1, fboxes2, bridge, looptoken)
@@ -1190,7 +1212,7 @@
                         ResOperation(rop.FINISH, [], None, descr=faildescr2),
                         ]
                     operations[-2].setfailargs([])
-                    looptoken = LoopToken()
+                    looptoken = JitCellToken()
                     self.cpu.compile_loop(inputargs, operations, looptoken)
                     #
                     cpu = self.cpu
@@ -1245,7 +1267,7 @@
                         ResOperation(rop.FINISH, [], None, descr=faildescr2),
                         ]
                     operations[-2].setfailargs([])
-                    looptoken = LoopToken()
+                    looptoken = JitCellToken()
                     self.cpu.compile_loop(inputargs, operations, looptoken)
                     #
                     cpu = self.cpu
@@ -1302,7 +1324,7 @@
         faildescr = BasicFailDescr(1)
         operations.append(ResOperation(rop.FINISH, [], None,
                                        descr=faildescr))
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         #
         self.cpu.compile_loop(inputargs, operations, looptoken)
         #
@@ -1373,7 +1395,7 @@
                             ResOperation(rop.FINISH, [], None,
                                          descr=BasicFailDescr(5))]
                         operations[1].setfailargs([])
-                        looptoken = LoopToken()
+                        looptoken = JitCellToken()
                         # Use "set" to unique-ify inputargs
                         unique_testcase_list = list(set(testcase))
                         self.cpu.compile_loop(unique_testcase_list, operations,
@@ -1647,13 +1669,14 @@
         exc_tp = xtp
         exc_ptr = xptr
         loop = parse(ops, self.cpu, namespace=locals())
-        self.cpu.compile_loop(loop.inputargs, loop.operations, loop.token)
-        self.cpu.execute_token(loop.token, 1)
+        looptoken = JitCellToken()
+        self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+        self.cpu.execute_token(looptoken, 1)
         assert self.cpu.get_latest_value_int(0) == 0
         assert self.cpu.get_latest_value_ref(1) == xptr
         excvalue = self.cpu.grab_exc_value()
         assert not excvalue
-        self.cpu.execute_token(loop.token, 0)
+        self.cpu.execute_token(looptoken, 0)
         assert self.cpu.get_latest_value_int(0) == 1
         excvalue = self.cpu.grab_exc_value()
         assert not excvalue
@@ -1670,8 +1693,9 @@
         exc_tp = ytp
         exc_ptr = yptr
         loop = parse(ops, self.cpu, namespace=locals())
-        self.cpu.compile_loop(loop.inputargs, loop.operations, loop.token)
-        self.cpu.execute_token(loop.token, 1)
+        looptoken = JitCellToken()
+        self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+        self.cpu.execute_token(looptoken, 1)
         assert self.cpu.get_latest_value_int(0) == 1
         excvalue = self.cpu.grab_exc_value()
         assert excvalue == yptr
@@ -1687,12 +1711,13 @@
         finish(0)
         '''
         loop = parse(ops, self.cpu, namespace=locals())
-        self.cpu.compile_loop(loop.inputargs, loop.operations, loop.token)
-        self.cpu.execute_token(loop.token, 1)
+        looptoken = JitCellToken()
+        self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+        self.cpu.execute_token(looptoken, 1)
         assert self.cpu.get_latest_value_int(0) == 1
         excvalue = self.cpu.grab_exc_value()
         assert excvalue == xptr
-        self.cpu.execute_token(loop.token, 0)
+        self.cpu.execute_token(looptoken, 0)
         assert self.cpu.get_latest_value_int(0) == 0
         excvalue = self.cpu.grab_exc_value()
         assert not excvalue
@@ -1862,7 +1887,7 @@
         ResOperation(rop.FINISH, [i0], None, descr=BasicFailDescr(0))
         ]
         ops[2].setfailargs([i1, i0])
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop([i0, i1], ops, looptoken)
         fail = self.cpu.execute_token(looptoken, 20, 0)
         assert fail.identifier == 0
@@ -1903,7 +1928,7 @@
         ResOperation(rop.FINISH, [i2], None, descr=BasicFailDescr(0))
         ]
         ops[2].setfailargs([i1, i2, i0])
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop([i0, i1], ops, looptoken)
         fail = self.cpu.execute_token(looptoken, 20, 0)
         assert fail.identifier == 0
@@ -1945,7 +1970,7 @@
         ResOperation(rop.FINISH, [f2], None, descr=BasicFailDescr(0))
         ]
         ops[2].setfailargs([i1, f2, i0])
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop([i0, i1], ops, looptoken)
         fail = self.cpu.execute_token(looptoken, 20, 0)
         assert fail.identifier == 0
@@ -1986,7 +2011,7 @@
         ResOperation(rop.FINISH, [i2], None, descr=BasicFailDescr(0))
         ]
         ops[1].setfailargs([i1, i2])
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop([i1], ops, looptoken)
         fail = self.cpu.execute_token(looptoken, ord('G'))
         assert fail.identifier == 0
@@ -2045,7 +2070,7 @@
         ResOperation(rop.FINISH, [], None, descr=BasicFailDescr(0))
         ]
         ops[1].setfailargs([])
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop([i0, i1, i2, i3], ops, looptoken)
         args = [rffi.cast(lltype.Signed, raw),
                 2,
@@ -2101,7 +2126,7 @@
         ops += [
             ResOperation(rop.FINISH, [i3], None, descr=BasicFailDescr(0))
         ]
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop([i1, i2], ops, looptoken)
 
         buffer = lltype.malloc(rffi.CCHARP.TO, buflen, flavor='raw')
@@ -2122,7 +2147,7 @@
             ResOperation(rop.FINISH, [i0], None, descr=BasicFailDescr(0))
         ]
         ops[0].setfailargs([i1])
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop([i0, i1], ops, looptoken)
 
         fail = self.cpu.execute_token(looptoken, -42, 9)
@@ -2360,7 +2385,7 @@
         i18 = int_add(i17, i9)
         finish(i18)'''
         loop = parse(ops)
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         looptoken.outermost_jitdriver_sd = FakeJitDriverSD()
         self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
         ARGS = [lltype.Signed] * 10
@@ -2379,7 +2404,7 @@
         finish(i11)
         '''
         loop = parse(ops, namespace=locals())
-        othertoken = LoopToken()
+        othertoken = JitCellToken()
         self.cpu.compile_loop(loop.inputargs, loop.operations, othertoken)
         args = [i+1 for i in range(10)]
         res = self.cpu.execute_token(othertoken, *args)
@@ -2414,7 +2439,7 @@
         finish(f2)'''
         loop = parse(ops)
         done_number = self.cpu.get_fail_descr_number(loop.operations[-1].getdescr())
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         looptoken.outermost_jitdriver_sd = FakeJitDriverSD()
         self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
         args = [longlong.getfloatstorage(1.2),
@@ -2429,7 +2454,7 @@
         finish(f3)
         '''
         loop = parse(ops, namespace=locals())
-        othertoken = LoopToken()
+        othertoken = JitCellToken()
         self.cpu.compile_loop(loop.inputargs, loop.operations, othertoken)
         args = [longlong.getfloatstorage(1.2),
                 longlong.getfloatstorage(3.2)]
@@ -2442,7 +2467,7 @@
         del called[:]
         self.cpu.done_with_this_frame_float_v = done_number
         try:
-            othertoken = LoopToken()
+            othertoken = JitCellToken()
             self.cpu.compile_loop(loop.inputargs, loop.operations, othertoken)
             args = [longlong.getfloatstorage(1.2),
                     longlong.getfloatstorage(3.2)]
@@ -2504,7 +2529,7 @@
         f2 = float_add(f0, f1)
         finish(f2)'''
         loop = parse(ops)
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         looptoken.outermost_jitdriver_sd = FakeJitDriverSD()
         self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
         args = [longlong.getfloatstorage(1.25),
@@ -2521,7 +2546,7 @@
         finish(f3)
         '''
         loop = parse(ops, namespace=locals())
-        othertoken = LoopToken()
+        othertoken = JitCellToken()
         self.cpu.compile_loop(loop.inputargs, loop.operations, othertoken)
 
         # normal call_assembler: goes to looptoken
@@ -2539,7 +2564,7 @@
         f2 = float_sub(f0, f1)
         finish(f2)'''
         loop = parse(ops)
-        looptoken2 = LoopToken()
+        looptoken2 = JitCellToken()
         looptoken2.outermost_jitdriver_sd = FakeJitDriverSD()
         self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken2)
 
@@ -2900,12 +2925,139 @@
             ResOperation(rop.FINISH, [p0], None, descr=BasicFailDescr(1))
             ]
         inputargs = [i0]
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         self.cpu.compile_loop(inputargs, operations, looptoken)
         # overflowing value:
         fail = self.cpu.execute_token(looptoken, sys.maxint // 4 + 1)
         assert fail.identifier == excdescr.identifier
 
+    def test_compile_loop_with_target(self):
+        i0 = BoxInt()
+        i1 = BoxInt()
+        i2 = BoxInt()
+        i3 = BoxInt()
+        looptoken = JitCellToken()
+        targettoken1 = TargetToken()
+        targettoken2 = TargetToken()
+        faildescr = BasicFailDescr(2)
+        operations = [
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken1),
+            ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
+            ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
+            ResOperation(rop.GUARD_TRUE, [i2], None, descr=faildescr),
+            ResOperation(rop.LABEL, [i1], None, descr=targettoken2),
+            ResOperation(rop.INT_GE, [i1, ConstInt(0)], i3),
+            ResOperation(rop.GUARD_TRUE, [i3], None, descr=BasicFailDescr(3)),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken1),
+            ]
+        inputargs = [i0]
+        operations[3].setfailargs([i1])
+        operations[6].setfailargs([i1])
+
+        self.cpu.compile_loop(inputargs, operations, looptoken)
+        self.cpu.set_future_value_int(0, 2)
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 2
+        res = self.cpu.get_latest_value_int(0)
+        assert res == 10
+
+        inputargs = [i0]
+        operations = [
+            ResOperation(rop.INT_SUB, [i0, ConstInt(20)], i2),
+            ResOperation(rop.JUMP, [i2], None, descr=targettoken2),
+            ]
+        self.cpu.compile_bridge(faildescr, inputargs, operations, looptoken)
+        
+        self.cpu.set_future_value_int(0, 2)
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 3
+        res = self.cpu.get_latest_value_int(0)
+        assert res == -10
+
+    def test_compile_bridge_with_target(self):
+        # This test creates a loopy piece of code in a bridge, and builds another
+        # unrelated loop that ends in a jump directly to this loopy bit of code.
+        # It catches a case in which we underestimate the needed frame_depth across
+        # the cross-loop JUMP, because we estimate it based on the frame_depth stored
+        # in the original loop.
+        i0 = BoxInt()
+        i1 = BoxInt()
+        looptoken1 = JitCellToken()
+        targettoken1 = TargetToken()
+        faildescr1 = BasicFailDescr(2)
+        inputargs = [i0]
+        operations = [
+            ResOperation(rop.INT_LE, [i0, ConstInt(1)], i1),
+            ResOperation(rop.GUARD_TRUE, [i1], None, descr=faildescr1),
+            ResOperation(rop.FINISH, [i0], None, descr=BasicFailDescr(1234)),
+            ]
+        operations[1].setfailargs([i0])
+        self.cpu.compile_loop(inputargs, operations, looptoken1)
+
+        def func(a, b, c, d, e, f, g, h, i):
+            assert a + 2 == b
+            assert a + 4 == c
+            assert a + 6 == d
+            assert a + 8 == e
+            assert a + 10 == f
+            assert a + 12 == g
+            assert a + 14 == h
+            assert a + 16 == i
+        FPTR = self.Ptr(self.FuncType([lltype.Signed]*9, lltype.Void))
+        func_ptr = llhelper(FPTR, func)
+        cpu = self.cpu
+        calldescr = cpu.calldescrof(deref(FPTR), (lltype.Signed,)*9, lltype.Void,
+                                    EffectInfo.MOST_GENERAL)
+        funcbox = self.get_funcbox(cpu, func_ptr)
+
+        i0 = BoxInt(); i1 = BoxInt(); i2 = BoxInt(); i3 = BoxInt(); i4 = BoxInt()
+        i5 = BoxInt(); i6 = BoxInt(); i7 = BoxInt(); i8 = BoxInt(); i9 = BoxInt()
+        i10 = BoxInt(); i11 = BoxInt(); i12 = BoxInt(); i13 = BoxInt(); i14 = BoxInt()
+        i15 = BoxInt(); i16 = BoxInt(); i17 = BoxInt(); i18 = BoxInt(); i19 = BoxInt()
+        i20 = BoxInt()
+        inputargs = [i0]
+        operations = [
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken1),
+            ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
+            ResOperation(rop.INT_ADD, [i1, ConstInt(1)], i2),
+            ResOperation(rop.INT_ADD, [i2, ConstInt(1)], i3),
+            ResOperation(rop.INT_ADD, [i3, ConstInt(1)], i4),
+            ResOperation(rop.INT_ADD, [i4, ConstInt(1)], i5),
+            ResOperation(rop.INT_ADD, [i5, ConstInt(1)], i6),
+            ResOperation(rop.INT_ADD, [i6, ConstInt(1)], i7),
+            ResOperation(rop.INT_ADD, [i7, ConstInt(1)], i8),
+            ResOperation(rop.INT_ADD, [i8, ConstInt(1)], i9),
+            ResOperation(rop.INT_ADD, [i9, ConstInt(1)], i10),
+            ResOperation(rop.INT_ADD, [i10, ConstInt(1)], i11),
+            ResOperation(rop.INT_ADD, [i11, ConstInt(1)], i12),
+            ResOperation(rop.INT_ADD, [i12, ConstInt(1)], i13),
+            ResOperation(rop.INT_ADD, [i13, ConstInt(1)], i14),
+            ResOperation(rop.INT_ADD, [i14, ConstInt(1)], i15),
+            ResOperation(rop.INT_ADD, [i15, ConstInt(1)], i16),
+            ResOperation(rop.INT_ADD, [i16, ConstInt(1)], i17),
+            ResOperation(rop.INT_ADD, [i17, ConstInt(1)], i18),
+            ResOperation(rop.INT_ADD, [i18, ConstInt(1)], i19),
+            ResOperation(rop.CALL, [funcbox, i2, i4, i6, i8, i10, i12, i14, i16, i18],
+                         None, descr=calldescr),
+            ResOperation(rop.CALL, [funcbox, i2, i4, i6, i8, i10, i12, i14, i16, i18],
+                         None, descr=calldescr),
+            ResOperation(rop.INT_LT, [i19, ConstInt(100)], i20),
+            ResOperation(rop.GUARD_TRUE, [i20], None, descr=BasicFailDescr(42)),
+            ResOperation(rop.JUMP, [i19], None, descr=targettoken1),
+            ]
+        operations[-2].setfailargs([])
+        self.cpu.compile_bridge(faildescr1, inputargs, operations, looptoken1)
+
+        looptoken2 = JitCellToken()
+        inputargs = []
+        operations = [
+            ResOperation(rop.JUMP, [ConstInt(0)], None, descr=targettoken1),
+            ]
+        self.cpu.compile_loop(inputargs, operations, looptoken2)
+
+        fail = self.cpu.execute_token(looptoken2)
+        assert fail.identifier == 42
+
 
 class OOtypeBackendTest(BaseBackendTest):
 
diff --git a/pypy/jit/backend/test/test_ll_random.py b/pypy/jit/backend/test/test_ll_random.py
--- a/pypy/jit/backend/test/test_ll_random.py
+++ b/pypy/jit/backend/test/test_ll_random.py
@@ -28,16 +28,27 @@
         fork.structure_types_and_vtables = self.structure_types_and_vtables
         return fork
 
-    def get_structptr_var(self, r, must_have_vtable=False, type=lltype.Struct):
+    def _choose_ptr_vars(self, from_, type, array_of_structs):
+        ptrvars = []
+        for i in range(len(from_)):
+            v, S = from_[i][:2]
+            if not isinstance(S, type):
+                continue
+            if ((isinstance(S, lltype.Array) and
+                 isinstance(S.OF, lltype.Struct)) == array_of_structs):
+                ptrvars.append((v, S))
+        return ptrvars
+
+    def get_structptr_var(self, r, must_have_vtable=False, type=lltype.Struct,
+                          array_of_structs=False):
         while True:
-            ptrvars = [(v, S) for (v, S) in self.ptrvars
-                              if isinstance(S, type)]
+            ptrvars = self._choose_ptr_vars(self.ptrvars, type,
+                                            array_of_structs)
             if ptrvars and r.random() < 0.8:
                 v, S = r.choice(ptrvars)
             else:
-                prebuilt_ptr_consts = [(v, S)
-                                 for (v, S, _) in self.prebuilt_ptr_consts
-                                 if isinstance(S, type)]
+                prebuilt_ptr_consts = self._choose_ptr_vars(
+                    self.prebuilt_ptr_consts, type, array_of_structs)
                 if prebuilt_ptr_consts and r.random() < 0.7:
                     v, S = r.choice(prebuilt_ptr_consts)
                 else:
@@ -48,7 +59,8 @@
                                                 has_vtable=must_have_vtable)
                     else:
                         # create a new constant array
-                        p = self.get_random_array(r)
+                        p = self.get_random_array(r,
+                                    must_be_array_of_structs=array_of_structs)
                     S = lltype.typeOf(p).TO
                     v = ConstPtr(lltype.cast_opaque_ptr(llmemory.GCREF, p))
                     self.prebuilt_ptr_consts.append((v, S,
@@ -74,7 +86,8 @@
                 TYPE = lltype.Signed
         return TYPE
 
-    def get_random_structure_type(self, r, with_vtable=None, cache=True):
+    def get_random_structure_type(self, r, with_vtable=None, cache=True,
+                                  type=lltype.GcStruct):
         if cache and self.structure_types and r.random() < 0.5:
             return r.choice(self.structure_types)
         fields = []
@@ -85,7 +98,7 @@
         for i in range(r.randrange(1, 5)):
             TYPE = self.get_random_primitive_type(r)
             fields.append(('f%d' % i, TYPE))
-        S = lltype.GcStruct('S%d' % self.counter, *fields, **kwds)
+        S = type('S%d' % self.counter, *fields, **kwds)
         self.counter += 1
         if cache:
             self.structure_types.append(S)
@@ -125,17 +138,29 @@
                 setattr(p, fieldname, rffi.cast(TYPE, r.random_integer()))
         return p
 
-    def get_random_array_type(self, r):
-        TYPE = self.get_random_primitive_type(r)
+    def get_random_array_type(self, r, can_be_array_of_struct=False,
+                              must_be_array_of_structs=False):
+        if ((can_be_array_of_struct and r.random() < 0.1) or
+            must_be_array_of_structs):
+            TYPE = self.get_random_structure_type(r, cache=False,
+                                                  type=lltype.Struct)
+        else:
+            TYPE = self.get_random_primitive_type(r)
         return lltype.GcArray(TYPE)
 
-    def get_random_array(self, r):
-        A = self.get_random_array_type(r)
+    def get_random_array(self, r, must_be_array_of_structs=False):
+        A = self.get_random_array_type(r,
+                           must_be_array_of_structs=must_be_array_of_structs)
         length = (r.random_integer() // 15) % 300  # length: between 0 and 299
                                                    # likely to be small
         p = lltype.malloc(A, length)
-        for i in range(length):
-            p[i] = rffi.cast(A.OF, r.random_integer())
+        if isinstance(A.OF, lltype.Primitive):
+            for i in range(length):
+                p[i] = rffi.cast(A.OF, r.random_integer())
+        else:
+            for i in range(length):
+                for fname, TP in A.OF._flds.iteritems():
+                    setattr(p[i], fname, rffi.cast(TP, r.random_integer()))
         return p
 
     def get_index(self, length, r):
@@ -155,8 +180,16 @@
                     dic[fieldname] = getattr(p, fieldname)
         else:
             assert isinstance(S, lltype.Array)
-            for i in range(len(p)):
-                dic[i] = p[i]
+            if isinstance(S.OF, lltype.Struct):
+                for i in range(len(p)):
+                    item = p[i]
+                    s1 = {}
+                    for fieldname in S.OF._names:
+                        s1[fieldname] = getattr(item, fieldname)
+                    dic[i] = s1
+            else:
+                for i in range(len(p)):
+                    dic[i] = p[i]
         return dic
 
     def print_loop_prebuilt(self, names, writevar, s):
@@ -220,7 +253,7 @@
 
 class GetFieldOperation(test_random.AbstractOperation):
     def field_descr(self, builder, r):
-        v, S = builder.get_structptr_var(r)
+        v, S = builder.get_structptr_var(r)
         names = S._names
         if names[0] == 'parent':
             names = names[1:]
@@ -239,6 +272,28 @@
                 continue
             break
 
+class GetInteriorFieldOperation(test_random.AbstractOperation):
+    def field_descr(self, builder, r):
+        v, A = builder.get_structptr_var(r, type=lltype.Array,
+                                         array_of_structs=True)
+        array = v.getref(lltype.Ptr(A))
+        v_index = builder.get_index(len(array), r)
+        name = r.choice(A.OF._names)
+        descr = builder.cpu.interiorfielddescrof(A, name)
+        descr._random_info = 'cpu.interiorfielddescrof(%s, %r)' % (A.OF._name,
+                                                                   name)
+        TYPE = getattr(A.OF, name)
+        return v, v_index, descr, TYPE
+
+    def produce_into(self, builder, r):
+        while True:
+            try:
+                v, v_index, descr, _ = self.field_descr(builder, r)
+                self.put(builder, [v, v_index], descr)
+            except lltype.UninitializedMemoryAccess:
+                continue
+            break
+
 class SetFieldOperation(GetFieldOperation):
     def produce_into(self, builder, r):
         v, descr, TYPE = self.field_descr(builder, r)
@@ -251,6 +306,18 @@
                 break
         builder.do(self.opnum, [v, w], descr)
 
+class SetInteriorFieldOperation(GetInteriorFieldOperation):
+    def produce_into(self, builder, r):
+        v, v_index, descr, TYPE = self.field_descr(builder, r)
+        while True:
+            if r.random() < 0.3:
+                w = ConstInt(r.random_integer())
+            else:
+                w = r.choice(builder.intvars)
+            if rffi.cast(lltype.Signed, rffi.cast(TYPE, w.value)) == w.value:
+                break
+        builder.do(self.opnum, [v, v_index, w], descr)
+
 class NewOperation(test_random.AbstractOperation):
     def size_descr(self, builder, S):
         descr = builder.cpu.sizeof(S)
@@ -306,7 +373,7 @@
 
 class NewArrayOperation(ArrayOperation):
     def produce_into(self, builder, r):
-        A = builder.get_random_array_type(r)
+        A = builder.get_random_array_type(r, can_be_array_of_struct=True)
         v_size = builder.get_index(300, r)
         v_ptr = builder.do(self.opnum, [v_size], self.array_descr(builder, A))
         builder.ptrvars.append((v_ptr, A))
@@ -586,7 +653,9 @@
 for i in range(4):      # make more common
     OPERATIONS.append(GetFieldOperation(rop.GETFIELD_GC))
     OPERATIONS.append(GetFieldOperation(rop.GETFIELD_GC))
+    OPERATIONS.append(GetInteriorFieldOperation(rop.GETINTERIORFIELD_GC))
     OPERATIONS.append(SetFieldOperation(rop.SETFIELD_GC))
+    OPERATIONS.append(SetInteriorFieldOperation(rop.SETINTERIORFIELD_GC))
     OPERATIONS.append(NewOperation(rop.NEW))
     OPERATIONS.append(NewOperation(rop.NEW_WITH_VTABLE))
 
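
The test_ll_random changes above teach the random loop generator about GC arrays whose items are structs, so that GETINTERIORFIELD_GC / SETINTERIORFIELD_GC actually get generated and checked.  A small sketch of the kind of lltype object involved and of the descriptor call used in GetInteriorFieldOperation (illustrative only; 'cpu' stands for any backend CPU instance):

    from pypy.rpython.lltypesystem import lltype

    ITEM = lltype.Struct('ITEM', ('f0', lltype.Signed), ('f1', lltype.Signed))
    A = lltype.GcArray(ITEM)          # GC array with the structs inlined in the items
    p = lltype.malloc(A, 5)
    p[3].f0 = 42                      # an "interior field" access: item, then field

    # the descr consumed by GETINTERIORFIELD_GC / SETINTERIORFIELD_GC:
    descr = cpu.interiorfielddescrof(A, 'f0')
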
diff --git a/pypy/jit/backend/test/test_random.py b/pypy/jit/backend/test/test_random.py
--- a/pypy/jit/backend/test/test_random.py
+++ b/pypy/jit/backend/test/test_random.py
@@ -3,8 +3,8 @@
 from pypy.rlib.rarithmetic import intmask, LONG_BIT
 from pypy.rpython.lltypesystem import llmemory
 from pypy.jit.metainterp.history import BasicFailDescr, TreeLoop
-from pypy.jit.metainterp.history import BoxInt, ConstInt, LoopToken
-from pypy.jit.metainterp.history import BoxPtr, ConstPtr
+from pypy.jit.metainterp.history import BoxInt, ConstInt, JitCellToken
+from pypy.jit.metainterp.history import BoxPtr, ConstPtr, TargetToken
 from pypy.jit.metainterp.history import BoxFloat, ConstFloat, Const
 from pypy.jit.metainterp.resoperation import ResOperation, rop
 from pypy.jit.metainterp.executor import execute_nonspec
@@ -179,7 +179,7 @@
                 #print >>s, '    operations[%d].suboperations = [' % i
                 #print >>s, '        ResOperation(rop.FAIL, [%s], None)]' % (
                 #    ', '.join([names[v] for v in op.args]))
-        print >>s, '    looptoken = LoopToken()'
+        print >>s, '    looptoken = JitCellToken()'
         print >>s, '    cpu.compile_loop(inputargs, operations, looptoken)'
         if hasattr(self.loop, 'inputargs'):
             for i, v in enumerate(self.loop.inputargs):
@@ -495,9 +495,9 @@
     if pytest.config.option.backend == 'llgraph':
         from pypy.jit.backend.llgraph.runner import LLtypeCPU
         return LLtypeCPU(None)
-    elif pytest.config.option.backend == 'x86':
-        from pypy.jit.backend.x86.runner import CPU386
-        return CPU386(None, None)
+    elif pytest.config.option.backend == 'cpu':
+        from pypy.jit.backend.detect_cpu import getcpuclass
+        return getcpuclass()(None, None)
     else:
         assert 0, "unknown backend %r" % pytest.config.option.backend
 
@@ -525,29 +525,53 @@
                     startvars.append(BoxFloat(r.random_float_storage()))
                 else:
                     startvars.append(BoxInt(r.random_integer()))
+            allow_delay = True
+        else:
+            allow_delay = False
         assert len(dict.fromkeys(startvars)) == len(startvars)
         self.startvars = startvars
         self.prebuilt_ptr_consts = []
         self.r = r
-        self.build_random_loop(cpu, builder_factory, r, startvars)
+        self.build_random_loop(cpu, builder_factory, r, startvars, allow_delay)
 
-    def build_random_loop(self, cpu, builder_factory, r, startvars):
+    def build_random_loop(self, cpu, builder_factory, r, startvars, allow_delay):
 
         loop = TreeLoop('test_random_function')
         loop.inputargs = startvars[:]
         loop.operations = []
-        loop.token = LoopToken()
-
+        loop._jitcelltoken = JitCellToken()
         builder = builder_factory(cpu, loop, startvars[:])
-        self.generate_ops(builder, r, loop, startvars)
+        if allow_delay:
+            needs_a_label = True
+        else:
+            self.insert_label(loop, 0, r)
+            needs_a_label = False
+        self.generate_ops(builder, r, loop, startvars, needs_a_label=needs_a_label)
         self.builder = builder
         self.loop = loop
-        cpu.compile_loop(loop.inputargs, loop.operations, loop.token)
+        dump(loop)
+        cpu.compile_loop(loop.inputargs, loop.operations, loop._jitcelltoken)
 
-    def generate_ops(self, builder, r, loop, startvars):
+    def insert_label(self, loop, position, r):
+        assert not hasattr(loop, '_targettoken')
+        for i in range(position):
+            op = loop.operations[i]
+            if (not op.has_no_side_effect()
+                    or not isinstance(op.result, (BoxInt, BoxFloat))):
+                position = i
+                break       # cannot move the LABEL later
+            randompos = r.randrange(0, len(self.startvars)+1)
+            self.startvars.insert(randompos, op.result)
+        loop._targettoken = TargetToken()
+        loop.operations.insert(position, ResOperation(rop.LABEL, self.startvars, None,
+                                                      loop._targettoken))
+
+    def generate_ops(self, builder, r, loop, startvars, needs_a_label=False):
         block_length = pytest.config.option.block_length
+        istart = 0
 
         for i in range(block_length):
+            istart = len(loop.operations)
             try:
                 op = r.choice(builder.OPERATIONS)
                 op.filter(builder)
@@ -556,6 +580,12 @@
                 pass
             if builder.should_fail_by is not None:
                 break
+            if needs_a_label and r.random() < 0.2:
+                self.insert_label(loop, istart, r)
+                needs_a_label = False
+        if needs_a_label:
+            self.insert_label(loop, istart, r)
+
         endvars = []
         used_later = {}
         for op in loop.operations:
@@ -581,6 +611,17 @@
         if pytest.config.option.output:
             builder.print_loop()
 
+    def runjitcelltoken(self):
+        if self.startvars == self.loop.inputargs:
+            return self.loop._jitcelltoken
+        if not hasattr(self, '_initialjumploop_celltoken'):
+            self._initialjumploop_celltoken = JitCellToken()
+            self.cpu.compile_loop(self.startvars[:],
+                                  [ResOperation(rop.JUMP, self.startvars[:], None,
+                                                descr=self.loop._targettoken)],
+                                  self._initialjumploop_celltoken)
+        return self._initialjumploop_celltoken
+
     def get_fail_args(self):
         if self.should_fail_by.is_guard():
             assert self.should_fail_by.getfailargs() is not None
@@ -595,6 +636,10 @@
             for name, value in fields.items():
                 if isinstance(name, str):
                     setattr(container, name, value)
+                elif isinstance(value, dict):
+                    item = container.getitem(name)
+                    for key1, value1 in value.items():
+                        setattr(item, key1, value1)
                 else:
                     container.setitem(name, value)
 
@@ -611,7 +656,7 @@
                 cpu.set_future_value_float(i, box.value)
             else:
                 raise NotImplementedError(box)
-        fail = cpu.execute_token(self.loop.token)
+        fail = cpu.execute_token(self.runjitcelltoken())
         assert fail is self.should_fail_by.getdescr()
         for i, v in enumerate(self.get_fail_args()):
             if isinstance(v, (BoxFloat, ConstFloat)):
@@ -679,26 +724,37 @@
             args = [x.clonebox() for x in subset]
             rl = RandomLoop(self.builder.cpu, self.builder.fork,
                                      r, args)
+            dump(rl.loop)
             self.cpu.compile_loop(rl.loop.inputargs, rl.loop.operations,
-                                  rl.loop.token)
+                                  rl.loop._jitcelltoken)
             # done
             self.should_fail_by = rl.should_fail_by
             self.expected = rl.expected
             assert len(rl.loop.inputargs) == len(args)
             # The new bridge's execution will end normally at its FINISH.
             # Just replace the FINISH with the JUMP to the new loop.
-            jump_op = ResOperation(rop.JUMP, subset, None, descr=rl.loop.token)
+            jump_op = ResOperation(rop.JUMP, subset, None,
+                                   descr=rl.loop._targettoken)
             subloop.operations[-1] = jump_op
             self.guard_op = rl.guard_op
             self.prebuilt_ptr_consts += rl.prebuilt_ptr_consts
-            self.loop.token.record_jump_to(rl.loop.token)
+            self.loop._jitcelltoken.record_jump_to(rl.loop._jitcelltoken)
             self.dont_generate_more = True
         if r.random() < .05:
             return False
+        dump(subloop)
         self.builder.cpu.compile_bridge(fail_descr, fail_args,
-                                        subloop.operations, self.loop.token)
+                                        subloop.operations,
+                                        self.loop._jitcelltoken)
         return True
 
+def dump(loop):
+    print >> sys.stderr, loop
+    if hasattr(loop, 'inputargs'):
+        print >> sys.stderr, '\t', loop.inputargs
+    for op in loop.operations:
+        print >> sys.stderr, '\t', op
+
 def check_random_function(cpu, BuilderClass, r, num=None, max=None):
     loop = RandomLoop(cpu, BuilderClass, r)
     while True:
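
In test_random.py every generated loop now receives a LABEL (possibly inserted a few operations into the loop), and execution goes through the label's TargetToken instead of the old LoopToken.  When the variables live at the label are not exactly the loop's inputargs, runjitcelltoken() compiles, once, a tiny entry loop whose only purpose is to jump into the labelled code.  As a usage sketch (names as in the code above):

    entry_token = JitCellToken()
    cpu.compile_loop(startvars,
                     [ResOperation(rop.JUMP, startvars, None,
                                   descr=loop._targettoken)],
                     entry_token)
    fail = cpu.execute_token(entry_token)   # enters the labelled loop directly
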
diff --git a/pypy/jit/backend/x86/test/test_zll_random.py b/pypy/jit/backend/test/test_zll_stress.py
rename from pypy/jit/backend/x86/test/test_zll_random.py
rename to pypy/jit/backend/test/test_zll_stress.py
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -2,14 +2,14 @@
 from pypy.jit.backend.llsupport import symbolic
 from pypy.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
 from pypy.jit.metainterp.history import Const, Box, BoxInt, ConstInt
-from pypy.jit.metainterp.history import (AbstractFailDescr, INT, REF, FLOAT,
-                                         LoopToken)
+from pypy.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
+from pypy.jit.metainterp.history import JitCellToken
 from pypy.rpython.lltypesystem import lltype, rffi, rstr, llmemory
 from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rpython.annlowlevel import llhelper
 from pypy.jit.backend.model import CompiledLoopToken
-from pypy.jit.backend.x86.regalloc import (RegAlloc, get_ebp_ofs,
-                                           _get_scale, gpr_reg_mgr_cls)
+from pypy.jit.backend.x86.regalloc import (RegAlloc, get_ebp_ofs, _get_scale,
+    gpr_reg_mgr_cls, _valid_addressing_size)
 
 from pypy.jit.backend.x86.arch import (FRAME_FIXED_SIZE, FORCE_INDEX_OFS, WORD,
                                        IS_X86_32, IS_X86_64)
@@ -152,14 +152,13 @@
         allblocks = self.get_asmmemmgr_blocks(looptoken)
         self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
                                                         allblocks)
+        self.target_tokens_currently_compiling = {}
 
     def teardown(self):
         self.pending_guard_tokens = None
         if WORD == 8:
             self.pending_memoryerror_trampoline_from = None
         self.mc = None
-        self.looppos = -1
-        self.currently_compiling_loop = None
         self.current_clt = None
 
     def finish_once(self):
@@ -425,8 +424,6 @@
                _x86_loop_code       (an integer giving an address)
                _x86_bootstrap_code  (an integer giving an address)
                _x86_direct_bootstrap_code  ( "    "     "    "   )
-               _x86_frame_depth
-               _x86_param_depth
                _x86_arglocs
                _x86_debug_checksum
         '''
@@ -443,7 +440,6 @@
             assert len(set(inputargs)) == len(inputargs)
 
         self.setup(looptoken)
-        self.currently_compiling_loop = looptoken
         if log:
             self._register_counter(False, looptoken.number)
             operations = self._inject_debugging_code(looptoken, operations)
@@ -455,15 +451,16 @@
 
         bootstrappos = self.mc.get_relative_pos()
         stackadjustpos = self._assemble_bootstrap_code(inputargs, arglocs)
-        self.looppos = self.mc.get_relative_pos()
-        looptoken._x86_frame_depth = -1     # temporarily
-        looptoken._x86_param_depth = -1     # temporarily
+        looppos = self.mc.get_relative_pos()
+        looptoken._x86_loop_code = looppos
+        clt.frame_depth = -1     # temporarily
+        clt.param_depth = -1     # temporarily
         frame_depth, param_depth = self._assemble(regalloc, operations)
-        looptoken._x86_frame_depth = frame_depth
-        looptoken._x86_param_depth = param_depth
+        clt.frame_depth = frame_depth
+        clt.param_depth = param_depth
 
         directbootstrappos = self.mc.get_relative_pos()
-        self._assemble_bootstrap_direct_call(arglocs, self.looppos,
+        self._assemble_bootstrap_direct_call(arglocs, looppos,
                                              frame_depth+param_depth)
         self.write_pending_failure_recoveries()
         fullsize = self.mc.get_relative_pos()
@@ -472,7 +469,7 @@
         debug_start("jit-backend-addr")
         debug_print("Loop %d (%s) has address %x to %x (bootstrap %x)" % (
             looptoken.number, loopname,
-            rawstart + self.looppos,
+            rawstart + looppos,
             rawstart + directbootstrappos,
             rawstart))
         debug_stop("jit-backend-addr")
@@ -488,8 +485,8 @@
             looptoken._x86_ops_offset = ops_offset
 
         looptoken._x86_bootstrap_code = rawstart + bootstrappos
-        looptoken._x86_loop_code = rawstart + self.looppos
         looptoken._x86_direct_bootstrap_code = rawstart + directbootstrappos
+        self.fixup_target_tokens(rawstart)
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
@@ -548,6 +545,9 @@
         # patch the jump from original guard
         self.patch_jump_for_descr(faildescr, rawstart)
         ops_offset = self.mc.ops_offset
+        self.fixup_target_tokens(rawstart)
+        self.current_clt.frame_depth = max(self.current_clt.frame_depth, frame_depth)
+        self.current_clt.param_depth = max(self.current_clt.param_depth, param_depth)
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
@@ -668,6 +668,11 @@
             mc.copy_to_raw_memory(adr_target)
         faildescr._x86_adr_jump_offset = 0    # means "patched"
 
+    def fixup_target_tokens(self, rawstart):
+        for targettoken in self.target_tokens_currently_compiling:
+            targettoken._x86_loop_code += rawstart
+        self.target_tokens_currently_compiling = None
+
     @specialize.argtype(1)
     def _inject_debugging_code(self, looptoken, operations):
         if self._debug:
@@ -685,20 +690,24 @@
                    ResOperation(rop.INT_ADD, [box, ConstInt(1)], box2),
                    ResOperation(rop.SETFIELD_RAW, [c_adr, box2],
                                 None, descr=self.debug_counter_descr)]
-            operations = ops + operations
+            if operations[0].getopnum() == rop.LABEL:
+                operations = [operations[0]] + ops + operations[1:]
+            else:
+                operations = ops + operations
         return operations
 
     def _assemble(self, regalloc, operations):
         self._regalloc = regalloc
+        regalloc.compute_hint_frame_locations(operations)
         regalloc.walk_operations(operations)
         if we_are_translated() or self.cpu.dont_keepalive_stuff:
             self._regalloc = None   # else keep it around for debugging
-        frame_depth = regalloc.fm.frame_depth
+        frame_depth = regalloc.fm.get_frame_depth()
         param_depth = regalloc.param_depth
         jump_target_descr = regalloc.jump_target_descr
         if jump_target_descr is not None:
-            target_frame_depth = jump_target_descr._x86_frame_depth
-            target_param_depth = jump_target_descr._x86_param_depth
+            target_frame_depth = jump_target_descr._x86_clt.frame_depth
+            target_param_depth = jump_target_descr._x86_clt.param_depth
             frame_depth = max(frame_depth, target_frame_depth)
             param_depth = max(param_depth, target_param_depth)
         return frame_depth, param_depth
@@ -1596,13 +1605,33 @@
     genop_getarrayitem_gc_pure = genop_getarrayitem_gc
     genop_getarrayitem_raw = genop_getarrayitem_gc
 
+    def _get_interiorfield_addr(self, temp_loc, index_loc, itemsize_loc,
+                                base_loc, ofs_loc):
+        assert isinstance(itemsize_loc, ImmedLoc)
+        if isinstance(index_loc, ImmedLoc):
+            temp_loc = imm(index_loc.value * itemsize_loc.value)
+        elif _valid_addressing_size(itemsize_loc.value):
+            return AddressLoc(base_loc, index_loc, _get_scale(itemsize_loc.value), ofs_loc.value)
+        else:
+            # XXX should not use IMUL in more cases, it can use a clever LEA
+            assert isinstance(temp_loc, RegLoc)
+            assert isinstance(index_loc, RegLoc)
+            assert not temp_loc.is_xmm
+            self.mc.IMUL_rri(temp_loc.value, index_loc.value,
+                             itemsize_loc.value)
+        assert isinstance(ofs_loc, ImmedLoc)
+        return AddressLoc(base_loc, temp_loc, 0, ofs_loc.value)
+
     def genop_getinteriorfield_gc(self, op, arglocs, resloc):
-        base_loc, ofs_loc, itemsize_loc, fieldsize_loc, index_loc, sign_loc = arglocs
-        # XXX should not use IMUL in most cases
-        self.mc.IMUL(index_loc, itemsize_loc)
-        src_addr = AddressLoc(base_loc, index_loc, 0, ofs_loc.value)
+        (base_loc, ofs_loc, itemsize_loc, fieldsize_loc,
+            index_loc, temp_loc, sign_loc) = arglocs
+        src_addr = self._get_interiorfield_addr(temp_loc, index_loc,
+                                                itemsize_loc, base_loc,
+                                                ofs_loc)
         self.load_from_mem(resloc, src_addr, fieldsize_loc, sign_loc)
 
+    genop_getinteriorfield_raw = genop_getinteriorfield_gc
+
 
     def genop_discard_setfield_gc(self, op, arglocs):
         base_loc, ofs_loc, size_loc, value_loc = arglocs
@@ -1611,12 +1640,15 @@
         self.save_into_mem(dest_addr, value_loc, size_loc)
 
     def genop_discard_setinteriorfield_gc(self, op, arglocs):
-        base_loc, ofs_loc, itemsize_loc, fieldsize_loc, index_loc, value_loc = arglocs
-        # XXX should not use IMUL in most cases
-        self.mc.IMUL(index_loc, itemsize_loc)
-        dest_addr = AddressLoc(base_loc, index_loc, 0, ofs_loc.value)
+        (base_loc, ofs_loc, itemsize_loc, fieldsize_loc,
+            index_loc, temp_loc, value_loc) = arglocs
+        dest_addr = self._get_interiorfield_addr(temp_loc, index_loc,
+                                                 itemsize_loc, base_loc,
+                                                 ofs_loc)
         self.save_into_mem(dest_addr, value_loc, fieldsize_loc)
 
+    genop_discard_setinteriorfield_raw = genop_discard_setinteriorfield_gc
+
     def genop_discard_setarrayitem_gc(self, op, arglocs):
         base_loc, ofs_loc, value_loc, size_loc, baseofs = arglocs
         assert isinstance(baseofs, ImmedLoc)
@@ -2321,7 +2353,7 @@
         fail_index = self.cpu.get_fail_descr_number(faildescr)
         self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
         descr = op.getdescr()
-        assert isinstance(descr, LoopToken)
+        assert isinstance(descr, JitCellToken)
         assert len(arglocs) - 2 == len(descr._x86_arglocs[0])
         #
         # Write a call to the direct_bootstrap_code of the target assembler
@@ -2555,15 +2587,13 @@
                     gcrootmap.put(self.gcrootmap_retaddr_forced, mark)
                     self.gcrootmap_retaddr_forced = -1
 
-    def target_arglocs(self, loop_token):
-        return loop_token._x86_arglocs
-
-    def closing_jump(self, loop_token):
-        if loop_token is self.currently_compiling_loop:
+    def closing_jump(self, target_token):
+        target = target_token._x86_loop_code
+        if target_token in self.target_tokens_currently_compiling:
             curpos = self.mc.get_relative_pos() + 5
-            self.mc.JMP_l(self.looppos - curpos)
+            self.mc.JMP_l(target - curpos)
         else:
-            self.mc.JMP(imm(loop_token._x86_loop_code))
+            self.mc.JMP(imm(target))
 
     def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, tid):
         size = max(size, self.cpu.gc_ll_descr.minimal_size_in_nursery)
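
The interesting part of the assembler changes is _get_interiorfield_addr, which computes the address of a field inside one item of an array of structs.  The arithmetic it encodes is simply base + index*itemsize + field_offset; as a plain-Python sketch (illustrative, not backend code):

    def interior_field_address(base, index, itemsize, field_ofs):
        # address of the field at 'field_ofs' inside item 'index'
        return base + index * itemsize + field_ofs

When itemsize is 1, 2, 4 or 8 this folds into a single x86 addressing mode (base + index*scale + disp, hence the new _valid_addressing_size/_get_scale helpers); when the index is a compile-time constant it folds into the displacement; only in the remaining cases is the index multiplied into a temporary register with IMUL.
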
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -5,7 +5,8 @@
 import os
 from pypy.jit.metainterp.history import (Box, Const, ConstInt, ConstPtr,
                                          ResOperation, BoxPtr, ConstFloat,
-                                         BoxFloat, LoopToken, INT, REF, FLOAT)
+                                         BoxFloat, INT, REF, FLOAT,
+                                         TargetToken, JitCellToken)
 from pypy.jit.backend.x86.regloc import *
 from pypy.rpython.lltypesystem import lltype, rffi, rstr
 from pypy.rlib.objectmodel import we_are_translated
@@ -138,6 +139,10 @@
             return 2
         else:
             return 1
+    @staticmethod
+    def get_loc_index(loc):
+        assert isinstance(loc, StackLoc)
+        return loc.position
 
 if WORD == 4:
     gpr_reg_mgr_cls = X86RegisterManager
@@ -159,6 +164,7 @@
         # to be read/used by the assembler too
         self.jump_target_descr = None
         self.close_stack_struct = 0
+        self.final_jump_op = None
 
     def _prepare(self, inputargs, operations, allgcrefs):
         self.fm = X86FrameManager()
@@ -167,35 +173,30 @@
         operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
                                                        allgcrefs)
         # compute longevity of variables
-        longevity = self._compute_vars_longevity(inputargs, operations)
+        longevity, useful = self._compute_vars_longevity(inputargs, operations)
         self.longevity = longevity
         self.rm = gpr_reg_mgr_cls(longevity,
                                   frame_manager = self.fm,
                                   assembler = self.assembler)
         self.xrm = xmm_reg_mgr_cls(longevity, frame_manager = self.fm,
                                    assembler = self.assembler)
-        return operations
+        return operations, useful
 
     def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
-        operations = self._prepare(inputargs, operations, allgcrefs)
-        jump = operations[-1]
-        loop_consts = self._compute_loop_consts(inputargs, jump, looptoken)
-        self.loop_consts = loop_consts
-        return self._process_inputargs(inputargs), operations
+        operations, useful = self._prepare(inputargs, operations, allgcrefs)
+        return self._process_inputargs(inputargs, useful), operations
 
     def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
                        allgcrefs):
-        operations = self._prepare(inputargs, operations, allgcrefs)
-        self.loop_consts = {}
+        operations, _ = self._prepare(inputargs, operations, allgcrefs)
         self._update_bindings(arglocs, inputargs)
-        self.fm.frame_depth = prev_depths[0]
         self.param_depth = prev_depths[1]
         return operations
 
     def reserve_param(self, n):
         self.param_depth = max(self.param_depth, n)
 
-    def _process_inputargs(self, inputargs):
+    def _process_inputargs(self, inputargs, useful):
         # XXX we can sort out here by longevity if we need something
         # more optimal
         floatlocs = [None] * len(inputargs)
@@ -211,7 +212,7 @@
             arg = inputargs[i]
             assert not isinstance(arg, Const)
             reg = None
-            if arg not in self.loop_consts and self.longevity[arg][1] > -1:
+            if self.longevity[arg][1] > -1 and arg in useful:
                 if arg.type == FLOAT:
                     # xxx is it really a good idea?  at the first CALL they
                     # will all be flushed anyway
@@ -287,15 +288,15 @@
         else:
             return self.xrm.make_sure_var_in_reg(var, forbidden_vars)
 
-    def _compute_loop_consts(self, inputargs, jump, looptoken):
-        if jump.getopnum() != rop.JUMP or jump.getdescr() is not looptoken:
-            loop_consts = {}
-        else:
-            loop_consts = {}
-            for i in range(len(inputargs)):
-                if inputargs[i] is jump.getarg(i):
-                    loop_consts[inputargs[i]] = i
-        return loop_consts
+    #def _compute_loop_consts(self, inputargs, jump, looptoken):
+    #    if jump.getopnum() != rop.JUMP or jump.getdescr() is not looptoken:
+    #        loop_consts = {}
+    #    else:
+    #        loop_consts = {}
+    #        for i in range(len(inputargs)):
+    #            if inputargs[i] is jump.getarg(i):
+    #                loop_consts[inputargs[i]] = i
+    #    return loop_consts
 
     def _update_bindings(self, locs, inputargs):
         # XXX this should probably go to llsupport/regalloc.py
@@ -311,7 +312,7 @@
                     self.xrm.reg_bindings[arg] = loc
                     used[loc] = None
                 else:
-                    self.fm.frame_bindings[arg] = loc
+                    self.fm.set_binding(arg, loc)
             else:
                 if isinstance(loc, RegLoc):
                     if loc is ebp:
@@ -320,7 +321,7 @@
                         self.rm.reg_bindings[arg] = loc
                         used[loc] = None
                 else:
-                    self.fm.frame_bindings[arg] = loc
+                    self.fm.set_binding(arg, loc)
         self.rm.free_regs = []
         for reg in self.rm.all_regs:
             if reg not in used:
@@ -356,7 +357,7 @@
     def get_current_depth(self):
         # return (self.fm.frame_depth, self.param_depth), but trying to share
         # the resulting tuple among several calls
-        arg0 = self.fm.frame_depth
+        arg0 = self.fm.get_frame_depth()
         arg1 = self.param_depth
         result = self.assembler._current_depths_cache
         if result[0] != arg0 or result[1] != arg1:
@@ -450,8 +451,14 @@
     def _compute_vars_longevity(self, inputargs, operations):
         # compute a dictionary that maps variables to index in
         # operations that is a "last-time-seen"
+
+        # Returns a pair (longevity, useful).  Non-useful variables are ones
+        # that either never appear in the assembler or for which it does not
+        # matter whether they live on the stack or in registers.  The main
+        # example is loop arguments that only flow into guards, JUMP or FINISH.
         produced = {}
         last_used = {}
+        useful = {}
         for i in range(len(operations)-1, -1, -1):
             op = operations[i]
             if op.result:
@@ -459,8 +466,11 @@
                     continue
                 assert op.result not in produced
                 produced[op.result] = i
+            opnum = op.getopnum()
             for j in range(op.numargs()):
                 arg = op.getarg(j)
+                if opnum != rop.JUMP and opnum != rop.FINISH:
+                    useful[arg] = None
                 if isinstance(arg, Box) and arg not in last_used:
                     last_used[arg] = i
             if op.is_guard():
@@ -486,7 +496,7 @@
                 longevity[arg] = (0, last_used[arg])
                 del last_used[arg]
         assert len(last_used) == 0
-        return longevity
+        return longevity, useful
 
     def loc(self, v):
         if v is None: # xxx kludgy
@@ -883,7 +893,7 @@
 
     def consider_call_assembler(self, op, guard_op):
         descr = op.getdescr()
-        assert isinstance(descr, LoopToken)
+        assert isinstance(descr, JitCellToken)
         jd = descr.outermost_jitdriver_sd
         assert jd is not None
         size = jd.portal_calldescr.get_result_size(self.translate_support_code)
@@ -1042,16 +1052,32 @@
         t = self._unpack_interiorfielddescr(op.getdescr())
         ofs, itemsize, fieldsize, _ = t
         args = op.getarglist()
-        tmpvar = TempBox()
-        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
-        index_loc = self.rm.force_result_in_reg(tmpvar, op.getarg(1),
-                                                args)
-        # we're free to modify index now
-        value_loc = self.make_sure_var_in_reg(op.getarg(2), args)
-        self.possibly_free_vars(args)
-        self.rm.possibly_free_var(tmpvar)
+        if fieldsize.value == 1:
+            need_lower_byte = True
+        else:
+            need_lower_byte = False
+        box_base, box_index, box_value = args
+        base_loc = self.rm.make_sure_var_in_reg(box_base, args)
+        index_loc = self.rm.make_sure_var_in_reg(box_index, args)
+        value_loc = self.make_sure_var_in_reg(box_value, args,
+                                              need_lower_byte=need_lower_byte)
+        # If 'index_loc' is not an immediate, then we need a 'temp_loc' that
+        # is a register whose value will be destroyed.  It's fine to destroy
+        # the same register as 'index_loc', but not the other ones.
+        self.rm.possibly_free_var(box_index)
+        if not isinstance(index_loc, ImmedLoc):
+            tempvar = TempBox()
+            temp_loc = self.rm.force_allocate_reg(tempvar, [box_base,
+                                                            box_value])
+            self.rm.possibly_free_var(tempvar)
+        else:
+            temp_loc = None
+        self.rm.possibly_free_var(box_base)
+        self.possibly_free_var(box_value)
         self.PerformDiscard(op, [base_loc, ofs, itemsize, fieldsize,
-                                 index_loc, value_loc])
+                                 index_loc, temp_loc, value_loc])
+
+    consider_setinteriorfield_raw = consider_setinteriorfield_gc
 
     def consider_strsetitem(self, op):
         args = op.getarglist()
@@ -1122,15 +1148,29 @@
         else:
             sign_loc = imm0
         args = op.getarglist()
-        tmpvar = TempBox()
         base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
-        index_loc = self.rm.force_result_in_reg(tmpvar, op.getarg(1),
-                                                args)
-        self.rm.possibly_free_vars_for_op(op)
-        self.rm.possibly_free_var(tmpvar)
-        result_loc = self.force_allocate_reg(op.result)
+        index_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
+        # 'base' and 'index' are put in two registers (or one if 'index'
+        # is an immediate).  'result' can be in the same register as
+        # 'index' but must be in a different register than 'base'.
+        self.rm.possibly_free_var(op.getarg(1))
+        result_loc = self.force_allocate_reg(op.result, [op.getarg(0)])
+        assert isinstance(result_loc, RegLoc)
+        # two cases: 1) if result_loc is a normal register, use it as temp_loc
+        if not result_loc.is_xmm:
+            temp_loc = result_loc
+        else:
+            # 2) if result_loc is an xmm register, we (likely) need another
+            # temp_loc that is a normal register.  It can be in the same
+            # register as 'index' but not 'base'.
+            tempvar = TempBox()
+            temp_loc = self.rm.force_allocate_reg(tempvar, [op.getarg(0)])
+            self.rm.possibly_free_var(tempvar)
+        self.rm.possibly_free_var(op.getarg(0))
         self.Perform(op, [base_loc, ofs, itemsize, fieldsize,
-                          index_loc, sign_loc], result_loc)
+                          index_loc, temp_loc, sign_loc], result_loc)
+
+    consider_getinteriorfield_raw = consider_getinteriorfield_gc
 
     def consider_int_is_true(self, op, guard_op):
         # doesn't need arg to be in a register
@@ -1283,13 +1323,50 @@
             self.rm.possibly_free_var(tmpbox_low)
         self.rm.possibly_free_var(tmpbox_high)
 
+    def compute_hint_frame_locations(self, operations):
+        # optimization only: fill in the 'hint_frame_locations' dictionary
+        # of 'fm' based on the JUMP at the end of the loop, by looking
+        # at where we would like the boxes to be after the jump.
+        op = operations[-1]
+        if op.getopnum() != rop.JUMP:
+            return
+        self.final_jump_op = op
+        descr = op.getdescr()
+        assert isinstance(descr, TargetToken)
+        if descr._x86_loop_code != 0:
+            # if the target LABEL was already compiled, i.e. if it belongs
+            # to some already-compiled piece of code
+            self._compute_hint_frame_locations_from_descr(descr)
+        #else:
+        #   The loop ends in a JUMP going back to a LABEL in the same loop.
+        #   We cannot fill 'hint_frame_locations' immediately, but we can
+        #   wait until the corresponding consider_label() to know where
+        #   we would like the boxes to be after the jump.
+
+    def _compute_hint_frame_locations_from_descr(self, descr):
+        nonfloatlocs, floatlocs = descr._x86_arglocs
+        jump_op = self.final_jump_op
+        assert len(nonfloatlocs) == jump_op.numargs()
+        for i in range(jump_op.numargs()):
+            box = jump_op.getarg(i)
+            if isinstance(box, Box):
+                loc = nonfloatlocs[i]
+                if isinstance(loc, StackLoc):
+                    assert box.type != FLOAT
+                    self.fm.hint_frame_locations[box] = loc
+                else:
+                    loc = floatlocs[i]
+                    if isinstance(loc, StackLoc):
+                        assert box.type == FLOAT
+                        self.fm.hint_frame_locations[box] = loc
+
     def consider_jump(self, op):
         assembler = self.assembler
         assert self.jump_target_descr is None
         descr = op.getdescr()
-        assert isinstance(descr, LoopToken)
+        assert isinstance(descr, TargetToken)
+        nonfloatlocs, floatlocs = descr._x86_arglocs
         self.jump_target_descr = descr
-        nonfloatlocs, floatlocs = assembler.target_arglocs(self.jump_target_descr)
         # compute 'tmploc' to be all_regs[0] by spilling what is there
         box = TempBox()
         box1 = TempBox()
@@ -1327,7 +1404,7 @@
 
     def get_mark_gc_roots(self, gcrootmap, use_copy_area=False):
         shape = gcrootmap.get_basic_shape(IS_X86_64)
-        for v, val in self.fm.frame_bindings.items():
+        for v, val in self.fm.bindings.items():
             if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
                 assert isinstance(val, StackLoc)
                 gcrootmap.add_frame_offset(shape, get_ebp_ofs(val.position))
@@ -1362,6 +1439,74 @@
         # the FORCE_TOKEN operation returns directly 'ebp'
         self.rm.force_allocate_frame_reg(op.result)
 
+    def consider_label(self, op):
+        # XXX big refactoring needed?
+        descr = op.getdescr()
+        assert isinstance(descr, TargetToken)
+        inputargs = op.getarglist()
+        floatlocs = [None] * len(inputargs)
+        nonfloatlocs = [None] * len(inputargs)
+        #
+        # we need to make sure that the tmpreg and xmmtmp are free
+        tmpreg = X86RegisterManager.all_regs[0]
+        tmpvar = TempBox()
+        self.rm.force_allocate_reg(tmpvar, selected_reg=tmpreg)
+        self.rm.possibly_free_var(tmpvar)
+        #
+        xmmtmp = X86XMMRegisterManager.all_regs[0]
+        tmpvar = TempBox()
+        self.xrm.force_allocate_reg(tmpvar, selected_reg=xmmtmp)
+        self.xrm.possibly_free_var(tmpvar)
+        #
+        # we need to make sure that no variable is stored in ebp
+        for arg in inputargs:
+            if self.loc(arg) is ebp:
+                loc2 = self.fm.loc(arg)
+                self.assembler.mc.MOV(loc2, ebp)
+        self.rm.bindings_to_frame_reg.clear()
+        #
+        for i in range(len(inputargs)):
+            arg = inputargs[i]
+            assert not isinstance(arg, Const)
+            loc = self.loc(arg)
+            assert not (loc is tmpreg or loc is xmmtmp or loc is ebp)
+            if arg.type == FLOAT:
+                floatlocs[i] = loc
+            else:
+                nonfloatlocs[i] = loc
+            if isinstance(loc, RegLoc):
+                self.fm.mark_as_free(arg)
+        descr._x86_arglocs = nonfloatlocs, floatlocs
+        descr._x86_loop_code = self.assembler.mc.get_relative_pos()
+        descr._x86_clt = self.assembler.current_clt
+        self.assembler.target_tokens_currently_compiling[descr] = None
+        self.possibly_free_vars_for_op(op)
+        #
+        # if the LABEL's descr is precisely the target of the JUMP at the
+        # end of the same loop, i.e. if what we are compiling is a single
+        # loop that ends up jumping to this LABEL, then we can now provide
+        # the hints about the expected position of the spilled variables.
+        jump_op = self.final_jump_op
+        if jump_op is not None and jump_op.getdescr() is descr:
+            self._compute_hint_frame_locations_from_descr(descr)
+
+##        from pypy.rpython.annlowlevel import llhelper
+##        def fn(addr):
+##            print '...label:', hex(addr), nonfloatlocs
+##        FUNC = lltype.Ptr(lltype.FuncType([lltype.Signed], lltype.Void))
+##        ll_disp = llhelper(FUNC, fn)
+##        faddr = rffi.cast(lltype.Signed, ll_disp)
+##        for i in range(16):
+##            self.assembler.mc.PUSH_r(i)
+##        self.assembler.mc.CALL_l(0)
+##        self.assembler.mc.POP(edi)
+##        self.assembler.mc.MOV(r11, imm(faddr))
+##        self.assembler.mc.CALL(r11)
+##        for i in range(15, -1, -1):
+##            if i == esp.value:
+##                i -= 1
+##            self.assembler.mc.POP_r(i)
+
     def not_implemented_op(self, op):
         not_implemented("not implemented operation: %s" % op.getopname())
 
@@ -1404,8 +1549,11 @@
     # i.e. the n'th word beyond the fixed frame size.
     return -WORD * (FRAME_FIXED_SIZE + position)
 
+def _valid_addressing_size(size):
+    return size == 1 or size == 2 or size == 4 or size == 8
+
 def _get_scale(size):
-    assert size == 1 or size == 2 or size == 4 or size == 8
+    assert _valid_addressing_size(size)
     if size < 4:
         return size - 1         # 1, 2 => 0, 1
     else:
@@ -1414,3 +1562,7 @@
 def not_implemented(msg):
     os.write(2, '[x86/regalloc] %s\n' % msg)
     raise NotImplementedError(msg)
+
+# xxx hack: set a default value for TargetToken._x86_loop_code.
+# If 0, we know that it is a LABEL that was not compiled yet.
+TargetToken._x86_loop_code = 0
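
compute_hint_frame_locations is a pure optimization: by looking at the JUMP that closes the loop and at the stack slots the target LABEL expects, the register allocator can spill a box directly into the slot it would have to occupy after the jump anyway.  A condensed sketch of the hint it builds (illustrative only, not the exact backend code):

    from pypy.jit.metainterp.history import Box
    from pypy.jit.backend.x86.regloc import StackLoc

    def hints_from_final_jump(jump_args, nonfloatlocs, floatlocs):
        hints = {}
        for i, box in enumerate(jump_args):
            if not isinstance(box, Box):      # constants carry no hint
                continue
            # at most one of the two lists has a real location at index i
            for loc in (nonfloatlocs[i], floatlocs[i]):
                if isinstance(loc, StackLoc):
                    hints[box] = loc          # feeds fm.hint_frame_locations
        return hints
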
diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py
--- a/pypy/jit/backend/x86/regloc.py
+++ b/pypy/jit/backend/x86/regloc.py
@@ -17,7 +17,7 @@
 
 class AssemblerLocation(object):
     # XXX: Is adding "width" here correct?
-    __slots__ = ('value', 'width')
+    _attrs_ = ('value', 'width', '_location_code')
     _immutable_ = True
     def _getregkey(self):
         return self.value
@@ -25,6 +25,9 @@
     def is_memory_reference(self):
         return self.location_code() in ('b', 's', 'j', 'a', 'm')
 
+    def location_code(self):
+        return self._location_code
+
     def value_r(self): return self.value
     def value_b(self): return self.value
     def value_s(self): return self.value
@@ -38,6 +41,8 @@
 
 class StackLoc(AssemblerLocation):
     _immutable_ = True
+    _location_code = 'b'
+
     def __init__(self, position, ebp_offset, num_words, type):
         assert ebp_offset < 0   # so no confusion with RegLoc.value
         self.position = position
@@ -49,9 +54,6 @@
     def __repr__(self):
         return '%d(%%ebp)' % (self.value,)
 
-    def location_code(self):
-        return 'b'
-
     def assembler(self):
         return repr(self)
 
@@ -63,8 +65,10 @@
         self.is_xmm = is_xmm
         if self.is_xmm:
             self.width = 8
+            self._location_code = 'x'
         else:
             self.width = WORD
+            self._location_code = 'r'
     def __repr__(self):
         if self.is_xmm:
             return rx86.R.xmmnames[self.value]
@@ -79,12 +83,6 @@
         assert not self.is_xmm
         return RegLoc(rx86.high_byte(self.value), False)
 
-    def location_code(self):
-        if self.is_xmm:
-            return 'x'
-        else:
-            return 'r'
-
     def assembler(self):
         return '%' + repr(self)
 
@@ -97,14 +95,13 @@
 class ImmedLoc(AssemblerLocation):
     _immutable_ = True
     width = WORD
+    _location_code = 'i'
+
     def __init__(self, value):
         from pypy.rpython.lltypesystem import rffi, lltype
         # force as a real int
         self.value = rffi.cast(lltype.Signed, value)
 
-    def location_code(self):
-        return 'i'
-
     def getint(self):
         return self.value
 
@@ -149,9 +146,6 @@
         info = getattr(self, attr, '?')
         return '<AddressLoc %r: %s>' % (self._location_code, info)
 
-    def location_code(self):
-        return self._location_code
-
     def value_a(self):
         return self.loc_a
 
@@ -191,6 +185,7 @@
     # we want a width of 8  (... I think.  Check this!)
     _immutable_ = True
     width = 8
+    _location_code = 'j'
 
     def __init__(self, address):
         self.value = address
@@ -198,9 +193,6 @@
     def __repr__(self):
         return '<ConstFloatLoc @%s>' % (self.value,)
 
-    def location_code(self):
-        return 'j'
-
 if IS_X86_32:
     class FloatImmedLoc(AssemblerLocation):
         # This stands for an immediate float.  It cannot be directly used in
@@ -209,6 +201,7 @@
         # instead; see below.
         _immutable_ = True
         width = 8
+        _location_code = '#'     # don't use me
 
         def __init__(self, floatstorage):
             self.aslonglong = floatstorage
@@ -229,9 +222,6 @@
             floatvalue = longlong.getrealfloat(self.aslonglong)
             return '<FloatImmedLoc(%s)>' % (floatvalue,)
 
-        def location_code(self):
-            raise NotImplementedError
-
 if IS_X86_64:
     def FloatImmedLoc(floatstorage):
         from pypy.rlib.longlong2float import float2longlong
@@ -270,6 +260,11 @@
     else:
         raise AssertionError(methname + " undefined")
 
+def _missing_binary_insn(name, code1, code2):
+    raise AssertionError(name + "_" + code1 + code2 + " missing")
+_missing_binary_insn._dont_inline_ = True
+
+
 class LocationCodeBuilder(object):
     _mixin_ = True
 
@@ -303,6 +298,8 @@
             else:
                 # For this case, we should not need the scratch register more than here.
                 self._load_scratch(val2)
+                if name == 'MOV' and loc1 is X86_64_SCRATCH_REG:
+                    return     # don't need a dummy "MOV r11, r11"
                 INSN(self, loc1, X86_64_SCRATCH_REG)
 
         def invoke(self, codes, val1, val2):
@@ -310,6 +307,23 @@
             _rx86_getattr(self, methname)(val1, val2)
         invoke._annspecialcase_ = 'specialize:arg(1)'
 
+        def has_implementation_for(loc1, loc2):
+            # A memo function that returns True if there is any NAME_xy that could match.
+            # If it returns False we know the whole subcase can be omitted from translated
+            # code.  Without this hack, the size of most _binaryop INSN functions ends up
+            # quite large in C code.
+            if loc1 == '?':
+                return any([has_implementation_for(loc1, loc2)
+                            for loc1 in unrolling_location_codes])
+            methname = name + "_" + loc1 + loc2
+            if not hasattr(rx86.AbstractX86CodeBuilder, methname):
+                return False
+            # any NAME_j should have a NAME_m as a fallback, too.  Check it
+            if loc1 == 'j': assert has_implementation_for('m', loc2), methname
+            if loc2 == 'j': assert has_implementation_for(loc1, 'm'), methname
+            return True
+        has_implementation_for._annspecialcase_ = 'specialize:memo'
+
         def INSN(self, loc1, loc2):
             code1 = loc1.location_code()
             code2 = loc2.location_code()
@@ -325,6 +339,8 @@
                 assert code2 not in ('j', 'i')
 
             for possible_code2 in unrolling_location_codes:
+                if not has_implementation_for('?', possible_code2):
+                    continue
                 if code2 == possible_code2:
                     val2 = getattr(loc2, "value_" + possible_code2)()
                     #
@@ -335,28 +351,32 @@
                     #
                     # Regular case
                     for possible_code1 in unrolling_location_codes:
+                        if not has_implementation_for(possible_code1,
+                                                      possible_code2):
+                            continue
                         if code1 == possible_code1:
                             val1 = getattr(loc1, "value_" + possible_code1)()
                             # More faking out of certain operations for x86_64
-                            if possible_code1 == 'j' and not rx86.fits_in_32bits(val1):
+                            fits32 = rx86.fits_in_32bits
+                            if possible_code1 == 'j' and not fits32(val1):
                                 val1 = self._addr_as_reg_offset(val1)
                                 invoke(self, "m" + possible_code2, val1, val2)
-                            elif possible_code2 == 'j' and not rx86.fits_in_32bits(val2):
+                                return
+                            if possible_code2 == 'j' and not fits32(val2):
                                 val2 = self._addr_as_reg_offset(val2)
                                 invoke(self, possible_code1 + "m", val1, val2)
-                            elif possible_code1 == 'm' and not rx86.fits_in_32bits(val1[1]):
+                                return
+                            if possible_code1 == 'm' and not fits32(val1[1]):
                                 val1 = self._fix_static_offset_64_m(val1)
-                                invoke(self, "a" + possible_code2, val1, val2)
-                            elif possible_code2 == 'm' and not rx86.fits_in_32bits(val2[1]):
+                            if possible_code2 == 'm' and not fits32(val2[1]):
                                 val2 = self._fix_static_offset_64_m(val2)
-                                invoke(self, possible_code1 + "a", val1, val2)
-                            else:
-                                if possible_code1 == 'a' and not rx86.fits_in_32bits(val1[3]):
-                                    val1 = self._fix_static_offset_64_a(val1)
-                                if possible_code2 == 'a' and not rx86.fits_in_32bits(val2[3]):
-                                    val2 = self._fix_static_offset_64_a(val2)
-                                invoke(self, possible_code1 + possible_code2, val1, val2)
+                            if possible_code1 == 'a' and not fits32(val1[3]):
+                                val1 = self._fix_static_offset_64_a(val1)
+                            if possible_code2 == 'a' and not fits32(val2[3]):
+                                val2 = self._fix_static_offset_64_a(val2)
+                            invoke(self, possible_code1 + possible_code2, val1, val2)
                             return
+            _missing_binary_insn(name, code1, code2)
 
         return func_with_new_name(INSN, "INSN_" + name)
 
@@ -431,12 +451,14 @@
     def _fix_static_offset_64_m(self, (basereg, static_offset)):
         # For cases where an AddressLoc has the location_code 'm', but
         # where the static offset does not fit in 32-bits.  We have to fall
-        # back to the X86_64_SCRATCH_REG.  Note that this returns a location
-        # encoded as mode 'a'.  These are all possibly rare cases; don't try
+        # back to the X86_64_SCRATCH_REG.  Returns a new location encoded
+        # as mode 'm' too.  These are all possibly rare cases; don't try
         # to reuse a past value of the scratch register at all.
         self._scratch_register_known = False
         self.MOV_ri(X86_64_SCRATCH_REG.value, static_offset)
-        return (basereg, X86_64_SCRATCH_REG.value, 0, 0)
+        self.LEA_ra(X86_64_SCRATCH_REG.value,
+                    (basereg, X86_64_SCRATCH_REG.value, 0, 0))
+        return (X86_64_SCRATCH_REG.value, 0)
 
     def _fix_static_offset_64_a(self, (basereg, scalereg,
                                        scale, static_offset)):
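
In regloc.py the per-class location_code() methods become a plain _location_code class attribute, and the generated INSN dispatcher now only unrolls operand-code combinations for which an encoder actually exists.  The existence check that has_implementation_for memoizes boils down to a hasattr lookup on the code builder; roughly (illustrative only, assuming rx86 is imported as it is in this module):

    def has_encoding(name, code1, code2):
        # e.g. has_encoding('MOV', 'r', 'i') checks that rx86 defines MOV_ri
        return hasattr(rx86.AbstractX86CodeBuilder, '%s_%s%s' % (name, code1, code2))
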
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -215,14 +215,3 @@
         super(CPU_X86_64, self).__init__(*args, **kwargs)
 
 CPU = CPU386
-
-# silence warnings
-##history.LoopToken._x86_param_depth = 0
-##history.LoopToken._x86_arglocs = (None, None)
-##history.LoopToken._x86_frame_depth = 0
-##history.LoopToken._x86_bootstrap_code = 0
-##history.LoopToken._x86_direct_bootstrap_code = 0
-##history.LoopToken._x86_loop_code = 0
-##history.LoopToken._x86_debug_checksum = 0
-##compile.AbstractFailDescr._x86_current_depths = (0, 0)
-##compile.AbstractFailDescr._x86_adr_jump_offset = 0
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -745,6 +745,7 @@
     assert insnname_template.count('*') == 1
     add_insn('x', register(2), '\xC0')
     add_insn('j', abs_, immediate(2))
+    add_insn('m', mem_reg_plus_const(2))
 
 define_pxmm_insn('PADDQ_x*',     '\xD4')
 define_pxmm_insn('PSUBQ_x*',     '\xFB')
diff --git a/pypy/jit/backend/x86/test/test_fficall.py b/pypy/jit/backend/x86/test/test_fficall.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/x86/test/test_fficall.py
@@ -0,0 +1,8 @@
+import py
+from pypy.jit.metainterp.test import test_fficall
+from pypy.jit.backend.x86.test.test_basic import Jit386Mixin
+
+class TestFfiLookups(Jit386Mixin, test_fficall.FfiLookupTests):
+    # for the individual tests see
+    # ====> ../../../metainterp/test/test_fficall.py
+    supports_all = True
diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py b/pypy/jit/backend/x86/test/test_gc_integration.py
--- a/pypy/jit/backend/x86/test/test_gc_integration.py
+++ b/pypy/jit/backend/x86/test/test_gc_integration.py
@@ -4,7 +4,7 @@
 
 import py
 from pypy.jit.metainterp.history import BoxInt, ConstInt,\
-     BoxPtr, ConstPtr, TreeLoop
+     BoxPtr, ConstPtr, TreeLoop, TargetToken
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.codewriter import heaptracker
 from pypy.jit.codewriter.effectinfo import EffectInfo
@@ -113,6 +113,8 @@
     descr0 = cpu.fielddescrof(S, 'int')
     ptr0 = struct_ref
 
+    targettoken = TargetToken()
+
     namespace = locals().copy()
 
     def test_basic(self):
@@ -136,6 +138,7 @@
     def test_bug_0(self):
         ops = '''
         [i0, i1, i2, i3, i4, i5, i6, i7, i8]
+        label(i0, i1, i2, i3, i4, i5, i6, i7, i8, descr=targettoken)
         guard_value(i2, 1) [i2, i3, i4, i5, i6, i7, i0, i1, i8]
         guard_class(i4, 138998336) [i4, i5, i6, i7, i0, i1, i8]
         i11 = getfield_gc(i4, descr=descr0)
@@ -163,7 +166,7 @@
         guard_false(i32) [i4, i6, i7, i0, i1, i24]
         i33 = getfield_gc(i0, descr=descr0)
         guard_value(i33, ConstPtr(ptr0)) [i4, i6, i7, i0, i1, i33, i24]
-        jump(i0, i1, 1, 17, i4, ConstPtr(ptr0), i6, i7, i24)
+        jump(i0, i1, 1, 17, i4, ConstPtr(ptr0), i6, i7, i24, descr=targettoken)
         '''
         self.interpret(ops, [0, 0, 0, 0, 0, 0, 0, 0, 0], run=False)
 
diff --git a/pypy/jit/backend/x86/test/test_recompilation.py b/pypy/jit/backend/x86/test/test_recompilation.py
--- a/pypy/jit/backend/x86/test/test_recompilation.py
+++ b/pypy/jit/backend/x86/test/test_recompilation.py
@@ -5,10 +5,11 @@
     def test_compile_bridge_not_deeper(self):
         ops = '''
         [i0]
+        label(i0, descr=targettoken)
         i1 = int_add(i0, 1)
         i2 = int_lt(i1, 20)
         guard_true(i2, descr=fdescr1) [i1]
-        jump(i1)
+        jump(i1, descr=targettoken)
         '''
         loop = self.interpret(ops, [0])
         assert self.getint(0) == 20
@@ -26,14 +27,15 @@
     def test_compile_bridge_deeper(self):
         ops = '''
         [i0]
+        label(i0, descr=targettoken)
         i1 = int_add(i0, 1)
         i2 = int_lt(i1, 20)
         guard_true(i2, descr=fdescr1) [i1]
-        jump(i1)
+        jump(i1, descr=targettoken)
         '''
         loop = self.interpret(ops, [0])
-        previous = loop.token._x86_frame_depth
-        assert loop.token._x86_param_depth == 0
+        previous = loop._jitcelltoken.compiled_loop_token.frame_depth
+        assert loop._jitcelltoken.compiled_loop_token.param_depth == 0
         assert self.getint(0) == 20
         ops = '''
         [i1]
@@ -42,17 +44,17 @@
         i5 = int_add(i4, 1)
         i6 = int_add(i5, 1)
         i7 = int_add(i5, i4)
+        force_spill(i5)
         i8 = int_add(i7, 1)
         i9 = int_add(i8, 1)
         finish(i3, i4, i5, i6, i7, i8, i9, descr=fdescr2)
         '''
         bridge = self.attach_bridge(ops, loop, -2)
-        descr = loop.operations[2].getdescr()
+        descr = loop.operations[3].getdescr()
         new = descr._x86_bridge_frame_depth
-        assert descr._x86_bridge_param_depth == 0        
-        # XXX: Maybe add enough ops to force stack on 64-bit as well?
-        if IS_X86_32:
-            assert new > previous
+        assert descr._x86_bridge_param_depth == 0
+        # the force_spill() forces the stack to grow
+        assert new > previous
         self.cpu.set_future_value_int(0, 0)
         fail = self.run(loop)
         assert fail.identifier == 2
@@ -64,21 +66,23 @@
     def test_bridge_jump_to_other_loop(self):
         loop = self.interpret('''
         [i0, i10, i11, i12, i13, i14, i15, i16]
+        label(i0, i10, i11, i12, i13, i14, i15, i16, descr=targettoken)
         i1 = int_add(i0, 1)
         i2 = int_lt(i1, 20)
         guard_true(i2, descr=fdescr1) [i1]
-        jump(i1, i10, i11, i12, i13, i14, i15, i16)
+        jump(i1, i10, i11, i12, i13, i14, i15, i16, descr=targettoken)
         ''', [0])
         other_loop = self.interpret('''
         [i3]
+        label(i3, descr=targettoken2)
         guard_false(i3, descr=fdescr2) [i3]
-        jump(i3)
+        jump(i3, descr=targettoken2)
         ''', [1])
         ops = '''
         [i3]
-        jump(i3, 1, 2, 3, 4, 5, 6, 7, descr=looptoken)
+        jump(i3, 1, 2, 3, 4, 5, 6, 7, descr=targettoken)
         '''
-        bridge = self.attach_bridge(ops, other_loop, 0, looptoken=loop.token)
+        bridge = self.attach_bridge(ops, other_loop, 1)
         self.cpu.set_future_value_int(0, 1)
         fail = self.run(other_loop)
         assert fail.identifier == 1
@@ -86,6 +90,7 @@
     def test_bridge_jumps_to_self_deeper(self):
         loop = self.interpret('''
         [i0, i1, i2, i31, i32, i33]
+        label(i0, i1, i2, i31, i32, i33, descr=targettoken)
         i98 = same_as(0)
         i99 = same_as(1)
         i30 = int_add(i1, i2)
@@ -94,7 +99,7 @@
         guard_false(i4) [i98, i3]
         i5 = int_lt(i3, 20)
         guard_true(i5) [i99, i3]
-        jump(i3, i30, 1, i30, i30, i30)
+        jump(i3, i30, 1, i30, i30, i30, descr=targettoken)
         ''', [0])
         assert self.getint(0) == 0
         assert self.getint(1) == 1
@@ -104,17 +109,19 @@
         i8 = int_add(i3, 1)
         i6 = int_add(i8, i10)
         i7 = int_add(i3, i6)
+        force_spill(i6)
+        force_spill(i7)
+        force_spill(i8)
         i12 = int_add(i7, i8)
         i11 = int_add(i12, i6)
-        jump(i3, i12, i11, i10, i6, i7, descr=looptoken)
+        jump(i3, i12, i11, i10, i6, i7, descr=targettoken)
         '''
-        bridge = self.attach_bridge(ops, loop, 5, looptoken=loop.token)
-        guard_op = loop.operations[5]
-        loop_frame_depth = loop.token._x86_frame_depth
-        assert loop.token._x86_param_depth == 0
-        # XXX: Maybe add enough ops to force stack on 64-bit as well?
-        if IS_X86_32:
-            assert guard_op.getdescr()._x86_bridge_frame_depth > loop_frame_depth
+        loop_frame_depth = loop._jitcelltoken.compiled_loop_token.frame_depth
+        bridge = self.attach_bridge(ops, loop, 6)
+        guard_op = loop.operations[6]
+        assert loop._jitcelltoken.compiled_loop_token.param_depth == 0
+        # the force_spill() forces the stack to grow
+        assert guard_op.getdescr()._x86_bridge_frame_depth > loop_frame_depth
         assert guard_op.getdescr()._x86_bridge_param_depth == 0
         self.cpu.set_future_value_int(0, 0)
         self.cpu.set_future_value_int(1, 0)
@@ -126,6 +133,7 @@
     def test_bridge_jumps_to_self_shallower(self):
         loop = self.interpret('''
         [i0, i1, i2]
+        label(i0, i1, i2, descr=targettoken)
         i98 = same_as(0)
         i99 = same_as(1)
         i3 = int_add(i0, 1)
@@ -133,15 +141,15 @@
         guard_false(i4) [i98, i3]
         i5 = int_lt(i3, 20)
         guard_true(i5) [i99, i3]
-        jump(i3, i1, i2)
+        jump(i3, i1, i2, descr=targettoken)
         ''', [0])
         assert self.getint(0) == 0
         assert self.getint(1) == 1
         ops = '''
         [i97, i3]
-        jump(i3, 0, 1, descr=looptoken)
+        jump(i3, 0, 1, descr=targettoken)
         '''
-        bridge = self.attach_bridge(ops, loop, 4, looptoken=loop.token)
+        bridge = self.attach_bridge(ops, loop, 5)
         self.cpu.set_future_value_int(0, 0)
         self.cpu.set_future_value_int(1, 0)
         self.cpu.set_future_value_int(2, 0)
diff --git a/pypy/jit/backend/x86/test/test_regalloc.py b/pypy/jit/backend/x86/test/test_regalloc.py
--- a/pypy/jit/backend/x86/test/test_regalloc.py
+++ b/pypy/jit/backend/x86/test/test_regalloc.py
@@ -4,7 +4,7 @@
 
 import py
 from pypy.jit.metainterp.history import BoxInt, ConstInt,\
-     BoxPtr, ConstPtr, LoopToken, BasicFailDescr
+     BoxPtr, ConstPtr, BasicFailDescr, JitCellToken, TargetToken
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.backend.llsupport.descr import GcCache
 from pypy.jit.backend.detect_cpu import getcpuclass
@@ -96,10 +96,16 @@
     raising_calldescr = cpu.calldescrof(FPTR.TO, FPTR.TO.ARGS, FPTR.TO.RESULT,
                                         EffectInfo.MOST_GENERAL)
 
+    targettoken = TargetToken()
+    targettoken2 = TargetToken()
     fdescr1 = BasicFailDescr(1)
     fdescr2 = BasicFailDescr(2)
     fdescr3 = BasicFailDescr(3)
 
+    def setup_method(self, meth):
+        self.targettoken._x86_loop_code = 0
+        self.targettoken2._x86_loop_code = 0
+
     def f1(x):
         return x+1
 
@@ -134,7 +140,8 @@
 
     def interpret(self, ops, args, run=True):
         loop = self.parse(ops)
-        self.cpu.compile_loop(loop.inputargs, loop.operations, loop.token)
+        looptoken = JitCellToken()
+        self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
         for i, arg in enumerate(args):
             if isinstance(arg, int):
                 self.cpu.set_future_value_int(i, arg)
@@ -145,10 +152,18 @@
                 assert isinstance(lltype.typeOf(arg), lltype.Ptr)
                 llgcref = lltype.cast_opaque_ptr(llmemory.GCREF, arg)
                 self.cpu.set_future_value_ref(i, llgcref)
+        loop._jitcelltoken = looptoken
         if run:
-            self.cpu.execute_token(loop.token)
+            self.cpu.execute_token(looptoken)
         return loop
 
+    def prepare_loop(self, ops):
+        loop = self.parse(ops)
+        regalloc = RegAlloc(self.cpu.assembler, False)
+        regalloc.prepare_loop(loop.inputargs, loop.operations,
+                              loop.original_jitcell_token, [])
+        return regalloc
+
     def getint(self, index):
         return self.cpu.get_latest_value_int(index)
 
@@ -167,10 +182,7 @@
         gcref = self.cpu.get_latest_value_ref(index)
         return lltype.cast_opaque_ptr(T, gcref)
 
-    def attach_bridge(self, ops, loop, guard_op_index, looptoken=None, **kwds):
-        if looptoken is not None:
-            self.namespace = self.namespace.copy()
-            self.namespace['looptoken'] = looptoken
+    def attach_bridge(self, ops, loop, guard_op_index, **kwds):
         guard_op = loop.operations[guard_op_index]
         assert guard_op.is_guard()
         bridge = self.parse(ops, **kwds)
@@ -178,20 +190,21 @@
                 [box.type for box in guard_op.getfailargs()])
         faildescr = guard_op.getdescr()
         self.cpu.compile_bridge(faildescr, bridge.inputargs, bridge.operations,
-                                loop.token)
+                                loop._jitcelltoken)
         return bridge
 
     def run(self, loop):
-        return self.cpu.execute_token(loop.token)
+        return self.cpu.execute_token(loop._jitcelltoken)
 
 class TestRegallocSimple(BaseTestRegalloc):
     def test_simple_loop(self):
         ops = '''
         [i0]
+        label(i0, descr=targettoken)
         i1 = int_add(i0, 1)
         i2 = int_lt(i1, 20)
         guard_true(i2) [i1]
-        jump(i1)
+        jump(i1, descr=targettoken)
         '''
         self.interpret(ops, [0])
         assert self.getint(0) == 20
@@ -199,27 +212,29 @@
     def test_two_loops_and_a_bridge(self):
         ops = '''
         [i0, i1, i2, i3]
+        label(i0, i1, i2, i3, descr=targettoken)
         i4 = int_add(i0, 1)
         i5 = int_lt(i4, 20)
         guard_true(i5) [i4, i1, i2, i3]
-        jump(i4, i1, i2, i3)
+        jump(i4, i1, i2, i3, descr=targettoken)
         '''
         loop = self.interpret(ops, [0, 0, 0, 0])
         ops2 = '''
         [i5]
+        label(i5, descr=targettoken2)
         i1 = int_add(i5, 1)
         i3 = int_add(i1, 1)
         i4 = int_add(i3, 1)
         i2 = int_lt(i4, 30)
         guard_true(i2) [i4]
-        jump(i4)
+        jump(i4, descr=targettoken2)
         '''
         loop2 = self.interpret(ops2, [0])
         bridge_ops = '''
         [i4]
-        jump(i4, i4, i4, i4, descr=looptoken)
+        jump(i4, i4, i4, i4, descr=targettoken)
         '''
-        bridge = self.attach_bridge(bridge_ops, loop2, 4, looptoken=loop.token)
+        bridge = self.attach_bridge(bridge_ops, loop2, 5)
         self.cpu.set_future_value_int(0, 0)
         self.run(loop2)
         assert self.getint(0) == 31
@@ -230,10 +245,11 @@
     def test_pointer_arg(self):
         ops = '''
         [i0, p0]
+        label(i0, p0, descr=targettoken)
         i1 = int_add(i0, 1)
         i2 = int_lt(i1, 10)
         guard_true(i2) [p0]
-        jump(i1, p0)
+        jump(i1, p0, descr=targettoken)
         '''
         S = lltype.GcStruct('S')
         ptr = lltype.malloc(S)
@@ -311,10 +327,11 @@
     def test_spill_for_constant(self):
         ops = '''
         [i0, i1, i2, i3]
+        label(i0, i1, i2, i3, descr=targettoken)
         i4 = int_add(3, i1)
         i5 = int_lt(i4, 30)
         guard_true(i5) [i0, i4, i2, i3]
-        jump(1, i4, 3, 4)
+        jump(1, i4, 3, 4, descr=targettoken)
         '''
         self.interpret(ops, [0, 0, 0, 0])
         assert self.getints(4) == [1, 30, 3, 4]
@@ -322,31 +339,34 @@
     def test_spill_for_constant_lshift(self):
         ops = '''
         [i0, i2, i1, i3]
+        label(i0, i2, i1, i3, descr=targettoken)
         i4 = int_lshift(1, i1)
         i5 = int_add(1, i1)
         i6 = int_lt(i5, 30)
         guard_true(i6) [i4, i5, i2, i3]
-        jump(i4, 3, i5, 4)
+        jump(i4, 3, i5, 4, descr=targettoken)
         '''
         self.interpret(ops, [0, 0, 0, 0])
         assert self.getints(4) == [1<<29, 30, 3, 4]
         ops = '''
         [i0, i1, i2, i3]
+        label(i0, i1, i2, i3, descr=targettoken)
         i4 = int_lshift(1, i1)
         i5 = int_add(1, i1)
         i6 = int_lt(i5, 30)
         guard_true(i6) [i4, i5, i2, i3]
-        jump(i4, i5, 3, 4)
+        jump(i4, i5, 3, 4, descr=targettoken)
         '''
         self.interpret(ops, [0, 0, 0, 0])
         assert self.getints(4) == [1<<29, 30, 3, 4]
         ops = '''
         [i0, i3, i1, i2]
+        label(i0, i3, i1, i2, descr=targettoken)
         i4 = int_lshift(1, i1)
         i5 = int_add(1, i1)
         i6 = int_lt(i5, 30)
         guard_true(i6) [i4, i5, i2, i3]
-        jump(i4, 4, i5, 3)
+        jump(i4, 4, i5, 3, descr=targettoken)
         '''
         self.interpret(ops, [0, 0, 0, 0])
         assert self.getints(4) == [1<<29, 30, 3, 4]
@@ -354,11 +374,12 @@
     def test_result_selected_reg_via_neg(self):
         ops = '''
         [i0, i1, i2, i3]
+        label(i0, i1, i2, i3, descr=targettoken)
         i6 = int_neg(i2)
         i7 = int_add(1, i1)
         i4 = int_lt(i7, 10)
         guard_true(i4) [i0, i6, i7]
-        jump(1, i7, i2, i6)
+        jump(1, i7, i2, i6, descr=targettoken)
         '''
         self.interpret(ops, [0, 0, 3, 0])
         assert self.getints(3) == [1, -3, 10]
@@ -366,11 +387,12 @@
     def test_compare_memory_result_survives(self):
         ops = '''
         [i0, i1, i2, i3]
+        label(i0, i1, i2, i3, descr=targettoken)
         i4 = int_lt(i0, i1)
         i5 = int_add(i3, 1)
         i6 = int_lt(i5, 30)
         guard_true(i6) [i4]
-        jump(i0, i1, i4, i5)
+        jump(i0, i1, i4, i5, descr=targettoken)
         '''
         self.interpret(ops, [0, 10, 0, 0])
         assert self.getint(0) == 1
@@ -378,10 +400,11 @@
     def test_jump_different_args(self):
         ops = '''
         [i0, i15, i16, i18, i1, i2, i3]
+        label(i0, i15, i16, i18, i1, i2, i3, descr=targettoken)
         i4 = int_add(i3, 1)
         i5 = int_lt(i4, 20)
         guard_true(i5) [i2, i1]
-        jump(i0, i18, i15, i16, i2, i1, i4)
+        jump(i0, i18, i15, i16, i2, i1, i4, descr=targettoken)
         '''
         self.interpret(ops, [0, 1, 2, 3])
 
@@ -422,6 +445,35 @@
         self.run(loop)
         assert self.getints(9) == range(9)
 
+    def test_loopargs(self):
+        ops = """
+        [i0, i1, i2, i3]
+        i4 = int_add(i0, i1)
+        jump(i4, i1, i2, i3)
+        """
+        regalloc = self.prepare_loop(ops)
+        assert len(regalloc.rm.reg_bindings) == 2
+
+    def test_loopargs_2(self):
+        ops = """
+        [i0, i1, i2, i3]
+        i4 = int_add(i0, i1)
+        finish(i4, i1, i2, i3)
+        """
+        regalloc = self.prepare_loop(ops)
+        assert len(regalloc.rm.reg_bindings) == 2
+
+    def test_loopargs_3(self):
+        ops = """
+        [i0, i1, i2, i3]
+        i4 = int_add(i0, i1)
+        guard_true(i4) [i0, i1, i2, i3, i4]
+        jump(i4, i1, i2, i3)
+        """
+        regalloc = self.prepare_loop(ops)
+        assert len(regalloc.rm.reg_bindings) == 2
+    
+
 class TestRegallocCompOps(BaseTestRegalloc):
     
     def test_cmp_op_0(self):
@@ -438,6 +490,7 @@
 class TestRegallocMoreRegisters(BaseTestRegalloc):
 
     cpu = BaseTestRegalloc.cpu
+    targettoken = TargetToken()
 
     S = lltype.GcStruct('S', ('field', lltype.Char))
     fielddescr = cpu.fielddescrof(S, 'field')
@@ -510,6 +563,7 @@
     def test_division_optimized(self):
         ops = '''
         [i7, i6]
+        label(i7, i6, descr=targettoken)
         i18 = int_floordiv(i7, i6)
         i19 = int_xor(i7, i6)
         i21 = int_lt(i19, 0)
@@ -517,7 +571,7 @@
         i23 = int_is_true(i22)
         i24 = int_eq(i6, 4)
         guard_false(i24) [i18]
-        jump(i18, i6)
+        jump(i18, i6, descr=targettoken)
         '''
         self.interpret(ops, [10, 4])
         assert self.getint(0) == 2
@@ -588,7 +642,8 @@
         '''
         loop = self.interpret(ops, [4, 7, 9, 9 ,9, 9, 9, 9, 9, 9, 9])
         assert self.getints(11) == [5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9]
-        assert loop.token._x86_param_depth == self.expected_param_depth(1)
+        clt = loop._jitcelltoken.compiled_loop_token
+        assert clt.param_depth == self.expected_param_depth(1)
 
     def test_two_calls(self):
         ops = '''
@@ -599,7 +654,8 @@
         '''
         loop = self.interpret(ops, [4, 7, 9, 9 ,9, 9, 9, 9, 9, 9, 9])
         assert self.getints(11) == [5*7, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9]
-        assert loop.token._x86_param_depth == self.expected_param_depth(2)
+        clt = loop._jitcelltoken.compiled_loop_token
+        assert clt.param_depth == self.expected_param_depth(2)
 
     def test_call_many_arguments(self):
         # NB: The first and last arguments in the call are constants. This
@@ -612,7 +668,8 @@
         '''
         loop = self.interpret(ops, [2, 3, 4, 5, 6, 7, 8, 9])
         assert self.getint(0) == 55
-        assert loop.token._x86_param_depth == self.expected_param_depth(10)
+        clt = loop._jitcelltoken.compiled_loop_token
+        assert clt.param_depth == self.expected_param_depth(10)
 
     def test_bridge_calls_1(self):
         ops = '''
diff --git a/pypy/jit/backend/x86/test/test_regalloc2.py b/pypy/jit/backend/x86/test/test_regalloc2.py
--- a/pypy/jit/backend/x86/test/test_regalloc2.py
+++ b/pypy/jit/backend/x86/test/test_regalloc2.py
@@ -1,6 +1,6 @@
 import py
 from pypy.jit.metainterp.history import ResOperation, BoxInt, ConstInt,\
-     BoxPtr, ConstPtr, BasicFailDescr, LoopToken
+     BoxPtr, ConstPtr, BasicFailDescr, JitCellToken
 from pypy.jit.metainterp.resoperation import rop
 from pypy.jit.backend.detect_cpu import getcpuclass
 from pypy.jit.backend.x86.arch import WORD
@@ -20,7 +20,7 @@
         ]
     cpu = CPU(None, None)
     cpu.setup_once()
-    looptoken = LoopToken()
+    looptoken = JitCellToken()
     cpu.compile_loop(inputargs, operations, looptoken)
     cpu.set_future_value_int(0, 9)
     cpu.execute_token(looptoken)
@@ -43,7 +43,7 @@
             ]
     cpu = CPU(None, None)
     cpu.setup_once()
-    looptoken = LoopToken()
+    looptoken = JitCellToken()
     cpu.compile_loop(inputargs, operations, looptoken)
     cpu.set_future_value_int(0, -10)
     cpu.execute_token(looptoken)
@@ -140,7 +140,7 @@
             ]
     cpu = CPU(None, None)
     cpu.setup_once()
-    looptoken = LoopToken()
+    looptoken = JitCellToken()
     cpu.compile_loop(inputargs, operations, looptoken)
     cpu.set_future_value_int(0, -13)
     cpu.set_future_value_int(1, 10)
@@ -255,7 +255,7 @@
             ]
     cpu = CPU(None, None)
     cpu.setup_once()
-    looptoken = LoopToken()
+    looptoken = JitCellToken()
     cpu.compile_loop(inputargs, operations, looptoken)
     cpu.set_future_value_int(0, 17)
     cpu.set_future_value_int(1, -20)
diff --git a/pypy/jit/backend/x86/test/test_regloc.py b/pypy/jit/backend/x86/test/test_regloc.py
--- a/pypy/jit/backend/x86/test/test_regloc.py
+++ b/pypy/jit/backend/x86/test/test_regloc.py
@@ -146,8 +146,10 @@
         expected_instructions = (
                 # mov r11, 0xFEDCBA9876543210
                 '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE'
-                # mov rcx, [rdx+r11]
-                '\x4A\x8B\x0C\x1A'
+                # lea r11, [rdx+r11]
+                '\x4E\x8D\x1C\x1A'
+                # mov rcx, [r11]
+                '\x49\x8B\x0B'
         )
         assert cb.getvalue() == expected_instructions
 
@@ -174,6 +176,30 @@
 
     # ------------------------------------------------------------
 
+    def test_MOV_64bit_constant_into_r11(self):
+        base_constant = 0xFEDCBA9876543210
+        cb = LocationCodeBuilder64()
+        cb.MOV(r11, imm(base_constant))
+
+        expected_instructions = (
+                # mov r11, 0xFEDCBA9876543210
+                '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE'
+        )
+        assert cb.getvalue() == expected_instructions
+
+    def test_MOV_64bit_address_into_r11(self):
+        base_addr = 0xFEDCBA9876543210
+        cb = LocationCodeBuilder64()
+        cb.MOV(r11, heap(base_addr))
+
+        expected_instructions = (
+                # mov r11, 0xFEDCBA9876543210
+                '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE' +
+                # mov r11, [r11]
+                '\x4D\x8B\x1B'
+        )
+        assert cb.getvalue() == expected_instructions
+
     def test_MOV_immed32_into_64bit_address_1(self):
         immed = -0x01234567
         base_addr = 0xFEDCBA9876543210
@@ -217,8 +243,10 @@
         expected_instructions = (
                 # mov r11, 0xFEDCBA9876543210
                 '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE'
-                # mov [rdx+r11], -0x01234567
-                '\x4A\xC7\x04\x1A\x99\xBA\xDC\xFE'
+                # lea r11, [rdx+r11]
+                '\x4E\x8D\x1C\x1A'
+                # mov [r11], -0x01234567
+                '\x49\xC7\x03\x99\xBA\xDC\xFE'
         )
         assert cb.getvalue() == expected_instructions
 
@@ -300,8 +328,10 @@
                 '\x48\xBA\xEF\xCD\xAB\x89\x67\x45\x23\x01'
                 # mov r11, 0xFEDCBA9876543210
                 '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE'
-                # mov [rax+r11], rdx
-                '\x4A\x89\x14\x18'
+                # lea r11, [rax+r11]
+                '\x4E\x8D\x1C\x18'
+                # mov [r11], rdx
+                '\x49\x89\x13'
                 # pop rdx
                 '\x5A'
         )
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -1,9 +1,10 @@
 import py
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi, rstr, rclass
 from pypy.rpython.annlowlevel import llhelper
-from pypy.jit.metainterp.history import ResOperation, LoopToken
+from pypy.jit.metainterp.history import ResOperation, TargetToken, JitCellToken
 from pypy.jit.metainterp.history import (BoxInt, BoxPtr, ConstInt, ConstFloat,
-                                         ConstPtr, Box, BoxFloat, BasicFailDescr)
+                                         ConstPtr, Box, BoxFloat,
+                                         BasicFailDescr)
 from pypy.jit.backend.detect_cpu import getcpuclass
 from pypy.jit.backend.x86.arch import WORD
 from pypy.jit.backend.x86.rx86 import fits_in_32bits
@@ -279,7 +280,7 @@
                                      descr=BasicFailDescr()),
                         ]
                     ops[-2].setfailargs([i1])
-                    looptoken = LoopToken()
+                    looptoken = JitCellToken()
                     self.cpu.compile_loop([b], ops, looptoken)
                     if op == rop.INT_IS_TRUE:
                         self.cpu.set_future_value_int(0, b.value)
@@ -329,7 +330,7 @@
                         ]
                     ops[-2].setfailargs([i1])
                     inputargs = [i for i in (a, b) if isinstance(i, Box)]
-                    looptoken = LoopToken()
+                    looptoken = JitCellToken()
                     self.cpu.compile_loop(inputargs, ops, looptoken)
                     for i, box in enumerate(inputargs):
                         self.cpu.set_future_value_int(i, box.value)
@@ -353,9 +354,10 @@
         i0 = BoxInt()
         i1 = BoxInt()
         i2 = BoxInt()
+        targettoken = TargetToken()
         faildescr1 = BasicFailDescr(1)
         faildescr2 = BasicFailDescr(2)
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
         looptoken.number = 17
         class FakeString(object):
             def __init__(self, val):
@@ -365,14 +367,15 @@
                 return self.val
 
         operations = [
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken),
             ResOperation(rop.DEBUG_MERGE_POINT, [FakeString("hello"), 0], None),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=faildescr1),
-            ResOperation(rop.JUMP, [i1], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken),
             ]
         inputargs = [i0]
-        operations[3].setfailargs([i1])
+        operations[-2].setfailargs([i1])
         self.cpu.compile_loop(inputargs, operations, looptoken)
         name, loopaddress, loopsize = agent.functions[0]
         assert name == "Loop # 17: hello (loop counter 0)"
@@ -385,7 +388,7 @@
             ResOperation(rop.INT_LE, [i1b, ConstInt(19)], i3),
             ResOperation(rop.GUARD_TRUE, [i3], None, descr=faildescr2),
             ResOperation(rop.DEBUG_MERGE_POINT, [FakeString("bye"), 0], None),
-            ResOperation(rop.JUMP, [i1b], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1b], None, descr=targettoken),
         ]
         bridge[1].setfailargs([i1b])
 
@@ -408,11 +411,13 @@
         i0 = BoxInt()
         i1 = BoxInt()
         i2 = BoxInt()
-        looptoken = LoopToken()
+        looptoken = JitCellToken()
+        targettoken = TargetToken()
         operations = [
+            ResOperation(rop.LABEL, [i0], None, descr=targettoken),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
-            ResOperation(rop.JUMP, [i1], None, descr=looptoken),
+            ResOperation(rop.JUMP, [i1], None, descr=targettoken),
             ]
         inputargs = [i0]
         debug._log = dlog = debug.DebugLog()
@@ -455,6 +460,9 @@
                                                 EffectInfo.MOST_GENERAL,
                                                 ffi_flags=-1)
             calldescr.get_call_conv = lambda: ffi      # <==== hack
+            # ^^^ we patch get_call_conv() so that the test also makes sense
+            #     on Linux, because clibffi.get_call_conv() would always
+            #     return FFI_DEFAULT_ABI on non-Windows platforms.
             funcbox = ConstInt(rawstart)
             i1 = BoxInt()
             i2 = BoxInt()
@@ -496,7 +504,7 @@
             ops[3].setfailargs([])
             ops[5].setfailargs([])
             ops[7].setfailargs([])
-            looptoken = LoopToken()
+            looptoken = JitCellToken()
             self.cpu.compile_loop([i1, i2], ops, looptoken)
 
             self.cpu.set_future_value_int(0, 123450)
@@ -520,19 +528,21 @@
 
         loop = """
         [i0]
+        label(i0, descr=targettoken)
         debug_merge_point('xyz', 0)
         i1 = int_add(i0, 1)
         i2 = int_ge(i1, 10)
         guard_false(i2) []
-        jump(i1)
+        jump(i1, descr=targettoken)
         """
-        ops = parse(loop)
+        ops = parse(loop, namespace={'targettoken': TargetToken()})
         debug._log = dlog = debug.DebugLog()
         try:
             self.cpu.assembler.set_debug(True)
-            self.cpu.compile_loop(ops.inputargs, ops.operations, ops.token)
+            looptoken = JitCellToken()
+            self.cpu.compile_loop(ops.inputargs, ops.operations, looptoken)
             self.cpu.set_future_value_int(0, 0)
-            self.cpu.execute_token(ops.token)
+            self.cpu.execute_token(looptoken)
             # check debugging info
             struct = self.cpu.assembler.loop_run_counters[0]
             assert struct.i == 10
@@ -544,16 +554,18 @@
     def test_debugger_checksum(self):
         loop = """
         [i0]
+        label(i0, descr=targettoken)
         debug_merge_point('xyz', 0)
         i1 = int_add(i0, 1)
         i2 = int_ge(i1, 10)
         guard_false(i2) []
-        jump(i1)
+        jump(i1, descr=targettoken)
         """
-        ops = parse(loop)
+        ops = parse(loop, namespace={'targettoken': TargetToken()})
         self.cpu.assembler.set_debug(True)
-        self.cpu.compile_loop(ops.inputargs, ops.operations, ops.token)
+        looptoken = JitCellToken()
+        self.cpu.compile_loop(ops.inputargs, ops.operations, looptoken)
         self.cpu.set_future_value_int(0, 0)
-        self.cpu.execute_token(ops.token)
-        assert ops.token._x86_debug_checksum == sum([op.getopnum()
+        self.cpu.execute_token(looptoken)
+        assert looptoken._x86_debug_checksum == sum([op.getopnum()
                                                      for op in ops.operations])
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -457,6 +457,46 @@
     def test_compile_framework_7(self):
         self.run('compile_framework_7')
 
+    def define_compile_framework_7_interior(cls):
+        # Array of structs containing pointers (test the write barrier
+        # for setinteriorfield_gc)
+        S = lltype.GcStruct('S', ('i', lltype.Signed))
+        A = lltype.GcArray(lltype.Struct('entry', ('x', lltype.Ptr(S)),
+                                                  ('y', lltype.Ptr(S)),
+                                                  ('z', lltype.Ptr(S))))
+        class Glob:
+            a = lltype.nullptr(A)
+        glob = Glob()
+        #
+        def make_s(i):
+            s = lltype.malloc(S)
+            s.i = i
+            return s
+        #
+        @unroll_safe
+        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            a = glob.a
+            if not a:
+                a = glob.a = lltype.malloc(A, 10)
+            i = 0
+            while i < 10:
+                a[i].x = make_s(n + i * 100 + 1)
+                a[i].y = make_s(n + i * 100 + 2)
+                a[i].z = make_s(n + i * 100 + 3)
+                i += 1
+            i = 0
+            while i < 10:
+                check(a[i].x.i == n + i * 100 + 1)
+                check(a[i].y.i == n + i * 100 + 2)
+                check(a[i].z.i == n + i * 100 + 3)
+                i += 1
+            n -= x.foo
+            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
+        return None, f, None
+
+    def test_compile_framework_7_interior(self):
+        self.run('compile_framework_7_interior')
+
     def define_compile_framework_8(cls):
         # Array of pointers, of unknown length (test write_barrier_from_array)
         def before(n, x):
diff --git a/pypy/jit/backend/x86/test/test_ztranslation.py b/pypy/jit/backend/x86/test/test_ztranslation.py
--- a/pypy/jit/backend/x86/test/test_ztranslation.py
+++ b/pypy/jit/backend/x86/test/test_ztranslation.py
@@ -1,6 +1,6 @@
 import py, os, sys
 from pypy.tool.udir import udir
-from pypy.rlib.jit import JitDriver, unroll_parameters
+from pypy.rlib.jit import JitDriver, unroll_parameters, set_param
 from pypy.rlib.jit import PARAMETERS, dont_look_inside
 from pypy.rlib.jit import promote
 from pypy.jit.metainterp.jitprof import Profiler
@@ -47,9 +47,9 @@
         def f(i, j):
             for param, _ in unroll_parameters:
                 defl = PARAMETERS[param]
-                jitdriver.set_param(param, defl)
-            jitdriver.set_param("threshold", 3)
-            jitdriver.set_param("trace_eagerness", 2)
+                set_param(jitdriver, param, defl)
+            set_param(jitdriver, "threshold", 3)
+            set_param(jitdriver, "trace_eagerness", 2)
             total = 0
             frame = Frame(i)
             while frame.i > 3:
@@ -213,8 +213,8 @@
             else:
                 return Base()
         def myportal(i):
-            jitdriver.set_param("threshold", 3)
-            jitdriver.set_param("trace_eagerness", 2)
+            set_param(jitdriver, "threshold", 3)
+            set_param(jitdriver, "trace_eagerness", 2)
             total = 0
             n = i
             while True:
diff --git a/pypy/jit/backend/x86/tool/viewcode.py b/pypy/jit/backend/x86/tool/viewcode.py
--- a/pypy/jit/backend/x86/tool/viewcode.py
+++ b/pypy/jit/backend/x86/tool/viewcode.py
@@ -58,7 +58,7 @@
     assert not p.returncode, ('Encountered an error running objdump: %s' %
                               stderr)
     # drop some objdump cruft
-    lines = stdout.splitlines()[6:]
+    lines = stdout.splitlines(True)[6:]     # drop some objdump cruft
     return format_code_dump_with_labels(originaddr, lines, label_list)
 
 def format_code_dump_with_labels(originaddr, lines, label_list):
@@ -97,7 +97,7 @@
     stdout, stderr = p.communicate()
     assert not p.returncode, ('Encountered an error running nm: %s' %
                               stderr)
-    for line in stdout.splitlines():
+    for line in stdout.splitlines(True):
         match = re_symbolentry.match(line)
         if match:
             addr = long(match.group(1), 16)
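
A note on the viewcode.py change above: splitlines(True) keeps the line
terminator on each returned line, whereas plain splitlines() strips it; for
example:

    >>> "a\nb\n".splitlines(True)
    ['a\n', 'b\n']
    >>> "a\nb\n".splitlines()
    ['a', 'b']

The downstream label-formatting code presumably relies on those terminators
being preserved.
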
diff --git a/pypy/jit/codewriter/call.py b/pypy/jit/codewriter/call.py
--- a/pypy/jit/codewriter/call.py
+++ b/pypy/jit/codewriter/call.py
@@ -212,7 +212,10 @@
         elidable = False
         loopinvariant = False
         if op.opname == "direct_call":
-            func = getattr(get_funcobj(op.args[0].value), '_callable', None)
+            funcobj = get_funcobj(op.args[0].value)
+            assert getattr(funcobj, 'calling_conv', 'c') == 'c', (
+                "%r: getcalldescr() with a non-default call ABI" % (op,))
+            func = getattr(funcobj, '_callable', None)
             elidable = getattr(func, "_elidable_function_", False)
             loopinvariant = getattr(func, "_jit_loop_invariant_", False)
             if loopinvariant:
diff --git a/pypy/jit/codewriter/codewriter.py b/pypy/jit/codewriter/codewriter.py
--- a/pypy/jit/codewriter/codewriter.py
+++ b/pypy/jit/codewriter/codewriter.py
@@ -104,6 +104,8 @@
         else:
             name = 'unnamed' % id(ssarepr)
         i = 1
+        # escape <lambda> names for windows
+        name = name.replace('<lambda>', '_(lambda)_')
         extra = ''
         while name+extra in self._seen_files:
             i += 1
diff --git a/pypy/jit/codewriter/effectinfo.py b/pypy/jit/codewriter/effectinfo.py
--- a/pypy/jit/codewriter/effectinfo.py
+++ b/pypy/jit/codewriter/effectinfo.py
@@ -48,6 +48,8 @@
     OS_LIBFFI_PREPARE           = 60
     OS_LIBFFI_PUSH_ARG          = 61
     OS_LIBFFI_CALL              = 62
+    OS_LIBFFI_GETARRAYITEM      = 63
+    OS_LIBFFI_SETARRAYITEM      = 64
     #
     OS_LLONG_INVERT             = 69
     OS_LLONG_ADD                = 70
@@ -78,6 +80,9 @@
     #
     OS_MATH_SQRT                = 100
 
+    # for debugging:
+    _OS_CANRAISE = set([OS_NONE, OS_STR2UNICODE, OS_LIBFFI_CALL])
+
     def __new__(cls, readonly_descrs_fields, readonly_descrs_arrays,
                 write_descrs_fields, write_descrs_arrays,
                 extraeffect=EF_CAN_RAISE,
@@ -116,6 +121,8 @@
         result.extraeffect = extraeffect
         result.can_invalidate = can_invalidate
         result.oopspecindex = oopspecindex
+        if result.check_can_raise():
+            assert oopspecindex in cls._OS_CANRAISE
         cls._cache[key] = result
         return result
 
@@ -125,6 +132,10 @@
     def check_can_invalidate(self):
         return self.can_invalidate
 
+    def check_is_elidable(self):
+        return (self.extraeffect == self.EF_ELIDABLE_CAN_RAISE or
+                self.extraeffect == self.EF_ELIDABLE_CANNOT_RAISE)
+
     def check_forces_virtual_or_virtualizable(self):
         return self.extraeffect >= self.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE
 
@@ -230,12 +241,15 @@
         return op.opname == 'jit_force_quasi_immutable'
 
 class RandomEffectsAnalyzer(BoolGraphAnalyzer):
-    def analyze_direct_call(self, graph, seen=None):
-        if hasattr(graph, "func") and hasattr(graph.func, "_ptr"):
-            if graph.func._ptr._obj.random_effects_on_gcobjs:
+    def analyze_external_call(self, op, seen=None):
+        try:
+            funcobj = op.args[0].value._obj
+            if funcobj.random_effects_on_gcobjs:
                 return True
-        return super(RandomEffectsAnalyzer, self).analyze_direct_call(graph,
-                                                                      seen)
+        except (AttributeError, lltype.DelayedPointer):
+            return True   # better safe than sorry
+        return super(RandomEffectsAnalyzer, self).analyze_external_call(
+            op, seen)
 
     def analyze_simple_operation(self, op, graphinfo):
         return False
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -15,6 +15,8 @@
 from pypy.translator.simplify import get_funcobj
 from pypy.translator.unsimplify import varoftype
 
+class UnsupportedMallocFlags(Exception):
+    pass
 
 def transform_graph(graph, cpu=None, callcontrol=None, portal_jd=None):
     """Transform a control flow graph to make it suitable for
@@ -205,7 +207,24 @@
         if op.args[0] in self.vable_array_vars:
             self.vable_array_vars[op.result]= self.vable_array_vars[op.args[0]]
 
-    rewrite_op_cast_pointer = rewrite_op_same_as
+    def rewrite_op_cast_pointer(self, op):
+        newop = self.rewrite_op_same_as(op)
+        assert newop is None
+        return
+        # disabled for now
+        if (self._is_rclass_instance(op.args[0]) and
+                self._is_rclass_instance(op.result)):
+            FROM = op.args[0].concretetype.TO
+            TO = op.result.concretetype.TO
+            if lltype._castdepth(TO, FROM) > 0:
+                vtable = heaptracker.get_vtable_for_gcstruct(self.cpu, TO)
+                const_vtable = Constant(vtable, lltype.typeOf(vtable))
+                return [None, # hack, do the right renaming from op.args[0] to op.result
+                        SpaceOperation("record_known_class", [op.args[0], const_vtable], None)]
+
+    def rewrite_op_jit_record_known_class(self, op):
+        return SpaceOperation("record_known_class", [op.args[0], op.args[1]], None)
+
     def rewrite_op_cast_bool_to_int(self, op): pass
     def rewrite_op_cast_bool_to_uint(self, op): pass
     def rewrite_op_cast_char_to_int(self, op): pass
@@ -481,8 +500,22 @@
 
     def rewrite_op_malloc_varsize(self, op):
         if op.args[1].value['flavor'] == 'raw':
+            d = op.args[1].value.copy()
+            d.pop('flavor')
+            add_memory_pressure = d.pop('add_memory_pressure', False)
+            zero = d.pop('zero', False)
+            track_allocation = d.pop('track_allocation', True)
+            if d:
+                raise UnsupportedMallocFlags(d)
             ARRAY = op.args[0].value
-            return self._do_builtin_call(op, 'raw_malloc',
+            name = 'raw_malloc'
+            if zero:
+                name += '_zero'
+            if add_memory_pressure:
+                name += '_add_memory_pressure'
+            if not track_allocation:
+                name += '_no_track_allocation'
+            return self._do_builtin_call(op, name,
                                          [op.args[2]],
                                          extra = (ARRAY,),
                                          extrakey = ARRAY)
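
A note on the malloc_varsize rewrite above: the raw-malloc flags are mapped
onto a family of pseudo-function names, one per supported flag combination;
any leftover flag raises UnsupportedMallocFlags.  A minimal sketch of the
mapping (hypothetical helper; the real code builds the name inline):

    def raw_malloc_funcname(flags):
        # 'flags' is the malloc_varsize flags dict, e.g.
        # {'flavor': 'raw', 'zero': True}
        name = 'raw_malloc'
        if flags.get('zero', False):
            name += '_zero'
        if flags.get('add_memory_pressure', False):
            name += '_add_memory_pressure'
        if not flags.get('track_allocation', True):
            name += '_no_track_allocation'
        return name

    assert raw_malloc_funcname({'flavor': 'raw'}) == 'raw_malloc'
    assert raw_malloc_funcname({'flavor': 'raw', 'zero': True}) == 'raw_malloc_zero'

The matching build_ll_1_raw_malloc_* helpers are added in
codewriter/support.py later in this diff.
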
@@ -844,6 +877,10 @@
         if self._is_gc(op.args[0]):
             return op
 
+    def rewrite_op_cast_opaque_ptr(self, op):
+        # None causes the result of this op to get aliased to op.args[0]
+        return [SpaceOperation('mark_opaque_ptr', op.args, None), None]
+
     def rewrite_op_force_cast(self, op):
         v_arg = op.args[0]
         v_result = op.result
@@ -1049,35 +1086,20 @@
     # jit.codewriter.support.
 
     for _op, _oopspec in [('llong_invert',  'INVERT'),
-                          ('ullong_invert', 'INVERT'),
                           ('llong_lt',      'LT'),
                           ('llong_le',      'LE'),
                           ('llong_eq',      'EQ'),
                           ('llong_ne',      'NE'),
                           ('llong_gt',      'GT'),
                           ('llong_ge',      'GE'),
-                          ('ullong_lt',     'ULT'),
-                          ('ullong_le',     'ULE'),
-                          ('ullong_eq',     'EQ'),
-                          ('ullong_ne',     'NE'),
-                          ('ullong_gt',     'UGT'),
-                          ('ullong_ge',     'UGE'),
                           ('llong_add',     'ADD'),
                           ('llong_sub',     'SUB'),
                           ('llong_mul',     'MUL'),
                           ('llong_and',     'AND'),
                           ('llong_or',      'OR'),
                           ('llong_xor',     'XOR'),
-                          ('ullong_add',    'ADD'),
-                          ('ullong_sub',    'SUB'),
-                          ('ullong_mul',    'MUL'),
-                          ('ullong_and',    'AND'),
-                          ('ullong_or',     'OR'),
-                          ('ullong_xor',    'XOR'),
                           ('llong_lshift',  'LSHIFT'),
                           ('llong_rshift',  'RSHIFT'),
-                          ('ullong_lshift', 'LSHIFT'),
-                          ('ullong_rshift', 'URSHIFT'),
                           ('cast_int_to_longlong',     'FROM_INT'),
                           ('truncate_longlong_to_int', 'TO_INT'),
                           ('cast_float_to_longlong',   'FROM_FLOAT'),
@@ -1100,6 +1122,21 @@
                           ('cast_uint_to_ulonglong',    'FROM_UINT'),
                           ('cast_float_to_ulonglong',   'FROM_FLOAT'),
                           ('cast_ulonglong_to_float',   'U_TO_FLOAT'),
+                          ('ullong_invert', 'INVERT'),
+                          ('ullong_lt',     'ULT'),
+                          ('ullong_le',     'ULE'),
+                          ('ullong_eq',     'EQ'),
+                          ('ullong_ne',     'NE'),
+                          ('ullong_gt',     'UGT'),
+                          ('ullong_ge',     'UGE'),
+                          ('ullong_add',    'ADD'),
+                          ('ullong_sub',    'SUB'),
+                          ('ullong_mul',    'MUL'),
+                          ('ullong_and',    'AND'),
+                          ('ullong_or',     'OR'),
+                          ('ullong_xor',    'XOR'),
+                          ('ullong_lshift', 'LSHIFT'),
+                          ('ullong_rshift', 'URSHIFT'),
                          ]:
         exec py.code.Source('''
             def rewrite_op_%s(self, op):
@@ -1130,7 +1167,7 @@
 
     def rewrite_op_llong_is_true(self, op):
         v = varoftype(op.args[0].concretetype)
-        op0 = SpaceOperation('cast_int_to_longlong',
+        op0 = SpaceOperation('cast_primitive',
                              [Constant(0, lltype.Signed)],
                              v)
         args = [op.args[0], v]
@@ -1611,6 +1648,12 @@
         elif oopspec_name.startswith('libffi_call_'):
             oopspecindex = EffectInfo.OS_LIBFFI_CALL
             extraeffect = EffectInfo.EF_RANDOM_EFFECTS
+        elif oopspec_name == 'libffi_array_getitem':
+            oopspecindex = EffectInfo.OS_LIBFFI_GETARRAYITEM
+            extraeffect = EffectInfo.EF_CANNOT_RAISE
+        elif oopspec_name == 'libffi_array_setitem':
+            oopspecindex = EffectInfo.OS_LIBFFI_SETARRAYITEM
+            extraeffect = EffectInfo.EF_CANNOT_RAISE
         else:
             assert False, 'unsupported oopspec: %s' % oopspec_name
         return self._handle_oopspec_call(op, args, oopspecindex, extraeffect)
diff --git a/pypy/jit/codewriter/support.py b/pypy/jit/codewriter/support.py
--- a/pypy/jit/codewriter/support.py
+++ b/pypy/jit/codewriter/support.py
@@ -37,9 +37,11 @@
     return a.typeannotation(t)
 
 def annotate(func, values, inline=None, backendoptimize=True,
-             type_system="lltype"):
+             type_system="lltype", translationoptions={}):
     # build the normal ll graphs for ll_function
     t = TranslationContext()
+    for key, value in translationoptions.items():
+        setattr(t.config.translation, key, value)
     annpolicy = AnnotatorPolicy()
     annpolicy.allow_someobjects = False
     a = t.buildannotator(policy=annpolicy)
@@ -256,6 +258,9 @@
     y = ~r_ulonglong(xll)
     return u_to_longlong(y)
 
+def _ll_1_ullong_invert(xull):
+    return ~xull
+
 def _ll_2_llong_lt(xll, yll):
     return xll < yll
 
@@ -274,16 +279,22 @@
 def _ll_2_llong_ge(xll, yll):
     return xll >= yll
 
-def _ll_2_llong_ult(xull, yull):
+def _ll_2_ullong_eq(xull, yull):
+    return xull == yull
+
+def _ll_2_ullong_ne(xull, yull):
+    return xull != yull
+
+def _ll_2_ullong_ult(xull, yull):
     return xull < yull
 
-def _ll_2_llong_ule(xull, yull):
+def _ll_2_ullong_ule(xull, yull):
     return xull <= yull
 
-def _ll_2_llong_ugt(xull, yull):
+def _ll_2_ullong_ugt(xull, yull):
     return xull > yull
 
-def _ll_2_llong_uge(xull, yull):
+def _ll_2_ullong_uge(xull, yull):
     return xull >= yull
 
 def _ll_2_llong_add(xll, yll):
@@ -310,14 +321,41 @@
     z = r_ulonglong(xll) ^ r_ulonglong(yll)
     return u_to_longlong(z)
 
+def _ll_2_ullong_add(xull, yull):
+    z = (xull) + (yull)
+    return (z)
+
+def _ll_2_ullong_sub(xull, yull):
+    z = (xull) - (yull)
+    return (z)
+
+def _ll_2_ullong_mul(xull, yull):
+    z = (xull) * (yull)
+    return (z)
+
+def _ll_2_ullong_and(xull, yull):
+    z = (xull) & (yull)
+    return (z)
+
+def _ll_2_ullong_or(xull, yull):
+    z = (xull) | (yull)
+    return (z)
+
+def _ll_2_ullong_xor(xull, yull):
+    z = (xull) ^ (yull)
+    return (z)
+
 def _ll_2_llong_lshift(xll, y):
     z = r_ulonglong(xll) << y
     return u_to_longlong(z)
 
+def _ll_2_ullong_lshift(xull, y):
+    return xull << y
+
 def _ll_2_llong_rshift(xll, y):
     return xll >> y
 
-def _ll_2_llong_urshift(xull, y):
+def _ll_2_ullong_urshift(xull, y):
     return xull >> y
 
 def _ll_1_llong_from_int(x):
@@ -561,10 +599,21 @@
             return p
         return _ll_0_alloc_with_del
 
-    def build_ll_1_raw_malloc(ARRAY):
-        def _ll_1_raw_malloc(n):
-            return lltype.malloc(ARRAY, n, flavor='raw')
-        return _ll_1_raw_malloc
+    def build_raw_malloc_builder(zero=False, add_memory_pressure=False, track_allocation=True):
+        def build_ll_1_raw_malloc(ARRAY):
+            def _ll_1_raw_malloc(n):
+                return lltype.malloc(ARRAY, n, flavor='raw', zero=zero, add_memory_pressure=add_memory_pressure)
+            return _ll_1_raw_malloc
+        return build_ll_1_raw_malloc
+
+    build_ll_1_raw_malloc = build_raw_malloc_builder()
+    build_ll_1_raw_malloc_zero = build_raw_malloc_builder(zero=True)
+    build_ll_1_raw_malloc_zero_add_memory_pressure = build_raw_malloc_builder(zero=True, add_memory_pressure=True)
+    build_ll_1_raw_malloc_add_memory_pressure = build_raw_malloc_builder(add_memory_pressure=True)
+    build_ll_1_raw_malloc_no_track_allocation = build_raw_malloc_builder(track_allocation=False)
+    build_ll_1_raw_malloc_zero_no_track_allocation = build_raw_malloc_builder(zero=True, track_allocation=False)
+    build_ll_1_raw_malloc_zero_add_memory_pressure_no_track_allocation = build_raw_malloc_builder(zero=True, add_memory_pressure=True, track_allocation=False)
+    build_ll_1_raw_malloc_add_memory_pressure_no_track_allocation = build_raw_malloc_builder(add_memory_pressure=True, track_allocation=False)
 
     def build_ll_1_raw_free(ARRAY):
         def _ll_1_raw_free(p):
diff --git a/pypy/jit/codewriter/test/test_call.py b/pypy/jit/codewriter/test/test_call.py
--- a/pypy/jit/codewriter/test/test_call.py
+++ b/pypy/jit/codewriter/test/test_call.py
@@ -192,3 +192,21 @@
     [op] = block.operations
     call_descr = cc.getcalldescr(op)
     assert call_descr.extrainfo.has_random_effects()
+
+def test_random_effects_on_stacklet_switch():
+    from pypy.jit.backend.llgraph.runner import LLtypeCPU
+    from pypy.rlib._rffi_stacklet import switch, thread_handle, handle
+    @jit.dont_look_inside
+    def f():
+        switch(rffi.cast(thread_handle, 0), rffi.cast(handle, 0))
+
+    rtyper = support.annotate(f, [])
+    jitdriver_sd = FakeJitDriverSD(rtyper.annotator.translator.graphs[0])
+    cc = CallControl(LLtypeCPU(rtyper), jitdrivers_sd=[jitdriver_sd])
+    res = cc.find_all_graphs(FakePolicy())
+
+    [f_graph] = [x for x in res if x.func is f]
+    [block, _] = list(f_graph.iterblocks())
+    op = block.operations[-1]
+    call_descr = cc.getcalldescr(op)
+    assert call_descr.extrainfo.has_random_effects()
diff --git a/pypy/jit/codewriter/test/test_flatten.py b/pypy/jit/codewriter/test/test_flatten.py
--- a/pypy/jit/codewriter/test/test_flatten.py
+++ b/pypy/jit/codewriter/test/test_flatten.py
@@ -5,7 +5,7 @@
 from pypy.jit.codewriter.format import assert_format
 from pypy.jit.codewriter import longlong
 from pypy.jit.metainterp.history import AbstractDescr
-from pypy.rpython.lltypesystem import lltype, rclass, rstr
+from pypy.rpython.lltypesystem import lltype, rclass, rstr, rffi
 from pypy.objspace.flow.model import SpaceOperation, Variable, Constant
 from pypy.translator.unsimplify import varoftype
 from pypy.rlib.rarithmetic import ovfcheck, r_uint, r_longlong, r_ulonglong
@@ -743,7 +743,6 @@
         """, transform=True)
 
     def test_force_cast(self):
-        from pypy.rpython.lltypesystem import rffi
         # NB: we don't need to test for INT here, the logic in jtransform is
         # general enough so that if we have the below cases it should
         # generalize also to INT
@@ -849,7 +848,6 @@
                                        transform=True)
 
     def test_force_cast_pointer(self):
-        from pypy.rpython.lltypesystem import rffi
         def h(p):
             return rffi.cast(rffi.VOIDP, p)
         self.encoding_test(h, [lltype.nullptr(rffi.CCHARP.TO)], """
@@ -857,7 +855,6 @@
         """, transform=True)
 
     def test_force_cast_floats(self):
-        from pypy.rpython.lltypesystem import rffi
         # Casts to lltype.Float
         def f(n):
             return rffi.cast(lltype.Float, n)
@@ -964,7 +961,6 @@
             """, transform=True)
 
     def test_direct_ptradd(self):
-        from pypy.rpython.lltypesystem import rffi
         def f(p, n):
             return lltype.direct_ptradd(p, n)
         self.encoding_test(f, [lltype.nullptr(rffi.CCHARP.TO), 123], """
@@ -975,7 +971,6 @@
 
 def check_force_cast(FROM, TO, operations, value):
     """Check that the test is correctly written..."""
-    from pypy.rpython.lltypesystem import rffi
     import re
     r = re.compile('(\w+) \%i\d, \$(-?\d+)')
     #
diff --git a/pypy/jit/codewriter/test/test_jtransform.py b/pypy/jit/codewriter/test/test_jtransform.py
--- a/pypy/jit/codewriter/test/test_jtransform.py
+++ b/pypy/jit/codewriter/test/test_jtransform.py
@@ -1,3 +1,5 @@
+
+import py
 import random
 try:
     from itertools import product
@@ -15,12 +17,12 @@
 
 from pypy.objspace.flow.model import FunctionGraph, Block, Link
 from pypy.objspace.flow.model import SpaceOperation, Variable, Constant
-from pypy.rpython.lltypesystem import lltype, llmemory, rclass, rstr
+from pypy.rpython.lltypesystem import lltype, llmemory, rclass, rstr, rffi
 from pypy.rpython.lltypesystem.module import ll_math
 from pypy.translator.unsimplify import varoftype
 from pypy.jit.codewriter import heaptracker, effectinfo
 from pypy.jit.codewriter.flatten import ListOfKind
-from pypy.jit.codewriter.jtransform import Transformer
+from pypy.jit.codewriter.jtransform import Transformer, UnsupportedMallocFlags
 from pypy.jit.metainterp.history import getkind
 
 def const(x):
@@ -538,6 +540,44 @@
     assert op1.opname == '-live-'
     assert op1.args == []
 
+def test_raw_malloc():
+    S = rffi.CArray(lltype.Signed)
+    v1 = varoftype(lltype.Signed)
+    v = varoftype(lltype.Ptr(S))
+    flags = Constant({'flavor': 'raw'}, lltype.Void)
+    op = SpaceOperation('malloc_varsize', [Constant(S, lltype.Void), flags,
+                                           v1], v)
+    tr = Transformer(FakeCPU(), FakeResidualCallControl())
+    op0, op1 = tr.rewrite_operation(op)
+    assert op0.opname == 'residual_call_ir_i'
+    assert op0.args[0].value == 'raw_malloc'    # pseudo-function as a str
+    assert op1.opname == '-live-'
+    assert op1.args == []
+
+def test_raw_malloc_zero():
+    S = rffi.CArray(lltype.Signed)
+    v1 = varoftype(lltype.Signed)
+    v = varoftype(lltype.Ptr(S))
+    flags = Constant({'flavor': 'raw', 'zero': True}, lltype.Void)
+    op = SpaceOperation('malloc_varsize', [Constant(S, lltype.Void), flags,
+                                           v1], v)
+    tr = Transformer(FakeCPU(), FakeResidualCallControl())
+    op0, op1 = tr.rewrite_operation(op)
+    assert op0.opname == 'residual_call_ir_i'
+    assert op0.args[0].value == 'raw_malloc_zero'    # pseudo-function as a str
+    assert op1.opname == '-live-'
+    assert op1.args == []
+
+def test_raw_malloc_unsupported_flag():
+    S = rffi.CArray(lltype.Signed)
+    v1 = varoftype(lltype.Signed)
+    v = varoftype(lltype.Ptr(S))
+    flags = Constant({'flavor': 'raw', 'unsupported_flag': True}, lltype.Void)
+    op = SpaceOperation('malloc_varsize', [Constant(S, lltype.Void), flags,
+                                           v1], v)
+    tr = Transformer(FakeCPU(), FakeResidualCallControl())
+    py.test.raises(UnsupportedMallocFlags, tr.rewrite_operation, op)
+
 def test_rename_on_links():
     v1 = Variable()
     v2 = Variable(); v2.concretetype = llmemory.Address
@@ -1128,3 +1168,16 @@
                         varoftype(lltype.Signed))
     tr = Transformer(None, None)
     raises(NotImplementedError, tr.rewrite_operation, op)
+
+def test_cast_opaque_ptr():
+    S = lltype.GcStruct("S", ("x", lltype.Signed))
+    v1 = varoftype(lltype.Ptr(S))
+    v2 = varoftype(lltype.Ptr(rclass.OBJECT))
+
+    op = SpaceOperation('cast_opaque_ptr', [v1], v2)
+    tr = Transformer()
+    [op1, op2] = tr.rewrite_operation(op)
+    assert op1.opname == 'mark_opaque_ptr'
+    assert op1.args == [v1]
+    assert op1.result is None
+    assert op2 is None
diff --git a/pypy/jit/codewriter/test/test_longlong.py b/pypy/jit/codewriter/test/test_longlong.py
--- a/pypy/jit/codewriter/test/test_longlong.py
+++ b/pypy/jit/codewriter/test/test_longlong.py
@@ -78,7 +78,7 @@
             oplist = tr.rewrite_operation(op)
             assert len(oplist) == 2
             assert oplist[0].opname == 'residual_call_irf_f'
-            assert oplist[0].args[0].value == 'llong_from_int'
+            assert oplist[0].args[0].value == opname.split('_')[0]+'_from_int'
             assert oplist[0].args[1] == 'calldescr-84'
             assert list(oplist[0].args[2]) == [const(0)]
             assert list(oplist[0].args[3]) == []
diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -505,9 +505,6 @@
     @arguments("r", "r", returns="i")
     def bhimpl_instance_ptr_ne(a, b):
         return a != b
-    @arguments("r", returns="r")
-    def bhimpl_cast_opaque_ptr(a):
-        return a
     @arguments("r", returns="i")
     def bhimpl_cast_ptr_to_int(a):
         i = lltype.cast_ptr_to_int(a)
@@ -518,6 +515,13 @@
         ll_assert((i & 1) == 1, "bhimpl_cast_int_to_ptr: not an odd int")
         return lltype.cast_int_to_ptr(llmemory.GCREF, i)
 
+    @arguments("r")
+    def bhimpl_mark_opaque_ptr(a):
+        pass
+    @arguments("r", "i")
+    def bhimpl_record_known_class(a, b):
+        pass
+
     @arguments("i", returns="i")
     def bhimpl_int_copy(a):
         return a
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -9,12 +9,13 @@
 from pypy.tool.sourcetools import func_with_new_name
 
 from pypy.jit.metainterp.resoperation import ResOperation, rop, get_deep_immutable_oplist
-from pypy.jit.metainterp.history import TreeLoop, Box, History, LoopToken
+from pypy.jit.metainterp.history import TreeLoop, Box, History, JitCellToken, TargetToken
 from pypy.jit.metainterp.history import AbstractFailDescr, BoxInt
 from pypy.jit.metainterp.history import BoxPtr, BoxObj, BoxFloat, Const
 from pypy.jit.metainterp import history
 from pypy.jit.metainterp.typesystem import llhelper, oohelper
 from pypy.jit.metainterp.optimize import InvalidLoop
+from pypy.jit.metainterp.inliner import Inliner
 from pypy.jit.metainterp.resume import NUMBERING, PENDINGFIELDSP
 from pypy.jit.codewriter import heaptracker, longlong
 
@@ -23,7 +24,7 @@
     from pypy.jit.metainterp.jitprof import ABORT_BRIDGE
     raise SwitchToBlackhole(ABORT_BRIDGE)
 
-def show_loop(metainterp_sd, loop=None, error=None):
+def show_procedures(metainterp_sd, procedure=None, error=None):
     # debugging
     if option.view or option.viewloops:
         if error:
@@ -32,11 +33,12 @@
                 errmsg += ': ' + str(error)
         else:
             errmsg = None
-        if loop is None: # or type(loop) is TerminatingLoop:
-            extraloops = []
+        if procedure is None:
+            extraprocedures = []
         else:
-            extraloops = [loop]
-        metainterp_sd.stats.view(errmsg=errmsg, extraloops=extraloops)
+            extraprocedures = [procedure]
+        metainterp_sd.stats.view(errmsg=errmsg,
+                                 extraprocedures=extraprocedures)
 
 def create_empty_loop(metainterp, name_prefix=''):
     name = metainterp.staticdata.stats.name_for_new_loop()
@@ -45,131 +47,222 @@
     return loop
 
 
-def make_loop_token(nb_args, jitdriver_sd):
-    loop_token = LoopToken()
-    loop_token.outermost_jitdriver_sd = jitdriver_sd
-    return loop_token
+def make_jitcell_token(jitdriver_sd):
+    jitcell_token = JitCellToken()
+    jitcell_token.outermost_jitdriver_sd = jitdriver_sd
+    return jitcell_token
 
 def record_loop_or_bridge(metainterp_sd, loop):
     """Do post-backend recordings and cleanups on 'loop'.
     """
-    # get the original loop token (corresponding to 'loop', or if that is
-    # a bridge, to the loop that this bridge belongs to)
-    looptoken = loop.token
-    assert looptoken is not None
+    # get the original jitcell token corresponding to the jitcell from which
+    # this trace starts
+    original_jitcell_token = loop.original_jitcell_token
+    assert original_jitcell_token is not None
     if metainterp_sd.warmrunnerdesc is not None:    # for tests
-        assert looptoken.generation > 0     # has been registered with memmgr
-    wref = weakref.ref(looptoken)
+        assert original_jitcell_token.generation > 0     # has been registered with memmgr
+    wref = weakref.ref(original_jitcell_token)
     for op in loop.operations:
         descr = op.getdescr()
         if isinstance(descr, ResumeDescr):
             descr.wref_original_loop_token = wref   # stick it there
             n = descr.index
             if n >= 0:       # we also record the resumedescr number
-                looptoken.compiled_loop_token.record_faildescr_index(n)
-        elif isinstance(descr, LoopToken):
-            # for a JUMP or a CALL_ASSEMBLER: record it as a potential jump.
+                original_jitcell_token.compiled_loop_token.record_faildescr_index(n)
+        elif isinstance(descr, JitCellToken):
+            # for a CALL_ASSEMBLER: record it as a potential jump.
+            if descr is not original_jitcell_token:
+                original_jitcell_token.record_jump_to(descr)
+            descr.exported_state = None
+            op._descr = None    # clear reference, mostly for tests
+        elif isinstance(descr, TargetToken):
+            # for a JUMP: record it as a potential jump.
             # (the following test is not enough to prevent more complicated
             # cases of cycles, but at least it helps in simple tests of
             # test_memgr.py)
-            if descr is not looptoken:
-                looptoken.record_jump_to(descr)
-            op._descr = None    # clear reference, mostly for tests
+            if descr.original_jitcell_token is not original_jitcell_token:
+                assert descr.original_jitcell_token is not None
+                original_jitcell_token.record_jump_to(descr.original_jitcell_token)
+            # exported_state is cleared by optimizeopt when the short preamble is
+            # constructed.  If that did not happen, the label should not show up
+            # in a trace that will be used
+            assert descr.exported_state is None 
             if not we_are_translated():
-                op._jumptarget_number = descr.number
+                op._descr_wref = weakref.ref(op._descr)
+            op._descr = None    # clear reference to prevent the history.Stats
+                                # from keeping the loop alive during tests
     # record this looptoken on the QuasiImmut used in the code
     if loop.quasi_immutable_deps is not None:
         for qmut in loop.quasi_immutable_deps:
             qmut.register_loop_token(wref)
         # XXX maybe we should clear the dictionary here
     # mostly for tests: make sure we don't keep a reference to the LoopToken
-    loop.token = None
+    loop.original_jitcell_token = None
     if not we_are_translated():
-        loop._looptoken_number = looptoken.number
+        loop._looptoken_number = original_jitcell_token.number
 
 # ____________________________________________________________
 
-def compile_new_loop(metainterp, old_loop_tokens, greenkey, start,
-                     start_resumedescr, full_preamble_needed=True):
-    """Try to compile a new loop by closing the current history back
+def compile_loop(metainterp, greenkey, start,
+                 inputargs, jumpargs,
+                 start_resumedescr, full_preamble_needed=True):
+    """Try to compile a new procedure by closing the current history back
     to the first operation.
     """
-    from pypy.jit.metainterp.optimize import optimize_loop
+    from pypy.jit.metainterp.optimizeopt import optimize_trace
 
     history = metainterp.history
-    loop = create_empty_loop(metainterp)
-    loop.inputargs = history.inputargs[:]
+    metainterp_sd = metainterp.staticdata
+    jitdriver_sd = metainterp.jitdriver_sd
+
+    if False:
+        part = partial_trace
+        assert False
+        procedure_token = metainterp.get_procedure_token(greenkey)
+        assert procedure_token
+        all_target_tokens = []
+    else:
+        jitcell_token = make_jitcell_token(jitdriver_sd)
+        part = create_empty_loop(metainterp)
+        part.inputargs = inputargs[:]
+        h_ops = history.operations
+        part.start_resumedescr = start_resumedescr
+        part.operations = [ResOperation(rop.LABEL, inputargs, None, descr=TargetToken(jitcell_token))] + \
+                          [h_ops[i].clone() for i in range(start, len(h_ops))] + \
+                          [ResOperation(rop.JUMP, jumpargs, None, descr=jitcell_token)]
+        try:
+            optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts)
+        except InvalidLoop:
+            return None
+        target_token = part.operations[0].getdescr()
+        assert isinstance(target_token, TargetToken)
+        all_target_tokens = [target_token]
+
+    loop = create_empty_loop(metainterp)        
+    loop.inputargs = part.inputargs
+    loop.operations = part.operations
+    loop.quasi_immutable_deps = {}
+    if part.quasi_immutable_deps:
+        loop.quasi_immutable_deps.update(part.quasi_immutable_deps)
+    while part.operations[-1].getopnum() == rop.LABEL:
+        inliner = Inliner(inputargs, jumpargs)
+        part.quasi_immutable_deps = None
+        part.operations = [part.operations[-1]] + \
+                          [inliner.inline_op(h_ops[i]) for i in range(start, len(h_ops))] + \
+                          [ResOperation(rop.JUMP, [inliner.inline_arg(a) for a in jumpargs],
+                                        None, descr=jitcell_token)]
+        target_token = part.operations[0].getdescr()
+        assert isinstance(target_token, TargetToken)
+        all_target_tokens.append(target_token)
+        inputargs = jumpargs
+        jumpargs = part.operations[-1].getarglist()
+
+        try:
+            optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts)
+        except InvalidLoop:
+            return None
+            
+        loop.operations = loop.operations[:-1] + part.operations
+        if part.quasi_immutable_deps:
+            loop.quasi_immutable_deps.update(part.quasi_immutable_deps)
+
+    if not loop.quasi_immutable_deps:
+        loop.quasi_immutable_deps = None
     for box in loop.inputargs:
         assert isinstance(box, Box)
-    # make a copy, because optimize_loop can mutate the ops and descrs
-    h_ops = history.operations
-    loop.operations = [h_ops[i].clone() for i in range(start, len(h_ops))]
+
+    loop.original_jitcell_token = jitcell_token
+    for label in all_target_tokens:
+        assert isinstance(label, TargetToken)
+        label.original_jitcell_token = jitcell_token
+        if label.virtual_state and label.short_preamble:
+            metainterp_sd.logger_ops.log_short_preamble([], label.short_preamble)
+    jitcell_token.target_tokens = all_target_tokens
+    send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop, "loop")
+    record_loop_or_bridge(metainterp_sd, loop)
+    return all_target_tokens[0]
+
+def compile_retrace(metainterp, greenkey, start,
+                    inputargs, jumpargs,
+                    start_resumedescr, partial_trace, resumekey):
+    """Try to compile a new procedure by closing the current history back
+    to the first operation.
+    """
+    from pypy.jit.metainterp.optimizeopt import optimize_trace
+
+    history = metainterp.history
     metainterp_sd = metainterp.staticdata
     jitdriver_sd = metainterp.jitdriver_sd
-    loop_token = make_loop_token(len(loop.inputargs), jitdriver_sd)
-    loop.token = loop_token
-    loop.operations[-1].setdescr(loop_token)     # patch the target of the JUMP
 
-    loop.preamble = create_empty_loop(metainterp, 'Preamble ')
-    loop.preamble.inputargs = loop.inputargs
-    loop.preamble.token = make_loop_token(len(loop.inputargs), jitdriver_sd)
-    loop.preamble.start_resumedescr = start_resumedescr
+    loop_jitcell_token = metainterp.get_procedure_token(greenkey)
+    assert loop_jitcell_token
+    assert partial_trace.operations[-1].getopnum() == rop.LABEL
 
+    part = create_empty_loop(metainterp)
+    part.inputargs = inputargs[:]
+    part.start_resumedescr = start_resumedescr
+    h_ops = history.operations
+
+    part.operations = [partial_trace.operations[-1]] + \
+                      [h_ops[i].clone() for i in range(start, len(h_ops))] + \
+                      [ResOperation(rop.JUMP, jumpargs, None, descr=loop_jitcell_token)]
+    label = part.operations[0]
+    assert label.getopnum() == rop.LABEL
     try:
-        old_loop_token = optimize_loop(metainterp_sd, old_loop_tokens, loop,
-                                       jitdriver_sd.warmstate.enable_opts)
+        optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts)
     except InvalidLoop:
-        debug_print("compile_new_loop: got an InvalidLoop")
-        return None
-    if old_loop_token is not None:
-        metainterp.staticdata.log("reusing old loop")
-        return old_loop_token
+        #return None # XXX: Disable for now
+        # Fall back on jumping to preamble
+        target_token = label.getdescr()
+        assert isinstance(target_token, TargetToken)
+        assert target_token.exported_state
+        part.operations = [label] + \
+                          [ResOperation(rop.JUMP, target_token.exported_state.jump_args,
+                                        None, descr=loop_jitcell_token)]
+        try:
+            optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts,
+                           inline_short_preamble=False)
+        except InvalidLoop:
+            return None
+    assert part.operations[-1].getopnum() != rop.LABEL
+    target_token = label.getdescr()
+    assert isinstance(target_token, TargetToken)
+    assert loop_jitcell_token.target_tokens
+    loop_jitcell_token.target_tokens.append(target_token)
 
-    if loop.preamble.operations is not None:
-        send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop,
-                             "loop")
-        record_loop_or_bridge(metainterp_sd, loop)
-        token = loop.preamble.token
-        if full_preamble_needed:
-            send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd,
-                                 loop.preamble, "entry bridge")
-            insert_loop_token(old_loop_tokens, loop.preamble.token)
-            jitdriver_sd.warmstate.attach_unoptimized_bridge_from_interp(
-                greenkey, loop.preamble.token)
-            record_loop_or_bridge(metainterp_sd, loop.preamble)
-        elif token.short_preamble:
-            short = token.short_preamble[-1]
-            metainterp_sd.logger_ops.log_short_preamble(short.inputargs,
-                                                        short.operations)
-        return token
-    else:
-        send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop,
-                             "loop")
-        insert_loop_token(old_loop_tokens, loop_token)
-        jitdriver_sd.warmstate.attach_unoptimized_bridge_from_interp(
-            greenkey, loop.token)
-        record_loop_or_bridge(metainterp_sd, loop)
-        return loop_token
+    loop = partial_trace
+    loop.operations = loop.operations[:-1] + part.operations
 
-def insert_loop_token(old_loop_tokens, loop_token):
-    # Find where in old_loop_tokens we should insert this new loop_token.
-    # The following algo means "as late as possible, but before another
-    # loop token that would be more general and so completely mask off
-    # the new loop_token".
-    # XXX do we still need a list?
-    old_loop_tokens.append(loop_token)
+    quasi_immutable_deps = {}
+    if loop.quasi_immutable_deps:
+        quasi_immutable_deps.update(loop.quasi_immutable_deps)
+    if part.quasi_immutable_deps:
+        quasi_immutable_deps.update(part.quasi_immutable_deps)
+    if quasi_immutable_deps:
+        loop.quasi_immutable_deps = quasi_immutable_deps
+
+    for box in loop.inputargs:
+        assert isinstance(box, Box)
+
+    target_token = loop.operations[-1].getdescr()
+    resumekey.compile_and_attach(metainterp, loop)
+    target_token = label.getdescr()
+    assert isinstance(target_token, TargetToken)
+    target_token.original_jitcell_token = loop.original_jitcell_token
+    record_loop_or_bridge(metainterp_sd, loop)
+    return target_token
 
 def send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop, type):
-    jitdriver_sd.on_compile(metainterp_sd.logger_ops, loop.token,
+    original_jitcell_token = loop.original_jitcell_token
+    jitdriver_sd.on_compile(metainterp_sd.logger_ops, original_jitcell_token,
                             loop.operations, type, greenkey)
     loopname = jitdriver_sd.warmstate.get_location_str(greenkey)
     globaldata = metainterp_sd.globaldata
-    loop_token = loop.token
-    loop_token.number = n = globaldata.loopnumbering
+    original_jitcell_token.number = n = globaldata.loopnumbering
     globaldata.loopnumbering += 1
 
     if not we_are_translated():
-        show_loop(metainterp_sd, loop)
+        show_procedures(metainterp_sd, loop)
         loop.check_consistency()
 
     operations = get_deep_immutable_oplist(loop.operations)
@@ -177,26 +270,19 @@
     debug_start("jit-backend")
     try:
         ops_offset = metainterp_sd.cpu.compile_loop(loop.inputargs, operations,
-                                                    loop.token, name=loopname)
+                                                    original_jitcell_token, name=loopname)
     finally:
         debug_stop("jit-backend")
     metainterp_sd.profiler.end_backend()
     metainterp_sd.stats.add_new_loop(loop)
     if not we_are_translated():
-        if type != "entry bridge":
-            metainterp_sd.stats.compiled()
-        else:
-            loop._ignore_during_counting = True
+        metainterp_sd.stats.compiled()
     metainterp_sd.log("compiled new " + type)
     #
     metainterp_sd.logger_ops.log_loop(loop.inputargs, loop.operations, n, type, ops_offset)
-    short = loop.token.short_preamble
-    if short:
-        metainterp_sd.logger_ops.log_short_preamble(short[-1].inputargs,
-                                                    short[-1].operations)
     #
     if metainterp_sd.warmrunnerdesc is not None:    # for tests
-        metainterp_sd.warmrunnerdesc.memory_manager.keep_loop_alive(loop.token)
+        metainterp_sd.warmrunnerdesc.memory_manager.keep_loop_alive(original_jitcell_token)
 
 def send_bridge_to_backend(jitdriver_sd, metainterp_sd, faildescr, inputargs,
                            operations, original_loop_token):
@@ -204,8 +290,9 @@
     jitdriver_sd.on_compile_bridge(metainterp_sd.logger_ops,
                                    original_loop_token, operations, n)
     if not we_are_translated():
-        show_loop(metainterp_sd)
-        TreeLoop.check_consistency_of(inputargs, operations)
+        show_procedures(metainterp_sd)
+        seen = dict.fromkeys(inputargs)
+        TreeLoop.check_consistency_of_branch(operations, seen)
     metainterp_sd.profiler.start_backend()
     operations = get_deep_immutable_oplist(operations)
     debug_start("jit-backend")
@@ -221,9 +308,9 @@
     #
     metainterp_sd.logger_ops.log_bridge(inputargs, operations, n, ops_offset)
     #
-    if metainterp_sd.warmrunnerdesc is not None:    # for tests
-        metainterp_sd.warmrunnerdesc.memory_manager.keep_loop_alive(
-            original_loop_token)
+    #if metainterp_sd.warmrunnerdesc is not None:    # for tests
+    #    metainterp_sd.warmrunnerdesc.memory_manager.keep_loop_alive(
+    #        original_loop_token)
 
 # ____________________________________________________________
 
@@ -263,7 +350,7 @@
         raise metainterp_sd.ExitFrameWithExceptionRef(cpu, value)
 
 
-class TerminatingLoopToken(LoopToken):
+class TerminatingLoopToken(JitCellToken): # FIXME: kill?
     terminating = True
 
     def __init__(self, nargs, finishdescr):
@@ -298,7 +385,7 @@
     pass
 
 class ResumeGuardDescr(ResumeDescr):
-    _counter = 0        # if < 0, there is one counter per value;
+    _counter = 0        # on a GUARD_VALUE, there is one counter per value;
     _counters = None    # they get stored in _counters then.
 
     # this class also gets the following attributes stored by resume.py code
@@ -309,10 +396,13 @@
     rd_virtuals = None
     rd_pendingfields = lltype.nullptr(PENDINGFIELDSP.TO)
 
-    CNT_INT   = -0x20000000
-    CNT_REF   = -0x40000000
-    CNT_FLOAT = -0x60000000
-    CNT_MASK  =  0x1FFFFFFF
+    CNT_BASE_MASK  =  0x0FFFFFFF     # the base counter value
+    CNT_BUSY_FLAG  =  0x10000000     # if set, busy tracing from the guard
+    CNT_TYPE_MASK  =  0x60000000     # mask for the type
+
+    CNT_INT        =  0x20000000
+    CNT_REF        =  0x40000000
+    CNT_FLOAT      =  0x60000000
 
     def store_final_boxes(self, guard_op, boxes):
         guard_op.setfailargs(boxes)
@@ -326,6 +416,8 @@
         except ValueError:
             return     # xxx probably very rare
         else:
+            if i > self.CNT_BASE_MASK:
+                return    # probably never, but better safe than sorry
             if box.type == history.INT:
                 cnt = self.CNT_INT
             elif box.type == history.REF:
@@ -334,13 +426,17 @@
                 cnt = self.CNT_FLOAT
             else:
                 assert 0, box.type
-            # we build the following value for _counter, which is always
-            # a negative value
+            assert cnt > self.CNT_BASE_MASK
             self._counter = cnt | i
 
     def handle_fail(self, metainterp_sd, jitdriver_sd):
         if self.must_compile(metainterp_sd, jitdriver_sd):
-            self._trace_and_compile_from_bridge(metainterp_sd, jitdriver_sd)
+            self.start_compiling()
+            try:
+                self._trace_and_compile_from_bridge(metainterp_sd,
+                                                    jitdriver_sd)
+            finally:
+                self.done_compiling()
         else:
             from pypy.jit.metainterp.blackhole import resume_in_blackhole
             resume_in_blackhole(metainterp_sd, jitdriver_sd, self)
@@ -358,12 +454,22 @@
 
     def must_compile(self, metainterp_sd, jitdriver_sd):
         trace_eagerness = jitdriver_sd.warmstate.trace_eagerness
-        if self._counter >= 0:
+        #
+        if self._counter <= self.CNT_BASE_MASK:
+            # simple case: just counting from 0 to trace_eagerness
             self._counter += 1
             return self._counter >= trace_eagerness
-        else:
-            index = self._counter & self.CNT_MASK
-            typetag = self._counter & ~ self.CNT_MASK
+        #
+        # do we have the BUSY flag?  If so, we're tracing right now, e.g. in an
+        # outer invocation of the same function, so don't trace again for now.
+        elif self._counter & self.CNT_BUSY_FLAG:
+            return False
+        #
+        else: # we have a GUARD_VALUE that fails.  Make a _counters instance
+            # (only now, when the guard is actually failing at least once),
+            # and use it to record some statistics about the failing values.
+            index = self._counter & self.CNT_BASE_MASK
+            typetag = self._counter & self.CNT_TYPE_MASK
             counters = self._counters
             if typetag == self.CNT_INT:
                 intvalue = metainterp_sd.cpu.get_latest_value_int(index)
@@ -390,7 +496,16 @@
                 assert 0, typetag
             return counter >= trace_eagerness
 
-    def reset_counter_from_failure(self):
+    def start_compiling(self):
+        # start tracing and compiling from this guard.
+        self._counter |= self.CNT_BUSY_FLAG
+
+    def done_compiling(self):
+        # done tracing and compiling from this guard.  Either the bridge has
+        # been successfully compiled, in which case whatever value we store
+        # in self._counter will not be seen any more, or not, in which case
+        # we should reset the counter to 0, in order to wait a bit until the
+        # next attempt.
         if self._counter >= 0:
             self._counter = 0
         self._counters = None
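As an aside on the new counter layout above: the following plain-Python sketch is not part of the patch (only the CNT_* constants are copied from it); it shows how a single _counter integer can hold either a plain counter, a busy flag, or a failarg index plus a type tag.

    CNT_BASE_MASK = 0x0FFFFFFF   # low 28 bits: plain counter, or failarg index
    CNT_BUSY_FLAG = 0x10000000   # set while a bridge is being traced from here
    CNT_TYPE_MASK = 0x60000000   # the two bits above the busy flag: value type
    CNT_INT, CNT_REF, CNT_FLOAT = 0x20000000, 0x40000000, 0x60000000

    def pack_guard_value_counter(index, typetag):
        # GUARD_VALUE case: remember which failarg to sample on later failures
        assert 0 <= index <= CNT_BASE_MASK
        assert typetag in (CNT_INT, CNT_REF, CNT_FLOAT)
        return typetag | index

    def describe(counter):
        if counter <= CNT_BASE_MASK:
            return ('plain counter', counter)
        if counter & CNT_BUSY_FLAG:
            return ('busy tracing', None)
        return ('per-value', counter & CNT_BASE_MASK, counter & CNT_TYPE_MASK)

    assert describe(pack_guard_value_counter(5, CNT_INT)) == ('per-value', 5, CNT_INT)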
@@ -399,13 +514,13 @@
         # We managed to create a bridge.  Attach the new operations
         # to the corresponding guard_op and compile from there
         assert metainterp.resumekey_original_loop_token is not None
-        new_loop.token = metainterp.resumekey_original_loop_token
+        new_loop.original_jitcell_token = metainterp.resumekey_original_loop_token
         inputargs = metainterp.history.inputargs
         if not we_are_translated():
             self._debug_suboperations = new_loop.operations
         send_bridge_to_backend(metainterp.jitdriver_sd, metainterp.staticdata,
                                self, inputargs, new_loop.operations,
-                               new_loop.token)
+                               new_loop.original_jitcell_token)
 
     def copy_all_attributes_into(self, res):
         # XXX a bit ugly to have to list them all here
@@ -588,44 +703,32 @@
         metainterp_sd = metainterp.staticdata
         jitdriver_sd = metainterp.jitdriver_sd
         redargs = new_loop.inputargs
-        # We make a new LoopToken for this entry bridge, and stick it
-        # to every guard in the loop.
-        new_loop_token = make_loop_token(len(redargs), jitdriver_sd)
-        new_loop.token = new_loop_token
+        new_loop.original_jitcell_token = jitcell_token = make_jitcell_token(jitdriver_sd)
         send_loop_to_backend(self.original_greenkey, metainterp.jitdriver_sd,
                              metainterp_sd, new_loop, "entry bridge")
         # send the new_loop to warmspot.py, to be called directly the next time
-        jitdriver_sd.warmstate.attach_unoptimized_bridge_from_interp(
-            self.original_greenkey,
-            new_loop_token)
-        # store the new loop in compiled_merge_points_wref too
-        old_loop_tokens = metainterp.get_compiled_merge_points(
-            self.original_greenkey)
-        # it always goes at the end of the list, as it is the most
-        # general loop token
-        old_loop_tokens.append(new_loop_token)
-        metainterp.set_compiled_merge_points(self.original_greenkey,
-                                             old_loop_tokens)
+        jitdriver_sd.warmstate.attach_procedure_to_interp(
+            self.original_greenkey, jitcell_token)
+        metainterp_sd.stats.add_jitcell_token(jitcell_token)
 
-    def reset_counter_from_failure(self):
-        pass
 
-
-def compile_new_bridge(metainterp, old_loop_tokens, resumekey, retraced=False):
+def compile_trace(metainterp, resumekey, start_resumedescr=None):
     """Try to compile a new bridge leading from the beginning of the history
     to some existing place.
     """
-    from pypy.jit.metainterp.optimize import optimize_bridge
+    from pypy.jit.metainterp.optimizeopt import optimize_trace
     
     # The history contains new operations to attach as the code for the
     # failure of 'resumekey.guard_op'.
-    #
+    # 
     # Attempt to use optimize_bridge().  This may return None in case
     # it does not work -- i.e. none of the existing old_loop_tokens match.
-    new_loop = create_empty_loop(metainterp)
-    new_loop.inputargs = metainterp.history.inputargs[:]
+    new_trace = create_empty_loop(metainterp)
+    new_trace.inputargs = inputargs = metainterp.history.inputargs[:]
     # clone ops, as optimize_bridge can mutate the ops
-    new_loop.operations = [op.clone() for op in metainterp.history.operations]
+
+    new_trace.operations = [op.clone() for op in metainterp.history.operations]
+    new_trace.start_resumedescr = start_resumedescr
     metainterp_sd = metainterp.staticdata
     state = metainterp.jitdriver_sd.warmstate
     if isinstance(resumekey, ResumeAtPositionDescr):
@@ -633,38 +736,25 @@
     else:
         inline_short_preamble = True
     try:
-        target_loop_token = optimize_bridge(metainterp_sd, old_loop_tokens,
-                                            new_loop, state.enable_opts,
-                                            inline_short_preamble, retraced)
+        optimize_trace(metainterp_sd, new_trace, state.enable_opts, inline_short_preamble)
     except InvalidLoop:
         debug_print("compile_new_bridge: got an InvalidLoop")
         # XXX I am fairly convinced that optimize_bridge cannot actually raise
         # InvalidLoop
         debug_print('InvalidLoop in compile_new_bridge')
         return None
-    # Did it work?
-    if target_loop_token is not None:
-        # Yes, we managed to create a bridge.  Dispatch to resumekey to
+
+    if new_trace.operations[-1].getopnum() != rop.LABEL:
+        # We managed to create a bridge.  Dispatch to resumekey to
         # know exactly what we must do (ResumeGuardDescr/ResumeFromInterpDescr)
-        prepare_last_operation(new_loop, target_loop_token)
-        resumekey.compile_and_attach(metainterp, new_loop)
-        record_loop_or_bridge(metainterp_sd, new_loop)
-    return target_loop_token
-
-def prepare_last_operation(new_loop, target_loop_token):
-    op = new_loop.operations[-1]
-    if not isinstance(target_loop_token, TerminatingLoopToken):
-        # normal case
-        #op.setdescr(target_loop_token)     # patch the jump target
-        pass
+        target_token = new_trace.operations[-1].getdescr()
+        resumekey.compile_and_attach(metainterp, new_trace)
+        record_loop_or_bridge(metainterp_sd, new_trace)
+        return target_token
     else:
-        # The target_loop_token is a pseudo loop token,
-        # e.g. loop_tokens_done_with_this_frame_void[0]
-        # Replace the operation with the real operation we want, i.e. a FINISH
-        descr = target_loop_token.finishdescr
-        args = op.getarglist()
-        new_op = ResOperation(rop.FINISH, args, None, descr=descr)
-        new_loop.operations[-1] = new_op
+        metainterp.retrace_needed(new_trace)
+        return None
+        
 
 # ____________________________________________________________
 
@@ -683,7 +773,7 @@
     """
     # 'redboxes' is only used to know the types of red arguments.
     inputargs = [box.clonebox() for box in redboxes]
-    loop_token = make_loop_token(len(inputargs), jitdriver_sd)
+    jitcell_token = make_jitcell_token(jitdriver_sd)
     # 'nb_red_args' might be smaller than len(redboxes),
     # because it doesn't include the virtualizable boxes.
     nb_red_args = jitdriver_sd.num_red_args
@@ -716,7 +806,7 @@
         ]
     operations[1].setfailargs([])
     operations = get_deep_immutable_oplist(operations)
-    cpu.compile_loop(inputargs, operations, loop_token, log=False)
+    cpu.compile_loop(inputargs, operations, jitcell_token, log=False)
     if memory_manager is not None:    # for tests
-        memory_manager.keep_loop_alive(loop_token)
-    return loop_token
+        memory_manager.keep_loop_alive(jitcell_token)
+    return jitcell_token
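Stepping back from the details: the rewritten compile_loop builds a trace of the general shape sketched below. This is a dependency-free illustration only; the tuples stand in for ResOperation objects, and the descr strings stand in for the real TargetToken/JitCellToken instances used in the code above.

    def build_procedure_shape(inputargs, recorded_ops, jumpargs):
        # one LABEL in front, whose descr is a TargetToken tied to the
        # procedure's JitCellToken; the recorded history in the middle; and
        # one JUMP at the end whose descr is the JitCellToken itself.
        # optimize_trace may later split the trace at additional LABELs.
        label = ('LABEL', list(inputargs), 'TargetToken(jitcell_token)')
        jump = ('JUMP', list(jumpargs), 'jitcell_token')
        return [label] + list(recorded_ops) + [jump]

    ops = build_procedure_shape(['i0'], [('INT_ADD', ['i0', 1], 'i1')], ['i1'])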
diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -340,8 +340,11 @@
                          rop.DEBUG_MERGE_POINT,
                          rop.JIT_DEBUG,
                          rop.SETARRAYITEM_RAW,
+                         rop.GETINTERIORFIELD_RAW,
+                         rop.SETINTERIORFIELD_RAW,
                          rop.CALL_RELEASE_GIL,
                          rop.QUASIIMMUT_FIELD,
+                         rop.LABEL,
                          ):      # list of opcodes never executed by pyjitpl
                 continue
             raise AssertionError("missing %r" % (key,))
diff --git a/pypy/jit/metainterp/gc.py b/pypy/jit/metainterp/gc.py
--- a/pypy/jit/metainterp/gc.py
+++ b/pypy/jit/metainterp/gc.py
@@ -7,6 +7,9 @@
         self.config = config
 
 
+class GC_none(GcDescription):
+    malloc_zero_filled = True
+
 class GC_boehm(GcDescription):
     malloc_zero_filled = True
 
diff --git a/pypy/jit/metainterp/graphpage.py b/pypy/jit/metainterp/graphpage.py
--- a/pypy/jit/metainterp/graphpage.py
+++ b/pypy/jit/metainterp/graphpage.py
@@ -12,8 +12,9 @@
     def get_display_text(self):
         return None
 
-def display_loops(loops, errmsg=None, highlight_loops={}):
-    graphs = [(loop, highlight_loops.get(loop, 0)) for loop in loops]    
+def display_procedures(procedures, errmsg=None, highlight_procedures={}):
+    graphs = [(procedure, highlight_procedures.get(procedure, 0))
+              for procedure in procedures]
     for graph, highlight in graphs:
         for op in graph.get_operations():
             if is_interesting_guard(op):
@@ -25,18 +26,19 @@
 def is_interesting_guard(op):
     return hasattr(op.getdescr(), '_debug_suboperations')
 
+def getdescr(op):
+    if op._descr is not None:
+        return op._descr
+    if hasattr(op, '_descr_wref'):
+        return op._descr_wref()
+    return None
+
 
 class ResOpGraphPage(GraphPage):
 
     def compute(self, graphs, errmsg=None):
         resopgen = ResOpGen()
         for graph, highlight in graphs:
-            if getattr(graph, 'token', None) is not None:
-                resopgen.jumps_to_graphs[graph.token] = graph
-            if getattr(graph, '_looptoken_number', None) is not None:
-                resopgen.jumps_to_graphs[graph._looptoken_number] = graph
-        
-        for graph, highlight in graphs:
             resopgen.add_graph(graph, highlight)
         if errmsg:
             resopgen.set_errmsg(errmsg)
@@ -54,7 +56,7 @@
         self.block_starters = {}    # {graphindex: {set-of-operation-indices}}
         self.all_operations = {}
         self.errmsg = None
-        self.jumps_to_graphs = {}
+        self.target_tokens = {}
 
     def op_name(self, graphindex, opindex):
         return 'g%dop%d' % (graphindex, opindex)
@@ -73,16 +75,21 @@
         for graphindex in range(len(self.graphs)):
             self.block_starters[graphindex] = {0: True}
         for graphindex, graph in enumerate(self.graphs):
-            last_was_mergepoint = False
+            mergepointblock = None
             for i, op in enumerate(graph.get_operations()):
                 if is_interesting_guard(op):
                     self.mark_starter(graphindex, i+1)
                 if op.getopnum() == rop.DEBUG_MERGE_POINT:
-                    if not last_was_mergepoint:
-                        last_was_mergepoint = True
-                        self.mark_starter(graphindex, i)
+                    if mergepointblock is None:
+                        mergepointblock = i
+                elif op.getopnum() == rop.LABEL:
+                    self.mark_starter(graphindex, i)
+                    self.target_tokens[getdescr(op)] = (graphindex, i)
+                    mergepointblock = i
                 else:
-                    last_was_mergepoint = False
+                    if mergepointblock is not None:
+                        self.mark_starter(graphindex, mergepointblock)
+                        mergepointblock = None
 
     def set_errmsg(self, errmsg):
         self.errmsg = errmsg
@@ -172,24 +179,10 @@
                              (graphindex, opindex))
                 break
         if op.getopnum() == rop.JUMP:
-            tgt_g = -1
-            tgt = None
-            tgt_number = getattr(op, '_jumptarget_number', None)
-            if tgt_number is not None:
-                tgt = self.jumps_to_graphs.get(tgt_number)
-            else:
-                tgt_descr = op.getdescr()
-                if tgt_descr is None:
-                    tgt_g = graphindex
-                else:
-                    tgt = self.jumps_to_graphs.get(tgt_descr.number)
-                    if tgt is None:
-                        tgt = self.jumps_to_graphs.get(tgt_descr)
-            if tgt is not None:
-                tgt_g = self.graphs.index(tgt)
-            if tgt_g != -1:
+            tgt_descr = getdescr(op)
+            if tgt_descr is not None and tgt_descr in self.target_tokens:
                 self.genedge((graphindex, opstartindex),
-                             (tgt_g, 0),
+                             self.target_tokens[tgt_descr],
                              weight="0")
         lines.append("")
         label = "\\l".join(lines)
diff --git a/pypy/jit/metainterp/heapcache.py b/pypy/jit/metainterp/heapcache.py
--- a/pypy/jit/metainterp/heapcache.py
+++ b/pypy/jit/metainterp/heapcache.py
@@ -34,7 +34,6 @@
         self.clear_caches(opnum, descr, argboxes)
 
     def mark_escaped(self, opnum, argboxes):
-        idx = 0
         if opnum == rop.SETFIELD_GC:
             assert len(argboxes) == 2
             box, valuebox = argboxes
@@ -42,8 +41,20 @@
                 self.dependencies.setdefault(box, []).append(valuebox)
             else:
                 self._escape(valuebox)
-        # GETFIELD_GC doesn't escape it's argument
-        elif opnum != rop.GETFIELD_GC:
+        elif opnum == rop.SETARRAYITEM_GC:
+            assert len(argboxes) == 3
+            box, indexbox, valuebox = argboxes
+            if self.is_unescaped(box) and self.is_unescaped(valuebox):
+                self.dependencies.setdefault(box, []).append(valuebox)
+            else:
+                self._escape(valuebox)
+        # GETFIELD_GC, MARK_OPAQUE_PTR, PTR_EQ, and PTR_NE don't escape their
+        # arguments
+        elif (opnum != rop.GETFIELD_GC and
+              opnum != rop.MARK_OPAQUE_PTR and
+              opnum != rop.PTR_EQ and
+              opnum != rop.PTR_NE):
+            idx = 0
             for box in argboxes:
                 # setarrayitem_gc don't escape its first argument
                 if not (idx == 0 and opnum in [rop.SETARRAYITEM_GC]):
@@ -60,13 +71,13 @@
                 self._escape(dep)
 
     def clear_caches(self, opnum, descr, argboxes):
-        if opnum == rop.SETFIELD_GC:
-            return
-        if opnum == rop.SETARRAYITEM_GC:
-            return
-        if opnum == rop.SETFIELD_RAW:
-            return
-        if opnum == rop.SETARRAYITEM_RAW:
+        if (opnum == rop.SETFIELD_GC or
+            opnum == rop.SETARRAYITEM_GC or
+            opnum == rop.SETFIELD_RAW or
+            opnum == rop.SETARRAYITEM_RAW or
+            opnum == rop.SETINTERIORFIELD_GC or
+            opnum == rop.COPYSTRCONTENT or
+            opnum == rop.COPYUNICODECONTENT):
             return
         if rop._OVF_FIRST <= opnum <= rop._OVF_LAST:
             return
@@ -75,9 +86,9 @@
         if opnum == rop.CALL or opnum == rop.CALL_LOOPINVARIANT:
             effectinfo = descr.get_extra_info()
             ef = effectinfo.extraeffect
-            if ef == effectinfo.EF_LOOPINVARIANT or \
-               ef == effectinfo.EF_ELIDABLE_CANNOT_RAISE or \
-               ef == effectinfo.EF_ELIDABLE_CAN_RAISE:
+            if (ef == effectinfo.EF_LOOPINVARIANT or
+                ef == effectinfo.EF_ELIDABLE_CANNOT_RAISE or
+                ef == effectinfo.EF_ELIDABLE_CAN_RAISE):
                 return
             # A special case for ll_arraycopy, because it is so common, and its
             # effects are so well defined.
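To make the escape logic above easier to follow, here is a minimal self-contained sketch (not the real HeapCache class, and covering only SETFIELD_GC-style stores): storing an unescaped box into an unescaped container just records a dependency, and the stored boxes escape later, transitively, if the container itself escapes.

    class EscapeTracker(object):
        def __init__(self):
            self.new_boxes = {}       # box -> True while still unescaped
            self.dependencies = {}    # container box -> boxes stored into it

        def new(self, box):
            self.new_boxes[box] = True

        def is_unescaped(self, box):
            return self.new_boxes.get(box, False)

        def setfield(self, box, valuebox):
            if self.is_unescaped(box) and self.is_unescaped(valuebox):
                self.dependencies.setdefault(box, []).append(valuebox)
            else:
                self._escape(valuebox)

        def _escape(self, box):
            if self.new_boxes.get(box, False):
                self.new_boxes[box] = False
                for dep in self.dependencies.pop(box, []):
                    self._escape(dep)

    t = EscapeTracker(); t.new('p0'); t.new('p1')
    t.setfield('p0', 'p1')       # only records a dependency
    t._escape('p0')              # now 'p1' escapes as well
    assert not t.is_unescaped('p1')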
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -10,6 +10,7 @@
 from pypy.jit.metainterp.resoperation import ResOperation, rop
 from pypy.jit.codewriter import heaptracker, longlong
 from pypy.rlib.objectmodel import compute_identity_hash
+import weakref
 
 # ____________________________________________________________
 
@@ -692,18 +693,17 @@
 
 # ____________________________________________________________
 
-# The TreeLoop class contains a loop or a generalized loop, i.e. a tree
-# of operations.  Each branch ends in a jump which can go either to
-# the top of the same loop, or to another TreeLoop; or it ends in a FINISH.
+# The JitCellToken class is the root of a tree of traces.  Each branch ends
+# in a jump which goes to a LABEL operation; or it ends in a FINISH.
 
-class LoopToken(AbstractDescr):
+class JitCellToken(AbstractDescr):
     """Used for rop.JUMP, giving the target of the jump.
     This is different from TreeLoop: the TreeLoop class contains the
     whole loop, including 'operations', and goes away after the loop
     was compiled; but the LoopDescr remains alive and points to the
     generated assembler.
     """
-    short_preamble = None
+    target_tokens = None
     failed_states = None
     retraced_count = 0
     terminating = False # see TerminatingLoopToken in compile.py
@@ -720,10 +720,11 @@
 
     def __init__(self):
         # For memory management of assembled loops
-        self._keepalive_target_looktokens = {}      # set of other LoopTokens
+        self._keepalive_jitcell_tokens = {}      # set of other JitCellToken
 
-    def record_jump_to(self, target_loop_token):
-        self._keepalive_target_looktokens[target_loop_token] = None
+    def record_jump_to(self, jitcell_token):
+        assert isinstance(jitcell_token, JitCellToken)
+        self._keepalive_jitcell_tokens[jitcell_token] = None
 
     def __repr__(self):
         return '<Loop %d, gen=%d>' % (self.number, self.generation)
@@ -734,17 +735,36 @@
     def dump(self):
         self.compiled_loop_token.cpu.dump_loop_token(self)
 
+class TargetToken(AbstractDescr):
+    def __init__(self, targeting_jitcell_token=None):
+        # The jitcell such that jumping to it might result in a jump to this label
+        self.targeting_jitcell_token = targeting_jitcell_token
+        
+        # The jitcell where the trace containing the label with this TargetToken begins
+        self.original_jitcell_token = None
+
+        self.virtual_state = None
+        self.exported_state = None
+        
 class TreeLoop(object):
     inputargs = None
     operations = None
-    token = None
     call_pure_results = None
     logops = None
     quasi_immutable_deps = None
+    start_resumedescr = None
+
+    def _token(*args):
+        raise Exception("TreeLoop.token is killed")
+    token = property(_token, _token)
+
+    # This is the jitcell where the trace starts. Labels within the trace might
+    # belong to some other jitcell, in the sense that jumping to that other
+    # jitcell will result in a jump to the label.
+    original_jitcell_token = None
 
     def __init__(self, name):
         self.name = name
-        # self.inputargs = list of distinct Boxes
         # self.operations = list of ResOperations
         #   ops of the kind 'guard_xxx' contain a further list of operations,
         #   which may itself contain 'guard_xxx' and so on, making a tree.
@@ -777,6 +797,10 @@
     def check_consistency(self):     # for testing
         "NOT_RPYTHON"
         self.check_consistency_of(self.inputargs, self.operations)
+        for op in self.operations:
+            descr = op.getdescr()
+            if op.getopnum() == rop.LABEL and isinstance(descr, TargetToken):
+                assert descr.original_jitcell_token is self.original_jitcell_token
 
     @staticmethod
     def check_consistency_of(inputargs, operations):
@@ -811,15 +835,23 @@
                 assert isinstance(box, Box)
                 assert box not in seen
                 seen[box] = True
+            if op.getopnum() == rop.LABEL:
+                inputargs = op.getarglist()
+                for box in inputargs:
+                    assert isinstance(box, Box), "LABEL contains %r" % (box,)
+                seen = dict.fromkeys(inputargs)
+                assert len(seen) == len(inputargs), (
+                    "duplicate Box in the LABEL arguments")
+                
         assert operations[-1].is_final()
         if operations[-1].getopnum() == rop.JUMP:
             target = operations[-1].getdescr()
             if target is not None:
-                assert isinstance(target, LoopToken)
+                assert isinstance(target, TargetToken)
 
     def dump(self):
         # RPython-friendly
-        print '%r: inputargs =' % self, self._dump_args(self.inputargs)
+        print '%r: inputargs =' % self, self._dump_args(self.inputargs)        
         for op in self.operations:
             args = op.getarglist()
             print '\t', op.getopname(), self._dump_args(args), \
@@ -898,6 +930,12 @@
     def view(self, **kwds):
         pass
 
+    def clear(self):
+        pass
+
+    def add_jitcell_token(self, token):
+        pass
+
 class Stats(object):
     """For tests."""
 
@@ -910,8 +948,22 @@
         self.loops = []
         self.locations = []
         self.aborted_keys = []
-        self.invalidated_token_numbers = set()
+        self.invalidated_token_numbers = set()    # <- not RPython
+        self.jitcell_token_wrefs = []
 
+    def clear(self):
+        del self.loops[:]
+        del self.locations[:]
+        del self.aborted_keys[:]
+        self.invalidated_token_numbers.clear()
+        self.compiled_count = 0
+        self.enter_count = 0
+        self.aborted_count = 0
+
+    def add_jitcell_token(self, token):
+        assert isinstance(token, JitCellToken)
+        self.jitcell_token_wrefs.append(weakref.ref(token))
+        
     def set_history(self, history):
         self.operations = history.operations
 
@@ -941,6 +993,15 @@
     def get_all_loops(self):
         return self.loops
 
+    def get_all_jitcell_tokens(self):
+        tokens = [t() for t in self.jitcell_token_wrefs]
+        if None in tokens:
+            assert False, "get_all_jitcell_tokens will not work as "+\
+                          "loops have been freed"
+        return tokens
+            
+        
+
     def check_history(self, expected=None, **check):
         insns = {}
         for op in self.operations:
@@ -956,16 +1017,90 @@
                 "found %d %r, expected %d" % (found, insn, expected_count))
         return insns
 
+    def check_resops(self, expected=None, **check):
+        insns = {}
+        for loop in self.get_all_loops():
+            insns = loop.summary(adding_insns=insns)
+        return self._check_insns(insns, expected, check)
+
+    def _check_insns(self, insns, expected, check):
+        if expected is not None:
+            insns.pop('debug_merge_point', None)
+            insns.pop('label', None)
+            assert insns == expected
+        for insn, expected_count in check.items():
+            getattr(rop, insn.upper())  # fails if 'rop.INSN' does not exist
+            found = insns.get(insn, 0)
+            assert found == expected_count, (
+                "found %d %r, expected %d" % (found, insn, expected_count))
+        return insns
+
+    def check_simple_loop(self, expected=None, **check):
+        # Useful in the simplest case when we have only one trace ending with
+        # a jump back to itself and possibly a few bridges ending with finish.
+        # Only the operations within the loop formed by that single jump will
+        # be counted.
+
+        # XXX hacked version, ignore and remove me when jit-targets is merged.
+        loops = self.get_all_loops()
+        loops = [loop for loop in loops if 'Preamble' not in repr(loop)] #XXX
+        assert len(loops) == 1
+        loop, = loops
+        jumpop = loop.operations[-1]
+        assert jumpop.getopnum() == rop.JUMP
+        insns = {}
+        for op in loop.operations:
+            opname = op.getopname()
+            insns[opname] = insns.get(opname, 0) + 1
+        return self._check_insns(insns, expected, check)
+
+    def check_simple_loop(self, expected=None, **check):
+        # Useful in the simplest case when we have only one trace ending with
+        # a jump back to itself and possibly a few bridges ending with finish.
+        # Only the operations within the loop formed by that single jump will
+        # be counted.
+        loops = self.get_all_loops()
+        assert len(loops) == 1
+        loop = loops[0]
+        jumpop = loop.operations[-1]
+        assert jumpop.getopnum() == rop.JUMP
+        assert self.check_resops(jump=1)
+        labels = [op for op in loop.operations if op.getopnum() == rop.LABEL]
+        targets = [op._descr_wref() for op in labels]
+        assert None not in targets # TargetToken was freed, give up
+        target = jumpop._descr_wref()
+        assert target
+        assert targets.count(target) == 1
+        i = loop.operations.index(labels[targets.index(target)])
+        insns = {}
+        for op in loop.operations[i:]:
+            opname = op.getopname()
+            insns[opname] = insns.get(opname, 0) + 1
+        return self._check_insns(insns, expected, check)
+        
     def check_loops(self, expected=None, everywhere=False, **check):
         insns = {}
-        for loop in self.loops:
-            if not everywhere:
-                if getattr(loop, '_ignore_during_counting', False):
-                    continue
+        for loop in self.get_all_loops():
+            #if not everywhere:
+            #    if getattr(loop, '_ignore_during_counting', False):
+            #        continue
             insns = loop.summary(adding_insns=insns)
         if expected is not None:
             insns.pop('debug_merge_point', None)
-            assert insns == expected
+            print
+            print
+            print "        self.check_resops(%s)" % str(insns)
+            print
+            import pdb; pdb.set_trace()
+        else:
+            chk = ['%s=%d' % (i, insns.get(i, 0)) for i in check]
+            print
+            print
+            print "        self.check_resops(%s)" % ', '.join(chk)
+            print
+            import pdb; pdb.set_trace()
+        return
+        
         for insn, expected_count in check.items():
             getattr(rop, insn.upper())  # fails if 'rop.INSN' does not exist
             found = insns.get(insn, 0)
@@ -975,26 +1110,26 @@
 
     def check_consistency(self):
         "NOT_RPYTHON"
-        for loop in self.loops:
+        for loop in self.get_all_loops():
             loop.check_consistency()
 
     def maybe_view(self):
         if option.view:
             self.view()
 
-    def view(self, errmsg=None, extraloops=[]):
-        from pypy.jit.metainterp.graphpage import display_loops
-        loops = self.get_all_loops()[:]
-        for loop in extraloops:
-            if loop in loops:
-                loops.remove(loop)
-            loops.append(loop)
-        highlight_loops = dict.fromkeys(extraloops, 1)
-        for loop in loops:
-            if hasattr(loop, '_looptoken_number') and (
-                    loop._looptoken_number in self.invalidated_token_numbers):
-                highlight_loops.setdefault(loop, 2)
-        display_loops(loops, errmsg, highlight_loops)
+    def view(self, errmsg=None, extraprocedures=[]):
+        from pypy.jit.metainterp.graphpage import display_procedures
+        procedures = self.get_all_loops()[:]
+        for procedure in extraprocedures:
+            if procedure in procedures:
+                procedures.remove(procedure)
+            procedures.append(procedure)
+        highlight_procedures = dict.fromkeys(extraprocedures, 1)
+        for procedure in procedures:
+            if hasattr(procedure, '_looptoken_number') and (
+               procedure._looptoken_number in self.invalidated_token_numbers):
+                highlight_procedures.setdefault(procedure, 2)
+        display_procedures(procedures, errmsg, highlight_procedures)
 
 # ----------------------------------------------------------------
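Summing up the two token classes added to history.py: a JitCellToken outlives the individual traces and represents a whole compiled procedure, while every LABEL carries a TargetToken recording which jitcell it belongs to. A stand-in sketch with plain classes (the real ones are AbstractDescr subclasses):

    class JitCellToken(object):                 # root of a tree of traces
        def __init__(self):
            self.target_tokens = None           # filled with the reachable labels

    class TargetToken(object):                  # descr of a LABEL operation
        def __init__(self, targeting_jitcell_token):
            self.targeting_jitcell_token = targeting_jitcell_token
            self.original_jitcell_token = None  # set once the trace is compiled

    cell = JitCellToken()
    label = TargetToken(cell)
    label.original_jitcell_token = cell
    cell.target_tokens = [label]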
 
diff --git a/pypy/jit/metainterp/inliner.py b/pypy/jit/metainterp/inliner.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/metainterp/inliner.py
@@ -0,0 +1,57 @@
+from pypy.jit.metainterp.history import Const
+from pypy.jit.metainterp.resume import Snapshot
+
+class Inliner(object):
+    def __init__(self, inputargs, jump_args):
+        assert len(inputargs) == len(jump_args)
+        self.argmap = {}
+        for i in range(len(inputargs)):
+            if inputargs[i] in self.argmap:
+                assert self.argmap[inputargs[i]] == jump_args[i]
+            else:
+                self.argmap[inputargs[i]] = jump_args[i]
+        self.snapshot_map = {None: None}
+
+    def inline_op(self, newop, ignore_result=False, clone=True,
+                  ignore_failargs=False):
+        if clone:
+            newop = newop.clone()
+        args = newop.getarglist()
+        newop.initarglist([self.inline_arg(a) for a in args])
+
+        if newop.is_guard():
+            args = newop.getfailargs()
+            if args and not ignore_failargs:
+                newop.setfailargs([self.inline_arg(a) for a in args])
+            else:
+                newop.setfailargs([])
+
+        if newop.result and not ignore_result:
+            old_result = newop.result
+            newop.result = newop.result.clonebox()
+            self.argmap[old_result] = newop.result
+
+        self.inline_descr_inplace(newop.getdescr())
+
+        return newop
+
+    def inline_descr_inplace(self, descr):
+        from pypy.jit.metainterp.compile import ResumeGuardDescr
+        if isinstance(descr, ResumeGuardDescr):
+            descr.rd_snapshot = self.inline_snapshot(descr.rd_snapshot)
+
+    def inline_arg(self, arg):
+        if arg is None:
+            return None
+        if isinstance(arg, Const):
+            return arg
+        return self.argmap[arg]
+
+    def inline_snapshot(self, snapshot):
+        if snapshot in self.snapshot_map:
+            return self.snapshot_map[snapshot]
+        boxes = [self.inline_arg(a) for a in snapshot.boxes]
+        new_snapshot = Snapshot(self.inline_snapshot(snapshot.prev), boxes)
+        self.snapshot_map[snapshot] = new_snapshot
+        return new_snapshot
+
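The new Inliner above is essentially a renaming pass over cloned operations. A tiny self-contained illustration of what its argmap achieves (plain strings stand in for Boxes, integers stand in for Consts; guards, results and snapshots are ignored):

    inputargs = ['i0', 'p1']            # the peeled iteration's input boxes
    jump_args = ['i7', 'p3']            # what the closing JUMP actually passes
    argmap = dict(zip(inputargs, jump_args))

    def inline_arg(arg):
        if arg is None:
            return None
        if isinstance(arg, int):        # Consts pass through unchanged
            return arg
        return argmap[arg]

    assert [inline_arg(a) for a in ['i0', 42, 'p1']] == ['i7', 42, 'p3']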
diff --git a/pypy/jit/metainterp/optimizeopt/__init__.py b/pypy/jit/metainterp/optimizeopt/__init__.py
--- a/pypy/jit/metainterp/optimizeopt/__init__.py
+++ b/pypy/jit/metainterp/optimizeopt/__init__.py
@@ -4,13 +4,15 @@
 from pypy.jit.metainterp.optimizeopt.virtualize import OptVirtualize
 from pypy.jit.metainterp.optimizeopt.heap import OptHeap
 from pypy.jit.metainterp.optimizeopt.vstring import OptString
-from pypy.jit.metainterp.optimizeopt.unroll import optimize_unroll, OptInlineShortPreamble
+from pypy.jit.metainterp.optimizeopt.unroll import optimize_unroll
 from pypy.jit.metainterp.optimizeopt.fficall import OptFfiCall
 from pypy.jit.metainterp.optimizeopt.simplify import OptSimplify
 from pypy.jit.metainterp.optimizeopt.pure import OptPure
 from pypy.jit.metainterp.optimizeopt.earlyforce import OptEarlyForce
 from pypy.rlib.jit import PARAMETERS
 from pypy.rlib.unroll import unrolling_iterable
+from pypy.rlib.debug import debug_start, debug_stop, debug_print
+
 
 ALL_OPTS = [('intbounds', OptIntBounds),
             ('rewrite', OptRewrite),
@@ -28,8 +30,7 @@
 ALL_OPTS_LIST = [name for name, _ in ALL_OPTS]
 ALL_OPTS_NAMES = ':'.join([name for name, _ in ALL_OPTS])
 
-def build_opt_chain(metainterp_sd, enable_opts,
-                    inline_short_preamble=True, retraced=False):
+def build_opt_chain(metainterp_sd, enable_opts):
     config = metainterp_sd.config
     optimizations = []
     unroll = 'unroll' in enable_opts    # 'enable_opts' is normally a dict
@@ -45,17 +46,14 @@
                 optimizations.append(OptFfiCall())
 
     if ('rewrite' not in enable_opts or 'virtualize' not in enable_opts
-        or 'heap' not in enable_opts):
+        or 'heap' not in enable_opts or 'unroll' not in enable_opts):
         optimizations.append(OptSimplify())
 
-    if inline_short_preamble:
-        optimizations = [OptInlineShortPreamble(retraced)] + optimizations
-
     return optimizations, unroll
 
 
 def optimize_loop_1(metainterp_sd, loop, enable_opts,
-                    inline_short_preamble=True, retraced=False, bridge=False):
+                    inline_short_preamble=True, retraced=False):
     """Optimize loop.operations to remove internal overheadish operations.
     """
 
@@ -64,7 +62,7 @@
     if unroll:
         optimize_unroll(metainterp_sd, loop, optimizations)
     else:
-        optimizer = Optimizer(metainterp_sd, loop, optimizations, bridge)
+        optimizer = Optimizer(metainterp_sd, loop, optimizations)
         optimizer.propagate_all_forward()
 
 def optimize_bridge_1(metainterp_sd, bridge, enable_opts,
@@ -76,7 +74,25 @@
     except KeyError:
         pass
     optimize_loop_1(metainterp_sd, bridge, enable_opts,
-                    inline_short_preamble, retraced, bridge=True)
+                    inline_short_preamble, retraced)
 
 if __name__ == '__main__':
     print ALL_OPTS_NAMES
+
+def optimize_trace(metainterp_sd, loop, enable_opts, inline_short_preamble=True):
+    """Optimize loop.operations to remove internal overheadish operations.
+    """
+
+    debug_start("jit-optimize")
+    try:
+        loop.logops = metainterp_sd.logger_noopt.log_loop(loop.inputargs,
+                                                          loop.operations)
+        optimizations, unroll = build_opt_chain(metainterp_sd, enable_opts)
+        if unroll:
+            optimize_unroll(metainterp_sd, loop, optimizations, inline_short_preamble)
+        else:
+            optimizer = Optimizer(metainterp_sd, loop, optimizations)
+            optimizer.propagate_all_forward()
+    finally:
+        debug_stop("jit-optimize")
+        
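
[editor's note] The new optimize_trace entry point wraps the whole pass in a "jit-optimize" debug section with try/finally, so the section is closed even when the optimizer gives up with an exception. The pattern in isolation, as a hypothetical helper (debug_start/debug_stop passed in, standing for the pypy.rlib.debug functions):

    def with_debug_section(name, debug_start, debug_stop, work):
        # Always close the debug section, even if 'work' raises.
        debug_start(name)
        try:
            return work()
        finally:
            debug_stop(name)

    # e.g. with_debug_section("jit-optimize", debug_start, debug_stop,
    #                         lambda: optimizer.propagate_all_forward())
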
diff --git a/pypy/jit/metainterp/optimizeopt/fficall.py b/pypy/jit/metainterp/optimizeopt/fficall.py
--- a/pypy/jit/metainterp/optimizeopt/fficall.py
+++ b/pypy/jit/metainterp/optimizeopt/fficall.py
@@ -1,11 +1,13 @@
+from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
+from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
+from pypy.jit.metainterp.resoperation import rop, ResOperation
+from pypy.rlib import clibffi, libffi
+from pypy.rlib.debug import debug_print
+from pypy.rlib.libffi import Func
+from pypy.rlib.objectmodel import we_are_translated
 from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
-from pypy.rlib.objectmodel import we_are_translated
-from pypy.rlib.libffi import Func
-from pypy.rlib.debug import debug_print
-from pypy.jit.codewriter.effectinfo import EffectInfo
-from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
-from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
+from pypy.rpython.lltypesystem import llmemory, rffi
 
 
 class FuncInfo(object):
@@ -78,7 +80,7 @@
 
     def new(self):
         return OptFfiCall()
-    
+
     def begin_optimization(self, funcval, op):
         self.rollback_maybe('begin_optimization', op)
         self.funcinfo = FuncInfo(funcval, self.optimizer.cpu, op)
@@ -116,6 +118,9 @@
             ops = self.do_push_arg(op)
         elif oopspec == EffectInfo.OS_LIBFFI_CALL:
             ops = self.do_call(op)
+        elif (oopspec == EffectInfo.OS_LIBFFI_GETARRAYITEM or
+            oopspec == EffectInfo.OS_LIBFFI_SETARRAYITEM):
+            ops = self.do_getsetarrayitem(op, oopspec)
         #
         for op in ops:
             self.emit_operation(op)
@@ -190,6 +195,56 @@
         ops.append(newop)
         return ops
 
+    def do_getsetarrayitem(self, op, oopspec):
+        ffitypeval = self.getvalue(op.getarg(1))
+        widthval = self.getvalue(op.getarg(2))
+        offsetval = self.getvalue(op.getarg(5))
+        if not ffitypeval.is_constant() or not widthval.is_constant() or not offsetval.is_constant():
+            return [op]
+
+        ffitypeaddr = ffitypeval.box.getaddr()
+        ffitype = llmemory.cast_adr_to_ptr(ffitypeaddr, clibffi.FFI_TYPE_P)
+        offset = offsetval.box.getint()
+        width = widthval.box.getint()
+        descr = self._get_interior_descr(ffitype, width, offset)
+
+        arglist = [
+            self.getvalue(op.getarg(3)).force_box(self.optimizer),
+            self.getvalue(op.getarg(4)).force_box(self.optimizer),
+        ]
+        if oopspec == EffectInfo.OS_LIBFFI_GETARRAYITEM:
+            opnum = rop.GETINTERIORFIELD_RAW
+        elif oopspec == EffectInfo.OS_LIBFFI_SETARRAYITEM:
+            opnum = rop.SETINTERIORFIELD_RAW
+            arglist.append(self.getvalue(op.getarg(6)).force_box(self.optimizer))
+        else:
+            assert False
+        return [
+            ResOperation(opnum, arglist, op.result, descr=descr),
+        ]
+
+    def _get_interior_descr(self, ffitype, width, offset):
+        kind = libffi.types.getkind(ffitype)
+        is_pointer = is_float = is_signed = False
+        if ffitype is libffi.types.pointer:
+            is_pointer = True
+        elif kind == 'i':
+            is_signed = True
+        elif kind == 'f' or kind == 'I' or kind == 'U':
+            # longlongs are treated as floats, see
+            # e.g. llsupport/descr.py:getDescrClass
+            is_float = True
+        elif kind == 'u':
+            # unsigned: is_pointer, is_float and is_signed all stay False
+            pass
+        else:
+            assert False, "unsupported ffitype or kind"
+        #
+        fieldsize = rffi.getintfield(ffitype, 'c_size')
+        return self.optimizer.cpu.interiorfielddescrof_dynamic(
+            offset, width, fieldsize, is_pointer, is_float, is_signed
+        )
+
     def propagate_forward(self, op):
         if self.logops is not None:
             debug_print(self.logops.repr_of_resop(op))
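
[editor's note] The do_getsetarrayitem transformation above turns a libffi array access into a single GETINTERIORFIELD_RAW / SETINTERIORFIELD_RAW whose dynamic descr records (offset, width, fieldsize, pointer/float/signed flags). The address arithmetic the descr stands for is simply base + index*width + offset. A small sketch with hypothetical names, using the struct module in place of a real descr:

    import struct

    def read_interior_field(buf, index, width, offset, fmt="l"):
        # Hypothetical helper showing what a GETINTERIORFIELD_RAW with a
        # dynamic descr boils down to: each element is 'width' bytes long
        # and the field lives 'offset' bytes into it.
        return struct.unpack_from(fmt, buf, index * width + offset)[0]

    # an "array" of two (long, long) pairs; read field 1 of element 1
    buf = struct.pack("llll", 10, 11, 20, 21)
    width = struct.calcsize("ll")
    offset = struct.calcsize("l")
    assert read_interior_field(buf, 1, width, offset) == 21
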
diff --git a/pypy/jit/metainterp/optimizeopt/heap.py b/pypy/jit/metainterp/optimizeopt/heap.py
--- a/pypy/jit/metainterp/optimizeopt/heap.py
+++ b/pypy/jit/metainterp/optimizeopt/heap.py
@@ -43,7 +43,7 @@
             optheap.optimizer.ensure_imported(cached_fieldvalue)
             cached_fieldvalue = self._cached_fields.get(structvalue, None)
 
-        if cached_fieldvalue is not fieldvalue:
+        if not fieldvalue.same_value(cached_fieldvalue):
             # common case: store the 'op' as lazy_setfield, and register
             # myself in the optheap's _lazy_setfields_and_arrayitems list
             self._lazy_setfield = op
@@ -140,6 +140,15 @@
                     getop = ResOperation(rop.GETFIELD_GC, [op.getarg(0)],
                                          result, op.getdescr())
                     shortboxes.add_potential(getop, synthetic=True)
+                if op.getopnum() == rop.SETARRAYITEM_GC:
+                    result = op.getarg(2)
+                    if isinstance(result, Const):
+                        newresult = result.clonebox()
+                        optimizer.make_constant(newresult, result)
+                        result = newresult
+                    getop = ResOperation(rop.GETARRAYITEM_GC, [op.getarg(0), op.getarg(1)],
+                                         result, op.getdescr())
+                    shortboxes.add_potential(getop, synthetic=True)
                 elif op.result is not None:
                     shortboxes.add_potential(op)
 
@@ -225,7 +234,7 @@
             or op.is_ovf()):
             self.posponedop = op
         else:
-            self.next_optimization.propagate_forward(op)
+            Optimization.emit_operation(self, op)
 
     def emitting_operation(self, op):
         if op.has_no_side_effect():
diff --git a/pypy/jit/metainterp/optimizeopt/intbounds.py b/pypy/jit/metainterp/optimizeopt/intbounds.py
--- a/pypy/jit/metainterp/optimizeopt/intbounds.py
+++ b/pypy/jit/metainterp/optimizeopt/intbounds.py
@@ -1,3 +1,4 @@
+import sys
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization, CONST_1, CONST_0, \
                                                   MODE_ARRAY, MODE_STR, MODE_UNICODE
 from pypy.jit.metainterp.history import ConstInt
@@ -5,36 +6,18 @@
     IntUpperBound)
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from pypy.jit.metainterp.resoperation import rop
+from pypy.jit.metainterp.optimize import InvalidLoop
+from pypy.rlib.rarithmetic import LONG_BIT
 
 
 class OptIntBounds(Optimization):
     """Keeps track of the bounds placed on integers by guards and remove
        redundant guards"""
 
-    def setup(self):
-        self.posponedop = None
-        self.nextop = None
-
     def new(self):
-        assert self.posponedop is None
         return OptIntBounds()
-        
-    def flush(self):
-        assert self.posponedop is None
-
-    def setup(self):
-        self.posponedop = None
-        self.nextop = None
 
     def propagate_forward(self, op):
-        if op.is_ovf():
-            self.posponedop = op
-            return
-        if self.posponedop:
-            self.nextop = op
-            op = self.posponedop
-            self.posponedop = None
-
         dispatch_opt(self, op)
 
     def opt_default(self, op):
@@ -126,14 +109,29 @@
         r.intbound.intersect(v1.intbound.div_bound(v2.intbound))
 
     def optimize_INT_MOD(self, op):
+        v1 = self.getvalue(op.getarg(0))
+        v2 = self.getvalue(op.getarg(1))
+        known_nonneg = (v1.intbound.known_ge(IntBound(0, 0)) and 
+                        v2.intbound.known_ge(IntBound(0, 0)))
+        if known_nonneg and v2.is_constant():
+            val = v2.box.getint()
+            if (val & (val-1)) == 0:
+                # nonneg % power-of-two ==> nonneg & (power-of-two - 1)
+                arg1 = op.getarg(0)
+                arg2 = ConstInt(val-1)
+                op = op.copy_and_change(rop.INT_AND, args=[arg1, arg2])
         self.emit_operation(op)
-        v2 = self.getvalue(op.getarg(1))
         if v2.is_constant():
             val = v2.box.getint()
             r = self.getvalue(op.result)
             if val < 0:
+                if val == -sys.maxint-1:
+                    return     # give up
                 val = -val
-            r.intbound.make_gt(IntBound(-val, -val))
+            if known_nonneg:
+                r.intbound.make_ge(IntBound(0, 0))
+            else:
+                r.intbound.make_gt(IntBound(-val, -val))
             r.intbound.make_lt(IntBound(val, val))
 
     def optimize_INT_LSHIFT(self, op):
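
[editor's note] The new branch in optimize_INT_MOD relies on the identity nonneg % 2**k == nonneg & (2**k - 1), guarded by the usual single-bit test for powers of two; it is only applied when both operands are known non-negative, since for a negative left operand the masked value is not the truncated remainder that int_mod computes. A quick stand-alone check of both facts:

    def is_power_of_two(val):
        # same trick as in the optimizer: a power of two has exactly one bit set
        return val > 0 and (val & (val - 1)) == 0

    def mod_as_and(n, val):
        # valid only for n >= 0 and val a power of two
        assert n >= 0 and is_power_of_two(val)
        return n & (val - 1)

    assert is_power_of_two(8) and not is_power_of_two(42)
    assert all(mod_as_and(n, 8) == n % 8 for n in range(1000))
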
@@ -153,72 +151,84 @@
     def optimize_INT_RSHIFT(self, op):
         v1 = self.getvalue(op.getarg(0))
         v2 = self.getvalue(op.getarg(1))
+        b = v1.intbound.rshift_bound(v2.intbound)
+        if b.has_lower and b.has_upper and b.lower == b.upper:
+            # constant result (likely 0, for rshifts that kill all bits)
+            self.make_constant_int(op.result, b.lower)
+        else:
+            self.emit_operation(op)
+            r = self.getvalue(op.result)
+            r.intbound.intersect(b)
+
+    def optimize_GUARD_NO_OVERFLOW(self, op):
+        lastop = self.last_emitted_operation
+        if lastop is not None:
+            opnum = lastop.getopnum()
+            args = lastop.getarglist()
+            result = lastop.result
+            # If the INT_xxx_OVF was replaced with INT_xxx, then we can kill
+            # the GUARD_NO_OVERFLOW.
+            if (opnum == rop.INT_ADD or
+                opnum == rop.INT_SUB or
+                opnum == rop.INT_MUL):
+                return
+            # Else, synthesize the non-overflowing op for optimize_default to
+            # reuse, as well as the reverse op
+            elif opnum == rop.INT_ADD_OVF:
+                self.pure(rop.INT_ADD, args[:], result)
+                self.pure(rop.INT_SUB, [result, args[1]], args[0])
+                self.pure(rop.INT_SUB, [result, args[0]], args[1])
+            elif opnum == rop.INT_SUB_OVF:
+                self.pure(rop.INT_SUB, args[:], result)
+                self.pure(rop.INT_ADD, [result, args[1]], args[0])
+                self.pure(rop.INT_SUB, [args[0], result], args[1])
+            elif opnum == rop.INT_MUL_OVF:
+                self.pure(rop.INT_MUL, args[:], result)
         self.emit_operation(op)
-        r = self.getvalue(op.result)
-        r.intbound.intersect(v1.intbound.rshift_bound(v2.intbound))
+
+    def optimize_GUARD_OVERFLOW(self, op):
+        # If INT_xxx_OVF was replaced by INT_xxx, *but* we still see
+        # GUARD_OVERFLOW, then the loop is invalid.
+        lastop = self.last_emitted_operation
+        if lastop is None:
+            raise InvalidLoop
+        opnum = lastop.getopnum()
+        if opnum not in (rop.INT_ADD_OVF, rop.INT_SUB_OVF, rop.INT_MUL_OVF):
+            raise InvalidLoop
+        self.emit_operation(op)
 
     def optimize_INT_ADD_OVF(self, op):
         v1 = self.getvalue(op.getarg(0))
         v2 = self.getvalue(op.getarg(1))
         resbound = v1.intbound.add_bound(v2.intbound)
-        if resbound.has_lower and resbound.has_upper and \
-           self.nextop.getopnum() == rop.GUARD_NO_OVERFLOW:
-            # Transform into INT_ADD and remove guard
+        if resbound.bounded():
+            # Transform into INT_ADD.  The following GUARD_NO_OVERFLOW is
+            # then killed by optimize_GUARD_NO_OVERFLOW; if we see a
+            # GUARD_OVERFLOW instead, optimize_GUARD_OVERFLOW raises
+            # InvalidLoop.
             op = op.copy_and_change(rop.INT_ADD)
-            self.optimize_INT_ADD(op) # emit the op
-        else:
-            self.emit_operation(op)
-            r = self.getvalue(op.result)
-            r.intbound.intersect(resbound)
-            self.emit_operation(self.nextop)
-            if self.nextop.getopnum() == rop.GUARD_NO_OVERFLOW:
-                # Synthesize the non overflowing op for optimize_default to reuse
-                self.pure(rop.INT_ADD, op.getarglist()[:], op.result)
-                # Synthesize the reverse op for optimize_default to reuse
-                self.pure(rop.INT_SUB, [op.result, op.getarg(1)], op.getarg(0))
-                self.pure(rop.INT_SUB, [op.result, op.getarg(0)], op.getarg(1))
-
+        self.emit_operation(op) # emit the op
+        r = self.getvalue(op.result)
+        r.intbound.intersect(resbound)
 
     def optimize_INT_SUB_OVF(self, op):
         v1 = self.getvalue(op.getarg(0))
         v2 = self.getvalue(op.getarg(1))
         resbound = v1.intbound.sub_bound(v2.intbound)
-        if resbound.has_lower and resbound.has_upper and \
-               self.nextop.getopnum() == rop.GUARD_NO_OVERFLOW:
-            # Transform into INT_SUB and remove guard
+        if resbound.bounded():
             op = op.copy_and_change(rop.INT_SUB)
-            self.optimize_INT_SUB(op) # emit the op
-        else:
-            self.emit_operation(op)
-            r = self.getvalue(op.result)
-            r.intbound.intersect(resbound)
-            self.emit_operation(self.nextop)
-            if self.nextop.getopnum() == rop.GUARD_NO_OVERFLOW:
-                # Synthesize the non overflowing op for optimize_default to reuse
-                self.pure(rop.INT_SUB, op.getarglist()[:], op.result)
-                # Synthesize the reverse ops for optimize_default to reuse
-                self.pure(rop.INT_ADD, [op.result, op.getarg(1)], op.getarg(0))
-                self.pure(rop.INT_SUB, [op.getarg(0), op.result], op.getarg(1))
-
+        self.emit_operation(op) # emit the op
+        r = self.getvalue(op.result)
+        r.intbound.intersect(resbound)
 
     def optimize_INT_MUL_OVF(self, op):
         v1 = self.getvalue(op.getarg(0))
         v2 = self.getvalue(op.getarg(1))
         resbound = v1.intbound.mul_bound(v2.intbound)
-        if resbound.has_lower and resbound.has_upper and \
-               self.nextop.getopnum() == rop.GUARD_NO_OVERFLOW:
-            # Transform into INT_MUL and remove guard
+        if resbound.bounded():
             op = op.copy_and_change(rop.INT_MUL)
-            self.optimize_INT_MUL(op) # emit the op
-        else:
-            self.emit_operation(op)
-            r = self.getvalue(op.result)
-            r.intbound.intersect(resbound)
-            self.emit_operation(self.nextop)
-            if self.nextop.getopnum() == rop.GUARD_NO_OVERFLOW:
-                # Synthesize the non overflowing op for optimize_default to reuse
-                self.pure(rop.INT_MUL, op.getarglist()[:], op.result)
-
+        self.emit_operation(op)
+        r = self.getvalue(op.result)
+        r.intbound.intersect(resbound)
 
     def optimize_INT_LT(self, op):
         v1 = self.getvalue(op.getarg(0))
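
[editor's note] Taken together, the changes to optimize_INT_ADD_OVF / _SUB_OVF / _MUL_OVF and the two new guard handlers form a simple protocol: when interval arithmetic proves the result cannot overflow, the _OVF op is emitted as its plain counterpart; the following GUARD_NO_OVERFLOW then sees a non-_OVF op as last_emitted_operation and drops itself, while a GUARD_OVERFLOW in the same situation marks the trace as invalid. A minimal sketch of that decision, assuming 64-bit words and plain (lo, hi) pairs instead of IntBound objects:

    LONG_BIT = 64                       # assumption for the sketch
    MAXINT = 2 ** (LONG_BIT - 1) - 1
    MININT = -2 ** (LONG_BIT - 1)

    def add_cannot_overflow(lo1, hi1, lo2, hi2):
        # the add is safe iff the interval sum stays inside the machine word
        return MININT <= lo1 + lo2 and hi1 + hi2 <= MAXINT

    def lower_add_ovf(b1, b2):
        # hypothetical mirror of optimize_INT_ADD_OVF + optimize_GUARD_NO_OVERFLOW:
        # returns the opcode to emit and whether the following guard may be dropped
        if add_cannot_overflow(b1[0], b1[1], b2[0], b2[1]):
            return "INT_ADD", True      # guard becomes dead
        return "INT_ADD_OVF", False     # guard must stay

    assert lower_add_ovf((0, 10), (0, 10)) == ("INT_ADD", True)
    assert lower_add_ovf((0, MAXINT), (1, 1)) == ("INT_ADD_OVF", False)
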
diff --git a/pypy/jit/metainterp/optimizeopt/intutils.py b/pypy/jit/metainterp/optimizeopt/intutils.py
--- a/pypy/jit/metainterp/optimizeopt/intutils.py
+++ b/pypy/jit/metainterp/optimizeopt/intutils.py
@@ -1,4 +1,5 @@
-from pypy.rlib.rarithmetic import ovfcheck, ovfcheck_lshift, LONG_BIT
+from pypy.rlib.rarithmetic import ovfcheck, LONG_BIT
+from pypy.rlib.objectmodel import we_are_translated
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp.history import BoxInt, ConstInt
 import sys
@@ -13,6 +14,10 @@
         self.has_lower = True
         self.upper = upper
         self.lower = lower
+        # check for unexpected overflows:
+        if not we_are_translated():
+            assert type(upper) is not long
+            assert type(lower) is not long
 
     # Returns True if the bound was updated
     def make_le(self, other):
@@ -169,10 +174,10 @@
            other.known_ge(IntBound(0, 0)) and \
            other.known_lt(IntBound(LONG_BIT, LONG_BIT)):
             try:
-                vals = (ovfcheck_lshift(self.upper, other.upper),
-                        ovfcheck_lshift(self.upper, other.lower),
-                        ovfcheck_lshift(self.lower, other.upper),
-                        ovfcheck_lshift(self.lower, other.lower))
+                vals = (ovfcheck(self.upper << other.upper),
+                        ovfcheck(self.upper << other.lower),
+                        ovfcheck(self.lower << other.upper),
+                        ovfcheck(self.lower << other.lower))
                 return IntBound(min4(vals), max4(vals))
             except (OverflowError, ValueError):
                 return IntUnbounded()
diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -1,12 +1,12 @@
 from pypy.jit.metainterp import jitprof, resume, compile
 from pypy.jit.metainterp.executor import execute_nonspec
-from pypy.jit.metainterp.history import BoxInt, BoxFloat, Const, ConstInt, REF
+from pypy.jit.metainterp.history import BoxInt, BoxFloat, Const, ConstInt, REF, INT
 from pypy.jit.metainterp.optimizeopt.intutils import IntBound, IntUnbounded, \
                                                      ImmutableIntUnbounded, \
                                                      IntLowerBound, MININT, MAXINT
 from pypy.jit.metainterp.optimizeopt.util import (make_dispatcher_method,
     args_dict)
-from pypy.jit.metainterp.resoperation import rop, ResOperation
+from pypy.jit.metainterp.resoperation import rop, ResOperation, AbstractResOp
 from pypy.jit.metainterp.typesystem import llhelper, oohelper
 from pypy.tool.pairtype import extendabletype
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
@@ -95,6 +95,10 @@
         return guards
 
     def import_from(self, other, optimizer):
+        if self.level == LEVEL_CONSTANT:
+            assert other.level == LEVEL_CONSTANT
+            assert other.box.same_constant(self.box)
+            return
         assert self.level <= LEVEL_NONNULL
         if other.level == LEVEL_CONSTANT:
             self.make_constant(other.get_key_box())
@@ -141,6 +145,13 @@
             return not box.nonnull()
         return False
 
+    def same_value(self, other):
+        if not other:
+            return False
+        if self.is_constant() and other.is_constant():
+            return self.box.same_constant(other.box)
+        return self is other
+
     def make_constant(self, constbox):
         """Replace 'self.box' with a Const box."""
         assert isinstance(constbox, Const)
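
[editor's note] The new same_value method above (also used by the heap.py change earlier in this diff) treats two values as equal either when they are the same object or when both are constants wrapping the same constant; the previous identity-only comparison missed the second case, e.g. a cached field re-imported as a fresh constant. A tiny illustration with a hypothetical value class, not the real OptValue:

    class Val(object):
        def __init__(self, const=None):
            self.const = const            # None means "not a constant"
        def is_constant(self):
            return self.const is not None
        def same_value(self, other):
            if other is None:
                return False
            if self.is_constant() and other.is_constant():
                return self.const == other.const
            return self is other

    a, b = Val(42), Val(42)
    assert a is not b and a.same_value(b)     # distinct objects, equal constants
    assert not Val().same_value(Val())        # two unknown values stay distinct
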
@@ -209,13 +220,19 @@
     def setfield(self, ofs, value):
         raise NotImplementedError
 
+    def getlength(self):
+        raise NotImplementedError
+
     def getitem(self, index):
         raise NotImplementedError
 
-    def getlength(self):
+    def setitem(self, index, value):
         raise NotImplementedError
 
-    def setitem(self, index, value):
+    def getinteriorfield(self, index, ofs, default):
+        raise NotImplementedError
+
+    def setinteriorfield(self, index, ofs, value):
         raise NotImplementedError
 
 
@@ -230,9 +247,10 @@
 CONST_1      = ConstInt(1)
 CVAL_ZERO    = ConstantValue(CONST_0)
 CVAL_ZERO_FLOAT = ConstantValue(Const._new(0.0))
-CVAL_UNINITIALIZED_ZERO = ConstantValue(CONST_0)
 llhelper.CVAL_NULLREF = ConstantValue(llhelper.CONST_NULL)
 oohelper.CVAL_NULLREF = ConstantValue(oohelper.CONST_NULL)
+REMOVED = AbstractResOp(None)
+
 
 class Optimization(object):
     next_optimization = None
@@ -244,6 +262,7 @@
         raise NotImplementedError
 
     def emit_operation(self, op):
+        self.last_emitted_operation = op
         self.next_optimization.propagate_forward(op)
 
     # FIXME: Move some of these here?
@@ -283,11 +302,11 @@
             return self.optimizer.optpure.has_pure_result(opnum, args, descr)
         return False
 
-    def get_pure_result(self, key):    
+    def get_pure_result(self, key):
         if self.optimizer.optpure:
             return self.optimizer.optpure.get_pure_result(key)
         return None
-    
+
     def setup(self):
         pass
 
@@ -311,24 +330,25 @@
     def forget_numberings(self, box):
         self.optimizer.forget_numberings(box)
 
+
 class Optimizer(Optimization):
 
-    def __init__(self, metainterp_sd, loop, optimizations=None, bridge=False):
+    def __init__(self, metainterp_sd, loop, optimizations=None):
         self.metainterp_sd = metainterp_sd
         self.cpu = metainterp_sd.cpu
         self.loop = loop
-        self.bridge = bridge
         self.values = {}
         self.interned_refs = self.cpu.ts.new_ref_dict()
+        self.interned_ints = {}
         self.resumedata_memo = resume.ResumeDataLoopMemo(metainterp_sd)
         self.bool_boxes = {}
         self.producer = {}
         self.pendingfields = []
-        self.exception_might_have_happened = False
         self.quasi_immutable_deps = None
         self.opaque_pointers = {}
         self.replaces_guard = {}
         self._newoperations = []
+        self.seen_results = {}
         self.optimizer = self
         self.optpure = None
         self.optearlyforce = None
@@ -346,6 +366,7 @@
             optimizations[-1].next_optimization = self
             for o in optimizations:
                 o.optimizer = self
+                o.last_emitted_operation = None
                 o.setup()
         else:
             optimizations = []
@@ -392,6 +413,9 @@
             if not value:
                 return box
             return self.interned_refs.setdefault(value, box)
+        #elif constbox.type == INT:
+        #    value = constbox.getint()
+        #    return self.interned_ints.setdefault(value, box)
         else:
             return box
 
@@ -476,9 +500,9 @@
         else:
             return CVAL_ZERO
 
-    def propagate_all_forward(self):
-        self.exception_might_have_happened = self.bridge
-        self.clear_newoperations()
+    def propagate_all_forward(self, clear=True):
+        if clear:
+            self.clear_newoperations()
         for op in self.loop.operations:
             self.first_optimization.propagate_forward(op)
         self.loop.operations = self.get_newoperations()
@@ -520,11 +544,15 @@
                 op = self.store_final_boxes_in_guard(op)
         elif op.can_raise():
             self.exception_might_have_happened = True
+        if op.result:
+            if op.result in self.seen_results:
+                raise ValueError, "invalid optimization"
+            self.seen_results[op.result] = None
         self._newoperations.append(op)
 
     def replace_op(self, old_op, new_op):
         # XXX: Do we want to cache indexes to prevent search?
-        i = len(self._newoperations) 
+        i = len(self._newoperations)
         while i > 0:
             i -= 1
             if self._newoperations[i] is old_op:
@@ -537,9 +565,12 @@
         descr = op.getdescr()
         assert isinstance(descr, compile.ResumeGuardDescr)
         modifier = resume.ResumeDataVirtualAdder(descr, self.resumedata_memo)
-        newboxes = modifier.finish(self.values, self.pendingfields)
-        if len(newboxes) > self.metainterp_sd.options.failargs_limit: # XXX be careful here
-            compile.giveup()
+        try:
+            newboxes = modifier.finish(self.values, self.pendingfields)
+            if len(newboxes) > self.metainterp_sd.options.failargs_limit:
+                raise resume.TagOverflow
+        except resume.TagOverflow:
+            raise compile.giveup()
         descr.store_final_boxes(op, newboxes)
         #
         if op.getopnum() == rop.GUARD_VALUE:
diff --git a/pypy/jit/metainterp/optimizeopt/pure.py b/pypy/jit/metainterp/optimizeopt/pure.py
--- a/pypy/jit/metainterp/optimizeopt/pure.py
+++ b/pypy/jit/metainterp/optimizeopt/pure.py
@@ -1,4 +1,4 @@
-from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
+from pypy.jit.metainterp.optimizeopt.optimizer import Optimization, REMOVED
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp.optimizeopt.util import (make_dispatcher_method,
     args_dict)
@@ -61,7 +61,10 @@
         oldop = self.pure_operations.get(args, None)
         if oldop is not None and oldop.getdescr() is op.getdescr():
             assert oldop.getopnum() == op.getopnum()
+            # this removes a CALL_PURE that has the same (non-constant)
+            # arguments as a previous CALL_PURE.
             self.make_equal_to(op.result, self.getvalue(oldop.result))
+            self.last_emitted_operation = REMOVED
             return
         else:
             self.pure_operations[args] = op
@@ -72,6 +75,13 @@
         self.emit_operation(ResOperation(rop.CALL, args, op.result,
                                          op.getdescr()))
 
+    def optimize_GUARD_NO_EXCEPTION(self, op):
+        if self.last_emitted_operation is REMOVED:
+            # it was a CALL_PURE that was killed; so we also kill the
+            # following GUARD_NO_EXCEPTION
+            return
+        self.emit_operation(op)
+
     def flush(self):
         assert self.posponedop is None
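
[editor's note] The REMOVED sentinel introduced in optimizer.py lets an optimization record "I swallowed the previous operation" in last_emitted_operation, so that a later handler (here optimize_GUARD_NO_EXCEPTION) can drop the guard that only made sense right after the removed call. A compact sketch of the pattern over a toy trace of (opname, payload) pairs, with hypothetical op names:

    REMOVED = object()    # sentinel: the previous op was optimized away

    def optimize_ops(ops):
        # drop a CALL_PURE seen before with the same payload, and drop the
        # GUARD_NO_EXCEPTION that immediately follows a dropped call
        seen, out, last = {}, [], None
        for name, payload in ops:
            if name == "CALL_PURE" and payload in seen:
                last = REMOVED            # reuse the earlier result, emit nothing
                continue
            if name == "GUARD_NO_EXCEPTION" and last is REMOVED:
                continue                  # guard belonged to the removed call
            if name == "CALL_PURE":
                seen[payload] = True
            out.append((name, payload))
            last = name
        return out

    trace = [("CALL_PURE", 1), ("GUARD_NO_EXCEPTION", None),
             ("CALL_PURE", 1), ("GUARD_NO_EXCEPTION", None)]
    assert optimize_ops(trace) == [("CALL_PURE", 1), ("GUARD_NO_EXCEPTION", None)]
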
 
diff --git a/pypy/jit/metainterp/optimizeopt/rewrite.py b/pypy/jit/metainterp/optimizeopt/rewrite.py
--- a/pypy/jit/metainterp/optimizeopt/rewrite.py
+++ b/pypy/jit/metainterp/optimizeopt/rewrite.py
@@ -260,6 +260,16 @@
     def optimize_GUARD_FALSE(self, op):
         self.optimize_guard(op, CONST_0)
 
+    def optimize_RECORD_KNOWN_CLASS(self, op):
+        value = self.getvalue(op.getarg(0))
+        expectedclassbox = op.getarg(1)
+        assert isinstance(expectedclassbox, Const)
+        realclassbox = value.get_constant_class(self.optimizer.cpu)
+        if realclassbox is not None:
+            assert realclassbox.same_constant(expectedclassbox)
+            return
+        value.make_constant_class(expectedclassbox, None)
+
     def optimize_GUARD_CLASS(self, op):
         value = self.getvalue(op.getarg(0))
         expectedclassbox = op.getarg(1)
@@ -294,12 +304,6 @@
             raise InvalidLoop
         self.optimize_GUARD_CLASS(op)
 
-    def optimize_GUARD_NO_EXCEPTION(self, op):
-        if not self.optimizer.exception_might_have_happened:
-            return
-        self.emit_operation(op)
-        self.optimizer.exception_might_have_happened = False
-
     def optimize_CALL_LOOPINVARIANT(self, op):
         arg = op.getarg(0)
         # 'arg' must be a Const, because residual_call in codewriter
@@ -310,6 +314,7 @@
         resvalue = self.loop_invariant_results.get(key, None)
         if resvalue is not None:
             self.make_equal_to(op.result, resvalue)
+            self.last_emitted_operation = REMOVED
             return
         # change the op to be a normal call, from the backend's point of view
         # there is no reason to have a separate operation for this
@@ -444,10 +449,19 @@
             except KeyError:
                 pass
             else:
+                # this removes a CALL_PURE with all constant arguments.
                 self.make_constant(op.result, result)
+                self.last_emitted_operation = REMOVED
                 return
         self.emit_operation(op)
 
+    def optimize_GUARD_NO_EXCEPTION(self, op):
+        if self.last_emitted_operation is REMOVED:
+            # it was a CALL_PURE or a CALL_LOOPINVARIANT that was killed;
+            # so we also kill the following GUARD_NO_EXCEPTION
+            return
+        self.emit_operation(op)
+
     def optimize_INT_FLOORDIV(self, op):
         v1 = self.getvalue(op.getarg(0))
         v2 = self.getvalue(op.getarg(1))
@@ -465,10 +479,9 @@
                                         args = [op.getarg(0), ConstInt(highest_bit(val))])
         self.emit_operation(op)
 
-    def optimize_CAST_OPAQUE_PTR(self, op):
+    def optimize_MARK_OPAQUE_PTR(self, op):
         value = self.getvalue(op.getarg(0))
         self.optimizer.opaque_pointers[value] = True
-        self.make_equal_to(op.result, value)
 
     def optimize_CAST_PTR_TO_INT(self, op):
         self.pure(rop.CAST_INT_TO_PTR, [op.result], op.getarg(0))
@@ -478,6 +491,9 @@
         self.pure(rop.CAST_PTR_TO_INT, [op.result], op.getarg(0))
         self.emit_operation(op)
 
+    def optimize_SAME_AS(self, op):
+        self.make_equal_to(op.result, self.getvalue(op.getarg(0)))
+
 dispatch_opt = make_dispatcher_method(OptRewrite, 'optimize_',
         default=OptRewrite.emit_operation)
 optimize_guards = _findall(OptRewrite, 'optimize_', 'GUARD')
diff --git a/pypy/jit/metainterp/optimizeopt/simplify.py b/pypy/jit/metainterp/optimizeopt/simplify.py
--- a/pypy/jit/metainterp/optimizeopt/simplify.py
+++ b/pypy/jit/metainterp/optimizeopt/simplify.py
@@ -1,9 +1,12 @@
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from pypy.jit.metainterp.resoperation import ResOperation, rop
-
+from pypy.jit.metainterp.history import TargetToken, JitCellToken
 
 class OptSimplify(Optimization):
+    def __init__(self):
+        self.last_label_descr = None
+        
     def optimize_CALL_PURE(self, op):
         args = op.getarglist()
         self.emit_operation(ResOperation(rop.CALL, args, op.result,
@@ -25,8 +28,29 @@
         #     but it's a bit hard to implement robustly if heap.py is also run
         pass
 
-    optimize_CAST_OPAQUE_PTR = optimize_VIRTUAL_REF
+    def optimize_MARK_OPAQUE_PTR(self, op):
+        pass
 
+    def optimize_RECORD_KNOWN_CLASS(self, op):
+        pass
+
+    def optimize_LABEL(self, op):
+        self.last_label_descr = op.getdescr()
+        self.emit_operation(op)
+        
+    def optimize_JUMP(self, op):
+        descr = op.getdescr()
+        assert isinstance(descr, JitCellToken)
+        if not descr.target_tokens:
+            assert self.last_label_descr is not None
+            target_token = self.last_label_descr
+            assert isinstance(target_token, TargetToken)
+            assert target_token.targeting_jitcell_token is descr
+            op.setdescr(self.last_label_descr)
+        else:
+            assert len(descr.target_tokens) == 1
+            op.setdescr(descr.target_tokens[0])
+        self.emit_operation(op)
 
 dispatch_opt = make_dispatcher_method(OptSimplify, 'optimize_',
         default=OptSimplify.emit_operation)
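
[editor's note] optimize_JUMP above rewires a backward jump from the whole JitCellToken to a concrete TargetToken: if the token already carries exactly one target, the jump goes there, otherwise it falls back to the descr of the most recent LABEL seen in the same trace. The decision in isolation, with simplified stand-ins for the history classes:

    class JitCellToken(object):
        def __init__(self):
            self.target_tokens = None     # filled in once targets are known

    class TargetToken(object):
        def __init__(self, targeting_jitcell_token):
            self.targeting_jitcell_token = targeting_jitcell_token

    def retarget_jump(jump_descr, last_label_descr):
        # sketch of OptSimplify.optimize_JUMP: pick the concrete target token
        if not jump_descr.target_tokens:
            assert last_label_descr is not None
            assert last_label_descr.targeting_jitcell_token is jump_descr
            return last_label_descr
        assert len(jump_descr.target_tokens) == 1
        return jump_descr.target_tokens[0]

    cell = JitCellToken()
    label = TargetToken(cell)
    assert retarget_jump(cell, label) is label
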
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
@@ -0,0 +1,200 @@
+from pypy.jit.metainterp.optimizeopt.test.test_util import (
+    LLtypeMixin, BaseTest, Storage, _sortboxes, FakeDescrWithSnapshot)
+from pypy.jit.metainterp.history import TreeLoop, JitCellToken, TargetToken
+from pypy.jit.metainterp.resoperation import rop, opname, ResOperation
+from pypy.jit.metainterp.optimize import InvalidLoop
+from py.test import raises
+
+class BaseTestMultiLabel(BaseTest):
+    enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll"
+
+    def optimize_loop(self, ops, expected):
+        loop = self.parse(ops)
+        if expected != "crash!":
+            expected = self.parse(expected)
+
+        part = TreeLoop('part')
+        part.inputargs = loop.inputargs
+        part.start_resumedescr = FakeDescrWithSnapshot()
+        token = loop.original_jitcell_token
+
+        optimized = TreeLoop('optimized')
+        optimized.inputargs = loop.inputargs
+        optimized.operations = []
+        
+        labels = [i for i, op in enumerate(loop.operations) \
+                  if op.getopnum()==rop.LABEL]
+        prv = 0
+        last_label = []
+        for nxt in labels + [len(loop.operations)]:
+            assert prv != nxt
+            operations = last_label + loop.operations[prv:nxt]
+            if nxt < len(loop.operations):
+                label = loop.operations[nxt]
+                assert label.getopnum() == rop.LABEL
+                jumpop = ResOperation(rop.JUMP, label.getarglist(),
+                                      None, descr=token)
+                operations.append(jumpop)
+            part.operations = operations
+            self._do_optimize_loop(part, None)
+            if part.operations[-1].getopnum() == rop.LABEL:
+                last_label = [part.operations.pop()]
+            else:
+                last_label = []
+            optimized.operations.extend(part.operations)
+            prv = nxt + 1
+        
+        #
+        print
+        print "Optimized:"
+        if optimized.operations:
+            print '\n'.join([str(o) for o in optimized.operations])
+        else:
+            print 'Failed!'
+        print
+
+        assert expected != "crash!", "should have raised an exception"
+        self.assert_equal(optimized, expected)
+
+        return optimized
+
+    def test_simple(self):
+        ops = """
+        [i1]
+        i2 = int_add(i1, 1)
+        escape(i2)
+        label(i1)
+        i3 = int_add(i1, 1)
+        escape(i3)
+        jump(i1)
+        """
+        expected = """
+        [i1]
+        i2 = int_add(i1, 1)
+        escape(i2)
+        label(i1, i2)
+        escape(i2)
+        jump(i1, i2)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_forced_virtual(self):
+        ops = """
+        [p1]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        label(p3)
+        escape(p3)
+        jump(p3)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+
+    def test_virtuals_with_nonmatching_fields(self):
+        ops = """
+        [p1]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p3, 1, descr=valuedescr)
+        label(p3)
+        p4 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p4, 1, descr=nextdescr)
+        jump(p4)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+
+    def test_virtual_arrays_with_nonmatching_lens(self):
+        ops = """
+        [p1]
+        p2 = new_array(3, descr=arraydescr)
+        label(p2)
+        p4 = new_array(2, descr=arraydescr)        
+        jump(p4)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+        
+    def test_nonmatching_arraystruct_1(self):
+        ops = """
+        [p1, f0]
+        p2 = new_array(3, descr=complexarraydescr)
+        setinteriorfield_gc(p2, 2, f0, descr=complexrealdescr)
+        label(p2, f0)
+        p4 = new_array(3, descr=complexarraydescr)
+        setinteriorfield_gc(p4, 2, f0, descr=compleximagdescr)
+        jump(p4, f0)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+        
+    def test_nonmatching_arraystruct_2(self):
+        ops = """
+        [p1, f0]
+        p2 = new_array(3, descr=complexarraydescr)
+        setinteriorfield_gc(p2, 2, f0, descr=complexrealdescr)
+        label(p2, f0)
+        p4 = new_array(2, descr=complexarraydescr)
+        setinteriorfield_gc(p4, 0, f0, descr=complexrealdescr)        
+        jump(p4, f0)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+
+    def test_not_virtual(self):
+        ops = """
+        [p1]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        label(p3)
+        p4 = escape()
+        jump(p4)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+
+    def test_not_virtual_array(self):
+        ops = """
+        [p1]
+        p3 = new_array(3, descr=arraydescr)
+        label(p3)
+        p4 = escape()
+        jump(p4)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+
+    def test_not_virtual_arraystruct(self):
+        ops = """
+        [p1]
+        p3 = new_array(3, descr=complexarraydescr)
+        label(p3)
+        p4 = escape()
+        jump(p4)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+
+    def test_virtual_turns_constant(self):
+        ops = """
+        [p1]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        label(p3)
+        guard_value(p3, ConstPtr(myptr)) []
+        jump(p3)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+        
+    def test_virtuals_turns_not_equal(self):
+        ops = """
+        [p1, p2]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        label(p3, p3)
+        p4 = new_with_vtable(ConstClass(node_vtable))
+        jump(p3, p4)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+        
+    
+class TestLLtype(BaseTestMultiLabel, LLtypeMixin):
+    pass
+
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -1,7 +1,8 @@
 import py
 from pypy.rlib.objectmodel import instantiate
 from pypy.jit.metainterp.optimizeopt.test.test_util import (
-    LLtypeMixin, BaseTest, FakeMetaInterpStaticData)
+    LLtypeMixin, BaseTest, FakeMetaInterpStaticData, convert_old_style_to_targets)
+from pypy.jit.metainterp.history import TargetToken, JitCellToken
 from pypy.jit.metainterp.test.test_compile import FakeLogger
 import pypy.jit.metainterp.optimizeopt.optimizer as optimizeopt
 import pypy.jit.metainterp.optimizeopt.virtualize as virtualize
@@ -9,7 +10,7 @@
 from pypy.jit.metainterp.history import AbstractDescr, ConstInt, BoxInt
 from pypy.jit.metainterp import executor, compile, resume, history
 from pypy.jit.metainterp.resoperation import rop, opname, ResOperation
-
+from pypy.rlib.rarithmetic import LONG_BIT
 
 def test_store_final_boxes_in_guard():
     from pypy.jit.metainterp.compile import ResumeGuardDescr
@@ -115,9 +116,13 @@
     enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap"
 
     def optimize_loop(self, ops, optops, call_pure_results=None):
-
         loop = self.parse(ops)
-        expected = self.parse(optops)
+        token = JitCellToken() 
+        loop.operations = [ResOperation(rop.LABEL, loop.inputargs, None, descr=TargetToken(token))] + \
+                          loop.operations
+        if loop.operations[-1].getopnum() == rop.JUMP:
+            loop.operations[-1].setdescr(token)
+        expected = convert_old_style_to_targets(self.parse(optops), jump=True)
         self._do_optimize_loop(loop, call_pure_results)
         print '\n'.join([str(o) for o in loop.operations])
         self.assert_equal(loop, expected)
@@ -680,25 +685,60 @@
 
     # ----------
 
-    def test_fold_guard_no_exception(self):
-        ops = """
-        [i]
-        guard_no_exception() []
-        i1 = int_add(i, 3)
-        guard_no_exception() []
+    def test_keep_guard_no_exception(self):
+        ops = """
+        [i1]
         i2 = call(i1, descr=nonwritedescr)
         guard_no_exception() [i1, i2]
-        guard_no_exception() []
-        i3 = call(i2, descr=nonwritedescr)
-        jump(i1)       # the exception is considered lost when we loop back
-        """
-        expected = """
-        [i]
-        i1 = int_add(i, 3)
-        i2 = call(i1, descr=nonwritedescr)
+        jump(i2)
+        """
+        self.optimize_loop(ops, ops)
+
+    def test_keep_guard_no_exception_with_call_pure_that_is_not_folded(self):
+        ops = """
+        [i1]
+        i2 = call_pure(123456, i1, descr=nonwritedescr)
         guard_no_exception() [i1, i2]
-        i3 = call(i2, descr=nonwritedescr)
-        jump(i1)
+        jump(i2)
+        """
+        expected = """
+        [i1]
+        i2 = call(123456, i1, descr=nonwritedescr)
+        guard_no_exception() [i1, i2]
+        jump(i2)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_remove_guard_no_exception_with_call_pure_on_constant_args(self):
+        arg_consts = [ConstInt(i) for i in (123456, 81)]
+        call_pure_results = {tuple(arg_consts): ConstInt(5)}
+        ops = """
+        [i1]
+        i3 = same_as(81)
+        i2 = call_pure(123456, i3, descr=nonwritedescr)
+        guard_no_exception() [i1, i2]
+        jump(i2)
+        """
+        expected = """
+        [i1]
+        jump(5)
+        """
+        self.optimize_loop(ops, expected, call_pure_results)
+
+    def test_remove_guard_no_exception_with_duplicated_call_pure(self):
+        ops = """
+        [i1]
+        i2 = call_pure(123456, i1, descr=nonwritedescr)
+        guard_no_exception() [i1, i2]
+        i3 = call_pure(123456, i1, descr=nonwritedescr)
+        guard_no_exception() [i1, i2, i3]
+        jump(i3)
+        """
+        expected = """
+        [i1]
+        i2 = call(123456, i1, descr=nonwritedescr)
+        guard_no_exception() [i1, i2]
+        jump(i2)
         """
         self.optimize_loop(ops, expected)
 
@@ -935,7 +975,6 @@
         """
         self.optimize_loop(ops, expected)
 
-
     def test_virtual_constant_isnonnull(self):
         ops = """
         [i0]
@@ -951,6 +990,55 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_virtual_array_of_struct(self):
+        ops = """
+        [f0, f1, f2, f3]
+        p0 = new_array(2, descr=complexarraydescr)
+        setinteriorfield_gc(p0, 0, f0, descr=complexrealdescr)
+        setinteriorfield_gc(p0, 0, f1, descr=compleximagdescr)
+        setinteriorfield_gc(p0, 1, f2, descr=complexrealdescr)
+        setinteriorfield_gc(p0, 1, f3, descr=compleximagdescr)
+        f4 = getinteriorfield_gc(p0, 0, descr=complexrealdescr)
+        f5 = getinteriorfield_gc(p0, 1, descr=complexrealdescr)
+        f6 = float_mul(f4, f5)
+        f7 = getinteriorfield_gc(p0, 0, descr=compleximagdescr)
+        f8 = getinteriorfield_gc(p0, 1, descr=compleximagdescr)
+        f9 = float_mul(f7, f8)
+        f10 = float_add(f6, f9)
+        finish(f10)
+        """
+        expected = """
+        [f0, f1, f2, f3]
+        f4 = float_mul(f0, f2)
+        f5 = float_mul(f1, f3)
+        f6 = float_add(f4, f5)
+        finish(f6)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_virtual_array_of_struct_forced(self):
+        ops = """
+        [f0, f1]
+        p0 = new_array(1, descr=complexarraydescr)
+        setinteriorfield_gc(p0, 0, f0, descr=complexrealdescr)
+        setinteriorfield_gc(p0, 0, f1, descr=compleximagdescr)
+        f2 = getinteriorfield_gc(p0, 0, descr=complexrealdescr)
+        f3 = getinteriorfield_gc(p0, 0, descr=compleximagdescr)
+        f4 = float_mul(f2, f3)
+        i0 = escape(f4, p0)
+        finish(i0)
+        """
+        expected = """
+        [f0, f1]
+        f2 = float_mul(f0, f1)
+        p0 = new_array(1, descr=complexarraydescr)
+        setinteriorfield_gc(p0, 0, f0, descr=complexrealdescr)
+        setinteriorfield_gc(p0, 0, f1, descr=compleximagdescr)
+        i0 = escape(f2, p0)
+        finish(i0)
+        """
+        self.optimize_loop(ops, expected)
+
     def test_nonvirtual_1(self):
         ops = """
         [i]
@@ -4074,6 +4162,38 @@
         """
         self.optimize_strunicode_loop(ops, expected)
 
+    def test_str_concat_constant_lengths(self):
+        ops = """
+        [i0]
+        p0 = newstr(1)
+        strsetitem(p0, 0, i0)
+        p1 = newstr(0)
+        p2 = call(0, p0, p1, descr=strconcatdescr)
+        i1 = call(0, p2, p0, descr=strequaldescr)
+        finish(i1)
+        """
+        expected = """
+        [i0]
+        finish(1)
+        """
+        self.optimize_strunicode_loop(ops, expected)
+
+    def test_str_concat_constant_lengths_2(self):
+        ops = """
+        [i0]
+        p0 = newstr(0)
+        p1 = newstr(1)
+        strsetitem(p1, 0, i0)
+        p2 = call(0, p0, p1, descr=strconcatdescr)
+        i1 = call(0, p2, p1, descr=strequaldescr)
+        finish(i1)
+        """
+        expected = """
+        [i0]
+        finish(1)
+        """
+        self.optimize_strunicode_loop(ops, expected)
+
     def test_str_slice_1(self):
         ops = """
         [p1, i1, i2]
@@ -4176,15 +4296,38 @@
         """
         self.optimize_strunicode_loop(ops, expected)
 
+    def test_str_slice_plain_virtual(self):
+        ops = """
+        []
+        p0 = newstr(11)
+        copystrcontent(s"hello world", p0, 0, 0, 11)
+        p1 = call(0, p0, 0, 5, descr=strslicedescr)
+        finish(p1)
+        """
+        expected = """
+        []
+        p0 = newstr(11)
+        copystrcontent(s"hello world", p0, 0, 0, 11)
+        # Eventually this should just return s"hello", but ATM this test is
+        # just verifying that it doesn't return "\0\0\0\0\0", so being
+        # slightly underoptimized is ok.
+        p1 = newstr(5)
+        copystrcontent(p0, p1, 0, 0, 5)
+        finish(p1)
+        """
+        self.optimize_strunicode_loop(ops, expected)
+
     # ----------
     def optimize_strunicode_loop_extradescrs(self, ops, optops):
         class FakeCallInfoCollection:
             def callinfo_for_oopspec(self, oopspecindex):
                 calldescrtype = type(LLtypeMixin.strequaldescr)
+                effectinfotype = type(LLtypeMixin.strequaldescr.get_extra_info())
                 for value in LLtypeMixin.__dict__.values():
                     if isinstance(value, calldescrtype):
                         extra = value.get_extra_info()
-                        if extra and extra.oopspecindex == oopspecindex:
+                        if (extra and isinstance(extra, effectinfotype) and
+                            extra.oopspecindex == oopspecindex):
                             # returns 0 for 'func' in this test
                             return value, 0
                 raise AssertionError("not found: oopspecindex=%d" %
@@ -4664,11 +4807,11 @@
         i5 = int_ge(i0, 0)
         guard_true(i5) []
         i1 = int_mod(i0, 42)
-        i2 = int_rshift(i1, 63)
+        i2 = int_rshift(i1, %d)
         i3 = int_and(42, i2)
         i4 = int_add(i1, i3)
         finish(i4)
-        """
+        """ % (LONG_BIT-1)
         expected = """
         [i0]
         i5 = int_ge(i0, 0)
@@ -4676,21 +4819,41 @@
         i1 = int_mod(i0, 42)
         finish(i1)
         """
-        py.test.skip("in-progress")
         self.optimize_loop(ops, expected)
 
-        # Also, 'n % power-of-two' can be turned into int_and(),
-        # but that's a bit harder to detect here because it turns into
-        # several operations, and of course it is wrong to just turn
+        # 'n % power-of-two' can be turned into int_and(); at least that's
+        # easy to do now if n is known to be non-negative.
+        ops = """
+        [i0]
+        i5 = int_ge(i0, 0)
+        guard_true(i5) []
+        i1 = int_mod(i0, 8)
+        i2 = int_rshift(i1, %d)
+        i3 = int_and(42, i2)
+        i4 = int_add(i1, i3)
+        finish(i4)
+        """ % (LONG_BIT-1)
+        expected = """
+        [i0]
+        i5 = int_ge(i0, 0)
+        guard_true(i5) []
+        i1 = int_and(i0, 7)
+        finish(i1)
+        """
+        self.optimize_loop(ops, expected)
+
+        # Of course any 'maybe-negative % power-of-two' can be turned into
+        # int_and(), but that's a bit harder to detect here because it turns
+        # into several operations, and of course it is wrong to just turn
         # int_mod(i0, 16) into int_and(i0, 15).
         ops = """
         [i0]
         i1 = int_mod(i0, 16)
-        i2 = int_rshift(i1, 63)
+        i2 = int_rshift(i1, %d)
         i3 = int_and(16, i2)
         i4 = int_add(i1, i3)
         finish(i4)
-        """
+        """ % (LONG_BIT-1)
         expected = """
         [i0]
         i4 = int_and(i0, 15)
@@ -4699,6 +4862,16 @@
         py.test.skip("harder")
         self.optimize_loop(ops, expected)
 
+    def test_intmod_bounds_bug1(self):
+        ops = """
+        [i0]
+        i1 = int_mod(i0, %d)
+        i2 = int_eq(i1, 0)
+        guard_false(i2) []
+        finish()
+        """ % (-(1<<(LONG_BIT-1)),)
+        self.optimize_loop(ops, ops)
+
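
[editor's note] The int_rshift(i1, LONG_BIT-1) / int_and / int_add triple appearing in these int_mod tests is how RPython lowers Python's % so that the result takes the sign of the divisor: the arithmetic right shift yields -1 for a negative truncated remainder and 0 otherwise, and the and/add then add the divisor back only in the negative case. Checking the identity directly in plain Python (machine_mod standing in for the CPU-level int_mod, divisor assumed positive):

    def machine_mod(n, d):
        # truncated remainder with the sign of the dividend, i.e. what the
        # CPU-level int_mod computes (assume d > 0)
        r = abs(n) % d
        return -r if n < 0 else r

    def python_mod_lowered(n, d):
        # the int_mod / int_rshift / int_and / int_add sequence from the tests
        i1 = machine_mod(n, d)
        i2 = -1 if i1 < 0 else 0      # int_rshift(i1, LONG_BIT-1): sign mask
        i3 = d & i2                   # int_and(d, i2): d if negative, else 0
        return i1 + i3                # int_add: shift back into [0, d)

    assert all(python_mod_lowered(n, 42) == n % 42 for n in range(-100, 100))
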
     def test_bounded_lazy_setfield(self):
         ops = """
         [p0, i0]
@@ -4781,6 +4954,27 @@
 
     def test_plain_virtual_string_copy_content(self):
         ops = """
+        [i1]
+        p0 = newstr(6)
+        copystrcontent(s"hello!", p0, 0, 0, 6)
+        p1 = call(0, p0, s"abc123", descr=strconcatdescr)
+        i0 = strgetitem(p1, i1)
+        finish(i0)
+        """
+        expected = """
+        [i1]
+        p0 = newstr(6)
+        copystrcontent(s"hello!", p0, 0, 0, 6)
+        p1 = newstr(12)
+        copystrcontent(p0, p1, 0, 0, 6)
+        copystrcontent(s"abc123", p1, 0, 6, 6)
+        i0 = strgetitem(p1, i1)
+        finish(i0)
+        """
+        self.optimize_strunicode_loop(ops, expected)
+
+    def test_plain_virtual_string_copy_content_2(self):
+        ops = """
         []
         p0 = newstr(6)
         copystrcontent(s"hello!", p0, 0, 0, 6)
@@ -4792,10 +4986,7 @@
         []
         p0 = newstr(6)
         copystrcontent(s"hello!", p0, 0, 0, 6)
-        p1 = newstr(12)
-        copystrcontent(p0, p1, 0, 0, 6)
-        copystrcontent(s"abc123", p1, 0, 6, 6)
-        i0 = strgetitem(p1, 0)
+        i0 = strgetitem(p0, 0)
         finish(i0)
         """
         self.optimize_strunicode_loop(ops, expected)
@@ -4812,6 +5003,34 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_known_equal_ints(self):
+        py.test.skip("in-progress")
+        ops = """
+        [i0, i1, i2, p0]
+        i3 = int_eq(i0, i1)
+        guard_true(i3) []
+
+        i4 = int_lt(i2, i0)
+        guard_true(i4) []
+        i5 = int_lt(i2, i1)
+        guard_true(i5) []
+
+        i6 = getarrayitem_gc(p0, i2)
+        finish(i6)
+        """
+        expected = """
+        [i0, i1, i2, p0]
+        i3 = int_eq(i0, i1)
+        guard_true(i3) []
+
+        i4 = int_lt(i2, i0)
+        guard_true(i4) []
+
+        i6 = getarrayitem_gc(p0, i3)
+        finish(i6)
+        """
+        self.optimize_loop(ops, expected)
+
 
 class TestLLtype(BaseTestOptimizeBasic, LLtypeMixin):
     pass
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -1,13 +1,13 @@
 import py
 from pypy.rlib.objectmodel import instantiate
 from pypy.jit.metainterp.optimizeopt.test.test_util import (
-    LLtypeMixin, BaseTest, Storage, _sortboxes)
+    LLtypeMixin, BaseTest, Storage, _sortboxes, convert_old_style_to_targets)
 import pypy.jit.metainterp.optimizeopt.optimizer as optimizeopt
 import pypy.jit.metainterp.optimizeopt.virtualize as virtualize
 from pypy.jit.metainterp.optimizeopt import optimize_loop_1, ALL_OPTS_DICT, build_opt_chain
 from pypy.jit.metainterp.optimize import InvalidLoop
 from pypy.jit.metainterp.history import AbstractDescr, ConstInt, BoxInt
-from pypy.jit.metainterp.history import TreeLoop, LoopToken
+from pypy.jit.metainterp.history import TreeLoop, JitCellToken, TargetToken
 from pypy.jit.metainterp.jitprof import EmptyProfiler
 from pypy.jit.metainterp import executor, compile, resume, history
 from pypy.jit.metainterp.resoperation import rop, opname, ResOperation
@@ -15,7 +15,7 @@
 from pypy.jit.metainterp.optimizeopt.util import args_dict
 from pypy.jit.metainterp.optimizeopt.test.test_optimizebasic import FakeMetaInterpStaticData
 from pypy.config.pypyoption import get_pypy_config
-
+from pypy.jit.metainterp.optimizeopt.unroll import Inliner
 
 def test_build_opt_chain():
     def check(chain, expected_names):
@@ -23,49 +23,37 @@
         assert names == expected_names
     #
     metainterp_sd = FakeMetaInterpStaticData(None)
-    chain, _ = build_opt_chain(metainterp_sd, "", inline_short_preamble=False)
+    chain, _ = build_opt_chain(metainterp_sd, "")
     check(chain, ["OptSimplify"])
     #
     chain, _ = build_opt_chain(metainterp_sd, "")
-    check(chain, ["OptInlineShortPreamble", "OptSimplify"])
+    check(chain, ["OptSimplify"])
     #
     chain, _ = build_opt_chain(metainterp_sd, "")
-    check(chain, ["OptInlineShortPreamble", "OptSimplify"])
+    check(chain, ["OptSimplify"])
     #
     chain, _ = build_opt_chain(metainterp_sd, "heap:intbounds")
-    check(chain, ["OptInlineShortPreamble", "OptIntBounds", "OptHeap", "OptSimplify"])
+    check(chain, ["OptIntBounds", "OptHeap", "OptSimplify"])
     #
     chain, unroll = build_opt_chain(metainterp_sd, "unroll")
-    check(chain, ["OptInlineShortPreamble", "OptSimplify"])
+    check(chain, ["OptSimplify"])
     assert unroll
     #
-    chain, _ = build_opt_chain(metainterp_sd, "aaa:bbb", inline_short_preamble=False)
+    chain, _ = build_opt_chain(metainterp_sd, "aaa:bbb")
     check(chain, ["OptSimplify"])
     #
-    chain, _ = build_opt_chain(metainterp_sd, "ffi", inline_short_preamble=False)
+    chain, _ = build_opt_chain(metainterp_sd, "ffi")
     check(chain, ["OptFfiCall", "OptSimplify"])
     #
     metainterp_sd.config = get_pypy_config(translating=True)
     assert not metainterp_sd.config.translation.jit_ffi
-    chain, _ = build_opt_chain(metainterp_sd, "ffi", inline_short_preamble=False)
+    chain, _ = build_opt_chain(metainterp_sd, "ffi")
     check(chain, ["OptSimplify"])
 
 
 # ____________________________________________________________
 
 
-class FakeDescr(compile.ResumeGuardDescr):
-    class rd_snapshot:
-        class prev:
-            prev = None
-            boxes = []
-        boxes = []
-    def clone_if_mutable(self):
-        return FakeDescr()
-    def __eq__(self, other):
-        return isinstance(other, Storage) or isinstance(other, FakeDescr)
-
-
 class BaseTestWithUnroll(BaseTest):
 
     enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll"
@@ -79,40 +67,41 @@
             expected_preamble = self.parse(expected_preamble)
         if expected_short:
             expected_short = self.parse(expected_short)
-        loop.preamble = TreeLoop('preamble')
-        loop.preamble.inputargs = loop.inputargs
-        loop.preamble.token = LoopToken()
-        loop.preamble.start_resumedescr = FakeDescr()
-        #
-        self._do_optimize_loop(loop, call_pure_results)
+
+        preamble = self.unroll_and_optimize(loop, call_pure_results)
+        
         #
         print
         print "Preamble:"
-        print loop.preamble.inputargs
-        if loop.preamble.operations:
-            print '\n'.join([str(o) for o in loop.preamble.operations])
+        if preamble.operations:
+            print '\n'.join([str(o) for o in preamble.operations])
         else:
             print 'Failed!'
         print
         print "Loop:"
-        print loop.inputargs
         print '\n'.join([str(o) for o in loop.operations])
         print
         if expected_short:
             print "Short Preamble:"
-            short = loop.preamble.token.short_preamble[0]
-            print short.inputargs
-            print '\n'.join([str(o) for o in short.operations])
+            short = loop.operations[0].getdescr().short_preamble
+            print '\n'.join([str(o) for o in short])
             print
 
         assert expected != "crash!", "should have raised an exception"
-        self.assert_equal(loop, expected)
+        self.assert_equal(loop, convert_old_style_to_targets(expected, jump=True))
+        assert loop.operations[0].getdescr() == loop.operations[-1].getdescr()
         if expected_preamble:
-            self.assert_equal(loop.preamble, expected_preamble,
+            self.assert_equal(preamble, convert_old_style_to_targets(expected_preamble, jump=False),
                               text_right='expected preamble')
+            assert preamble.operations[-1].getdescr() == loop.operations[0].getdescr()
         if expected_short:
-            self.assert_equal(short, expected_short,
+            short_preamble = TreeLoop('short preamble')
+            assert short[0].getopnum() == rop.LABEL
+            short_preamble.inputargs = short[0].getarglist()
+            short_preamble.operations = short
+            self.assert_equal(short_preamble, convert_old_style_to_targets(expected_short, jump=True),
                               text_right='expected short preamble')
+            assert short[-1].getdescr() == loop.operations[0].getdescr()
 
         return loop
 
@@ -234,7 +223,7 @@
             """ % expected_value
             self.optimize_loop(ops, expected)
 
-    def test_reverse_of_cast(self):
+    def test_reverse_of_cast_1(self):
         ops = """
         [i0]
         p0 = cast_int_to_ptr(i0)
@@ -246,6 +235,8 @@
         jump(i0)
         """
         self.optimize_loop(ops, expected)
+
+    def test_reverse_of_cast_2(self):
         ops = """
         [p0]
         i1 = cast_ptr_to_int(p0)
@@ -931,17 +922,14 @@
         [i]
         guard_no_exception() []
         i1 = int_add(i, 3)
-        guard_no_exception() []
         i2 = call(i1, descr=nonwritedescr)
         guard_no_exception() [i1, i2]
-        guard_no_exception() []
         i3 = call(i2, descr=nonwritedescr)
         jump(i1)       # the exception is considered lost when we loop back
         """
-        # note that 'guard_no_exception' at the very start is kept around
-        # for bridges, but not for loops
         preamble = """
         [i]
+        guard_no_exception() []    # occurs at the start of bridges, so keep it
         i1 = int_add(i, 3)
         i2 = call(i1, descr=nonwritedescr)
         guard_no_exception() [i1, i2]
@@ -950,6 +938,7 @@
         """
         expected = """
         [i]
+        guard_no_exception() []    # occurs at the start of bridges, so keep it
         i1 = int_add(i, 3)
         i2 = call(i1, descr=nonwritedescr)
         guard_no_exception() [i1, i2]
@@ -958,6 +947,23 @@
         """
         self.optimize_loop(ops, expected, preamble)
 
+    def test_bug_guard_no_exception(self):
+        ops = """
+        []
+        i0 = call(123, descr=nonwritedescr)
+        p0 = call(0, "xy", descr=s2u_descr)      # string -> unicode
+        guard_no_exception() []
+        escape(p0)
+        jump()
+        """
+        expected = """
+        []
+        i0 = call(123, descr=nonwritedescr)
+        escape(u"xy")
+        jump()
+        """
+        self.optimize_loop(ops, expected)
+
     # ----------
 
     def test_call_loopinvariant(self):
@@ -1166,6 +1172,7 @@
         i1 = getfield_gc(p0, descr=valuedescr)
         i2 = int_sub(i1, 1)
         i3 = int_add(i0, i1)
+        i4 = same_as(i2) # This same_as should be killed by backend
         jump(i3, i2, i1)
         """
         expected = """
@@ -1176,6 +1183,75 @@
         """
         self.optimize_loop(ops, expected, preamble)
 
+    def test_virtual_recursive(self):
+        ops = """
+        [p0]
+        p41 = getfield_gc(p0, descr=nextdescr)
+        i0 = getfield_gc(p41, descr=valuedescr)
+        p1 = new_with_vtable(ConstClass(node_vtable2))
+        p2 = new_with_vtable(ConstClass(node_vtable2))
+        setfield_gc(p2, p1, descr=nextdescr)
+        setfield_gc(p1, p2, descr=nextdescr)
+        i1 = int_add(i0, 1)
+        setfield_gc(p2, i1, descr=valuedescr)
+        jump(p1)
+        """
+        preamble = """
+        [p0]
+        p41 = getfield_gc(p0, descr=nextdescr)
+        i0 = getfield_gc(p41, descr=valuedescr)
+        i3 = int_add(i0, 1)
+        jump(i3)
+        """
+        expected = """
+        [i0]
+        i1 = int_add(i0, 1)
+        jump(i1)
+        """
+        self.optimize_loop(ops, expected, preamble)
+
+    def test_virtual_recursive_forced(self):
+        ops = """
+        [p0]
+        p41 = getfield_gc(p0, descr=nextdescr)
+        i0 = getfield_gc(p41, descr=valuedescr)
+        p1 = new_with_vtable(ConstClass(node_vtable2))
+        p2 = new_with_vtable(ConstClass(node_vtable2))
+        setfield_gc(p2, p1, descr=nextdescr)
+        setfield_gc(p1, p2, descr=nextdescr)
+        i1 = int_add(i0, 1)
+        setfield_gc(p2, i1, descr=valuedescr)
+        setfield_gc(p0, p1, descr=nextdescr)
+        jump(p1)
+        """
+        preamble = """
+        [p0]
+        p41 = getfield_gc(p0, descr=nextdescr)
+        i0 = getfield_gc(p41, descr=valuedescr)
+        i1 = int_add(i0, 1)
+        p1 = new_with_vtable(ConstClass(node_vtable2))
+        p2 = new_with_vtable(ConstClass(node_vtable2))
+        setfield_gc(p2, i1, descr=valuedescr)
+        setfield_gc(p2, p1, descr=nextdescr)
+        setfield_gc(p1, p2, descr=nextdescr)
+        setfield_gc(p0, p1, descr=nextdescr)
+        jump(p1)
+        """
+        loop = """
+        [p0]
+        p41 = getfield_gc(p0, descr=nextdescr)
+        i0 = getfield_gc(p41, descr=valuedescr)
+        i1 = int_add(i0, 1)
+        p1 = new_with_vtable(ConstClass(node_vtable2))
+        p2 = new_with_vtable(ConstClass(node_vtable2))
+        setfield_gc(p2, i1, descr=valuedescr)
+        setfield_gc(p2, p1, descr=nextdescr)
+        setfield_gc(p1, p2, descr=nextdescr)
+        setfield_gc(p0, p1, descr=nextdescr)
+        jump(p1)
+        """
+        self.optimize_loop(ops, loop, preamble)
+
     def test_virtual_constant_isnull(self):
         ops = """
         [i0]
@@ -1233,6 +1309,7 @@
         p30 = new_with_vtable(ConstClass(node_vtable))
         setfield_gc(p30, i28, descr=nextdescr)
         setfield_gc(p3, p30, descr=valuedescr)
+        p46 = same_as(p30) # This same_as should be killed by backend
         jump(i29, p30, p3)
         """
         expected = """
@@ -1240,8 +1317,8 @@
         i28 = int_add(i0, 1)
         i29 = int_add(i28, 1)
         p30 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p30, i28, descr=nextdescr)
         setfield_gc(p3, p30, descr=valuedescr)
-        setfield_gc(p30, i28, descr=nextdescr)
         jump(i29, p30, p3)
         """
         self.optimize_loop(ops, expected, preamble)
@@ -2034,7 +2111,9 @@
         guard_true(i3) []
         i4 = int_neg(i2)
         setfield_gc(p1, i2, descr=valuedescr)
-        jump(p1, i1, i2, i4, i4)
+        i7 = same_as(i2) # This same_as should be killed by backend
+        i6 = same_as(i4)
+        jump(p1, i1, i2, i4, i6)
         """
         expected = """
         [p1, i1, i2, i4, i5]
@@ -2064,7 +2143,8 @@
         i4 = int_neg(i2)
         setfield_gc(p1, NULL, descr=nextdescr)
         escape()
-        jump(p1, i2, i4, i4)
+        i5 = same_as(i4)
+        jump(p1, i2, i4, i5)
         """
         expected = """
         [p1, i2, i4, i5]
@@ -2093,7 +2173,8 @@
         i4 = int_neg(i2)
         setfield_gc(p1, NULL, descr=nextdescr)
         escape()
-        jump(p1, i2, i4, i4)
+        i5 = same_as(i4)
+        jump(p1, i2, i4, i5)
         """
         expected = """
         [p1, i2, i4, i5]
@@ -2123,7 +2204,9 @@
         guard_true(i5) []
         i4 = int_neg(i2)
         setfield_gc(p1, i2, descr=valuedescr)
-        jump(p1, i1, i2, i4, i4)
+        i8 = same_as(i2) # This same_as should be killed by backend
+        i7 = same_as(i4)
+        jump(p1, i1, i2, i4, i7)
         """
         expected = """
         [p1, i1, i2, i4, i7]
@@ -2168,13 +2251,13 @@
         ops = """
         [p0, i0, p1, i1, i2]
         setfield_gc(p0, i1, descr=valuedescr)
-        copystrcontent(p0, i0, p1, i1, i2)
+        copystrcontent(p0, p1, i0, i1, i2)
         escape()
         jump(p0, i0, p1, i1, i2)
         """
         expected = """
         [p0, i0, p1, i1, i2]
-        copystrcontent(p0, i0, p1, i1, i2)
+        copystrcontent(p0, p1, i0, i1, i2)
         setfield_gc(p0, i1, descr=valuedescr)
         escape()
         jump(p0, i0, p1, i1, i2)
@@ -2349,7 +2432,8 @@
         p2 = new_with_vtable(ConstClass(node_vtable))
         setfield_gc(p2, p4, descr=nextdescr)
         setfield_gc(p1, p2, descr=nextdescr)
-        jump(p1, i2, i4, p4, i4)
+        i101 = same_as(i4)
+        jump(p1, i2, i4, p4, i101)
         """
         expected = """
         [p1, i2, i4, p4, i5]
@@ -3192,7 +3276,15 @@
         setfield_gc(p1, i3, descr=valuedescr)
         jump(p1, i4, i3)
         '''
-        self.optimize_loop(ops, ops, ops)
+        preamble = '''
+        [p1, i1, i4]

