[pypy-commit] pypy py3k: hg merge default
amauryfa
noreply at buildbot.pypy.org
Sat Jan 14 21:48:46 CET 2012
Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: py3k
Changeset: r51330:86189f3364a6
Date: 2012-01-14 21:45 +0100
http://bitbucket.org/pypy/pypy/changeset/86189f3364a6/
Log: hg merge default
diff too long, truncating to 10000 out of 21522 lines
diff --git a/LICENSE b/LICENSE
--- a/LICENSE
+++ b/LICENSE
@@ -27,7 +27,7 @@
DEALINGS IN THE SOFTWARE.
-PyPy Copyright holders 2003-2011
+PyPy Copyright holders 2003-2012
-----------------------------------
Except when otherwise stated (look for LICENSE files or information at
@@ -37,43 +37,47 @@
Armin Rigo
Maciej Fijalkowski
Carl Friedrich Bolz
+ Amaury Forgeot d'Arc
Antonio Cuni
- Amaury Forgeot d'Arc
Samuele Pedroni
Michael Hudson
Holger Krekel
- Benjamin Peterson
+ Alex Gaynor
Christian Tismer
Hakan Ardo
- Alex Gaynor
+ Benjamin Peterson
+ David Schneider
Eric van Riet Paap
Anders Chrigstrom
- David Schneider
Richard Emslie
Dan Villiom Podlaski Christiansen
Alexander Schremmer
+ Lukas Diekmann
Aurelien Campeas
Anders Lehmann
Camillo Bruni
Niklaus Haldimann
+ Sven Hager
Leonardo Santagada
Toon Verwaest
Seo Sanghyeon
+ Justin Peel
Lawrence Oluyede
Bartosz Skowron
Jakub Gustak
Guido Wesdorp
Daniel Roberts
+ Laura Creighton
Adrien Di Mascio
- Laura Creighton
Ludovic Aubry
Niko Matsakis
+ Wim Lavrijsen
+ Matti Picus
Jason Creighton
Jacob Hallen
Alex Martelli
Anders Hammarquist
Jan de Mooij
- Wim Lavrijsen
Stephan Diehl
Michael Foord
Stefan Schwarzer
@@ -84,34 +88,36 @@
Alexandre Fayolle
Marius Gedminas
Simon Burton
- Justin Peel
+ David Edelsohn
Jean-Paul Calderone
John Witulski
- Lukas Diekmann
+ Timo Paulssen
holger krekel
- Wim Lavrijsen
Dario Bertini
+ Mark Pearse
Andreas Stührk
Jean-Philippe St. Pierre
Guido van Rossum
Pavel Vinogradov
Valentino Volonghi
Paul deGrandis
+ Ilya Osadchiy
+ Ronny Pfannschmidt
Adrian Kuhn
tav
Georg Brandl
+ Philip Jenvey
Gerald Klix
Wanja Saatkamp
- Ronny Pfannschmidt
Boris Feigin
Oscar Nierstrasz
David Malcolm
Eugene Oden
Henry Mason
- Sven Hager
+ Jeff Terrace
Lukas Renggli
- Ilya Osadchiy
Guenter Jantzen
+ Ned Batchelder
Bert Freudenberg
Amit Regmi
Ben Young
@@ -142,7 +148,6 @@
Anders Qvist
Beatrice During
Alexander Sedov
- Timo Paulssen
Corbin Simpson
Vincent Legoll
Romain Guillebert
@@ -165,9 +170,10 @@
Lucio Torre
Lene Wagner
Miguel de Val Borro
+ Artur Lisiecki
+ Bruno Gola
Ignas Mikalajunas
- Artur Lisiecki
- Philip Jenvey
+ Stefano Rivera
Joshua Gilbert
Godefroid Chappelle
Yusei Tahara
@@ -179,17 +185,17 @@
Kristjan Valur Jonsson
Bobby Impollonia
Michael Hudson-Doyle
+ Laurence Tratt
+ Yasir Suhail
Andrew Thompson
Anders Sigfridsson
Floris Bruynooghe
Jacek Generowicz
Dan Colish
Zooko Wilcox-O Hearn
- Dan Villiom Podlaski Christiansen
- Anders Hammarquist
+ Dan Loewenherz
Chris Lambacher
Dinu Gherman
- Dan Colish
Brett Cannon
Daniel Neuhäuser
Michael Chermside
diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py
--- a/lib_pypy/_ctypes/structure.py
+++ b/lib_pypy/_ctypes/structure.py
@@ -73,8 +73,12 @@
class Field(object):
def __init__(self, name, offset, size, ctype, num, is_bitfield):
- for k in ('name', 'offset', 'size', 'ctype', 'num', 'is_bitfield'):
- self.__dict__[k] = locals()[k]
+ self.__dict__['name'] = name
+ self.__dict__['offset'] = offset
+ self.__dict__['size'] = size
+ self.__dict__['ctype'] = ctype
+ self.__dict__['num'] = num
+ self.__dict__['is_bitfield'] = is_bitfield
def __setattr__(self, name, value):
raise AttributeError(name)
diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py
--- a/lib_pypy/_sqlite3.py
+++ b/lib_pypy/_sqlite3.py
@@ -231,8 +231,10 @@
sqlite.sqlite3_result_text.argtypes = [c_void_p, c_char_p, c_int, c_void_p]
sqlite.sqlite3_result_text.restype = None
-sqlite.sqlite3_enable_load_extension.argtypes = [c_void_p, c_int]
-sqlite.sqlite3_enable_load_extension.restype = c_int
+HAS_LOAD_EXTENSION = hasattr(sqlite, "sqlite3_enable_load_extension")
+if HAS_LOAD_EXTENSION:
+ sqlite.sqlite3_enable_load_extension.argtypes = [c_void_p, c_int]
+ sqlite.sqlite3_enable_load_extension.restype = c_int
##########################################
# END Wrapped SQLite C API and constants
@@ -708,13 +710,14 @@
from sqlite3.dump import _iterdump
return _iterdump(self)
- def enable_load_extension(self, enabled):
- self._check_thread()
- self._check_closed()
+ if HAS_LOAD_EXTENSION:
+ def enable_load_extension(self, enabled):
+ self._check_thread()
+ self._check_closed()
- rc = sqlite.sqlite3_enable_load_extension(self.db, int(enabled))
- if rc != SQLITE_OK:
- raise OperationalError("Error enabling load extension")
+ rc = sqlite.sqlite3_enable_load_extension(self.db, int(enabled))
+ if rc != SQLITE_OK:
+ raise OperationalError("Error enabling load extension")
DML, DQL, DDL = range(3)
diff --git a/lib_pypy/numpypy/__init__.py b/lib_pypy/numpypy/__init__.py
new file mode 100644
--- /dev/null
+++ b/lib_pypy/numpypy/__init__.py
@@ -0,0 +1,2 @@
+from _numpypy import *
+from .fromnumeric import *
diff --git a/lib_pypy/numpypy/fromnumeric.py b/lib_pypy/numpypy/fromnumeric.py
new file mode 100644
--- /dev/null
+++ b/lib_pypy/numpypy/fromnumeric.py
@@ -0,0 +1,2400 @@
+######################################################################
+# This is a copy of numpy/core/fromnumeric.py modified for numpypy
+######################################################################
+# Each name in __all__ was a function in 'numeric' that is now
+# a method in 'numpy'.
+# When the corresponding method is added to numpypy BaseArray
+# each function should be added as a module function
+# at the applevel
+# This can be as simple as doing the following
+#
+# def func(a, ...):
+# if not hasattr(a, 'func')
+# a = numpypy.array(a)
+# return a.func(...)
+#
+######################################################################
+
+import numpypy
+
+# Module containing non-deprecated functions borrowed from Numeric.
+__docformat__ = "restructuredtext en"
+
+# functions that are now methods
+__all__ = ['take', 'reshape', 'choose', 'repeat', 'put',
+ 'swapaxes', 'transpose', 'sort', 'argsort', 'argmax', 'argmin',
+ 'searchsorted', 'alen',
+ 'resize', 'diagonal', 'trace', 'ravel', 'nonzero', 'shape',
+ 'compress', 'clip', 'sum', 'product', 'prod', 'sometrue', 'alltrue',
+ 'any', 'all', 'cumsum', 'cumproduct', 'cumprod', 'ptp', 'ndim',
+ 'rank', 'size', 'around', 'round_', 'mean', 'std', 'var', 'squeeze',
+ 'amax', 'amin',
+ ]
+
+def take(a, indices, axis=None, out=None, mode='raise'):
+ """
+ Take elements from an array along an axis.
+
+ This function does the same thing as "fancy" indexing (indexing arrays
+ using arrays); however, it can be easier to use if you need elements
+ along a given axis.
+
+ Parameters
+ ----------
+ a : array_like
+ The source array.
+ indices : array_like
+ The indices of the values to extract.
+ axis : int, optional
+ The axis over which to select values. By default, the flattened
+ input array is used.
+ out : ndarray, optional
+ If provided, the result will be placed in this array. It should
+ be of the appropriate shape and dtype.
+ mode : {'raise', 'wrap', 'clip'}, optional
+ Specifies how out-of-bounds indices will behave.
+
+ * 'raise' -- raise an error (default)
+ * 'wrap' -- wrap around
+ * 'clip' -- clip to the range
+
+ 'clip' mode means that all indices that are too large are replaced
+ by the index that addresses the last element along that axis. Note
+ that this disables indexing with negative numbers.
+
+ Returns
+ -------
+ subarray : ndarray
+ The returned array has the same type as `a`.
+
+ See Also
+ --------
+ ndarray.take : equivalent method
+
+ Examples
+ --------
+ >>> a = [4, 3, 5, 7, 6, 8]
+ >>> indices = [0, 1, 4]
+ >>> np.take(a, indices)
+ array([4, 3, 6])
+
+ In this example if `a` is an ndarray, "fancy" indexing can be used.
+
+ >>> a = np.array(a)
+ >>> a[indices]
+ array([4, 3, 6])
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+# not deprecated --- copy if necessary, view otherwise
+def reshape(a, newshape, order='C'):
+ """
+ Gives a new shape to an array without changing its data.
+
+ Parameters
+ ----------
+ a : array_like
+ Array to be reshaped.
+ newshape : int or tuple of ints
+ The new shape should be compatible with the original shape. If
+ an integer, then the result will be a 1-D array of that length.
+ One shape dimension can be -1. In this case, the value is inferred
+ from the length of the array and remaining dimensions.
+ order : {'C', 'F', 'A'}, optional
+ Determines whether the array data should be viewed as in C
+ (row-major) order, FORTRAN (column-major) order, or the C/FORTRAN
+ order should be preserved.
+
+ Returns
+ -------
+ reshaped_array : ndarray
+ This will be a new view object if possible; otherwise, it will
+ be a copy.
+
+
+ See Also
+ --------
+ ndarray.reshape : Equivalent method.
+
+ Notes
+ -----
+
+ It is not always possible to change the shape of an array without
+    copying the data.  If you want an error to be raised if the data is copied,
+ you should assign the new shape to the shape attribute of the array::
+
+ >>> a = np.zeros((10, 2))
+ # A transpose make the array non-contiguous
+ >>> b = a.T
+    # Taking a view makes it possible to modify the shape without modifying the
+    # initial object.
+ >>> c = b.view()
+ >>> c.shape = (20)
+ AttributeError: incompatible shape for a non-contiguous array
+
+
+ Examples
+ --------
+ >>> a = np.array([[1,2,3], [4,5,6]])
+ >>> np.reshape(a, 6)
+ array([1, 2, 3, 4, 5, 6])
+ >>> np.reshape(a, 6, order='F')
+ array([1, 4, 2, 5, 3, 6])
+
+ >>> np.reshape(a, (3,-1)) # the unspecified value is inferred to be 2
+ array([[1, 2],
+ [3, 4],
+ [5, 6]])
+
+ """
+ if not hasattr(a, 'reshape'):
+ a = numpypy.array(a)
+ return a.reshape(newshape)
+
+
+def choose(a, choices, out=None, mode='raise'):
+ """
+ Construct an array from an index array and a set of arrays to choose from.
+
+ First of all, if confused or uncertain, definitely look at the Examples -
+ in its full generality, this function is less simple than it might
+ seem from the following code description (below ndi =
+ `numpy.lib.index_tricks`):
+
+ ``np.choose(a,c) == np.array([c[a[I]][I] for I in ndi.ndindex(a.shape)])``.
+
+ But this omits some subtleties. Here is a fully general summary:
+
+ Given an "index" array (`a`) of integers and a sequence of `n` arrays
+ (`choices`), `a` and each choice array are first broadcast, as necessary,
+ to arrays of a common shape; calling these *Ba* and *Bchoices[i], i =
+ 0,...,n-1* we have that, necessarily, ``Ba.shape == Bchoices[i].shape``
+ for each `i`. Then, a new array with shape ``Ba.shape`` is created as
+ follows:
+
+ * if ``mode=raise`` (the default), then, first of all, each element of
+ `a` (and thus `Ba`) must be in the range `[0, n-1]`; now, suppose that
+ `i` (in that range) is the value at the `(j0, j1, ..., jm)` position
+ in `Ba` - then the value at the same position in the new array is the
+ value in `Bchoices[i]` at that same position;
+
+ * if ``mode=wrap``, values in `a` (and thus `Ba`) may be any (signed)
+ integer; modular arithmetic is used to map integers outside the range
+ `[0, n-1]` back into that range; and then the new array is constructed
+ as above;
+
+ * if ``mode=clip``, values in `a` (and thus `Ba`) may be any (signed)
+ integer; negative integers are mapped to 0; values greater than `n-1`
+ are mapped to `n-1`; and then the new array is constructed as above.
+
+ Parameters
+ ----------
+ a : int array
+ This array must contain integers in `[0, n-1]`, where `n` is the number
+ of choices, unless ``mode=wrap`` or ``mode=clip``, in which cases any
+ integers are permissible.
+ choices : sequence of arrays
+ Choice arrays. `a` and all of the choices must be broadcastable to the
+ same shape. If `choices` is itself an array (not recommended), then
+ its outermost dimension (i.e., the one corresponding to
+ ``choices.shape[0]``) is taken as defining the "sequence".
+ out : array, optional
+ If provided, the result will be inserted into this array. It should
+ be of the appropriate shape and dtype.
+ mode : {'raise' (default), 'wrap', 'clip'}, optional
+ Specifies how indices outside `[0, n-1]` will be treated:
+
+ * 'raise' : an exception is raised
+ * 'wrap' : value becomes value mod `n`
+ * 'clip' : values < 0 are mapped to 0, values > n-1 are mapped to n-1
+
+ Returns
+ -------
+ merged_array : array
+ The merged result.
+
+ Raises
+ ------
+ ValueError: shape mismatch
+ If `a` and each choice array are not all broadcastable to the same
+ shape.
+
+ See Also
+ --------
+ ndarray.choose : equivalent method
+
+ Notes
+ -----
+ To reduce the chance of misinterpretation, even though the following
+ "abuse" is nominally supported, `choices` should neither be, nor be
+ thought of as, a single array, i.e., the outermost sequence-like container
+ should be either a list or a tuple.
+
+ Examples
+ --------
+
+ >>> choices = [[0, 1, 2, 3], [10, 11, 12, 13],
+ ... [20, 21, 22, 23], [30, 31, 32, 33]]
+ >>> np.choose([2, 3, 1, 0], choices
+ ... # the first element of the result will be the first element of the
+ ... # third (2+1) "array" in choices, namely, 20; the second element
+ ... # will be the second element of the fourth (3+1) choice array, i.e.,
+ ... # 31, etc.
+ ... )
+ array([20, 31, 12, 3])
+ >>> np.choose([2, 4, 1, 0], choices, mode='clip') # 4 goes to 3 (4-1)
+ array([20, 31, 12, 3])
+ >>> # because there are 4 choice arrays
+ >>> np.choose([2, 4, 1, 0], choices, mode='wrap') # 4 goes to (4 mod 4)
+ array([20, 1, 12, 3])
+ >>> # i.e., 0
+
+ A couple examples illustrating how choose broadcasts:
+
+ >>> a = [[1, 0, 1], [0, 1, 0], [1, 0, 1]]
+ >>> choices = [-10, 10]
+ >>> np.choose(a, choices)
+ array([[ 10, -10, 10],
+ [-10, 10, -10],
+ [ 10, -10, 10]])
+
+ >>> # With thanks to Anne Archibald
+ >>> a = np.array([0, 1]).reshape((2,1,1))
+ >>> c1 = np.array([1, 2, 3]).reshape((1,3,1))
+ >>> c2 = np.array([-1, -2, -3, -4, -5]).reshape((1,1,5))
+ >>> np.choose(a, (c1, c2)) # result is 2x3x5, res[0,:,:]=c1, res[1,:,:]=c2
+ array([[[ 1, 1, 1, 1, 1],
+ [ 2, 2, 2, 2, 2],
+ [ 3, 3, 3, 3, 3]],
+ [[-1, -2, -3, -4, -5],
+ [-1, -2, -3, -4, -5],
+ [-1, -2, -3, -4, -5]]])
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def repeat(a, repeats, axis=None):
+ """
+ Repeat elements of an array.
+
+ Parameters
+ ----------
+ a : array_like
+ Input array.
+ repeats : {int, array of ints}
+ The number of repetitions for each element. `repeats` is broadcasted
+ to fit the shape of the given axis.
+ axis : int, optional
+ The axis along which to repeat values. By default, use the
+ flattened input array, and return a flat output array.
+
+ Returns
+ -------
+ repeated_array : ndarray
+ Output array which has the same shape as `a`, except along
+ the given axis.
+
+ See Also
+ --------
+ tile : Tile an array.
+
+ Examples
+ --------
+ >>> x = np.array([[1,2],[3,4]])
+ >>> np.repeat(x, 2)
+ array([1, 1, 2, 2, 3, 3, 4, 4])
+ >>> np.repeat(x, 3, axis=1)
+ array([[1, 1, 1, 2, 2, 2],
+ [3, 3, 3, 4, 4, 4]])
+ >>> np.repeat(x, [1, 2], axis=0)
+ array([[1, 2],
+ [3, 4],
+ [3, 4]])
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def put(a, ind, v, mode='raise'):
+ """
+ Replaces specified elements of an array with given values.
+
+ The indexing works on the flattened target array. `put` is roughly
+ equivalent to:
+
+ ::
+
+ a.flat[ind] = v
+
+ Parameters
+ ----------
+ a : ndarray
+ Target array.
+ ind : array_like
+ Target indices, interpreted as integers.
+ v : array_like
+ Values to place in `a` at target indices. If `v` is shorter than
+ `ind` it will be repeated as necessary.
+ mode : {'raise', 'wrap', 'clip'}, optional
+ Specifies how out-of-bounds indices will behave.
+
+ * 'raise' -- raise an error (default)
+ * 'wrap' -- wrap around
+ * 'clip' -- clip to the range
+
+ 'clip' mode means that all indices that are too large are replaced
+ by the index that addresses the last element along that axis. Note
+ that this disables indexing with negative numbers.
+
+ See Also
+ --------
+ putmask, place
+
+ Examples
+ --------
+ >>> a = np.arange(5)
+ >>> np.put(a, [0, 2], [-44, -55])
+ >>> a
+ array([-44, 1, -55, 3, 4])
+
+ >>> a = np.arange(5)
+ >>> np.put(a, 22, -5, mode='clip')
+ >>> a
+ array([ 0, 1, 2, 3, -5])
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def swapaxes(a, axis1, axis2):
+ """
+ Interchange two axes of an array.
+
+ Parameters
+ ----------
+ a : array_like
+ Input array.
+ axis1 : int
+ First axis.
+ axis2 : int
+ Second axis.
+
+ Returns
+ -------
+ a_swapped : ndarray
+ If `a` is an ndarray, then a view of `a` is returned; otherwise
+ a new array is created.
+
+ Examples
+ --------
+ >>> x = np.array([[1,2,3]])
+ >>> np.swapaxes(x,0,1)
+ array([[1],
+ [2],
+ [3]])
+
+ >>> x = np.array([[[0,1],[2,3]],[[4,5],[6,7]]])
+ >>> x
+ array([[[0, 1],
+ [2, 3]],
+ [[4, 5],
+ [6, 7]]])
+
+ >>> np.swapaxes(x,0,2)
+ array([[[0, 4],
+ [2, 6]],
+ [[1, 5],
+ [3, 7]]])
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def transpose(a, axes=None):
+ """
+ Permute the dimensions of an array.
+
+ Parameters
+ ----------
+ a : array_like
+ Input array.
+ axes : list of ints, optional
+ By default, reverse the dimensions, otherwise permute the axes
+ according to the values given.
+
+ Returns
+ -------
+ p : ndarray
+ `a` with its axes permuted. A view is returned whenever
+ possible.
+
+ See Also
+ --------
+ rollaxis
+
+ Examples
+ --------
+ >>> x = np.arange(4).reshape((2,2))
+ >>> x
+ array([[0, 1],
+ [2, 3]])
+
+ >>> np.transpose(x)
+ array([[0, 2],
+ [1, 3]])
+
+ >>> x = np.ones((1, 2, 3))
+ >>> np.transpose(x, (1, 0, 2)).shape
+ (2, 1, 3)
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def sort(a, axis=-1, kind='quicksort', order=None):
+ """
+ Return a sorted copy of an array.
+
+ Parameters
+ ----------
+ a : array_like
+ Array to be sorted.
+ axis : int or None, optional
+ Axis along which to sort. If None, the array is flattened before
+ sorting. The default is -1, which sorts along the last axis.
+ kind : {'quicksort', 'mergesort', 'heapsort'}, optional
+ Sorting algorithm. Default is 'quicksort'.
+ order : list, optional
+ When `a` is a structured array, this argument specifies which fields
+ to compare first, second, and so on. This list does not need to
+ include all of the fields.
+
+ Returns
+ -------
+ sorted_array : ndarray
+ Array of the same type and shape as `a`.
+
+ See Also
+ --------
+ ndarray.sort : Method to sort an array in-place.
+ argsort : Indirect sort.
+ lexsort : Indirect stable sort on multiple keys.
+ searchsorted : Find elements in a sorted array.
+
+ Notes
+ -----
+ The various sorting algorithms are characterized by their average speed,
+ worst case performance, work space size, and whether they are stable. A
+ stable sort keeps items with the same key in the same relative
+ order. The three available algorithms have the following
+ properties:
+
+ =========== ======= ============= ============ =======
+ kind speed worst case work space stable
+ =========== ======= ============= ============ =======
+ 'quicksort' 1 O(n^2) 0 no
+ 'mergesort' 2 O(n*log(n)) ~n/2 yes
+ 'heapsort' 3 O(n*log(n)) 0 no
+ =========== ======= ============= ============ =======
+
+ All the sort algorithms make temporary copies of the data when
+ sorting along any but the last axis. Consequently, sorting along
+ the last axis is faster and uses less space than sorting along
+ any other axis.
+
+ The sort order for complex numbers is lexicographic. If both the real
+ and imaginary parts are non-nan then the order is determined by the
+ real parts except when they are equal, in which case the order is
+ determined by the imaginary parts.
+
+ Previous to numpy 1.4.0 sorting real and complex arrays containing nan
+ values led to undefined behaviour. In numpy versions >= 1.4.0 nan
+ values are sorted to the end. The extended sort order is:
+
+ * Real: [R, nan]
+ * Complex: [R + Rj, R + nanj, nan + Rj, nan + nanj]
+
+ where R is a non-nan real value. Complex values with the same nan
+ placements are sorted according to the non-nan part if it exists.
+ Non-nan values are sorted as before.
+
+ Examples
+ --------
+ >>> a = np.array([[1,4],[3,1]])
+ >>> np.sort(a) # sort along the last axis
+ array([[1, 4],
+ [1, 3]])
+ >>> np.sort(a, axis=None) # sort the flattened array
+ array([1, 1, 3, 4])
+ >>> np.sort(a, axis=0) # sort along the first axis
+ array([[1, 1],
+ [3, 4]])
+
+ Use the `order` keyword to specify a field to use when sorting a
+ structured array:
+
+ >>> dtype = [('name', 'S10'), ('height', float), ('age', int)]
+ >>> values = [('Arthur', 1.8, 41), ('Lancelot', 1.9, 38),
+ ... ('Galahad', 1.7, 38)]
+ >>> a = np.array(values, dtype=dtype) # create a structured array
+ >>> np.sort(a, order='height') # doctest: +SKIP
+ array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
+ ('Lancelot', 1.8999999999999999, 38)],
+ dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])
+
+ Sort by age, then height if ages are equal:
+
+ >>> np.sort(a, order=['age', 'height']) # doctest: +SKIP
+ array([('Galahad', 1.7, 38), ('Lancelot', 1.8999999999999999, 38),
+ ('Arthur', 1.8, 41)],
+ dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def argsort(a, axis=-1, kind='quicksort', order=None):
+ """
+ Returns the indices that would sort an array.
+
+ Perform an indirect sort along the given axis using the algorithm specified
+ by the `kind` keyword. It returns an array of indices of the same shape as
+ `a` that index data along the given axis in sorted order.
+
+ Parameters
+ ----------
+ a : array_like
+ Array to sort.
+ axis : int or None, optional
+ Axis along which to sort. The default is -1 (the last axis). If None,
+ the flattened array is used.
+ kind : {'quicksort', 'mergesort', 'heapsort'}, optional
+ Sorting algorithm.
+ order : list, optional
+ When `a` is an array with fields defined, this argument specifies
+ which fields to compare first, second, etc. Not all fields need be
+ specified.
+
+ Returns
+ -------
+ index_array : ndarray, int
+ Array of indices that sort `a` along the specified axis.
+ In other words, ``a[index_array]`` yields a sorted `a`.
+
+ See Also
+ --------
+ sort : Describes sorting algorithms used.
+ lexsort : Indirect stable sort with multiple keys.
+ ndarray.sort : Inplace sort.
+
+ Notes
+ -----
+ See `sort` for notes on the different sorting algorithms.
+
+ As of NumPy 1.4.0 `argsort` works with real/complex arrays containing
+ nan values. The enhanced sort order is documented in `sort`.
+
+ Examples
+ --------
+ One dimensional array:
+
+ >>> x = np.array([3, 1, 2])
+ >>> np.argsort(x)
+ array([1, 2, 0])
+
+ Two-dimensional array:
+
+ >>> x = np.array([[0, 3], [2, 2]])
+ >>> x
+ array([[0, 3],
+ [2, 2]])
+
+ >>> np.argsort(x, axis=0)
+ array([[0, 1],
+ [1, 0]])
+
+ >>> np.argsort(x, axis=1)
+ array([[0, 1],
+ [0, 1]])
+
+ Sorting with keys:
+
+ >>> x = np.array([(1, 0), (0, 1)], dtype=[('x', '<i4'), ('y', '<i4')])
+ >>> x
+ array([(1, 0), (0, 1)],
+ dtype=[('x', '<i4'), ('y', '<i4')])
+
+ >>> np.argsort(x, order=('x','y'))
+ array([1, 0])
+
+ >>> np.argsort(x, order=('y','x'))
+ array([0, 1])
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def argmax(a, axis=None):
+ """
+ Indices of the maximum values along an axis.
+
+ Parameters
+ ----------
+ a : array_like
+ Input array.
+ axis : int, optional
+ By default, the index is into the flattened array, otherwise
+ along the specified axis.
+
+ Returns
+ -------
+ index_array : ndarray of ints
+ Array of indices into the array. It has the same shape as `a.shape`
+ with the dimension along `axis` removed.
+
+ See Also
+ --------
+ ndarray.argmax, argmin
+ amax : The maximum value along a given axis.
+ unravel_index : Convert a flat index into an index tuple.
+
+ Notes
+ -----
+ In case of multiple occurrences of the maximum values, the indices
+ corresponding to the first occurrence are returned.
+
+ Examples
+ --------
+ >>> a = np.arange(6).reshape(2,3)
+ >>> a
+ array([[0, 1, 2],
+ [3, 4, 5]])
+ >>> np.argmax(a)
+ 5
+ >>> np.argmax(a, axis=0)
+ array([1, 1, 1])
+ >>> np.argmax(a, axis=1)
+ array([2, 2])
+
+ >>> b = np.arange(6)
+ >>> b[1] = 5
+ >>> b
+ array([0, 5, 2, 3, 4, 5])
+ >>> np.argmax(b) # Only the first occurrence is returned.
+ 1
+
+ """
+ if not hasattr(a, 'argmax'):
+ a = numpypy.array(a)
+ return a.argmax()
+
+
+def argmin(a, axis=None):
+ """
+ Return the indices of the minimum values along an axis.
+
+ See Also
+ --------
+ argmax : Similar function. Please refer to `numpy.argmax` for detailed
+ documentation.
+
+ """
+ if not hasattr(a, 'argmin'):
+ a = numpypy.array(a)
+ return a.argmin()
+
+
+def searchsorted(a, v, side='left'):
+ """
+ Find indices where elements should be inserted to maintain order.
+
+ Find the indices into a sorted array `a` such that, if the corresponding
+ elements in `v` were inserted before the indices, the order of `a` would
+ be preserved.
+
+ Parameters
+ ----------
+ a : 1-D array_like
+ Input array, sorted in ascending order.
+ v : array_like
+ Values to insert into `a`.
+ side : {'left', 'right'}, optional
+ If 'left', the index of the first suitable location found is given. If
+ 'right', return the last such index. If there is no suitable
+ index, return either 0 or N (where N is the length of `a`).
+
+ Returns
+ -------
+ indices : array of ints
+ Array of insertion points with the same shape as `v`.
+
+ See Also
+ --------
+ sort : Return a sorted copy of an array.
+ histogram : Produce histogram from 1-D data.
+
+ Notes
+ -----
+ Binary search is used to find the required insertion points.
+
+ As of Numpy 1.4.0 `searchsorted` works with real/complex arrays containing
+ `nan` values. The enhanced sort order is documented in `sort`.
+
+ Examples
+ --------
+ >>> np.searchsorted([1,2,3,4,5], 3)
+ 2
+ >>> np.searchsorted([1,2,3,4,5], 3, side='right')
+ 3
+ >>> np.searchsorted([1,2,3,4,5], [-10, 10, 2, 3])
+ array([0, 5, 1, 2])
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def resize(a, new_shape):
+ """
+ Return a new array with the specified shape.
+
+ If the new array is larger than the original array, then the new
+ array is filled with repeated copies of `a`. Note that this behavior
+ is different from a.resize(new_shape) which fills with zeros instead
+ of repeated copies of `a`.
+
+ Parameters
+ ----------
+ a : array_like
+ Array to be resized.
+
+ new_shape : int or tuple of int
+ Shape of resized array.
+
+ Returns
+ -------
+ reshaped_array : ndarray
+ The new array is formed from the data in the old array, repeated
+ if necessary to fill out the required number of elements. The
+ data are repeated in the order that they are stored in memory.
+
+ See Also
+ --------
+ ndarray.resize : resize an array in-place.
+
+ Examples
+ --------
+ >>> a=np.array([[0,1],[2,3]])
+ >>> np.resize(a,(1,4))
+ array([[0, 1, 2, 3]])
+ >>> np.resize(a,(2,4))
+ array([[0, 1, 2, 3],
+ [0, 1, 2, 3]])
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def squeeze(a):
+ """
+ Remove single-dimensional entries from the shape of an array.
+
+ Parameters
+ ----------
+ a : array_like
+ Input data.
+
+ Returns
+ -------
+ squeezed : ndarray
+        The input array, but with all dimensions of length 1
+ removed. Whenever possible, a view on `a` is returned.
+
+ Examples
+ --------
+ >>> x = np.array([[[0], [1], [2]]])
+ >>> x.shape
+ (1, 3, 1)
+ >>> np.squeeze(x).shape
+ (3,)
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def diagonal(a, offset=0, axis1=0, axis2=1):
+ """
+ Return specified diagonals.
+
+ If `a` is 2-D, returns the diagonal of `a` with the given offset,
+ i.e., the collection of elements of the form ``a[i, i+offset]``. If
+ `a` has more than two dimensions, then the axes specified by `axis1`
+ and `axis2` are used to determine the 2-D sub-array whose diagonal is
+ returned. The shape of the resulting array can be determined by
+ removing `axis1` and `axis2` and appending an index to the right equal
+ to the size of the resulting diagonals.
+
+ Parameters
+ ----------
+ a : array_like
+ Array from which the diagonals are taken.
+ offset : int, optional
+ Offset of the diagonal from the main diagonal. Can be positive or
+ negative. Defaults to main diagonal (0).
+ axis1 : int, optional
+ Axis to be used as the first axis of the 2-D sub-arrays from which
+ the diagonals should be taken. Defaults to first axis (0).
+ axis2 : int, optional
+ Axis to be used as the second axis of the 2-D sub-arrays from
+ which the diagonals should be taken. Defaults to second axis (1).
+
+ Returns
+ -------
+ array_of_diagonals : ndarray
+ If `a` is 2-D, a 1-D array containing the diagonal is returned.
+ If the dimension of `a` is larger, then an array of diagonals is
+ returned, "packed" from left-most dimension to right-most (e.g.,
+ if `a` is 3-D, then the diagonals are "packed" along rows).
+
+ Raises
+ ------
+ ValueError
+ If the dimension of `a` is less than 2.
+
+ See Also
+ --------
+ diag : MATLAB work-a-like for 1-D and 2-D arrays.
+ diagflat : Create diagonal arrays.
+ trace : Sum along diagonals.
+
+ Examples
+ --------
+ >>> a = np.arange(4).reshape(2,2)
+ >>> a
+ array([[0, 1],
+ [2, 3]])
+ >>> a.diagonal()
+ array([0, 3])
+ >>> a.diagonal(1)
+ array([1])
+
+ A 3-D example:
+
+ >>> a = np.arange(8).reshape(2,2,2); a
+ array([[[0, 1],
+ [2, 3]],
+ [[4, 5],
+ [6, 7]]])
+ >>> a.diagonal(0, # Main diagonals of two arrays created by skipping
+ ... 0, # across the outer(left)-most axis last and
+ ... 1) # the "middle" (row) axis first.
+ array([[0, 6],
+ [1, 7]])
+
+ The sub-arrays whose main diagonals we just obtained; note that each
+ corresponds to fixing the right-most (column) axis, and that the
+ diagonals are "packed" in rows.
+
+ >>> a[:,:,0] # main diagonal is [0 6]
+ array([[0, 2],
+ [4, 6]])
+ >>> a[:,:,1] # main diagonal is [1 7]
+ array([[1, 3],
+ [5, 7]])
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def trace(a, offset=0, axis1=0, axis2=1, dtype=None, out=None):
+ """
+ Return the sum along diagonals of the array.
+
+ If `a` is 2-D, the sum along its diagonal with the given offset
+ is returned, i.e., the sum of elements ``a[i,i+offset]`` for all i.
+
+ If `a` has more than two dimensions, then the axes specified by axis1 and
+ axis2 are used to determine the 2-D sub-arrays whose traces are returned.
+ The shape of the resulting array is the same as that of `a` with `axis1`
+ and `axis2` removed.
+
+ Parameters
+ ----------
+ a : array_like
+ Input array, from which the diagonals are taken.
+ offset : int, optional
+ Offset of the diagonal from the main diagonal. Can be both positive
+ and negative. Defaults to 0.
+ axis1, axis2 : int, optional
+ Axes to be used as the first and second axis of the 2-D sub-arrays
+ from which the diagonals should be taken. Defaults are the first two
+ axes of `a`.
+ dtype : dtype, optional
+ Determines the data-type of the returned array and of the accumulator
+ where the elements are summed. If dtype has the value None and `a` is
+ of integer type of precision less than the default integer
+ precision, then the default integer precision is used. Otherwise,
+ the precision is the same as that of `a`.
+ out : ndarray, optional
+ Array into which the output is placed. Its type is preserved and
+ it must be of the right shape to hold the output.
+
+ Returns
+ -------
+ sum_along_diagonals : ndarray
+ If `a` is 2-D, the sum along the diagonal is returned. If `a` has
+ larger dimensions, then an array of sums along diagonals is returned.
+
+ See Also
+ --------
+ diag, diagonal, diagflat
+
+ Examples
+ --------
+ >>> np.trace(np.eye(3))
+ 3.0
+ >>> a = np.arange(8).reshape((2,2,2))
+ >>> np.trace(a)
+ array([6, 8])
+
+ >>> a = np.arange(24).reshape((2,2,2,3))
+ >>> np.trace(a).shape
+ (2, 3)
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+def ravel(a, order='C'):
+ """
+ Return a flattened array.
+
+ A 1-D array, containing the elements of the input, is returned. A copy is
+ made only if needed.
+
+ Parameters
+ ----------
+ a : array_like
+ Input array. The elements in ``a`` are read in the order specified by
+ `order`, and packed as a 1-D array.
+ order : {'C','F', 'A', 'K'}, optional
+ The elements of ``a`` are read in this order. 'C' means to view
+ the elements in C (row-major) order. 'F' means to view the elements
+ in Fortran (column-major) order. 'A' means to view the elements
+ in 'F' order if a is Fortran contiguous, 'C' order otherwise.
+ 'K' means to view the elements in the order they occur in memory,
+ except for reversing the data when strides are negative.
+ By default, 'C' order is used.
+
+ Returns
+ -------
+ 1d_array : ndarray
+ Output of the same dtype as `a`, and of shape ``(a.size(),)``.
+
+ See Also
+ --------
+ ndarray.flat : 1-D iterator over an array.
+ ndarray.flatten : 1-D array copy of the elements of an array
+ in row-major order.
+
+ Notes
+ -----
+ In row-major order, the row index varies the slowest, and the column
+ index the quickest. This can be generalized to multiple dimensions,
+ where row-major order implies that the index along the first axis
+ varies slowest, and the index along the last quickest. The opposite holds
+ for Fortran-, or column-major, mode.
+
+ Examples
+ --------
+ It is equivalent to ``reshape(-1, order=order)``.
+
+ >>> x = np.array([[1, 2, 3], [4, 5, 6]])
+ >>> print np.ravel(x)
+ [1 2 3 4 5 6]
+
+ >>> print x.reshape(-1)
+ [1 2 3 4 5 6]
+
+ >>> print np.ravel(x, order='F')
+ [1 4 2 5 3 6]
+
+ When ``order`` is 'A', it will preserve the array's 'C' or 'F' ordering:
+
+ >>> print np.ravel(x.T)
+ [1 4 2 5 3 6]
+ >>> print np.ravel(x.T, order='A')
+ [1 2 3 4 5 6]
+
+ When ``order`` is 'K', it will preserve orderings that are neither 'C'
+ nor 'F', but won't reverse axes:
+
+ >>> a = np.arange(3)[::-1]; a
+ array([2, 1, 0])
+ >>> a.ravel(order='C')
+ array([2, 1, 0])
+ >>> a.ravel(order='K')
+ array([2, 1, 0])
+
+ >>> a = np.arange(12).reshape(2,3,2).swapaxes(1,2); a
+ array([[[ 0, 2, 4],
+ [ 1, 3, 5]],
+ [[ 6, 8, 10],
+ [ 7, 9, 11]]])
+ >>> a.ravel(order='C')
+ array([ 0, 2, 4, 1, 3, 5, 6, 8, 10, 7, 9, 11])
+ >>> a.ravel(order='K')
+ array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def nonzero(a):
+ """
+ Return the indices of the elements that are non-zero.
+
+ Returns a tuple of arrays, one for each dimension of `a`, containing
+ the indices of the non-zero elements in that dimension. The
+ corresponding non-zero values can be obtained with::
+
+ a[nonzero(a)]
+
+ To group the indices by element, rather than dimension, use::
+
+ transpose(nonzero(a))
+
+ The result of this is always a 2-D array, with a row for
+ each non-zero element.
+
+ Parameters
+ ----------
+ a : array_like
+ Input array.
+
+ Returns
+ -------
+ tuple_of_arrays : tuple
+ Indices of elements that are non-zero.
+
+ See Also
+ --------
+ flatnonzero :
+ Return indices that are non-zero in the flattened version of the input
+ array.
+ ndarray.nonzero :
+ Equivalent ndarray method.
+ count_nonzero :
+ Counts the number of non-zero elements in the input array.
+
+ Examples
+ --------
+ >>> x = np.eye(3)
+ >>> x
+ array([[ 1., 0., 0.],
+ [ 0., 1., 0.],
+ [ 0., 0., 1.]])
+ >>> np.nonzero(x)
+ (array([0, 1, 2]), array([0, 1, 2]))
+
+ >>> x[np.nonzero(x)]
+ array([ 1., 1., 1.])
+ >>> np.transpose(np.nonzero(x))
+ array([[0, 0],
+ [1, 1],
+ [2, 2]])
+
+ A common use for ``nonzero`` is to find the indices of an array, where
+ a condition is True. Given an array `a`, the condition `a` > 3 is a
+ boolean array and since False is interpreted as 0, np.nonzero(a > 3)
+ yields the indices of the `a` where the condition is true.
+
+ >>> a = np.array([[1,2,3],[4,5,6],[7,8,9]])
+ >>> a > 3
+ array([[False, False, False],
+ [ True, True, True],
+ [ True, True, True]], dtype=bool)
+ >>> np.nonzero(a > 3)
+ (array([1, 1, 1, 2, 2, 2]), array([0, 1, 2, 0, 1, 2]))
+
+ The ``nonzero`` method of the boolean array can also be called.
+
+ >>> (a > 3).nonzero()
+ (array([1, 1, 1, 2, 2, 2]), array([0, 1, 2, 0, 1, 2]))
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def shape(a):
+    """
+    Return the shape of an array.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.
+
+    Returns
+    -------
+    shape : tuple of ints
+        The elements of the shape tuple give the lengths of the
+        corresponding array dimensions.
+
+    See Also
+    --------
+    alen
+    ndarray.shape : Equivalent array method.
+
+    Examples
+    --------
+    >>> np.shape(np.eye(3))
+    (3, 3)
+    >>> np.shape([[1, 2]])
+    (1, 2)
+    >>> np.shape([0])
+    (1,)
+    >>> np.shape(0)
+    ()
+
+    >>> a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
+    >>> np.shape(a)
+    (2,)
+    >>> a.shape
+    (2,)
+
+    """
+    # Anything without a .shape attribute is first coerced to an ndarray,
+    # so list/scalar inputs work as in the examples above.
+    if not hasattr(a, 'shape'):
+        a = numpypy.array(a)
+    return a.shape
+
+
+def compress(condition, a, axis=None, out=None):
+ """
+ Return selected slices of an array along given axis.
+
+ When working along a given axis, a slice along that axis is returned in
+ `output` for each index where `condition` evaluates to True. When
+ working on a 1-D array, `compress` is equivalent to `extract`.
+
+ Parameters
+ ----------
+ condition : 1-D array of bools
+ Array that selects which entries to return. If len(condition)
+ is less than the size of `a` along the given axis, then output is
+ truncated to the length of the condition array.
+ a : array_like
+ Array from which to extract a part.
+ axis : int, optional
+ Axis along which to take slices. If None (default), work on the
+ flattened array.
+ out : ndarray, optional
+ Output array. Its type is preserved and it must be of the right
+ shape to hold the output.
+
+ Returns
+ -------
+ compressed_array : ndarray
+ A copy of `a` without the slices along axis for which `condition`
+ is false.
+
+ See Also
+ --------
+ take, choose, diag, diagonal, select
+ ndarray.compress : Equivalent method.
+ numpy.doc.ufuncs : Section "Output arguments"
+
+ Examples
+ --------
+ >>> a = np.array([[1, 2], [3, 4], [5, 6]])
+ >>> a
+ array([[1, 2],
+ [3, 4],
+ [5, 6]])
+ >>> np.compress([0, 1], a, axis=0)
+ array([[3, 4]])
+ >>> np.compress([False, True, True], a, axis=0)
+ array([[3, 4],
+ [5, 6]])
+ >>> np.compress([False, True], a, axis=1)
+ array([[2],
+ [4],
+ [6]])
+
+ Working on the flattened array does not return slices along an axis but
+ selects elements.
+
+ >>> np.compress([False, True], a)
+ array([2])
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def clip(a, a_min, a_max, out=None):
+ """
+ Clip (limit) the values in an array.
+
+ Given an interval, values outside the interval are clipped to
+ the interval edges. For example, if an interval of ``[0, 1]``
+ is specified, values smaller than 0 become 0, and values larger
+ than 1 become 1.
+
+ Parameters
+ ----------
+ a : array_like
+ Array containing elements to clip.
+ a_min : scalar or array_like
+ Minimum value.
+ a_max : scalar or array_like
+ Maximum value. If `a_min` or `a_max` are array_like, then they will
+ be broadcasted to the shape of `a`.
+ out : ndarray, optional
+ The results will be placed in this array. It may be the input
+ array for in-place clipping. `out` must be of the right shape
+ to hold the output. Its type is preserved.
+
+ Returns
+ -------
+ clipped_array : ndarray
+ An array with the elements of `a`, but where values
+ < `a_min` are replaced with `a_min`, and those > `a_max`
+ with `a_max`.
+
+ See Also
+ --------
+ numpy.doc.ufuncs : Section "Output arguments"
+
+ Examples
+ --------
+ >>> a = np.arange(10)
+ >>> np.clip(a, 1, 8)
+ array([1, 1, 2, 3, 4, 5, 6, 7, 8, 8])
+ >>> a
+ array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+ >>> np.clip(a, 3, 6, out=a)
+ array([3, 3, 3, 3, 4, 5, 6, 6, 6, 6])
+ >>> a = np.arange(10)
+ >>> a
+ array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+ >>> np.clip(a, [3,4,1,1,1,4,4,4,4,4], 8)
+ array([3, 4, 2, 3, 4, 5, 6, 7, 8, 8])
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def sum(a, axis=None, dtype=None, out=None):
+    """
+    Sum of array elements over a given axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Elements to sum.
+    axis : integer, optional
+        Axis over which the sum is taken. By default `axis` is None,
+        and all elements are summed.
+    dtype : dtype, optional
+        The type of the returned array and of the accumulator in which
+        the elements are summed.  By default, the dtype of `a` is used.
+        An exception is when `a` has an integer type with less precision
+        than the default platform integer.  In that case, the default
+        platform integer is used instead.
+    out : ndarray, optional
+        Array into which the output is placed.  By default, a new array is
+        created.  If `out` is given, it must be of the appropriate shape
+        (the shape of `a` with `axis` removed, i.e.,
+        ``numpy.delete(a.shape, axis)``).  Its type is preserved. See
+        `doc.ufuncs` (Section "Output arguments") for more details.
+
+    Returns
+    -------
+    sum_along_axis : ndarray
+        An array with the same shape as `a`, with the specified
+        axis removed.   If `a` is a 0-d array, or if `axis` is None, a scalar
+        is returned.  If an output array is specified, a reference to
+        `out` is returned.
+
+    See Also
+    --------
+    ndarray.sum : Equivalent method.
+
+    cumsum : Cumulative sum of array elements.
+
+    trapz : Integration of array values using the composite trapezoidal rule.
+
+    mean, average
+
+    Notes
+    -----
+    Arithmetic is modular when using integer types, and no error is
+    raised on overflow.
+
+    Examples
+    --------
+    >>> np.sum([0.5, 1.5])
+    2.0
+    >>> np.sum([0.5, 0.7, 0.2, 1.5], dtype=np.int32)
+    1
+    >>> np.sum([[0, 1], [0, 5]])
+    6
+    >>> np.sum([[0, 1], [0, 5]], axis=0)
+    array([0, 6])
+    >>> np.sum([[0, 1], [0, 5]], axis=1)
+    array([1, 5])
+
+    If the accumulator is too small, overflow occurs:
+
+    >>> np.ones(128, dtype=np.int8).sum(dtype=np.int8)
+    -128
+
+    """
+    # NOTE(review): axis, dtype and out are accepted for NumPy API
+    # compatibility but are currently ignored -- the sum is always taken
+    # over the flattened array.  Confirm against ndarray.sum support.
+    if not hasattr(a, "sum"):
+        a = numpypy.array(a)
+    return a.sum()
+
+
+def product (a, axis=None, dtype=None, out=None):
+ """
+ Return the product of array elements over a given axis.
+
+ See Also
+ --------
+ prod : equivalent function; see for details.
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def sometrue(a, axis=None, out=None):
+    """
+    Check whether some values are true.
+
+    Refer to `any` for full documentation.
+
+    See Also
+    --------
+    any : equivalent function
+
+    """
+    # NOTE(review): axis and out are accepted for API compatibility but
+    # are currently ignored; always reduces over the flattened array.
+    if not hasattr(a, 'any'):
+        a = numpypy.array(a)
+    return a.any()
+
+
+def alltrue (a, axis=None, out=None):
+    """
+    Check if all elements of input array are true.
+
+    See Also
+    --------
+    numpy.all : Equivalent function; see for details.
+
+    """
+    # NOTE(review): axis and out are accepted for API compatibility but
+    # are currently ignored; always reduces over the flattened array.
+    if not hasattr(a, 'all'):
+        a = numpypy.array(a)
+    return a.all()
+
+def any(a,axis=None, out=None):
+    """
+    Test whether any array element along a given axis evaluates to True.
+
+    Returns single boolean unless `axis` is not ``None``
+
+    Parameters
+    ----------
+    a : array_like
+        Input array or object that can be converted to an array.
+    axis : int, optional
+        Axis along which a logical OR is performed.  The default
+        (`axis` = `None`) is to perform a logical OR over a flattened
+        input array. `axis` may be negative, in which case it counts
+        from the last to the first axis.
+    out : ndarray, optional
+        Alternate output array in which to place the result.  It must have
+        the same shape as the expected output and its type is preserved
+        (e.g., if it is of type float, then it will remain so, returning
+        1.0 for True and 0.0 for False, regardless of the type of `a`).
+        See `doc.ufuncs` (Section "Output arguments") for details.
+
+    Returns
+    -------
+    any : bool or ndarray
+        A new boolean or `ndarray` is returned unless `out` is specified,
+        in which case a reference to `out` is returned.
+
+    See Also
+    --------
+    ndarray.any : equivalent method
+
+    all : Test whether all elements along a given axis evaluate to True.
+
+    Notes
+    -----
+    Not a Number (NaN), positive infinity and negative infinity evaluate
+    to `True` because these are not equal to zero.
+
+    Examples
+    --------
+    >>> np.any([[True, False], [True, True]])
+    True
+
+    >>> np.any([[True, False], [False, False]], axis=0)
+    array([ True, False], dtype=bool)
+
+    >>> np.any([-1, 0, 5])
+    True
+
+    >>> np.any(np.nan)
+    True
+
+    >>> o=np.array([False])
+    >>> z=np.any([-1, 4, 5], out=o)
+    >>> z, o
+    (array([ True], dtype=bool), array([ True], dtype=bool))
+    >>> # Check now that z is a reference to o
+    >>> z is o
+    True
+    >>> id(z), id(o) # identity of z and o  # doctest: +SKIP
+    (191614240, 191614240)
+
+    """
+    # NOTE(review): axis and out are accepted for API compatibility but
+    # are currently ignored; always reduces over the flattened array.
+    if not hasattr(a, 'any'):
+        a = numpypy.array(a)
+    return a.any()
+
+
+def all(a,axis=None, out=None):
+    """
+    Test whether all array elements along a given axis evaluate to True.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array or object that can be converted to an array.
+    axis : int, optional
+        Axis along which a logical AND is performed.
+        The default (`axis` = `None`) is to perform a logical AND
+        over a flattened input array.  `axis` may be negative, in which
+        case it counts from the last to the first axis.
+    out : ndarray, optional
+        Alternate output array in which to place the result.
+        It must have the same shape as the expected output and its
+        type is preserved (e.g., if ``dtype(out)`` is float, the result
+        will consist of 0.0's and 1.0's).  See `doc.ufuncs` (Section
+        "Output arguments") for more details.
+
+    Returns
+    -------
+    all : ndarray, bool
+        A new boolean or array is returned unless `out` is specified,
+        in which case a reference to `out` is returned.
+
+    See Also
+    --------
+    ndarray.all : equivalent method
+
+    any : Test whether any element along a given axis evaluates to True.
+
+    Notes
+    -----
+    Not a Number (NaN), positive infinity and negative infinity
+    evaluate to `True` because these are not equal to zero.
+
+    Examples
+    --------
+    >>> np.all([[True,False],[True,True]])
+    False
+
+    >>> np.all([[True,False],[True,True]], axis=0)
+    array([ True, False], dtype=bool)
+
+    >>> np.all([-1, 4, 5])
+    True
+
+    >>> np.all([1.0, np.nan])
+    True
+
+    >>> o=np.array([False])
+    >>> z=np.all([-1, 4, 5], out=o)
+    >>> id(z), id(o), z                             # doctest: +SKIP
+    (28293632, 28293632, array([ True], dtype=bool))
+
+    """
+    # NOTE(review): axis and out are accepted for API compatibility but
+    # are currently ignored; always reduces over the flattened array.
+    if not hasattr(a, 'all'):
+        a = numpypy.array(a)
+    return a.all()
+
+
+def cumsum (a, axis=None, dtype=None, out=None):
+ """
+ Return the cumulative sum of the elements along a given axis.
+
+ Parameters
+ ----------
+ a : array_like
+ Input array.
+ axis : int, optional
+ Axis along which the cumulative sum is computed. The default
+ (None) is to compute the cumsum over the flattened array.
+ dtype : dtype, optional
+ Type of the returned array and of the accumulator in which the
+ elements are summed. If `dtype` is not specified, it defaults
+ to the dtype of `a`, unless `a` has an integer dtype with a
+ precision less than that of the default platform integer. In
+ that case, the default platform integer is used.
+ out : ndarray, optional
+ Alternative output array in which to place the result. It must
+ have the same shape and buffer length as the expected output
+ but the type will be cast if necessary. See `doc.ufuncs`
+ (Section "Output arguments") for more details.
+
+ Returns
+ -------
+ cumsum_along_axis : ndarray.
+ A new array holding the result is returned unless `out` is
+ specified, in which case a reference to `out` is returned. The
+ result has the same size as `a`, and the same shape as `a` if
+ `axis` is not None or `a` is a 1-d array.
+
+
+ See Also
+ --------
+ sum : Sum array elements.
+
+ trapz : Integration of array values using the composite trapezoidal rule.
+
+ Notes
+ -----
+ Arithmetic is modular when using integer types, and no error is
+ raised on overflow.
+
+ Examples
+ --------
+ >>> a = np.array([[1,2,3], [4,5,6]])
+ >>> a
+ array([[1, 2, 3],
+ [4, 5, 6]])
+ >>> np.cumsum(a)
+ array([ 1, 3, 6, 10, 15, 21])
+ >>> np.cumsum(a, dtype=float) # specifies type of output value(s)
+ array([ 1., 3., 6., 10., 15., 21.])
+
+ >>> np.cumsum(a,axis=0) # sum over rows for each of the 3 columns
+ array([[1, 2, 3],
+ [5, 7, 9]])
+ >>> np.cumsum(a,axis=1) # sum over columns for each of the 2 rows
+ array([[ 1, 3, 6],
+ [ 4, 9, 15]])
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def cumproduct(a, axis=None, dtype=None, out=None):
+ """
+ Return the cumulative product over the given axis.
+
+
+ See Also
+ --------
+ cumprod : equivalent function; see for details.
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def ptp(a, axis=None, out=None):
+ """
+ Range of values (maximum - minimum) along an axis.
+
+ The name of the function comes from the acronym for 'peak to peak'.
+
+ Parameters
+ ----------
+ a : array_like
+ Input values.
+ axis : int, optional
+ Axis along which to find the peaks. By default, flatten the
+ array.
+ out : array_like
+ Alternative output array in which to place the result. It must
+ have the same shape and buffer length as the expected output,
+ but the type of the output values will be cast if necessary.
+
+ Returns
+ -------
+ ptp : ndarray
+ A new array holding the result, unless `out` was
+ specified, in which case a reference to `out` is returned.
+
+ Examples
+ --------
+ >>> x = np.arange(4).reshape((2,2))
+ >>> x
+ array([[0, 1],
+ [2, 3]])
+
+ >>> np.ptp(x, axis=0)
+ array([2, 2])
+
+ >>> np.ptp(x, axis=1)
+ array([1, 1])
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def amax(a, axis=None, out=None):
+    """
+    Return the maximum of an array or maximum along an axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Input data.
+    axis : int, optional
+        Axis along which to operate.  By default flattened input is used.
+    out : ndarray, optional
+        Alternate output array in which to place the result.  Must be of
+        the same shape and buffer length as the expected output.  See
+        `doc.ufuncs` (Section "Output arguments") for more details.
+
+    Returns
+    -------
+    amax : ndarray or scalar
+        Maximum of `a`. If `axis` is None, the result is a scalar value.
+        If `axis` is given, the result is an array of dimension
+        ``a.ndim - 1``.
+
+    See Also
+    --------
+    nanmax : NaN values are ignored instead of being propagated.
+    fmax : same behavior as the C99 fmax function.
+    argmax : indices of the maximum values.
+
+    Notes
+    -----
+    NaN values are propagated, that is if at least one item is NaN, the
+    corresponding max value will be NaN as well.  To ignore NaN values
+    (MATLAB behavior), please use nanmax.
+
+    Examples
+    --------
+    >>> a = np.arange(4).reshape((2,2))
+    >>> a
+    array([[0, 1],
+           [2, 3]])
+    >>> np.amax(a)
+    3
+    >>> np.amax(a, axis=0)
+    array([2, 3])
+    >>> np.amax(a, axis=1)
+    array([1, 3])
+
+    >>> b = np.arange(5, dtype=np.float)
+    >>> b[2] = np.NaN
+    >>> np.amax(b)
+    nan
+    >>> np.nanmax(b)
+    4.0
+
+    """
+    # NOTE(review): axis and out are accepted for API compatibility but
+    # are currently ignored; always reduces over the flattened array.
+    if not hasattr(a, "max"):
+        a = numpypy.array(a)
+    return a.max()
+
+
+def amin(a, axis=None, out=None):
+    """
+    Return the minimum of an array or minimum along an axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Input data.
+    axis : int, optional
+        Axis along which to operate.  By default a flattened input is used.
+    out : ndarray, optional
+        Alternative output array in which to place the result.  Must
+        be of the same shape and buffer length as the expected output.
+        See `doc.ufuncs` (Section "Output arguments") for more details.
+
+    Returns
+    -------
+    amin : ndarray
+        A new array or a scalar array with the result.
+
+    See Also
+    --------
+    nanmin: nan values are ignored instead of being propagated
+    fmin: same behavior as the C99 fmin function
+    argmin: Return the indices of the minimum values.
+
+    amax, nanmax, fmax
+
+    Notes
+    -----
+    NaN values are propagated, that is if at least one item is nan, the
+    corresponding min value will be nan as well.  To ignore NaN values (matlab
+    behavior), please use nanmin.
+
+    Examples
+    --------
+    >>> a = np.arange(4).reshape((2,2))
+    >>> a
+    array([[0, 1],
+           [2, 3]])
+    >>> np.amin(a)           # Minimum of the flattened array
+    0
+    >>> np.amin(a, axis=0)         # Minima along the first axis
+    array([0, 1])
+    >>> np.amin(a, axis=1)         # Minima along the second axis
+    array([0, 2])
+
+    >>> b = np.arange(5, dtype=np.float)
+    >>> b[2] = np.NaN
+    >>> np.amin(b)
+    nan
+    >>> np.nanmin(b)
+    0.0
+
+    """
+    # amin() is equivalent to min()
+    # NOTE(review): axis and out are currently ignored (flattened reduce).
+    if not hasattr(a, 'min'):
+        a = numpypy.array(a)
+    return a.min()
+
+def alen(a):
+    """
+    Return the length of the first dimension of the input array.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.
+
+    Returns
+    -------
+    l : int
+        Length of the first dimension of `a`.
+
+    See Also
+    --------
+    shape, size
+
+    Examples
+    --------
+    >>> a = np.zeros((7,4,5))
+    >>> a.shape[0]
+    7
+    >>> np.alen(a)
+    7
+
+    """
+    # Coerce non-arrays so scalars/sequences gain a .shape attribute.
+    if not hasattr(a, 'shape'):
+        a = numpypy.array(a)
+    return a.shape[0]
+
+
+def prod(a, axis=None, dtype=None, out=None):
+ """
+ Return the product of array elements over a given axis.
+
+ Parameters
+ ----------
+ a : array_like
+ Input data.
+ axis : int, optional
+ Axis over which the product is taken. By default, the product
+ of all elements is calculated.
+ dtype : data-type, optional
+ The data-type of the returned array, as well as of the accumulator
+ in which the elements are multiplied. By default, if `a` is of
+ integer type, `dtype` is the default platform integer. (Note: if
+ the type of `a` is unsigned, then so is `dtype`.) Otherwise,
+ the dtype is the same as that of `a`.
+ out : ndarray, optional
+ Alternative output array in which to place the result. It must have
+ the same shape as the expected output, but the type of the
+ output values will be cast if necessary.
+
+ Returns
+ -------
+ product_along_axis : ndarray, see `dtype` parameter above.
+ An array shaped as `a` but with the specified axis removed.
+ Returns a reference to `out` if specified.
+
+ See Also
+ --------
+ ndarray.prod : equivalent method
+ numpy.doc.ufuncs : Section "Output arguments"
+
+ Notes
+ -----
+ Arithmetic is modular when using integer types, and no error is
+ raised on overflow. That means that, on a 32-bit platform:
+
+ >>> x = np.array([536870910, 536870910, 536870910, 536870910])
+ >>> np.prod(x) #random
+ 16
+
+ Examples
+ --------
+ By default, calculate the product of all elements:
+
+ >>> np.prod([1.,2.])
+ 2.0
+
+ Even when the input array is two-dimensional:
+
+ >>> np.prod([[1.,2.],[3.,4.]])
+ 24.0
+
+ But we can also specify the axis over which to multiply:
+
+ >>> np.prod([[1.,2.],[3.,4.]], axis=1)
+ array([ 2., 12.])
+
+ If the type of `x` is unsigned, then the output type is
+ the unsigned platform integer:
+
+ >>> x = np.array([1, 2, 3], dtype=np.uint8)
+ >>> np.prod(x).dtype == np.uint
+ True
+
+ If `x` is of a signed integer type, then the output type
+ is the default platform integer:
+
+ >>> x = np.array([1, 2, 3], dtype=np.int8)
+ >>> np.prod(x).dtype == np.int
+ True
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def cumprod(a, axis=None, dtype=None, out=None):
+ """
+ Return the cumulative product of elements along a given axis.
+
+ Parameters
+ ----------
+ a : array_like
+ Input array.
+ axis : int, optional
+ Axis along which the cumulative product is computed. By default
+ the input is flattened.
+ dtype : dtype, optional
+ Type of the returned array, as well as of the accumulator in which
+ the elements are multiplied. If *dtype* is not specified, it
+ defaults to the dtype of `a`, unless `a` has an integer dtype with
+ a precision less than that of the default platform integer. In
+ that case, the default platform integer is used instead.
+ out : ndarray, optional
+ Alternative output array in which to place the result. It must
+ have the same shape and buffer length as the expected output
+ but the type of the resulting values will be cast if necessary.
+
+ Returns
+ -------
+ cumprod : ndarray
+ A new array holding the result is returned unless `out` is
+ specified, in which case a reference to out is returned.
+
+ See Also
+ --------
+ numpy.doc.ufuncs : Section "Output arguments"
+
+ Notes
+ -----
+ Arithmetic is modular when using integer types, and no error is
+ raised on overflow.
+
+ Examples
+ --------
+ >>> a = np.array([1,2,3])
+ >>> np.cumprod(a) # intermediate results 1, 1*2
+ ... # total product 1*2*3 = 6
+ array([1, 2, 6])
+ >>> a = np.array([[1, 2, 3], [4, 5, 6]])
+ >>> np.cumprod(a, dtype=float) # specify type of output
+ array([ 1., 2., 6., 24., 120., 720.])
+
+ The cumulative product for each column (i.e., over the rows) of `a`:
+
+ >>> np.cumprod(a, axis=0)
+ array([[ 1, 2, 3],
+ [ 4, 10, 18]])
+
+ The cumulative product for each row (i.e. over the columns) of `a`:
+
+ >>> np.cumprod(a,axis=1)
+ array([[ 1, 2, 6],
+ [ 4, 20, 120]])
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def ndim(a):
+    """
+    Return the number of dimensions of an array.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.  If it is not already an ndarray, a conversion is
+        attempted.
+
+    Returns
+    -------
+    number_of_dimensions : int
+        The number of dimensions in `a`.  Scalars are zero-dimensional.
+
+    See Also
+    --------
+    ndarray.ndim : equivalent method
+    shape : dimensions of array
+    ndarray.shape : dimensions of array
+
+    Examples
+    --------
+    >>> np.ndim([[1,2,3],[4,5,6]])
+    2
+    >>> np.ndim(np.array([[1,2,3],[4,5,6]]))
+    2
+    >>> np.ndim(1)
+    0
+
+    """
+    # Coerce non-arrays so scalars/sequences gain an .ndim attribute.
+    if not hasattr(a, 'ndim'):
+        a = numpypy.array(a)
+    return a.ndim
+
+
+def rank(a):
+    """
+    Return the number of dimensions of an array.
+
+    If `a` is not already an array, a conversion is attempted.
+    Scalars are zero dimensional.
+
+    Parameters
+    ----------
+    a : array_like
+        Array whose number of dimensions is desired. If `a` is not an array,
+        a conversion is attempted.
+
+    Returns
+    -------
+    number_of_dimensions : int
+        The number of dimensions in the array.
+
+    See Also
+    --------
+    ndim : equivalent function
+    ndarray.ndim : equivalent property
+    shape : dimensions of array
+    ndarray.shape : dimensions of array
+
+    Notes
+    -----
+    In the old Numeric package, `rank` was the term used for the number of
+    dimensions, but in Numpy `ndim` is used instead.
+
+    Examples
+    --------
+    >>> np.rank([1,2,3])
+    1
+    >>> np.rank(np.array([[1,2,3],[4,5,6]]))
+    2
+    >>> np.rank(1)
+    0
+
+    """
+    # Same implementation as ndim(); kept for Numeric-era compatibility.
+    if not hasattr(a, 'ndim'):
+        a = numpypy.array(a)
+    return a.ndim
+
+
+def size(a, axis=None):
+ """
+ Return the number of elements along a given axis.
+
+ Parameters
+ ----------
+ a : array_like
+ Input data.
+ axis : int, optional
+ Axis along which the elements are counted. By default, give
+ the total number of elements.
+
+ Returns
+ -------
+ element_count : int
+ Number of elements along the specified axis.
+
+ See Also
+ --------
+ shape : dimensions of array
+ ndarray.shape : dimensions of array
+ ndarray.size : number of elements in array
+
+ Examples
+ --------
+ >>> a = np.array([[1,2,3],[4,5,6]])
+ >>> np.size(a)
+ 6
+ >>> np.size(a,1)
+ 3
+ >>> np.size(a,0)
+ 2
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def around(a, decimals=0, out=None):
+ """
+ Evenly round to the given number of decimals.
+
+ Parameters
+ ----------
+ a : array_like
+ Input data.
+ decimals : int, optional
+ Number of decimal places to round to (default: 0). If
+ decimals is negative, it specifies the number of positions to
+ the left of the decimal point.
+ out : ndarray, optional
+ Alternative output array in which to place the result. It must have
+ the same shape as the expected output, but the type of the output
+ values will be cast if necessary. See `doc.ufuncs` (Section
+ "Output arguments") for details.
+
+ Returns
+ -------
+ rounded_array : ndarray
+ An array of the same type as `a`, containing the rounded values.
+ Unless `out` was specified, a new array is created. A reference to
+ the result is returned.
+
+ The real and imaginary parts of complex numbers are rounded
+ separately. The result of rounding a float is a float.
+
+ See Also
+ --------
+ ndarray.round : equivalent method
+
+ ceil, fix, floor, rint, trunc
+
+
+ Notes
+ -----
+ For values exactly halfway between rounded decimal values, Numpy
+ rounds to the nearest even value. Thus 1.5 and 2.5 round to 2.0,
+ -0.5 and 0.5 round to 0.0, etc. Results may also be surprising due
+ to the inexact representation of decimal fractions in the IEEE
+ floating point standard [1]_ and errors introduced when scaling
+ by powers of ten.
+
+ References
+ ----------
+ .. [1] "Lecture Notes on the Status of IEEE 754", William Kahan,
+ http://www.cs.berkeley.edu/~wkahan/ieee754status/IEEE754.PDF
+ .. [2] "How Futile are Mindless Assessments of
+ Roundoff in Floating-Point Computation?", William Kahan,
+ http://www.cs.berkeley.edu/~wkahan/Mindless.pdf
+
+ Examples
+ --------
+ >>> np.around([0.37, 1.64])
+ array([ 0., 2.])
+ >>> np.around([0.37, 1.64], decimals=1)
+ array([ 0.4, 1.6])
+ >>> np.around([.5, 1.5, 2.5, 3.5, 4.5]) # rounds to nearest even value
+ array([ 0., 2., 2., 4., 4.])
+ >>> np.around([1,2,3,11], decimals=1) # ndarray of ints is returned
+ array([ 1, 2, 3, 11])
+ >>> np.around([1,2,3,11], decimals=-1)
+ array([ 0, 0, 0, 10])
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def round_(a, decimals=0, out=None):
+ """
+ Round an array to the given number of decimals.
+
+ Refer to `around` for full documentation.
+
+ See Also
+ --------
+ around : equivalent function
+
+ """
+ raise NotImplemented('Waiting on interp level method')
+
+
+def mean(a, axis=None, dtype=None, out=None):
+    """
+    Compute the arithmetic mean along the specified axis.
+
+    Returns the average of the array elements.  The average is taken over
+    the flattened array by default, otherwise over the specified axis.
+    `float64` intermediate and return values are used for integer inputs.
+
+    Parameters
+    ----------
+    a : array_like
+        Array containing numbers whose mean is desired. If `a` is not an
+        array, a conversion is attempted.
+    axis : int, optional
+        Axis along which the means are computed. The default is to compute
+        the mean of the flattened array.
+    dtype : data-type, optional
+        Type to use in computing the mean.  For integer inputs, the default
+        is `float64`; for floating point inputs, it is the same as the
+        input dtype.
+    out : ndarray, optional
+        Alternate output array in which to place the result.  The default
+        is ``None``; if provided, it must have the same shape as the
+        expected output, but the type will be cast if necessary.
+        See `doc.ufuncs` for details.
+
+    Returns
+    -------
+    m : ndarray, see dtype parameter above
+        If `out=None`, returns a new array containing the mean values,
+        otherwise a reference to the output array is returned.
+
+    See Also
+    --------
+    average : Weighted average
+
+    Notes
+    -----
+    The arithmetic mean is the sum of the elements along the axis divided
+    by the number of elements.
+
+    Note that for floating-point input, the mean is computed using the
+    same precision the input has.  Depending on the input data, this can
+    cause the results to be inaccurate, especially for `float32` (see
+    example below).  Specifying a higher-precision accumulator using the
+    `dtype` keyword can alleviate this issue.
+
+    Examples
+    --------
+    >>> a = np.array([[1, 2], [3, 4]])
+    >>> np.mean(a)
+    2.5
+    >>> np.mean(a, axis=0)
+    array([ 2.,  3.])
+    >>> np.mean(a, axis=1)
+    array([ 1.5,  3.5])
+
+    In single precision, `mean` can be inaccurate:
+
+    >>> a = np.zeros((2, 512*512), dtype=np.float32)
+    >>> a[0, :] = 1.0
+    >>> a[1, :] = 0.1
+    >>> np.mean(a)
+    0.546875
+
+    Computing the mean in float64 is more accurate:
+
+    >>> np.mean(a, dtype=np.float64)
+    0.55000000074505806
+
+    """
+    # NOTE(review): axis, dtype and out are accepted for API compatibility
+    # but are currently ignored; the mean is always over the flat array.
+    if not hasattr(a, "mean"):
+        a = numpypy.array(a)
+    return a.mean()
+
+
+def std(a, axis=None, dtype=None, out=None, ddof=0):
+ """
+ Compute the standard deviation along the specified axis.
+
+ Returns the standard deviation, a measure of the spread of a distribution,
+ of the array elements. The standard deviation is computed for the
+ flattened array by default, otherwise over the specified axis.
+
+ Parameters
+ ----------
+ a : array_like
+ Calculate the standard deviation of these values.
+ axis : int, optional
+ Axis along which the standard deviation is computed. The default is
+ to compute the standard deviation of the flattened array.
+ dtype : dtype, optional
+ Type to use in computing the standard deviation. For arrays of
+ integer type the default is float64, for arrays of float types it is
+ the same as the array type.
+ out : ndarray, optional
+ Alternative output array in which to place the result. It must have
+ the same shape as the expected output but the type (of the calculated
+ values) will be cast if necessary.
+ ddof : int, optional
+ Means Delta Degrees of Freedom. The divisor used in calculations
+ is ``N - ddof``, where ``N`` represents the number of elements.
+ By default `ddof` is zero.
+
+ Returns
+ -------
+ standard_deviation : ndarray, see dtype parameter above.
+ If `out` is None, return a new array containing the standard deviation,
+ otherwise return a reference to the output array.
+
+ See Also
+ --------
+ var, mean
+ numpy.doc.ufuncs : Section "Output arguments"
+
+ Notes
+ -----
+ The standard deviation is the square root of the average of the squared
+ deviations from the mean, i.e., ``std = sqrt(mean(abs(x - x.mean())**2))``.
+
+ The average squared deviation is normally calculated as ``x.sum() / N``, where
+ ``N = len(x)``. If, however, `ddof` is specified, the divisor ``N - ddof``
+ is used instead. In standard statistical practice, ``ddof=1`` provides an
+ unbiased estimator of the variance of the infinite population. ``ddof=0``
+ provides a maximum likelihood estimate of the variance for normally
+ distributed variables. The standard deviation computed in this function
+ is the square root of the estimated variance, so even with ``ddof=1``, it
+ will not be an unbiased estimate of the standard deviation per se.
+
+ Note that, for complex numbers, `std` takes the absolute
+ value before squaring, so that the result is always real and nonnegative.
+
+ For floating-point input, the *std* is computed using the same
+ precision the input has. Depending on the input data, this can cause
+ the results to be inaccurate, especially for float32 (see example below).
+ Specifying a higher-accuracy accumulator using the `dtype` keyword can
+ alleviate this issue.
+
+ Examples
+ --------
+ >>> a = np.array([[1, 2], [3, 4]])
+ >>> np.std(a)
+ 1.1180339887498949
+ >>> np.std(a, axis=0)
+ array([ 1., 1.])
+ >>> np.std(a, axis=1)
+ array([ 0.5, 0.5])
+
+ In single precision, std() can be inaccurate:
+
+ >>> a = np.zeros((2,512*512), dtype=np.float32)
+ >>> a[0,:] = 1.0
+ >>> a[1,:] = 0.1
+ >>> np.std(a)
+ 0.45172946707416706
+
+ Computing the standard deviation in float64 is more accurate:
+
+ >>> np.std(a, dtype=np.float64)
+ 0.44999999925552653
+
+ """
+ if not hasattr(a, "std"):
+ a = numpypy.array(a)
+ return a.std()
+
+
+def var(a, axis=None, dtype=None, out=None, ddof=0):
+ """
+ Compute the variance along the specified axis.
+
+ Returns the variance of the array elements, a measure of the spread of a
+ distribution. The variance is computed for the flattened array by
+ default, otherwise over the specified axis.
+
+ Parameters
+ ----------
+ a : array_like
+ Array containing numbers whose variance is desired. If `a` is not an
+ array, a conversion is attempted.
+ axis : int, optional
+ Axis along which the variance is computed. The default is to compute
+ the variance of the flattened array.
+ dtype : data-type, optional
+ Type to use in computing the variance. For arrays of integer type
+ the default is `float64`; for arrays of float types it is the same as
+ the array type.
+ out : ndarray, optional
+ Alternate output array in which to place the result. It must have
+ the same shape as the expected output, but the type is cast if
+ necessary.
+ ddof : int, optional
+ "Delta Degrees of Freedom": the divisor used in the calculation is
+ ``N - ddof``, where ``N`` represents the number of elements. By
+ default `ddof` is zero.
+
+ Returns
+ -------
+ variance : ndarray, see dtype parameter above
+ If ``out=None``, returns a new array containing the variance;
+ otherwise, a reference to the output array is returned.
+
+ See Also
+ --------
+ std : Standard deviation
+ mean : Average
+ numpy.doc.ufuncs : Section "Output arguments"
+
+ Notes
+ -----
+ The variance is the average of the squared deviations from the mean,
+ i.e., ``var = mean(abs(x - x.mean())**2)``.
+
+ The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``.
+ If, however, `ddof` is specified, the divisor ``N - ddof`` is used
+ instead. In standard statistical practice, ``ddof=1`` provides an
+ unbiased estimator of the variance of a hypothetical infinite population.
+ ``ddof=0`` provides a maximum likelihood estimate of the variance for
+ normally distributed variables.
+
+ Note that for complex numbers, the absolute value is taken before
+ squaring, so that the result is always real and nonnegative.
+
+ For floating-point input, the variance is computed using the same
+ precision the input has. Depending on the input data, this can cause
+ the results to be inaccurate, especially for `float32` (see example
+ below). Specifying a higher-accuracy accumulator using the ``dtype``
+ keyword can alleviate this issue.
+
+ Examples
+ --------
+ >>> a = np.array([[1,2],[3,4]])
+ >>> np.var(a)
+ 1.25
+ >>> np.var(a,0)
+ array([ 1., 1.])
+ >>> np.var(a,1)
+ array([ 0.25, 0.25])
+
+ In single precision, var() can be inaccurate:
+
+ >>> a = np.zeros((2,512*512), dtype=np.float32)
+ >>> a[0,:] = 1.0
+ >>> a[1,:] = 0.1
+ >>> np.var(a)
+ 0.20405951142311096
+
+ Computing the variance in float64 is more accurate:
+
+ >>> np.var(a, dtype=np.float64)
+ 0.20249999932997387
+ >>> ((1-0.55)**2 + (0.1-0.55)**2)/2
+ 0.20250000000000001
+
+ """
+ if not hasattr(a, "var"):
+ a = numpypy.array(a)
+ return a.var()
diff --git a/lib_pypy/numpypy/test/test_fromnumeric.py b/lib_pypy/numpypy/test/test_fromnumeric.py
new file mode 100644
--- /dev/null
+++ b/lib_pypy/numpypy/test/test_fromnumeric.py
@@ -0,0 +1,109 @@
+
+from pypy.module.micronumpy.test.test_base import BaseNumpyAppTest
+
+class AppTestFromNumeric(BaseNumpyAppTest):
+ def test_argmax(self):
+ # tests taken from numpy/core/fromnumeric.py docstring
+ from numpypy import array, arange, argmax
+ a = arange(6).reshape((2,3))
+ assert argmax(a) == 5
+ # assert (argmax(a, axis=0) == array([1, 1, 1])).all()
+ # assert (argmax(a, axis=1) == array([2, 2])).all()
+ b = arange(6)
+ b[1] = 5
+ assert argmax(b) == 1
+
+ def test_argmin(self):
+ # tests adapted from test_argmax
+ from numpypy import array, arange, argmin
+ a = arange(6).reshape((2,3))
+ assert argmin(a) == 0
+ # assert (argmax(a, axis=0) == array([0, 0, 0])).all()
+ # assert (argmax(a, axis=1) == array([0, 0])).all()
+ b = arange(6)
+ b[1] = 0
+ assert argmin(b) == 0
+
+ def test_shape(self):
+ # tests taken from numpy/core/fromnumeric.py docstring
+ from numpypy import array, identity, shape
+ assert shape(identity(3)) == (3, 3)
+ assert shape([[1, 2]]) == (1, 2)
+ assert shape([0]) == (1,)
+ assert shape(0) == ()
+ # a = array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
+ # assert shape(a) == (2,)
+
+ def test_sum(self):
+ # tests taken from numpy/core/fromnumeric.py docstring
+ from numpypy import array, sum, ones
+ assert sum([0.5, 1.5])== 2.0
+ assert sum([[0, 1], [0, 5]]) == 6
+ # assert sum([0.5, 0.7, 0.2, 1.5], dtype=int32) == 1
+ # assert (sum([[0, 1], [0, 5]], axis=0) == array([0, 6])).all()
+ # assert (sum([[0, 1], [0, 5]], axis=1) == array([1, 5])).all()
+ # If the accumulator is too small, overflow occurs:
+ # assert ones(128, dtype=int8).sum(dtype=int8) == -128
+
+ def test_amin(self):
+ # tests taken from numpy/core/fromnumeric.py docstring
+ from numpypy import array, arange, amin
+ a = arange(4).reshape((2,2))
+ assert amin(a) == 0
+ # # Minima along the first axis
+ # assert (amin(a, axis=0) == array([0, 1])).all()
+ # # Minima along the second axis
+ # assert (amin(a, axis=1) == array([0, 2])).all()
+ # # NaN behaviour
+ # b = arange(5, dtype=float)
+ # b[2] = NaN
+ # assert amin(b) == nan
+ # assert nanmin(b) == 0.0
+
+ def test_amax(self):
+ # tests taken from numpy/core/fromnumeric.py docstring
+ from numpypy import array, arange, amax
+ a = arange(4).reshape((2,2))
+ assert amax(a) == 3
+ # assert (amax(a, axis=0) == array([2, 3])).all()
+ # assert (amax(a, axis=1) == array([1, 3])).all()
+ # # NaN behaviour
+ # b = arange(5, dtype=float)
+ # b[2] = NaN
+ # assert amax(b) == nan
+ # assert nanmax(b) == 4.0
+
+ def test_alen(self):
+ # tests taken from numpy/core/fromnumeric.py docstring
+ from numpypy import array, zeros, alen
+ a = zeros((7,4,5))
+ assert a.shape[0] == 7
+ assert alen(a) == 7
+
+ def test_ndim(self):
+ # tests taken from numpy/core/fromnumeric.py docstring
+ from numpypy import array, ndim
+ assert ndim([[1,2,3],[4,5,6]]) == 2
+ assert ndim(array([[1,2,3],[4,5,6]])) == 2
+ assert ndim(1) == 0
+
+ def test_rank(self):
+ # tests taken from numpy/core/fromnumeric.py docstring
+ from numpypy import array, rank
+ assert rank([[1,2,3],[4,5,6]]) == 2
+ assert rank(array([[1,2,3],[4,5,6]])) == 2
+ assert rank(1) == 0
+
+ def test_var(self):
+ from numpypy import array, var
+ a = array([[1,2],[3,4]])
+ assert var(a) == 1.25
+ # assert (np.var(a,0) == array([ 1., 1.])).all()
+ # assert (np.var(a,1) == array([ 0.25, 0.25])).all()
+
+ def test_std(self):
+ from numpypy import array, std
+ a = array([[1, 2], [3, 4]])
+ assert std(a) == 1.1180339887498949
+ # assert (std(a, axis=0) == array([ 1., 1.])).all()
+ # assert (std(a, axis=1) == array([ 0.5, 0.5])).all()
diff --git a/pypy/annotation/description.py b/pypy/annotation/description.py
--- a/pypy/annotation/description.py
+++ b/pypy/annotation/description.py
@@ -180,7 +180,12 @@
if name is None:
name = pyobj.func_name
if signature is None:
- signature = cpython_code_signature(pyobj.func_code)
+ if hasattr(pyobj, '_generator_next_method_of_'):
+ from pypy.interpreter.argument import Signature
+ signature = Signature(['entry']) # haaaaaack
+ defaults = ()
+ else:
+ signature = cpython_code_signature(pyobj.func_code)
if defaults is None:
defaults = pyobj.func_defaults
self.name = name
@@ -252,7 +257,8 @@
try:
inputcells = args.match_signature(signature, defs_s)
except ArgErr, e:
- raise TypeError, "signature mismatch: %s" % e.getmsg(self.name)
+ raise TypeError("signature mismatch: %s() %s" %
+ (self.name, e.getmsg()))
return inputcells
def specialize(self, inputcells, op=None):
diff --git a/pypy/doc/Makefile b/pypy/doc/Makefile
--- a/pypy/doc/Makefile
+++ b/pypy/doc/Makefile
@@ -12,7 +12,7 @@
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest
+.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex man changes linkcheck doctest
help:
@echo "Please use \`make <target>' where <target> is one of"
@@ -23,6 +23,7 @@
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+ @echo " man to make manual pages"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
@@ -79,6 +80,11 @@
@echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
"run these through (pdf)latex."
+man:
+ $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+ @echo
+ @echo "Build finished. The manual pages are in $(BUILDDIR)/man"
+
changes:
python config/generate.py
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst
--- a/pypy/doc/coding-guide.rst
+++ b/pypy/doc/coding-guide.rst
@@ -175,15 +175,15 @@
RPython
=================
-RPython Definition, not
------------------------
+RPython Definition
+------------------
-The list and exact details of the "RPython" restrictions are a somewhat
-evolving topic. In particular, we have no formal language definition
-as we find it more practical to discuss and evolve the set of
-restrictions while working on the whole program analysis. If you
-have any questions about the restrictions below then please feel
-free to mail us at pypy-dev at codespeak net.
+RPython is a restricted subset of Python that is amenable to static analysis.
+Although there are additions to the language and some things might surprisingly
+work, this is a rough list of restrictions that should be considered. Note
+that there are tons of special cased restrictions that you'll encounter
+as you go. The exact definition is "RPython is everything that our translation
+toolchain can accept" :)
.. _`wrapped object`: coding-guide.html#wrapping-rules
@@ -198,7 +198,7 @@
contain both a string and a int must be avoided. It is allowed to
mix None (basically with the role of a null pointer) with many other
types: `wrapped objects`, class instances, lists, dicts, strings, etc.
- but *not* with int and floats.
+ but *not* with int, floats or tuples.
**constants**
@@ -209,9 +209,12 @@
have this restriction, so if you need mutable global state, store it
in the attributes of some prebuilt singleton instance.
+
+
**control structures**
- all allowed but yield, ``for`` loops restricted to builtin types
+ all allowed, ``for`` loops restricted to builtin types, generators
+ very restricted.
**range**
@@ -226,7 +229,8 @@
**generators**
- generators are not supported.
+ generators are supported, but their exact scope is very limited. You can't
+ merge two different generators in one control point.
**exceptions**
@@ -245,22 +249,27 @@
**strings**
- a lot of, but not all string methods are supported. Indexes can be
+ a lot of, but not all string methods are supported and those that are
+ supported, do not necessarily accept all arguments. Indexes can be
negative. In case they are not, then you get slightly more efficient
code if the translator can prove that they are non-negative. When
slicing a string it is necessary to prove that the slice start and
- stop indexes are non-negative.
+ stop indexes are non-negative. There is no implicit str-to-unicode cast
+ anywhere.
**tuples**
no variable-length tuples; use them to store or return pairs or n-tuples of
- values. Each combination of types for elements and length constitute a separate
- and not mixable type.
+ values. Each combination of types for elements and length constitute
+ a separate and not mixable type.
**lists**
lists are used as an allocated array. Lists are over-allocated, so list.append()
- is reasonably fast. Negative or out-of-bound indexes are only allowed for the
+ is reasonably fast. However, if you use a fixed-size list, the code
+ is more efficient. Annotator can figure out most of the time that your
+ list is fixed-size, even when you use list comprehension.
+ Negative or out-of-bound indexes are only allowed for the
most common operations, as follows:
- *indexing*:
@@ -287,16 +296,14 @@
**dicts**
- dicts with a unique key type only, provided it is hashable.
- String keys have been the only allowed key types for a while, but this was generalized.
- After some re-optimization,
- the implementation could safely decide that all string dict keys should be interned.
+ dicts with a unique key type only, provided it is hashable. Custom
+ hash functions and custom equality will not be honored.
+ Use ``pypy.rlib.objectmodel.r_dict`` for custom hash functions.
**list comprehensions**
- may be used to create allocated, initialized arrays.
- After list over-allocation was introduced, there is no longer any restriction.
+ May be used to create allocated, initialized arrays.
**functions**
@@ -334,9 +341,8 @@
**objects**
- in PyPy, wrapped objects are borrowed from the object space. Just like
- in CPython, code that needs e.g. a dictionary can use a wrapped dict
- and the object space operations on it.
+ Normal rules apply. Special methods are not honoured, except ``__init__`` and
+ ``__del__``.
This layout makes the number of types to take care about quite limited.
diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py
--- a/pypy/doc/conf.py
+++ b/pypy/doc/conf.py
@@ -197,3 +197,10 @@
# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'http://docs.python.org/': None}
+# -- Options for manpage output-------------------------------------------------
+
+man_pages = [
+ ('man/pypy.1', 'pypy',
+ u'fast, compliant alternative implementation of the Python language',
+ u'The PyPy Project', 1)
+]
diff --git a/pypy/doc/extradoc.rst b/pypy/doc/extradoc.rst
--- a/pypy/doc/extradoc.rst
+++ b/pypy/doc/extradoc.rst
@@ -8,6 +8,9 @@
*Articles about PyPy published so far, most recent first:* (bibtex_ file)
+* `Runtime Feedback in a Meta-Tracing JIT for Efficient Dynamic Languages`_,
+ C.F. Bolz, A. Cuni, M. Fijalkowski, M. Leuschel, S. Pedroni, A. Rigo
+
* `Allocation Removal by Partial Evaluation in a Tracing JIT`_,
C.F. Bolz, A. Cuni, M. Fijalkowski, M. Leuschel, S. Pedroni, A. Rigo
@@ -50,6 +53,9 @@
*Other research using PyPy (as far as we know it):*
+* `Hardware Transactional Memory Support for Lightweight Dynamic Language Evolution`_,
+ N. Riley and C. Zilles
+
* `PyGirl: Generating Whole-System VMs from High-Level Prototypes using PyPy`_,
C. Bruni and T. Verwaest
@@ -65,6 +71,7 @@
.. _bibtex: https://bitbucket.org/pypy/extradoc/raw/tip/talk/bibtex.bib
+.. _`Runtime Feedback in a Meta-Tracing JIT for Efficient Dynamic Languages`: https://bitbucket.org/pypy/extradoc/raw/extradoc/talk/icooolps2011/jit-hints.pdf
.. _`Allocation Removal by Partial Evaluation in a Tracing JIT`: http://codespeak.net/svn/pypy/extradoc/talk/pepm2011/bolz-allocation-removal.pdf
.. _`Towards a Jitting VM for Prolog Execution`: http://www.stups.uni-duesseldorf.de/publications/bolz-prolog-jit.pdf
.. _`High performance implementation of Python for CLI/.NET with JIT compiler generation for dynamic languages`: http://buildbot.pypy.org/misc/antocuni-thesis.pdf
@@ -74,6 +81,7 @@
.. _`Automatic JIT Compiler Generation with Runtime Partial Evaluation`: http://www.stups.uni-duesseldorf.de/thesis/final-master.pdf
.. _`RPython: A Step towards Reconciling Dynamically and Statically Typed OO Languages`: http://www.disi.unige.it/person/AnconaD/papers/Recent_abstracts.html#AACM-DLS07
.. _`EU Reports`: index-report.html
+.. _`Hardware Transactional Memory Support for Lightweight Dynamic Language Evolution`: http://sabi.net/nriley/pubs/dls6-riley.pdf
.. _`PyGirl: Generating Whole-System VMs from High-Level Prototypes using PyPy`: http://scg.unibe.ch/archive/papers/Brun09cPyGirl.pdf
.. _`Representation-Based Just-in-Time Specialization and the Psyco Prototype for Python`: http://psyco.sourceforge.net/psyco-pepm-a.ps.gz
.. _`Back to the Future in One Week -- Implementing a Smalltalk VM in PyPy`: http://dx.doi.org/10.1007/978-3-540-89275-5_7
diff --git a/pypy/doc/man/pypy.1.rst b/pypy/doc/man/pypy.1.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/man/pypy.1.rst
@@ -0,0 +1,90 @@
+======
+ pypy
+======
+
+SYNOPSIS
+========
+
+``pypy`` [*options*]
+[``-c`` *cmd*\ \|\ ``-m`` *mod*\ \|\ *file.py*\ \|\ ``-``\ ]
+[*arg*\ ...]
+
+OPTIONS
+=======
+
+-i
+ Inspect interactively after running script.
+
+-O
+ Dummy optimization flag for compatibility with C Python.
+
+-c *cmd*
+ Program passed in as CMD (terminates option list).
+
+-S
+ Do not ``import site`` on initialization.
+
+-u
+ Unbuffered binary ``stdout`` and ``stderr``.
+
+-h, --help
+ Show a help message and exit.
+
+-m *mod*
+ Library module to be run as a script (terminates option list).
+
+-W *arg*
+ Warning control (*arg* is *action*:*message*:*category*:*module*:*lineno*).
+
+-E
+ Ignore environment variables (such as ``PYTHONPATH``).
+
+--version
+ Print the PyPy version.
+
+--info
+ Print translation information about this PyPy executable.
+
+--jit *arg*
+ Low level JIT parameters. Format is
+ *arg*\ ``=``\ *value*\ [``,``\ *arg*\ ``=``\ *value*\ ...]
+
+ ``off``
+ Disable the JIT.
+
+ ``threshold=``\ *value*
+ Number of times a loop has to run for it to become hot.
+
+ ``function_threshold=``\ *value*
+ Number of times a function must run for it to become traced from
+ start.
+
+ ``inlining=``\ *value*
+ Inline python functions or not (``1``/``0``).
+
+ ``loop_longevity=``\ *value*
+ A parameter controlling how long loops will be kept before being
+ freed, an estimate.
+
+ ``max_retrace_guards=``\ *value*
+ Number of extra guards a retrace can cause.
+
+ ``retrace_limit=``\ *value*
+ How many times we can try retracing before giving up.
+
+ ``trace_eagerness=``\ *value*
+ Number of times a guard has to fail before we start compiling a
+ bridge.
+
+ ``trace_limit=``\ *value*
+ Number of recorded operations before we abort tracing with
+ ``ABORT_TRACE_TOO_LONG``.
+
+ ``enable_opts=``\ *value*
+ Optimizations to enable, or ``all``.
+ Warning, this option is dangerous, and should be avoided.
+
+SEE ALSO
+========
+
+**python**\ (1)
diff --git a/pypy/doc/tool/makecontributor.py b/pypy/doc/tool/makecontributor.py
deleted file mode 100644
--- a/pypy/doc/tool/makecontributor.py
+++ /dev/null
@@ -1,47 +0,0 @@
-"""
-
-generates a contributor list
-
-"""
-import py
-
-# this file is useless, use the following commandline instead:
-# hg churn -c -t "{author}" | sed -e 's/ <.*//'
-
-try:
- path = py.std.sys.argv[1]
-except IndexError:
- print "usage: %s ROOTPATH" %(py.std.sys.argv[0])
- raise SystemExit, 1
-
-d = {}
-
-for logentry in py.path.svnwc(path).log():
- a = logentry.author
- if a in d:
- d[a] += 1
- else:
- d[a] = 1
-
-items = d.items()
-items.sort(lambda x,y: -cmp(x[1], y[1]))
-
-import uconf # http://codespeak.net/svn/uconf/dist/uconf
-
-# Authors that don't want to be listed
-excluded = set("anna gintas ignas".split())
-cutoff = 5 # cutoff for authors in the LICENSE file
-mark = False
-for author, count in items:
- if author in excluded:
- continue
- user = uconf.system.User(author)
- try:
- realname = user.realname.strip()
- except KeyError:
- realname = author
- if not mark and count < cutoff:
- mark = True
- print '-'*60
- print " ", realname
- #print count, " ", author
diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py
--- a/pypy/interpreter/argument.py
+++ b/pypy/interpreter/argument.py
@@ -428,8 +428,8 @@
return self._match_signature(w_firstarg,
scope_w, signature, defaults_w, 0)
except ArgErr, e:
- raise OperationError(self.space.w_TypeError,
- self.space.wrap(e.getmsg(fnname)))
+ raise operationerrfmt(self.space.w_TypeError,
+ "%s() %s", fnname, e.getmsg())
def _parse(self, w_firstarg, signature, defaults_w, blindargs=0):
"""Parse args and kwargs according to the signature of a code object,
@@ -450,8 +450,8 @@
try:
return self._parse(w_firstarg, signature, defaults_w, blindargs)
except ArgErr, e:
- raise OperationError(self.space.w_TypeError,
- self.space.wrap(e.getmsg(fnname)))
+ raise operationerrfmt(self.space.w_TypeError,
+ "%s() %s", fnname, e.getmsg())
@staticmethod
def frompacked(space, w_args=None, w_kwds=None):
@@ -626,7 +626,7 @@
class ArgErr(Exception):
- def getmsg(self, fnname):
+ def getmsg(self):
raise NotImplementedError
class ArgErrCount(ArgErr):
@@ -642,11 +642,10 @@
self.num_args = got_nargs
self.num_kwds = nkwds
- def getmsg(self, fnname):
+ def getmsg(self):
n = self.expected_nargs
if n == 0:
- msg = "%s() takes no arguments (%d given)" % (
- fnname,
+ msg = "takes no arguments (%d given)" % (
self.num_args + self.num_kwds)
else:
defcount = self.num_defaults
@@ -672,8 +671,7 @@
msg2 = " non-keyword"
else:
msg2 = ""
- msg = "%s() takes %s %d%s argument%s (%d given)" % (
- fnname,
+ msg = "takes %s %d%s argument%s (%d given)" % (
msg1,
n,
msg2,
@@ -686,9 +684,8 @@
def __init__(self, argname):
self.argname = argname
- def getmsg(self, fnname):
- msg = "%s() got multiple values for keyword argument '%s'" % (
- fnname,
+ def getmsg(self):
+ msg = "got multiple values for keyword argument '%s'" % (
self.argname)
return msg
@@ -722,13 +719,11 @@
break
self.kwd_name = name
- def getmsg(self, fnname):
+ def getmsg(self):
if self.num_kwds == 1:
- msg = "%s() got an unexpected keyword argument '%s'" % (
- fnname,
+ msg = "got an unexpected keyword argument '%s'" % (
self.kwd_name)
else:
- msg = "%s() got %d unexpected keyword arguments" % (
- fnname,
+ msg = "got %d unexpected keyword arguments" % (
self.num_kwds)
return msg
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1567,12 +1567,15 @@
'ArithmeticError',
'AssertionError',
'AttributeError',
+ 'BaseException',
+ 'DeprecationWarning',
'EOFError',
'EnvironmentError',
'Exception',
'FloatingPointError',
'IOError',
'ImportError',
+ 'ImportWarning',
'IndentationError',
'IndexError',
'KeyError',
@@ -1593,7 +1596,10 @@
'TabError',
'TypeError',
'UnboundLocalError',
+ 'UnicodeDecodeError',
'UnicodeError',
+ 'UnicodeEncodeError',
+ 'UnicodeTranslateError',
'ValueError',
'ZeroDivisionError',
'UnicodeEncodeError',
diff --git a/pypy/interpreter/eval.py b/pypy/interpreter/eval.py
--- a/pypy/interpreter/eval.py
+++ b/pypy/interpreter/eval.py
@@ -2,7 +2,6 @@
This module defines the abstract base classes that support execution:
Code and Frame.
"""
-from pypy.rlib import jit
from pypy.interpreter.error import OperationError
from pypy.interpreter.baseobjspace import Wrappable
@@ -98,7 +97,6 @@
"Abstract. Get the expected number of locals."
raise TypeError, "abstract"
- @jit.dont_look_inside
def fast2locals(self):
# Copy values from the fastlocals to self.w_locals
if self.w_locals is None:
@@ -112,7 +110,6 @@
w_name = self.space.wrap(name)
self.space.setitem(self.w_locals, w_name, w_value)
- @jit.dont_look_inside
def locals2fast(self):
# Copy values from self.w_locals to the fastlocals
assert self.w_locals is not None
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -619,7 +619,8 @@
self.descr_reqcls,
args)
except Exception, e:
- raise self.handle_exception(space, e)
+ self.handle_exception(space, e)
+ w_result = None
if w_result is None:
w_result = space.w_None
return w_result
@@ -655,7 +656,8 @@
self.descr_reqcls,
args)
except Exception, e:
- raise self.handle_exception(space, e)
+ self.handle_exception(space, e)
+ w_result = None
if w_result is None:
w_result = space.w_None
return w_result
@@ -674,7 +676,8 @@
self.descr_reqcls,
args.prepend(w_obj))
except Exception, e:
- raise self.handle_exception(space, e)
+ self.handle_exception(space, e)
+ w_result = None
if w_result is None:
w_result = space.w_None
return w_result
@@ -690,7 +693,8 @@
raise OperationError(space.w_SystemError,
space.wrap("unexpected DescrMismatch error"))
except Exception, e:
- raise self.handle_exception(space, e)
+ self.handle_exception(space, e)
+ w_result = None
if w_result is None:
w_result = space.w_None
return w_result
@@ -708,7 +712,8 @@
self.descr_reqcls,
Arguments(space, [w1]))
except Exception, e:
- raise self.handle_exception(space, e)
+ self.handle_exception(space, e)
+ w_result = None
if w_result is None:
w_result = space.w_None
return w_result
@@ -726,7 +731,8 @@
self.descr_reqcls,
Arguments(space, [w1, w2]))
except Exception, e:
- raise self.handle_exception(space, e)
+ self.handle_exception(space, e)
+ w_result = None
if w_result is None:
w_result = space.w_None
return w_result
@@ -744,7 +750,8 @@
self.descr_reqcls,
Arguments(space, [w1, w2, w3]))
except Exception, e:
- raise self.handle_exception(space, e)
+ self.handle_exception(space, e)
+ w_result = None
if w_result is None:
w_result = space.w_None
return w_result
@@ -763,7 +770,8 @@
Arguments(space,
[w1, w2, w3, w4]))
except Exception, e:
- raise self.handle_exception(space, e)
+ self.handle_exception(space, e)
+ w_result = None
if w_result is None:
w_result = space.w_None
return w_result
diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py
--- a/pypy/interpreter/generator.py
+++ b/pypy/interpreter/generator.py
@@ -162,7 +162,8 @@
# generate 2 versions of the function and 2 jit drivers.
def _create_unpack_into():
jitdriver = jit.JitDriver(greens=['pycode'],
- reds=['self', 'frame', 'results'])
+ reds=['self', 'frame', 'results'],
+ name='unpack_into')
def unpack_into(self, results):
"""This is a hack for performance: runs the generator and collects
all produced items in a list."""
@@ -196,4 +197,4 @@
self.frame = None
return unpack_into
unpack_into = _create_unpack_into()
- unpack_into_w = _create_unpack_into()
\ No newline at end of file
+ unpack_into_w = _create_unpack_into()
diff --git a/pypy/interpreter/test/test_argument.py b/pypy/interpreter/test/test_argument.py
--- a/pypy/interpreter/test/test_argument.py
+++ b/pypy/interpreter/test/test_argument.py
@@ -393,8 +393,8 @@
class FakeArgErr(ArgErr):
- def getmsg(self, fname):
- return "msg "+fname
+ def getmsg(self):
+ return "msg"
def _match_signature(*args):
raise FakeArgErr()
@@ -404,7 +404,7 @@
excinfo = py.test.raises(OperationError, args.parse_obj, "obj", "foo",
Signature(["a", "b"], None, None))
assert excinfo.value.w_type is TypeError
- assert excinfo.value._w_value == "msg foo"
+ assert excinfo.value.get_w_value(space) == "foo() msg"
def test_args_parsing_into_scope(self):
@@ -448,8 +448,8 @@
class FakeArgErr(ArgErr):
- def getmsg(self, fname):
- return "msg "+fname
+ def getmsg(self):
+ return "msg"
def _match_signature(*args):
raise FakeArgErr()
@@ -460,7 +460,7 @@
"obj", [None, None], "foo",
Signature(["a", "b"], None, None))
assert excinfo.value.w_type is TypeError
- assert excinfo.value._w_value == "msg foo"
+ assert excinfo.value.get_w_value(space) == "foo() msg"
def test_topacked_frompacked(self):
space = DummySpace()
@@ -493,35 +493,35 @@
# got_nargs, nkwds, expected_nargs, has_vararg, has_kwarg,
# defaults_w, missing_args
err = ArgErrCount(1, 0, 0, False, False, None, 0)
- s = err.getmsg('foo')
- assert s == "foo() takes no arguments (1 given)"
+ s = err.getmsg()
+ assert s == "takes no arguments (1 given)"
err = ArgErrCount(0, 0, 1, False, False, [], 1)
- s = err.getmsg('foo')
- assert s == "foo() takes exactly 1 argument (0 given)"
+ s = err.getmsg()
+ assert s == "takes exactly 1 argument (0 given)"
err = ArgErrCount(3, 0, 2, False, False, [], 0)
- s = err.getmsg('foo')
- assert s == "foo() takes exactly 2 arguments (3 given)"
+ s = err.getmsg()
+ assert s == "takes exactly 2 arguments (3 given)"
err = ArgErrCount(3, 0, 2, False, False, ['a'], 0)
- s = err.getmsg('foo')
- assert s == "foo() takes at most 2 arguments (3 given)"
+ s = err.getmsg()
+ assert s == "takes at most 2 arguments (3 given)"
err = ArgErrCount(1, 0, 2, True, False, [], 1)
- s = err.getmsg('foo')
- assert s == "foo() takes at least 2 arguments (1 given)"
+ s = err.getmsg()
+ assert s == "takes at least 2 arguments (1 given)"
err = ArgErrCount(0, 1, 2, True, False, ['a'], 1)
- s = err.getmsg('foo')
- assert s == "foo() takes at least 1 non-keyword argument (0 given)"
+ s = err.getmsg()
+ assert s == "takes at least 1 non-keyword argument (0 given)"
err = ArgErrCount(2, 1, 1, False, True, [], 0)
- s = err.getmsg('foo')
- assert s == "foo() takes exactly 1 non-keyword argument (2 given)"
+ s = err.getmsg()
+ assert s == "takes exactly 1 non-keyword argument (2 given)"
err = ArgErrCount(0, 1, 1, False, True, [], 1)
- s = err.getmsg('foo')
- assert s == "foo() takes exactly 1 non-keyword argument (0 given)"
+ s = err.getmsg()
+ assert s == "takes exactly 1 non-keyword argument (0 given)"
err = ArgErrCount(0, 1, 1, True, True, [], 1)
- s = err.getmsg('foo')
- assert s == "foo() takes at least 1 non-keyword argument (0 given)"
+ s = err.getmsg()
+ assert s == "takes at least 1 non-keyword argument (0 given)"
err = ArgErrCount(2, 1, 1, False, True, ['a'], 0)
- s = err.getmsg('foo')
- assert s == "foo() takes at most 1 non-keyword argument (2 given)"
+ s = err.getmsg()
+ assert s == "takes at most 1 non-keyword argument (2 given)"
def test_bad_type_for_star(self):
space = self.space
@@ -543,12 +543,12 @@
def test_unknown_keywords(self):
space = DummySpace()
err = ArgErrUnknownKwds(space, 1, ['a', 'b'], [True, False], None)
- s = err.getmsg('foo')
- assert s == "foo() got an unexpected keyword argument 'b'"
+ s = err.getmsg()
+ assert s == "got an unexpected keyword argument 'b'"
err = ArgErrUnknownKwds(space, 2, ['a', 'b', 'c'],
[True, False, False], None)
- s = err.getmsg('foo')
- assert s == "foo() got 2 unexpected keyword arguments"
+ s = err.getmsg()
+ assert s == "got 2 unexpected keyword arguments"
def test_unknown_unicode_keyword(self):
class DummySpaceUnicode(DummySpace):
@@ -558,13 +558,13 @@
err = ArgErrUnknownKwds(space, 1, ['a', None, 'b', 'c'],
[True, False, True, True],
[unichr(0x1234), u'b', u'c'])
- s = err.getmsg('foo')
- assert s == "foo() got an unexpected keyword argument '\xe1\x88\xb4'"
+ s = err.getmsg()
+ assert s == "got an unexpected keyword argument '\xe1\x88\xb4'"
def test_multiple_values(self):
err = ArgErrMultipleValues('bla')
- s = err.getmsg('foo')
- assert s == "foo() got multiple values for keyword argument 'bla'"
+ s = err.getmsg()
+ assert s == "got multiple values for keyword argument 'bla'"
class AppTestArgument:
def test_error_message(self):
diff --git a/pypy/jit/backend/llgraph/runner.py b/pypy/jit/backend/llgraph/runner.py
--- a/pypy/jit/backend/llgraph/runner.py
+++ b/pypy/jit/backend/llgraph/runner.py
@@ -37,7 +37,7 @@
def get_arg_types(self):
return self.arg_types
- def get_return_type(self):
+ def get_result_type(self):
return self.typeinfo
def get_extra_info(self):
diff --git a/pypy/jit/backend/llsupport/descr.py b/pypy/jit/backend/llsupport/descr.py
--- a/pypy/jit/backend/llsupport/descr.py
+++ b/pypy/jit/backend/llsupport/descr.py
@@ -5,11 +5,7 @@
from pypy.jit.metainterp.history import AbstractDescr, getkind
from pypy.jit.metainterp import history
from pypy.jit.codewriter import heaptracker, longlong
-
-# The point of the class organization in this file is to make instances
-# as compact as possible. This is done by not storing the field size or
-# the 'is_pointer_field' flag in the instance itself but in the class
-# (in methods actually) using a few classes instead of just one.
+from pypy.jit.codewriter.longlong import is_longlong
class GcCache(object):
@@ -19,6 +15,7 @@
self._cache_size = {}
self._cache_field = {}
self._cache_array = {}
+ self._cache_arraylen = {}
self._cache_call = {}
self._cache_interiorfield = {}
@@ -26,24 +23,15 @@
assert isinstance(STRUCT, lltype.GcStruct)
def init_array_descr(self, ARRAY, arraydescr):
- assert isinstance(ARRAY, lltype.GcArray)
+ assert (isinstance(ARRAY, lltype.GcArray) or
+ isinstance(ARRAY, lltype.GcStruct) and ARRAY._arrayfld)
-if lltype.SignedLongLong is lltype.Signed:
- def is_longlong(TYPE):
- return False
-else:
- assert rffi.sizeof(lltype.SignedLongLong) == rffi.sizeof(lltype.Float)
- def is_longlong(TYPE):
- return TYPE in (lltype.SignedLongLong, lltype.UnsignedLongLong)
-
# ____________________________________________________________
# SizeDescrs
class SizeDescr(AbstractDescr):
size = 0 # help translation
- is_immutable = False
-
tid = llop.combine_ushort(lltype.Signed, 0, 0)
def __init__(self, size, count_fields_if_immut=-1):
@@ -77,265 +65,247 @@
cache[STRUCT] = sizedescr
return sizedescr
+
# ____________________________________________________________
# FieldDescrs
-class BaseFieldDescr(AbstractDescr):
+FLAG_POINTER = 'P'
+FLAG_FLOAT = 'F'
+FLAG_UNSIGNED = 'U'
+FLAG_SIGNED = 'S'
+FLAG_STRUCT = 'X'
+FLAG_VOID = 'V'
+
+class FieldDescr(AbstractDescr):
+ name = ''
offset = 0 # help translation
- name = ''
- _clsname = ''
+ field_size = 0
+ flag = '\x00'
- def __init__(self, name, offset):
+ def __init__(self, name, offset, field_size, flag):
self.name = name
self.offset = offset
+ self.field_size = field_size
+ self.flag = flag
+
+ def is_pointer_field(self):
+ return self.flag == FLAG_POINTER
+
+ def is_float_field(self):
+ return self.flag == FLAG_FLOAT
+
+ def is_field_signed(self):
+ return self.flag == FLAG_SIGNED
def sort_key(self):
return self.offset
- def get_field_size(self, translate_support_code):
- raise NotImplementedError
+ def repr_of_descr(self):
+ return '<Field%s %s %s>' % (self.flag, self.name, self.offset)
- _is_pointer_field = False # unless overridden by GcPtrFieldDescr
- _is_float_field = False # unless overridden by FloatFieldDescr
- _is_field_signed = False # unless overridden by XxxFieldDescr
-
- def is_pointer_field(self):
- return self._is_pointer_field
-
- def is_float_field(self):
- return self._is_float_field
-
- def is_field_signed(self):
- return self._is_field_signed
-
- def repr_of_descr(self):
- return '<%s %s %s>' % (self._clsname, self.name, self.offset)
-
-class DynamicFieldDescr(BaseFieldDescr):
- def __init__(self, offset, fieldsize, is_pointer, is_float, is_signed):
- self.offset = offset
- self._fieldsize = fieldsize
- self._is_pointer_field = is_pointer
- self._is_float_field = is_float
- self._is_field_signed = is_signed
-
- def get_field_size(self, translate_support_code):
- return self._fieldsize
-
-class NonGcPtrFieldDescr(BaseFieldDescr):
- _clsname = 'NonGcPtrFieldDescr'
- def get_field_size(self, translate_support_code):
- return symbolic.get_size_of_ptr(translate_support_code)
-
-class GcPtrFieldDescr(NonGcPtrFieldDescr):
- _clsname = 'GcPtrFieldDescr'
- _is_pointer_field = True
-
-def getFieldDescrClass(TYPE):
- return getDescrClass(TYPE, BaseFieldDescr, GcPtrFieldDescr,
- NonGcPtrFieldDescr, 'Field', 'get_field_size',
- '_is_float_field', '_is_field_signed')
def get_field_descr(gccache, STRUCT, fieldname):
cache = gccache._cache_field
try:
return cache[STRUCT][fieldname]
except KeyError:
- offset, _ = symbolic.get_field_token(STRUCT, fieldname,
- gccache.translate_support_code)
+ offset, size = symbolic.get_field_token(STRUCT, fieldname,
+ gccache.translate_support_code)
FIELDTYPE = getattr(STRUCT, fieldname)
+ flag = get_type_flag(FIELDTYPE)
name = '%s.%s' % (STRUCT._name, fieldname)
- fielddescr = getFieldDescrClass(FIELDTYPE)(name, offset)
+ fielddescr = FieldDescr(name, offset, size, flag)
cachedict = cache.setdefault(STRUCT, {})
cachedict[fieldname] = fielddescr
return fielddescr
+def get_type_flag(TYPE):
+ if isinstance(TYPE, lltype.Ptr):
+ if TYPE.TO._gckind == 'gc':
+ return FLAG_POINTER
+ else:
+ return FLAG_UNSIGNED
+ if isinstance(TYPE, lltype.Struct):
+ return FLAG_STRUCT
+ if TYPE is lltype.Float or is_longlong(TYPE):
+ return FLAG_FLOAT
+ if (TYPE is not lltype.Bool and isinstance(TYPE, lltype.Number) and
+ rffi.cast(TYPE, -1) == -1):
+ return FLAG_SIGNED
+ return FLAG_UNSIGNED
+
+def get_field_arraylen_descr(gccache, ARRAY_OR_STRUCT):
+ cache = gccache._cache_arraylen
+ try:
+ return cache[ARRAY_OR_STRUCT]
+ except KeyError:
+ tsc = gccache.translate_support_code
+ (_, _, ofs) = symbolic.get_array_token(ARRAY_OR_STRUCT, tsc)
+ size = symbolic.get_size(lltype.Signed, tsc)
+ result = FieldDescr("len", ofs, size, get_type_flag(lltype.Signed))
+ cache[ARRAY_OR_STRUCT] = result
+ return result
+
+
# ____________________________________________________________
# ArrayDescrs
-_A = lltype.GcArray(lltype.Signed) # a random gcarray
-_AF = lltype.GcArray(lltype.Float) # an array of C doubles
+class ArrayDescr(AbstractDescr):
+ tid = 0
+ basesize = 0 # workaround for the annotator
+ itemsize = 0
+ lendescr = None
+ flag = '\x00'
-
-class BaseArrayDescr(AbstractDescr):
- _clsname = ''
- tid = llop.combine_ushort(lltype.Signed, 0, 0)
-
- def get_base_size(self, translate_support_code):
- basesize, _, _ = symbolic.get_array_token(_A, translate_support_code)
- return basesize
-
- def get_ofs_length(self, translate_support_code):
- _, _, ofslength = symbolic.get_array_token(_A, translate_support_code)
- return ofslength
-
- def get_item_size(self, translate_support_code):
- raise NotImplementedError
-
- _is_array_of_pointers = False # unless overridden by GcPtrArrayDescr
- _is_array_of_floats = False # unless overridden by FloatArrayDescr
- _is_array_of_structs = False # unless overridden by StructArrayDescr
- _is_item_signed = False # unless overridden by XxxArrayDescr
+ def __init__(self, basesize, itemsize, lendescr, flag):
+ self.basesize = basesize
+ self.itemsize = itemsize
+ self.lendescr = lendescr # or None, if no length
+ self.flag = flag
def is_array_of_pointers(self):
- return self._is_array_of_pointers
+ return self.flag == FLAG_POINTER
def is_array_of_floats(self):
- return self._is_array_of_floats
+ return self.flag == FLAG_FLOAT
+
+ def is_item_signed(self):
+ return self.flag == FLAG_SIGNED
def is_array_of_structs(self):
- return self._is_array_of_structs
-
- def is_item_signed(self):
- return self._is_item_signed
+ return self.flag == FLAG_STRUCT
def repr_of_descr(self):
- return '<%s>' % self._clsname
+ return '<Array%s %s>' % (self.flag, self.itemsize)
-class NonGcPtrArrayDescr(BaseArrayDescr):
- _clsname = 'NonGcPtrArrayDescr'
- def get_item_size(self, translate_support_code):
- return symbolic.get_size_of_ptr(translate_support_code)
-
-class GcPtrArrayDescr(NonGcPtrArrayDescr):
- _clsname = 'GcPtrArrayDescr'
- _is_array_of_pointers = True
-
-class FloatArrayDescr(BaseArrayDescr):
- _clsname = 'FloatArrayDescr'
- _is_array_of_floats = True
- def get_base_size(self, translate_support_code):
- basesize, _, _ = symbolic.get_array_token(_AF, translate_support_code)
- return basesize
- def get_item_size(self, translate_support_code):
- return symbolic.get_size(lltype.Float, translate_support_code)
-
-class StructArrayDescr(BaseArrayDescr):
- _clsname = 'StructArrayDescr'
- _is_array_of_structs = True
-
-class BaseArrayNoLengthDescr(BaseArrayDescr):
- def get_base_size(self, translate_support_code):
- return 0
-
- def get_ofs_length(self, translate_support_code):
- return -1
-
-class DynamicArrayNoLengthDescr(BaseArrayNoLengthDescr):
- def __init__(self, itemsize):
- self.itemsize = itemsize
-
- def get_item_size(self, translate_support_code):
- return self.itemsize
-
-class NonGcPtrArrayNoLengthDescr(BaseArrayNoLengthDescr):
- _clsname = 'NonGcPtrArrayNoLengthDescr'
- def get_item_size(self, translate_support_code):
- return symbolic.get_size_of_ptr(translate_support_code)
-
-class GcPtrArrayNoLengthDescr(NonGcPtrArrayNoLengthDescr):
- _clsname = 'GcPtrArrayNoLengthDescr'
- _is_array_of_pointers = True
-
-def getArrayDescrClass(ARRAY):
- if ARRAY.OF is lltype.Float:
- return FloatArrayDescr
- elif isinstance(ARRAY.OF, lltype.Struct):
- class Descr(StructArrayDescr):
- _clsname = '%sArrayDescr' % ARRAY.OF._name
- def get_item_size(self, translate_support_code):
- return symbolic.get_size(ARRAY.OF, translate_support_code)
- Descr.__name__ = Descr._clsname
- return Descr
- return getDescrClass(ARRAY.OF, BaseArrayDescr, GcPtrArrayDescr,
- NonGcPtrArrayDescr, 'Array', 'get_item_size',
- '_is_array_of_floats', '_is_item_signed')
-
-def getArrayNoLengthDescrClass(ARRAY):
- return getDescrClass(ARRAY.OF, BaseArrayNoLengthDescr, GcPtrArrayNoLengthDescr,
- NonGcPtrArrayNoLengthDescr, 'ArrayNoLength', 'get_item_size',
- '_is_array_of_floats', '_is_item_signed')
-
-def get_array_descr(gccache, ARRAY):
+def get_array_descr(gccache, ARRAY_OR_STRUCT):
cache = gccache._cache_array
try:
- return cache[ARRAY]
+ return cache[ARRAY_OR_STRUCT]
except KeyError:
- # we only support Arrays that are either GcArrays, or raw no-length
- # non-gc Arrays.
- if ARRAY._hints.get('nolength', False):
- assert not isinstance(ARRAY, lltype.GcArray)
- arraydescr = getArrayNoLengthDescrClass(ARRAY)()
+ tsc = gccache.translate_support_code
+ basesize, itemsize, _ = symbolic.get_array_token(ARRAY_OR_STRUCT, tsc)
+ if isinstance(ARRAY_OR_STRUCT, lltype.Array):
+ ARRAY_INSIDE = ARRAY_OR_STRUCT
else:
- assert isinstance(ARRAY, lltype.GcArray)
- arraydescr = getArrayDescrClass(ARRAY)()
- # verify basic assumption that all arrays' basesize and ofslength
- # are equal
- basesize, itemsize, ofslength = symbolic.get_array_token(ARRAY, False)
- assert basesize == arraydescr.get_base_size(False)
- assert itemsize == arraydescr.get_item_size(False)
- if not ARRAY._hints.get('nolength', False):
- assert ofslength == arraydescr.get_ofs_length(False)
- if isinstance(ARRAY, lltype.GcArray):
- gccache.init_array_descr(ARRAY, arraydescr)
- cache[ARRAY] = arraydescr
+ ARRAY_INSIDE = ARRAY_OR_STRUCT._flds[ARRAY_OR_STRUCT._arrayfld]
+ if ARRAY_INSIDE._hints.get('nolength', False):
+ lendescr = None
+ else:
+ lendescr = get_field_arraylen_descr(gccache, ARRAY_OR_STRUCT)
+ flag = get_type_flag(ARRAY_INSIDE.OF)
+ arraydescr = ArrayDescr(basesize, itemsize, lendescr, flag)
+ if ARRAY_OR_STRUCT._gckind == 'gc':
+ gccache.init_array_descr(ARRAY_OR_STRUCT, arraydescr)
+ cache[ARRAY_OR_STRUCT] = arraydescr
return arraydescr
+
# ____________________________________________________________
# InteriorFieldDescr
class InteriorFieldDescr(AbstractDescr):
- arraydescr = BaseArrayDescr() # workaround for the annotator
- fielddescr = BaseFieldDescr('', 0)
+ arraydescr = ArrayDescr(0, 0, None, '\x00') # workaround for the annotator
+ fielddescr = FieldDescr('', 0, 0, '\x00')
def __init__(self, arraydescr, fielddescr):
+ assert arraydescr.flag == FLAG_STRUCT
self.arraydescr = arraydescr
self.fielddescr = fielddescr
+ def sort_key(self):
+ return self.fielddescr.sort_key()
+
def is_pointer_field(self):
return self.fielddescr.is_pointer_field()
def is_float_field(self):
return self.fielddescr.is_float_field()
- def sort_key(self):
- return self.fielddescr.sort_key()
-
def repr_of_descr(self):
return '<InteriorFieldDescr %s>' % self.fielddescr.repr_of_descr()
-def get_interiorfield_descr(gc_ll_descr, ARRAY, FIELDTP, name):
+def get_interiorfield_descr(gc_ll_descr, ARRAY, name):
cache = gc_ll_descr._cache_interiorfield
try:
- return cache[(ARRAY, FIELDTP, name)]
+ return cache[(ARRAY, name)]
except KeyError:
arraydescr = get_array_descr(gc_ll_descr, ARRAY)
- fielddescr = get_field_descr(gc_ll_descr, FIELDTP, name)
+ fielddescr = get_field_descr(gc_ll_descr, ARRAY.OF, name)
descr = InteriorFieldDescr(arraydescr, fielddescr)
- cache[(ARRAY, FIELDTP, name)] = descr
+ cache[(ARRAY, name)] = descr
return descr
+def get_dynamic_interiorfield_descr(gc_ll_descr, offset, width, fieldsize,
+ is_pointer, is_float, is_signed):
+ arraydescr = ArrayDescr(0, width, None, FLAG_STRUCT)
+ if is_pointer:
+ assert not is_float
+ flag = FLAG_POINTER
+ elif is_float:
+ flag = FLAG_FLOAT
+ elif is_signed:
+ flag = FLAG_SIGNED
+ else:
+ flag = FLAG_UNSIGNED
+ fielddescr = FieldDescr('dynamic', offset, fieldsize, flag)
+ return InteriorFieldDescr(arraydescr, fielddescr)
+
+
# ____________________________________________________________
# CallDescrs
-class BaseCallDescr(AbstractDescr):
- _clsname = ''
- loop_token = None
+class CallDescr(AbstractDescr):
arg_classes = '' # <-- annotation hack
+ result_type = '\x00'
+ result_flag = '\x00'
ffi_flags = 1
+ call_stub_i = staticmethod(lambda func, args_i, args_r, args_f:
+ 0)
+ call_stub_r = staticmethod(lambda func, args_i, args_r, args_f:
+ lltype.nullptr(llmemory.GCREF.TO))
+ call_stub_f = staticmethod(lambda func,args_i,args_r,args_f:
+ longlong.ZEROF)
- def __init__(self, arg_classes, extrainfo=None, ffi_flags=1):
- self.arg_classes = arg_classes # string of "r" and "i" (ref/int)
+ def __init__(self, arg_classes, result_type, result_signed, result_size,
+ extrainfo=None, ffi_flags=1):
+ """
+ 'arg_classes' is a string of characters, one per argument:
+ 'i', 'r', 'f', 'L', 'S'
+
+ 'result_type' is one character from the same list or 'v'
+
+ 'result_signed' is a boolean True/False
+ """
+ self.arg_classes = arg_classes
+ self.result_type = result_type
+ self.result_size = result_size
self.extrainfo = extrainfo
self.ffi_flags = ffi_flags
# NB. the default ffi_flags is 1, meaning FUNCFLAG_CDECL, which
# makes sense on Windows as it's the one for all the C functions
# we are compiling together with the JIT. On non-Windows platforms
# it is just ignored anyway.
+ if result_type == 'v':
+ result_flag = FLAG_VOID
+ elif result_type == 'i':
+ if result_signed:
+ result_flag = FLAG_SIGNED
+ else:
+ result_flag = FLAG_UNSIGNED
+ elif result_type == history.REF:
+ result_flag = FLAG_POINTER
+ elif result_type == history.FLOAT or result_type == 'L':
+ result_flag = FLAG_FLOAT
+ elif result_type == 'S':
+ result_flag = FLAG_UNSIGNED
+ else:
+ raise NotImplementedError("result_type = '%s'" % (result_type,))
+ self.result_flag = result_flag
def __repr__(self):
- res = '%s(%s)' % (self.__class__.__name__, self.arg_classes)
+ res = 'CallDescr(%s)' % (self.arg_classes,)
extraeffect = getattr(self.extrainfo, 'extraeffect', None)
if extraeffect is not None:
res += ' EF=%r' % extraeffect
@@ -363,14 +333,14 @@
def get_arg_types(self):
return self.arg_classes
- def get_return_type(self):
- return self._return_type
+ def get_result_type(self):
+ return self.result_type
- def get_result_size(self, translate_support_code):
- raise NotImplementedError
+ def get_result_size(self):
+ return self.result_size
def is_result_signed(self):
- return False # unless overridden
+ return self.result_flag == FLAG_SIGNED
def create_call_stub(self, rtyper, RESULT):
from pypy.rlib.clibffi import FFI_DEFAULT_ABI
@@ -408,18 +378,26 @@
seen = {'i': 0, 'r': 0, 'f': 0}
args = ", ".join([process(c) for c in self.arg_classes])
- if self.get_return_type() == history.INT:
+ result_type = self.get_result_type()
+ if result_type == history.INT:
result = 'rffi.cast(lltype.Signed, res)'
- elif self.get_return_type() == history.REF:
+ category = 'i'
+ elif result_type == history.REF:
+ assert RESULT == llmemory.GCREF # should be ensured by the caller
result = 'lltype.cast_opaque_ptr(llmemory.GCREF, res)'
- elif self.get_return_type() == history.FLOAT:
+ category = 'r'
+ elif result_type == history.FLOAT:
result = 'longlong.getfloatstorage(res)'
- elif self.get_return_type() == 'L':
+ category = 'f'
+ elif result_type == 'L':
result = 'rffi.cast(lltype.SignedLongLong, res)'
- elif self.get_return_type() == history.VOID:
- result = 'None'
- elif self.get_return_type() == 'S':
+ category = 'f'
+ elif result_type == history.VOID:
+ result = '0'
+ category = 'i'
+ elif result_type == 'S':
result = 'longlong.singlefloat2int(res)'
+ category = 'i'
else:
assert 0
source = py.code.Source("""
@@ -433,10 +411,13 @@
d = globals().copy()
d.update(locals())
exec source.compile() in d
- self.call_stub = d['call_stub']
+ call_stub = d['call_stub']
+ # store the function into one of three attributes, to preserve
+ # type-correctness of the return value
+ setattr(self, 'call_stub_%s' % category, call_stub)
def verify_types(self, args_i, args_r, args_f, return_type):
- assert self._return_type in return_type
+ assert self.result_type in return_type
assert (self.arg_classes.count('i') +
self.arg_classes.count('S')) == len(args_i or ())
assert self.arg_classes.count('r') == len(args_r or ())
@@ -444,161 +425,56 @@
self.arg_classes.count('L')) == len(args_f or ())
def repr_of_descr(self):
- return '<%s>' % self._clsname
+ res = 'Call%s %d' % (self.result_type, self.result_size)
+ if self.arg_classes:
+ res += ' ' + self.arg_classes
+ if self.extrainfo:
+ res += ' EF=%d' % self.extrainfo.extraeffect
+ oopspecindex = self.extrainfo.oopspecindex
+ if oopspecindex:
+ res += ' OS=%d' % oopspecindex
+ return '<%s>' % res
-class BaseIntCallDescr(BaseCallDescr):
- # Base class of the various subclasses of descrs corresponding to
- # calls having a return kind of 'int' (including non-gc pointers).
- # The inheritance hierarchy is a bit different than with other Descr
- # classes because of the 'call_stub' attribute, which is of type
- #
- # lambda func, args_i, args_r, args_f --> int/ref/float/void
- #
- # The purpose of BaseIntCallDescr is to be the parent of all classes
- # in which 'call_stub' has a return kind of 'int'.
- _return_type = history.INT
- call_stub = staticmethod(lambda func, args_i, args_r, args_f: 0)
-
- _is_result_signed = False # can be overridden in XxxCallDescr
- def is_result_signed(self):
- return self._is_result_signed
-
-class DynamicIntCallDescr(BaseIntCallDescr):
- """
- calldescr that works for every integer type, by explicitly passing it the
- size of the result. Used only by get_call_descr_dynamic
- """
- _clsname = 'DynamicIntCallDescr'
-
- def __init__(self, arg_classes, result_size, result_sign, extrainfo, ffi_flags):
- BaseIntCallDescr.__init__(self, arg_classes, extrainfo, ffi_flags)
- assert isinstance(result_sign, bool)
- self._result_size = chr(result_size)
- self._result_sign = result_sign
-
- def get_result_size(self, translate_support_code):
- return ord(self._result_size)
-
- def is_result_signed(self):
- return self._result_sign
-
-
-class NonGcPtrCallDescr(BaseIntCallDescr):
- _clsname = 'NonGcPtrCallDescr'
- def get_result_size(self, translate_support_code):
- return symbolic.get_size_of_ptr(translate_support_code)
-
-class GcPtrCallDescr(BaseCallDescr):
- _clsname = 'GcPtrCallDescr'
- _return_type = history.REF
- call_stub = staticmethod(lambda func, args_i, args_r, args_f:
- lltype.nullptr(llmemory.GCREF.TO))
- def get_result_size(self, translate_support_code):
- return symbolic.get_size_of_ptr(translate_support_code)
-
-class FloatCallDescr(BaseCallDescr):
- _clsname = 'FloatCallDescr'
- _return_type = history.FLOAT
- call_stub = staticmethod(lambda func,args_i,args_r,args_f: longlong.ZEROF)
- def get_result_size(self, translate_support_code):
- return symbolic.get_size(lltype.Float, translate_support_code)
-
-class LongLongCallDescr(FloatCallDescr):
- _clsname = 'LongLongCallDescr'
- _return_type = 'L'
-
-class VoidCallDescr(BaseCallDescr):
- _clsname = 'VoidCallDescr'
- _return_type = history.VOID
- call_stub = staticmethod(lambda func, args_i, args_r, args_f: None)
- def get_result_size(self, translate_support_code):
- return 0
-
-_SingleFloatCallDescr = None # built lazily
-
-def getCallDescrClass(RESULT):
- if RESULT is lltype.Void:
- return VoidCallDescr
- if RESULT is lltype.Float:
- return FloatCallDescr
- if RESULT is lltype.SingleFloat:
- global _SingleFloatCallDescr
- if _SingleFloatCallDescr is None:
- assert rffi.sizeof(rffi.UINT) == rffi.sizeof(RESULT)
- class SingleFloatCallDescr(getCallDescrClass(rffi.UINT)):
- _clsname = 'SingleFloatCallDescr'
- _return_type = 'S'
- _SingleFloatCallDescr = SingleFloatCallDescr
- return _SingleFloatCallDescr
- if is_longlong(RESULT):
- return LongLongCallDescr
- return getDescrClass(RESULT, BaseIntCallDescr, GcPtrCallDescr,
- NonGcPtrCallDescr, 'Call', 'get_result_size',
- Ellipsis, # <= floatattrname should not be used here
- '_is_result_signed')
-getCallDescrClass._annspecialcase_ = 'specialize:memo'
+def map_type_to_argclass(ARG, accept_void=False):
+ kind = getkind(ARG)
+ if kind == 'int':
+ if ARG is lltype.SingleFloat: return 'S'
+ else: return 'i'
+ elif kind == 'ref': return 'r'
+ elif kind == 'float':
+ if is_longlong(ARG): return 'L'
+ else: return 'f'
+ elif kind == 'void':
+ if accept_void: return 'v'
+ raise NotImplementedError('ARG = %r' % (ARG,))
def get_call_descr(gccache, ARGS, RESULT, extrainfo=None):
- arg_classes = []
- for ARG in ARGS:
- kind = getkind(ARG)
- if kind == 'int':
- if ARG is lltype.SingleFloat:
- arg_classes.append('S')
+ arg_classes = map(map_type_to_argclass, ARGS)
+ arg_classes = ''.join(arg_classes)
+ result_type = map_type_to_argclass(RESULT, accept_void=True)
+ RESULT_ERASED = RESULT
+ if RESULT is lltype.Void:
+ result_size = 0
+ result_signed = False
+ else:
+ if isinstance(RESULT, lltype.Ptr):
+ # avoid too many CallDescrs
+ if result_type == 'r':
+ RESULT_ERASED = llmemory.GCREF
else:
- arg_classes.append('i')
- elif kind == 'ref': arg_classes.append('r')
- elif kind == 'float':
- if is_longlong(ARG):
- arg_classes.append('L')
- else:
- arg_classes.append('f')
- else:
- raise NotImplementedError('ARG = %r' % (ARG,))
- arg_classes = ''.join(arg_classes)
- cls = getCallDescrClass(RESULT)
- key = (cls, arg_classes, extrainfo)
+ RESULT_ERASED = llmemory.Address
+ result_size = symbolic.get_size(RESULT_ERASED,
+ gccache.translate_support_code)
+ result_signed = get_type_flag(RESULT) == FLAG_SIGNED
+ key = (arg_classes, result_type, result_signed, RESULT_ERASED, extrainfo)
cache = gccache._cache_call
try:
- return cache[key]
+ calldescr = cache[key]
except KeyError:
- calldescr = cls(arg_classes, extrainfo)
- calldescr.create_call_stub(gccache.rtyper, RESULT)
+ calldescr = CallDescr(arg_classes, result_type, result_signed,
+ result_size, extrainfo)
+ calldescr.create_call_stub(gccache.rtyper, RESULT_ERASED)
cache[key] = calldescr
- return calldescr
-
-
-# ____________________________________________________________
-
-def getDescrClass(TYPE, BaseDescr, GcPtrDescr, NonGcPtrDescr,
- nameprefix, methodname, floatattrname, signedattrname,
- _cache={}):
- if isinstance(TYPE, lltype.Ptr):
- if TYPE.TO._gckind == 'gc':
- return GcPtrDescr
- else:
- return NonGcPtrDescr
- if TYPE is lltype.SingleFloat:
- assert rffi.sizeof(rffi.UINT) == rffi.sizeof(TYPE)
- TYPE = rffi.UINT
- try:
- return _cache[nameprefix, TYPE]
- except KeyError:
- #
- class Descr(BaseDescr):
- _clsname = '%s%sDescr' % (TYPE._name, nameprefix)
- Descr.__name__ = Descr._clsname
- #
- def method(self, translate_support_code):
- return symbolic.get_size(TYPE, translate_support_code)
- setattr(Descr, methodname, method)
- #
- if TYPE is lltype.Float or is_longlong(TYPE):
- setattr(Descr, floatattrname, True)
- elif (TYPE is not lltype.Bool and isinstance(TYPE, lltype.Number) and
- rffi.cast(TYPE, -1) == -1):
- setattr(Descr, signedattrname, True)
- #
- _cache[nameprefix, TYPE] = Descr
- return Descr
+ assert repr(calldescr.result_size) == repr(result_size)
+ return calldescr
diff --git a/pypy/jit/backend/llsupport/ffisupport.py b/pypy/jit/backend/llsupport/ffisupport.py
--- a/pypy/jit/backend/llsupport/ffisupport.py
+++ b/pypy/jit/backend/llsupport/ffisupport.py
@@ -1,9 +1,7 @@
from pypy.rlib.rarithmetic import intmask
from pypy.jit.metainterp import history
from pypy.rpython.lltypesystem import rffi
-from pypy.jit.backend.llsupport.descr import (
- DynamicIntCallDescr, NonGcPtrCallDescr, FloatCallDescr, VoidCallDescr,
- LongLongCallDescr, getCallDescrClass)
+from pypy.jit.backend.llsupport.descr import CallDescr
class UnsupportedKind(Exception):
pass
@@ -16,29 +14,13 @@
argkinds = [get_ffi_type_kind(cpu, arg) for arg in ffi_args]
except UnsupportedKind:
return None
- arg_classes = ''.join(argkinds)
- if reskind == history.INT:
- size = intmask(ffi_result.c_size)
- signed = is_ffi_type_signed(ffi_result)
- return DynamicIntCallDescr(arg_classes, size, signed, extrainfo,
- ffi_flags=ffi_flags)
- elif reskind == history.REF:
- return NonGcPtrCallDescr(arg_classes, extrainfo,
- ffi_flags=ffi_flags)
- elif reskind == history.FLOAT:
- return FloatCallDescr(arg_classes, extrainfo,
- ffi_flags=ffi_flags)
- elif reskind == history.VOID:
- return VoidCallDescr(arg_classes, extrainfo,
- ffi_flags=ffi_flags)
- elif reskind == 'L':
- return LongLongCallDescr(arg_classes, extrainfo,
- ffi_flags=ffi_flags)
- elif reskind == 'S':
- SingleFloatCallDescr = getCallDescrClass(rffi.FLOAT)
- return SingleFloatCallDescr(arg_classes, extrainfo,
- ffi_flags=ffi_flags)
- assert False
+ if reskind == history.VOID:
+ result_size = 0
+ else:
+ result_size = intmask(ffi_result.c_size)
+ argkinds = ''.join(argkinds)
+ return CallDescr(argkinds, reskind, is_ffi_type_signed(ffi_result),
+ result_size, extrainfo, ffi_flags=ffi_flags)
def get_ffi_type_kind(cpu, ffi_type):
from pypy.rlib.libffi import types
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -1,6 +1,6 @@
import os
from pypy.rlib import rgc
-from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.objectmodel import we_are_translated, specialize
from pypy.rlib.debug import fatalerror
from pypy.rlib.rarithmetic import ovfcheck
from pypy.rpython.lltypesystem import lltype, llmemory, rffi, rclass, rstr
@@ -8,52 +8,93 @@
from pypy.rpython.lltypesystem.lloperation import llop
from pypy.rpython.annlowlevel import llhelper
from pypy.translator.tool.cbuild import ExternalCompilationInfo
-from pypy.jit.metainterp.history import BoxInt, BoxPtr, ConstInt, ConstPtr
-from pypy.jit.metainterp.history import AbstractDescr
+from pypy.jit.codewriter import heaptracker
+from pypy.jit.metainterp.history import ConstPtr, AbstractDescr
from pypy.jit.metainterp.resoperation import ResOperation, rop
from pypy.jit.backend.llsupport import symbolic
from pypy.jit.backend.llsupport.symbolic import WORD
-from pypy.jit.backend.llsupport.descr import BaseSizeDescr, BaseArrayDescr
+from pypy.jit.backend.llsupport.descr import SizeDescr, ArrayDescr
from pypy.jit.backend.llsupport.descr import GcCache, get_field_descr
-from pypy.jit.backend.llsupport.descr import GcPtrFieldDescr
+from pypy.jit.backend.llsupport.descr import get_array_descr
from pypy.jit.backend.llsupport.descr import get_call_descr
+from pypy.jit.backend.llsupport.rewrite import GcRewriterAssembler
from pypy.rpython.memory.gctransform import asmgcroot
# ____________________________________________________________
class GcLLDescription(GcCache):
- minimal_size_in_nursery = 0
- get_malloc_slowpath_addr = None
def __init__(self, gcdescr, translator=None, rtyper=None):
GcCache.__init__(self, translator is not None, rtyper)
self.gcdescr = gcdescr
+ if translator and translator.config.translation.gcremovetypeptr:
+ self.fielddescr_vtable = None
+ else:
+ self.fielddescr_vtable = get_field_descr(self, rclass.OBJECT,
+ 'typeptr')
+ self._generated_functions = []
+
+ def _setup_str(self):
+ self.str_descr = get_array_descr(self, rstr.STR)
+ self.unicode_descr = get_array_descr(self, rstr.UNICODE)
+
+ def generate_function(self, funcname, func, ARGS, RESULT=llmemory.GCREF):
+ """Generates a variant of malloc with the given name and the given
+ arguments. It should return NULL if out of memory. If it raises
+ anything, it must be an optional MemoryError.
+ """
+ FUNCPTR = lltype.Ptr(lltype.FuncType(ARGS, RESULT))
+ descr = get_call_descr(self, ARGS, RESULT)
+ setattr(self, funcname, func)
+ setattr(self, funcname + '_FUNCPTR', FUNCPTR)
+ setattr(self, funcname + '_descr', descr)
+ self._generated_functions.append(funcname)
+
+ @specialize.arg(1)
+ def get_malloc_fn(self, funcname):
+ func = getattr(self, funcname)
+ FUNC = getattr(self, funcname + '_FUNCPTR')
+ return llhelper(FUNC, func)
+
+ @specialize.arg(1)
+ def get_malloc_fn_addr(self, funcname):
+ ll_func = self.get_malloc_fn(funcname)
+ return heaptracker.adr2int(llmemory.cast_ptr_to_adr(ll_func))
+
def _freeze_(self):
return True
def initialize(self):
pass
def do_write_barrier(self, gcref_struct, gcref_newptr):
pass
- def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
- return operations
- def can_inline_malloc(self, descr):
- return False
- def can_inline_malloc_varsize(self, descr, num_elem):
+ def can_use_nursery_malloc(self, size):
return False
def has_write_barrier_class(self):
return None
def freeing_block(self, start, stop):
pass
+ def get_nursery_free_addr(self):
+ raise NotImplementedError
+ def get_nursery_top_addr(self):
+ raise NotImplementedError
- def get_funcptr_for_newarray(self):
- return llhelper(self.GC_MALLOC_ARRAY, self.malloc_array)
- def get_funcptr_for_newstr(self):
- return llhelper(self.GC_MALLOC_STR_UNICODE, self.malloc_str)
- def get_funcptr_for_newunicode(self):
- return llhelper(self.GC_MALLOC_STR_UNICODE, self.malloc_unicode)
+ def gc_malloc(self, sizedescr):
+ """Blackhole: do a 'bh_new'. Also used for 'bh_new_with_vtable',
+ with the vtable pointer set manually afterwards."""
+ assert isinstance(sizedescr, SizeDescr)
+ return self._bh_malloc(sizedescr)
+ def gc_malloc_array(self, arraydescr, num_elem):
+ assert isinstance(arraydescr, ArrayDescr)
+ return self._bh_malloc_array(arraydescr, num_elem)
- def record_constptrs(self, op, gcrefs_output_list):
+ def gc_malloc_str(self, num_elem):
+ return self._bh_malloc_array(self.str_descr, num_elem)
+
+ def gc_malloc_unicode(self, num_elem):
+ return self._bh_malloc_array(self.unicode_descr, num_elem)
+
+ def _record_constptrs(self, op, gcrefs_output_list):
for i in range(op.numargs()):
v = op.getarg(i)
if isinstance(v, ConstPtr) and bool(v.value):
@@ -61,11 +102,27 @@
rgc._make_sure_does_not_move(p)
gcrefs_output_list.append(p)
+ def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
+ rewriter = GcRewriterAssembler(self, cpu)
+ newops = rewriter.rewrite(operations)
+ # record all GCREFs, because the GC (or Boehm) cannot see them and
+ # keep them alive if they end up as constants in the assembler
+ for op in newops:
+ self._record_constptrs(op, gcrefs_output_list)
+ return newops
+
# ____________________________________________________________
class GcLLDescr_boehm(GcLLDescription):
- moving_gc = False
- gcrootmap = None
+ kind = 'boehm'
+ moving_gc = False
+ round_up = False
+ gcrootmap = None
+ write_barrier_descr = None
+ fielddescr_tid = None
+ str_type_id = 0
+ unicode_type_id = 0
+ get_malloc_slowpath_addr = None
@classmethod
def configure_boehm_once(cls):
@@ -76,6 +133,16 @@
from pypy.rpython.tool import rffi_platform
compilation_info = rffi_platform.configure_boehm()
+ # on some platform GC_init is required before any other
+ # GC_* functions, call it here for the benefit of tests
+ # XXX move this to tests
+ init_fn_ptr = rffi.llexternal("GC_init",
+ [], lltype.Void,
+ compilation_info=compilation_info,
+ sandboxsafe=True,
+ _nowrapper=True)
+ init_fn_ptr()
+
# Versions 6.x of libgc needs to use GC_local_malloc().
# Versions 7.x of libgc removed this function; GC_malloc() has
# the same behavior if libgc was compiled with
@@ -95,96 +162,42 @@
sandboxsafe=True,
_nowrapper=True)
cls.malloc_fn_ptr = malloc_fn_ptr
- cls.compilation_info = compilation_info
return malloc_fn_ptr
def __init__(self, gcdescr, translator, rtyper):
GcLLDescription.__init__(self, gcdescr, translator, rtyper)
# grab a pointer to the Boehm 'malloc' function
- malloc_fn_ptr = self.configure_boehm_once()
- self.funcptr_for_new = malloc_fn_ptr
+ self.malloc_fn_ptr = self.configure_boehm_once()
+ self._setup_str()
+ self._make_functions()
- def malloc_array(basesize, itemsize, ofs_length, num_elem):
+ def _make_functions(self):
+
+ def malloc_fixedsize(size):
+ return self.malloc_fn_ptr(size)
+ self.generate_function('malloc_fixedsize', malloc_fixedsize,
+ [lltype.Signed])
+
+ def malloc_array(basesize, num_elem, itemsize, ofs_length):
try:
- size = ovfcheck(basesize + ovfcheck(itemsize * num_elem))
+ totalsize = ovfcheck(basesize + ovfcheck(itemsize * num_elem))
except OverflowError:
return lltype.nullptr(llmemory.GCREF.TO)
- res = self.funcptr_for_new(size)
- if not res:
- return res
- rffi.cast(rffi.CArrayPtr(lltype.Signed), res)[ofs_length/WORD] = num_elem
+ res = self.malloc_fn_ptr(totalsize)
+ if res:
+ arrayptr = rffi.cast(rffi.CArrayPtr(lltype.Signed), res)
+ arrayptr[ofs_length/WORD] = num_elem
return res
- self.malloc_array = malloc_array
- self.GC_MALLOC_ARRAY = lltype.Ptr(lltype.FuncType(
- [lltype.Signed] * 4, llmemory.GCREF))
+ self.generate_function('malloc_array', malloc_array,
+ [lltype.Signed] * 4)
+ def _bh_malloc(self, sizedescr):
+ return self.malloc_fixedsize(sizedescr.size)
- (str_basesize, str_itemsize, str_ofs_length
- ) = symbolic.get_array_token(rstr.STR, self.translate_support_code)
- (unicode_basesize, unicode_itemsize, unicode_ofs_length
- ) = symbolic.get_array_token(rstr.UNICODE, self.translate_support_code)
- def malloc_str(length):
- return self.malloc_array(
- str_basesize, str_itemsize, str_ofs_length, length
- )
- def malloc_unicode(length):
- return self.malloc_array(
- unicode_basesize, unicode_itemsize, unicode_ofs_length, length
- )
- self.malloc_str = malloc_str
- self.malloc_unicode = malloc_unicode
- self.GC_MALLOC_STR_UNICODE = lltype.Ptr(lltype.FuncType(
- [lltype.Signed], llmemory.GCREF))
-
-
- # on some platform GC_init is required before any other
- # GC_* functions, call it here for the benefit of tests
- # XXX move this to tests
- init_fn_ptr = rffi.llexternal("GC_init",
- [], lltype.Void,
- compilation_info=self.compilation_info,
- sandboxsafe=True,
- _nowrapper=True)
-
- init_fn_ptr()
-
- def gc_malloc(self, sizedescr):
- assert isinstance(sizedescr, BaseSizeDescr)
- return self.funcptr_for_new(sizedescr.size)
-
- def gc_malloc_array(self, arraydescr, num_elem):
- assert isinstance(arraydescr, BaseArrayDescr)
- ofs_length = arraydescr.get_ofs_length(self.translate_support_code)
- basesize = arraydescr.get_base_size(self.translate_support_code)
- itemsize = arraydescr.get_item_size(self.translate_support_code)
- return self.malloc_array(basesize, itemsize, ofs_length, num_elem)
-
- def gc_malloc_str(self, num_elem):
- return self.malloc_str(num_elem)
-
- def gc_malloc_unicode(self, num_elem):
- return self.malloc_unicode(num_elem)
-
- def args_for_new(self, sizedescr):
- assert isinstance(sizedescr, BaseSizeDescr)
- return [sizedescr.size]
-
- def args_for_new_array(self, arraydescr):
- ofs_length = arraydescr.get_ofs_length(self.translate_support_code)
- basesize = arraydescr.get_base_size(self.translate_support_code)
- itemsize = arraydescr.get_item_size(self.translate_support_code)
- return [basesize, itemsize, ofs_length]
-
- def get_funcptr_for_new(self):
- return self.funcptr_for_new
-
- def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
- # record all GCREFs too, because Boehm cannot see them and keep them
- # alive if they end up as constants in the assembler
- for op in operations:
- self.record_constptrs(op, gcrefs_output_list)
- return GcLLDescription.rewrite_assembler(self, cpu, operations,
- gcrefs_output_list)
+ def _bh_malloc_array(self, arraydescr, num_elem):
+ return self.malloc_array(arraydescr.basesize, num_elem,
+ arraydescr.itemsize,
+ arraydescr.lendescr.offset)
# ____________________________________________________________
@@ -554,12 +567,14 @@
class WriteBarrierDescr(AbstractDescr):
def __init__(self, gc_ll_descr):
- GCClass = gc_ll_descr.GCClass
self.llop1 = gc_ll_descr.llop1
self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR
self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR
- self.fielddescr_tid = get_field_descr(gc_ll_descr, GCClass.HDR, 'tid')
+ self.fielddescr_tid = gc_ll_descr.fielddescr_tid
#
+ GCClass = gc_ll_descr.GCClass
+ if GCClass is None: # for tests
+ return
self.jit_wb_if_flag = GCClass.JIT_WB_IF_FLAG
self.jit_wb_if_flag_byteofs, self.jit_wb_if_flag_singlebyte = (
self.extract_flag_byte(self.jit_wb_if_flag))
@@ -596,48 +611,74 @@
funcaddr = llmemory.cast_ptr_to_adr(funcptr)
return cpu.cast_adr_to_int(funcaddr) # this may return 0
+ def has_write_barrier_from_array(self, cpu):
+ return self.get_write_barrier_from_array_fn(cpu) != 0
+
class GcLLDescr_framework(GcLLDescription):
DEBUG = False # forced to True by x86/test/test_zrpy_gc.py
+ kind = 'framework'
+ round_up = True
- def __init__(self, gcdescr, translator, rtyper, llop1=llop):
- from pypy.rpython.memory.gctypelayout import check_typeid
- from pypy.rpython.memory.gcheader import GCHeaderBuilder
- from pypy.rpython.memory.gctransform import framework
+ def __init__(self, gcdescr, translator, rtyper, llop1=llop,
+ really_not_translated=False):
GcLLDescription.__init__(self, gcdescr, translator, rtyper)
- assert self.translate_support_code, "required with the framework GC"
self.translator = translator
self.llop1 = llop1
+ if really_not_translated:
+ assert not self.translate_support_code # but half does not work
+ self._initialize_for_tests()
+ else:
+ assert self.translate_support_code,"required with the framework GC"
+ self._check_valid_gc()
+ self._make_gcrootmap()
+ self._make_layoutbuilder()
+ self._setup_gcclass()
+ self._setup_tid()
+ self._setup_write_barrier()
+ self._setup_str()
+ self._make_functions(really_not_translated)
+ def _initialize_for_tests(self):
+ self.layoutbuilder = None
+ self.fielddescr_tid = AbstractDescr()
+ self.max_size_of_young_obj = 1000
+ self.GCClass = None
+
+ def _check_valid_gc(self):
# we need the hybrid or minimark GC for rgc._make_sure_does_not_move()
# to work
- if gcdescr.config.translation.gc not in ('hybrid', 'minimark'):
+ if self.gcdescr.config.translation.gc not in ('hybrid', 'minimark'):
raise NotImplementedError("--gc=%s not implemented with the JIT" %
(gcdescr.config.translation.gc,))
+ def _make_gcrootmap(self):
# to find roots in the assembler, make a GcRootMap
- name = gcdescr.config.translation.gcrootfinder
+ name = self.gcdescr.config.translation.gcrootfinder
try:
cls = globals()['GcRootMap_' + name]
except KeyError:
raise NotImplementedError("--gcrootfinder=%s not implemented"
" with the JIT" % (name,))
- gcrootmap = cls(gcdescr)
+ gcrootmap = cls(self.gcdescr)
self.gcrootmap = gcrootmap
+ def _make_layoutbuilder(self):
# make a TransformerLayoutBuilder and save it on the translator
# where it can be fished and reused by the FrameworkGCTransformer
+ from pypy.rpython.memory.gctransform import framework
+ translator = self.translator
self.layoutbuilder = framework.TransformerLayoutBuilder(translator)
self.layoutbuilder.delay_encoding()
- self.translator._jit2gc = {'layoutbuilder': self.layoutbuilder}
- gcrootmap.add_jit2gc_hooks(self.translator._jit2gc)
+ translator._jit2gc = {'layoutbuilder': self.layoutbuilder}
+ self.gcrootmap.add_jit2gc_hooks(translator._jit2gc)
+ def _setup_gcclass(self):
+ from pypy.rpython.memory.gcheader import GCHeaderBuilder
self.GCClass = self.layoutbuilder.GCClass
self.moving_gc = self.GCClass.moving_gc
self.HDRPTR = lltype.Ptr(self.GCClass.HDR)
self.gcheaderbuilder = GCHeaderBuilder(self.HDRPTR.TO)
- (self.array_basesize, _, self.array_length_ofs) = \
- symbolic.get_array_token(lltype.GcArray(lltype.Signed), True)
self.max_size_of_young_obj = self.GCClass.JIT_max_size_of_young_obj()
self.minimal_size_in_nursery=self.GCClass.JIT_minimal_size_in_nursery()
@@ -645,87 +686,124 @@
assert self.GCClass.inline_simple_malloc
assert self.GCClass.inline_simple_malloc_varsize
- # make a malloc function, with two arguments
- def malloc_basic(size, tid):
- type_id = llop.extract_ushort(llgroup.HALFWORD, tid)
- check_typeid(type_id)
- res = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
- type_id, size,
- False, False, False)
- # In case the operation above failed, we are returning NULL
- # from this function to assembler. There is also an RPython
- # exception set, typically MemoryError; but it's easier and
- # faster to check for the NULL return value, as done by
- # translator/exceptiontransform.py.
- #llop.debug_print(lltype.Void, "\tmalloc_basic", size, type_id,
- # "-->", res)
- return res
- self.malloc_basic = malloc_basic
- self.GC_MALLOC_BASIC = lltype.Ptr(lltype.FuncType(
- [lltype.Signed, lltype.Signed], llmemory.GCREF))
+ def _setup_tid(self):
+ self.fielddescr_tid = get_field_descr(self, self.GCClass.HDR, 'tid')
+
+ def _setup_write_barrier(self):
self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType(
[llmemory.Address, llmemory.Address], lltype.Void))
self.WB_ARRAY_FUNCPTR = lltype.Ptr(lltype.FuncType(
[llmemory.Address, lltype.Signed, llmemory.Address], lltype.Void))
self.write_barrier_descr = WriteBarrierDescr(self)
- #
+
+ def _make_functions(self, really_not_translated):
+ from pypy.rpython.memory.gctypelayout import check_typeid
+ llop1 = self.llop1
+ (self.standard_array_basesize, _, self.standard_array_length_ofs) = \
+ symbolic.get_array_token(lltype.GcArray(lltype.Signed),
+ not really_not_translated)
+
+ def malloc_nursery_slowpath(size):
+ """Allocate 'size' null bytes out of the nursery.
+ Note that the fast path is typically inlined by the backend."""
+ if self.DEBUG:
+ self._random_usage_of_xmm_registers()
+ type_id = rffi.cast(llgroup.HALFWORD, 0) # missing here
+ return llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
+ type_id, size,
+ False, False, False)
+ self.generate_function('malloc_nursery', malloc_nursery_slowpath,
+ [lltype.Signed])
+
def malloc_array(itemsize, tid, num_elem):
+ """Allocate an array with a variable-size num_elem.
+ Only works for standard arrays."""
type_id = llop.extract_ushort(llgroup.HALFWORD, tid)
check_typeid(type_id)
return llop1.do_malloc_varsize_clear(
llmemory.GCREF,
- type_id, num_elem, self.array_basesize, itemsize,
- self.array_length_ofs)
- self.malloc_array = malloc_array
- self.GC_MALLOC_ARRAY = lltype.Ptr(lltype.FuncType(
- [lltype.Signed] * 3, llmemory.GCREF))
- #
- (str_basesize, str_itemsize, str_ofs_length
- ) = symbolic.get_array_token(rstr.STR, True)
- (unicode_basesize, unicode_itemsize, unicode_ofs_length
- ) = symbolic.get_array_token(rstr.UNICODE, True)
- str_type_id = self.layoutbuilder.get_type_id(rstr.STR)
- unicode_type_id = self.layoutbuilder.get_type_id(rstr.UNICODE)
- #
+ type_id, num_elem, self.standard_array_basesize, itemsize,
+ self.standard_array_length_ofs)
+ self.generate_function('malloc_array', malloc_array,
+ [lltype.Signed] * 3)
+
+ def malloc_array_nonstandard(basesize, itemsize, lengthofs, tid,
+ num_elem):
+ """For the rare case of non-standard arrays, i.e. arrays where
+ self.standard_array_{basesize,length_ofs} is wrong. It can
+ occur e.g. with arrays of floats on Win32."""
+ type_id = llop.extract_ushort(llgroup.HALFWORD, tid)
+ check_typeid(type_id)
+ return llop1.do_malloc_varsize_clear(
+ llmemory.GCREF,
+ type_id, num_elem, basesize, itemsize, lengthofs)
+ self.generate_function('malloc_array_nonstandard',
+ malloc_array_nonstandard,
+ [lltype.Signed] * 5)
+
+ str_type_id = self.str_descr.tid
+ str_basesize = self.str_descr.basesize
+ str_itemsize = self.str_descr.itemsize
+ str_ofs_length = self.str_descr.lendescr.offset
+ unicode_type_id = self.unicode_descr.tid
+ unicode_basesize = self.unicode_descr.basesize
+ unicode_itemsize = self.unicode_descr.itemsize
+ unicode_ofs_length = self.unicode_descr.lendescr.offset
+
def malloc_str(length):
return llop1.do_malloc_varsize_clear(
llmemory.GCREF,
str_type_id, length, str_basesize, str_itemsize,
str_ofs_length)
+ self.generate_function('malloc_str', malloc_str,
+ [lltype.Signed])
+
def malloc_unicode(length):
return llop1.do_malloc_varsize_clear(
llmemory.GCREF,
- unicode_type_id, length, unicode_basesize,unicode_itemsize,
+ unicode_type_id, length, unicode_basesize, unicode_itemsize,
unicode_ofs_length)
- self.malloc_str = malloc_str
- self.malloc_unicode = malloc_unicode
- self.GC_MALLOC_STR_UNICODE = lltype.Ptr(lltype.FuncType(
- [lltype.Signed], llmemory.GCREF))
- #
- class ForTestOnly:
- pass
- for_test_only = ForTestOnly()
- for_test_only.x = 1.23
- def random_usage_of_xmm_registers():
- x0 = for_test_only.x
- x1 = x0 * 0.1
- x2 = x0 * 0.2
- x3 = x0 * 0.3
- for_test_only.x = x0 + x1 + x2 + x3
- #
- def malloc_slowpath(size):
- if self.DEBUG:
- random_usage_of_xmm_registers()
- assert size >= self.minimal_size_in_nursery
- # NB. although we call do_malloc_fixedsize_clear() here,
- # it's a bit of a hack because we set tid to 0 and may
- # also use it to allocate varsized objects. The tid
- # and possibly the length are both set afterward.
- gcref = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
- 0, size, False, False, False)
- return rffi.cast(lltype.Signed, gcref)
- self.malloc_slowpath = malloc_slowpath
- self.MALLOC_SLOWPATH = lltype.FuncType([lltype.Signed], lltype.Signed)
+ self.generate_function('malloc_unicode', malloc_unicode,
+ [lltype.Signed])
+
+ # Rarely called: allocate a fixed-size amount of bytes, but
+ # not in the nursery, because it is too big. Implemented like
+ # malloc_nursery_slowpath() above.
+ self.generate_function('malloc_fixedsize', malloc_nursery_slowpath,
+ [lltype.Signed])
+
+ def _bh_malloc(self, sizedescr):
+ from pypy.rpython.memory.gctypelayout import check_typeid
+ llop1 = self.llop1
+ type_id = llop.extract_ushort(llgroup.HALFWORD, sizedescr.tid)
+ check_typeid(type_id)
+ return llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
+ type_id, sizedescr.size,
+ False, False, False)
+
+ def _bh_malloc_array(self, arraydescr, num_elem):
+ from pypy.rpython.memory.gctypelayout import check_typeid
+ llop1 = self.llop1
+ type_id = llop.extract_ushort(llgroup.HALFWORD, arraydescr.tid)
+ check_typeid(type_id)
+ return llop1.do_malloc_varsize_clear(llmemory.GCREF,
+ type_id, num_elem,
+ arraydescr.basesize,
+ arraydescr.itemsize,
+ arraydescr.lendescr.offset)
+
+
+ class ForTestOnly:
+ pass
+ for_test_only = ForTestOnly()
+ for_test_only.x = 1.23
+
+ def _random_usage_of_xmm_registers(self):
+ x0 = self.for_test_only.x
+ x1 = x0 * 0.1
+ x2 = x0 * 0.2
+ x3 = x0 * 0.3
+ self.for_test_only.x = x0 + x1 + x2 + x3
def get_nursery_free_addr(self):
nurs_addr = llop.gc_adr_of_nursery_free(llmemory.Address)
@@ -735,49 +813,26 @@
nurs_top_addr = llop.gc_adr_of_nursery_top(llmemory.Address)
return rffi.cast(lltype.Signed, nurs_top_addr)
- def get_malloc_slowpath_addr(self):
- fptr = llhelper(lltype.Ptr(self.MALLOC_SLOWPATH), self.malloc_slowpath)
- return rffi.cast(lltype.Signed, fptr)
-
def initialize(self):
self.gcrootmap.initialize()
def init_size_descr(self, S, descr):
- type_id = self.layoutbuilder.get_type_id(S)
- assert not self.layoutbuilder.is_weakref_type(S)
- assert not self.layoutbuilder.has_finalizer(S)
- descr.tid = llop.combine_ushort(lltype.Signed, type_id, 0)
+ if self.layoutbuilder is not None:
+ type_id = self.layoutbuilder.get_type_id(S)
+ assert not self.layoutbuilder.is_weakref_type(S)
+ assert not self.layoutbuilder.has_finalizer(S)
+ descr.tid = llop.combine_ushort(lltype.Signed, type_id, 0)
def init_array_descr(self, A, descr):
- type_id = self.layoutbuilder.get_type_id(A)
- descr.tid = llop.combine_ushort(lltype.Signed, type_id, 0)
+ if self.layoutbuilder is not None:
+ type_id = self.layoutbuilder.get_type_id(A)
+ descr.tid = llop.combine_ushort(lltype.Signed, type_id, 0)
- def gc_malloc(self, sizedescr):
- assert isinstance(sizedescr, BaseSizeDescr)
- return self.malloc_basic(sizedescr.size, sizedescr.tid)
-
- def gc_malloc_array(self, arraydescr, num_elem):
- assert isinstance(arraydescr, BaseArrayDescr)
- itemsize = arraydescr.get_item_size(self.translate_support_code)
- return self.malloc_array(itemsize, arraydescr.tid, num_elem)
-
- def gc_malloc_str(self, num_elem):
- return self.malloc_str(num_elem)
-
- def gc_malloc_unicode(self, num_elem):
- return self.malloc_unicode(num_elem)
-
- def args_for_new(self, sizedescr):
- assert isinstance(sizedescr, BaseSizeDescr)
- return [sizedescr.size, sizedescr.tid]
-
- def args_for_new_array(self, arraydescr):
- assert isinstance(arraydescr, BaseArrayDescr)
- itemsize = arraydescr.get_item_size(self.translate_support_code)
- return [itemsize, arraydescr.tid]
-
- def get_funcptr_for_new(self):
- return llhelper(self.GC_MALLOC_BASIC, self.malloc_basic)
+ def _set_tid(self, gcptr, tid):
+ hdr_addr = llmemory.cast_ptr_to_adr(gcptr)
+ hdr_addr -= self.gcheaderbuilder.size_gc_header
+ hdr = llmemory.cast_adr_to_ptr(hdr_addr, self.HDRPTR)
+ hdr.tid = tid
def do_write_barrier(self, gcref_struct, gcref_newptr):
hdr_addr = llmemory.cast_ptr_to_adr(gcref_struct)
@@ -791,108 +846,8 @@
funcptr(llmemory.cast_ptr_to_adr(gcref_struct),
llmemory.cast_ptr_to_adr(gcref_newptr))
- def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
- # Perform two kinds of rewrites in parallel:
- #
- # - Add COND_CALLs to the write barrier before SETFIELD_GC and
- # SETARRAYITEM_GC operations.
- #
- # - Record the ConstPtrs from the assembler.
- #
- newops = []
- known_lengths = {}
- # we can only remember one malloc since the next malloc can possibly
- # collect
- last_malloc = None
- for op in operations:
- if op.getopnum() == rop.DEBUG_MERGE_POINT:
- continue
- # ---------- record the ConstPtrs ----------
- self.record_constptrs(op, gcrefs_output_list)
- if op.is_malloc():
- last_malloc = op.result
- elif op.can_malloc():
- last_malloc = None
- # ---------- write barrier for SETFIELD_GC ----------
- if op.getopnum() == rop.SETFIELD_GC:
- val = op.getarg(0)
- # no need for a write barrier in the case of previous malloc
- if val is not last_malloc:
- v = op.getarg(1)
- if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
- bool(v.value)): # store a non-NULL
- self._gen_write_barrier(newops, op.getarg(0), v)
- op = op.copy_and_change(rop.SETFIELD_RAW)
- # ---------- write barrier for SETINTERIORFIELD_GC ------
- if op.getopnum() == rop.SETINTERIORFIELD_GC:
- val = op.getarg(0)
- if val is not last_malloc:
- v = op.getarg(2)
- if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
- bool(v.value)): # store a non-NULL
- self._gen_write_barrier(newops, op.getarg(0), v)
- op = op.copy_and_change(rop.SETINTERIORFIELD_RAW)
- # ---------- write barrier for SETARRAYITEM_GC ----------
- if op.getopnum() == rop.SETARRAYITEM_GC:
- val = op.getarg(0)
- # no need for a write barrier in the case of previous malloc
- if val is not last_malloc:
- v = op.getarg(2)
- if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
- bool(v.value)): # store a non-NULL
- self._gen_write_barrier_array(newops, op.getarg(0),
- op.getarg(1), v,
- cpu, known_lengths)
- op = op.copy_and_change(rop.SETARRAYITEM_RAW)
- elif op.getopnum() == rop.NEW_ARRAY:
- v_length = op.getarg(0)
- if isinstance(v_length, ConstInt):
- known_lengths[op.result] = v_length.getint()
- # ----------
- newops.append(op)
- return newops
-
- def _gen_write_barrier(self, newops, v_base, v_value):
- args = [v_base, v_value]
- newops.append(ResOperation(rop.COND_CALL_GC_WB, args, None,
- descr=self.write_barrier_descr))
-
- def _gen_write_barrier_array(self, newops, v_base, v_index, v_value,
- cpu, known_lengths):
- if self.write_barrier_descr.get_write_barrier_from_array_fn(cpu) != 0:
- # If we know statically the length of 'v', and it is not too
- # big, then produce a regular write_barrier. If it's unknown or
- # too big, produce instead a write_barrier_from_array.
- LARGE = 130
- length = known_lengths.get(v_base, LARGE)
- if length >= LARGE:
- # unknown or too big: produce a write_barrier_from_array
- args = [v_base, v_index, v_value]
- newops.append(ResOperation(rop.COND_CALL_GC_WB_ARRAY, args,
- None,
- descr=self.write_barrier_descr))
- return
- # fall-back case: produce a write_barrier
- self._gen_write_barrier(newops, v_base, v_value)
-
- def can_inline_malloc(self, descr):
- assert isinstance(descr, BaseSizeDescr)
- if descr.size < self.max_size_of_young_obj:
- has_finalizer = bool(descr.tid & (1<<llgroup.HALFSHIFT))
- if has_finalizer:
- return False
- return True
- return False
-
- def can_inline_malloc_varsize(self, arraydescr, num_elem):
- assert isinstance(arraydescr, BaseArrayDescr)
- basesize = arraydescr.get_base_size(self.translate_support_code)
- itemsize = arraydescr.get_item_size(self.translate_support_code)
- try:
- size = ovfcheck(basesize + ovfcheck(itemsize * num_elem))
- return size < self.max_size_of_young_obj
- except OverflowError:
- return False
+ def can_use_nursery_malloc(self, size):
+ return size < self.max_size_of_young_obj
def has_write_barrier_class(self):
return WriteBarrierDescr
@@ -900,6 +855,9 @@
def freeing_block(self, start, stop):
self.gcrootmap.freeing_block(start, stop)
+ def get_malloc_slowpath_addr(self):
+ return self.get_malloc_fn_addr('malloc_nursery')
+
# ____________________________________________________________
def get_ll_description(gcdescr, translator=None, rtyper=None):
diff --git a/pypy/jit/backend/llsupport/llmodel.py b/pypy/jit/backend/llsupport/llmodel.py
--- a/pypy/jit/backend/llsupport/llmodel.py
+++ b/pypy/jit/backend/llsupport/llmodel.py
@@ -8,11 +8,10 @@
from pypy.jit.backend.model import AbstractCPU
from pypy.jit.backend.llsupport import symbolic
from pypy.jit.backend.llsupport.symbolic import WORD, unroll_basic_sizes
-from pypy.jit.backend.llsupport.descr import (get_size_descr,
- get_field_descr, BaseFieldDescr, DynamicFieldDescr, get_array_descr,
- BaseArrayDescr, DynamicArrayNoLengthDescr, get_call_descr,
- BaseIntCallDescr, GcPtrCallDescr, FloatCallDescr, VoidCallDescr,
- InteriorFieldDescr, get_interiorfield_descr)
+from pypy.jit.backend.llsupport.descr import (
+ get_size_descr, get_field_descr, get_array_descr,
+ get_call_descr, get_interiorfield_descr, get_dynamic_interiorfield_descr,
+ FieldDescr, ArrayDescr, CallDescr, InteriorFieldDescr)
from pypy.jit.backend.llsupport.asmmemmgr import AsmMemoryManager
@@ -107,9 +106,15 @@
_exception_emulator[1] = 0
self.saved_exc_value = rffi.cast(llmemory.GCREF, v_i)
+ def save_exception_memoryerr():
+ save_exception()
+ if not self.saved_exc_value:
+ self.saved_exc_value = "memoryerror!" # for tests
+
self.pos_exception = pos_exception
self.pos_exc_value = pos_exc_value
self.save_exception = save_exception
+ self.save_exception_memoryerr = save_exception_memoryerr
self.insert_stack_check = lambda: (0, 0, 0)
@@ -134,6 +139,15 @@
# in the assignment to self.saved_exc_value, as needed.
self.saved_exc_value = exc_value
+ def save_exception_memoryerr():
+ from pypy.rpython.annlowlevel import cast_instance_to_base_ptr
+ save_exception()
+ if not self.saved_exc_value:
+ exc = MemoryError()
+ exc = cast_instance_to_base_ptr(exc)
+ exc = lltype.cast_opaque_ptr(llmemory.GCREF, exc)
+ self.saved_exc_value = exc
+
from pypy.rlib import rstack
STACK_CHECK_SLOWPATH = lltype.Ptr(lltype.FuncType([lltype.Signed],
lltype.Void))
@@ -147,16 +161,19 @@
self.pos_exception = pos_exception
self.pos_exc_value = pos_exc_value
self.save_exception = save_exception
+ self.save_exception_memoryerr = save_exception_memoryerr
self.insert_stack_check = insert_stack_check
def _setup_on_leave_jitted_untranslated(self):
# assume we don't need a backend leave in this case
self.on_leave_jitted_save_exc = self.save_exception
+ self.on_leave_jitted_memoryerr = self.save_exception_memoryerr
self.on_leave_jitted_noexc = lambda : None
def _setup_on_leave_jitted_translated(self):
on_leave_jitted_hook = self.get_on_leave_jitted_hook()
save_exception = self.save_exception
+ save_exception_memoryerr = self.save_exception_memoryerr
def on_leave_jitted_noexc():
on_leave_jitted_hook()
@@ -165,16 +182,24 @@
save_exception()
on_leave_jitted_hook()
+ def on_leave_jitted_memoryerr():
+ save_exception_memoryerr()
+ on_leave_jitted_hook()
+
self.on_leave_jitted_noexc = on_leave_jitted_noexc
self.on_leave_jitted_save_exc = on_leave_jitted_save_exc
+ self.on_leave_jitted_memoryerr = on_leave_jitted_memoryerr
def get_on_leave_jitted_hook(self):
return lambda : None
_ON_JIT_LEAVE_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
- def get_on_leave_jitted_int(self, save_exception):
- if save_exception:
+ def get_on_leave_jitted_int(self, save_exception,
+ default_to_memoryerror=False):
+ if default_to_memoryerror:
+ f = llhelper(self._ON_JIT_LEAVE_FUNC, self.on_leave_jitted_memoryerr)
+ elif save_exception:
f = llhelper(self._ON_JIT_LEAVE_FUNC, self.on_leave_jitted_save_exc)
else:
f = llhelper(self._ON_JIT_LEAVE_FUNC, self.on_leave_jitted_noexc)
@@ -221,14 +246,14 @@
return get_field_descr(self.gc_ll_descr, STRUCT, fieldname)
def unpack_fielddescr(self, fielddescr):
- assert isinstance(fielddescr, BaseFieldDescr)
+ assert isinstance(fielddescr, FieldDescr)
return fielddescr.offset
unpack_fielddescr._always_inline_ = True
def unpack_fielddescr_size(self, fielddescr):
- assert isinstance(fielddescr, BaseFieldDescr)
+ assert isinstance(fielddescr, FieldDescr)
ofs = fielddescr.offset
- size = fielddescr.get_field_size(self.translate_support_code)
+ size = fielddescr.field_size
sign = fielddescr.is_field_signed()
return ofs, size, sign
unpack_fielddescr_size._always_inline_ = True
@@ -237,23 +262,23 @@
return get_array_descr(self.gc_ll_descr, A)
def interiorfielddescrof(self, A, fieldname):
- return get_interiorfield_descr(self.gc_ll_descr, A, A.OF, fieldname)
+ return get_interiorfield_descr(self.gc_ll_descr, A, fieldname)
def interiorfielddescrof_dynamic(self, offset, width, fieldsize,
- is_pointer, is_float, is_signed):
- arraydescr = DynamicArrayNoLengthDescr(width)
- fielddescr = DynamicFieldDescr(offset, fieldsize, is_pointer, is_float, is_signed)
- return InteriorFieldDescr(arraydescr, fielddescr)
+ is_pointer, is_float, is_signed):
+ return get_dynamic_interiorfield_descr(self.gc_ll_descr,
+ offset, width, fieldsize,
+ is_pointer, is_float, is_signed)
def unpack_arraydescr(self, arraydescr):
- assert isinstance(arraydescr, BaseArrayDescr)
- return arraydescr.get_base_size(self.translate_support_code)
+ assert isinstance(arraydescr, ArrayDescr)
+ return arraydescr.basesize
unpack_arraydescr._always_inline_ = True
def unpack_arraydescr_size(self, arraydescr):
- assert isinstance(arraydescr, BaseArrayDescr)
- ofs = arraydescr.get_base_size(self.translate_support_code)
- size = arraydescr.get_item_size(self.translate_support_code)
+ assert isinstance(arraydescr, ArrayDescr)
+ ofs = arraydescr.basesize
+ size = arraydescr.itemsize
sign = arraydescr.is_item_signed()
return ofs, size, sign
unpack_arraydescr_size._always_inline_ = True
@@ -281,8 +306,8 @@
# ____________________________________________________________
def bh_arraylen_gc(self, arraydescr, array):
- assert isinstance(arraydescr, BaseArrayDescr)
- ofs = arraydescr.get_ofs_length(self.translate_support_code)
+ assert isinstance(arraydescr, ArrayDescr)
+ ofs = arraydescr.lendescr.offset
return rffi.cast(rffi.CArrayPtr(lltype.Signed), array)[ofs/WORD]
@specialize.argtype(2)
@@ -367,7 +392,7 @@
arraydescr = descr.arraydescr
ofs, size, _ = self.unpack_arraydescr_size(arraydescr)
ofs += descr.fielddescr.offset
- fieldsize = descr.fielddescr.get_field_size(self.translate_support_code)
+ fieldsize = descr.fielddescr.field_size
sign = descr.fielddescr.is_field_signed()
fullofs = itemindex * size + ofs
# --- start of GC unsafe code (no GC operation!) ---
@@ -418,7 +443,7 @@
arraydescr = descr.arraydescr
ofs, size, _ = self.unpack_arraydescr_size(arraydescr)
ofs += descr.fielddescr.offset
- fieldsize = descr.fielddescr.get_field_size(self.translate_support_code)
+ fieldsize = descr.fielddescr.field_size
ofs = itemindex * size + ofs
# --- start of GC unsafe code (no GC operation!) ---
items = rffi.ptradd(rffi.cast(rffi.CCHARP, gcref), ofs)
@@ -604,25 +629,26 @@
rstr.copy_unicode_contents(src, dst, srcstart, dststart, length)
def bh_call_i(self, func, calldescr, args_i, args_r, args_f):
- assert isinstance(calldescr, BaseIntCallDescr)
+ assert isinstance(calldescr, CallDescr)
if not we_are_translated():
calldescr.verify_types(args_i, args_r, args_f, history.INT + 'S')
- return calldescr.call_stub(func, args_i, args_r, args_f)
+ return calldescr.call_stub_i(func, args_i, args_r, args_f)
def bh_call_r(self, func, calldescr, args_i, args_r, args_f):
- assert isinstance(calldescr, GcPtrCallDescr)
+ assert isinstance(calldescr, CallDescr)
if not we_are_translated():
calldescr.verify_types(args_i, args_r, args_f, history.REF)
- return calldescr.call_stub(func, args_i, args_r, args_f)
+ return calldescr.call_stub_r(func, args_i, args_r, args_f)
def bh_call_f(self, func, calldescr, args_i, args_r, args_f):
- assert isinstance(calldescr, FloatCallDescr) # or LongLongCallDescr
+ assert isinstance(calldescr, CallDescr)
if not we_are_translated():
calldescr.verify_types(args_i, args_r, args_f, history.FLOAT + 'L')
- return calldescr.call_stub(func, args_i, args_r, args_f)
+ return calldescr.call_stub_f(func, args_i, args_r, args_f)
def bh_call_v(self, func, calldescr, args_i, args_r, args_f):
- assert isinstance(calldescr, VoidCallDescr)
+ assert isinstance(calldescr, CallDescr)
if not we_are_translated():
calldescr.verify_types(args_i, args_r, args_f, history.VOID)
- return calldescr.call_stub(func, args_i, args_r, args_f)
+ # the 'i' return value is ignored (and nonsense anyway)
+ calldescr.call_stub_i(func, args_i, args_r, args_f)
diff --git a/pypy/jit/backend/llsupport/rewrite.py b/pypy/jit/backend/llsupport/rewrite.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/llsupport/rewrite.py
@@ -0,0 +1,328 @@
+import sys
+from pypy.rlib.rarithmetic import ovfcheck
+from pypy.jit.metainterp.history import ConstInt, BoxPtr, ConstPtr
+from pypy.jit.metainterp.resoperation import ResOperation, rop
+from pypy.jit.codewriter import heaptracker
+from pypy.jit.backend.llsupport.symbolic import WORD
+from pypy.jit.backend.llsupport.descr import SizeDescr, ArrayDescr
+
+
+class GcRewriterAssembler(object):
+ # This class performs the following rewrites on the list of operations:
+ #
+ # - Remove the DEBUG_MERGE_POINTs.
+ #
+ # - Turn all NEW_xxx to either a CALL_MALLOC_GC, or a CALL_MALLOC_NURSERY
+ # followed by SETFIELDs in order to initialize their GC fields. The
+ # two advantages of CALL_MALLOC_NURSERY is that it inlines the common
+ # path, and we need only one such operation to allocate several blocks
+ # of memory at once.
+ #
+ # - Add COND_CALLs to the write barrier before SETFIELD_GC and
+ # SETARRAYITEM_GC operations.
+
+ _previous_size = -1
+ _op_malloc_nursery = None
+ _v_last_malloced_nursery = None
+ c_zero = ConstInt(0)
+
+ def __init__(self, gc_ll_descr, cpu):
+ self.gc_ll_descr = gc_ll_descr
+ self.cpu = cpu
+ self.newops = []
+ self.known_lengths = {}
+ self.recent_mallocs = {} # set of variables
+
+ def rewrite(self, operations):
+ # we can only remember one malloc since the next malloc can possibly
+ # collect; but we can try to collapse several known-size mallocs into
+ # one, both for performance and to reduce the number of write
+ # barriers. We do this on each "basic block" of operations, which in
+ # this case means between CALLs or unknown-size mallocs.
+ #
+ for op in operations:
+ if op.getopnum() == rop.DEBUG_MERGE_POINT:
+ continue
+ # ---------- turn NEWxxx into CALL_MALLOC_xxx ----------
+ if op.is_malloc():
+ self.handle_malloc_operation(op)
+ continue
+ elif op.can_malloc():
+ self.emitting_an_operation_that_can_collect()
+ elif op.getopnum() == rop.LABEL:
+ self.emitting_an_operation_that_can_collect()
+ self.known_lengths.clear()
+ # ---------- write barriers ----------
+ if self.gc_ll_descr.write_barrier_descr is not None:
+ if op.getopnum() == rop.SETFIELD_GC:
+ self.handle_write_barrier_setfield(op)
+ continue
+ if op.getopnum() == rop.SETINTERIORFIELD_GC:
+ self.handle_write_barrier_setinteriorfield(op)
+ continue
+ if op.getopnum() == rop.SETARRAYITEM_GC:
+ self.handle_write_barrier_setarrayitem(op)
+ continue
+ # ----------
+ self.newops.append(op)
+ return self.newops
+
+ # ----------
+
+ def handle_malloc_operation(self, op):
+ opnum = op.getopnum()
+ if opnum == rop.NEW:
+ self.handle_new_fixedsize(op.getdescr(), op)
+ elif opnum == rop.NEW_WITH_VTABLE:
+ classint = op.getarg(0).getint()
+ descr = heaptracker.vtable2descr(self.cpu, classint)
+ self.handle_new_fixedsize(descr, op)
+ if self.gc_ll_descr.fielddescr_vtable is not None:
+ op = ResOperation(rop.SETFIELD_GC,
+ [op.result, ConstInt(classint)], None,
+ descr=self.gc_ll_descr.fielddescr_vtable)
+ self.newops.append(op)
+ elif opnum == rop.NEW_ARRAY:
+ descr = op.getdescr()
+ assert isinstance(descr, ArrayDescr)
+ self.handle_new_array(descr, op)
+ elif opnum == rop.NEWSTR:
+ self.handle_new_array(self.gc_ll_descr.str_descr, op)
+ elif opnum == rop.NEWUNICODE:
+ self.handle_new_array(self.gc_ll_descr.unicode_descr, op)
+ else:
+ raise NotImplementedError(op.getopname())
+
+ def handle_new_fixedsize(self, descr, op):
+ assert isinstance(descr, SizeDescr)
+ size = descr.size
+ self.gen_malloc_nursery(size, op.result)
+ self.gen_initialize_tid(op.result, descr.tid)
+
+ def handle_new_array(self, arraydescr, op):
+ v_length = op.getarg(0)
+ total_size = -1
+ if isinstance(v_length, ConstInt):
+ num_elem = v_length.getint()
+ self.known_lengths[op.result] = num_elem
+ try:
+ var_size = ovfcheck(arraydescr.itemsize * num_elem)
+ total_size = ovfcheck(arraydescr.basesize + var_size)
+ except OverflowError:
+ pass # total_size is still -1
+ elif arraydescr.itemsize == 0:
+ total_size = arraydescr.basesize
+ if 0 <= total_size <= 0xffffff: # up to 16MB, arbitrarily
+ self.gen_malloc_nursery(total_size, op.result)
+ self.gen_initialize_tid(op.result, arraydescr.tid)
+ self.gen_initialize_len(op.result, v_length, arraydescr.lendescr)
+ elif self.gc_ll_descr.kind == 'boehm':
+ self.gen_boehm_malloc_array(arraydescr, v_length, op.result)
+ else:
+ opnum = op.getopnum()
+ if opnum == rop.NEW_ARRAY:
+ self.gen_malloc_array(arraydescr, v_length, op.result)
+ elif opnum == rop.NEWSTR:
+ self.gen_malloc_str(v_length, op.result)
+ elif opnum == rop.NEWUNICODE:
+ self.gen_malloc_unicode(v_length, op.result)
+ else:
+ raise NotImplementedError(op.getopname())
+
+ # ----------
+
+ def emitting_an_operation_that_can_collect(self):
+ # must be called whenever we emit an operation that can collect:
+ # forgets the previous MALLOC_NURSERY, if any; and empty the
+ # set 'recent_mallocs', so that future SETFIELDs will generate
+ # a write barrier as usual.
+ self._op_malloc_nursery = None
+ self.recent_mallocs.clear()
+
+ def _gen_call_malloc_gc(self, args, v_result, descr):
+ """Generate a CALL_MALLOC_GC with the given args."""
+ self.emitting_an_operation_that_can_collect()
+ op = ResOperation(rop.CALL_MALLOC_GC, args, v_result, descr)
+ self.newops.append(op)
+ # mark 'v_result' as freshly malloced
+ self.recent_mallocs[v_result] = None
+
+ def gen_malloc_fixedsize(self, size, v_result):
+ """Generate a CALL_MALLOC_GC(malloc_fixedsize_fn, Const(size)).
+ Note that with the framework GC, this should be called very rarely.
+ """
+ addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_fixedsize')
+ self._gen_call_malloc_gc([ConstInt(addr), ConstInt(size)], v_result,
+ self.gc_ll_descr.malloc_fixedsize_descr)
+
+ def gen_boehm_malloc_array(self, arraydescr, v_num_elem, v_result):
+ """Generate a CALL_MALLOC_GC(malloc_array_fn, ...) for Boehm."""
+ addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_array')
+ self._gen_call_malloc_gc([ConstInt(addr),
+ ConstInt(arraydescr.basesize),
+ v_num_elem,
+ ConstInt(arraydescr.itemsize),
+ ConstInt(arraydescr.lendescr.offset)],
+ v_result,
+ self.gc_ll_descr.malloc_array_descr)
+
+ def gen_malloc_array(self, arraydescr, v_num_elem, v_result):
+ """Generate a CALL_MALLOC_GC(malloc_array_fn, ...) going either
+ to the standard or the nonstandard version of the function."""
+ #
+ if (arraydescr.basesize == self.gc_ll_descr.standard_array_basesize
+ and arraydescr.lendescr.offset ==
+ self.gc_ll_descr.standard_array_length_ofs):
+ # this is a standard-looking array, common case
+ addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_array')
+ args = [ConstInt(addr),
+ ConstInt(arraydescr.itemsize),
+ ConstInt(arraydescr.tid),
+ v_num_elem]
+ calldescr = self.gc_ll_descr.malloc_array_descr
+ else:
+ # rare case, so don't care too much about the number of arguments
+ addr = self.gc_ll_descr.get_malloc_fn_addr(
+ 'malloc_array_nonstandard')
+ args = [ConstInt(addr),
+ ConstInt(arraydescr.basesize),
+ ConstInt(arraydescr.itemsize),
+ ConstInt(arraydescr.lendescr.offset),
+ ConstInt(arraydescr.tid),
+ v_num_elem]
+ calldescr = self.gc_ll_descr.malloc_array_nonstandard_descr
+ self._gen_call_malloc_gc(args, v_result, calldescr)
+
+ def gen_malloc_str(self, v_num_elem, v_result):
+ """Generate a CALL_MALLOC_GC(malloc_str_fn, ...)."""
+ addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_str')
+ self._gen_call_malloc_gc([ConstInt(addr), v_num_elem], v_result,
+ self.gc_ll_descr.malloc_str_descr)
+
+ def gen_malloc_unicode(self, v_num_elem, v_result):
+ """Generate a CALL_MALLOC_GC(malloc_unicode_fn, ...)."""
+ addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_unicode')
+ self._gen_call_malloc_gc([ConstInt(addr), v_num_elem], v_result,
+ self.gc_ll_descr.malloc_unicode_descr)
+
+ def gen_malloc_nursery(self, size, v_result):
+ """Try to generate or update a CALL_MALLOC_NURSERY.
+ If that fails, generate a plain CALL_MALLOC_GC instead.
+ """
+ size = self.round_up_for_allocation(size)
+ if not self.gc_ll_descr.can_use_nursery_malloc(size):
+ self.gen_malloc_fixedsize(size, v_result)
+ return
+ #
+ op = None
+ if self._op_malloc_nursery is not None:
+ # already a MALLOC_NURSERY: increment its total size
+ total_size = self._op_malloc_nursery.getarg(0).getint()
+ total_size += size
+ if self.gc_ll_descr.can_use_nursery_malloc(total_size):
+ # if the total size is still reasonable, merge it
+ self._op_malloc_nursery.setarg(0, ConstInt(total_size))
+ op = ResOperation(rop.INT_ADD,
+ [self._v_last_malloced_nursery,
+ ConstInt(self._previous_size)],
+ v_result)
+ if op is None:
+ # if we failed to merge with a previous MALLOC_NURSERY, emit one
+ self.emitting_an_operation_that_can_collect()
+ op = ResOperation(rop.CALL_MALLOC_NURSERY,
+ [ConstInt(size)],
+ v_result)
+ self._op_malloc_nursery = op
+ #
+ self.newops.append(op)
+ self._previous_size = size
+ self._v_last_malloced_nursery = v_result
+ self.recent_mallocs[v_result] = None
+
+ def gen_initialize_tid(self, v_newgcobj, tid):
+ if self.gc_ll_descr.fielddescr_tid is not None:
+ # produce a SETFIELD to initialize the GC header
+ op = ResOperation(rop.SETFIELD_GC,
+ [v_newgcobj, ConstInt(tid)], None,
+ descr=self.gc_ll_descr.fielddescr_tid)
+ self.newops.append(op)
+
+ def gen_initialize_len(self, v_newgcobj, v_length, arraylen_descr):
+ # produce a SETFIELD to initialize the array length
+ op = ResOperation(rop.SETFIELD_GC,
+ [v_newgcobj, v_length], None,
+ descr=arraylen_descr)
+ self.newops.append(op)
+
+ # ----------
+
+ def handle_write_barrier_setfield(self, op):
+ val = op.getarg(0)
+ # no need for a write barrier in the case of previous malloc
+ if val not in self.recent_mallocs:
+ v = op.getarg(1)
+ if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
+ bool(v.value)): # store a non-NULL
+ self.gen_write_barrier(op.getarg(0), v)
+ op = op.copy_and_change(rop.SETFIELD_RAW)
+ self.newops.append(op)
+
+ def handle_write_barrier_setinteriorfield(self, op):
+ val = op.getarg(0)
+ # no need for a write barrier in the case of previous malloc
+ if val not in self.recent_mallocs:
+ v = op.getarg(2)
+ if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
+ bool(v.value)): # store a non-NULL
+ self.gen_write_barrier(op.getarg(0), v)
+ op = op.copy_and_change(rop.SETINTERIORFIELD_RAW)
+ self.newops.append(op)
+
+ def handle_write_barrier_setarrayitem(self, op):
+ val = op.getarg(0)
+ # no need for a write barrier in the case of previous malloc
+ if val not in self.recent_mallocs:
+ v = op.getarg(2)
+ if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
+ bool(v.value)): # store a non-NULL
+ self.gen_write_barrier_array(op.getarg(0),
+ op.getarg(1), v)
+ op = op.copy_and_change(rop.SETARRAYITEM_RAW)
+ self.newops.append(op)
+
+ def gen_write_barrier(self, v_base, v_value):
+ write_barrier_descr = self.gc_ll_descr.write_barrier_descr
+ args = [v_base, v_value]
+ self.newops.append(ResOperation(rop.COND_CALL_GC_WB, args, None,
+ descr=write_barrier_descr))
+
+ def gen_write_barrier_array(self, v_base, v_index, v_value):
+ write_barrier_descr = self.gc_ll_descr.write_barrier_descr
+ if write_barrier_descr.has_write_barrier_from_array(self.cpu):
+ # If we know statically the length of 'v', and it is not too
+ # big, then produce a regular write_barrier. If it's unknown or
+ # too big, produce instead a write_barrier_from_array.
+ LARGE = 130
+ length = self.known_lengths.get(v_base, LARGE)
+ if length >= LARGE:
+ # unknown or too big: produce a write_barrier_from_array
+ args = [v_base, v_index, v_value]
+ self.newops.append(
+ ResOperation(rop.COND_CALL_GC_WB_ARRAY, args, None,
+ descr=write_barrier_descr))
+ return
+ # fall-back case: produce a write_barrier
+ self.gen_write_barrier(v_base, v_value)
+
+ def round_up_for_allocation(self, size):
+ if not self.gc_ll_descr.round_up:
+ return size
+ if self.gc_ll_descr.translate_support_code:
+ from pypy.rpython.lltypesystem import llarena
+ return llarena.round_up_for_allocation(
+ size, self.gc_ll_descr.minimal_size_in_nursery)
+ else:
+ # non-translated: do it manually
+ # assume that "self.gc_ll_descr.minimal_size_in_nursery" is 2 WORDs
+ size = max(size, 2 * WORD)
+ return (size + WORD-1) & ~(WORD-1) # round up
diff --git a/pypy/jit/backend/llsupport/test/test_descr.py b/pypy/jit/backend/llsupport/test/test_descr.py
--- a/pypy/jit/backend/llsupport/test/test_descr.py
+++ b/pypy/jit/backend/llsupport/test/test_descr.py
@@ -1,4 +1,4 @@
-from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rpython.lltypesystem import lltype, rffi, rstr
from pypy.jit.backend.llsupport.descr import *
from pypy.jit.backend.llsupport import symbolic
from pypy.rlib.objectmodel import Symbolic
@@ -53,18 +53,6 @@
('z', lltype.Ptr(U)),
('f', lltype.Float),
('s', lltype.SingleFloat))
- assert getFieldDescrClass(lltype.Ptr(T)) is GcPtrFieldDescr
- assert getFieldDescrClass(lltype.Ptr(U)) is NonGcPtrFieldDescr
- cls = getFieldDescrClass(lltype.Char)
- assert cls != getFieldDescrClass(lltype.Signed)
- assert cls == getFieldDescrClass(lltype.Char)
- clsf = getFieldDescrClass(lltype.Float)
- assert clsf != cls
- assert clsf == getFieldDescrClass(lltype.Float)
- clss = getFieldDescrClass(lltype.SingleFloat)
- assert clss not in (cls, clsf)
- assert clss == getFieldDescrClass(lltype.SingleFloat)
- assert clss == getFieldDescrClass(rffi.UINT) # for now
#
c0 = GcCache(False)
c1 = GcCache(True)
@@ -77,11 +65,7 @@
descr_z = get_field_descr(c2, S, 'z')
descr_f = get_field_descr(c2, S, 'f')
descr_s = get_field_descr(c2, S, 's')
- assert descr_x.__class__ is cls
- assert descr_y.__class__ is GcPtrFieldDescr
- assert descr_z.__class__ is NonGcPtrFieldDescr
- assert descr_f.__class__ is clsf
- assert descr_s.__class__ is clss
+ assert isinstance(descr_x, FieldDescr)
assert descr_x.name == 'S.x'
assert descr_y.name == 'S.y'
assert descr_z.name == 'S.z'
@@ -90,33 +74,27 @@
if not tsc:
assert descr_x.offset < descr_y.offset < descr_z.offset
assert descr_x.sort_key() < descr_y.sort_key() < descr_z.sort_key()
- assert descr_x.get_field_size(False) == rffi.sizeof(lltype.Char)
- assert descr_y.get_field_size(False) == rffi.sizeof(lltype.Ptr(T))
- assert descr_z.get_field_size(False) == rffi.sizeof(lltype.Ptr(U))
- assert descr_f.get_field_size(False) == rffi.sizeof(lltype.Float)
- assert descr_s.get_field_size(False) == rffi.sizeof(
- lltype.SingleFloat)
+ assert descr_x.field_size == rffi.sizeof(lltype.Char)
+ assert descr_y.field_size == rffi.sizeof(lltype.Ptr(T))
+ assert descr_z.field_size == rffi.sizeof(lltype.Ptr(U))
+ assert descr_f.field_size == rffi.sizeof(lltype.Float)
+ assert descr_s.field_size == rffi.sizeof(lltype.SingleFloat)
else:
assert isinstance(descr_x.offset, Symbolic)
assert isinstance(descr_y.offset, Symbolic)
assert isinstance(descr_z.offset, Symbolic)
assert isinstance(descr_f.offset, Symbolic)
assert isinstance(descr_s.offset, Symbolic)
- assert isinstance(descr_x.get_field_size(True), Symbolic)
- assert isinstance(descr_y.get_field_size(True), Symbolic)
- assert isinstance(descr_z.get_field_size(True), Symbolic)
- assert isinstance(descr_f.get_field_size(True), Symbolic)
- assert isinstance(descr_s.get_field_size(True), Symbolic)
- assert not descr_x.is_pointer_field()
- assert descr_y.is_pointer_field()
- assert not descr_z.is_pointer_field()
- assert not descr_f.is_pointer_field()
- assert not descr_s.is_pointer_field()
- assert not descr_x.is_float_field()
- assert not descr_y.is_float_field()
- assert not descr_z.is_float_field()
- assert descr_f.is_float_field()
- assert not descr_s.is_float_field()
+ assert isinstance(descr_x.field_size, Symbolic)
+ assert isinstance(descr_y.field_size, Symbolic)
+ assert isinstance(descr_z.field_size, Symbolic)
+ assert isinstance(descr_f.field_size, Symbolic)
+ assert isinstance(descr_s.field_size, Symbolic)
+ assert descr_x.flag == FLAG_UNSIGNED
+ assert descr_y.flag == FLAG_POINTER
+ assert descr_z.flag == FLAG_UNSIGNED
+ assert descr_f.flag == FLAG_FLOAT
+ assert descr_s.flag == FLAG_UNSIGNED
def test_get_field_descr_sign():
@@ -128,7 +106,8 @@
for tsc in [False, True]:
c2 = GcCache(tsc)
descr_x = get_field_descr(c2, S, 'x')
- assert descr_x.is_field_signed() == signed
+ assert descr_x.flag == {False: FLAG_UNSIGNED,
+ True: FLAG_SIGNED }[signed]
def test_get_field_descr_longlong():
if sys.maxint > 2147483647:
@@ -136,9 +115,8 @@
c0 = GcCache(False)
S = lltype.GcStruct('S', ('y', lltype.UnsignedLongLong))
descr = get_field_descr(c0, S, 'y')
- assert not descr.is_pointer_field()
- assert descr.is_float_field()
- assert descr.get_field_size(False) == 8
+ assert descr.flag == FLAG_FLOAT
+ assert descr.field_size == 8
def test_get_array_descr():
@@ -149,19 +127,8 @@
A3 = lltype.GcArray(lltype.Ptr(U))
A4 = lltype.GcArray(lltype.Float)
A5 = lltype.GcArray(lltype.Struct('x', ('v', lltype.Signed),
- ('k', lltype.Signed)))
+ ('k', lltype.Signed)))
A6 = lltype.GcArray(lltype.SingleFloat)
- assert getArrayDescrClass(A2) is GcPtrArrayDescr
- assert getArrayDescrClass(A3) is NonGcPtrArrayDescr
- cls = getArrayDescrClass(A1)
- assert cls != getArrayDescrClass(lltype.GcArray(lltype.Signed))
- assert cls == getArrayDescrClass(lltype.GcArray(lltype.Char))
- clsf = getArrayDescrClass(A4)
- assert clsf != cls
- assert clsf == getArrayDescrClass(lltype.GcArray(lltype.Float))
- clss = getArrayDescrClass(A6)
- assert clss not in (clsf, cls)
- assert clss == getArrayDescrClass(lltype.GcArray(rffi.UINT))
#
c0 = GcCache(False)
descr1 = get_array_descr(c0, A1)
@@ -170,82 +137,61 @@
descr4 = get_array_descr(c0, A4)
descr5 = get_array_descr(c0, A5)
descr6 = get_array_descr(c0, A6)
- assert descr1.__class__ is cls
- assert descr2.__class__ is GcPtrArrayDescr
- assert descr3.__class__ is NonGcPtrArrayDescr
- assert descr4.__class__ is clsf
- assert descr6.__class__ is clss
+ assert isinstance(descr1, ArrayDescr)
assert descr1 == get_array_descr(c0, lltype.GcArray(lltype.Char))
- assert not descr1.is_array_of_pointers()
- assert descr2.is_array_of_pointers()
- assert not descr3.is_array_of_pointers()
- assert not descr4.is_array_of_pointers()
- assert not descr5.is_array_of_pointers()
- assert not descr1.is_array_of_floats()
- assert not descr2.is_array_of_floats()
- assert not descr3.is_array_of_floats()
- assert descr4.is_array_of_floats()
- assert not descr5.is_array_of_floats()
+ assert descr1.flag == FLAG_UNSIGNED
+ assert descr2.flag == FLAG_POINTER
+ assert descr3.flag == FLAG_UNSIGNED
+ assert descr4.flag == FLAG_FLOAT
+ assert descr5.flag == FLAG_STRUCT
+ assert descr6.flag == FLAG_UNSIGNED
#
def get_alignment(code):
# Retrieve default alignment for the compiler/platform
return struct.calcsize('l' + code) - struct.calcsize(code)
- assert descr1.get_base_size(False) == get_alignment('c')
- assert descr2.get_base_size(False) == get_alignment('p')
- assert descr3.get_base_size(False) == get_alignment('p')
- assert descr4.get_base_size(False) == get_alignment('d')
- assert descr5.get_base_size(False) == get_alignment('f')
- assert descr1.get_ofs_length(False) == 0
- assert descr2.get_ofs_length(False) == 0
- assert descr3.get_ofs_length(False) == 0
- assert descr4.get_ofs_length(False) == 0
- assert descr5.get_ofs_length(False) == 0
- assert descr1.get_item_size(False) == rffi.sizeof(lltype.Char)
- assert descr2.get_item_size(False) == rffi.sizeof(lltype.Ptr(T))
- assert descr3.get_item_size(False) == rffi.sizeof(lltype.Ptr(U))
- assert descr4.get_item_size(False) == rffi.sizeof(lltype.Float)
- assert descr5.get_item_size(False) == rffi.sizeof(lltype.Signed) * 2
- assert descr6.get_item_size(False) == rffi.sizeof(lltype.SingleFloat)
+ assert descr1.basesize == get_alignment('c')
+ assert descr2.basesize == get_alignment('p')
+ assert descr3.basesize == get_alignment('p')
+ assert descr4.basesize == get_alignment('d')
+ assert descr5.basesize == get_alignment('f')
+ assert descr1.lendescr.offset == 0
+ assert descr2.lendescr.offset == 0
+ assert descr3.lendescr.offset == 0
+ assert descr4.lendescr.offset == 0
+ assert descr5.lendescr.offset == 0
+ assert descr1.itemsize == rffi.sizeof(lltype.Char)
+ assert descr2.itemsize == rffi.sizeof(lltype.Ptr(T))
+ assert descr3.itemsize == rffi.sizeof(lltype.Ptr(U))
+ assert descr4.itemsize == rffi.sizeof(lltype.Float)
+ assert descr5.itemsize == rffi.sizeof(lltype.Signed) * 2
+ assert descr6.itemsize == rffi.sizeof(lltype.SingleFloat)
#
- assert isinstance(descr1.get_base_size(True), Symbolic)
- assert isinstance(descr2.get_base_size(True), Symbolic)
- assert isinstance(descr3.get_base_size(True), Symbolic)
- assert isinstance(descr4.get_base_size(True), Symbolic)
- assert isinstance(descr5.get_base_size(True), Symbolic)
- assert isinstance(descr1.get_ofs_length(True), Symbolic)
- assert isinstance(descr2.get_ofs_length(True), Symbolic)
- assert isinstance(descr3.get_ofs_length(True), Symbolic)
- assert isinstance(descr4.get_ofs_length(True), Symbolic)
- assert isinstance(descr5.get_ofs_length(True), Symbolic)
- assert isinstance(descr1.get_item_size(True), Symbolic)
- assert isinstance(descr2.get_item_size(True), Symbolic)
- assert isinstance(descr3.get_item_size(True), Symbolic)
- assert isinstance(descr4.get_item_size(True), Symbolic)
- assert isinstance(descr5.get_item_size(True), Symbolic)
CA = rffi.CArray(lltype.Signed)
descr = get_array_descr(c0, CA)
- assert not descr.is_array_of_floats()
- assert descr.get_base_size(False) == 0
- assert descr.get_ofs_length(False) == -1
+ assert descr.flag == FLAG_SIGNED
+ assert descr.basesize == 0
+ assert descr.lendescr is None
CA = rffi.CArray(lltype.Ptr(lltype.GcStruct('S')))
descr = get_array_descr(c0, CA)
- assert descr.is_array_of_pointers()
- assert descr.get_base_size(False) == 0
- assert descr.get_ofs_length(False) == -1
+ assert descr.flag == FLAG_POINTER
+ assert descr.basesize == 0
+ assert descr.lendescr is None
CA = rffi.CArray(lltype.Ptr(lltype.Struct('S')))
descr = get_array_descr(c0, CA)
- assert descr.get_base_size(False) == 0
- assert descr.get_ofs_length(False) == -1
+ assert descr.flag == FLAG_UNSIGNED
+ assert descr.basesize == 0
+ assert descr.lendescr is None
CA = rffi.CArray(lltype.Float)
descr = get_array_descr(c0, CA)
- assert descr.is_array_of_floats()
- assert descr.get_base_size(False) == 0
- assert descr.get_ofs_length(False) == -1
+ assert descr.flag == FLAG_FLOAT
+ assert descr.basesize == 0
+ assert descr.lendescr is None
CA = rffi.CArray(rffi.FLOAT)
descr = get_array_descr(c0, CA)
- assert not descr.is_array_of_floats()
- assert descr.get_base_size(False) == 0
- assert descr.get_ofs_length(False) == -1
+ assert descr.flag == FLAG_UNSIGNED
+ assert descr.basesize == 0
+ assert descr.itemsize == rffi.sizeof(lltype.SingleFloat)
+ assert descr.lendescr is None
def test_get_array_descr_sign():
@@ -257,46 +203,55 @@
for tsc in [False, True]:
c2 = GcCache(tsc)
arraydescr = get_array_descr(c2, A)
- assert arraydescr.is_item_signed() == signed
+ assert arraydescr.flag == {False: FLAG_UNSIGNED,
+ True: FLAG_SIGNED }[signed]
#
RA = rffi.CArray(RESTYPE)
for tsc in [False, True]:
c2 = GcCache(tsc)
arraydescr = get_array_descr(c2, RA)
- assert arraydescr.is_item_signed() == signed
+ assert arraydescr.flag == {False: FLAG_UNSIGNED,
+ True: FLAG_SIGNED }[signed]
+
+
+def test_get_array_descr_str():
+ c0 = GcCache(False)
+ descr1 = get_array_descr(c0, rstr.STR)
+ assert descr1.itemsize == rffi.sizeof(lltype.Char)
+ assert descr1.flag == FLAG_UNSIGNED
def test_get_call_descr_not_translated():
c0 = GcCache(False)
descr1 = get_call_descr(c0, [lltype.Char, lltype.Signed], lltype.Char)
- assert descr1.get_result_size(False) == rffi.sizeof(lltype.Char)
- assert descr1.get_return_type() == history.INT
+ assert descr1.get_result_size() == rffi.sizeof(lltype.Char)
+ assert descr1.get_result_type() == history.INT
assert descr1.arg_classes == "ii"
#
T = lltype.GcStruct('T')
descr2 = get_call_descr(c0, [lltype.Ptr(T)], lltype.Ptr(T))
- assert descr2.get_result_size(False) == rffi.sizeof(lltype.Ptr(T))
- assert descr2.get_return_type() == history.REF
+ assert descr2.get_result_size() == rffi.sizeof(lltype.Ptr(T))
+ assert descr2.get_result_type() == history.REF
assert descr2.arg_classes == "r"
#
U = lltype.GcStruct('U', ('x', lltype.Signed))
assert descr2 == get_call_descr(c0, [lltype.Ptr(U)], lltype.Ptr(U))
#
V = lltype.Struct('V', ('x', lltype.Signed))
- assert (get_call_descr(c0, [], lltype.Ptr(V)).get_return_type() ==
+ assert (get_call_descr(c0, [], lltype.Ptr(V)).get_result_type() ==
history.INT)
#
- assert (get_call_descr(c0, [], lltype.Void).get_return_type() ==
+ assert (get_call_descr(c0, [], lltype.Void).get_result_type() ==
history.VOID)
#
descr4 = get_call_descr(c0, [lltype.Float, lltype.Float], lltype.Float)
- assert descr4.get_result_size(False) == rffi.sizeof(lltype.Float)
- assert descr4.get_return_type() == history.FLOAT
+ assert descr4.get_result_size() == rffi.sizeof(lltype.Float)
+ assert descr4.get_result_type() == history.FLOAT
assert descr4.arg_classes == "ff"
#
descr5 = get_call_descr(c0, [lltype.SingleFloat], lltype.SingleFloat)
- assert descr5.get_result_size(False) == rffi.sizeof(lltype.SingleFloat)
- assert descr5.get_return_type() == "S"
+ assert descr5.get_result_size() == rffi.sizeof(lltype.SingleFloat)
+ assert descr5.get_result_type() == "S"
assert descr5.arg_classes == "S"
def test_get_call_descr_not_translated_longlong():
@@ -305,13 +260,13 @@
c0 = GcCache(False)
#
descr5 = get_call_descr(c0, [lltype.SignedLongLong], lltype.Signed)
- assert descr5.get_result_size(False) == 4
- assert descr5.get_return_type() == history.INT
+ assert descr5.get_result_size() == 4
+ assert descr5.get_result_type() == history.INT
assert descr5.arg_classes == "L"
#
descr6 = get_call_descr(c0, [lltype.Signed], lltype.SignedLongLong)
- assert descr6.get_result_size(False) == 8
- assert descr6.get_return_type() == "L"
+ assert descr6.get_result_size() == 8
+ assert descr6.get_result_type() == "L"
assert descr6.arg_classes == "i"
def test_get_call_descr_translated():
@@ -319,18 +274,18 @@
T = lltype.GcStruct('T')
U = lltype.GcStruct('U', ('x', lltype.Signed))
descr3 = get_call_descr(c1, [lltype.Ptr(T)], lltype.Ptr(U))
- assert isinstance(descr3.get_result_size(True), Symbolic)
- assert descr3.get_return_type() == history.REF
+ assert isinstance(descr3.get_result_size(), Symbolic)
+ assert descr3.get_result_type() == history.REF
assert descr3.arg_classes == "r"
#
descr4 = get_call_descr(c1, [lltype.Float, lltype.Float], lltype.Float)
- assert isinstance(descr4.get_result_size(True), Symbolic)
- assert descr4.get_return_type() == history.FLOAT
+ assert isinstance(descr4.get_result_size(), Symbolic)
+ assert descr4.get_result_type() == history.FLOAT
assert descr4.arg_classes == "ff"
#
descr5 = get_call_descr(c1, [lltype.SingleFloat], lltype.SingleFloat)
- assert isinstance(descr5.get_result_size(True), Symbolic)
- assert descr5.get_return_type() == "S"
+ assert isinstance(descr5.get_result_size(), Symbolic)
+ assert descr5.get_result_type() == "S"
assert descr5.arg_classes == "S"
def test_call_descr_extra_info():
@@ -358,6 +313,10 @@
def test_repr_of_descr():
+ def repr_of_descr(descr):
+ s = descr.repr_of_descr()
+ assert ',' not in s # makes the life easier for pypy.tool.jitlogparser
+ return s
c0 = GcCache(False)
T = lltype.GcStruct('T')
S = lltype.GcStruct('S', ('x', lltype.Char),
@@ -365,33 +324,34 @@
('z', lltype.Ptr(T)))
descr1 = get_size_descr(c0, S)
s = symbolic.get_size(S, False)
- assert descr1.repr_of_descr() == '<SizeDescr %d>' % s
+ assert repr_of_descr(descr1) == '<SizeDescr %d>' % s
#
descr2 = get_field_descr(c0, S, 'y')
o, _ = symbolic.get_field_token(S, 'y', False)
- assert descr2.repr_of_descr() == '<GcPtrFieldDescr S.y %d>' % o
+ assert repr_of_descr(descr2) == '<FieldP S.y %d>' % o
#
descr2i = get_field_descr(c0, S, 'x')
o, _ = symbolic.get_field_token(S, 'x', False)
- assert descr2i.repr_of_descr() == '<CharFieldDescr S.x %d>' % o
+ assert repr_of_descr(descr2i) == '<FieldU S.x %d>' % o
#
descr3 = get_array_descr(c0, lltype.GcArray(lltype.Ptr(S)))
- assert descr3.repr_of_descr() == '<GcPtrArrayDescr>'
+ o = symbolic.get_size(lltype.Ptr(S), False)
+ assert repr_of_descr(descr3) == '<ArrayP %d>' % o
#
descr3i = get_array_descr(c0, lltype.GcArray(lltype.Char))
- assert descr3i.repr_of_descr() == '<CharArrayDescr>'
+ assert repr_of_descr(descr3i) == '<ArrayU 1>'
#
descr4 = get_call_descr(c0, [lltype.Char, lltype.Ptr(S)], lltype.Ptr(S))
- assert 'GcPtrCallDescr' in descr4.repr_of_descr()
+ assert repr_of_descr(descr4) == '<Callr %d ir>' % o
#
descr4i = get_call_descr(c0, [lltype.Char, lltype.Ptr(S)], lltype.Char)
- assert 'CharCallDescr' in descr4i.repr_of_descr()
+ assert repr_of_descr(descr4i) == '<Calli 1 ir>'
#
descr4f = get_call_descr(c0, [lltype.Char, lltype.Ptr(S)], lltype.Float)
- assert 'FloatCallDescr' in descr4f.repr_of_descr()
+ assert repr_of_descr(descr4f) == '<Callf 8 ir>'
#
descr5f = get_call_descr(c0, [lltype.Char], lltype.SingleFloat)
- assert 'SingleFloatCallDescr' in descr5f.repr_of_descr()
+ assert repr_of_descr(descr5f) == '<CallS 4 i>'
def test_call_stubs_1():
c0 = GcCache(False)
@@ -401,10 +361,10 @@
def f(a, b):
return 'c'
- call_stub = descr1.call_stub
fnptr = llhelper(lltype.Ptr(lltype.FuncType(ARGS, RES)), f)
- res = call_stub(rffi.cast(lltype.Signed, fnptr), [1, 2], None, None)
+ res = descr1.call_stub_i(rffi.cast(lltype.Signed, fnptr),
+ [1, 2], None, None)
assert res == ord('c')
def test_call_stubs_2():
@@ -421,8 +381,8 @@
a = lltype.malloc(ARRAY, 3)
opaquea = lltype.cast_opaque_ptr(llmemory.GCREF, a)
a[0] = 1
- res = descr2.call_stub(rffi.cast(lltype.Signed, fnptr),
- [], [opaquea], [longlong.getfloatstorage(3.5)])
+ res = descr2.call_stub_f(rffi.cast(lltype.Signed, fnptr),
+ [], [opaquea], [longlong.getfloatstorage(3.5)])
assert longlong.getrealfloat(res) == 4.5
def test_call_stubs_single_float():
@@ -445,6 +405,22 @@
a = intmask(singlefloat2uint(r_singlefloat(-10.0)))
b = intmask(singlefloat2uint(r_singlefloat(3.0)))
c = intmask(singlefloat2uint(r_singlefloat(2.0)))
- res = descr2.call_stub(rffi.cast(lltype.Signed, fnptr),
- [a, b, c], [], [])
+ res = descr2.call_stub_i(rffi.cast(lltype.Signed, fnptr),
+ [a, b, c], [], [])
assert float(uint2singlefloat(rffi.r_uint(res))) == -11.5
+
+def test_field_arraylen_descr():
+ c0 = GcCache(True)
+ A1 = lltype.GcArray(lltype.Signed)
+ fielddescr = get_field_arraylen_descr(c0, A1)
+ assert isinstance(fielddescr, FieldDescr)
+ ofs = fielddescr.offset
+ assert repr(ofs) == '< ArrayLengthOffset <GcArray of Signed > >'
+ #
+ fielddescr = get_field_arraylen_descr(c0, rstr.STR)
+ ofs = fielddescr.offset
+ assert repr(ofs) == ("< <FieldOffset <GcStruct rpy_string { hash, chars }>"
+ " 'chars'> + < ArrayLengthOffset"
+ " <Array of Char > > >")
+ # caching:
+ assert fielddescr is get_field_arraylen_descr(c0, rstr.STR)
diff --git a/pypy/jit/backend/llsupport/test/test_ffisupport.py b/pypy/jit/backend/llsupport/test/test_ffisupport.py
--- a/pypy/jit/backend/llsupport/test/test_ffisupport.py
+++ b/pypy/jit/backend/llsupport/test/test_ffisupport.py
@@ -1,5 +1,6 @@
from pypy.rlib.libffi import types
from pypy.jit.codewriter.longlong import is_64_bit
+from pypy.jit.backend.llsupport.descr import *
from pypy.jit.backend.llsupport.ffisupport import *
@@ -15,7 +16,9 @@
args = [types.sint, types.pointer]
descr = get_call_descr_dynamic(FakeCPU(), args, types.sint, None,
ffi_flags=42)
- assert isinstance(descr, DynamicIntCallDescr)
+ assert isinstance(descr, CallDescr)
+ assert descr.result_type == 'i'
+ assert descr.result_flag == FLAG_SIGNED
assert descr.arg_classes == 'ii'
assert descr.get_ffi_flags() == 42
@@ -24,18 +27,20 @@
assert descr is None # missing floats
descr = get_call_descr_dynamic(FakeCPU(supports_floats=True),
args, types.void, None, ffi_flags=43)
- assert isinstance(descr, VoidCallDescr)
+ assert descr.result_type == 'v'
+ assert descr.result_flag == FLAG_VOID
assert descr.arg_classes == 'ifi'
assert descr.get_ffi_flags() == 43
descr = get_call_descr_dynamic(FakeCPU(), [], types.sint8, None, 42)
- assert isinstance(descr, DynamicIntCallDescr)
- assert descr.get_result_size(False) == 1
+ assert descr.get_result_size() == 1
+ assert descr.result_flag == FLAG_SIGNED
assert descr.is_result_signed() == True
descr = get_call_descr_dynamic(FakeCPU(), [], types.uint8, None, 42)
- assert isinstance(descr, DynamicIntCallDescr)
- assert descr.get_result_size(False) == 1
+ assert isinstance(descr, CallDescr)
+ assert descr.get_result_size() == 1
+ assert descr.result_flag == FLAG_UNSIGNED
assert descr.is_result_signed() == False
if not is_64_bit:
@@ -44,7 +49,9 @@
assert descr is None # missing longlongs
descr = get_call_descr_dynamic(FakeCPU(supports_longlong=True),
[], types.slonglong, None, ffi_flags=43)
- assert isinstance(descr, LongLongCallDescr)
+ assert isinstance(descr, CallDescr)
+ assert descr.result_flag == FLAG_FLOAT
+ assert descr.result_type == 'L'
assert descr.get_ffi_flags() == 43
else:
assert types.slonglong is types.slong
@@ -53,6 +60,6 @@
assert descr is None # missing singlefloats
descr = get_call_descr_dynamic(FakeCPU(supports_singlefloats=True),
[], types.float, None, ffi_flags=44)
- SingleFloatCallDescr = getCallDescrClass(rffi.FLOAT)
- assert isinstance(descr, SingleFloatCallDescr)
+ assert descr.result_flag == FLAG_UNSIGNED
+ assert descr.result_type == 'S'
assert descr.get_ffi_flags() == 44
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -6,6 +6,7 @@
from pypy.jit.backend.llsupport.gc import *
from pypy.jit.backend.llsupport import symbolic
from pypy.jit.metainterp.gc import get_description
+from pypy.jit.metainterp.history import BoxPtr, BoxInt, ConstPtr
from pypy.jit.metainterp.resoperation import get_deep_immutable_oplist
from pypy.jit.tool.oparser import parse
from pypy.rpython.lltypesystem.rclass import OBJECT, OBJECT_VTABLE
@@ -15,12 +16,12 @@
gc_ll_descr = GcLLDescr_boehm(None, None, None)
#
record = []
- prev_funcptr_for_new = gc_ll_descr.funcptr_for_new
- def my_funcptr_for_new(size):
- p = prev_funcptr_for_new(size)
+ prev_malloc_fn_ptr = gc_ll_descr.malloc_fn_ptr
+ def my_malloc_fn_ptr(size):
+ p = prev_malloc_fn_ptr(size)
record.append((size, p))
return p
- gc_ll_descr.funcptr_for_new = my_funcptr_for_new
+ gc_ll_descr.malloc_fn_ptr = my_malloc_fn_ptr
#
# ---------- gc_malloc ----------
S = lltype.GcStruct('S', ('x', lltype.Signed))
@@ -32,8 +33,8 @@
A = lltype.GcArray(lltype.Signed)
arraydescr = get_array_descr(gc_ll_descr, A)
p = gc_ll_descr.gc_malloc_array(arraydescr, 10)
- assert record == [(arraydescr.get_base_size(False) +
- 10 * arraydescr.get_item_size(False), p)]
+ assert record == [(arraydescr.basesize +
+ 10 * arraydescr.itemsize, p)]
del record[:]
# ---------- gc_malloc_str ----------
p = gc_ll_descr.gc_malloc_str(10)
@@ -246,24 +247,28 @@
def __init__(self):
self.record = []
+ def _malloc(self, type_id, size):
+ tid = llop.combine_ushort(lltype.Signed, type_id, 0)
+ x = llmemory.raw_malloc(self.gcheaderbuilder.size_gc_header + size)
+ x += self.gcheaderbuilder.size_gc_header
+ return x, tid
+
def do_malloc_fixedsize_clear(self, RESTYPE, type_id, size,
has_finalizer, has_light_finalizer,
contains_weakptr):
assert not contains_weakptr
- assert not has_finalizer # in these tests
- assert not has_light_finalizer # in these tests
- p = llmemory.raw_malloc(size)
+ assert not has_finalizer
+ assert not has_light_finalizer
+ p, tid = self._malloc(type_id, size)
p = llmemory.cast_adr_to_ptr(p, RESTYPE)
- tid = llop.combine_ushort(lltype.Signed, type_id, 0)
self.record.append(("fixedsize", repr(size), tid, p))
return p
def do_malloc_varsize_clear(self, RESTYPE, type_id, length, size,
itemsize, offset_to_length):
- p = llmemory.raw_malloc(size + itemsize * length)
+ p, tid = self._malloc(type_id, size + itemsize * length)
(p + offset_to_length).signed[0] = length
p = llmemory.cast_adr_to_ptr(p, RESTYPE)
- tid = llop.combine_ushort(lltype.Signed, type_id, 0)
self.record.append(("varsize", tid, length,
repr(size), repr(itemsize),
repr(offset_to_length), p))
@@ -322,43 +327,40 @@
gc_ll_descr = GcLLDescr_framework(gcdescr, FakeTranslator(), None,
llop1)
gc_ll_descr.initialize()
+ llop1.gcheaderbuilder = gc_ll_descr.gcheaderbuilder
self.llop1 = llop1
self.gc_ll_descr = gc_ll_descr
self.fake_cpu = FakeCPU()
- def test_args_for_new(self):
- S = lltype.GcStruct('S', ('x', lltype.Signed))
- sizedescr = get_size_descr(self.gc_ll_descr, S)
- args = self.gc_ll_descr.args_for_new(sizedescr)
- for x in args:
- assert lltype.typeOf(x) == lltype.Signed
- A = lltype.GcArray(lltype.Signed)
- arraydescr = get_array_descr(self.gc_ll_descr, A)
- args = self.gc_ll_descr.args_for_new(sizedescr)
- for x in args:
- assert lltype.typeOf(x) == lltype.Signed
+## def test_args_for_new(self):
+## S = lltype.GcStruct('S', ('x', lltype.Signed))
+## sizedescr = get_size_descr(self.gc_ll_descr, S)
+## args = self.gc_ll_descr.args_for_new(sizedescr)
+## for x in args:
+## assert lltype.typeOf(x) == lltype.Signed
+## A = lltype.GcArray(lltype.Signed)
+## arraydescr = get_array_descr(self.gc_ll_descr, A)
+## args = self.gc_ll_descr.args_for_new(sizedescr)
+## for x in args:
+## assert lltype.typeOf(x) == lltype.Signed
def test_gc_malloc(self):
S = lltype.GcStruct('S', ('x', lltype.Signed))
sizedescr = get_size_descr(self.gc_ll_descr, S)
p = self.gc_ll_descr.gc_malloc(sizedescr)
- assert self.llop1.record == [("fixedsize",
- repr(sizedescr.size),
+ assert lltype.typeOf(p) == llmemory.GCREF
+ assert self.llop1.record == [("fixedsize", repr(sizedescr.size),
sizedescr.tid, p)]
- assert repr(self.gc_ll_descr.args_for_new(sizedescr)) == repr(
- [sizedescr.size, sizedescr.tid])
def test_gc_malloc_array(self):
A = lltype.GcArray(lltype.Signed)
arraydescr = get_array_descr(self.gc_ll_descr, A)
p = self.gc_ll_descr.gc_malloc_array(arraydescr, 10)
assert self.llop1.record == [("varsize", arraydescr.tid, 10,
- repr(arraydescr.get_base_size(True)),
- repr(arraydescr.get_item_size(True)),
- repr(arraydescr.get_ofs_length(True)),
+ repr(arraydescr.basesize),
+ repr(arraydescr.itemsize),
+ repr(arraydescr.lendescr.offset),
p)]
- assert repr(self.gc_ll_descr.args_for_new_array(arraydescr)) == repr(
- [arraydescr.get_item_size(True), arraydescr.tid])
def test_gc_malloc_str(self):
p = self.gc_ll_descr.gc_malloc_str(10)
@@ -404,10 +406,11 @@
gc_ll_descr = self.gc_ll_descr
llop1 = self.llop1
#
- newops = []
+ rewriter = GcRewriterAssembler(gc_ll_descr, None)
+ newops = rewriter.newops
v_base = BoxPtr()
v_value = BoxPtr()
- gc_ll_descr._gen_write_barrier(newops, v_base, v_value)
+ rewriter.gen_write_barrier(v_base, v_value)
assert llop1.record == []
assert len(newops) == 1
assert newops[0].getopnum() == rop.COND_CALL_GC_WB
@@ -427,8 +430,7 @@
operations = gc_ll_descr.rewrite_assembler(None, operations, [])
assert len(operations) == 0
- def test_rewrite_assembler_1(self):
- # check recording of ConstPtrs
+ def test_record_constptrs(self):
class MyFakeCPU(object):
def cast_adr_to_int(self, adr):
assert adr == "some fake address"
@@ -455,211 +457,6 @@
assert operations2 == operations
assert gcrefs == [s_gcref]
- def test_rewrite_assembler_2(self):
- # check write barriers before SETFIELD_GC
- v_base = BoxPtr()
- v_value = BoxPtr()
- field_descr = AbstractDescr()
- operations = [
- ResOperation(rop.SETFIELD_GC, [v_base, v_value], None,
- descr=field_descr),
- ]
- gc_ll_descr = self.gc_ll_descr
- operations = get_deep_immutable_oplist(operations)
- operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations,
- [])
- assert len(operations) == 2
- #
- assert operations[0].getopnum() == rop.COND_CALL_GC_WB
- assert operations[0].getarg(0) == v_base
- assert operations[0].getarg(1) == v_value
- assert operations[0].result is None
- #
- assert operations[1].getopnum() == rop.SETFIELD_RAW
- assert operations[1].getarg(0) == v_base
- assert operations[1].getarg(1) == v_value
- assert operations[1].getdescr() == field_descr
-
- def test_rewrite_assembler_3(self):
- # check write barriers before SETARRAYITEM_GC
- for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
- v_base = BoxPtr()
- v_index = BoxInt()
- v_value = BoxPtr()
- array_descr = AbstractDescr()
- operations = [
- ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
- None, descr=array_descr),
- ]
- if v_new_length is not None:
- operations.insert(0, ResOperation(rop.NEW_ARRAY,
- [v_new_length], v_base,
- descr=array_descr))
- # we need to insert another, unrelated NEW_ARRAY here
- # to prevent the initialization_store optimization
- operations.insert(1, ResOperation(rop.NEW_ARRAY,
- [ConstInt(12)], BoxPtr(),
- descr=array_descr))
- gc_ll_descr = self.gc_ll_descr
- operations = get_deep_immutable_oplist(operations)
- operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
- operations, [])
- if v_new_length is not None:
- assert operations[0].getopnum() == rop.NEW_ARRAY
- assert operations[1].getopnum() == rop.NEW_ARRAY
- del operations[:2]
- assert len(operations) == 2
- #
- assert operations[0].getopnum() == rop.COND_CALL_GC_WB
- assert operations[0].getarg(0) == v_base
- assert operations[0].getarg(1) == v_value
- assert operations[0].result is None
- #
- assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
- assert operations[1].getarg(0) == v_base
- assert operations[1].getarg(1) == v_index
- assert operations[1].getarg(2) == v_value
- assert operations[1].getdescr() == array_descr
-
- def test_rewrite_assembler_4(self):
- # check write barriers before SETARRAYITEM_GC,
- # if we have actually a write_barrier_from_array.
- self.llop1._have_wb_from_array = True
- for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
- v_base = BoxPtr()
- v_index = BoxInt()
- v_value = BoxPtr()
- array_descr = AbstractDescr()
- operations = [
- ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
- None, descr=array_descr),
- ]
- if v_new_length is not None:
- operations.insert(0, ResOperation(rop.NEW_ARRAY,
- [v_new_length], v_base,
- descr=array_descr))
- # we need to insert another, unrelated NEW_ARRAY here
- # to prevent the initialization_store optimization
- operations.insert(1, ResOperation(rop.NEW_ARRAY,
- [ConstInt(12)], BoxPtr(),
- descr=array_descr))
- gc_ll_descr = self.gc_ll_descr
- operations = get_deep_immutable_oplist(operations)
- operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
- operations, [])
- if v_new_length is not None:
- assert operations[0].getopnum() == rop.NEW_ARRAY
- assert operations[1].getopnum() == rop.NEW_ARRAY
- del operations[:2]
- assert len(operations) == 2
- #
- if isinstance(v_new_length, ConstInt) and v_new_length.value < 130:
- assert operations[0].getopnum() == rop.COND_CALL_GC_WB
- assert operations[0].getarg(0) == v_base
- assert operations[0].getarg(1) == v_value
- else:
- assert operations[0].getopnum() == rop.COND_CALL_GC_WB_ARRAY
- assert operations[0].getarg(0) == v_base
- assert operations[0].getarg(1) == v_index
- assert operations[0].getarg(2) == v_value
- assert operations[0].result is None
- #
- assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
- assert operations[1].getarg(0) == v_base
- assert operations[1].getarg(1) == v_index
- assert operations[1].getarg(2) == v_value
- assert operations[1].getdescr() == array_descr
-
- def test_rewrite_assembler_5(self):
- S = lltype.GcStruct('S')
- A = lltype.GcArray(lltype.Struct('A', ('x', lltype.Ptr(S))))
- interiordescr = get_interiorfield_descr(self.gc_ll_descr, A,
- A.OF, 'x')
- wbdescr = self.gc_ll_descr.write_barrier_descr
- ops = parse("""
- [p1, p2]
- setinteriorfield_gc(p1, 0, p2, descr=interiordescr)
- jump(p1, p2)
- """, namespace=locals())
- expected = parse("""
- [p1, p2]
- cond_call_gc_wb(p1, p2, descr=wbdescr)
- setinteriorfield_raw(p1, 0, p2, descr=interiordescr)
- jump(p1, p2)
- """, namespace=locals())
- operations = get_deep_immutable_oplist(ops.operations)
- operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
- operations, [])
- equaloplists(operations, expected.operations)
-
- def test_rewrite_assembler_initialization_store(self):
- S = lltype.GcStruct('S', ('parent', OBJECT),
- ('x', lltype.Signed))
- s_vtable = lltype.malloc(OBJECT_VTABLE, immortal=True)
- xdescr = get_field_descr(self.gc_ll_descr, S, 'x')
- ops = parse("""
- [p1]
- p0 = new_with_vtable(ConstClass(s_vtable))
- setfield_gc(p0, p1, descr=xdescr)
- jump()
- """, namespace=locals())
- expected = parse("""
- [p1]
- p0 = new_with_vtable(ConstClass(s_vtable))
- # no write barrier
- setfield_gc(p0, p1, descr=xdescr)
- jump()
- """, namespace=locals())
- operations = get_deep_immutable_oplist(ops.operations)
- operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
- operations, [])
- equaloplists(operations, expected.operations)
-
- def test_rewrite_assembler_initialization_store_2(self):
- S = lltype.GcStruct('S', ('parent', OBJECT),
- ('x', lltype.Signed))
- s_vtable = lltype.malloc(OBJECT_VTABLE, immortal=True)
- wbdescr = self.gc_ll_descr.write_barrier_descr
- xdescr = get_field_descr(self.gc_ll_descr, S, 'x')
- ops = parse("""
- [p1]
- p0 = new_with_vtable(ConstClass(s_vtable))
- p3 = new_with_vtable(ConstClass(s_vtable))
- setfield_gc(p0, p1, descr=xdescr)
- jump()
- """, namespace=locals())
- expected = parse("""
- [p1]
- p0 = new_with_vtable(ConstClass(s_vtable))
- p3 = new_with_vtable(ConstClass(s_vtable))
- cond_call_gc_wb(p0, p1, descr=wbdescr)
- setfield_raw(p0, p1, descr=xdescr)
- jump()
- """, namespace=locals())
- operations = get_deep_immutable_oplist(ops.operations)
- operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
- operations, [])
- equaloplists(operations, expected.operations)
-
- def test_rewrite_assembler_initialization_store_3(self):
- A = lltype.GcArray(lltype.Ptr(lltype.GcStruct('S')))
- arraydescr = get_array_descr(self.gc_ll_descr, A)
- ops = parse("""
- [p1]
- p0 = new_array(3, descr=arraydescr)
- setarrayitem_gc(p0, 0, p1, descr=arraydescr)
- jump()
- """, namespace=locals())
- expected = parse("""
- [p1]
- p0 = new_array(3, descr=arraydescr)
- setarrayitem_gc(p0, 0, p1, descr=arraydescr)
- jump()
- """, namespace=locals())
- operations = get_deep_immutable_oplist(ops.operations)
- operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
- operations, [])
- equaloplists(operations, expected.operations)
class TestFrameworkMiniMark(TestFramework):
gc = 'minimark'
diff --git a/pypy/jit/backend/llsupport/test/test_rewrite.py b/pypy/jit/backend/llsupport/test/test_rewrite.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/llsupport/test/test_rewrite.py
@@ -0,0 +1,668 @@
+from pypy.jit.backend.llsupport.descr import *
+from pypy.jit.backend.llsupport.gc import *
+from pypy.jit.metainterp.gc import get_description
+from pypy.jit.tool.oparser import parse
+from pypy.jit.metainterp.optimizeopt.util import equaloplists
+from pypy.jit.codewriter.heaptracker import register_known_gctype
+
+
+class Evaluator(object):
+ def __init__(self, scope):
+ self.scope = scope
+ def __getitem__(self, key):
+ return eval(key, self.scope)
+
+
+class RewriteTests(object):
+ def check_rewrite(self, frm_operations, to_operations, **namespace):
+ S = lltype.GcStruct('S', ('x', lltype.Signed),
+ ('y', lltype.Signed))
+ sdescr = get_size_descr(self.gc_ll_descr, S)
+ sdescr.tid = 1234
+ #
+ T = lltype.GcStruct('T', ('y', lltype.Signed),
+ ('z', lltype.Ptr(S)),
+ ('t', lltype.Signed))
+ tdescr = get_size_descr(self.gc_ll_descr, T)
+ tdescr.tid = 5678
+ tzdescr = get_field_descr(self.gc_ll_descr, T, 'z')
+ #
+ A = lltype.GcArray(lltype.Signed)
+ adescr = get_array_descr(self.gc_ll_descr, A)
+ adescr.tid = 4321
+ alendescr = adescr.lendescr
+ #
+ B = lltype.GcArray(lltype.Char)
+ bdescr = get_array_descr(self.gc_ll_descr, B)
+ bdescr.tid = 8765
+ blendescr = bdescr.lendescr
+ #
+ C = lltype.GcArray(lltype.Ptr(S))
+ cdescr = get_array_descr(self.gc_ll_descr, C)
+ cdescr.tid = 8111
+ clendescr = cdescr.lendescr
+ #
+ E = lltype.GcStruct('Empty')
+ edescr = get_size_descr(self.gc_ll_descr, E)
+ edescr.tid = 9000
+ #
+ vtable_descr = self.gc_ll_descr.fielddescr_vtable
+ O = lltype.GcStruct('O', ('parent', rclass.OBJECT),
+ ('x', lltype.Signed))
+ o_vtable = lltype.malloc(rclass.OBJECT_VTABLE, immortal=True)
+ register_known_gctype(self.cpu, o_vtable, O)
+ #
+ tiddescr = self.gc_ll_descr.fielddescr_tid
+ wbdescr = self.gc_ll_descr.write_barrier_descr
+ WORD = globals()['WORD']
+ #
+ strdescr = self.gc_ll_descr.str_descr
+ unicodedescr = self.gc_ll_descr.unicode_descr
+ strlendescr = strdescr.lendescr
+ unicodelendescr = unicodedescr.lendescr
+ #
+ namespace.update(locals())
+ #
+ for funcname in self.gc_ll_descr._generated_functions:
+ namespace[funcname] = self.gc_ll_descr.get_malloc_fn(funcname)
+ namespace[funcname + '_descr'] = getattr(self.gc_ll_descr,
+ '%s_descr' % funcname)
+ #
+ ops = parse(frm_operations, namespace=namespace)
+ expected = parse(to_operations % Evaluator(namespace),
+ namespace=namespace)
+ operations = self.gc_ll_descr.rewrite_assembler(self.cpu,
+ ops.operations,
+ [])
+ equaloplists(operations, expected.operations)
+
+
+class TestBoehm(RewriteTests):
+ def setup_method(self, meth):
+ class FakeCPU(object):
+ def sizeof(self, STRUCT):
+ return SizeDescrWithVTable(102)
+ self.cpu = FakeCPU()
+ self.gc_ll_descr = GcLLDescr_boehm(None, None, None)
+
+ def test_new(self):
+ self.check_rewrite("""
+ []
+ p0 = new(descr=sdescr)
+ jump()
+ """, """
+ [p1]
+ p0 = call_malloc_gc(ConstClass(malloc_fixedsize), %(sdescr.size)d,\
+ descr=malloc_fixedsize_descr)
+ jump()
+ """)
+
+ def test_no_collapsing(self):
+ self.check_rewrite("""
+ []
+ p0 = new(descr=sdescr)
+ p1 = new(descr=sdescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_gc(ConstClass(malloc_fixedsize), %(sdescr.size)d,\
+ descr=malloc_fixedsize_descr)
+ p1 = call_malloc_gc(ConstClass(malloc_fixedsize), %(sdescr.size)d,\
+ descr=malloc_fixedsize_descr)
+ jump()
+ """)
+
+ def test_new_array_fixed(self):
+ self.check_rewrite("""
+ []
+ p0 = new_array(10, descr=adescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_gc(ConstClass(malloc_fixedsize), \
+ %(adescr.basesize + 10 * adescr.itemsize)d, \
+ descr=malloc_fixedsize_descr)
+ setfield_gc(p0, 10, descr=alendescr)
+ jump()
+ """)
+
+ def test_new_array_variable(self):
+ self.check_rewrite("""
+ [i1]
+ p0 = new_array(i1, descr=adescr)
+ jump()
+ """, """
+ [i1]
+ p0 = call_malloc_gc(ConstClass(malloc_array), \
+ %(adescr.basesize)d, \
+ i1, \
+ %(adescr.itemsize)d, \
+ %(adescr.lendescr.offset)d, \
+ descr=malloc_array_descr)
+ jump()
+ """)
+
+ def test_new_with_vtable(self):
+ self.check_rewrite("""
+ []
+ p0 = new_with_vtable(ConstClass(o_vtable))
+ jump()
+ """, """
+ [p1]
+ p0 = call_malloc_gc(ConstClass(malloc_fixedsize), 102, \
+ descr=malloc_fixedsize_descr)
+ setfield_gc(p0, ConstClass(o_vtable), descr=vtable_descr)
+ jump()
+ """)
+
+ def test_newstr(self):
+ self.check_rewrite("""
+ [i1]
+ p0 = newstr(i1)
+ jump()
+ """, """
+ [i1]
+ p0 = call_malloc_gc(ConstClass(malloc_array), \
+ %(strdescr.basesize)d, \
+ i1, \
+ %(strdescr.itemsize)d, \
+ %(strlendescr.offset)d, \
+ descr=malloc_array_descr)
+ jump()
+ """)
+
+ def test_newunicode(self):
+ self.check_rewrite("""
+ [i1]
+ p0 = newunicode(10)
+ jump()
+ """, """
+ [i1]
+ p0 = call_malloc_gc(ConstClass(malloc_fixedsize), \
+ %(unicodedescr.basesize + \
+ 10 * unicodedescr.itemsize)d, \
+ descr=malloc_fixedsize_descr)
+ setfield_gc(p0, 10, descr=unicodelendescr)
+ jump()
+ """)
+
+
+class TestFramework(RewriteTests):
+ def setup_method(self, meth):
+ class config_(object):
+ class translation(object):
+ gc = 'hybrid'
+ gcrootfinder = 'asmgcc'
+ gctransformer = 'framework'
+ gcremovetypeptr = False
+ gcdescr = get_description(config_)
+ self.gc_ll_descr = GcLLDescr_framework(gcdescr, None, None, None,
+ really_not_translated=True)
+ self.gc_ll_descr.write_barrier_descr.has_write_barrier_from_array = (
+ lambda cpu: True)
+ #
+ class FakeCPU(object):
+ def sizeof(self, STRUCT):
+ descr = SizeDescrWithVTable(102)
+ descr.tid = 9315
+ return descr
+ self.cpu = FakeCPU()
+
+ def test_rewrite_assembler_new_to_malloc(self):
+ self.check_rewrite("""
+ [p1]
+ p0 = new(descr=sdescr)
+ jump()
+ """, """
+ [p1]
+ p0 = call_malloc_nursery(%(sdescr.size)d)
+ setfield_gc(p0, 1234, descr=tiddescr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_new3_to_malloc(self):
+ self.check_rewrite("""
+ []
+ p0 = new(descr=sdescr)
+ p1 = new(descr=tdescr)
+ p2 = new(descr=sdescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_nursery( \
+ %(sdescr.size + tdescr.size + sdescr.size)d)
+ setfield_gc(p0, 1234, descr=tiddescr)
+ p1 = int_add(p0, %(sdescr.size)d)
+ setfield_gc(p1, 5678, descr=tiddescr)
+ p2 = int_add(p1, %(tdescr.size)d)
+ setfield_gc(p2, 1234, descr=tiddescr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_new_array_fixed_to_malloc(self):
+ self.check_rewrite("""
+ []
+ p0 = new_array(10, descr=adescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_nursery( \
+ %(adescr.basesize + 10 * adescr.itemsize)d)
+ setfield_gc(p0, 4321, descr=tiddescr)
+ setfield_gc(p0, 10, descr=alendescr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_new_and_new_array_fixed_to_malloc(self):
+ self.check_rewrite("""
+ []
+ p0 = new(descr=sdescr)
+ p1 = new_array(10, descr=adescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_nursery( \
+ %(sdescr.size + \
+ adescr.basesize + 10 * adescr.itemsize)d)
+ setfield_gc(p0, 1234, descr=tiddescr)
+ p1 = int_add(p0, %(sdescr.size)d)
+ setfield_gc(p1, 4321, descr=tiddescr)
+ setfield_gc(p1, 10, descr=alendescr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_round_up(self):
+ self.check_rewrite("""
+ []
+ p0 = new_array(6, descr=bdescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_nursery(%(bdescr.basesize + 8)d)
+ setfield_gc(p0, 8765, descr=tiddescr)
+ setfield_gc(p0, 6, descr=blendescr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_round_up_always(self):
+ self.check_rewrite("""
+ []
+ p0 = new_array(5, descr=bdescr)
+ p1 = new_array(5, descr=bdescr)
+ p2 = new_array(5, descr=bdescr)
+ p3 = new_array(5, descr=bdescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_nursery(%(4 * (bdescr.basesize + 8))d)
+ setfield_gc(p0, 8765, descr=tiddescr)
+ setfield_gc(p0, 5, descr=blendescr)
+ p1 = int_add(p0, %(bdescr.basesize + 8)d)
+ setfield_gc(p1, 8765, descr=tiddescr)
+ setfield_gc(p1, 5, descr=blendescr)
+ p2 = int_add(p1, %(bdescr.basesize + 8)d)
+ setfield_gc(p2, 8765, descr=tiddescr)
+ setfield_gc(p2, 5, descr=blendescr)
+ p3 = int_add(p2, %(bdescr.basesize + 8)d)
+ setfield_gc(p3, 8765, descr=tiddescr)
+ setfield_gc(p3, 5, descr=blendescr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_minimal_size(self):
+ self.check_rewrite("""
+ []
+ p0 = new(descr=edescr)
+ p1 = new(descr=edescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_nursery(%(4*WORD)d)
+ setfield_gc(p0, 9000, descr=tiddescr)
+ p1 = int_add(p0, %(2*WORD)d)
+ setfield_gc(p1, 9000, descr=tiddescr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_variable_size(self):
+ self.check_rewrite("""
+ [i0]
+ p0 = new_array(i0, descr=bdescr)
+ jump(i0)
+ """, """
+ [i0]
+ p0 = call_malloc_gc(ConstClass(malloc_array), 1, \
+ %(bdescr.tid)d, i0, \
+ descr=malloc_array_descr)
+ jump(i0)
+ """)
+
+ def test_rewrite_assembler_nonstandard_array(self):
+ # a non-standard array is a bit hard to get; e.g. GcArray(Float)
+ # is like that on Win32, but not on Linux. Build one manually...
+ NONSTD = lltype.GcArray(lltype.Float)
+ nonstd_descr = get_array_descr(self.gc_ll_descr, NONSTD)
+ nonstd_descr.tid = 6464
+ nonstd_descr.basesize = 64 # <= hacked
+ nonstd_descr.itemsize = 8
+ nonstd_descr_gcref = 123
+ self.check_rewrite("""
+ [i0]
+ p0 = new_array(i0, descr=nonstd_descr)
+ jump(i0)
+ """, """
+ [i0]
+ p0 = call_malloc_gc(ConstClass(malloc_array_nonstandard), \
+ 64, 8, \
+ %(nonstd_descr.lendescr.offset)d, \
+ 6464, i0, \
+ descr=malloc_array_nonstandard_descr)
+ jump(i0)
+ """, nonstd_descr=nonstd_descr)
+
+ def test_rewrite_assembler_maximal_size_1(self):
+ self.gc_ll_descr.max_size_of_young_obj = 100
+ self.check_rewrite("""
+ []
+ p0 = new_array(103, descr=bdescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_gc(ConstClass(malloc_fixedsize), \
+ %(bdescr.basesize + 104)d, \
+ descr=malloc_fixedsize_descr)
+ setfield_gc(p0, 8765, descr=tiddescr)
+ setfield_gc(p0, 103, descr=blendescr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_maximal_size_2(self):
+ self.gc_ll_descr.max_size_of_young_obj = 300
+ self.check_rewrite("""
+ []
+ p0 = new_array(101, descr=bdescr)
+ p1 = new_array(102, descr=bdescr) # two new_arrays can be combined
+ p2 = new_array(103, descr=bdescr) # but not all three
+ jump()
+ """, """
+ []
+ p0 = call_malloc_nursery( \
+ %(2 * (bdescr.basesize + 104))d)
+ setfield_gc(p0, 8765, descr=tiddescr)
+ setfield_gc(p0, 101, descr=blendescr)
+ p1 = int_add(p0, %(bdescr.basesize + 104)d)
+ setfield_gc(p1, 8765, descr=tiddescr)
+ setfield_gc(p1, 102, descr=blendescr)
+ p2 = call_malloc_nursery( \
+ %(bdescr.basesize + 104)d)
+ setfield_gc(p2, 8765, descr=tiddescr)
+ setfield_gc(p2, 103, descr=blendescr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_huge_size(self):
+ # "huge" is defined as "larger than 0xffffff bytes, or 16MB"
+ self.check_rewrite("""
+ []
+ p0 = new_array(20000000, descr=bdescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_gc(ConstClass(malloc_array), 1, \
+ %(bdescr.tid)d, 20000000, \
+ descr=malloc_array_descr)
+ jump()
+ """)
+
+ def test_new_with_vtable(self):
+ self.check_rewrite("""
+ []
+ p0 = new_with_vtable(ConstClass(o_vtable))
+ jump()
+ """, """
+ [p1]
+ p0 = call_malloc_nursery(104) # rounded up
+ setfield_gc(p0, 9315, descr=tiddescr)
+ setfield_gc(p0, ConstClass(o_vtable), descr=vtable_descr)
+ jump()
+ """)
+
+ def test_new_with_vtable_too_big(self):
+ self.gc_ll_descr.max_size_of_young_obj = 100
+ self.check_rewrite("""
+ []
+ p0 = new_with_vtable(ConstClass(o_vtable))
+ jump()
+ """, """
+ [p1]
+ p0 = call_malloc_gc(ConstClass(malloc_fixedsize), 104, \
+ descr=malloc_fixedsize_descr)
+ setfield_gc(p0, 9315, descr=tiddescr)
+ setfield_gc(p0, ConstClass(o_vtable), descr=vtable_descr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_newstr_newunicode(self):
+ self.check_rewrite("""
+ [i2]
+ p0 = newstr(14)
+ p1 = newunicode(10)
+ p2 = newunicode(i2)
+ p3 = newstr(i2)
+ jump()
+ """, """
+ [i2]
+ p0 = call_malloc_nursery( \
+ %(strdescr.basesize + 16 * strdescr.itemsize + \
+ unicodedescr.basesize + 10 * unicodedescr.itemsize)d)
+ setfield_gc(p0, %(strdescr.tid)d, descr=tiddescr)
+ setfield_gc(p0, 14, descr=strlendescr)
+ p1 = int_add(p0, %(strdescr.basesize + 16 * strdescr.itemsize)d)
+ setfield_gc(p1, %(unicodedescr.tid)d, descr=tiddescr)
+ setfield_gc(p1, 10, descr=unicodelendescr)
+ p2 = call_malloc_gc(ConstClass(malloc_unicode), i2, \
+ descr=malloc_unicode_descr)
+ p3 = call_malloc_gc(ConstClass(malloc_str), i2, \
+ descr=malloc_str_descr)
+ jump()
+ """)
+
+ def test_write_barrier_before_setfield_gc(self):
+ self.check_rewrite("""
+ [p1, p2]
+ setfield_gc(p1, p2, descr=tzdescr)
+ jump()
+ """, """
+ [p1, p2]
+ cond_call_gc_wb(p1, p2, descr=wbdescr)
+ setfield_raw(p1, p2, descr=tzdescr)
+ jump()
+ """)
+
+ def test_write_barrier_before_array_without_from_array(self):
+ self.gc_ll_descr.write_barrier_descr.has_write_barrier_from_array = (
+ lambda cpu: False)
+ self.check_rewrite("""
+ [p1, i2, p3]
+ setarrayitem_gc(p1, i2, p3, descr=cdescr)
+ jump()
+ """, """
+ [p1, i2, p3]
+ cond_call_gc_wb(p1, p3, descr=wbdescr)
+ setarrayitem_raw(p1, i2, p3, descr=cdescr)
+ jump()
+ """)
+
+ def test_write_barrier_before_short_array(self):
+ self.gc_ll_descr.max_size_of_young_obj = 2000
+ self.check_rewrite("""
+ [i2, p3]
+ p1 = new_array(129, descr=cdescr)
+ call(123456)
+ setarrayitem_gc(p1, i2, p3, descr=cdescr)
+ jump()
+ """, """
+ [i2, p3]
+ p1 = call_malloc_nursery( \
+ %(cdescr.basesize + 129 * cdescr.itemsize)d)
+ setfield_gc(p1, 8111, descr=tiddescr)
+ setfield_gc(p1, 129, descr=clendescr)
+ call(123456)
+ cond_call_gc_wb(p1, p3, descr=wbdescr)
+ setarrayitem_raw(p1, i2, p3, descr=cdescr)
+ jump()
+ """)
+
+ def test_write_barrier_before_long_array(self):
+ # the limit of "being too long" is fixed, arbitrarily, at 130
+ self.gc_ll_descr.max_size_of_young_obj = 2000
+ self.check_rewrite("""
+ [i2, p3]
+ p1 = new_array(130, descr=cdescr)
+ call(123456)
+ setarrayitem_gc(p1, i2, p3, descr=cdescr)
+ jump()
+ """, """
+ [i2, p3]
+ p1 = call_malloc_nursery( \
+ %(cdescr.basesize + 130 * cdescr.itemsize)d)
+ setfield_gc(p1, 8111, descr=tiddescr)
+ setfield_gc(p1, 130, descr=clendescr)
+ call(123456)
+ cond_call_gc_wb_array(p1, i2, p3, descr=wbdescr)
+ setarrayitem_raw(p1, i2, p3, descr=cdescr)
+ jump()
+ """)
+
+ def test_write_barrier_before_unknown_array(self):
+ self.check_rewrite("""
+ [p1, i2, p3]
+ setarrayitem_gc(p1, i2, p3, descr=cdescr)
+ jump()
+ """, """
+ [p1, i2, p3]
+ cond_call_gc_wb_array(p1, i2, p3, descr=wbdescr)
+ setarrayitem_raw(p1, i2, p3, descr=cdescr)
+ jump()
+ """)
+
+ def test_label_makes_size_unknown(self):
+ self.check_rewrite("""
+ [i2, p3]
+ p1 = new_array(5, descr=cdescr)
+ label(p1, i2, p3)
+ setarrayitem_gc(p1, i2, p3, descr=cdescr)
+ jump()
+ """, """
+ [i2, p3]
+ p1 = call_malloc_nursery( \
+ %(cdescr.basesize + 5 * cdescr.itemsize)d)
+ setfield_gc(p1, 8111, descr=tiddescr)
+ setfield_gc(p1, 5, descr=clendescr)
+ label(p1, i2, p3)
+ cond_call_gc_wb_array(p1, i2, p3, descr=wbdescr)
+ setarrayitem_raw(p1, i2, p3, descr=cdescr)
+ jump()
+ """)
+
+ def test_write_barrier_before_setinteriorfield_gc(self):
+ S1 = lltype.GcStruct('S1')
+ INTERIOR = lltype.GcArray(('z', lltype.Ptr(S1)))
+ interiordescr = get_array_descr(self.gc_ll_descr, INTERIOR)
+ interiordescr.tid = 1291
+ interiorlendescr = interiordescr.lendescr
+ interiorzdescr = get_interiorfield_descr(self.gc_ll_descr,
+ INTERIOR, 'z')
+ self.check_rewrite("""
+ [p1, p2]
+ setinteriorfield_gc(p1, 0, p2, descr=interiorzdescr)
+ jump(p1, p2)
+ """, """
+ [p1, p2]
+ cond_call_gc_wb(p1, p2, descr=wbdescr)
+ setinteriorfield_raw(p1, 0, p2, descr=interiorzdescr)
+ jump(p1, p2)
+ """, interiorzdescr=interiorzdescr)
+
+ def test_initialization_store(self):
+ self.check_rewrite("""
+ [p1]
+ p0 = new(descr=tdescr)
+ setfield_gc(p0, p1, descr=tzdescr)
+ jump()
+ """, """
+ [p1]
+ p0 = call_malloc_nursery(%(tdescr.size)d)
+ setfield_gc(p0, 5678, descr=tiddescr)
+ setfield_gc(p0, p1, descr=tzdescr)
+ jump()
+ """)
+
+ def test_initialization_store_2(self):
+ self.check_rewrite("""
+ []
+ p0 = new(descr=tdescr)
+ p1 = new(descr=sdescr)
+ setfield_gc(p0, p1, descr=tzdescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_nursery(%(tdescr.size + sdescr.size)d)
+ setfield_gc(p0, 5678, descr=tiddescr)
+ p1 = int_add(p0, %(tdescr.size)d)
+ setfield_gc(p1, 1234, descr=tiddescr)
+ # <<<no cond_call_gc_wb here>>>
+ setfield_gc(p0, p1, descr=tzdescr)
+ jump()
+ """)
+
+ def test_initialization_store_array(self):
+ self.check_rewrite("""
+ [p1, i2]
+ p0 = new_array(5, descr=cdescr)
+ setarrayitem_gc(p0, i2, p1, descr=cdescr)
+ jump()
+ """, """
+ [p1, i2]
+ p0 = call_malloc_nursery( \
+ %(cdescr.basesize + 5 * cdescr.itemsize)d)
+ setfield_gc(p0, 8111, descr=tiddescr)
+ setfield_gc(p0, 5, descr=clendescr)
+ setarrayitem_gc(p0, i2, p1, descr=cdescr)
+ jump()
+ """)
+
+ def test_non_initialization_store(self):
+ self.check_rewrite("""
+ [i0]
+ p0 = new(descr=tdescr)
+ p1 = newstr(i0)
+ setfield_gc(p0, p1, descr=tzdescr)
+ jump()
+ """, """
+ [i0]
+ p0 = call_malloc_nursery(%(tdescr.size)d)
+ setfield_gc(p0, 5678, descr=tiddescr)
+ p1 = call_malloc_gc(ConstClass(malloc_str), i0, \
+ descr=malloc_str_descr)
+ cond_call_gc_wb(p0, p1, descr=wbdescr)
+ setfield_raw(p0, p1, descr=tzdescr)
+ jump()
+ """)
+
+ def test_non_initialization_store_label(self):
+ self.check_rewrite("""
+ [p1]
+ p0 = new(descr=tdescr)
+ label(p0, p1)
+ setfield_gc(p0, p1, descr=tzdescr)
+ jump()
+ """, """
+ [p1]
+ p0 = call_malloc_nursery(%(tdescr.size)d)
+ setfield_gc(p0, 5678, descr=tiddescr)
+ label(p0, p1)
+ cond_call_gc_wb(p0, p1, descr=wbdescr)
+ setfield_raw(p0, p1, descr=tzdescr)
+ jump()
+ """)
diff --git a/pypy/jit/backend/llsupport/test/test_runner.py b/pypy/jit/backend/llsupport/test/test_runner.py
--- a/pypy/jit/backend/llsupport/test/test_runner.py
+++ b/pypy/jit/backend/llsupport/test/test_runner.py
@@ -8,6 +8,12 @@
class MyLLCPU(AbstractLLCPU):
supports_floats = True
+
+ class assembler(object):
+ @staticmethod
+ def set_debug(flag):
+ pass
+
def compile_loop(self, inputargs, operations, looptoken):
py.test.skip("llsupport test: cannot compile operations")
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -17,6 +17,7 @@
from pypy.rpython.llinterp import LLException
from pypy.jit.codewriter import heaptracker, longlong
from pypy.rlib.rarithmetic import intmask
+from pypy.jit.backend.detect_cpu import autodetect_main_model_and_size
def boxfloat(x):
return BoxFloat(longlong.getfloatstorage(x))
@@ -27,6 +28,9 @@
class Runner(object):
+ add_loop_instruction = ['overload for a specific cpu']
+ bridge_loop_instruction = ['overload for a specific cpu']
+
def execute_operation(self, opname, valueboxes, result_type, descr=None):
inputargs, operations = self._get_single_operation_list(opname,
result_type,
@@ -2930,6 +2934,8 @@
# overflowing value:
fail = self.cpu.execute_token(looptoken, sys.maxint // 4 + 1)
assert fail.identifier == excdescr.identifier
+ exc = self.cpu.grab_exc_value()
+ assert exc == "memoryerror!"
def test_compile_loop_with_target(self):
i0 = BoxInt()
@@ -2972,6 +2978,56 @@
res = self.cpu.get_latest_value_int(0)
assert res == -10
+ def test_compile_asmlen(self):
+ from pypy.jit.backend.llsupport.llmodel import AbstractLLCPU
+ if not isinstance(self.cpu, AbstractLLCPU):
+ py.test.skip("pointless test on non-asm")
+ from pypy.jit.backend.x86.tool.viewcode import machine_code_dump
+ import ctypes
+ ops = """
+ [i2]
+ i0 = same_as(i2) # but forced to be in a register
+ label(i0, descr=1)
+ i1 = int_add(i0, i0)
+ guard_true(i1, descr=faildesr) [i1]
+ jump(i1, descr=1)
+ """
+ faildescr = BasicFailDescr(2)
+ loop = parse(ops, self.cpu, namespace=locals())
+ faildescr = loop.operations[-2].getdescr()
+ jumpdescr = loop.operations[-1].getdescr()
+ bridge_ops = """
+ [i0]
+ jump(i0, descr=jumpdescr)
+ """
+ bridge = parse(bridge_ops, self.cpu, namespace=locals())
+ looptoken = JitCellToken()
+ self.cpu.assembler.set_debug(False)
+ info = self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+ bridge_info = self.cpu.compile_bridge(faildescr, bridge.inputargs,
+ bridge.operations,
+ looptoken)
+ self.cpu.assembler.set_debug(True) # always on untranslated
+ assert info.asmlen != 0
+ cpuname = autodetect_main_model_and_size()
+ # XXX we have to check the precise assembler, otherwise
+ # we don't quite know if borders are correct
+
+ def checkops(mc, ops):
+ assert len(mc) == len(ops)
+ for i in range(len(mc)):
+ assert mc[i].split("\t")[-1].startswith(ops[i])
+
+ data = ctypes.string_at(info.asmaddr, info.asmlen)
+ mc = list(machine_code_dump(data, info.asmaddr, cpuname))
+ lines = [line for line in mc if line.count('\t') == 2]
+ checkops(lines, self.add_loop_instructions)
+ data = ctypes.string_at(bridge_info.asmaddr, bridge_info.asmlen)
+ mc = list(machine_code_dump(data, bridge_info.asmaddr, cpuname))
+ lines = [line for line in mc if line.count('\t') == 2]
+ checkops(lines, self.bridge_loop_instructions)
+
+
def test_compile_bridge_with_target(self):
# This test creates a loopy piece of code in a bridge, and builds another
# unrelated loop that ends in a jump directly to this loopy bit of code.
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -7,6 +7,7 @@
from pypy.rpython.lltypesystem import lltype, rffi, rstr, llmemory
from pypy.rpython.lltypesystem.lloperation import llop
from pypy.rpython.annlowlevel import llhelper
+from pypy.rlib.jit import AsmInfo
from pypy.jit.backend.model import CompiledLoopToken
from pypy.jit.backend.x86.regalloc import (RegAlloc, get_ebp_ofs, _get_scale,
gpr_reg_mgr_cls, _valid_addressing_size)
@@ -39,6 +40,7 @@
from pypy.jit.codewriter.effectinfo import EffectInfo
from pypy.jit.codewriter import longlong
from pypy.rlib.rarithmetic import intmask
+from pypy.rlib.objectmodel import compute_unique_id
# darwin requires the stack to be 16 bytes aligned on calls. Same for gcc 4.5.0,
# better safe than sorry
@@ -58,7 +60,8 @@
self.is_guard_not_invalidated = is_guard_not_invalidated
DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
- ('bridge', lltype.Signed), # 0 or 1
+ ('type', lltype.Char), # 'b'ridge, 'l'abel or
+ # 'e'ntry point
('number', lltype.Signed))
class Assembler386(object):
@@ -70,10 +73,6 @@
self.cpu = cpu
self.verbose = False
self.rtyper = cpu.rtyper
- self.malloc_func_addr = 0
- self.malloc_array_func_addr = 0
- self.malloc_str_func_addr = 0
- self.malloc_unicode_func_addr = 0
self.fail_boxes_int = values_array(lltype.Signed, failargs_limit)
self.fail_boxes_ptr = values_array(llmemory.GCREF, failargs_limit)
self.fail_boxes_float = values_array(longlong.FLOATSTORAGE,
@@ -108,20 +107,6 @@
# the address of the function called by 'new'
gc_ll_descr = self.cpu.gc_ll_descr
gc_ll_descr.initialize()
- ll_new = gc_ll_descr.get_funcptr_for_new()
- self.malloc_func_addr = rffi.cast(lltype.Signed, ll_new)
- if gc_ll_descr.get_funcptr_for_newarray is not None:
- ll_new_array = gc_ll_descr.get_funcptr_for_newarray()
- self.malloc_array_func_addr = rffi.cast(lltype.Signed,
- ll_new_array)
- if gc_ll_descr.get_funcptr_for_newstr is not None:
- ll_new_str = gc_ll_descr.get_funcptr_for_newstr()
- self.malloc_str_func_addr = rffi.cast(lltype.Signed,
- ll_new_str)
- if gc_ll_descr.get_funcptr_for_newunicode is not None:
- ll_new_unicode = gc_ll_descr.get_funcptr_for_newunicode()
- self.malloc_unicode_func_addr = rffi.cast(lltype.Signed,
- ll_new_unicode)
self.memcpy_addr = self.cpu.cast_ptr_to_int(support.memcpy_fn)
self._build_failure_recovery(False)
self._build_failure_recovery(True)
@@ -165,12 +150,15 @@
def finish_once(self):
if self._debug:
debug_start('jit-backend-counts')
- for struct in self.loop_run_counters:
- if struct.bridge:
- prefix = 'bridge '
+ for i in range(len(self.loop_run_counters)):
+ struct = self.loop_run_counters[i]
+ if struct.type == 'l':
+ prefix = 'TargetToken(%d)' % struct.number
+ elif struct.type == 'b':
+ prefix = 'bridge ' + str(struct.number)
else:
- prefix = 'loop '
- debug_print(prefix + str(struct.number) + ':' + str(struct.i))
+ prefix = 'entry ' + str(struct.number)
+ debug_print(prefix + ':' + str(struct.i))
debug_stop('jit-backend-counts')
def _build_float_constants(self):
@@ -275,7 +263,8 @@
#
self.mc = codebuf.MachineCodeBlockWrapper()
# call on_leave_jitted_save_exc()
- addr = self.cpu.get_on_leave_jitted_int(save_exception=True)
+ addr = self.cpu.get_on_leave_jitted_int(save_exception=True,
+ default_to_memoryerror=True)
self.mc.CALL(imm(addr))
self.mc.MOV_ri(eax.value, self.cpu.propagate_exception_v)
self._call_footer()
@@ -423,6 +412,7 @@
'''adds the following attributes to looptoken:
_x86_function_addr (address of the generated func, as an int)
_x86_loop_code (debug: addr of the start of the ResOps)
+ _x86_fullsize (debug: full size including failure)
_x86_debug_checksum
'''
# XXX this function is too longish and contains some code
@@ -439,8 +429,8 @@
self.setup(looptoken)
if log:
- self._register_counter(False, looptoken.number)
- operations = self._inject_debugging_code(looptoken, operations)
+ operations = self._inject_debugging_code(looptoken, operations,
+ 'e', looptoken.number)
regalloc = RegAlloc(self, self.cpu.translate_support_code)
#
@@ -488,7 +478,8 @@
name = "Loop # %s: %s" % (looptoken.number, loopname)
self.cpu.profile_agent.native_code_written(name,
rawstart, full_size)
- return ops_offset
+ return AsmInfo(ops_offset, rawstart + looppos,
+ size_excluding_failure_stuff - looppos)
def assemble_bridge(self, faildescr, inputargs, operations,
original_loop_token, log):
@@ -497,17 +488,12 @@
assert len(set(inputargs)) == len(inputargs)
descr_number = self.cpu.get_fail_descr_number(faildescr)
- try:
- failure_recovery = self._find_failure_recovery_bytecode(faildescr)
- except ValueError:
- debug_print("Bridge out of guard", descr_number,
- "was already compiled!")
- return
+ failure_recovery = self._find_failure_recovery_bytecode(faildescr)
self.setup(original_loop_token)
if log:
- self._register_counter(True, descr_number)
- operations = self._inject_debugging_code(faildescr, operations)
+ operations = self._inject_debugging_code(faildescr, operations,
+ 'b', descr_number)
arglocs = self.rebuild_faillocs_from_descr(failure_recovery)
if not we_are_translated():
@@ -515,6 +501,7 @@
[loc.assembler() for loc in faildescr._x86_debug_faillocs])
regalloc = RegAlloc(self, self.cpu.translate_support_code)
fail_depths = faildescr._x86_current_depths
+ startpos = self.mc.get_relative_pos()
operations = regalloc.prepare_bridge(fail_depths, inputargs, arglocs,
operations,
self.current_clt.allgcrefs)
@@ -549,7 +536,7 @@
name = "Bridge # %s" % (descr_number,)
self.cpu.profile_agent.native_code_written(name,
rawstart, fullsize)
- return ops_offset
+ return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
def write_pending_failure_recoveries(self):
# for each pending guard, generate the code of the recovery stub
@@ -614,22 +601,29 @@
return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
self.cpu.gc_ll_descr.gcrootmap)
- def _register_counter(self, bridge, number):
- if self._debug:
- # YYY very minor leak -- we need the counters to stay alive
- # forever, just because we want to report them at the end
- # of the process
- struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
- track_allocation=False)
- struct.i = 0
- struct.bridge = int(bridge)
+ def _register_counter(self, tp, number, token):
+ # YYY very minor leak -- we need the counters to stay alive
+ # forever, just because we want to report them at the end
+ # of the process
+ struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
+ track_allocation=False)
+ struct.i = 0
+ struct.type = tp
+ if tp == 'b' or tp == 'e':
struct.number = number
- self.loop_run_counters.append(struct)
+ else:
+ assert token
+ struct.number = compute_unique_id(token)
+ self.loop_run_counters.append(struct)
+ return struct
def _find_failure_recovery_bytecode(self, faildescr):
adr_jump_offset = faildescr._x86_adr_jump_offset
if adr_jump_offset == 0:
- raise ValueError
+ # This case should be prevented by the logic in compile.py:
+ # look for CNT_BUSY_FLAG, which disables tracing from a guard
+ # when another tracing from the same guard is already in progress.
+ raise BridgeAlreadyCompiled
# follow the JMP/Jcond
p = rffi.cast(rffi.INTP, adr_jump_offset)
adr_target = adr_jump_offset + 4 + rffi.cast(lltype.Signed, p[0])
@@ -668,27 +662,36 @@
targettoken._x86_loop_code += rawstart
self.target_tokens_currently_compiling = None
+ def _append_debugging_code(self, operations, tp, number, token):
+ counter = self._register_counter(tp, number, token)
+ c_adr = ConstInt(rffi.cast(lltype.Signed, counter))
+ box = BoxInt()
+ box2 = BoxInt()
+ ops = [ResOperation(rop.GETFIELD_RAW, [c_adr],
+ box, descr=self.debug_counter_descr),
+ ResOperation(rop.INT_ADD, [box, ConstInt(1)], box2),
+ ResOperation(rop.SETFIELD_RAW, [c_adr, box2],
+ None, descr=self.debug_counter_descr)]
+ operations.extend(ops)
+
@specialize.argtype(1)
- def _inject_debugging_code(self, looptoken, operations):
+ def _inject_debugging_code(self, looptoken, operations, tp, number):
if self._debug:
# before doing anything, let's increase a counter
s = 0
for op in operations:
s += op.getopnum()
looptoken._x86_debug_checksum = s
- c_adr = ConstInt(rffi.cast(lltype.Signed,
- self.loop_run_counters[-1]))
- box = BoxInt()
- box2 = BoxInt()
- ops = [ResOperation(rop.GETFIELD_RAW, [c_adr],
- box, descr=self.debug_counter_descr),
- ResOperation(rop.INT_ADD, [box, ConstInt(1)], box2),
- ResOperation(rop.SETFIELD_RAW, [c_adr, box2],
- None, descr=self.debug_counter_descr)]
- if operations[0].getopnum() == rop.LABEL:
- operations = [operations[0]] + ops + operations[1:]
- else:
- operations = ops + operations
+
+ newoperations = []
+ self._append_debugging_code(newoperations, tp, number,
+ None)
+ for op in operations:
+ newoperations.append(op)
+ if op.getopnum() == rop.LABEL:
+ self._append_debugging_code(newoperations, 'l', number,
+ op.getdescr())
+ operations = newoperations
return operations
def _assemble(self, regalloc, operations):
@@ -809,7 +812,10 @@
target = newlooptoken._x86_function_addr
mc = codebuf.MachineCodeBlockWrapper()
mc.JMP(imm(target))
- assert mc.get_relative_pos() <= 13 # keep in sync with prepare_loop()
+ if WORD == 4: # keep in sync with prepare_loop()
+ assert mc.get_relative_pos() == 5
+ else:
+ assert mc.get_relative_pos() <= 13
mc.copy_to_raw_memory(oldadr)
def dump(self, text):
@@ -865,8 +871,8 @@
high_part = rffi.cast(rffi.CArrayPtr(rffi.INT), from_loc.value)[1]
low_part = intmask(low_part)
high_part = intmask(high_part)
- self.mc.MOV_bi(to_loc.value, low_part)
- self.mc.MOV_bi(to_loc.value + 4, high_part)
+ self.mc.MOV32_bi(to_loc.value, low_part)
+ self.mc.MOV32_bi(to_loc.value + 4, high_part)
def regalloc_perform(self, op, arglocs, resloc):
genop_list[op.getopnum()](self, op, arglocs, resloc)
@@ -1357,46 +1363,10 @@
self.mc.SHR_ri(resloc.value, 7)
self.mc.AND_ri(resloc.value, 1)
- def genop_new_with_vtable(self, op, arglocs, result_loc):
- assert result_loc is eax
- loc_vtable = arglocs[-1]
- assert isinstance(loc_vtable, ImmedLoc)
- arglocs = arglocs[:-1]
- self.call(self.malloc_func_addr, arglocs, eax)
- self.propagate_memoryerror_if_eax_is_null()
- self.set_vtable(eax, loc_vtable)
+ # ----------
- def set_vtable(self, loc, loc_vtable):
- if self.cpu.vtable_offset is not None:
- assert isinstance(loc, RegLoc)
- assert isinstance(loc_vtable, ImmedLoc)
- self.mc.MOV(mem(loc, self.cpu.vtable_offset), loc_vtable)
-
- def set_new_array_length(self, loc, ofs_length, loc_num_elem):
- assert isinstance(loc, RegLoc)
- assert isinstance(loc_num_elem, ImmedLoc)
- self.mc.MOV(mem(loc, ofs_length), loc_num_elem)
-
- # XXX genop_new is abused for all varsized mallocs with Boehm, for now
- # (instead of genop_new_array, genop_newstr, genop_newunicode)
- def genop_new(self, op, arglocs, result_loc):
- assert result_loc is eax
- self.call(self.malloc_func_addr, arglocs, eax)
- self.propagate_memoryerror_if_eax_is_null()
-
- def genop_new_array(self, op, arglocs, result_loc):
- assert result_loc is eax
- self.call(self.malloc_array_func_addr, arglocs, eax)
- self.propagate_memoryerror_if_eax_is_null()
-
- def genop_newstr(self, op, arglocs, result_loc):
- assert result_loc is eax
- self.call(self.malloc_str_func_addr, arglocs, eax)
- self.propagate_memoryerror_if_eax_is_null()
-
- def genop_newunicode(self, op, arglocs, result_loc):
- assert result_loc is eax
- self.call(self.malloc_unicode_func_addr, arglocs, eax)
+ def genop_call_malloc_gc(self, op, arglocs, result_loc):
+ self.genop_call(op, arglocs, result_loc)
self.propagate_memoryerror_if_eax_is_null()
def propagate_memoryerror_if_eax_is_null(self):
@@ -2065,6 +2035,8 @@
self._genop_call(op, arglocs, resloc, force_index)
def _genop_call(self, op, arglocs, resloc, force_index):
+ from pypy.jit.backend.llsupport.descr import CallDescr
+
sizeloc = arglocs[0]
assert isinstance(sizeloc, ImmedLoc)
size = sizeloc.value
@@ -2079,13 +2051,16 @@
else:
tmp = eax
+ descr = op.getdescr()
+ assert isinstance(descr, CallDescr)
+
self._emit_call(force_index, x, arglocs, 3, tmp=tmp,
- argtypes=op.getdescr().get_arg_types(),
- callconv=op.getdescr().get_call_conv())
+ argtypes=descr.get_arg_types(),
+ callconv=descr.get_call_conv())
if IS_X86_32 and isinstance(resloc, StackLoc) and resloc.type == FLOAT:
# a float or a long long return
- if op.getdescr().get_return_type() == 'L':
+ if descr.get_result_type() == 'L':
self.mc.MOV_br(resloc.value, eax.value) # long long
self.mc.MOV_br(resloc.value + 4, edx.value)
# XXX should ideally not move the result on the stack,
@@ -2094,7 +2069,7 @@
# can just be always a stack location
else:
self.mc.FSTPL_b(resloc.value) # float return
- elif op.getdescr().get_return_type() == 'S':
+ elif descr.get_result_type() == 'S':
# singlefloat return
assert resloc is eax
if IS_X86_32:
@@ -2292,9 +2267,9 @@
#
# Reset the vable token --- XXX really too much special logic here:-(
if jd.index_of_virtualizable >= 0:
- from pypy.jit.backend.llsupport.descr import BaseFieldDescr
+ from pypy.jit.backend.llsupport.descr import FieldDescr
fielddescr = jd.vable_token_descr
- assert isinstance(fielddescr, BaseFieldDescr)
+ assert isinstance(fielddescr, FieldDescr)
ofs = fielddescr.offset
self.mc.MOV(eax, arglocs[1])
self.mc.MOV_mi((eax.value, ofs), 0)
@@ -2497,9 +2472,8 @@
else:
self.mc.JMP(imm(target))
- def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, tid):
- size = max(size, self.cpu.gc_ll_descr.minimal_size_in_nursery)
- size = (size + WORD-1) & ~(WORD-1) # round up
+ def malloc_cond(self, nursery_free_adr, nursery_top_adr, size):
+ assert size & (WORD-1) == 0 # must be correctly aligned
self.mc.MOV(eax, heap(nursery_free_adr))
self.mc.LEA_rm(edx.value, (eax.value, size))
self.mc.CMP(edx, heap(nursery_top_adr))
@@ -2535,9 +2509,6 @@
offset = self.mc.get_relative_pos() - jmp_adr
assert 0 < offset <= 127
self.mc.overwrite(jmp_adr-1, chr(offset))
- # on 64-bits, 'tid' is a value that fits in 31 bits
- assert rx86.fits_in_32bits(tid)
- self.mc.MOV_mi((eax.value, 0), tid)
self.mc.MOV(heap(nursery_free_adr), edx)
genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST
@@ -2584,3 +2555,6 @@
def not_implemented(msg):
os.write(2, '[x86/asm] %s\n' % msg)
raise NotImplementedError(msg)
+
+class BridgeAlreadyCompiled(Exception):
+ pass
diff --git a/pypy/jit/backend/x86/jump.py b/pypy/jit/backend/x86/jump.py
--- a/pypy/jit/backend/x86/jump.py
+++ b/pypy/jit/backend/x86/jump.py
@@ -17,7 +17,10 @@
key = src._getregkey()
if key in srccount:
if key == dst_locations[i]._getregkey():
- srccount[key] = -sys.maxint # ignore a move "x = x"
+ # ignore a move "x = x"
+ # setting any "large enough" negative value is ok, but
+ # be careful of overflows, don't use -sys.maxint
+ srccount[key] = -len(dst_locations) - 1
pending_dests -= 1
else:
srccount[key] += 1
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -16,8 +16,8 @@
from pypy.jit.codewriter import heaptracker, longlong
from pypy.jit.codewriter.effectinfo import EffectInfo
from pypy.jit.metainterp.resoperation import rop
-from pypy.jit.backend.llsupport.descr import BaseFieldDescr, BaseArrayDescr
-from pypy.jit.backend.llsupport.descr import BaseCallDescr, BaseSizeDescr
+from pypy.jit.backend.llsupport.descr import FieldDescr, ArrayDescr
+from pypy.jit.backend.llsupport.descr import CallDescr, SizeDescr
from pypy.jit.backend.llsupport.descr import InteriorFieldDescr
from pypy.jit.backend.llsupport.regalloc import FrameManager, RegisterManager,\
TempBox
@@ -188,7 +188,10 @@
# note: we need to make a copy of inputargs because possibly_free_vars
# is also used on op args, which is a non-resizable list
self.possibly_free_vars(list(inputargs))
- self.min_bytes_before_label = 13
+ if WORD == 4: # see redirect_call_assembler()
+ self.min_bytes_before_label = 5
+ else:
+ self.min_bytes_before_label = 13
return operations
def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
@@ -741,7 +744,7 @@
self.xrm.possibly_free_var(op.getarg(0))
def consider_cast_int_to_float(self, op):
- loc0 = self.rm.loc(op.getarg(0))
+ loc0 = self.rm.make_sure_var_in_reg(op.getarg(0))
loc1 = self.xrm.force_allocate_reg(op.result)
self.Perform(op, [loc0], loc1)
self.rm.possibly_free_var(op.getarg(0))
@@ -870,9 +873,9 @@
def _consider_call(self, op, guard_not_forced_op=None):
calldescr = op.getdescr()
- assert isinstance(calldescr, BaseCallDescr)
+ assert isinstance(calldescr, CallDescr)
assert len(calldescr.arg_classes) == op.numargs() - 1
- size = calldescr.get_result_size(self.translate_support_code)
+ size = calldescr.get_result_size()
sign = calldescr.is_result_signed()
if sign:
sign_loc = imm1
@@ -917,12 +920,15 @@
consider_call_release_gil = consider_call_may_force
+ def consider_call_malloc_gc(self, op):
+ self._consider_call(op)
+
def consider_call_assembler(self, op, guard_op):
descr = op.getdescr()
assert isinstance(descr, JitCellToken)
jd = descr.outermost_jitdriver_sd
assert jd is not None
- size = jd.portal_calldescr.get_result_size(self.translate_support_code)
+ size = jd.portal_calldescr.get_result_size()
vable_index = jd.index_of_virtualizable
if vable_index >= 0:
self.rm._sync_var(op.getarg(vable_index))
@@ -957,21 +963,10 @@
consider_cond_call_gc_wb_array = consider_cond_call_gc_wb
- def fastpath_malloc_fixedsize(self, op, descr):
- assert isinstance(descr, BaseSizeDescr)
- self._do_fastpath_malloc(op, descr.size, descr.tid)
-
- def fastpath_malloc_varsize(self, op, arraydescr, num_elem):
- assert isinstance(arraydescr, BaseArrayDescr)
- ofs_length = arraydescr.get_ofs_length(self.translate_support_code)
- basesize = arraydescr.get_base_size(self.translate_support_code)
- itemsize = arraydescr.get_item_size(self.translate_support_code)
- size = basesize + itemsize * num_elem
- self._do_fastpath_malloc(op, size, arraydescr.tid)
- self.assembler.set_new_array_length(eax, ofs_length, imm(num_elem))
-
- def _do_fastpath_malloc(self, op, size, tid):
- gc_ll_descr = self.assembler.cpu.gc_ll_descr
+ def consider_call_malloc_nursery(self, op):
+ size_box = op.getarg(0)
+ assert isinstance(size_box, ConstInt)
+ size = size_box.getint()
self.rm.force_allocate_reg(op.result, selected_reg=eax)
#
# We need edx as a temporary, but otherwise don't save any more
@@ -980,86 +975,39 @@
self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
self.rm.possibly_free_var(tmp_box)
#
+ gc_ll_descr = self.assembler.cpu.gc_ll_descr
self.assembler.malloc_cond(
gc_ll_descr.get_nursery_free_addr(),
gc_ll_descr.get_nursery_top_addr(),
- size, tid,
- )
-
- def consider_new(self, op):
- gc_ll_descr = self.assembler.cpu.gc_ll_descr
- if gc_ll_descr.can_inline_malloc(op.getdescr()):
- self.fastpath_malloc_fixedsize(op, op.getdescr())
- else:
- args = gc_ll_descr.args_for_new(op.getdescr())
- arglocs = [imm(x) for x in args]
- return self._call(op, arglocs)
-
- def consider_new_with_vtable(self, op):
- classint = op.getarg(0).getint()
- descrsize = heaptracker.vtable2descr(self.assembler.cpu, classint)
- if self.assembler.cpu.gc_ll_descr.can_inline_malloc(descrsize):
- self.fastpath_malloc_fixedsize(op, descrsize)
- self.assembler.set_vtable(eax, imm(classint))
- # result of fastpath malloc is in eax
- else:
- args = self.assembler.cpu.gc_ll_descr.args_for_new(descrsize)
- arglocs = [imm(x) for x in args]
- arglocs.append(self.loc(op.getarg(0)))
- return self._call(op, arglocs)
-
- def consider_newstr(self, op):
- loc = self.loc(op.getarg(0))
- return self._call(op, [loc])
-
- def consider_newunicode(self, op):
- loc = self.loc(op.getarg(0))
- return self._call(op, [loc])
-
- def consider_new_array(self, op):
- gc_ll_descr = self.assembler.cpu.gc_ll_descr
- box_num_elem = op.getarg(0)
- if isinstance(box_num_elem, ConstInt):
- num_elem = box_num_elem.value
- if gc_ll_descr.can_inline_malloc_varsize(op.getdescr(),
- num_elem):
- self.fastpath_malloc_varsize(op, op.getdescr(), num_elem)
- return
- args = self.assembler.cpu.gc_ll_descr.args_for_new_array(
- op.getdescr())
- arglocs = [imm(x) for x in args]
- arglocs.append(self.loc(box_num_elem))
- self._call(op, arglocs)
+ size)
def _unpack_arraydescr(self, arraydescr):
- assert isinstance(arraydescr, BaseArrayDescr)
- ofs_length = arraydescr.get_ofs_length(self.translate_support_code)
- ofs = arraydescr.get_base_size(self.translate_support_code)
- size = arraydescr.get_item_size(self.translate_support_code)
- ptr = arraydescr.is_array_of_pointers()
+ assert isinstance(arraydescr, ArrayDescr)
+ ofs = arraydescr.basesize
+ size = arraydescr.itemsize
sign = arraydescr.is_item_signed()
- return size, ofs, ofs_length, ptr, sign
+ return size, ofs, sign
def _unpack_fielddescr(self, fielddescr):
- assert isinstance(fielddescr, BaseFieldDescr)
+ assert isinstance(fielddescr, FieldDescr)
ofs = fielddescr.offset
- size = fielddescr.get_field_size(self.translate_support_code)
- ptr = fielddescr.is_pointer_field()
+ size = fielddescr.field_size
sign = fielddescr.is_field_signed()
- return imm(ofs), imm(size), ptr, sign
+ return imm(ofs), imm(size), sign
+ _unpack_fielddescr._always_inline_ = True
def _unpack_interiorfielddescr(self, descr):
assert isinstance(descr, InteriorFieldDescr)
arraydescr = descr.arraydescr
- ofs = arraydescr.get_base_size(self.translate_support_code)
- itemsize = arraydescr.get_item_size(self.translate_support_code)
- fieldsize = descr.fielddescr.get_field_size(self.translate_support_code)
+ ofs = arraydescr.basesize
+ itemsize = arraydescr.itemsize
+ fieldsize = descr.fielddescr.field_size
sign = descr.fielddescr.is_field_signed()
ofs += descr.fielddescr.offset
return imm(ofs), imm(itemsize), imm(fieldsize), sign
def consider_setfield_gc(self, op):
- ofs_loc, size_loc, _, _ = self._unpack_fielddescr(op.getdescr())
+ ofs_loc, size_loc, _ = self._unpack_fielddescr(op.getdescr())
assert isinstance(size_loc, ImmedLoc)
if size_loc.value == 1:
need_lower_byte = True
@@ -1117,7 +1065,7 @@
consider_unicodesetitem = consider_strsetitem
def consider_setarrayitem_gc(self, op):
- itemsize, ofs, _, _, _ = self._unpack_arraydescr(op.getdescr())
+ itemsize, ofs, _ = self._unpack_arraydescr(op.getdescr())
args = op.getarglist()
base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
if itemsize == 1:
@@ -1134,7 +1082,7 @@
consider_setarrayitem_raw = consider_setarrayitem_gc
def consider_getfield_gc(self, op):
- ofs_loc, size_loc, _, sign = self._unpack_fielddescr(op.getdescr())
+ ofs_loc, size_loc, sign = self._unpack_fielddescr(op.getdescr())
args = op.getarglist()
base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
self.rm.possibly_free_vars(args)
@@ -1150,7 +1098,7 @@
consider_getfield_gc_pure = consider_getfield_gc
def consider_getarrayitem_gc(self, op):
- itemsize, ofs, _, _, sign = self._unpack_arraydescr(op.getdescr())
+ itemsize, ofs, sign = self._unpack_arraydescr(op.getdescr())
args = op.getarglist()
base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
@@ -1229,8 +1177,8 @@
def consider_arraylen_gc(self, op):
arraydescr = op.getdescr()
- assert isinstance(arraydescr, BaseArrayDescr)
- ofs = arraydescr.get_ofs_length(self.translate_support_code)
+ assert isinstance(arraydescr, ArrayDescr)
+ ofs = arraydescr.lendescr.offset
args = op.getarglist()
base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
self.rm.possibly_free_vars_for_op(op)
diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py b/pypy/jit/backend/x86/test/test_gc_integration.py
--- a/pypy/jit/backend/x86/test/test_gc_integration.py
+++ b/pypy/jit/backend/x86/test/test_gc_integration.py
@@ -8,7 +8,7 @@
from pypy.jit.metainterp.resoperation import rop, ResOperation
from pypy.jit.codewriter import heaptracker
from pypy.jit.codewriter.effectinfo import EffectInfo
-from pypy.jit.backend.llsupport.descr import GcCache
+from pypy.jit.backend.llsupport.descr import GcCache, FieldDescr, FLAG_SIGNED
from pypy.jit.backend.llsupport.gc import GcLLDescription
from pypy.jit.backend.detect_cpu import getcpuclass
from pypy.jit.backend.x86.regalloc import RegAlloc
@@ -17,7 +17,7 @@
from pypy.rpython.lltypesystem import lltype, llmemory, rffi
from pypy.rpython.annlowlevel import llhelper
from pypy.rpython.lltypesystem import rclass, rstr
-from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcPtrFieldDescr
+from pypy.jit.backend.llsupport.gc import GcLLDescr_framework
from pypy.jit.backend.x86.test.test_regalloc import MockAssembler
from pypy.jit.backend.x86.test.test_regalloc import BaseTestRegalloc
@@ -41,20 +41,15 @@
return ['compressed'] + shape[1:]
class MockGcDescr(GcCache):
- def get_funcptr_for_new(self):
- return 123
- get_funcptr_for_newarray = get_funcptr_for_new
- get_funcptr_for_newstr = get_funcptr_for_new
- get_funcptr_for_newunicode = get_funcptr_for_new
get_malloc_slowpath_addr = None
-
+ write_barrier_descr = None
moving_gc = True
gcrootmap = MockGcRootMap()
def initialize(self):
pass
- record_constptrs = GcLLDescr_framework.record_constptrs.im_func
+ _record_constptrs = GcLLDescr_framework._record_constptrs.im_func
rewrite_assembler = GcLLDescr_framework.rewrite_assembler.im_func
class TestRegallocDirectGcIntegration(object):
@@ -170,42 +165,32 @@
'''
self.interpret(ops, [0, 0, 0, 0, 0, 0, 0, 0, 0], run=False)
+NOT_INITIALIZED = chr(0xdd)
+
class GCDescrFastpathMalloc(GcLLDescription):
gcrootmap = None
- expected_malloc_slowpath_size = WORD*2
+ write_barrier_descr = None
def __init__(self):
- GcCache.__init__(self, False)
+ GcLLDescription.__init__(self, None)
# create a nursery
- NTP = rffi.CArray(lltype.Signed)
- self.nursery = lltype.malloc(NTP, 16, flavor='raw')
- self.addrs = lltype.malloc(rffi.CArray(lltype.Signed), 3,
+ NTP = rffi.CArray(lltype.Char)
+ self.nursery = lltype.malloc(NTP, 64, flavor='raw')
+ for i in range(64):
+ self.nursery[i] = NOT_INITIALIZED
+ self.addrs = lltype.malloc(rffi.CArray(lltype.Signed), 2,
flavor='raw')
self.addrs[0] = rffi.cast(lltype.Signed, self.nursery)
- self.addrs[1] = self.addrs[0] + 16*WORD
- self.addrs[2] = 0
- # 16 WORDs
+ self.addrs[1] = self.addrs[0] + 64
+ self.calls = []
def malloc_slowpath(size):
- assert size == self.expected_malloc_slowpath_size
+ self.calls.append(size)
+ # reset the nursery
nadr = rffi.cast(lltype.Signed, self.nursery)
self.addrs[0] = nadr + size
- self.addrs[2] += 1
return nadr
- self.malloc_slowpath = malloc_slowpath
- self.MALLOC_SLOWPATH = lltype.FuncType([lltype.Signed],
- lltype.Signed)
- self._counter = 123000
-
- def can_inline_malloc(self, descr):
- return True
-
- def get_funcptr_for_new(self):
- return 42
-# return llhelper(lltype.Ptr(self.NEW_TP), self.new)
-
- def init_size_descr(self, S, descr):
- descr.tid = self._counter
- self._counter += 1
+ self.generate_function('malloc_nursery', malloc_slowpath,
+ [lltype.Signed], lltype.Signed)
def get_nursery_free_addr(self):
return rffi.cast(lltype.Signed, self.addrs)
@@ -214,204 +199,61 @@
return rffi.cast(lltype.Signed, self.addrs) + WORD
def get_malloc_slowpath_addr(self):
- fptr = llhelper(lltype.Ptr(self.MALLOC_SLOWPATH), self.malloc_slowpath)
- return rffi.cast(lltype.Signed, fptr)
+ return self.get_malloc_fn_addr('malloc_nursery')
- get_funcptr_for_newarray = None
- get_funcptr_for_newstr = None
- get_funcptr_for_newunicode = None
+ def check_nothing_in_nursery(self):
+ # CALL_MALLOC_NURSERY should not write anything in the nursery
+ for i in range(64):
+ assert self.nursery[i] == NOT_INITIALIZED
class TestMallocFastpath(BaseTestRegalloc):
def setup_method(self, method):
cpu = CPU(None, None)
- cpu.vtable_offset = WORD
cpu.gc_ll_descr = GCDescrFastpathMalloc()
cpu.setup_once()
+ self.cpu = cpu
- # hack: specify 'tid' explicitly, because this test is not running
- # with the gc transformer
- NODE = lltype.GcStruct('node', ('tid', lltype.Signed),
- ('value', lltype.Signed))
- nodedescr = cpu.sizeof(NODE)
- valuedescr = cpu.fielddescrof(NODE, 'value')
-
- self.cpu = cpu
- self.nodedescr = nodedescr
- vtable = lltype.malloc(rclass.OBJECT_VTABLE, immortal=True)
- vtable_int = cpu.cast_adr_to_int(llmemory.cast_ptr_to_adr(vtable))
- NODE2 = lltype.GcStruct('node2',
- ('parent', rclass.OBJECT),
- ('tid', lltype.Signed),
- ('vtable', lltype.Ptr(rclass.OBJECT_VTABLE)))
- descrsize = cpu.sizeof(NODE2)
- heaptracker.register_known_gctype(cpu, vtable, NODE2)
- self.descrsize = descrsize
- self.vtable_int = vtable_int
-
- self.namespace = locals().copy()
-
def test_malloc_fastpath(self):
ops = '''
- [i0]
- p0 = new(descr=nodedescr)
- setfield_gc(p0, i0, descr=valuedescr)
- finish(p0)
+ []
+ p0 = call_malloc_nursery(16)
+ p1 = call_malloc_nursery(32)
+ p2 = call_malloc_nursery(16)
+ finish(p0, p1, p2)
'''
- self.interpret(ops, [42])
- # check the nursery
+ self.interpret(ops, [])
+ # check the returned pointers
gc_ll_descr = self.cpu.gc_ll_descr
- assert gc_ll_descr.nursery[0] == self.nodedescr.tid
- assert gc_ll_descr.nursery[1] == 42
nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
- assert gc_ll_descr.addrs[0] == nurs_adr + (WORD*2)
- assert gc_ll_descr.addrs[2] == 0 # slowpath never called
+ ref = self.cpu.get_latest_value_ref
+ assert rffi.cast(lltype.Signed, ref(0)) == nurs_adr + 0
+ assert rffi.cast(lltype.Signed, ref(1)) == nurs_adr + 16
+ assert rffi.cast(lltype.Signed, ref(2)) == nurs_adr + 48
+ # check the nursery content and state
+ gc_ll_descr.check_nothing_in_nursery()
+ assert gc_ll_descr.addrs[0] == nurs_adr + 64
+ # slowpath never called
+ assert gc_ll_descr.calls == []
def test_malloc_slowpath(self):
ops = '''
[]
- p0 = new(descr=nodedescr)
- p1 = new(descr=nodedescr)
- p2 = new(descr=nodedescr)
- p3 = new(descr=nodedescr)
- p4 = new(descr=nodedescr)
- p5 = new(descr=nodedescr)
- p6 = new(descr=nodedescr)
- p7 = new(descr=nodedescr)
- p8 = new(descr=nodedescr)
- finish(p0, p1, p2, p3, p4, p5, p6, p7, p8)
+ p0 = call_malloc_nursery(16)
+ p1 = call_malloc_nursery(32)
+ p2 = call_malloc_nursery(24) # overflow
+ finish(p0, p1, p2)
'''
self.interpret(ops, [])
+ # check the returned pointers
+ gc_ll_descr = self.cpu.gc_ll_descr
+ nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
+ ref = self.cpu.get_latest_value_ref
+ assert rffi.cast(lltype.Signed, ref(0)) == nurs_adr + 0
+ assert rffi.cast(lltype.Signed, ref(1)) == nurs_adr + 16
+ assert rffi.cast(lltype.Signed, ref(2)) == nurs_adr + 0
+ # check the nursery content and state
+ gc_ll_descr.check_nothing_in_nursery()
+ assert gc_ll_descr.addrs[0] == nurs_adr + 24
# this should call slow path once
- gc_ll_descr = self.cpu.gc_ll_descr
- nadr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
- assert gc_ll_descr.addrs[0] == nadr + (WORD*2)
- assert gc_ll_descr.addrs[2] == 1 # slowpath called once
-
- def test_new_with_vtable(self):
- ops = '''
- [i0, i1]
- p0 = new_with_vtable(ConstClass(vtable))
- guard_class(p0, ConstClass(vtable)) [i0]
- finish(i1)
- '''
- self.interpret(ops, [0, 1])
- assert self.getint(0) == 1
- gc_ll_descr = self.cpu.gc_ll_descr
- assert gc_ll_descr.nursery[0] == self.descrsize.tid
- assert gc_ll_descr.nursery[1] == self.vtable_int
- nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
- assert gc_ll_descr.addrs[0] == nurs_adr + (WORD*3)
- assert gc_ll_descr.addrs[2] == 0 # slowpath never called
-
-
-class Seen(Exception):
- pass
-
-class GCDescrFastpathMallocVarsize(GCDescrFastpathMalloc):
- def can_inline_malloc_varsize(self, arraydescr, num_elem):
- return num_elem < 5
- def get_funcptr_for_newarray(self):
- return 52
- def init_array_descr(self, A, descr):
- descr.tid = self._counter
- self._counter += 1
- def args_for_new_array(self, descr):
- raise Seen("args_for_new_array")
-
-class TestMallocVarsizeFastpath(BaseTestRegalloc):
- def setup_method(self, method):
- cpu = CPU(None, None)
- cpu.vtable_offset = WORD
- cpu.gc_ll_descr = GCDescrFastpathMallocVarsize()
- cpu.setup_once()
- self.cpu = cpu
-
- ARRAY = lltype.GcArray(lltype.Signed)
- arraydescr = cpu.arraydescrof(ARRAY)
- self.arraydescr = arraydescr
- ARRAYCHAR = lltype.GcArray(lltype.Char)
- arraychardescr = cpu.arraydescrof(ARRAYCHAR)
-
- self.namespace = locals().copy()
-
- def test_malloc_varsize_fastpath(self):
- # Hack. Running the GcLLDescr_framework without really having
- # a complete GC means that we end up with both the tid and the
- # length being at offset 0. In this case, so the length overwrites
- # the tid. This is of course only the case in this test class.
- ops = '''
- []
- p0 = new_array(4, descr=arraydescr)
- setarrayitem_gc(p0, 0, 142, descr=arraydescr)
- setarrayitem_gc(p0, 3, 143, descr=arraydescr)
- finish(p0)
- '''
- self.interpret(ops, [])
- # check the nursery
- gc_ll_descr = self.cpu.gc_ll_descr
- assert gc_ll_descr.nursery[0] == 4
- assert gc_ll_descr.nursery[1] == 142
- assert gc_ll_descr.nursery[4] == 143
- nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
- assert gc_ll_descr.addrs[0] == nurs_adr + (WORD*5)
- assert gc_ll_descr.addrs[2] == 0 # slowpath never called
-
- def test_malloc_varsize_slowpath(self):
- ops = '''
- []
- p0 = new_array(4, descr=arraydescr)
- setarrayitem_gc(p0, 0, 420, descr=arraydescr)
- setarrayitem_gc(p0, 3, 430, descr=arraydescr)
- p1 = new_array(4, descr=arraydescr)
- setarrayitem_gc(p1, 0, 421, descr=arraydescr)
- setarrayitem_gc(p1, 3, 431, descr=arraydescr)
- p2 = new_array(4, descr=arraydescr)
- setarrayitem_gc(p2, 0, 422, descr=arraydescr)
- setarrayitem_gc(p2, 3, 432, descr=arraydescr)
- p3 = new_array(4, descr=arraydescr)
- setarrayitem_gc(p3, 0, 423, descr=arraydescr)
- setarrayitem_gc(p3, 3, 433, descr=arraydescr)
- finish(p0, p1, p2, p3)
- '''
- gc_ll_descr = self.cpu.gc_ll_descr
- gc_ll_descr.expected_malloc_slowpath_size = 5*WORD
- self.interpret(ops, [])
- assert gc_ll_descr.addrs[2] == 1 # slowpath called once
-
- def test_malloc_varsize_too_big(self):
- ops = '''
- []
- p0 = new_array(5, descr=arraydescr)
- finish(p0)
- '''
- py.test.raises(Seen, self.interpret, ops, [])
-
- def test_malloc_varsize_variable(self):
- ops = '''
- [i0]
- p0 = new_array(i0, descr=arraydescr)
- finish(p0)
- '''
- py.test.raises(Seen, self.interpret, ops, [])
-
- def test_malloc_array_of_char(self):
- # check that fastpath_malloc_varsize() respects the alignment
- # of the pointer in the nursery
- ops = '''
- []
- p1 = new_array(1, descr=arraychardescr)
- p2 = new_array(2, descr=arraychardescr)
- p3 = new_array(3, descr=arraychardescr)
- p4 = new_array(4, descr=arraychardescr)
- finish(p1, p2, p3, p4)
- '''
- self.interpret(ops, [])
- p1 = self.getptr(0, llmemory.GCREF)
- p2 = self.getptr(1, llmemory.GCREF)
- p3 = self.getptr(2, llmemory.GCREF)
- p4 = self.getptr(3, llmemory.GCREF)
- assert p1._obj.intval & (WORD-1) == 0 # aligned
- assert p2._obj.intval & (WORD-1) == 0 # aligned
- assert p3._obj.intval & (WORD-1) == 0 # aligned
- assert p4._obj.intval & (WORD-1) == 0 # aligned
+ assert gc_ll_descr.calls == [24]
diff --git a/pypy/jit/backend/x86/test/test_jump.py b/pypy/jit/backend/x86/test/test_jump.py
--- a/pypy/jit/backend/x86/test/test_jump.py
+++ b/pypy/jit/backend/x86/test/test_jump.py
@@ -20,6 +20,11 @@
def regalloc_pop(self, loc):
self.ops.append(('pop', loc))
+ def regalloc_immedmem2mem(self, from_loc, to_loc):
+ assert isinstance(from_loc, ConstFloatLoc)
+ assert isinstance(to_loc, StackLoc)
+ self.ops.append(('immedmem2mem', from_loc, to_loc))
+
def got(self, expected):
print '------------------------ comparing ---------------------------'
for op1, op2 in zip(self.ops, expected):
@@ -244,6 +249,13 @@
else:
return pick1()
#
+ def pick2c():
+ n = random.randrange(-2000, 500)
+ if n >= 0:
+ return ConstFloatLoc(n) # n is the address, not really used here
+ else:
+ return pick2()
+ #
def pick_dst(fn, count, seen):
result = []
while len(result) < count:
@@ -280,12 +292,12 @@
if loc.get_width() > WORD:
stack[loc.value+WORD] = 'value-hiword-%d' % i
else:
- assert isinstance(loc, ImmedLoc)
+ assert isinstance(loc, (ImmedLoc, ConstFloatLoc))
return regs1, regs2, stack
#
for i in range(500):
seen = {}
- src_locations2 = [pick2() for i in range(4)]
+ src_locations2 = [pick2c() for i in range(4)]
dst_locations2 = pick_dst(pick2, 4, seen)
src_locations1 = [pick1c() for i in range(5)]
dst_locations1 = pick_dst(pick1, 5, seen)
@@ -312,9 +324,15 @@
return got
if isinstance(loc, ImmedLoc):
return 'const-%d' % loc.value
+ if isinstance(loc, ConstFloatLoc):
+ got = 'constfloat-@%d' % loc.value
+ if loc.get_width() > WORD:
+ got = (got, 'constfloat-next-@%d' % loc.value)
+ return got
assert 0, loc
#
def write(loc, newvalue):
+ assert (type(newvalue) is tuple) == (loc.get_width() > WORD)
if isinstance(loc, RegLoc):
if loc.is_xmm:
regs2[loc.value] = newvalue
@@ -337,10 +355,14 @@
for op in assembler.ops:
if op[0] == 'mov':
src, dst = op[1:]
- assert isinstance(src, (RegLoc, StackLoc, ImmedLoc))
- assert isinstance(dst, (RegLoc, StackLoc))
- assert not (isinstance(src, StackLoc) and
- isinstance(dst, StackLoc))
+ if isinstance(src, ConstFloatLoc):
+ assert isinstance(dst, RegLoc)
+ assert dst.is_xmm
+ else:
+ assert isinstance(src, (RegLoc, StackLoc, ImmedLoc))
+ assert isinstance(dst, (RegLoc, StackLoc))
+ assert not (isinstance(src, StackLoc) and
+ isinstance(dst, StackLoc))
write(dst, read(src))
elif op[0] == 'push':
src, = op[1:]
@@ -350,6 +372,11 @@
dst, = op[1:]
assert isinstance(dst, (RegLoc, StackLoc))
write(dst, extrapushes.pop())
+ elif op[0] == 'immedmem2mem':
+ src, dst = op[1:]
+ assert isinstance(src, ConstFloatLoc)
+ assert isinstance(dst, StackLoc)
+ write(dst, read(src, 8))
else:
assert 0, "unknown op: %r" % (op,)
assert not extrapushes
@@ -358,3 +385,32 @@
assert read(loc, WORD) == src_values1[i]
for i, loc in enumerate(dst_locations2):
assert read(loc, 8) == src_values2[i]
+
+
+def test_overflow_bug():
+ CASE = [
+ (-144, -248), # \ cycle
+ (-248, -144), # /
+ (-488, -416), # \ two usages of -488
+ (-488, -480), # /
+ (-488, -488), # - one self-application of -488
+ ]
+ class FakeAssembler:
+ def regalloc_mov(self, src, dst):
+ print "mov", src, dst
+ def regalloc_push(self, x):
+ print "push", x
+ def regalloc_pop(self, x):
+ print "pop", x
+ def regalloc_immedmem2mem(self, x, y):
+ print "?????????????????????????"
+ def main():
+ srclocs = [StackLoc(9999, x, 'i') for x,y in CASE]
+ dstlocs = [StackLoc(9999, y, 'i') for x,y in CASE]
+ remap_frame_layout(FakeAssembler(), srclocs, dstlocs, eax)
+ # it works when run directly
+ main()
+ # but it used to crash when translated,
+ # because of a -sys.maxint-2 overflowing to sys.maxint
+ from pypy.rpython.test.test_llinterp import interpret
+ interpret(main, [])
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -33,6 +33,13 @@
# for the individual tests see
# ====> ../../test/runner_test.py
+ add_loop_instructions = ['mov', 'add', 'test', 'je', 'jmp']
+ if WORD == 4:
+ bridge_loop_instructions = ['lea', 'jmp']
+ else:
+ # the 'mov' is part of the 'jmp' so far
+ bridge_loop_instructions = ['lea', 'mov', 'jmp']
+
def setup_method(self, meth):
self.cpu = CPU(rtyper=None, stats=FakeStats())
self.cpu.setup_once()
@@ -69,6 +76,7 @@
return ctypes.cast(res.value._obj.intval, ctypes.POINTER(item_tp))
def test_allocations(self):
+ py.test.skip("rewrite or kill")
from pypy.rpython.lltypesystem import rstr
allocs = [None]
@@ -415,12 +423,13 @@
]
inputargs = [i0]
debug._log = dlog = debug.DebugLog()
- ops_offset = self.cpu.compile_loop(inputargs, operations, looptoken)
+ info = self.cpu.compile_loop(inputargs, operations, looptoken)
+ ops_offset = info.ops_offset
debug._log = None
#
assert ops_offset is looptoken._x86_ops_offset
- # getfield_raw/int_add/setfield_raw + ops + None
- assert len(ops_offset) == 3 + len(operations) + 1
+ # 2*(getfield_raw/int_add/setfield_raw) + ops + None
+ assert len(ops_offset) == 2*3 + len(operations) + 1
assert (ops_offset[operations[0]] <=
ops_offset[operations[1]] <=
ops_offset[operations[2]] <=
@@ -518,16 +527,23 @@
from pypy.tool.logparser import parse_log_file, extract_category
from pypy.rlib import debug
+ targettoken, preambletoken = TargetToken(), TargetToken()
loop = """
[i0]
- label(i0, descr=targettoken)
+ label(i0, descr=preambletoken)
debug_merge_point('xyz', 0)
i1 = int_add(i0, 1)
i2 = int_ge(i1, 10)
guard_false(i2) []
- jump(i1, descr=targettoken)
+ label(i1, descr=targettoken)
+ debug_merge_point('xyz', 0)
+ i11 = int_add(i1, 1)
+ i12 = int_ge(i11, 10)
+ guard_false(i12) []
+ jump(i11, descr=targettoken)
"""
- ops = parse(loop, namespace={'targettoken': TargetToken()})
+ ops = parse(loop, namespace={'targettoken': targettoken,
+ 'preambletoken': preambletoken})
debug._log = dlog = debug.DebugLog()
try:
self.cpu.assembler.set_debug(True)
@@ -536,11 +552,18 @@
self.cpu.execute_token(looptoken, 0)
# check debugging info
struct = self.cpu.assembler.loop_run_counters[0]
- assert struct.i == 10
+ assert struct.i == 1
+ struct = self.cpu.assembler.loop_run_counters[1]
+ assert struct.i == 1
+ struct = self.cpu.assembler.loop_run_counters[2]
+ assert struct.i == 9
self.cpu.finish_once()
finally:
debug._log = None
- assert ('jit-backend-counts', [('debug_print', 'loop -1:10')]) in dlog
+ l0 = ('debug_print', 'entry -1:1')
+ l1 = ('debug_print', preambletoken.repr_of_descr() + ':1')
+ l2 = ('debug_print', targettoken.repr_of_descr() + ':9')
+ assert ('jit-backend-counts', [l0, l1, l2]) in dlog
def test_debugger_checksum(self):
loop = """
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -69,16 +69,17 @@
def get_functions_to_patch():
from pypy.jit.backend.llsupport import gc
#
- can_inline_malloc1 = gc.GcLLDescr_framework.can_inline_malloc
- def can_inline_malloc2(*args):
+ can_use_nursery_malloc1 = gc.GcLLDescr_framework.can_use_nursery_malloc
+ def can_use_nursery_malloc2(*args):
try:
if os.environ['PYPY_NO_INLINE_MALLOC']:
return False
except KeyError:
pass
- return can_inline_malloc1(*args)
+ return can_use_nursery_malloc1(*args)
#
- return {(gc.GcLLDescr_framework, 'can_inline_malloc'): can_inline_malloc2}
+ return {(gc.GcLLDescr_framework, 'can_use_nursery_malloc'):
+ can_use_nursery_malloc2}
def compile(f, gc, enable_opts='', **kwds):
from pypy.annotation.listdef import s_list_of_strings
diff --git a/pypy/jit/backend/x86/test/test_zrpy_platform.py b/pypy/jit/backend/x86/test/test_zrpy_platform.py
--- a/pypy/jit/backend/x86/test/test_zrpy_platform.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_platform.py
@@ -74,8 +74,8 @@
myjitdriver = jit.JitDriver(greens = [], reds = ['n'])
def entrypoint(argv):
- myjitdriver.set_param('threshold', 2)
- myjitdriver.set_param('trace_eagerness', 0)
+ jit.set_param(myjitdriver, 'threshold', 2)
+ jit.set_param(myjitdriver, 'trace_eagerness', 0)
n = 16
while n > 0:
myjitdriver.can_enter_jit(n=n)
diff --git a/pypy/jit/backend/x86/tool/viewcode.py b/pypy/jit/backend/x86/tool/viewcode.py
--- a/pypy/jit/backend/x86/tool/viewcode.py
+++ b/pypy/jit/backend/x86/tool/viewcode.py
@@ -39,6 +39,7 @@
def machine_code_dump(data, originaddr, backend_name, label_list=None):
objdump_backend_option = {
'x86': 'i386',
+ 'x86_32': 'i386',
'x86_64': 'x86-64',
'i386': 'i386',
}
diff --git a/pypy/jit/codewriter/call.py b/pypy/jit/codewriter/call.py
--- a/pypy/jit/codewriter/call.py
+++ b/pypy/jit/codewriter/call.py
@@ -42,8 +42,7 @@
except AttributeError:
pass
- def is_candidate(graph):
- return policy.look_inside_graph(graph)
+ is_candidate = policy.look_inside_graph
assert len(self.jitdrivers_sd) > 0
todo = [jd.portal_graph for jd in self.jitdrivers_sd]
diff --git a/pypy/jit/codewriter/policy.py b/pypy/jit/codewriter/policy.py
--- a/pypy/jit/codewriter/policy.py
+++ b/pypy/jit/codewriter/policy.py
@@ -8,11 +8,15 @@
class JitPolicy(object):
- def __init__(self):
+ def __init__(self, jithookiface=None):
self.unsafe_loopy_graphs = set()
self.supports_floats = False
self.supports_longlong = False
self.supports_singlefloats = False
+ if jithookiface is None:
+ from pypy.rlib.jit import JitHookInterface
+ jithookiface = JitHookInterface()
+ self.jithookiface = jithookiface
def set_supports_floats(self, flag):
self.supports_floats = flag
diff --git a/pypy/jit/codewriter/support.py b/pypy/jit/codewriter/support.py
--- a/pypy/jit/codewriter/support.py
+++ b/pypy/jit/codewriter/support.py
@@ -162,7 +162,6 @@
_ll_4_list_setslice = rlist.ll_listsetslice
_ll_2_list_delslice_startonly = rlist.ll_listdelslice_startonly
_ll_3_list_delslice_startstop = rlist.ll_listdelslice_startstop
-_ll_1_list_list2fixed = lltypesystem_rlist.ll_list2fixed
_ll_2_list_inplace_mul = rlist.ll_inplace_mul
_ll_2_list_getitem_foldable = _ll_2_list_getitem
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -5,6 +5,7 @@
from pypy.rlib.objectmodel import we_are_translated
from pypy.rlib.debug import debug_start, debug_stop, debug_print
from pypy.rlib import rstack
+from pypy.rlib.jit import JitDebugInfo
from pypy.conftest import option
from pypy.tool.sourcetools import func_with_new_name
@@ -38,7 +39,8 @@
else:
extraprocedures = [procedure]
metainterp_sd.stats.view(errmsg=errmsg,
- extraprocedures=extraprocedures)
+ extraprocedures=extraprocedures,
+ metainterp_sd=metainterp_sd)
def create_empty_loop(metainterp, name_prefix=''):
name = metainterp.staticdata.stats.name_for_new_loop()
@@ -74,7 +76,7 @@
if descr is not original_jitcell_token:
original_jitcell_token.record_jump_to(descr)
descr.exported_state = None
- op._descr = None # clear reference, mostly for tests
+ op.cleardescr() # clear reference, mostly for tests
elif isinstance(descr, TargetToken):
# for a JUMP: record it as a potential jump.
# (the following test is not enough to prevent more complicated
@@ -89,8 +91,8 @@
assert descr.exported_state is None
if not we_are_translated():
op._descr_wref = weakref.ref(op._descr)
- op._descr = None # clear reference to prevent the history.Stats
- # from keeping the loop alive during tests
+ op.cleardescr() # clear reference to prevent the history.Stats
+ # from keeping the loop alive during tests
# record this looptoken on the QuasiImmut used in the code
if loop.quasi_immutable_deps is not None:
for qmut in loop.quasi_immutable_deps:
@@ -105,38 +107,32 @@
def compile_loop(metainterp, greenkey, start,
inputargs, jumpargs,
- start_resumedescr, full_preamble_needed=True):
+ resume_at_jump_descr, full_preamble_needed=True):
"""Try to compile a new procedure by closing the current history back
to the first operation.
"""
from pypy.jit.metainterp.optimizeopt import optimize_trace
- history = metainterp.history
metainterp_sd = metainterp.staticdata
jitdriver_sd = metainterp.jitdriver_sd
+ history = metainterp.history
- if False:
- part = partial_trace
- assert False
- procedur_token = metainterp.get_procedure_token(greenkey)
- assert procedure_token
- all_target_tokens = []
- else:
- jitcell_token = make_jitcell_token(jitdriver_sd)
- part = create_empty_loop(metainterp)
- part.inputargs = inputargs[:]
- h_ops = history.operations
- part.start_resumedescr = start_resumedescr
- part.operations = [ResOperation(rop.LABEL, inputargs, None, descr=TargetToken(jitcell_token))] + \
- [h_ops[i].clone() for i in range(start, len(h_ops))] + \
- [ResOperation(rop.JUMP, jumpargs, None, descr=jitcell_token)]
- try:
- optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts)
- except InvalidLoop:
- return None
- target_token = part.operations[0].getdescr()
- assert isinstance(target_token, TargetToken)
- all_target_tokens = [target_token]
+ jitcell_token = make_jitcell_token(jitdriver_sd)
+ part = create_empty_loop(metainterp)
+ part.inputargs = inputargs[:]
+ h_ops = history.operations
+ part.resume_at_jump_descr = resume_at_jump_descr
+ part.operations = [ResOperation(rop.LABEL, inputargs, None, descr=TargetToken(jitcell_token))] + \
+ [h_ops[i].clone() for i in range(start, len(h_ops))] + \
+ [ResOperation(rop.LABEL, jumpargs, None, descr=jitcell_token)]
+
+ try:
+ optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts)
+ except InvalidLoop:
+ return None
+ target_token = part.operations[0].getdescr()
+ assert isinstance(target_token, TargetToken)
+ all_target_tokens = [target_token]
loop = create_empty_loop(metainterp)
loop.inputargs = part.inputargs
@@ -174,17 +170,17 @@
loop.original_jitcell_token = jitcell_token
for label in all_target_tokens:
assert isinstance(label, TargetToken)
- label.original_jitcell_token = jitcell_token
if label.virtual_state and label.short_preamble:
metainterp_sd.logger_ops.log_short_preamble([], label.short_preamble)
jitcell_token.target_tokens = all_target_tokens
+ propagate_original_jitcell_token(loop)
send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop, "loop")
record_loop_or_bridge(metainterp_sd, loop)
return all_target_tokens[0]
def compile_retrace(metainterp, greenkey, start,
inputargs, jumpargs,
- start_resumedescr, partial_trace, resumekey):
+ resume_at_jump_descr, partial_trace, resumekey):
"""Try to compile a new procedure by closing the current history back
to the first operation.
"""
@@ -200,7 +196,7 @@
part = create_empty_loop(metainterp)
part.inputargs = inputargs[:]
- part.start_resumedescr = start_resumedescr
+ part.resume_at_jump_descr = resume_at_jump_descr
h_ops = history.operations
part.operations = [partial_trace.operations[-1]] + \
@@ -212,13 +208,12 @@
try:
optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts)
except InvalidLoop:
- #return None # XXX: Dissable for now
# Fall back on jumping to preamble
target_token = label.getdescr()
assert isinstance(target_token, TargetToken)
assert target_token.exported_state
part.operations = [orignial_label] + \
- [ResOperation(rop.JUMP, target_token.exported_state.jump_args,
+ [ResOperation(rop.JUMP, inputargs[:],
None, descr=loop_jitcell_token)]
try:
optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts,
@@ -246,11 +241,11 @@
for box in loop.inputargs:
assert isinstance(box, Box)
- target_token = loop.operations[-1].getdescr()
+ target_token = loop.operations[-1].getdescr()
resumekey.compile_and_attach(metainterp, loop)
+
target_token = label.getdescr()
assert isinstance(target_token, TargetToken)
- target_token.original_jitcell_token = loop.original_jitcell_token
record_loop_or_bridge(metainterp_sd, loop)
return target_token
@@ -287,14 +282,21 @@
assert i == len(inputargs)
loop.operations = extra_ops + loop.operations
+def propagate_original_jitcell_token(trace):
+ for op in trace.operations:
+ if op.getopnum() == rop.LABEL:
+ token = op.getdescr()
+ assert isinstance(token, TargetToken)
+ assert token.original_jitcell_token is None
+ token.original_jitcell_token = trace.original_jitcell_token
+
+
def send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop, type):
vinfo = jitdriver_sd.virtualizable_info
if vinfo is not None:
patch_new_loop_to_load_virtualizable_fields(loop, jitdriver_sd)
original_jitcell_token = loop.original_jitcell_token
- jitdriver_sd.on_compile(metainterp_sd.logger_ops, original_jitcell_token,
- loop.operations, type, greenkey)
loopname = jitdriver_sd.warmstate.get_location_str(greenkey)
globaldata = metainterp_sd.globaldata
original_jitcell_token.number = n = globaldata.loopnumbering
@@ -304,21 +306,41 @@
show_procedures(metainterp_sd, loop)
loop.check_consistency()
+ if metainterp_sd.warmrunnerdesc is not None:
+ hooks = metainterp_sd.warmrunnerdesc.hooks
+ debug_info = JitDebugInfo(jitdriver_sd, metainterp_sd.logger_ops,
+ original_jitcell_token, loop.operations,
+ type, greenkey)
+ hooks.before_compile(debug_info)
+ else:
+ debug_info = None
+ hooks = None
operations = get_deep_immutable_oplist(loop.operations)
metainterp_sd.profiler.start_backend()
debug_start("jit-backend")
try:
- ops_offset = metainterp_sd.cpu.compile_loop(loop.inputargs, operations,
- original_jitcell_token, name=loopname)
+ asminfo = metainterp_sd.cpu.compile_loop(loop.inputargs, operations,
+ original_jitcell_token,
+ name=loopname)
finally:
debug_stop("jit-backend")
metainterp_sd.profiler.end_backend()
+ if hooks is not None:
+ debug_info.asminfo = asminfo
+ hooks.after_compile(debug_info)
metainterp_sd.stats.add_new_loop(loop)
if not we_are_translated():
metainterp_sd.stats.compiled()
metainterp_sd.log("compiled new " + type)
#
- metainterp_sd.logger_ops.log_loop(loop.inputargs, loop.operations, n, type, ops_offset)
+ loopname = jitdriver_sd.warmstate.get_location_str(greenkey)
+ if asminfo is not None:
+ ops_offset = asminfo.ops_offset
+ else:
+ ops_offset = None
+ metainterp_sd.logger_ops.log_loop(loop.inputargs, loop.operations, n,
+ type, ops_offset,
+ name=loopname)
#
if metainterp_sd.warmrunnerdesc is not None: # for tests
metainterp_sd.warmrunnerdesc.memory_manager.keep_loop_alive(original_jitcell_token)
@@ -326,25 +348,40 @@
def send_bridge_to_backend(jitdriver_sd, metainterp_sd, faildescr, inputargs,
operations, original_loop_token):
n = metainterp_sd.cpu.get_fail_descr_number(faildescr)
- jitdriver_sd.on_compile_bridge(metainterp_sd.logger_ops,
- original_loop_token, operations, n)
if not we_are_translated():
show_procedures(metainterp_sd)
seen = dict.fromkeys(inputargs)
TreeLoop.check_consistency_of_branch(operations, seen)
+ if metainterp_sd.warmrunnerdesc is not None:
+ hooks = metainterp_sd.warmrunnerdesc.hooks
+ debug_info = JitDebugInfo(jitdriver_sd, metainterp_sd.logger_ops,
+ original_loop_token, operations, 'bridge',
+ fail_descr_no=n)
+ hooks.before_compile_bridge(debug_info)
+ else:
+ hooks = None
+ debug_info = None
+ operations = get_deep_immutable_oplist(operations)
metainterp_sd.profiler.start_backend()
- operations = get_deep_immutable_oplist(operations)
debug_start("jit-backend")
try:
- ops_offset = metainterp_sd.cpu.compile_bridge(faildescr, inputargs, operations,
- original_loop_token)
+ asminfo = metainterp_sd.cpu.compile_bridge(faildescr, inputargs,
+ operations,
+ original_loop_token)
finally:
debug_stop("jit-backend")
metainterp_sd.profiler.end_backend()
+ if hooks is not None:
+ debug_info.asminfo = asminfo
+ hooks.after_compile_bridge(debug_info)
if not we_are_translated():
metainterp_sd.stats.compiled()
metainterp_sd.log("compiled new bridge")
#
+ if asminfo is not None:
+ ops_offset = asminfo.ops_offset
+ else:
+ ops_offset = None
metainterp_sd.logger_ops.log_bridge(inputargs, operations, n, ops_offset)
#
#if metainterp_sd.warmrunnerdesc is not None: # for tests
@@ -557,6 +594,7 @@
inputargs = metainterp.history.inputargs
if not we_are_translated():
self._debug_suboperations = new_loop.operations
+ propagate_original_jitcell_token(new_loop)
send_bridge_to_backend(metainterp.jitdriver_sd, metainterp.staticdata,
self, inputargs, new_loop.operations,
new_loop.original_jitcell_token)
@@ -743,6 +781,7 @@
jitdriver_sd = metainterp.jitdriver_sd
redargs = new_loop.inputargs
new_loop.original_jitcell_token = jitcell_token = make_jitcell_token(jitdriver_sd)
+ propagate_original_jitcell_token(new_loop)
send_loop_to_backend(self.original_greenkey, metainterp.jitdriver_sd,
metainterp_sd, new_loop, "entry bridge")
# send the new_loop to warmspot.py, to be called directly the next time
@@ -751,7 +790,7 @@
metainterp_sd.stats.add_jitcell_token(jitcell_token)
-def compile_trace(metainterp, resumekey, start_resumedescr=None):
+def compile_trace(metainterp, resumekey, resume_at_jump_descr=None):
"""Try to compile a new bridge leading from the beginning of the history
to some existing place.
"""
@@ -767,7 +806,7 @@
# clone ops, as optimize_bridge can mutate the ops
new_trace.operations = [op.clone() for op in metainterp.history.operations]
- new_trace.start_resumedescr = start_resumedescr
+ new_trace.resume_at_jump_descr = resume_at_jump_descr
metainterp_sd = metainterp.staticdata
state = metainterp.jitdriver_sd.warmstate
if isinstance(resumekey, ResumeAtPositionDescr):
diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -46,7 +46,7 @@
# get the function address as an integer
func = argboxes[0].getint()
# do the call using the correct function from the cpu
- rettype = descr.get_return_type()
+ rettype = descr.get_result_type()
if rettype == INT or rettype == 'S': # *S*ingle float
try:
result = cpu.bh_call_i(func, descr, args_i, args_r, args_f)
@@ -344,6 +344,8 @@
rop.SETINTERIORFIELD_RAW,
rop.CALL_RELEASE_GIL,
rop.QUASIIMMUT_FIELD,
+ rop.CALL_MALLOC_GC,
+ rop.CALL_MALLOC_NURSERY,
rop.LABEL,
): # list of opcodes never executed by pyjitpl
continue
diff --git a/pypy/jit/metainterp/graphpage.py b/pypy/jit/metainterp/graphpage.py
--- a/pypy/jit/metainterp/graphpage.py
+++ b/pypy/jit/metainterp/graphpage.py
@@ -12,7 +12,7 @@
def get_display_text(self):
return None
-def display_procedures(procedures, errmsg=None, highlight_procedures={}):
+def display_procedures(procedures, errmsg=None, highlight_procedures={}, metainterp_sd=None):
graphs = [(procedure, highlight_procedures.get(procedure, 0))
for procedure in procedures]
for graph, highlight in graphs:
@@ -20,7 +20,7 @@
if is_interesting_guard(op):
graphs.append((SubGraph(op.getdescr()._debug_suboperations),
highlight))
- graphpage = ResOpGraphPage(graphs, errmsg)
+ graphpage = ResOpGraphPage(graphs, errmsg, metainterp_sd)
graphpage.display()
def is_interesting_guard(op):
@@ -36,8 +36,8 @@
class ResOpGraphPage(GraphPage):
- def compute(self, graphs, errmsg=None):
- resopgen = ResOpGen()
+ def compute(self, graphs, errmsg=None, metainterp_sd=None):
+ resopgen = ResOpGen(metainterp_sd)
for graph, highlight in graphs:
resopgen.add_graph(graph, highlight)
if errmsg:
@@ -50,13 +50,14 @@
CLUSTERING = True
BOX_COLOR = (128, 0, 96)
- def __init__(self):
+ def __init__(self, metainterp_sd=None):
self.graphs = []
self.highlight_graphs = {}
self.block_starters = {} # {graphindex: {set-of-operation-indices}}
self.all_operations = {}
self.errmsg = None
self.target_tokens = {}
+ self.metainterp_sd = metainterp_sd
def op_name(self, graphindex, opindex):
return 'g%dop%d' % (graphindex, opindex)
@@ -164,7 +165,14 @@
opindex = opstartindex
while True:
op = operations[opindex]
- lines.append(op.repr(graytext=True))
+ op_repr = op.repr(graytext=True)
+ if op.getopnum() == rop.DEBUG_MERGE_POINT:
+ jd_sd = self.metainterp_sd.jitdrivers_sd[op.getarg(0).getint()]
+ if jd_sd._get_printable_location_ptr:
+ s = jd_sd.warmstate.get_location_str(op.getarglist()[2:])
+ s = s.replace(',', '.') # we use comma for argument splitting
+ op_repr = "debug_merge_point(%d, '%s')" % (op.getarg(1).getint(), s)
+ lines.append(op_repr)
if is_interesting_guard(op):
tgt = op.getdescr()._debug_suboperations[0]
tgt_g, tgt_i = self.all_operations[tgt]
diff --git a/pypy/jit/metainterp/heapcache.py b/pypy/jit/metainterp/heapcache.py
--- a/pypy/jit/metainterp/heapcache.py
+++ b/pypy/jit/metainterp/heapcache.py
@@ -79,9 +79,9 @@
opnum == rop.COPYSTRCONTENT or
opnum == rop.COPYUNICODECONTENT):
return
- if rop._OVF_FIRST <= opnum <= rop._OVF_LAST:
- return
- if rop._NOSIDEEFFECT_FIRST <= opnum <= rop._NOSIDEEFFECT_LAST:
+ if (rop._OVF_FIRST <= opnum <= rop._OVF_LAST or
+ rop._NOSIDEEFFECT_FIRST <= opnum <= rop._NOSIDEEFFECT_LAST or
+ rop._GUARD_FIRST <= opnum <= rop._GUARD_LAST):
return
if opnum == rop.CALL or opnum == rop.CALL_LOOPINVARIANT:
effectinfo = descr.get_extra_info()
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -142,59 +142,6 @@
def repr_of_descr(self):
return '%r' % (self,)
- def get_arg_types(self):
- """ Implement in call descr.
- Must return a string of INT, REF and FLOAT ('i', 'r', 'f').
- """
- raise NotImplementedError
-
- def get_return_type(self):
- """ Implement in call descr.
- Must return INT, REF, FLOAT, or 'v' for void.
- On 32-bit (hack) it can also be 'L' for longlongs.
- Additionally it can be 'S' for singlefloats.
- """
- raise NotImplementedError
-
- def get_extra_info(self):
- """ Implement in call descr
- """
- raise NotImplementedError
-
- def is_array_of_pointers(self):
- """ Implement for array descr
- """
- raise NotImplementedError
-
- def is_array_of_floats(self):
- """ Implement for array descr
- """
- raise NotImplementedError
-
- def is_array_of_structs(self):
- """ Implement for array descr
- """
- raise NotImplementedError
-
- def is_pointer_field(self):
- """ Implement for field descr
- """
- raise NotImplementedError
-
- def is_float_field(self):
- """ Implement for field descr
- """
- raise NotImplementedError
-
- def as_vtable_size_descr(self):
- """ Implement for size descr representing objects with vtables.
- Returns self. (it's an annotation hack)
- """
- raise NotImplementedError
-
- def count_fields_if_immutable(self):
- return -1
-
def _clone_if_mutable(self):
return self
def clone_if_mutable(self):
@@ -758,6 +705,9 @@
self.virtual_state = None
self.exported_state = None
+
+ def repr_of_descr(self):
+ return 'TargetToken(%d)' % compute_unique_id(self)
class TreeLoop(object):
inputargs = None
@@ -765,7 +715,7 @@
call_pure_results = None
logops = None
quasi_immutable_deps = None
- start_resumedescr = None
+ resume_at_jump_descr = None
def _token(*args):
raise Exception("TreeLoop.token is killed")
@@ -1053,35 +1003,16 @@
return insns
def check_simple_loop(self, expected=None, **check):
- # Usefull in the simplest case when we have only one trace ending with
- # a jump back to itself and possibly a few bridges ending with finnish.
- # Only the operations within the loop formed by that single jump will
- # be counted.
-
- # XXX hacked version, ignore and remove me when jit-targets is merged.
- loops = self.get_all_loops()
- loops = [loop for loop in loops if 'Preamble' not in repr(loop)] #XXX
- assert len(loops) == 1
- loop, = loops
- jumpop = loop.operations[-1]
- assert jumpop.getopnum() == rop.JUMP
- insns = {}
- for op in loop.operations:
- opname = op.getopname()
- insns[opname] = insns.get(opname, 0) + 1
- return self._check_insns(insns, expected, check)
-
- def check_simple_loop(self, expected=None, **check):
- # Usefull in the simplest case when we have only one trace ending with
- # a jump back to itself and possibly a few bridges ending with finnish.
- # Only the operations within the loop formed by that single jump will
- # be counted.
+ """ Usefull in the simplest case when we have only one trace ending with
+ a jump back to itself and possibly a few bridges.
+ Only the operations within the loop formed by that single jump will
+ be counted.
+ """
loops = self.get_all_loops()
assert len(loops) == 1
loop = loops[0]
jumpop = loop.operations[-1]
assert jumpop.getopnum() == rop.JUMP
- assert self.check_resops(jump=1)
labels = [op for op in loop.operations if op.getopnum() == rop.LABEL]
targets = [op._descr_wref() for op in labels]
assert None not in targets # TargetToken was freed, give up
@@ -1134,7 +1065,7 @@
if option.view:
self.view()
- def view(self, errmsg=None, extraprocedures=[]):
+ def view(self, errmsg=None, extraprocedures=[], metainterp_sd=None):
from pypy.jit.metainterp.graphpage import display_procedures
procedures = self.get_all_loops()[:]
for procedure in extraprocedures:
@@ -1146,7 +1077,7 @@
if hasattr(procedure, '_looptoken_number') and (
procedure._looptoken_number in self.invalidated_token_numbers):
highlight_procedures.setdefault(procedure, 2)
- display_procedures(procedures, errmsg, highlight_procedures)
+ display_procedures(procedures, errmsg, highlight_procedures, metainterp_sd)
# ----------------------------------------------------------------
diff --git a/pypy/jit/metainterp/jitdriver.py b/pypy/jit/metainterp/jitdriver.py
--- a/pypy/jit/metainterp/jitdriver.py
+++ b/pypy/jit/metainterp/jitdriver.py
@@ -21,7 +21,6 @@
# self.portal_finishtoken... pypy.jit.metainterp.pyjitpl
# self.index ... pypy.jit.codewriter.call
# self.mainjitcode ... pypy.jit.codewriter.call
- # self.on_compile ... pypy.jit.metainterp.warmstate
# These attributes are read by the backend in CALL_ASSEMBLER:
# self.assembler_helper_adr
diff --git a/pypy/jit/metainterp/jitprof.py b/pypy/jit/metainterp/jitprof.py
--- a/pypy/jit/metainterp/jitprof.py
+++ b/pypy/jit/metainterp/jitprof.py
@@ -18,8 +18,8 @@
OPT_FORCINGS
ABORT_TOO_LONG
ABORT_BRIDGE
+ABORT_BAD_LOOP
ABORT_ESCAPE
-ABORT_BAD_LOOP
ABORT_FORCE_QUASIIMMUT
NVIRTUALS
NVHOLES
@@ -30,10 +30,13 @@
TOTAL_FREED_BRIDGES
"""
+counter_names = []
+
def _setup():
names = counters.split()
for i, name in enumerate(names):
globals()[name] = i
+ counter_names.append(name)
global ncounters
ncounters = len(names)
_setup()
diff --git a/pypy/jit/metainterp/logger.py b/pypy/jit/metainterp/logger.py
--- a/pypy/jit/metainterp/logger.py
+++ b/pypy/jit/metainterp/logger.py
@@ -5,7 +5,7 @@
from pypy.rpython.lltypesystem import lltype, llmemory, rffi
from pypy.jit.metainterp.resoperation import rop
from pypy.jit.metainterp.history import Const, ConstInt, Box, \
- BoxInt, ConstFloat, BoxFloat, AbstractFailDescr
+ BoxInt, ConstFloat, BoxFloat, AbstractFailDescr, TargetToken
class Logger(object):
@@ -13,14 +13,14 @@
self.metainterp_sd = metainterp_sd
self.guard_number = guard_number
- def log_loop(self, inputargs, operations, number=0, type=None, ops_offset=None):
+ def log_loop(self, inputargs, operations, number=0, type=None, ops_offset=None, name=''):
if type is None:
debug_start("jit-log-noopt-loop")
logops = self._log_operations(inputargs, operations, ops_offset)
debug_stop("jit-log-noopt-loop")
else:
debug_start("jit-log-opt-loop")
- debug_print("# Loop", number, ":", type,
+ debug_print("# Loop", number, '(%s)' % name , ":", type,
"with", len(operations), "ops")
logops = self._log_operations(inputargs, operations, ops_offset)
debug_stop("jit-log-opt-loop")
@@ -135,6 +135,13 @@
fail_args = ''
return s_offset + res + op.getopname() + '(' + args + ')' + fail_args
+ def _log_inputarg_setup_ops(self, op):
+ target_token = op.getdescr()
+ if isinstance(target_token, TargetToken):
+ if target_token.exported_state:
+ for op in target_token.exported_state.inputarg_setup_ops:
+ debug_print(' ' + self.repr_of_resop(op))
+
def _log_operations(self, inputargs, operations, ops_offset):
if not have_debug_prints():
return
@@ -146,6 +153,8 @@
for i in range(len(operations)):
op = operations[i]
debug_print(self.repr_of_resop(operations[i], ops_offset))
+ if op.getopnum() == rop.LABEL:
+ self._log_inputarg_setup_ops(op)
if ops_offset and None in ops_offset:
offset = ops_offset[None]
debug_print("+%d: --end of the loop--" % offset)
diff --git a/pypy/jit/metainterp/memmgr.py b/pypy/jit/metainterp/memmgr.py
--- a/pypy/jit/metainterp/memmgr.py
+++ b/pypy/jit/metainterp/memmgr.py
@@ -1,5 +1,5 @@
import math
-from pypy.rlib.rarithmetic import r_int64, r_uint
+from pypy.rlib.rarithmetic import r_int64
from pypy.rlib.debug import debug_start, debug_print, debug_stop
from pypy.rlib.objectmodel import we_are_translated
@@ -21,7 +21,6 @@
#
class MemoryManager(object):
- NO_NEXT_CHECK = r_int64(2 ** 63 - 1)
def __init__(self):
self.check_frequency = -1
@@ -37,13 +36,12 @@
# According to my estimates it's about 5e9 years given 1000 loops
# per second
self.current_generation = r_int64(1)
- self.next_check = self.NO_NEXT_CHECK
+ self.next_check = r_int64(-1)
self.alive_loops = {}
- self._cleanup_jitcell_dicts = lambda: None
def set_max_age(self, max_age, check_frequency=0):
if max_age <= 0:
- self.next_check = self.NO_NEXT_CHECK
+ self.next_check = r_int64(-1)
else:
self.max_age = max_age
if check_frequency <= 0:
@@ -51,11 +49,10 @@
self.check_frequency = check_frequency
self.next_check = self.current_generation + 1
- def next_generation(self, do_cleanups_now=True):
+ def next_generation(self):
self.current_generation += 1
- if do_cleanups_now and self.current_generation >= self.next_check:
+ if self.current_generation == self.next_check:
self._kill_old_loops_now()
- self._cleanup_jitcell_dicts()
self.next_check = self.current_generation + self.check_frequency
def keep_loop_alive(self, looptoken):
@@ -84,22 +81,3 @@
# a single one is not enough for all tests :-(
rgc.collect(); rgc.collect(); rgc.collect()
debug_stop("jit-mem-collect")
-
- def get_current_generation_uint(self):
- """Return the current generation, possibly truncated to a uint.
- To use only as an approximation for decaying counters."""
- return r_uint(self.current_generation)
-
- def record_jitcell_dict(self, callback):
- """NOT_RPYTHON. The given jitcell_dict is a dict that needs
- occasional clean-ups of old cells. A cell is old if it never
- reached the threshold, and its counter decayed to a tiny value."""
- # note that the various jitcell_dicts have different RPython types,
- # so we have to make a different function for each one. These
- # functions are chained to each other: each calls the previous one.
- def cleanup_dict():
- callback()
- cleanup_previous()
- #
- cleanup_previous = self._cleanup_jitcell_dicts
- self._cleanup_jitcell_dicts = cleanup_dict
diff --git a/pypy/jit/metainterp/optimize.py b/pypy/jit/metainterp/optimize.py
--- a/pypy/jit/metainterp/optimize.py
+++ b/pypy/jit/metainterp/optimize.py
@@ -5,58 +5,3 @@
"""Raised when the optimize*.py detect that the loop that
we are trying to build cannot possibly make sense as a
long-running loop (e.g. it cannot run 2 complete iterations)."""
-
-class RetraceLoop(JitException):
- """ Raised when inlining a short preamble resulted in an
- InvalidLoop. This means the optimized loop is too specialized
- to be useful here, so we trace it again and produced a second
- copy specialized in some different way.
- """
-
-# ____________________________________________________________
-
-def optimize_loop(metainterp_sd, old_loop_tokens, loop, enable_opts):
- debug_start("jit-optimize")
- try:
- return _optimize_loop(metainterp_sd, old_loop_tokens, loop,
- enable_opts)
- finally:
- debug_stop("jit-optimize")
-
-def _optimize_loop(metainterp_sd, old_loop_tokens, loop, enable_opts):
- from pypy.jit.metainterp.optimizeopt import optimize_loop_1
- loop.logops = metainterp_sd.logger_noopt.log_loop(loop.inputargs,
- loop.operations)
- # XXX do we really still need a list?
- if old_loop_tokens:
- return old_loop_tokens[0]
- optimize_loop_1(metainterp_sd, loop, enable_opts)
- return None
-
-# ____________________________________________________________
-
-def optimize_bridge(metainterp_sd, old_loop_tokens, bridge, enable_opts,
- inline_short_preamble=True, retraced=False):
- debug_start("jit-optimize")
- try:
- return _optimize_bridge(metainterp_sd, old_loop_tokens, bridge,
- enable_opts,
- inline_short_preamble, retraced)
- finally:
- debug_stop("jit-optimize")
-
-def _optimize_bridge(metainterp_sd, old_loop_tokens, bridge, enable_opts,
- inline_short_preamble, retraced=False):
- from pypy.jit.metainterp.optimizeopt import optimize_bridge_1
- bridge.logops = metainterp_sd.logger_noopt.log_loop(bridge.inputargs,
- bridge.operations)
- if old_loop_tokens:
- old_loop_token = old_loop_tokens[0]
- bridge.operations[-1].setdescr(old_loop_token) # patch jump target
- optimize_bridge_1(metainterp_sd, bridge, enable_opts,
- inline_short_preamble, retraced)
- return old_loop_tokens[0]
- #return bridge.operations[-1].getdescr()
- return None
-
-# ____________________________________________________________
diff --git a/pypy/jit/metainterp/optimizeopt/__init__.py b/pypy/jit/metainterp/optimizeopt/__init__.py
--- a/pypy/jit/metainterp/optimizeopt/__init__.py
+++ b/pypy/jit/metainterp/optimizeopt/__init__.py
@@ -51,34 +51,6 @@
return optimizations, unroll
-
-def optimize_loop_1(metainterp_sd, loop, enable_opts,
- inline_short_preamble=True, retraced=False):
- """Optimize loop.operations to remove internal overheadish operations.
- """
-
- optimizations, unroll = build_opt_chain(metainterp_sd, enable_opts,
- inline_short_preamble, retraced)
- if unroll:
- optimize_unroll(metainterp_sd, loop, optimizations)
- else:
- optimizer = Optimizer(metainterp_sd, loop, optimizations)
- optimizer.propagate_all_forward()
-
-def optimize_bridge_1(metainterp_sd, bridge, enable_opts,
- inline_short_preamble=True, retraced=False):
- """The same, but for a bridge. """
- enable_opts = enable_opts.copy()
- try:
- del enable_opts['unroll']
- except KeyError:
- pass
- optimize_loop_1(metainterp_sd, bridge, enable_opts,
- inline_short_preamble, retraced)
-
-if __name__ == '__main__':
- print ALL_OPTS_NAMES
-
def optimize_trace(metainterp_sd, loop, enable_opts, inline_short_preamble=True):
"""Optimize loop.operations to remove internal overheadish operations.
"""
@@ -96,3 +68,6 @@
finally:
debug_stop("jit-optimize")
+if __name__ == '__main__':
+ print ALL_OPTS_NAMES
+
diff --git a/pypy/jit/metainterp/optimizeopt/fficall.py b/pypy/jit/metainterp/optimizeopt/fficall.py
--- a/pypy/jit/metainterp/optimizeopt/fficall.py
+++ b/pypy/jit/metainterp/optimizeopt/fficall.py
@@ -234,11 +234,11 @@
# longlongs are treated as floats, see
# e.g. llsupport/descr.py:getDescrClass
is_float = True
- elif kind == 'u':
+ elif kind == 'u' or kind == 's':
# they're all False
pass
else:
- assert False, "unsupported ffitype or kind"
+ raise NotImplementedError("unsupported ffitype or kind: %s" % kind)
#
fieldsize = rffi.getintfield(ffitype, 'c_size')
return self.optimizer.cpu.interiorfielddescrof_dynamic(
diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -453,6 +453,7 @@
def clear_newoperations(self):
self._newoperations = []
+ self.seen_results = {}
def make_equal_to(self, box, value, replace=False):
assert isinstance(value, OptValue)
diff --git a/pypy/jit/metainterp/optimizeopt/simplify.py b/pypy/jit/metainterp/optimizeopt/simplify.py
--- a/pypy/jit/metainterp/optimizeopt/simplify.py
+++ b/pypy/jit/metainterp/optimizeopt/simplify.py
@@ -35,6 +35,9 @@
pass
def optimize_LABEL(self, op):
+ descr = op.getdescr()
+ if isinstance(descr, JitCellToken):
+ return self.optimize_JUMP(op.copy_and_change(rop.JUMP))
self.last_label_descr = op.getdescr()
self.emit_operation(op)
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
@@ -1,21 +1,25 @@
+from __future__ import with_statement
from pypy.jit.metainterp.optimizeopt.test.test_util import (
- LLtypeMixin, BaseTest, Storage, _sortboxes, FakeDescrWithSnapshot)
+ LLtypeMixin, BaseTest, Storage, _sortboxes, FakeDescrWithSnapshot,
+ FakeMetaInterpStaticData)
from pypy.jit.metainterp.history import TreeLoop, JitCellToken, TargetToken
from pypy.jit.metainterp.resoperation import rop, opname, ResOperation
from pypy.jit.metainterp.optimize import InvalidLoop
from py.test import raises
+from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
+from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
class BaseTestMultiLabel(BaseTest):
enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll"
- def optimize_loop(self, ops, expected):
+ def optimize_loop(self, ops, expected, expected_shorts=None):
loop = self.parse(ops)
if expected != "crash!":
expected = self.parse(expected)
part = TreeLoop('part')
part.inputargs = loop.inputargs
- part.start_resumedescr = FakeDescrWithSnapshot()
+ part.resume_at_jump_descr = FakeDescrWithSnapshot()
token = loop.original_jitcell_token
optimized = TreeLoop('optimized')
@@ -32,15 +36,17 @@
if nxt < len(loop.operations):
label = loop.operations[nxt]
assert label.getopnum() == rop.LABEL
- jumpop = ResOperation(rop.JUMP, label.getarglist(),
- None, descr=token)
- operations.append(jumpop)
+ if label.getdescr() is None:
+ label.setdescr(token)
+ operations.append(label)
part.operations = operations
+
self._do_optimize_loop(part, None)
if part.operations[-1].getopnum() == rop.LABEL:
last_label = [part.operations.pop()]
else:
last_label = []
+
optimized.operations.extend(part.operations)
prv = nxt + 1
@@ -53,11 +59,36 @@
print 'Failed!'
print
+ shorts = [op.getdescr().short_preamble
+ for op in optimized.operations
+ if op.getopnum() == rop.LABEL]
+
+ if expected_shorts:
+ for short in shorts:
+ print
+ print "Short preamble:"
+ print '\n'.join([str(o) for o in short])
+
+
assert expected != "crash!", "should have raised an exception"
self.assert_equal(optimized, expected)
+ if expected_shorts:
+ assert len(shorts) == len(expected_shorts)
+ for short, expected_short in zip(shorts, expected_shorts):
+ expected_short = self.parse(expected_short)
+ short_preamble = TreeLoop('short preamble')
+ assert short[0].getopnum() == rop.LABEL
+ short_preamble.inputargs = short[0].getarglist()
+ short_preamble.operations = short
+ self.assert_equal(short_preamble, expected_short,
+ text_right='expected short preamble')
+
+
return optimized
+class OptimizeoptTestMultiLabel(BaseTestMultiLabel):
+
def test_simple(self):
ops = """
[i1]
@@ -193,8 +224,244 @@
"""
with raises(InvalidLoop):
self.optimize_loop(ops, ops)
+
+ def test_two_intermediate_labels_basic_1(self):
+ ops = """
+ [p1, i1]
+ i2 = getfield_gc(p1, descr=valuedescr)
+ label(p1, i1)
+ i3 = getfield_gc(p1, descr=valuedescr)
+ i4 = int_add(i1, i3)
+ label(p1, i4)
+ i5 = int_add(i4, 1)
+ jump(p1, i5)
+ """
+ expected = """
+ [p1, i1]
+ i2 = getfield_gc(p1, descr=valuedescr)
+ label(p1, i1, i2)
+ i4 = int_add(i1, i2)
+ label(p1, i4)
+ i5 = int_add(i4, 1)
+ jump(p1, i5)
+ """
+ short1 = """
+ [p1, i1]
+ label(p1, i1)
+ i2 = getfield_gc(p1, descr=valuedescr)
+ jump(p1, i1, i2)
+ """
+ short2 = """
+ [p1, i1]
+ label(p1, i1)
+ jump(p1, i1)
+ """
+ self.optimize_loop(ops, expected, expected_shorts=[short1, short2])
+
+ def test_two_intermediate_labels_basic_2(self):
+ ops = """
+ [p1, i1]
+ i2 = int_add(i1, 1)
+ label(p1, i1)
+ i3 = getfield_gc(p1, descr=valuedescr)
+ i4 = int_add(i1, i3)
+ label(p1, i4)
+ i5 = getfield_gc(p1, descr=valuedescr)
+ i6 = int_add(i4, i5)
+ jump(p1, i6)
+ """
+ expected = """
+ [p1, i1]
+ i2 = int_add(i1, 1)
+ label(p1, i1)
+ i3 = getfield_gc(p1, descr=valuedescr)
+ i4 = int_add(i1, i3)
+ label(p1, i4, i3)
+ i6 = int_add(i4, i3)
+ jump(p1, i6, i3)
+ """
+ short1 = """
+ [p1, i1]
+ label(p1, i1)
+ jump(p1, i1)
+ """
+ short2 = """
+ [p1, i1]
+ label(p1, i1)
+ i2 = getfield_gc(p1, descr=valuedescr)
+ jump(p1, i1, i2)
+ """
+ self.optimize_loop(ops, expected, expected_shorts=[short1, short2])
+
+ def test_two_intermediate_labels_both(self):
+ ops = """
+ [p1, i1]
+ i2 = getfield_gc(p1, descr=valuedescr)
+ label(p1, i1)
+ i3 = getfield_gc(p1, descr=valuedescr)
+ i4 = int_add(i1, i3)
+ label(p1, i4)
+ i5 = getfield_gc(p1, descr=valuedescr)
+ i6 = int_mul(i4, i5)
+ jump(p1, i6)
+ """
+ expected = """
+ [p1, i1]
+ i2 = getfield_gc(p1, descr=valuedescr)
+ label(p1, i1, i2)
+ i4 = int_add(i1, i2)
+ label(p1, i4, i2)
+ i6 = int_mul(i4, i2)
+ jump(p1, i6, i2)
+ """
+ short = """
+ [p1, i1]
+ label(p1, i1)
+ i2 = getfield_gc(p1, descr=valuedescr)
+ jump(p1, i1, i2)
+ """
+ self.optimize_loop(ops, expected, expected_shorts=[short, short])
+
+ def test_import_across_multiple_labels_basic(self):
+ # Not supported, juts make sure we get a functional trace
+ ops = """
+ [p1, i1]
+ i2 = getfield_gc(p1, descr=valuedescr)
+ label(p1, i1)
+ i3 = int_add(i1, 1)
+ label(p1, i1)
+ i4 = getfield_gc(p1, descr=valuedescr)
+ i5 = int_add(i4, 1)
+ jump(p1, i5)
+ """
+ self.optimize_loop(ops, ops)
+
+ def test_import_across_multiple_labels_with_duplication(self):
+ # Not supported, juts make sure we get a functional trace
+ ops = """
+ [p1, i1]
+ i2 = getfield_gc(p1, descr=valuedescr)
+ label(p1, i2)
+ i3 = int_add(i2, 1)
+ label(p1, i2)
+ i4 = getfield_gc(p1, descr=valuedescr)
+ i5 = int_add(i4, 1)
+ jump(p1, i5)
+ """
+ exported = """
+ [p1, i1]
+ i2 = getfield_gc(p1, descr=valuedescr)
+ i6 = same_as(i2)
+ label(p1, i2)
+ i3 = int_add(i2, 1)
+ label(p1, i2)
+ i4 = getfield_gc(p1, descr=valuedescr)
+ i5 = int_add(i4, 1)
+ jump(p1, i5)
+ """
+ self.optimize_loop(ops, exported)
+
+ def test_import_virtual_across_multiple_labels(self):
+ ops = """
+ [p0, i1]
+ i1a = int_add(i1, 1)
+ pv = new_with_vtable(ConstClass(node_vtable))
+ setfield_gc(pv, i1a, descr=valuedescr)
+ label(pv, i1)
+ i2 = int_mul(i1, 3)
+ label(pv, i2)
+ i3 = getfield_gc(pv, descr=valuedescr)
+ i4 = int_add(i3, i2)
+ jump(pv, i4)
+ """
+ expected = """
+ [p0, i1]
+ i1a = int_add(i1, 1)
+ i5 = same_as(i1a)
+ label(i1a, i1)
+ i2 = int_mul(i1, 3)
+ label(i1a, i2)
+ i4 = int_add(i1a, i2)
+ jump(i1a, i4)
+ """
+ self.optimize_loop(ops, expected)
+
+ def test_virtual_as_field_of_forced_box(self):
+ ops = """
+ [p0]
+ pv1 = new_with_vtable(ConstClass(node_vtable))
+ label(pv1, p0)
+ pv2 = new_with_vtable(ConstClass(node_vtable))
+ setfield_gc(pv2, pv1, descr=valuedescr)
+ jump(pv1, pv2)
+ """
+ with raises(InvalidLoop):
+ self.optimize_loop(ops, ops)
+
+class OptRenameStrlen(Optimization):
+ def propagate_forward(self, op):
+ dispatch_opt(self, op)
+
+ def optimize_STRLEN(self, op):
+ newop = op.clone()
+ newop.result = op.result.clonebox()
+ self.emit_operation(newop)
+ self.make_equal_to(op.result, self.getvalue(newop.result))
+
+dispatch_opt = make_dispatcher_method(OptRenameStrlen, 'optimize_',
+ default=OptRenameStrlen.emit_operation)
+
+class BaseTestOptimizerRenamingBoxes(BaseTestMultiLabel):
+
+ def _do_optimize_loop(self, loop, call_pure_results):
+ from pypy.jit.metainterp.optimizeopt.unroll import optimize_unroll
+ from pypy.jit.metainterp.optimizeopt.util import args_dict
+ from pypy.jit.metainterp.optimizeopt.pure import OptPure
+
+ self.loop = loop
+ loop.call_pure_results = args_dict()
+ metainterp_sd = FakeMetaInterpStaticData(self.cpu)
+ optimize_unroll(metainterp_sd, loop, [OptRenameStrlen(), OptPure()], True)
+
+ def test_optimizer_renaming_boxes(self):
+ ops = """
+ [p1]
+ i1 = strlen(p1)
+ label(p1)
+ i2 = strlen(p1)
+ i3 = int_add(i2, 7)
+ jump(p1)
+ """
+ expected = """
+ [p1]
+ i1 = strlen(p1)
+ label(p1, i1)
+ i11 = same_as(i1)
+ i2 = int_add(i11, 7)
+ jump(p1, i11)
+ """
+ self.optimize_loop(ops, expected)
+
+ def test_optimizer_renaming_boxes_not_imported(self):
+ ops = """
+ [p1]
+ i1 = strlen(p1)
+ label(p1)
+ jump(p1)
+ """
+ expected = """
+ [p1]
+ i1 = strlen(p1)
+ label(p1, i1)
+ i11 = same_as(i1)
+ jump(p1, i11)
+ """
+ self.optimize_loop(ops, expected)
+
-
-class TestLLtype(BaseTestMultiLabel, LLtypeMixin):
+
+class TestLLtype(OptimizeoptTestMultiLabel, LLtypeMixin):
pass
+class TestOptimizerRenamingBoxesLLtype(BaseTestOptimizerRenamingBoxes, LLtypeMixin):
+ pass
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -117,7 +117,7 @@
def optimize_loop(self, ops, optops, call_pure_results=None):
loop = self.parse(ops)
- token = JitCellToken()
+ token = JitCellToken()
loop.operations = [ResOperation(rop.LABEL, loop.inputargs, None, descr=TargetToken(token))] + \
loop.operations
if loop.operations[-1].getopnum() == rop.JUMP:
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -4,7 +4,7 @@
LLtypeMixin, BaseTest, Storage, _sortboxes, convert_old_style_to_targets)
import pypy.jit.metainterp.optimizeopt.optimizer as optimizeopt
import pypy.jit.metainterp.optimizeopt.virtualize as virtualize
-from pypy.jit.metainterp.optimizeopt import optimize_loop_1, ALL_OPTS_DICT, build_opt_chain
+from pypy.jit.metainterp.optimizeopt import ALL_OPTS_DICT, build_opt_chain
from pypy.jit.metainterp.optimize import InvalidLoop
from pypy.jit.metainterp.history import AbstractDescr, ConstInt, BoxInt
from pypy.jit.metainterp.history import TreeLoop, JitCellToken, TargetToken
@@ -4211,7 +4211,6 @@
preamble = """
[p0]
i0 = strlen(p0)
- i3 = same_as(i0) # Should be killed by backend
jump(p0)
"""
expected = """
@@ -5668,8 +5667,7 @@
p3 = newstr(i3)
copystrcontent(p1, p3, 0, 0, i1)
copystrcontent(p2, p3, 0, i1, i2)
- i7 = same_as(i2)
- jump(p2, p3, i7)
+ jump(p2, p3, i2)
"""
expected = """
[p1, p2, i1]
@@ -5744,9 +5742,7 @@
copystrcontent(p1, p5, 0, 0, i1)
copystrcontent(p2, p5, 0, i1, i2)
copystrcontent(p3, p5, 0, i12, i3)
- i129 = same_as(i2)
- i130 = same_as(i3)
- jump(p2, p3, p5, i129, i130)
+ jump(p2, p3, p5, i2, i3)
"""
expected = """
[p1, p2, p3, i1, i2]
@@ -5959,8 +5955,7 @@
p4 = newstr(i5)
copystrcontent(p1, p4, i1, 0, i3)
copystrcontent(p2, p4, 0, i3, i4)
- i9 = same_as(i4)
- jump(p4, i1, i2, p2, i5, i3, i9)
+ jump(p4, i1, i2, p2, i5, i3, i4)
"""
expected = """
[p1, i1, i2, p2, i5, i3, i4]
@@ -6082,9 +6077,7 @@
copystrcontent(p2, p4, 0, i1, i2)
i0 = call(0, p3, p4, descr=strequaldescr)
escape(i0)
- i11 = same_as(i1)
- i12 = same_as(i2)
- jump(p1, p2, p3, i3, i11, i12)
+ jump(p1, p2, p3, i3, i1, i2)
"""
expected = """
[p1, p2, p3, i3, i1, i2]
@@ -6304,7 +6297,6 @@
i1 = strlen(p1)
i0 = int_eq(i1, 0)
escape(i0)
- i3 = same_as(i1)
jump(p1, i0)
"""
self.optimize_strunicode_loop_extradescrs(ops, expected, preamble)
@@ -6350,9 +6342,7 @@
copystrcontent(p2, p4, 0, i1, i2)
i0 = call(0, s"hello world", p4, descr=streq_nonnull_descr)
escape(i0)
- i11 = same_as(i1)
- i12 = same_as(i2)
- jump(p1, p2, i3, i11, i12)
+ jump(p1, p2, i3, i1, i2)
"""
expected = """
[p1, p2, i3, i1, i2]
@@ -6925,8 +6915,7 @@
[p9]
i843 = strlen(p9)
call(i843, descr=nonwritedescr)
- i0 = same_as(i843)
- jump(p9, i0)
+ jump(p9, i843)
"""
short = """
[p9]
@@ -7770,7 +7759,7 @@
jump(i0, p0, i2)
"""
self.optimize_loop(ops, expected)
-
+
class TestLLtype(OptimizeOptTest, LLtypeMixin):
pass
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_util.py b/pypy/jit/metainterp/optimizeopt/test/test_util.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_util.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_util.py
@@ -430,18 +430,18 @@
preamble = TreeLoop('preamble')
preamble.inputargs = inputargs
- preamble.start_resumedescr = FakeDescrWithSnapshot()
+ preamble.resume_at_jump_descr = FakeDescrWithSnapshot()
token = JitCellToken()
preamble.operations = [ResOperation(rop.LABEL, inputargs, None, descr=TargetToken(token))] + \
operations + \
- [ResOperation(rop.JUMP, jump_args, None, descr=token)]
+ [ResOperation(rop.LABEL, jump_args, None, descr=token)]
self._do_optimize_loop(preamble, call_pure_results)
assert preamble.operations[-1].getopnum() == rop.LABEL
inliner = Inliner(inputargs, jump_args)
- loop.start_resumedescr = preamble.start_resumedescr
+ loop.resume_at_jump_descr = preamble.resume_at_jump_descr
loop.operations = [preamble.operations[-1]] + \
[inliner.inline_op(op, clone=False) for op in cloned_operations] + \
[ResOperation(rop.JUMP, [inliner.inline_arg(a) for a in jump_args],
diff --git a/pypy/jit/metainterp/optimizeopt/unroll.py b/pypy/jit/metainterp/optimizeopt/unroll.py
--- a/pypy/jit/metainterp/optimizeopt/unroll.py
+++ b/pypy/jit/metainterp/optimizeopt/unroll.py
@@ -3,7 +3,7 @@
from pypy.jit.metainterp.compile import ResumeGuardDescr
from pypy.jit.metainterp.history import TreeLoop, TargetToken, JitCellToken
from pypy.jit.metainterp.jitexc import JitException
-from pypy.jit.metainterp.optimize import InvalidLoop, RetraceLoop
+from pypy.jit.metainterp.optimize import InvalidLoop
from pypy.jit.metainterp.optimizeopt.optimizer import *
from pypy.jit.metainterp.optimizeopt.generalize import KillHugeIntBounds
from pypy.jit.metainterp.inliner import Inliner
@@ -51,10 +51,10 @@
distinction anymore)"""
inline_short_preamble = True
- did_import = False
def __init__(self, metainterp_sd, loop, optimizations):
self.optimizer = UnrollableOptimizer(metainterp_sd, loop, optimizations)
+ self.boxes_created_this_iteration = None
def fix_snapshot(self, jump_args, snapshot):
if snapshot is None:
@@ -71,7 +71,6 @@
loop = self.optimizer.loop
self.optimizer.clear_newoperations()
-
start_label = loop.operations[0]
if start_label.getopnum() == rop.LABEL:
loop.operations = loop.operations[1:]
@@ -82,7 +81,7 @@
start_label = None
jumpop = loop.operations[-1]
- if jumpop.getopnum() == rop.JUMP:
+ if jumpop.getopnum() == rop.JUMP or jumpop.getopnum() == rop.LABEL:
loop.operations = loop.operations[:-1]
else:
jumpop = None
@@ -91,48 +90,87 @@
self.optimizer.propagate_all_forward(clear=False)
if not jumpop:
- return
- if self.jump_to_already_compiled_trace(jumpop):
- # Found a compiled trace to jump to
- if self.did_import:
-
- self.close_bridge(start_label)
- self.finilize_short_preamble(start_label)
return
cell_token = jumpop.getdescr()
assert isinstance(cell_token, JitCellToken)
stop_label = ResOperation(rop.LABEL, jumpop.getarglist(), None, TargetToken(cell_token))
- if not self.did_import: # Enforce the previous behaviour of always peeling exactly one iteration (for now)
- self.optimizer.flush()
- KillHugeIntBounds(self.optimizer).apply()
+
More information about the pypy-commit
mailing list