[pypy-commit] pypy ppc-jit-backend: Merge.

edelsohn noreply at buildbot.pypy.org
Sun Aug 26 21:51:21 CEST 2012


Author: edelsohn
Branch: ppc-jit-backend
Changeset: r56872:05ee4f0092e2
Date: 2012-08-26 15:43 -0400
http://bitbucket.org/pypy/pypy/changeset/05ee4f0092e2/

Log:	Merge.

diff too long, truncating to 10000 out of 26272 lines

diff --git a/lib_pypy/PyQt4.py b/lib_pypy/PyQt4.py
deleted file mode 100644
--- a/lib_pypy/PyQt4.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from _rpyc_support import proxy_sub_module, remote_eval
-
-
-for name in ("QtCore", "QtGui", "QtWebKit"):
-    proxy_sub_module(globals(), name)
-
-s = "__import__('PyQt4').QtGui.QDialogButtonBox."
-QtGui.QDialogButtonBox.Cancel = remote_eval("%sCancel | %sCancel" % (s, s))
-QtGui.QDialogButtonBox.Ok = remote_eval("%sOk | %sOk" % (s, s))
diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py
--- a/lib_pypy/_ctypes/basics.py
+++ b/lib_pypy/_ctypes/basics.py
@@ -59,7 +59,8 @@
         'resbuffer' is a _rawffi array of length 1 containing the value,
         and this returns a general Python object that corresponds.
         """
-        res = self.__new__(self)
+        res = object.__new__(self)
+        res.__class__ = self
         res.__dict__['_buffer'] = resbuffer
         res.__dict__['_base'] = base
         res.__dict__['_index'] = index
diff --git a/lib_pypy/_marshal.py b/lib_pypy/_marshal.py
--- a/lib_pypy/_marshal.py
+++ b/lib_pypy/_marshal.py
@@ -430,6 +430,7 @@
 def _read(self, n):
     pos = self.bufpos
     newpos = pos + n
+    if newpos > len(self.bufstr): raise EOFError
     ret = self.bufstr[pos : newpos]
     self.bufpos = newpos
     return ret
diff --git a/lib_pypy/_rpyc_support.py b/lib_pypy/_rpyc_support.py
deleted file mode 100644
--- a/lib_pypy/_rpyc_support.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import sys
-import socket
-
-from rpyc import connect, SlaveService
-from rpyc.utils.classic import DEFAULT_SERVER_PORT
-
-try:
-    conn = connect("localhost", DEFAULT_SERVER_PORT, SlaveService,
-           config=dict(call_by_value_for_builtin_mutable_types=True))
-except socket.error, e:
-    raise ImportError("Error while connecting: " + str(e))
-
-
-remote_eval = conn.eval
-
-
-def proxy_module(globals):
-    module = getattr(conn.modules, globals["__name__"])
-    for name in module.__dict__.keys():
-        globals[name] = getattr(module, name)
-
-def proxy_sub_module(globals, name):
-    fullname = globals["__name__"] + "." + name
-    sys.modules[fullname] = globals[name] = conn.modules[fullname]
diff --git a/lib_pypy/distributed/__init__.py b/lib_pypy/distributed/__init__.py
deleted file mode 100644
--- a/lib_pypy/distributed/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-
-try:
-    from protocol import RemoteProtocol, test_env, remote_loop, ObjectNotFound
-except ImportError:
-    # XXX fix it
-    # UGH. This is needed for tests
-    pass
diff --git a/lib_pypy/distributed/demo/sockdemo.py b/lib_pypy/distributed/demo/sockdemo.py
deleted file mode 100644
--- a/lib_pypy/distributed/demo/sockdemo.py
+++ /dev/null
@@ -1,42 +0,0 @@
-
-from distributed import RemoteProtocol, remote_loop
-from distributed.socklayer import Finished, socket_listener, socket_connecter
-
-PORT = 12122
-
-class X:
-    def __init__(self, z):
-        self.z = z
-        
-    def meth(self, x):
-        return self.z + x()
-
-    def raising(self):
-        1/0
-
-x = X(3)
-
-def remote():
-    send, receive = socket_listener(address=('', PORT))
-    remote_loop(RemoteProtocol(send, receive, globals()))
-
-def local():
-    send, receive = socket_connecter(('localhost', PORT))
-    return RemoteProtocol(send, receive)
-
-import sys
-if __name__ == '__main__':
-    if len(sys.argv) > 1 and sys.argv[1] == '-r':
-        try:
-            remote()
-        except Finished:
-            print "Finished"
-    else:
-        rp = local()
-        x = rp.get_remote("x")
-        try:
-            x.raising()
-        except:
-            import sys
-            import pdb
-            pdb.post_mortem(sys.exc_info()[2])
diff --git a/lib_pypy/distributed/faker.py b/lib_pypy/distributed/faker.py
deleted file mode 100644
--- a/lib_pypy/distributed/faker.py
+++ /dev/null
@@ -1,89 +0,0 @@
-
-""" This file is responsible for faking types
-"""
-
-class GetSetDescriptor(object):
-    def __init__(self, protocol, name):
-        self.protocol = protocol
-        self.name = name
-
-    def __get__(self, obj, type=None):
-        return self.protocol.get(self.name, obj, type)
-
-    def __set__(self, obj, value):
-        self.protocol.set(self.name, obj, value)
-
-class GetDescriptor(object):
-    def __init__(self, protocol, name):
-        self.protocol = protocol
-        self.name = name
-
-    def __get__(self, obj, type=None):
-        return self.protocol.get(self.name, obj, type)
-
-# these are one-go functions for wrapping/unwrapping types,
-# note that actual caching is defined in other files,
-# this is only the case when we *need* to wrap/unwrap
-# type
-
-from types import MethodType, FunctionType
-
-def not_ignore(name):
-    # we don't want to fake some default descriptors, because
-    # they'll alter the way we set attributes
-    l = ['__dict__', '__weakref__', '__class__', '__bases__',
-         '__getattribute__', '__getattr__', '__setattr__',
-         '__delattr__']
-    return not name in dict.fromkeys(l)
-
-def wrap_type(protocol, tp, tp_id):
-    """ Wrap type to transpotable entity, taking
-    care about descriptors
-    """
-    dict_w = {}
-    for item in tp.__dict__.keys():
-        value = getattr(tp, item)
-        if not_ignore(item):
-            # we've got shortcut for method
-            if hasattr(value, '__get__') and not type(value) is MethodType:
-                if hasattr(value, '__set__'):
-                    dict_w[item] = ('get', item)
-                else:
-                    dict_w[item] = ('set', item)
-            else:
-                dict_w[item] = protocol.wrap(value)
-    bases_w = [protocol.wrap(i) for i in tp.__bases__ if i is not object]
-    return tp_id, tp.__name__, dict_w, bases_w
-
-def unwrap_descriptor_gen(desc_class):
-    def unwrapper(protocol, data):
-        name = data
-        obj = desc_class(protocol, name)
-        obj.__name__ = name
-        return obj
-    return unwrapper
-
-unwrap_get_descriptor = unwrap_descriptor_gen(GetDescriptor)
-unwrap_getset_descriptor = unwrap_descriptor_gen(GetSetDescriptor)
-
-def unwrap_type(objkeeper, protocol, type_id, name_, dict_w, bases_w):
-    """ Unwrap remote type, based on it's description
-    """
-    if bases_w == []:
-        bases = (object,)
-    else:
-        bases = tuple([protocol.unwrap(i) for i in bases_w])
-    d = dict.fromkeys(dict_w)
-    # XXX we do it in two steps to avoid cyclic dependencies,
-    #     probably there is some smarter way of doing this
-    if '__doc__' in dict_w:
-        d['__doc__'] = protocol.unwrap(dict_w['__doc__'])
-    tp = type(name_, bases, d)
-    objkeeper.register_remote_type(tp, type_id)
-    for key, value in dict_w.items():
-        if key != '__doc__':
-            v = protocol.unwrap(value)
-            if isinstance(v, FunctionType):
-                setattr(tp, key, staticmethod(v))
-            else:
-                setattr(tp, key, v)
diff --git a/lib_pypy/distributed/objkeeper.py b/lib_pypy/distributed/objkeeper.py
deleted file mode 100644
--- a/lib_pypy/distributed/objkeeper.py
+++ /dev/null
@@ -1,63 +0,0 @@
-
-""" objkeeper - Storage for remoteprotocol
-"""
-
-from types import FunctionType
-from distributed import faker
-
-class ObjKeeper(object):
-    def __init__(self, exported_names = {}):
-        self.exported_objects = [] # list of object that we've exported outside
-        self.exported_names = exported_names # dictionary of visible objects
-        self.exported_types = {} # dict of exported types
-        self.remote_types = {}
-        self.reverse_remote_types = {}
-        self.remote_objects = {}
-        self.exported_types_id = 0 # unique id of exported types
-        self.exported_types_reverse = {} # reverse dict of exported types
-    
-    def register_object(self, obj):
-        # XXX: At some point it makes sense not to export them again and again...
-        self.exported_objects.append(obj)
-        return len(self.exported_objects) - 1
-    
-    def ignore(self, key, value):
-        # there are some attributes, which cannot be modified later, nor
-        # passed into default values, ignore them
-        if key in ('__dict__', '__weakref__', '__class__',
-                   '__dict__', '__bases__'):
-            return True
-        return False
-    
-    def register_type(self, protocol, tp):
-        try:
-            return self.exported_types[tp]
-        except KeyError:
-            self.exported_types[tp] = self.exported_types_id
-            self.exported_types_reverse[self.exported_types_id] = tp
-            tp_id = self.exported_types_id
-            self.exported_types_id += 1
-
-        protocol.send(('type_reg', faker.wrap_type(protocol, tp, tp_id)))
-        return tp_id
-    
-    def fake_remote_type(self, protocol, tp_data):
-        type_id, name_, dict_w, bases_w = tp_data
-        tp = faker.unwrap_type(self, protocol, type_id, name_, dict_w, bases_w)
-
-    def register_remote_type(self, tp, type_id):
-        self.remote_types[type_id] = tp
-        self.reverse_remote_types[tp] = type_id
-    
-    def get_type(self, id):
-        return self.remote_types[id]
-
-    def get_object(self, id):
-        return self.exported_objects[id]
-    
-    def register_remote_object(self, controller, id):
-        self.remote_objects[controller] = id
-
-    def get_remote_object(self, controller):
-        return self.remote_objects[controller]
-        
diff --git a/lib_pypy/distributed/protocol.py b/lib_pypy/distributed/protocol.py
deleted file mode 100644
--- a/lib_pypy/distributed/protocol.py
+++ /dev/null
@@ -1,447 +0,0 @@
-
-""" Distributed controller(s) for use with transparent proxy objects
-
-First idea:
-
-1. We use py.execnet to create a connection to wherever
-2. We run some code there (RSync in advance makes some sense)
-3. We access remote objects like normal ones, with a special protocol
-
-Local side:
-  - Request an object from remote side from global namespace as simple
-    --- request(name) --->
-  - Receive an object which is in protocol described below which is
-    constructed as shallow copy of the remote type.
-
-    Shallow copy is defined as follows:
-
-    - for interp-level object that we know we can provide transparent proxy
-      we just do that
-
-    - for others we fake or fail depending on object
-
-    - for user objects, we create a class which fakes all attributes of
-      a class as transparent proxies of remote objects, we create an instance
-      of that class and populate __dict__
-
-    - for immutable types, we just copy that
-
-Remote side:
-  - we run code, whatever we like
-  - additionally, we've got thread exporting stuff (or just exporting
-    globals, whatever)
-  - for every object, we just send an object, or provide a protocol for
-    sending it in a different way.
-
-"""
-
-try:
-    from __pypy__ import tproxy as proxy
-    from __pypy__ import get_tproxy_controller
-except ImportError:
-    raise ImportError("Cannot work without transparent proxy functionality")
-
-from distributed.objkeeper import ObjKeeper
-from distributed import faker
-import sys
-
-class ObjectNotFound(Exception):
-    pass
-
-# XXX We do not make any garbage collection. We'll need it at some point
-
-"""
-TODO list:
-
-1. Garbage collection - we would like probably to use weakrefs, but
-   since they're not perfectly working in pypy, let's leave it alone for now
-2. Some error handling - exceptions are working, there are still some
-   applications where it all explodes.
-3. Support inheritance and recursive types
-"""
-
-from __pypy__ import internal_repr
-
-import types
-from marshal import dumps
-import exceptions
-
-# just placeholders for letter_types value
-class RemoteBase(object):
-    pass
-
-class DataDescriptor(object):
-    pass
-
-class NonDataDescriptor(object):
-    pass
-# end of placeholders
-
-class AbstractProtocol(object):
-    immutable_primitives = (str, int, float, long, unicode, bool, types.NotImplementedType)
-    mutable_primitives = (list, dict, types.FunctionType, types.FrameType, types.TracebackType,
-        types.CodeType)
-    exc_dir = dict((val, name) for name, val in exceptions.__dict__.iteritems())
-    
-    letter_types = {
-        'l' : list,
-        'd' : dict,
-        'c' : types.CodeType,
-        't' : tuple,
-        'e' : Exception,
-        'ex': exceptions, # for instances
-        'i' : int,
-        'b' : bool,
-        'f' : float,
-        'u' : unicode,
-        'l' : long,
-        's' : str,
-        'ni' : types.NotImplementedType,
-        'n' : types.NoneType,
-        'lst' : list,
-        'fun' : types.FunctionType,
-        'cus' : object,
-        'meth' : types.MethodType,
-        'type' : type,
-        'tp' : None,
-        'fr' : types.FrameType,
-        'tb' : types.TracebackType,
-        'reg' : RemoteBase,
-        'get' : NonDataDescriptor,
-        'set' : DataDescriptor,
-    }
-    type_letters = dict([(value, key) for key, value in letter_types.items()])
-    assert len(type_letters) == len(letter_types)
-    
-    def __init__(self, exported_names={}):
-        self.keeper = ObjKeeper(exported_names)
-        #self.remote_objects = {} # a dictionary controller --> id
-        #self.objs = [] # we just store everything, maybe later
-        #   # we'll need some kind of garbage collection
-
-    def wrap(self, obj):
-        """ Wrap an object as sth prepared for sending
-        """
-        def is_element(x, iterable):
-            try:
-                return x in iterable
-            except (TypeError, ValueError):
-                return False
-        
-        tp = type(obj)
-        ctrl = get_tproxy_controller(obj)
-        if ctrl:
-            return "tp", self.keeper.get_remote_object(ctrl)
-        elif obj is None:
-            return self.type_letters[tp]
-        elif tp in self.immutable_primitives:
-            # simple, immutable object, just copy
-            return (self.type_letters[tp], obj)
-        elif hasattr(obj, '__class__') and obj.__class__ in self.exc_dir:
-            return (self.type_letters[Exception], (self.exc_dir[obj.__class__], \
-                self.wrap(obj.args)))
-        elif is_element(obj, self.exc_dir): # weird hashing problems
-            return (self.type_letters[exceptions], self.exc_dir[obj])
-        elif tp is tuple:
-            # we just pack all of the items
-            return ('t', tuple([self.wrap(elem) for elem in obj]))
-        elif tp in self.mutable_primitives:
-            id = self.keeper.register_object(obj)
-            return (self.type_letters[tp], id)
-        elif tp is type:
-            try:
-                return "reg", self.keeper.reverse_remote_types[obj]
-            except KeyError:
-                pass
-            try:
-                return self.type_letters[tp], self.type_letters[obj]
-            except KeyError:
-                id = self.register_type(obj)
-                return (self.type_letters[tp], id)
-        elif tp is types.MethodType:
-            w_class = self.wrap(obj.im_class)
-            w_func = self.wrap(obj.im_func)
-            w_self = self.wrap(obj.im_self)
-            return (self.type_letters[tp], (w_class, \
-                self.wrap(obj.im_func.func_name), w_func, w_self))
-        else:
-            id = self.keeper.register_object(obj)
-            w_tp = self.wrap(tp)
-            return ("cus", (w_tp, id))
-    
-    def unwrap(self, data):
-        """ Unwrap an object
-        """
-        if data == 'n':
-            return None
-        tp_letter, obj_data = data
-        tp = self.letter_types[tp_letter]
-        if tp is None:
-            return self.keeper.get_object(obj_data)
-        elif tp is RemoteBase:
-            return self.keeper.exported_types_reverse[obj_data]
-        elif tp in self.immutable_primitives:
-            return obj_data # this is the object
-        elif tp is tuple:
-            return tuple([self.unwrap(i) for i in obj_data])
-        elif tp in self.mutable_primitives:
-            id = obj_data
-            ro = RemoteBuiltinObject(self, id)
-            self.keeper.register_remote_object(ro.perform, id)
-            p = proxy(tp, ro.perform)
-            ro.obj = p
-            return p
-        elif tp is Exception:
-            cls_name, w_args = obj_data
-            return getattr(exceptions, cls_name)(self.unwrap(w_args))
-        elif tp is exceptions:
-            cls_name = obj_data
-            return getattr(exceptions, cls_name)
-        elif tp is types.MethodType:
-            w_class, w_name, w_func, w_self = obj_data
-            tp = self.unwrap(w_class)
-            name = self.unwrap(w_name)
-            self_ = self.unwrap(w_self)
-            if self_ is not None:
-                if tp is None:
-                    setattr(self_, name, classmethod(self.unwrap(w_func)))
-                    return getattr(self_, name)
-                return getattr(tp, name).__get__(self_, tp)
-            func = self.unwrap(w_func)
-            setattr(tp, name, func)
-            return getattr(tp, name)
-        elif tp is type:
-            if isinstance(obj_data, str):
-                return self.letter_types[obj_data]
-            id = obj_data
-            return self.get_type(obj_data)
-        elif tp is DataDescriptor:            
-            return faker.unwrap_getset_descriptor(self, obj_data)
-        elif tp is NonDataDescriptor:
-            return faker.unwrap_get_descriptor(self, obj_data)
-        elif tp is object:
-            # we need to create a proper type
-            w_tp, id = obj_data
-            real_tp = self.unwrap(w_tp)
-            ro = RemoteObject(self, id)
-            self.keeper.register_remote_object(ro.perform, id)
-            p = proxy(real_tp, ro.perform)
-            ro.obj = p
-            return p
-        else:
-            raise NotImplementedError("Cannot unwrap %s" % (data,))
-    
-    def perform(self, *args, **kwargs):
-        raise NotImplementedError("Abstract only protocol")
-    
-    # some simple wrappers
-    def pack_args(self, args, kwargs):
-        return self.pack_list(args), self.pack_dict(kwargs)
-    
-    def pack_list(self, lst):
-        return [self.wrap(i) for i in lst]
-    
-    def pack_dict(self, d):
-        return dict([(self.wrap(key), self.wrap(val)) for key, val in d.items()])
-    
-    def unpack_args(self, args, kwargs):
-        return self.unpack_list(args), self.unpack_dict(kwargs)
-    
-    def unpack_list(self, lst):
-        return [self.unwrap(i) for i in lst]
-    
-    def unpack_dict(self, d):
-        return dict([(self.unwrap(key), self.unwrap(val)) for key, val in d.items()])
-    
-    def register_type(self, tp):
-        return self.keeper.register_type(self, tp)
-    
-    def get_type(self, id):
-        return self.keeper.get_type(id)
-    
-class LocalProtocol(AbstractProtocol):
-    """ This is stupid protocol for testing purposes only
-    """
-    def __init__(self):
-        super(LocalProtocol, self).__init__()
-        self.types = []
-   
-    def perform(self, id, name, *args, **kwargs):
-        obj = self.keeper.get_object(id)
-        # we pack and than unpack, for tests
-        args, kwargs = self.pack_args(args, kwargs)
-        assert isinstance(name, str)
-        dumps((args, kwargs))
-        args, kwargs = self.unpack_args(args, kwargs)
-        return getattr(obj, name)(*args, **kwargs)
-    
-    def register_type(self, tp):
-        self.types.append(tp)
-        return len(self.types) - 1
-    
-    def get_type(self, id):
-        return self.types[id]
-
-def remote_loop(protocol):
-    # the simplest version possible, without any concurrency and such
-    wrap = protocol.wrap
-    unwrap = protocol.unwrap
-    send = protocol.send
-    receive = protocol.receive
-    # we need this for wrap/unwrap
-    while 1:
-        command, data = receive()
-        if command == 'get':
-            try:
-                item = protocol.keeper.exported_names[data]
-            except KeyError:
-                send(("finished_error",data))
-            else:
-                # XXX wrapping problems catching? do we have any?
-                send(("finished", wrap(item)))
-        elif command == 'call':
-            id, name, args, kwargs = data
-            args, kwargs = protocol.unpack_args(args, kwargs)
-            try:
-                retval = getattr(protocol.keeper.get_object(id), name)(*args, **kwargs)
-            except:
-                send(("raised", wrap(sys.exc_info())))
-            else:
-                send(("finished", wrap(retval)))
-        elif command == 'finished':
-            return unwrap(data)
-        elif command == 'finished_error':
-            raise ObjectNotFound("Cannot find name %s" % (data,))
-        elif command == 'raised':
-            exc, val, tb = unwrap(data)
-            raise exc, val, tb
-        elif command == 'type_reg':
-            protocol.keeper.fake_remote_type(protocol, data)
-        elif command == 'force':
-            obj = protocol.keeper.get_object(data)
-            w_obj = protocol.pack(obj)
-            send(("forced", w_obj))
-        elif command == 'forced':
-            obj = protocol.unpack(data)
-            return obj
-        elif command == 'desc_get':
-            name, w_obj, w_type = data
-            obj = protocol.unwrap(w_obj)
-            type_ = protocol.unwrap(w_type)
-            if obj:
-                type__ = type(obj)
-            else:
-                type__ = type_
-            send(('finished', protocol.wrap(getattr(type__, name).__get__(obj, type_))))
-
-        elif command == 'desc_set':
-            name, w_obj, w_value = data
-            obj = protocol.unwrap(w_obj)
-            value = protocol.unwrap(w_value)
-            getattr(type(obj), name).__set__(obj, value)
-            send(('finished', protocol.wrap(None)))
-        elif command == 'remote_keys':
-            keys = protocol.keeper.exported_names.keys()
-            send(('finished', protocol.wrap(keys)))
-        else:
-            raise NotImplementedError("command %s" % command)
-
-class RemoteProtocol(AbstractProtocol):
-    #def __init__(self, gateway, remote_code):
-    #    self.gateway = gateway
-    def __init__(self, send, receive, exported_names={}):
-        super(RemoteProtocol, self).__init__(exported_names)
-        #self.exported_names = exported_names
-        self.send = send
-        self.receive = receive
-        #self.type_cache = {}
-        #self.type_id = 0
-        #self.remote_types = {}
-    
-    def perform(self, id, name, *args, **kwargs):
-        args, kwargs = self.pack_args(args, kwargs)
-        self.send(('call', (id, name, args, kwargs)))
-        try:
-            retval = remote_loop(self)
-        except:
-            e, val, tb = sys.exc_info()
-            raise e, val, tb.tb_next.tb_next
-        return retval
-    
-    def get_remote(self, name):
-        self.send(("get", name))
-        retval = remote_loop(self)
-        return retval
-    
-    def force(self, id):
-        self.send(("force", id))
-        retval = remote_loop(self)
-        return retval
-    
-    def pack(self, obj):
-        if isinstance(obj, list):
-            return "l", self.pack_list(obj)
-        elif isinstance(obj, dict):
-            return "d", self.pack_dict(obj)
-        else:
-            raise NotImplementedError("Cannot pack %s" % obj)
-        
-    def unpack(self, data):
-        letter, w_obj = data
-        if letter == 'l':
-            return self.unpack_list(w_obj)
-        elif letter == 'd':
-            return self.unpack_dict(w_obj)
-        else:
-            raise NotImplementedError("Cannot unpack %s" % (data,))
-
-    def get(self, name, obj, type):
-        self.send(("desc_get", (name, self.wrap(obj), self.wrap(type))))
-        return remote_loop(self)
-
-    def set(self, obj, value):
-        self.send(("desc_set", (name, self.wrap(obj), self.wrap(value))))
-
-    def remote_keys(self):
-        self.send(("remote_keys",None))
-        return remote_loop(self)
-
-class RemoteObject(object):
-    def __init__(self, protocol, id):
-        self.id = id
-        self.protocol = protocol
-    
-    def perform(self, name, *args, **kwargs):
-        return self.protocol.perform(self.id, name, *args, **kwargs)
-
-class RemoteBuiltinObject(RemoteObject):
-    def __init__(self, protocol, id):
-        self.id = id
-        self.protocol = protocol
-        self.forced = False
-    
-    def perform(self, name, *args, **kwargs):
-        # XXX: Check who really goes here
-        if self.forced:
-            return getattr(self.obj, name)(*args, **kwargs)
-        if name in ('__eq__', '__ne__', '__lt__', '__gt__', '__ge__', '__le__',
-            '__cmp__'):
-            self.obj = self.protocol.force(self.id)
-            return getattr(self.obj, name)(*args, **kwargs)
-        return self.protocol.perform(self.id, name, *args, **kwargs)
-
-def test_env(exported_names):
-    from stackless import channel, tasklet, run
-    inp, out = channel(), channel()
-    remote_protocol = RemoteProtocol(inp.send, out.receive, exported_names)
-    t = tasklet(remote_loop)(remote_protocol)
-    
-    #def send_trace(data):
-    #    print "Sending %s" % (data,)
-    #    out.send(data)
-
-    #def receive_trace():
-    #    data = inp.receive()
-    #    print "Received %s" % (data,)
-    #    return data
-    return RemoteProtocol(out.send, inp.receive)
diff --git a/lib_pypy/distributed/socklayer.py b/lib_pypy/distributed/socklayer.py
deleted file mode 100644
--- a/lib_pypy/distributed/socklayer.py
+++ /dev/null
@@ -1,83 +0,0 @@
-
-import py
-from socket import socket
-
-raise ImportError("XXX needs import adaptation as 'green' is removed from py lib for years")
-from py.impl.green.msgstruct import decodemessage, message
-from socket import socket, AF_INET, SOCK_STREAM
-import marshal
-import sys
-
-TRACE = False
-def trace(msg):
-    if TRACE:
-        print >>sys.stderr, msg
-
-class Finished(Exception):
-    pass
-
-class SocketWrapper(object):
-    def __init__(self, conn):
-        self.buffer = ""
-        self.conn = conn
-
-class ReceiverWrapper(SocketWrapper):
-    def receive(self):
-        msg, self.buffer = decodemessage(self.buffer)
-        while msg is None:
-            data = self.conn.recv(8192)
-            if not data:
-                raise Finished()
-            self.buffer += data
-            msg, self.buffer = decodemessage(self.buffer)
-        assert msg[0] == 'c'
-        trace("received %s" % msg[1])
-        return marshal.loads(msg[1])
-
-class SenderWrapper(SocketWrapper):
-    def send(self, data):
-        trace("sending %s" % (data,))
-        self.conn.sendall(message('c', marshal.dumps(data)))
-        trace("done")
-
-def socket_listener(address, socket=socket):
-    s = socket(AF_INET, SOCK_STREAM)
-    s.bind(address)
-    s.listen(1)
-    print "Waiting for connection on %s" % (address,)
-    conn, addr = s.accept()
-    print "Connected from %s" % (addr,)
-
-    return SenderWrapper(conn).send, ReceiverWrapper(conn).receive
-
-def socket_loop(address, to_export, socket=socket):
-    from distributed import RemoteProtocol, remote_loop
-    try:
-        send, receive = socket_listener(address, socket)
-        remote_loop(RemoteProtocol(send, receive, to_export))
-    except Finished:
-        pass
-
-def socket_connecter(address, socket=socket):
-    s = socket(AF_INET, SOCK_STREAM)
-    print "Connecting %s" % (address,)
-    s.connect(address)
-    
-    return SenderWrapper(s).send, ReceiverWrapper(s).receive
-
-def connect(address, socket=socket):
-    from distributed.support import RemoteView
-    from distributed import RemoteProtocol
-    return RemoteView(RemoteProtocol(*socket_connecter(address, socket)))
-
-def spawn_remote_side(code, gw):
-    """ A very simple wrapper around greenexecnet to allow
-    spawning a remote side of lib/distributed
-    """
-    from distributed import RemoteProtocol
-    extra = str(py.code.Source("""
-    from distributed import remote_loop, RemoteProtocol
-    remote_loop(RemoteProtocol(channel.send, channel.receive, globals()))
-    """))
-    channel = gw.remote_exec(code + "\n" + extra)
-    return RemoteProtocol(channel.send, channel.receive)
diff --git a/lib_pypy/distributed/support.py b/lib_pypy/distributed/support.py
deleted file mode 100644
--- a/lib_pypy/distributed/support.py
+++ /dev/null
@@ -1,17 +0,0 @@
-
-""" Some random support functions
-"""
-
-from distributed.protocol import ObjectNotFound
-
-class RemoteView(object):
-    def __init__(self, protocol):
-        self.__dict__['__protocol'] = protocol
-
-    def __getattr__(self, name):
-        if name == '__dict__':
-            return super(RemoteView, self).__getattr__(name)
-        try:
-            return self.__dict__['__protocol'].get_remote(name)
-        except ObjectNotFound:
-            raise AttributeError(name)
diff --git a/lib_pypy/distributed/test/__init__.py b/lib_pypy/distributed/test/__init__.py
deleted file mode 100644
diff --git a/lib_pypy/distributed/test/test_distributed.py b/lib_pypy/distributed/test/test_distributed.py
deleted file mode 100644
--- a/lib_pypy/distributed/test/test_distributed.py
+++ /dev/null
@@ -1,301 +0,0 @@
-
-""" Controllers tests
-"""
-
-from pypy.conftest import gettestobjspace
-import sys
-import pytest
-
-class AppTestDistributed(object):
-    def setup_class(cls):
-        cls.space = gettestobjspace(**{"objspace.std.withtproxy": True,
-            "usemodules":("_continuation",)})
-
-    def test_init(self):
-        import distributed
-
-    def test_protocol(self):
-        from distributed.protocol import AbstractProtocol
-        protocol = AbstractProtocol()
-        for item in ("aaa", 3, u"aa", 344444444444444444L, 1.2, (1, "aa")):
-            assert protocol.unwrap(protocol.wrap(item)) == item
-        assert type(protocol.unwrap(protocol.wrap([1,2,3]))) is list
-        assert type(protocol.unwrap(protocol.wrap({"a":3}))) is dict
-        
-        def f():
-            pass
-        
-        assert type(protocol.unwrap(protocol.wrap(f))) is type(f)
-
-    def test_method_of_false_obj(self):
-        from distributed.protocol import AbstractProtocol
-        protocol = AbstractProtocol()
-        lst = []
-        m = lst.append
-        assert type(protocol.unwrap(protocol.wrap(m))) is type(m)
-
-    def test_protocol_run(self):
-        l = [1,2,3]
-        from distributed.protocol import LocalProtocol
-        protocol = LocalProtocol()
-        wrap = protocol.wrap
-        unwrap = protocol.unwrap
-        item = unwrap(wrap(l))
-        assert len(item) == 3
-        assert item[2] == 3
-        item += [1,1,1]
-        assert len(item) == 6
-
-    def test_protocol_call(self):
-        def f(x, y):
-            return x + y
-        
-        from distributed.protocol import LocalProtocol
-        protocol = LocalProtocol()
-        wrap = protocol.wrap
-        unwrap = protocol.unwrap
-        item = unwrap(wrap(f))
-        assert item(3, 2) == 5
-
-    def test_simulation_call(self):
-        def f(x, y):
-            return x + y
-        
-        import types
-        from distributed import RemoteProtocol
-        import sys
-
-        data = []
-        result = []
-        protocol = RemoteProtocol(result.append, data.pop)
-        data += [("finished", protocol.wrap(5)), ("finished", protocol.wrap(f))]
-        fun = protocol.get_remote("f")
-        assert isinstance(fun, types.FunctionType)
-        assert fun(2, 3) == 5
-
-    def test_local_obj(self):
-        class A(object):
-            def __init__(self, x):
-                self.x = x
-            
-            def __len__(self):
-                return self.x + 8
-        
-        from distributed.protocol import LocalProtocol
-        protocol = LocalProtocol()
-        wrap = protocol.wrap
-        unwrap = protocol.unwrap
-        item = unwrap(wrap(A(3)))
-        assert item.x == 3
-        assert len(item) == 11
-
-class AppTestDistributedTasklets(object):
-    spaceconfig = {"objspace.std.withtproxy": True,
-                   "objspace.usemodules._continuation": True}
-    def setup_class(cls):
-        cls.w_test_env = cls.space.appexec([], """():
-        from distributed import test_env
-        return test_env
-        """)
-        cls.reclimit = sys.getrecursionlimit()
-        sys.setrecursionlimit(100000)
-
-    def teardown_class(cls):
-        sys.setrecursionlimit(cls.reclimit)
-    
-    def test_remote_protocol_call(self):
-        def f(x, y):
-            return x + y
-        
-        protocol = self.test_env({"f": f})
-        fun = protocol.get_remote("f")
-        assert fun(2, 3) == 5
-
-    def test_callback(self):
-        def g():
-            return 8
-        
-        def f(x):
-            return x + g()
-        
-        protocol = self.test_env({"f":f})
-        fun = protocol.get_remote("f")
-        assert fun(8) == 16
-    
-    def test_remote_dict(self):
-        #skip("Land of infinite recursion")
-        d = {'a':3}
-        protocol = self.test_env({'d':d})
-        xd = protocol.get_remote('d')
-        #assert d['a'] == xd['a']
-        assert d.keys() == xd.keys()
-        assert d.values() == xd.values()
-        assert d == xd
-        
-    def test_remote_obj(self):
-        class A(object):
-            def __init__(self, x):
-                self.x = x
-            
-            def __len__(self):
-                return self.x + 8
-        a = A(3)
-        
-        protocol = self.test_env({'a':a})
-        xa = protocol.get_remote("a")
-        assert xa.x == 3
-        assert len(xa) == 11
-    
-    def test_remote_doc_and_callback(self):
-        class A(object):
-            """xxx"""
-            def __init__(self):
-                pass
-
-            def meth(self, x):
-                return x() + 3
-        
-        def x():
-            return 1
-        
-        a = A()
-        
-        protocol = self.test_env({'a':a})
-        xa = protocol.get_remote('a')
-        assert xa.__class__.__doc__ == 'xxx'
-        assert xa.meth(x) == 4
-
-    def test_double_reference(self):
-        class A(object):
-            def meth(self, one):
-                self.one = one
-            
-            def perform(self):
-                return 1 + len(self.one())
-        
-        class B(object):
-            def __call__(self):
-                return [1,2,3]
-        
-        a = A()
-        protocol = self.test_env({'a': a})
-        xa = protocol.get_remote('a')
-        xa.meth(B())
-        assert xa.perform() == 4
-
-    def test_frame(self):
-        #skip("Land of infinite recursion")
-        import sys
-        f = sys._getframe()
-        protocol = self.test_env({'f':f})
-        xf = protocol.get_remote('f')
-        assert f.f_globals.keys() == xf.f_globals.keys()
-        assert f.f_locals.keys() == xf.f_locals.keys()
-
-    def test_remote_exception(self):
-        def raising():
-            1/0
-        
-        protocol = self.test_env({'raising':raising})
-        xr = protocol.get_remote('raising')
-        try:
-            xr()
-        except ZeroDivisionError:
-            import sys
-            exc_info, val, tb  = sys.exc_info()
-            #assert tb.tb_next is None
-        else:
-            raise AssertionError("Did not raise")
-
-    def test_remote_classmethod(self):
-        class A(object):
-            z = 8
-
-            @classmethod
-            def x(cls):
-                return cls.z
-
-        a = A()
-        protocol = self.test_env({'a':a})
-        xa = protocol.get_remote("a")
-        res = xa.x()
-        assert res == 8
-
-    def test_types_reverse_mapping(self):
-        class A(object):
-            def m(self, tp):
-                assert type(self) is tp
-
-        a = A()
-        protocol = self.test_env({'a':a, 'A':A})
-        xa = protocol.get_remote('a')
-        xA = protocol.get_remote('A')
-        xa.m(xA)
-
-    def test_instantiate_remote_type(self):
-        class C(object):
-            def __init__(self, y):
-                self.y = y
-            
-            def x(self):
-                return self.y
-
-        protocol = self.test_env({'C':C})
-        xC = protocol.get_remote('C')
-        xc = xC(3)
-        res = xc.x()
-        assert res == 3
-
-    def test_remote_sys(self):
-        import sys
-
-        protocol = self.test_env({'sys':sys})
-        s = protocol.get_remote('sys')
-        l = dir(s)
-        assert l
-
-    def test_remote_file_access(self):
-        skip("Descriptor logic seems broken")
-        protocol = self.test_env({'f':open})
-        xf = protocol.get_remote('f')
-        data = xf('/etc/passwd').read()
-        assert data
-
-    def test_real_descriptor(self):
-        class getdesc(object):
-            def __get__(self, obj, val=None):
-                if obj is not None:
-                    assert type(obj) is X
-                return 3
-
-        class X(object):
-            x = getdesc()
-
-        x = X()
-
-        protocol = self.test_env({'x':x})
-        xx = protocol.get_remote('x')
-        assert xx.x == 3
-    
-    def test_bases(self):
-        class X(object):
-            pass
-
-        class Y(X):
-            pass
-
-        y = Y()
-        protocol = self.test_env({'y':y, 'X':X})
-        xy = protocol.get_remote('y')
-        xX = protocol.get_remote('X')
-        assert isinstance(xy, xX)
-
-    def test_key_error(self):
-        from distributed import ObjectNotFound
-        protocol = self.test_env({})
-        raises(ObjectNotFound, "protocol.get_remote('x')")
-
-    def test_list_items(self):
-        protocol = self.test_env({'x':3, 'y':8})
-        assert sorted(protocol.remote_keys()) == ['x', 'y']
-
diff --git a/lib_pypy/distributed/test/test_greensock.py b/lib_pypy/distributed/test/test_greensock.py
deleted file mode 100644
--- a/lib_pypy/distributed/test/test_greensock.py
+++ /dev/null
@@ -1,62 +0,0 @@
-
-import py
-from pypy.conftest import gettestobjspace, option
-
-def setup_module(mod):
-    py.test.importorskip("pygreen")   # found e.g. in py/trunk/contrib 
-
-class AppTestDistributedGreensock(object):
-    def setup_class(cls):
-        if not option.runappdirect:
-            py.test.skip("Cannot run this on top of py.py because of PopenGateway")
-        cls.space = gettestobjspace(**{"objspace.std.withtproxy": True,
-                                       "usemodules":("_continuation",)})
-        cls.w_remote_side_code = cls.space.appexec([], """():
-        import sys
-        sys.path.insert(0, '%s')
-        remote_side_code = '''
-class A:
-   def __init__(self, x):
-       self.x = x
-            
-   def __len__(self):
-       return self.x + 8
-
-   def raising(self):
-       1/0
-
-   def method(self, x):
-       return x() + self.x
-
-a = A(3)
-
-def count():
-    x = 10
-    # naive counting :)
-    result = 1
-    for i in range(x):
-        result += 1
-    return result
-'''
-        return remote_side_code
-        """ % str(py.path.local(__file__).dirpath().dirpath().dirpath().dirpath()))
-
-    def test_remote_call(self):
-        from distributed import socklayer
-        import sys
-        from pygreen.greenexecnet import PopenGateway
-        gw = PopenGateway()
-        rp = socklayer.spawn_remote_side(self.remote_side_code, gw)
-        a = rp.get_remote("a")
-        assert a.method(lambda : 13) == 16
-    
-    def test_remote_counting(self):
-        from distributed import socklayer
-        from pygreen.greensock2 import allof
-        from pygreen.greenexecnet import PopenGateway
-        gws = [PopenGateway() for i in range(3)]
-        rps = [socklayer.spawn_remote_side(self.remote_side_code, gw)
-               for gw in gws]
-        counters = [rp.get_remote("count") for rp in rps]
-        assert allof(*counters) == (11, 11, 11)
-
diff --git a/lib_pypy/distributed/test/test_socklayer.py b/lib_pypy/distributed/test/test_socklayer.py
deleted file mode 100644
--- a/lib_pypy/distributed/test/test_socklayer.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import py
-from pypy.conftest import gettestobjspace
-
-def setup_module(mod):
-    py.test.importorskip("pygreen")   # found e.g. in py/trunk/contrib 
-
-# XXX think how to close the socket
-
-class AppTestSocklayer:
-    def setup_class(cls):
-        cls.space = gettestobjspace(**{"objspace.std.withtproxy": True,
-                                       "usemodules":("_continuation",
-                                                     "_socket", "select")})
-    
-    def test_socklayer(self):
-        class X(object):
-            z = 3
-
-        x = X()
-
-        try:
-            import py
-        except ImportError:
-            skip("pylib not importable")
-        from pygreen.pipe.gsocke import GreenSocket
-        from distributed.socklayer import socket_loop, connect
-        from pygreen.greensock2 import oneof, allof
-
-        def one():
-            socket_loop(('127.0.0.1', 21211), {'x':x}, socket=GreenSocket)
-
-        def two():
-            rp = connect(('127.0.0.1', 21211), GreenSocket)
-            assert rp.x.z == 3
-
-        oneof(one, two)
diff --git a/lib_pypy/greenlet.py b/lib_pypy/greenlet.py
--- a/lib_pypy/greenlet.py
+++ b/lib_pypy/greenlet.py
@@ -77,8 +77,6 @@
         try:
             unbound_method = getattr(_continulet, methodname)
             args = unbound_method(current, *args, to=target)
-        except GreenletExit, e:
-            args = (e,)
         finally:
             _tls.current = current
         #
@@ -132,6 +130,8 @@
     _tls.current = greenlet
     try:
         res = greenlet.run(*args)
+    except GreenletExit, e:
+        res = e
     finally:
         _continuation.permute(greenlet, greenlet.parent)
     return (res,)
diff --git a/lib_pypy/pypy_test/test_marshal_extra.py b/lib_pypy/pypy_test/test_marshal_extra.py
--- a/lib_pypy/pypy_test/test_marshal_extra.py
+++ b/lib_pypy/pypy_test/test_marshal_extra.py
@@ -142,4 +142,6 @@
         f2.close()
     assert obj == case
 
-
+def test_load_truncated_string():
+    s = '(\x02\x00\x00\x00i\x03\x00\x00\x00sB\xf9\x00\x00\nabcd'
+    py.test.raises(EOFError, marshal.loads, s)
diff --git a/lib_pypy/pyrepl/readline.py b/lib_pypy/pyrepl/readline.py
--- a/lib_pypy/pyrepl/readline.py
+++ b/lib_pypy/pyrepl/readline.py
@@ -194,7 +194,7 @@
         except _error:
             return _old_raw_input(prompt)
         reader.ps1 = prompt
-        return reader.readline(reader, startup_hook=self.startup_hook)
+        return reader.readline(startup_hook=self.startup_hook)
 
     def multiline_input(self, more_lines, ps1, ps2, returns_unicode=False):
         """Read an input on possibly multiple lines, asking for more
diff --git a/lib_pypy/sip.py b/lib_pypy/sip.py
deleted file mode 100644
--- a/lib_pypy/sip.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from _rpyc_support import proxy_module
-
-proxy_module(globals())
-del proxy_module
diff --git a/pypy/annotation/binaryop.py b/pypy/annotation/binaryop.py
--- a/pypy/annotation/binaryop.py
+++ b/pypy/annotation/binaryop.py
@@ -7,7 +7,7 @@
 from pypy.tool.pairtype import pair, pairtype
 from pypy.annotation.model import SomeObject, SomeInteger, SomeBool, s_Bool
 from pypy.annotation.model import SomeString, SomeChar, SomeList, SomeDict
-from pypy.annotation.model import SomeUnicodeCodePoint
+from pypy.annotation.model import SomeUnicodeCodePoint, SomeStringOrUnicode
 from pypy.annotation.model import SomeTuple, SomeImpossibleValue, s_ImpossibleValue
 from pypy.annotation.model import SomeInstance, SomeBuiltin, SomeIterator
 from pypy.annotation.model import SomePBC, SomeFloat, s_None
@@ -470,30 +470,37 @@
             "string formatting mixing strings and unicode not supported")
 
 
-class __extend__(pairtype(SomeString, SomeTuple)):
-    def mod((str, s_tuple)):
+class __extend__(pairtype(SomeString, SomeTuple),
+                 pairtype(SomeUnicodeString, SomeTuple)):
+    def mod((s_string, s_tuple)):
+        is_string = isinstance(s_string, SomeString)
+        is_unicode = isinstance(s_string, SomeUnicodeString)
+        assert is_string or is_unicode
         for s_item in s_tuple.items:
-            if isinstance(s_item, (SomeUnicodeCodePoint, SomeUnicodeString)):
+            if (is_unicode and isinstance(s_item, (SomeChar, SomeString)) or
+                is_string and isinstance(s_item, (SomeUnicodeCodePoint,
+                                                  SomeUnicodeString))):
                 raise NotImplementedError(
                     "string formatting mixing strings and unicode not supported")
-        getbookkeeper().count('strformat', str, s_tuple)
-        no_nul = str.no_nul
+        getbookkeeper().count('strformat', s_string, s_tuple)
+        no_nul = s_string.no_nul
         for s_item in s_tuple.items:
             if isinstance(s_item, SomeFloat):
                 pass   # or s_item is a subclass, like SomeInteger
-            elif isinstance(s_item, SomeString) and s_item.no_nul:
+            elif isinstance(s_item, SomeStringOrUnicode) and s_item.no_nul:
                 pass
             else:
                 no_nul = False
                 break
-        return SomeString(no_nul=no_nul)
+        return s_string.__class__(no_nul=no_nul)
 
 
-class __extend__(pairtype(SomeString, SomeObject)):
+class __extend__(pairtype(SomeString, SomeObject),
+                 pairtype(SomeUnicodeString, SomeObject)):
 
-    def mod((str, args)):
-        getbookkeeper().count('strformat', str, args)
-        return SomeString()
+    def mod((s_string, args)):
+        getbookkeeper().count('strformat', s_string, args)
+        return s_string.__class__()
 
 class __extend__(pairtype(SomeFloat, SomeFloat)):
     
diff --git a/pypy/annotation/bookkeeper.py b/pypy/annotation/bookkeeper.py
--- a/pypy/annotation/bookkeeper.py
+++ b/pypy/annotation/bookkeeper.py
@@ -201,6 +201,7 @@
                     for op in block.operations:
                         if op.opname in ('simple_call', 'call_args'):
                             yield op
+
                         # some blocks are partially annotated
                         if binding(op.result, None) is None:
                             break   # ignore the unannotated part
diff --git a/pypy/annotation/description.py b/pypy/annotation/description.py
--- a/pypy/annotation/description.py
+++ b/pypy/annotation/description.py
@@ -450,6 +450,12 @@
                     attrs.update(self.basedesc.all_enforced_attrs)
                 self.all_enforced_attrs = attrs
 
+            if (self.is_builtin_exception_class() and
+                self.all_enforced_attrs is None):
+                from pypy.annotation import classdef
+                if self.pyobj not in classdef.FORCE_ATTRIBUTES_INTO_CLASSES:
+                    self.all_enforced_attrs = []    # no attribute allowed
+
     def add_source_attribute(self, name, value, mixin=False):
         if isinstance(value, types.FunctionType):
             # for debugging
diff --git a/pypy/annotation/test/test_annrpython.py b/pypy/annotation/test/test_annrpython.py
--- a/pypy/annotation/test/test_annrpython.py
+++ b/pypy/annotation/test/test_annrpython.py
@@ -3389,6 +3389,22 @@
         s = a.build_types(f, [str])
         assert isinstance(s, annmodel.SomeString)
 
+    def test_unicodeformatting(self):
+        def f(x):
+            return u'%s' % x
+
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [unicode])
+        assert isinstance(s, annmodel.SomeUnicodeString)
+
+    def test_unicodeformatting_tuple(self):
+        def f(x):
+            return u'%s' % (x,)
+
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [unicode])
+        assert isinstance(s, annmodel.SomeUnicodeString)
+
 
     def test_negative_slice(self):
         def f(s, e):
@@ -3793,7 +3809,55 @@
         assert isinstance(s, annmodel.SomeString)
         assert s.no_nul
 
-
+    def test_base_iter(self):
+        class A(object):
+            def __iter__(self):
+                return self
+        
+        def fn():
+            return iter(A())
+
+        a = self.RPythonAnnotator()
+        s = a.build_types(fn, [])
+        assert isinstance(s, annmodel.SomeInstance)
+        assert s.classdef.name.endswith('.A')
+
+    def test_iter_next(self):
+        class A(object):
+            def __iter__(self):
+                return self
+
+            def next(self):
+                return 1
+
+        def fn():
+            s = 0
+            for x in A():
+                s += x
+            return s
+
+        a = self.RPythonAnnotator()
+        s = a.build_types(fn, [])
+        assert len(a.translator.graphs) == 3 # fn, __iter__, next
+        assert isinstance(s, annmodel.SomeInteger)
+
+    def test_next_function(self):
+        def fn(n):
+            x = [0, 1, n]
+            i = iter(x)
+            return next(i) + next(i)
+
+        a = self.RPythonAnnotator()
+        s = a.build_types(fn, [int])
+        assert isinstance(s, annmodel.SomeInteger)
+
+    def test_no_attr_on_common_exception_classes(self):
+        for cls in [ValueError, Exception]:
+            def fn():
+                e = cls()
+                e.foo = "bar"
+            a = self.RPythonAnnotator()
+            py.test.raises(Exception, a.build_types, fn, [])
 
 def g(n):
     return [0,1,2,n]
diff --git a/pypy/annotation/unaryop.py b/pypy/annotation/unaryop.py
--- a/pypy/annotation/unaryop.py
+++ b/pypy/annotation/unaryop.py
@@ -609,33 +609,36 @@
 
 class __extend__(SomeInstance):
 
+    def _true_getattr(ins, attr):
+        if attr == '__class__':
+            return ins.classdef.read_attr__class__()
+        attrdef = ins.classdef.find_attribute(attr)
+        position = getbookkeeper().position_key
+        attrdef.read_locations[position] = True
+        s_result = attrdef.getvalue()
+        # hack: if s_result is a set of methods, discard the ones
+        #       that can't possibly apply to an instance of ins.classdef.
+        # XXX do it more nicely
+        if isinstance(s_result, SomePBC):
+            s_result = ins.classdef.lookup_filter(s_result, attr,
+                                                  ins.flags)
+        elif isinstance(s_result, SomeImpossibleValue):
+            ins.classdef.check_missing_attribute_update(attr)
+            # blocking is harmless if the attribute is explicitly listed
+            # in the class or a parent class.
+            for basedef in ins.classdef.getmro():
+                if basedef.classdesc.all_enforced_attrs is not None:
+                    if attr in basedef.classdesc.all_enforced_attrs:
+                        raise HarmlesslyBlocked("get enforced attr")
+        elif isinstance(s_result, SomeList):
+            s_result = ins.classdef.classdesc.maybe_return_immutable_list(
+                attr, s_result)
+        return s_result
+
     def getattr(ins, s_attr):
         if s_attr.is_constant() and isinstance(s_attr.const, str):
             attr = s_attr.const
-            if attr == '__class__':
-                return ins.classdef.read_attr__class__()
-            attrdef = ins.classdef.find_attribute(attr)
-            position = getbookkeeper().position_key
-            attrdef.read_locations[position] = True
-            s_result = attrdef.getvalue()
-            # hack: if s_result is a set of methods, discard the ones
-            #       that can't possibly apply to an instance of ins.classdef.
-            # XXX do it more nicely
-            if isinstance(s_result, SomePBC):
-                s_result = ins.classdef.lookup_filter(s_result, attr,
-                                                      ins.flags)
-            elif isinstance(s_result, SomeImpossibleValue):
-                ins.classdef.check_missing_attribute_update(attr)
-                # blocking is harmless if the attribute is explicitly listed
-                # in the class or a parent class.
-                for basedef in ins.classdef.getmro():
-                    if basedef.classdesc.all_enforced_attrs is not None:
-                        if attr in basedef.classdesc.all_enforced_attrs:
-                            raise HarmlesslyBlocked("get enforced attr")
-            elif isinstance(s_result, SomeList):
-                s_result = ins.classdef.classdesc.maybe_return_immutable_list(
-                    attr, s_result)
-            return s_result
+            return ins._true_getattr(attr)
         return SomeObject()
     getattr.can_only_throw = []
 
@@ -657,6 +660,19 @@
         if not ins.can_be_None:
             s.const = True
 
+    def iter(ins):
+        s_iterable = ins._true_getattr('__iter__')
+        bk = getbookkeeper()
+        # record for calltables
+        bk.emulate_pbc_call(bk.position_key, s_iterable, [])
+        return s_iterable.call(bk.build_args("simple_call", []))
+
+    def next(ins):
+        s_next = ins._true_getattr('next')
+        bk = getbookkeeper()
+        # record for calltables
+        bk.emulate_pbc_call(bk.position_key, s_next, [])
+        return s_next.call(bk.build_args("simple_call", []))
 
 class __extend__(SomeBuiltin):
     def _can_only_throw(bltn, *args):
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -34,13 +34,14 @@
      "thread", "itertools", "pyexpat", "_ssl", "cpyext", "array",
      "_bisect", "binascii", "_multiprocessing", '_warnings',
      "_collections", "_multibytecodec", "micronumpy", "_ffi",
-     "_continuation"]
+     "_continuation", "_cffi_backend"]
 ))
 
 translation_modules = default_modules.copy()
 translation_modules.update(dict.fromkeys(
     ["fcntl", "rctime", "select", "signal", "_rawffi", "zlib",
      "struct", "_md5", "cStringIO", "array", "_ffi",
+     "binascii",
      # the following are needed for pyrepl (and hence for the
      # interactive prompt/pdb)
      "termios", "_minimal_curses",
@@ -88,7 +89,6 @@
     "_rawffi": [("objspace.usemodules.struct", True)],
     "cpyext": [("translation.secondaryentrypoints", "cpyext"),
                ("translation.shared", sys.platform == "win32")],
-    "_ffi":    [("translation.jit_ffi", True)],
 }
 
 module_import_dependencies = {
diff --git a/pypy/config/test/test_pypyoption.py b/pypy/config/test/test_pypyoption.py
--- a/pypy/config/test/test_pypyoption.py
+++ b/pypy/config/test/test_pypyoption.py
@@ -71,9 +71,4 @@
         c = Config(descr)
         for path in c.getpaths(include_groups=True):
             fn = prefix + "." + path + ".txt"
-            yield check_file_exists, fn
-
-def test__ffi_opt():
-    config = get_pypy_config(translating=True)
-    config.objspace.usemodules._ffi = True
-    assert config.translation.jit_ffi
+            yield fn, check_file_exists, fn
diff --git a/pypy/config/translationoption.py b/pypy/config/translationoption.py
--- a/pypy/config/translationoption.py
+++ b/pypy/config/translationoption.py
@@ -123,8 +123,6 @@
     ChoiceOption("jit_profiler", "integrate profiler support into the JIT",
                  ["off", "oprofile"],
                  default="off"),
-    # jit_ffi is automatically turned on by withmod-_ffi (which is enabled by default)
-    BoolOption("jit_ffi", "optimize libffi calls", default=False, cmdline=None),
     BoolOption("check_str_without_nul",
                "Forbid NUL chars in strings in some external function calls",
                default=False, cmdline=None),
diff --git a/pypy/conftest.py b/pypy/conftest.py
--- a/pypy/conftest.py
+++ b/pypy/conftest.py
@@ -186,6 +186,9 @@
     def delslice(self, obj, *args):
         obj.__delslice__(*args)
 
+    def is_w(self, obj1, obj2):
+        return obj1 is obj2
+
 def translation_test_so_skip_if_appdirect():
     if option.runappdirect:
         py.test.skip("translation test, skipped for appdirect")
diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst
--- a/pypy/doc/coding-guide.rst
+++ b/pypy/doc/coding-guide.rst
@@ -255,7 +255,12 @@
   code if the translator can prove that they are non-negative.  When
   slicing a string it is necessary to prove that the slice start and
   stop indexes are non-negative. There is no implicit str-to-unicode cast
-  anywhere.
+  anywhere. Simple string formatting using the ``%`` operator works, as long
+  as the format string is known at translation time; the only supported
+  formatting specifiers are ``%s``, ``%d``, ``%x``, ``%o``, ``%f``, plus
+  ``%r`` but only for user-defined instances. Modifiers such as conversion
+  flags, precision, length etc. are not supported. Moreover, it is forbidden
+  to mix unicode and strings when formatting.
 
 **tuples**
 
@@ -341,8 +346,8 @@
 
 **objects**
 
-  Normal rules apply. Special methods are not honoured, except ``__init__`` and
-  ``__del__``.
+  Normal rules apply. Special methods are not honoured, except ``__init__``,
+  ``__del__`` and ``__iter__``.
 
 This layout makes the number of types to take care about quite limited.
 
diff --git a/pypy/doc/config/objspace.usemodules._cffi_backend.txt b/pypy/doc/config/objspace.usemodules._cffi_backend.txt
new file mode 100644
--- /dev/null
+++ b/pypy/doc/config/objspace.usemodules._cffi_backend.txt
@@ -0,0 +1,1 @@
+Core of CFFI (http://cffi.readthedocs.org)
diff --git a/pypy/doc/config/objspace.usemodules.cppyy.txt b/pypy/doc/config/objspace.usemodules.cppyy.txt
new file mode 100644
--- /dev/null
+++ b/pypy/doc/config/objspace.usemodules.cppyy.txt
@@ -0,0 +1,1 @@
+Use the 'cppyy' module
diff --git a/pypy/doc/cppyy.rst b/pypy/doc/cppyy.rst
--- a/pypy/doc/cppyy.rst
+++ b/pypy/doc/cppyy.rst
@@ -153,6 +153,7 @@
 
 Automatic class loader
 ======================
+
 There is one big problem in the code above, that prevents its use in a (large
 scale) production setting: the explicit loading of the reflection library.
 Clearly, if explicit load statements such as these show up in code downstream
@@ -164,7 +165,9 @@
 The class loader makes use of so-called rootmap files, which ``genreflex``
 can produce.
 These files contain the list of available C++ classes and specify the library
-that needs to be loaded for their use.
+that needs to be loaded for their use (as an aside, this listing allows for a
+cross-check to see whether reflection info is generated for all classes that
+you expect).
 By convention, the rootmap files should be located next to the reflection info
 libraries, so that they can be found through the normal shared library search
 path.
@@ -198,6 +201,7 @@
 
 Advanced example
 ================
+
 The following snippet of C++ is very contrived, to allow showing that such
 pathological code can be handled and to show how certain features play out in
 practice::
@@ -253,6 +257,9 @@
 With the aid of a selection file, a large project can be easily managed:
 simply ``#include`` all relevant headers into a single header file that is
 handed to ``genreflex``.
+In fact, if you hand multiple header files to ``genreflex``, then a selection
+file is almost obligatory: without it, only classes from the last header will
+be selected.
 Then, apply a selection file to pick up all the relevant classes.
 For our purposes, the following rather straightforward selection will do
 (the name ``lcgdict`` for the root is historical, but required)::
@@ -325,15 +332,43 @@
 (active memory management is one such case), but by and large, if the use of a
 feature does not strike you as obvious, it is more likely to simply be a bug.
 That is a strong statement to make, but also a worthy goal.
+For the C++ side of the examples, refer to this `example code`_, which was
+bound using::
+
+    $ genreflex example.h --deep --rootmap=libexampleDict.rootmap --rootmap-lib=libexampleDict.so
+    $ g++ -fPIC -rdynamic -O2 -shared -I$ROOTSYS/include example_rflx.cpp -o libexampleDict.so -L$ROOTSYS/lib -lReflex
+
+.. _`example code`: cppyy_example.html
 
 * **abstract classes**: Are represented as python classes, since they are
   needed to complete the inheritance hierarchies, but will raise an exception
   if an attempt is made to instantiate from them.
+  Example::
+
+    >>>> from cppyy.gbl import AbstractClass, ConcreteClass
+    >>>> a = AbstractClass()
+    Traceback (most recent call last):
+      File "<console>", line 1, in <module>
+    TypeError: cannot instantiate abstract class 'AbstractClass'
+    >>>> issubclass(ConcreteClass, AbstractClass)
+    True
+    >>>> c = ConcreteClass()
+    >>>> isinstance(c, AbstractClass)
+    True
+    >>>>
 
 * **arrays**: Supported for builtin data types only, as used from module
   ``array``.
   Out-of-bounds checking is limited to those cases where the size is known at
   compile time (and hence part of the reflection info).
+  Example::
+
+    >>>> from cppyy.gbl import ConcreteClass
+    >>>> from array import array
+    >>>> c = ConcreteClass()
+    >>>> c.array_method(array('d', [1., 2., 3., 4.]), 4)
+    1 2 3 4
+    >>>> 
 
 * **builtin data types**: Map onto the expected equivalent python types, with
   the caveat that there may be size differences, and thus it is possible that
@@ -344,23 +379,77 @@
   in the hierarchy of the object being returned.
   This is important to preserve object identity as well as to make casting,
   a pure C++ feature after all, superfluous.
+  Example::
+
+    >>>> from cppyy.gbl import AbstractClass, ConcreteClass
+    >>>> c = ConcreteClass()
+    >>>> ConcreteClass.show_autocast.__doc__
+    'AbstractClass* ConcreteClass::show_autocast()'
+    >>>> d = c.show_autocast()
+    >>>> type(d)
+    <class '__main__.ConcreteClass'>
+    >>>>
+
+  However, if need be, you can perform C++-style reinterpret_casts (i.e.
+  without taking offsets into account), by taking and rebinding the address
+  of an object::
+
+    >>>> from cppyy import addressof, bind_object
+    >>>> e = bind_object(addressof(d), AbstractClass)
+    >>>> type(e)
+    <class '__main__.AbstractClass'>
+    >>>>
 
 * **classes and structs**: Get mapped onto python classes, where they can be
   instantiated as expected.
   If classes are inner classes or live in a namespace, their naming and
   location will reflect that.
+  Example::
+
+    >>>> from cppyy.gbl import ConcreteClass, Namespace
+    >>>> ConcreteClass == Namespace.ConcreteClass
+    False
+    >>>> n = Namespace.ConcreteClass.NestedClass()
+    >>>> type(n)
+    <class '__main__.Namespace::ConcreteClass::NestedClass'>
+    >>>> 
 
 * **data members**: Public data members are represented as python properties
   and provide read and write access on instances as expected.
+  Private and protected data members are not accessible.
+  Example::
+
+    >>>> from cppyy.gbl import ConcreteClass
+    >>>> c = ConcreteClass()
+    >>>> c.m_int
+    42
+    >>>>
 
 * **default arguments**: C++ default arguments work as expected, but python
   keywords are not supported.
   It is technically possible to support keywords, but for the C++ interface,
   the formal argument names have no meaning and are not considered part of the
   API, hence it is not a good idea to use keywords.
+  Example::
+
+    >>>> from cppyy.gbl import ConcreteClass
+    >>>> c = ConcreteClass()       # uses default argument
+    >>>> c.m_int
+    42
+    >>>> c = ConcreteClass(13)
+    >>>> c.m_int
+    13
+    >>>>
 
 * **doc strings**: The doc string of a method or function contains the C++
   arguments and return types of all overloads of that name, as applicable.
+  Example::
+
+    >>>> from cppyy.gbl import ConcreteClass
+    >>>> print ConcreteClass.array_method.__doc__
+    void ConcreteClass::array_method(int*, int)
+    void ConcreteClass::array_method(double*, int)
+    >>>> 
 
 * **enums**: Are translated as ints with no further checking.
 
@@ -375,11 +464,40 @@
   This is a current, not a fundamental, limitation.
   The C++ side will not see any overridden methods on the python side, as
   cross-inheritance is planned but not yet supported.
+  Example::
+
+    >>>> from cppyy.gbl import ConcreteClass
+    >>>> help(ConcreteClass)
+    Help on class ConcreteClass in module __main__:
+
+    class ConcreteClass(AbstractClass)
+     |  Method resolution order:
+     |      ConcreteClass
+     |      AbstractClass
+     |      cppyy.CPPObject
+     |      __builtin__.CPPInstance
+     |      __builtin__.object
+     |  
+     |  Methods defined here:
+     |  
+     |  ConcreteClass(self, *args)
+     |      ConcreteClass::ConcreteClass(const ConcreteClass&)
+     |      ConcreteClass::ConcreteClass(int)
+     |      ConcreteClass::ConcreteClass()
+     |
+     etc. ....
 
 * **memory**: C++ instances created by calling their constructor from python
   are owned by python.
   You can check/change the ownership with the _python_owns flag that every
   bound instance carries.
+  Example::
+
+    >>>> from cppyy.gbl import ConcreteClass
+    >>>> c = ConcreteClass()
+    >>>> c._python_owns            # True: object created in Python
+    True
+    >>>> 
 
 * **methods**: Are represented as python methods and work as expected.
   They are first class objects and can be bound to an instance.
@@ -395,23 +513,34 @@
   Namespaces are more open-ended than classes, so sometimes initial access may
   result in updates as data and functions are looked up and constructed
   lazily.
-  Thus the result of ``dir()`` on a namespace should not be relied upon: it
-  only shows the already accessed members. (TODO: to be fixed by implementing
-  __dir__.)
+  Thus the result of ``dir()`` on a namespace shows the classes available,
+  even if they may not have been created yet.
+  It does not show classes that could potentially be loaded by the class
+  loader.
+  Once created, namespaces are registered as modules, to allow importing from
+  them.
+  Namespaces currently do not work with the class loader.
+  Fixing these bootstrap problems is on the TODO list.
   The global namespace is ``cppyy.gbl``.
 
 * **operator conversions**: If defined in the C++ class and a python
   equivalent exists (i.e. all builtin integer and floating point types, as well
   as ``bool``), it will map onto that python conversion.
   Note that ``char*`` is mapped onto ``__str__``.
+  Example::
+
+    >>>> from cppyy.gbl import ConcreteClass
+    >>>> print ConcreteClass()
+    Hello operator const char*!
+    >>>> 
 
 * **operator overloads**: If defined in the C++ class and if a python
   equivalent is available (not always the case, think e.g. of ``operator||``),
   then they work as expected.
   Special care needs to be taken for global operator overloads in C++: first,
   make sure that they are actually reflected, especially for the global
-  overloads for ``operator==`` and ``operator!=`` of STL iterators in the case
-  of gcc.
+  overloads for ``operator==`` and ``operator!=`` of STL vector iterators in
+  the case of gcc (note that they are not needed to iterate over a vector).
   Second, make sure that reflection info is loaded in the proper order.
   I.e. that these global overloads are available before use.
 
@@ -441,17 +570,30 @@
   will be returned if the return type is ``const char*``.
 
 * **templated classes**: Are represented in a meta-class style in python.
-  This looks a little bit confusing, but conceptually is rather natural.
+  This may look a little bit confusing, but conceptually is rather natural.
   For example, given the class ``std::vector<int>``, the meta-class part would
-  be ``std.vector`` in python.
+  be ``std.vector``.
   Then, to get the instantiation on ``int``, do ``std.vector(int)`` and to
-  create an instance of that class, do ``std.vector(int)()``.
+  create an instance of that class, do ``std.vector(int)()``::
+
+    >>>> import cppyy
+    >>>> cppyy.load_reflection_info('libexampleDict.so')
+    >>>> cppyy.gbl.std.vector                # template metatype
+    <cppyy.CppyyTemplateType object at 0x00007fcdd330f1a0>
+    >>>> cppyy.gbl.std.vector(int)           # instantiates template -> class
+    <class '__main__.std::vector<int>'>
+    >>>> cppyy.gbl.std.vector(int)()         # instantiates class -> object
+    <__main__.std::vector<int> object at 0x00007fe480ba4bc0>
+    >>>> 
+
   Note that templates can be build up by handing actual types to the class
   instantiation (as done in this vector example), or by passing in the list of
   template arguments as a string.
   The former is a lot easier to work with if you have template instantiations
-  using classes that themselves are templates (etc.) in the arguments.
-  All classes must already exist in the loaded reflection info.
+  using classes that themselves are templates in the arguments (think e.g. a
+  vector of vectors).
+  All template classes must already exist in the loaded reflection info; they
+  do not work (yet) with the class loader.
 
 * **typedefs**: Are simple python references to the actual classes to which
   they refer.
@@ -502,11 +644,19 @@
 
 If you know for certain that all symbols will be linked in from other sources,
 you can also declare the explicit template instantiation ``extern``.
+An alternative is to add an object to an unnamed namespace::
 
-Unfortunately, this is not enough for gcc.
-The iterators, if they are going to be used, need to be instantiated as well,
-as do the comparison operators on those iterators, as these live in an
-internal namespace, rather than in the iterator classes.
+    namespace {
+        std::vector<MyClass> vmc;
+    } // unnamed namespace
+
+Unfortunately, this is not always enough for gcc.
+The iterators of vectors, if they are going to be used, need to be
+instantiated as well, as do the comparison operators on those iterators, as
+these live in an internal namespace, rather than in the iterator classes.
+Note that you do NOT need these iterators to iterate over a vector.
+You only need them if you plan to explicitly call e.g. ``begin`` and ``end``
+methods, and do comparisons of iterators.
 One way to handle this, is to deal with this once in a macro, then reuse that
 macro for all ``vector`` classes.
 Thus, the header above needs this (again protected with
@@ -533,8 +683,6 @@
     <lcgdict>
         <class pattern="std::vector<*>" />
         <class pattern="std::vector<*>::iterator" />
-        <class pattern="std::_Vector_base<*>" />
-        <class pattern="std::_Vector_base<*>::_Vector_impl" />
         <function name="__gnu_cxx::operator=="/>
         <function name="__gnu_cxx::operator!="/>
 
@@ -549,7 +697,7 @@
 Note: this is a dirty corner that clearly could do with some automation,
 even if the macro already helps.
 Such automation is planned.
-In fact, in the cling world, the backend can perform the template
+In fact, in the Cling world, the backend can perform the template
 instantations and generate the reflection info on the fly, and none of the
 above will any longer be necessary.
 
@@ -568,7 +716,8 @@
     1 2 3
     >>>>
 
-Other templates work similarly.
+Other templates work similarly, but are typically simpler, as there are no
+similar issues with iterators for e.g. ``std::list``.
 The arguments to the template instantiation can either be a string with the
 full list of arguments, or the explicit classes.
 The latter makes for easier code writing if the classes passed to the
@@ -655,3 +804,15 @@
 In that wrapper script you can rename methods exactly the way you need it.
 
 In the cling world, all these differences will be resolved.
+
+
+Python3
+=======
+
+To change versions of CPython (to Python3, another version of Python, or later
+to the `Py3k`_ version of PyPy), the only part that requires recompilation is
+the bindings module, be it ``cppyy`` or ``libPyROOT.so`` (in PyCintex).
+Although ``genreflex`` is indeed a Python tool, the generated reflection
+information is completely independent of Python.
+
+.. _`Py3k`: https://bitbucket.org/pypy/pypy/src/py3k
diff --git a/pypy/doc/cppyy_example.rst b/pypy/doc/cppyy_example.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/cppyy_example.rst
@@ -0,0 +1,56 @@
+// File: example.h::
+
+    #include <iostream>
+    #include <vector>
+
+    class AbstractClass {
+    public:
+        virtual ~AbstractClass() {}
+        virtual void abstract_method() = 0;
+    };
+
+    class ConcreteClass : AbstractClass {
+    public:
+        ConcreteClass(int n=42) : m_int(n) {}
+        ~ConcreteClass() {}
+
+        virtual void abstract_method() {
+            std::cout << "called concrete method" << std::endl;
+        }
+
+        void array_method(int* ad, int size) {
+            for (int i=0; i < size; ++i)
+                std::cout << ad[i] << ' ';
+            std::cout << std::endl;
+        }
+
+        void array_method(double* ad, int size) {
+            for (int i=0; i < size; ++i)
+                std::cout << ad[i] << ' ';
+            std::cout << std::endl;
+        }
+
+        AbstractClass* show_autocast() {
+            return this;
+        }
+
+        operator const char*() {
+            return "Hello operator const char*!";
+        }
+
+    public:
+        int m_int;
+    };
+
+    namespace Namespace {
+
+       class ConcreteClass {
+       public:
+          class NestedClass {
+          public:
+             std::vector<int> m_v;
+          };
+
+       };
+
+    } // namespace Namespace
diff --git a/pypy/doc/image/agile-talk.jpg b/pypy/doc/image/agile-talk.jpg
deleted file mode 100644
Binary file pypy/doc/image/agile-talk.jpg has changed
diff --git a/pypy/doc/image/architecture-session.jpg b/pypy/doc/image/architecture-session.jpg
deleted file mode 100644
Binary file pypy/doc/image/architecture-session.jpg has changed
diff --git a/pypy/doc/image/bram.jpg b/pypy/doc/image/bram.jpg
deleted file mode 100644
Binary file pypy/doc/image/bram.jpg has changed
diff --git a/pypy/doc/image/coding-discussion.jpg b/pypy/doc/image/coding-discussion.jpg
deleted file mode 100644
Binary file pypy/doc/image/coding-discussion.jpg has changed
diff --git a/pypy/doc/image/guido.jpg b/pypy/doc/image/guido.jpg
deleted file mode 100644
Binary file pypy/doc/image/guido.jpg has changed
diff --git a/pypy/doc/image/interview-bobippolito.jpg b/pypy/doc/image/interview-bobippolito.jpg
deleted file mode 100644
Binary file pypy/doc/image/interview-bobippolito.jpg has changed
diff --git a/pypy/doc/image/interview-timpeters.jpg b/pypy/doc/image/interview-timpeters.jpg
deleted file mode 100644
Binary file pypy/doc/image/interview-timpeters.jpg has changed
diff --git a/pypy/doc/image/introductory-student-talk.jpg b/pypy/doc/image/introductory-student-talk.jpg
deleted file mode 100644
Binary file pypy/doc/image/introductory-student-talk.jpg has changed
diff --git a/pypy/doc/image/introductory-talk-pycon.jpg b/pypy/doc/image/introductory-talk-pycon.jpg
deleted file mode 100644
Binary file pypy/doc/image/introductory-talk-pycon.jpg has changed
diff --git a/pypy/doc/image/ironpython.jpg b/pypy/doc/image/ironpython.jpg
deleted file mode 100644
Binary file pypy/doc/image/ironpython.jpg has changed
diff --git a/pypy/doc/image/mallorca-trailer.jpg b/pypy/doc/image/mallorca-trailer.jpg
deleted file mode 100644
Binary file pypy/doc/image/mallorca-trailer.jpg has changed
diff --git a/pypy/doc/image/pycon-trailer.jpg b/pypy/doc/image/pycon-trailer.jpg
deleted file mode 100644
Binary file pypy/doc/image/pycon-trailer.jpg has changed
diff --git a/pypy/doc/image/sprint-tutorial.jpg b/pypy/doc/image/sprint-tutorial.jpg
deleted file mode 100644
Binary file pypy/doc/image/sprint-tutorial.jpg has changed
diff --git a/pypy/doc/video-index.rst b/pypy/doc/video-index.rst
--- a/pypy/doc/video-index.rst
+++ b/pypy/doc/video-index.rst
@@ -2,39 +2,11 @@
 PyPy video documentation 
 =========================
 
-Requirements to download and view
----------------------------------
-
-In order to download the videos you need to point a
-BitTorrent client at the torrent files provided below. 
-We do not provide any other download method at this
-time.  Please get a BitTorrent client (such as bittorrent). 
-For a list of clients please 
-see http://en.wikipedia.org/wiki/Category:Free_BitTorrent_clients or 
-http://en.wikipedia.org/wiki/Comparison_of_BitTorrent_clients. 
-For more information about Bittorrent see 
-http://en.wikipedia.org/wiki/Bittorrent.
-
-In order to view the downloaded movies you need to 
-have a video player that supports DivX AVI files (DivX 5, mp3 audio)
-such as `mplayer`_, `xine`_, `vlc`_ or the windows media player.
-
-.. _`mplayer`: http://www.mplayerhq.hu/design7/dload.html
-.. _`xine`: http://www.xine-project.org
-.. _`vlc`: http://www.videolan.org/vlc/
-
-You can find the necessary codecs in the ffdshow-library:
-http://sourceforge.net/projects/ffdshow/
-
-or use the original divx codec (for Windows):
-http://www.divx.com/software/divx-plus
-
-
 Copyrights and Licensing 
 ----------------------------
 
-The following videos are copyrighted by merlinux gmbh and 
-published under the Creative Commons Attribution License 2.0 Germany: http://creativecommons.org/licenses/by/2.0/de/
+The following videos are copyrighted by merlinux gmbh and available on
+YouTube.
 
 If you need another license, don't hesitate to contact us. 
 
@@ -42,255 +14,202 @@
 Trailer: PyPy at the PyCon 2006
 -------------------------------
 
-130mb: http://buildbot.pypy.org/misc/torrent/pycon-trailer.avi.torrent
+This trailer shows the PyPy team at the PyCon 2006, a behind-the-scenes at
+sprints, talks and everywhere else.
 
-71mb: http://buildbot.pypy.org/misc/torrent/pycon-trailer-medium.avi.torrent
+.. raw:: html
 
-50mb: http://buildbot.pypy.org/misc/torrent/pycon-trailer-320x240.avi.torrent
-
-.. image:: image/pycon-trailer.jpg
-   :scale: 100
-   :alt: Trailer PyPy at PyCon
-   :align: left
-
-This trailer shows the PyPy team at the PyCon 2006, a behind-the-scenes at sprints, talks and everywhere else.
-
-PAL, 9 min, DivX AVI
-
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/WfGszrRUdtc?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Interview with Tim Peters
 -------------------------
 
-440mb: http://buildbot.pypy.org/misc/torrent/interview-timpeters-v2.avi.torrent
+Interview with CPython core developer Tim Peters at PyCon 2006, Dallas,
+US. (2006-03-02)
 
-138mb: http://buildbot.pypy.org/misc/torrent/interview-timpeters-320x240.avi.torrent
+Tim Peters, a longtime CPython core developer talks about how he got into
+Python, what he thinks about the PyPy project and why he thinks it would have
+never been possible in the US.
 
-.. image:: image/interview-timpeters.jpg
-   :scale: 100
-   :alt: Interview with Tim Peters
-   :align: left
+.. raw:: html
 
-Interview with CPython core developer Tim Peters at PyCon 2006, Dallas, US. (2006-03-02)
-
-PAL, 23 min, DivX AVI
-
-Tim Peters, a longtime CPython core developer talks about how he got into Python, what he thinks about the PyPy project and why he thinks it would have never been possible in the US.
-
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/1wAOy88WxmY?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Interview with Bob Ippolito
 ---------------------------
 
-155mb: http://buildbot.pypy.org/misc/torrent/interview-bobippolito-v2.avi.torrent
+What do you think about PyPy? Interview with American software developer Bob
+Ippolito at PyCon 2006, Dallas, US. (2006-03-01)
 
-50mb: http://buildbot.pypy.org/misc/torrent/interview-bobippolito-320x240.avi.torrent
+Bob Ippolito is an Open Source software developer from San Francisco and has
+been to two PyPy sprints. In this interview he is giving his opinion on the
+project.
 
-.. image:: image/interview-bobippolito.jpg
-   :scale: 100
-   :alt: Interview with Bob Ippolito
-   :align: left
+.. raw:: html
 
-What do you think about PyPy? Interview with American software developer Bob Ippolito at tPyCon 2006, Dallas, US. (2006-03-01)
-
-PAL 8 min, DivX AVI
-
-Bob Ippolito is an Open Source software developer from San Francisco and has been to two PyPy sprints. In this interview he is giving his opinion on the project.
-
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/c5rq4Q03zgg?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Introductory talk on PyPy
 -------------------------
 
-430mb: http://buildbot.pypy.org/misc/torrent/introductory-talk-pycon-v1.avi.torrent
-
-166mb: http://buildbot.pypy.org/misc/torrent/introductory-talk-pycon-320x240.avi.torrent
-
-.. image:: image/introductory-talk-pycon.jpg
-   :scale: 100
-   :alt: Introductory talk at PyCon 2006
-   :align: left
-
-This introductory talk is given by core developers Michael Hudson and Christian Tismer at PyCon 2006, Dallas, US. (2006-02-26)
-
-PAL, 28 min, divx AVI
+This introductory talk is given by core developers Michael Hudson and
+Christian Tismer at PyCon 2006, Dallas, US. (2006-02-26)
 
 Michael Hudson talks about the basic building blocks of Python, the currently
 available back-ends, and the status of PyPy in general. Christian Tismer takes
-over to explain how co-routines can be used to implement things like
-Stackless and Greenlets in PyPy.
+over to explain how co-routines can be used to implement things like Stackless
+and Greenlets in PyPy.
 
+.. raw:: html
+
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/AWUhXW2pLDE?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Talk on Agile Open Source Methods in the PyPy project
 -----------------------------------------------------
 
-395mb: http://buildbot.pypy.org/misc/torrent/agile-talk-v1.avi.torrent
-
-153mb: http://buildbot.pypy.org/misc/torrent/agile-talk-320x240.avi.torrent
-
-.. image:: image/agile-talk.jpg
-   :scale: 100
-   :alt: Agile talk
-   :align: left
-
-Core developer Holger Krekel and project manager Beatrice During are giving a talk on the agile open source methods used in the PyPy project at PyCon 2006, Dallas, US. (2006-02-26)
-
-PAL, 26 min, divx AVI
+Core developer Holger Krekel and project manager Beatrice During are giving a
+talk on the agile open source methods used in the PyPy project at PyCon 2006,
+Dallas, US. (2006-02-26)
 
 Holger Krekel explains more about the goals and history of PyPy, and the
 structure and organization behind it. Bea During describes the intricacies of
 driving a distributed community in an agile way, and how to combine that with
 the formalities required for EU funding.
 
+.. raw:: html
+
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/ed-zAxZtGlY?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 PyPy Architecture session
 -------------------------
 
-744mb: http://buildbot.pypy.org/misc/torrent/architecture-session-v1.avi.torrent
-
-288mb: http://buildbot.pypy.org/misc/torrent/architecture-session-320x240.avi.torrent
-
-.. image:: image/architecture-session.jpg
-   :scale: 100
-   :alt: Architecture session
-   :align: left
-
-This architecture session is given by core developers Holger Krekel and Armin Rigo at PyCon 2006, Dallas, US. (2006-02-26)
-
-PAL, 48 min, divx AVI
+This architecture session is given by core developers Holger Krekel and Armin
+Rigo at PyCon 2006, Dallas, US. (2006-02-26)
 
 Holger Krekel and Armin Rigo talk about the basic implementation,
-implementation level aspects and the RPython translation toolchain. This
-talk also gives an insight into how a developer works with these tools on
-a daily basis, and pays special attention to flow graphs.
+implementation level aspects and the RPython translation toolchain. This talk
+also gives an insight into how a developer works with these tools on a daily
+basis, and pays special attention to flow graphs.
 
+.. raw:: html
+
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/7opXGaQUUA4?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Sprint tutorial
 ---------------
 
-680mb: http://buildbot.pypy.org/misc/torrent/sprint-tutorial-v2.avi.torrent
+Sprint tutorial by core developer Michael Hudson at PyCon 2006, Dallas,
+US. (2006-02-27)
 
-263mb: http://buildbot.pypy.org/misc/torrent/sprint-tutorial-320x240.avi.torrent
+Michael Hudson gives an in-depth, very technical introduction to a PyPy
+sprint. The film provides a detailed and hands-on overview about the
+architecture of PyPy, especially the RPython translation toolchain.
 
-.. image:: image/sprint-tutorial.jpg
-   :scale: 100
-   :alt: Sprint Tutorial
-   :align: left
+.. raw:: html
 
-Sprint tutorial by core developer Michael Hudson at PyCon 2006, Dallas, US. (2006-02-27)
-
-PAL, 44 min, divx AVI
-
-Michael Hudson gives an in-depth, very technical introduction to a PyPy sprint. The film provides a detailed and hands-on overview about the architecture of PyPy, especially the RPython translation toolchain.
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/1YV7J74xrMI?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Scripting .NET with IronPython by Jim Hugunin
 ---------------------------------------------
 
-372mb: http://buildbot.pypy.org/misc/torrent/ironpython-talk-v2.avi.torrent
+Talk by Jim Hugunin (Microsoft) on the IronPython implementation on the .NET
+framework at the PyCon 2006, Dallas, US.
 
-270mb: http://buildbot.pypy.org/misc/torrent/ironpython-talk-320x240.avi.torrent
+Jim Hugunin talks about regression tests, the code generation and the object
+layout, the new-style instance and gives a CLS interop demo.
 
-.. image:: image/ironpython.jpg
-   :scale: 100
-   :alt: Jim Hugunin on IronPython
-   :align: left
+.. raw:: html
 
-Talk by Jim Hugunin (Microsoft) on the IronPython implementation on the .NET framework at this years PyCon, Dallas, US.
-
-PAL, 44 min, DivX AVI
-
-Jim Hugunin talks about regression tests, the code generation and the object layout, the new-style instance and gives a CLS interop demo.
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/bq9ZGN3-o80?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Bram Cohen, founder and developer of BitTorrent
 -----------------------------------------------
 
-509mb: http://buildbot.pypy.org/misc/torrent/bram-cohen-interview-v1.avi.torrent
+Bram Cohen is interviewed by Steve Holden at the PyCon 2006, Dallas, US.
 
-370mb: http://buildbot.pypy.org/misc/torrent/bram-cohen-interview-320x240.avi.torrent
+.. raw:: html
 
-.. image:: image/bram.jpg
-   :scale: 100
-   :alt: Bram Cohen on BitTorrent
-   :align: left
-
-Bram Cohen is interviewed by Steve Holden at this years PyCon, Dallas, US.
-
-PAL, 60 min, DivX AVI
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/EopmJWrLmWI?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Keynote speech by Guido van Rossum on the new Python 2.5 features
 -----------------------------------------------------------------
 
-695mb: http://buildbot.pypy.org/misc/torrent/keynote-speech_guido-van-rossum_v1.avi.torrent
+Guido van Rossum explains the new Python 2.5 features at the PyCon 2006,
+Dallas, US.
 
-430mb: http://buildbot.pypy.org/misc/torrent/keynote-speech_guido-van-rossum_320x240.avi.torrent
+.. raw:: html
 
-.. image:: image/guido.jpg
-   :scale: 100
-   :alt: Guido van Rossum on Python 2.5
-   :align: left
-
-Guido van Rossum explains the new Python 2.5 features at this years PyCon, Dallas, US.
-
-PAL, 70 min, DivX AVI
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/RR2sX8tFGsI?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Trailer: PyPy sprint at the University of Palma de Mallorca
 -----------------------------------------------------------
 
-166mb: http://buildbot.pypy.org/misc/torrent/mallorca-trailer-v1.avi.torrent
+This trailer shows the PyPy team at the sprint in Mallorca, a
+behind-the-scenes of a typical PyPy coding sprint and talk as well as
+everything else.
 
-88mb: http://buildbot.pypy.org/misc/torrent/mallorca-trailer-medium.avi.torrent
+.. raw:: html
 
-64mb: http://buildbot.pypy.org/misc/torrent/mallorca-trailer-320x240.avi.torrent
-
-.. image:: image/mallorca-trailer.jpg
-   :scale: 100
-   :alt: Trailer PyPy sprint in Mallorca
-   :align: left
-
-This trailer shows the PyPy team at the sprint in Mallorca, a behind-the-scenes of a typical PyPy coding sprint and talk as well as everything else.
-
-PAL, 11 min, DivX AVI
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/swsnRfj_cek?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Coding discussion of core developers Armin Rigo and Samuele Pedroni
 -------------------------------------------------------------------
 
-620mb: http://buildbot.pypy.org/misc/torrent/coding-discussion-v1.avi.torrent
+Coding discussion between Armin Rigo and Samuele Pedroni during the PyPy
+sprint at the University of Palma de Mallorca, Spain. 27.1.2006
 
-240mb: http://buildbot.pypy.org/misc/torrent/coding-discussion-320x240.avi.torrent
+.. raw:: html
 
-.. image:: image/coding-discussion.jpg
-   :scale: 100
-   :alt: Coding discussion
-   :align: left
-
-Coding discussion between Armin Rigo and Samuele Pedroni during the PyPy sprint at the University of Palma de Mallorca, Spain. 27.1.2006
-
-PAL 40 min, DivX AVI
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/H_IgK9qmEss?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 PyPy technical talk at the University of Palma de Mallorca
 ----------------------------------------------------------
 
-865mb: http://buildbot.pypy.org/misc/torrent/introductory-student-talk-v2.avi.torrent
-
-437mb: http://buildbot.pypy.org/misc/torrent/introductory-student-talk-320x240.avi.torrent
-
-.. image:: image/introductory-student-talk.jpg
-   :scale: 100
-   :alt: Introductory student talk
-   :align: left
-
 Technical talk on the PyPy project at the University of Palma de Mallorca, Spain. 27.1.2006
 
-PAL 72 min, DivX AVI
+Core developers Armin Rigo, Samuele Pedroni and Carl Friedrich Bolz are giving
+an overview of the PyPy architecture, the standard interpreter, the RPython
+translation toolchain and the just-in-time compiler.
 
-Core developers Armin Rigo, Samuele Pedroni and Carl Friedrich Bolz are giving an overview of the PyPy architecture, the standard interpreter, the RPython translation toolchain and the just-in-time compiler.
+.. raw:: html
 
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/6dnUzVQaSlg?rel=0"
+   frameborder="0" allowfullscreen></iframe>
+
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -14,5 +14,18 @@
 .. branch: nupypy-axis-arg-check
 Check that axis arg is valid in _numpypy
 
+.. branch: iterator-in-rpython
+.. branch: numpypy_count_nonzero
+.. branch: even-more-jit-hooks
+Implement better JIT hooks
+.. branch: virtual-arguments
+Improve handling of **kwds greatly, making them virtual sometimes.
+.. branch: improve-rbigint
+Introduce __int128 on systems where it's supported and improve the speed of
+rlib/rbigint.py greatly.
+
 .. "uninteresting" branches that we should just ignore for the whatsnew:
 .. branch: slightly-shorter-c
+.. branch: better-enforceargs
+.. branch: rpython-unicode-formatting
+.. branch: jit-opaque-licm
diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py
--- a/pypy/interpreter/argument.py
+++ b/pypy/interpreter/argument.py
@@ -110,12 +110,10 @@
             make_sure_not_resized(self.keywords_w)
 
         make_sure_not_resized(self.arguments_w)
-        if w_stararg is not None:
-            self._combine_starargs_wrapped(w_stararg)
-        # if we have a call where **args are used at the callsite
-        # we shouldn't let the JIT see the argument matching
-        self._dont_jit = (w_starstararg is not None and
-                          self._combine_starstarargs_wrapped(w_starstararg))
+        self._combine_wrapped(w_stararg, w_starstararg)
+        # a flag that specifies whether the JIT can unroll loops that operate
+        # on the keywords
+        self._jit_few_keywords = self.keywords is None or jit.isconstant(len(self.keywords))
 
     def __repr__(self):
         """ NOT_RPYTHON """
@@ -129,7 +127,7 @@
 
     ###  Manipulation  ###
 
-    @jit.look_inside_iff(lambda self: not self._dont_jit)
+    @jit.look_inside_iff(lambda self: self._jit_few_keywords)
     def unpack(self): # slowish
         "Return a ([w1,w2...], {'kw':w3...}) pair."
         kwds_w = {}
@@ -176,13 +174,14 @@
         keywords, values_w = space.view_as_kwargs(w_starstararg)
         if keywords is not None: # this path also taken for empty dicts
             if self.keywords is None:
-                self.keywords = keywords[:] # copy to make non-resizable
-                self.keywords_w = values_w[:]
+                self.keywords = keywords
+                self.keywords_w = values_w
             else:
-                self._check_not_duplicate_kwargs(keywords, values_w)
+                _check_not_duplicate_kwargs(
+                    self.space, self.keywords, keywords, values_w)
                 self.keywords = self.keywords + keywords
                 self.keywords_w = self.keywords_w + values_w
-            return not jit.isconstant(len(self.keywords))
+            return
         if space.isinstance_w(w_starstararg, space.w_dict):
             keys_w = space.unpackiterable(w_starstararg)
         else:
@@ -198,57 +197,17 @@
                                    "a mapping, not %s" % (typename,)))
                 raise
             keys_w = space.unpackiterable(w_keys)
-        self._do_combine_starstarargs_wrapped(keys_w, w_starstararg)
-        return True
-
-    def _do_combine_starstarargs_wrapped(self, keys_w, w_starstararg):
-        space = self.space
         keywords_w = [None] * len(keys_w)
         keywords = [None] * len(keys_w)
-        i = 0
-        for w_key in keys_w:
-            try:
-                key = space.str_w(w_key)
-            except OperationError, e:
-                if e.match(space, space.w_TypeError):
-                    raise OperationError(
-                        space.w_TypeError,
-                        space.wrap("keywords must be strings"))
-                if e.match(space, space.w_UnicodeEncodeError):
-                    # Allow this to pass through
-                    key = None
-                else:
-                    raise
-            else:
-                if self.keywords and key in self.keywords:
-                    raise operationerrfmt(self.space.w_TypeError,
-                                          "got multiple values "
-                                          "for keyword argument "
-                                          "'%s'", key)
-            keywords[i] = key
-            keywords_w[i] = space.getitem(w_starstararg, w_key)
-            i += 1
+        _do_combine_starstarargs_wrapped(space, keys_w, w_starstararg, keywords, keywords_w, self.keywords)
+        self.keyword_names_w = keys_w
         if self.keywords is None:
             self.keywords = keywords
             self.keywords_w = keywords_w
         else:
             self.keywords = self.keywords + keywords
             self.keywords_w = self.keywords_w + keywords_w
-        self.keyword_names_w = keys_w
 
-    @jit.look_inside_iff(lambda self, keywords, keywords_w:
-            jit.isconstant(len(keywords) and
-            jit.isconstant(self.keywords)))
-    def _check_not_duplicate_kwargs(self, keywords, keywords_w):
-        # looks quadratic, but the JIT should remove all of it nicely.
-        # Also, all the lists should be small
-        for key in keywords:
-            for otherkey in self.keywords:
-                if otherkey == key:
-                    raise operationerrfmt(self.space.w_TypeError,
-                                          "got multiple values "
-                                          "for keyword argument "
-                                          "'%s'", key)
 
     def fixedunpack(self, argcount):
         """The simplest argument parsing: get the 'argcount' arguments,
@@ -269,34 +228,14 @@
 
     ###  Parsing for function calls  ###
 
-    # XXX: this should be @jit.look_inside_iff, but we need key word arguments,
-    # and it doesn't support them for now.
+    @jit.unroll_safe
     def _match_signature(self, w_firstarg, scope_w, signature, defaults_w=None,
                          blindargs=0):
         """Parse args and kwargs according to the signature of a code object,
         or raise an ArgErr in case of failure.
-        Return the number of arguments filled in.
         """
-        if jit.we_are_jitted() and self._dont_jit:
-            return self._match_signature_jit_opaque(w_firstarg, scope_w,
-                                                    signature, defaults_w,
-                                                    blindargs)
-        return self._really_match_signature(w_firstarg, scope_w, signature,
-                                            defaults_w, blindargs)
-
-    @jit.dont_look_inside
-    def _match_signature_jit_opaque(self, w_firstarg, scope_w, signature,
-                                    defaults_w, blindargs):
-        return self._really_match_signature(w_firstarg, scope_w, signature,
-                                            defaults_w, blindargs)
-
-    @jit.unroll_safe
-    def _really_match_signature(self, w_firstarg, scope_w, signature,
-                                defaults_w=None, blindargs=0):
-        #
+        #   w_firstarg = a first argument to be inserted (e.g. self) or None
         #   args_w = list of the normal actual parameters, wrapped
-        #   kwds_w = real dictionary {'keyword': wrapped parameter}
-        #   argnames = list of formal parameter names
         #   scope_w = resulting list of wrapped values
         #
 
@@ -304,38 +243,29 @@
         # so all values coming from there can be assumed constant. It assumes
         # that the length of the defaults_w does not vary too much.
         co_argcount = signature.num_argnames() # expected formal arguments, without */**
-        has_vararg = signature.has_vararg()
-        has_kwarg = signature.has_kwarg()
-        extravarargs = None
-        input_argcount =  0
 
+        # put the special w_firstarg into the scope, if it exists
         if w_firstarg is not None:
             upfront = 1
             if co_argcount > 0:
                 scope_w[0] = w_firstarg
-                input_argcount = 1
-            else:
-                extravarargs = [w_firstarg]
         else:
             upfront = 0
 
         args_w = self.arguments_w
         num_args = len(args_w)
+        avail = num_args + upfront
 
         keywords = self.keywords
-        keywords_w = self.keywords_w
         num_kwds = 0
         if keywords is not None:
             num_kwds = len(keywords)
 
-        avail = num_args + upfront
 
+        # put as many positional input arguments into place as available
+        input_argcount = upfront
         if input_argcount < co_argcount:
-            # put as many positional input arguments into place as available
-            if avail > co_argcount:
-                take = co_argcount - input_argcount
-            else:
-                take = num_args
+            take = min(num_args, co_argcount - upfront)
 
             # letting the JIT unroll this loop is safe, because take is always
             # smaller than co_argcount
@@ -344,11 +274,10 @@
             input_argcount += take
 
         # collect extra positional arguments into the *vararg
-        if has_vararg:
+        if signature.has_vararg():
             args_left = co_argcount - upfront
             if args_left < 0:  # check required by rpython
-                assert extravarargs is not None
-                starargs_w = extravarargs
+                starargs_w = [w_firstarg]
                 if num_args:
                     starargs_w = starargs_w + args_w
             elif num_args > args_left:
@@ -357,86 +286,68 @@
                 starargs_w = []
             scope_w[co_argcount] = self.space.newtuple(starargs_w)
         elif avail > co_argcount:
-            raise ArgErrCount(avail, num_kwds,
-                              co_argcount, has_vararg, has_kwarg,
-                              defaults_w, 0)
+            raise ArgErrCount(avail, num_kwds, signature, defaults_w, 0)
 
-        # the code assumes that keywords can potentially be large, but that
-        # argnames is typically not too large
-        num_remainingkwds = num_kwds
-        used_keywords = None
-        if keywords:
-            # letting JIT unroll the loop is *only* safe if the callsite didn't
-            # use **args because num_kwds can be arbitrarily large otherwise.
-            used_keywords = [False] * num_kwds
-            for i in range(num_kwds):
-                name = keywords[i]
-                # If name was not encoded as a string, it could be None. In that
-                # case, it's definitely not going to be in the signature.
-                if name is None:
-                    continue
-                j = signature.find_argname(name)
-                if j < 0:
-                    continue
-                elif j < input_argcount:
-                    # check that no keyword argument conflicts with these. note
-                    # that for this purpose we ignore the first blindargs,
-                    # which were put into place by prepend().  This way,
-                    # keywords do not conflict with the hidden extra argument
-                    # bound by methods.
-                    if blindargs <= j:
-                        raise ArgErrMultipleValues(name)
+        # if a **kwargs argument is needed, create the dict
+        w_kwds = None
+        if signature.has_kwarg():
+            w_kwds = self.space.newdict(kwargs=True)
+            scope_w[co_argcount + signature.has_vararg()] = w_kwds
+
+        # handle keyword arguments
+        num_remainingkwds = 0
+        keywords_w = self.keywords_w
+        kwds_mapping = None
+        if num_kwds:
+            # kwds_mapping maps target indexes in the scope (minus input_argcount)
+            # to positions in the keywords_w list
+            cnt = (co_argcount - input_argcount)
+            if cnt < 0:
+                cnt = 0
+            kwds_mapping = [0] * cnt
+            # initialize manually, for the JIT :-(
+            for i in range(len(kwds_mapping)):
+                kwds_mapping[i] = -1
+            # match the keywords given at the call site to the argument names
+            # the called function takes
+            # this function must not take a scope_w, to make the scope not
+            # escape
+            num_remainingkwds = _match_keywords(
+                    signature, blindargs, input_argcount, keywords,
+                    kwds_mapping, self._jit_few_keywords)
+            if num_remainingkwds:
+                if w_kwds is not None:
+                    # collect extra keyword arguments into the **kwarg
+                    _collect_keyword_args(
+                            self.space, keywords, keywords_w, w_kwds,
+                            kwds_mapping, self.keyword_names_w, self._jit_few_keywords)
                 else:
-                    assert scope_w[j] is None
-                    scope_w[j] = keywords_w[i]
-                    used_keywords[i] = True # mark as used
-                    num_remainingkwds -= 1
+                    if co_argcount == 0:
+                        raise ArgErrCount(avail, num_kwds, signature, defaults_w, 0)
+                    raise ArgErrUnknownKwds(self.space, num_remainingkwds, keywords,
+                                            kwds_mapping, self.keyword_names_w)
+
+        # check for missing arguments and fill them from the kwds,
+        # or with defaults, if available
         missing = 0
         if input_argcount < co_argcount:
             def_first = co_argcount - (0 if defaults_w is None else len(defaults_w))
+            j = 0
+            kwds_index = -1
             for i in range(input_argcount, co_argcount):
-                if scope_w[i] is not None:
-                    continue
+                if kwds_mapping is not None:
+                    kwds_index = kwds_mapping[j]
+                    j += 1
+                    if kwds_index >= 0:
+                        scope_w[i] = keywords_w[kwds_index]
+                        continue
                 defnum = i - def_first
                 if defnum >= 0:
                     scope_w[i] = defaults_w[defnum]
                 else:
-                    # error: not enough arguments.  Don't signal it immediately
-                    # because it might be related to a problem with */** or
-                    # keyword arguments, which will be checked for below.
                     missing += 1
-
-        # collect extra keyword arguments into the **kwarg
-        if has_kwarg:
-            w_kwds = self.space.newdict(kwargs=True)
-            if num_remainingkwds:
-                #
-                limit = len(keywords)
-                if self.keyword_names_w is not None:
-                    limit -= len(self.keyword_names_w)
-                for i in range(len(keywords)):
-                    if not used_keywords[i]:
-                        if i < limit:
-                            w_key = self.space.wrap(keywords[i])
-                        else:
-                            w_key = self.keyword_names_w[i - limit]
-                        self.space.setitem(w_kwds, w_key, keywords_w[i])
-                #
-            scope_w[co_argcount + has_vararg] = w_kwds
-        elif num_remainingkwds:
-            if co_argcount == 0:
-                raise ArgErrCount(avail, num_kwds,
-                              co_argcount, has_vararg, has_kwarg,
-                              defaults_w, missing)
-            raise ArgErrUnknownKwds(self.space, num_remainingkwds, keywords,
-                                    used_keywords, self.keyword_names_w)
-
-        if missing:
-            raise ArgErrCount(avail, num_kwds,
-                              co_argcount, has_vararg, has_kwarg,
-                              defaults_w, missing)
-
-        return co_argcount + has_vararg + has_kwarg
+            if missing:
+                raise ArgErrCount(avail, num_kwds, signature, defaults_w, missing)
 
 
 
@@ -448,11 +359,12 @@
         scope_w must be big enough for signature.
         """
         try:
-            return self._match_signature(w_firstarg,
-                                         scope_w, signature, defaults_w, 0)
+            self._match_signature(w_firstarg,
+                                  scope_w, signature, defaults_w, 0)
         except ArgErr, e:
             raise operationerrfmt(self.space.w_TypeError,
                                   "%s() %s", fnname, e.getmsg())
+        return signature.scope_length()
 
     def _parse(self, w_firstarg, signature, defaults_w, blindargs=0):
         """Parse args and kwargs according to the signature of a code object,
@@ -499,6 +411,102 @@
                 space.setitem(w_kwds, w_key, self.keywords_w[i])
         return w_args, w_kwds
 
+# JIT helper functions
+# these functions contain functionality that the JIT is not always supposed to
+# look at. They should not get a self arguments, which makes the amount of
+# arguments annoying :-(
+
+@jit.look_inside_iff(lambda space, existingkeywords, keywords, keywords_w:
+        jit.isconstant(len(keywords) and
+        jit.isconstant(existingkeywords)))
+def _check_not_duplicate_kwargs(space, existingkeywords, keywords, keywords_w):
+    # looks quadratic, but the JIT should remove all of it nicely.
+    # Also, all the lists should be small
+    for key in keywords:
+        for otherkey in existingkeywords:
+            if otherkey == key:
+                raise operationerrfmt(space.w_TypeError,
+                                      "got multiple values "
+                                      "for keyword argument "
+                                      "'%s'", key)
+
+def _do_combine_starstarargs_wrapped(space, keys_w, w_starstararg, keywords,
+        keywords_w, existingkeywords):
+    i = 0
+    for w_key in keys_w:
+        try:
+            key = space.str_w(w_key)
+        except OperationError, e:
+            if e.match(space, space.w_TypeError):
+                raise OperationError(
+                    space.w_TypeError,
+                    space.wrap("keywords must be strings"))
+            if e.match(space, space.w_UnicodeEncodeError):
+                # Allow this to pass through
+                key = None
+            else:
+                raise
+        else:
+            if existingkeywords and key in existingkeywords:
+                raise operationerrfmt(space.w_TypeError,
+                                      "got multiple values "
+                                      "for keyword argument "
+                                      "'%s'", key)
+        keywords[i] = key
+        keywords_w[i] = space.getitem(w_starstararg, w_key)
+        i += 1
+
+@jit.look_inside_iff(
+    lambda signature, blindargs, input_argcount,
+           keywords, kwds_mapping, jiton: jiton)
+def _match_keywords(signature, blindargs, input_argcount,
+                    keywords, kwds_mapping, _):
+    # letting JIT unroll the loop is *only* safe if the callsite didn't
+    # use **args because num_kwds can be arbitrarily large otherwise.
+    num_kwds = num_remainingkwds = len(keywords)
+    for i in range(num_kwds):
+        name = keywords[i]
+        # If name was not encoded as a string, it could be None. In that
+        # case, it's definitely not going to be in the signature.
+        if name is None:
+            continue
+        j = signature.find_argname(name)
+        # if j == -1 nothing happens, because j < input_argcount and
+        # blindargs > j
+        if j < input_argcount:
+            # check that no keyword argument conflicts with these. note
+            # that for this purpose we ignore the first blindargs,
+            # which were put into place by prepend().  This way,
+            # keywords do not conflict with the hidden extra argument
+            # bound by methods.
+            if blindargs <= j:
+                raise ArgErrMultipleValues(name)
+        else:
+            kwds_mapping[j - input_argcount] = i # map to the right index
+            num_remainingkwds -= 1
+    return num_remainingkwds
+
+@jit.look_inside_iff(
+    lambda space, keywords, keywords_w, w_kwds, kwds_mapping,
+        keyword_names_w, jiton: jiton)
+def _collect_keyword_args(space, keywords, keywords_w, w_kwds, kwds_mapping,
+                          keyword_names_w, _):
+    limit = len(keywords)
+    if keyword_names_w is not None:
+        limit -= len(keyword_names_w)
+    for i in range(len(keywords)):
+        # again a dangerous-looking loop that either the JIT unrolls
+        # or that is not too bad, because len(kwds_mapping) is small
+        for j in kwds_mapping:
+            if i == j:
+                break
+        else:
+            if i < limit:
+                w_key = space.wrap(keywords[i])
+            else:
+                w_key = keyword_names_w[i - limit]
+            space.setitem(w_kwds, w_key, keywords_w[i])
+
 class ArgumentsForTranslation(Arguments):
     def __init__(self, space, args_w, keywords=None, keywords_w=None,
                  w_stararg=None, w_starstararg=None):
@@ -654,11 +662,9 @@
 
 class ArgErrCount(ArgErr):
 
-    def __init__(self, got_nargs, nkwds, expected_nargs, has_vararg, has_kwarg,
+    def __init__(self, got_nargs, nkwds, signature,
                  defaults_w, missing_args):
-        self.expected_nargs = expected_nargs
-        self.has_vararg = has_vararg
-        self.has_kwarg = has_kwarg
+        self.signature = signature
 
         self.num_defaults = 0 if defaults_w is None else len(defaults_w)
         self.missing_args = missing_args
@@ -666,16 +672,16 @@
         self.num_kwds = nkwds
 
     def getmsg(self):
-        n = self.expected_nargs
+        n = self.signature.num_argnames()
         if n == 0:
             msg = "takes no arguments (%d given)" % (
                 self.num_args + self.num_kwds)
         else:
             defcount = self.num_defaults
-            has_kwarg = self.has_kwarg
+            has_kwarg = self.signature.has_kwarg()
             num_args = self.num_args
             num_kwds = self.num_kwds
-            if defcount == 0 and not self.has_vararg:
+            if defcount == 0 and not self.signature.has_vararg():
                 msg1 = "exactly"
                 if not has_kwarg:
                     num_args += num_kwds
@@ -714,13 +720,13 @@
 
 class ArgErrUnknownKwds(ArgErr):
 
-    def __init__(self, space, num_remainingkwds, keywords, used_keywords,
+    def __init__(self, space, num_remainingkwds, keywords, kwds_mapping,
                  keyword_names_w):
         name = ''
         self.num_kwds = num_remainingkwds
         if num_remainingkwds == 1:
             for i in range(len(keywords)):
-                if not used_keywords[i]:
+                if i not in kwds_mapping:
                     name = keywords[i]
                     if name is None:
                         # We'll assume it's unicode. Encode it.
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1033,6 +1033,10 @@
         w_meth = self.getattr(w_obj, self.wrap(methname))
         return self.call_function(w_meth, *arg_w)
 
+    def raise_key_error(self, w_key):
+        e = self.call_function(self.w_KeyError, w_key)
+        raise OperationError(self.w_KeyError, e)
+
     def lookup(self, w_obj, name):
         w_type = self.type(w_obj)
         w_mro = self.getattr(w_type, self.wrap("__mro__"))
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -496,7 +496,12 @@
 
     # apply kw_spec
     for name, spec in kw_spec.items():
-        unwrap_spec[argnames.index(name)] = spec
+        try:
+            unwrap_spec[argnames.index(name)] = spec
+        except ValueError:
+            raise ValueError("unwrap_spec() got a keyword %r but it is not "
+                             "the name of an argument of the following "
+                             "function" % (name,))
 
     return unwrap_spec
 
diff --git a/pypy/interpreter/test/test_argument.py b/pypy/interpreter/test/test_argument.py
--- a/pypy/interpreter/test/test_argument.py
+++ b/pypy/interpreter/test/test_argument.py
@@ -57,6 +57,9 @@
     def __nonzero__(self):
         raise NotImplementedError
 
+class kwargsdict(dict):
+    pass
+
 class DummySpace(object):
     def newtuple(self, items):
         return tuple(items)
@@ -76,9 +79,13 @@
         return list(it)
 
     def view_as_kwargs(self, x):
+        if len(x) == 0:
+            return [], []
         return None, None
 
     def newdict(self, kwargs=False):
+        if kwargs:
+            return kwargsdict()
         return {}
 
     def newlist(self, l=[]):
@@ -299,6 +306,22 @@
             args._match_signature(None, l, Signature(["a", "b", "c"], None, "**"))
             assert l == [1, 2, 3, {'d': 4}]
 
+    def test_match_kwds_creates_kwdict(self):
+        space = DummySpace()
+        kwds = [("c", 3), ('d', 4)]
+        for i in range(4):
+            kwds_w = dict(kwds[:i])
+            keywords = kwds_w.keys()
+            keywords_w = kwds_w.values()
+            w_kwds = dummy_wrapped_dict(kwds[i:])
+            if i == 3:
+                w_kwds = None
+            args = Arguments(space, [1, 2], keywords, keywords_w, w_starstararg=w_kwds)
+            l = [None, None, None, None]
+            args._match_signature(None, l, Signature(["a", "b", "c"], None, "**"))
+            assert l == [1, 2, 3, {'d': 4}]
+            assert isinstance(l[-1], kwargsdict)
+
     def test_duplicate_kwds(self):
         space = DummySpace()
         excinfo = py.test.raises(OperationError, Arguments, space, [], ["a"],
@@ -546,34 +569,47 @@
     def test_missing_args(self):
         # got_nargs, nkwds, expected_nargs, has_vararg, has_kwarg,
         # defaults_w, missing_args
-        err = ArgErrCount(1, 0, 0, False, False, None, 0)
+        sig = Signature([], None, None)
+        err = ArgErrCount(1, 0, sig, None, 0)
         s = err.getmsg()
         assert s == "takes no arguments (1 given)"
-        err = ArgErrCount(0, 0, 1, False, False, [], 1)
+
+        sig = Signature(['a'], None, None)
+        err = ArgErrCount(0, 0, sig, [], 1)
         s = err.getmsg()
         assert s == "takes exactly 1 argument (0 given)"
-        err = ArgErrCount(3, 0, 2, False, False, [], 0)
+
+        sig = Signature(['a', 'b'], None, None)
+        err = ArgErrCount(3, 0, sig, [], 0)
         s = err.getmsg()
         assert s == "takes exactly 2 arguments (3 given)"
-        err = ArgErrCount(3, 0, 2, False, False, ['a'], 0)
+        err = ArgErrCount(3, 0, sig, ['a'], 0)
         s = err.getmsg()
         assert s == "takes at most 2 arguments (3 given)"
-        err = ArgErrCount(1, 0, 2, True, False, [], 1)
+
+        sig = Signature(['a', 'b'], '*', None)
+        err = ArgErrCount(1, 0, sig, [], 1)
         s = err.getmsg()
         assert s == "takes at least 2 arguments (1 given)"
-        err = ArgErrCount(0, 1, 2, True, False, ['a'], 1)
+        err = ArgErrCount(0, 1, sig, ['a'], 1)
         s = err.getmsg()
         assert s == "takes at least 1 non-keyword argument (0 given)"
-        err = ArgErrCount(2, 1, 1, False, True, [], 0)
+
+        sig = Signature(['a'], None, '**')
+        err = ArgErrCount(2, 1, sig, [], 0)
         s = err.getmsg()
         assert s == "takes exactly 1 non-keyword argument (2 given)"
-        err = ArgErrCount(0, 1, 1, False, True, [], 1)
+        err = ArgErrCount(0, 1, sig, [], 1)
         s = err.getmsg()
         assert s == "takes exactly 1 non-keyword argument (0 given)"
-        err = ArgErrCount(0, 1, 1, True, True, [], 1)
+
+        sig = Signature(['a'], '*', '**')
+        err = ArgErrCount(0, 1, sig, [], 1)
         s = err.getmsg()
         assert s == "takes at least 1 non-keyword argument (0 given)"
-        err = ArgErrCount(2, 1, 1, False, True, ['a'], 0)
+
+        sig = Signature(['a'], None, '**')
+        err = ArgErrCount(2, 1, sig, ['a'], 0)
         s = err.getmsg()
         assert s == "takes at most 1 non-keyword argument (2 given)"
 
@@ -596,11 +632,14 @@
 
     def test_unknown_keywords(self):
         space = DummySpace()
-        err = ArgErrUnknownKwds(space, 1, ['a', 'b'], [True, False], None)
+        err = ArgErrUnknownKwds(space, 1, ['a', 'b'], [0], None)
         s = err.getmsg()
         assert s == "got an unexpected keyword argument 'b'"
+        err = ArgErrUnknownKwds(space, 1, ['a', 'b'], [1], None)
+        s = err.getmsg()
+        assert s == "got an unexpected keyword argument 'a'"
         err = ArgErrUnknownKwds(space, 2, ['a', 'b', 'c'],
-                                [True, False, False], None)
+                                [0], None)
         s = err.getmsg()
         assert s == "got 2 unexpected keyword arguments"
 
@@ -610,7 +649,7 @@
                 defaultencoding = 'utf-8'
         space = DummySpaceUnicode()
         err = ArgErrUnknownKwds(space, 1, ['a', None, 'b', 'c'],
-                                [True, False, True, True],
+                                [0, 3, 2],
                                 [unichr(0x1234), u'b', u'c'])
         s = err.getmsg()
         assert s == "got an unexpected keyword argument '\xe1\x88\xb4'"
diff --git a/pypy/interpreter/test/test_function.py b/pypy/interpreter/test/test_function.py
--- a/pypy/interpreter/test/test_function.py
+++ b/pypy/interpreter/test/test_function.py
@@ -16,6 +16,7 @@
         assert f.func_defaults == None
         assert f.func_dict == {}
         assert type(f.func_globals) == dict
+        assert f.func_globals is f.__globals__
         assert f.func_closure is None
         assert f.func_doc == None
         assert f.func_name == 'f'
diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py
--- a/pypy/interpreter/typedef.py
+++ b/pypy/interpreter/typedef.py
@@ -37,7 +37,7 @@
         assert __total_ordering__ in (None, 'auto'), "Unknown value for __total_ordering"
         if __total_ordering__ == 'auto':
             self.auto_total_ordering()
-    
+
     def add_entries(self, **rawdict):
         # xxx fix the names of the methods to match what app-level expects
         for key, value in rawdict.items():
@@ -228,7 +228,7 @@
 
     def add(Proto):
         for key, value in Proto.__dict__.items():
-            if (not key.startswith('__') and not key.startswith('_mixin_') 
+            if (not key.startswith('__') and not key.startswith('_mixin_')
                     or key == '__del__'):
                 if hasattr(value, "func_name"):
                     value = func_with_new_name(value, value.func_name)
@@ -315,10 +315,10 @@
         class Proto(object):
             def getdict(self, space):
                 return self.w__dict__
-            
+
             def setdict(self, space, w_dict):
                 self.w__dict__ = check_new_dictionary(space, w_dict)
-            
+
             def user_setup(self, space, w_subtype):
                 self.w__dict__ = space.newdict(
                     instance=True)
@@ -383,7 +383,7 @@
             return %(name)s(%(args)s, %(extra)s)
         """
         miniglobals[cls_name] = cls
-    
+
     name = func.__name__
     extra = ', '.join(extraargs)
     from pypy.interpreter import pycode
@@ -503,7 +503,7 @@
                 space, '__delattr__',
                 self.reqcls, Arguments(space, [w_obj,
                                                space.wrap(self.name)]))
-    
+
     def descr_get_objclass(space, property):
         return property.objclass_getter(space)
 
@@ -521,7 +521,7 @@
             return space.w_None
         else:
             return w_value
-    
+
     return GetSetProperty(fget, cls=cls, doc=doc)
 
 GetSetProperty.typedef = TypeDef(
@@ -543,7 +543,7 @@
         self.index = index
         self.name = name
         self.w_cls = w_cls
-    
+
     def typecheck(self, space, w_obj):
         if not space.is_true(space.isinstance(w_obj, self.w_cls)):
             raise operationerrfmt(space.w_TypeError,
@@ -552,7 +552,7 @@
                                   self.name,
                                   self.w_cls.name,
                                   space.type(w_obj).getname(space))
-    
+
     def descr_member_get(self, space, w_obj, w_w_cls=None):
         """member.__get__(obj[, type]) -> value
         Read the slot 'member' of the given 'obj'."""
@@ -565,13 +565,13 @@
                 raise OperationError(space.w_AttributeError,
                                      space.wrap(self.name)) # XXX better message
             return w_result
-    
+
     def descr_member_set(self, space, w_obj, w_value):
         """member.__set__(obj, value)
         Write into the slot 'member' of the given 'obj'."""
         self.typecheck(space, w_obj)
         w_obj.setslotvalue(self.index, w_value)
-    
+
     def descr_member_del(self, space, w_obj):
         """member.__delete__(obj)
         Delete the value of the slot 'member' from the given 'obj'."""
@@ -803,15 +803,16 @@
     func_dict = getset_func_dict,
     func_defaults = getset_func_defaults,
     func_globals = interp_attrproperty_w('w_func_globals', cls=Function),
-    func_closure = GetSetProperty( Function.fget_func_closure ),
+    func_closure = GetSetProperty(Function.fget_func_closure),
     __code__ = getset_func_code,
     __doc__ = getset_func_doc,
     __name__ = getset_func_name,
     __dict__ = getset_func_dict,
     __defaults__ = getset_func_defaults,
+    __globals__ = interp_attrproperty_w('w_func_globals', cls=Function),
     __module__ = getset___module__,
     __weakref__ = make_weakref_descr(Function),
-    )
+)
 Function.typedef.acceptable_as_base_class = False
 
 Method.typedef = TypeDef(
diff --git a/pypy/jit/backend/arm/assembler.py b/pypy/jit/backend/arm/assembler.py
--- a/pypy/jit/backend/arm/assembler.py
+++ b/pypy/jit/backend/arm/assembler.py
@@ -70,7 +70,9 @@
         self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
 
     def set_debug(self, v):
+        r = self._debug
         self._debug = v
+        return r
 
     def _compute_stack_size(self):
         self.STACK_FIXED_AREA = len(r.callee_saved_registers) * WORD
@@ -124,9 +126,13 @@
         self._leave_jitted_hook_save_exc = \
                                     self._gen_leave_jitted_hook_code(True)
         self._leave_jitted_hook = self._gen_leave_jitted_hook_code(False)
-        debug_start('jit-backend-counts')
-        self.set_debug(have_debug_prints())
-        debug_stop('jit-backend-counts')
+        if not self._debug:
+            # if self._debug is already set it means that someone called
+            # set_debug by hand before initializing the assembler. Leave it
+            # as it is
+            debug_start('jit-backend-counts')
+            self.set_debug(have_debug_prints())
+            debug_stop('jit-backend-counts')
 
     def finish_once(self):
         if self._debug:
@@ -326,7 +332,7 @@
                                     imm=descr.jit_wb_if_flag_byteofs)
             mc.TST_ri(r.ip.value, imm=0x80)
         #
-	mc.MOV_rr(r.pc.value, r.lr.value)
+        mc.MOV_rr(r.pc.value, r.lr.value)
         #
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.wb_slowpath[withcards + 2 * withfloats] = rawstart
diff --git a/pypy/jit/backend/arm/detect.py b/pypy/jit/backend/arm/detect.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/detect.py
@@ -0,0 +1,31 @@
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rpython.tool import rffi_platform
+from pypy.translator.platform import CompilationError
+
+eci = ExternalCompilationInfo(
+    post_include_bits=["""
+// we need to disable optimizations so the compiler does not remove this
+// function when checking if the file compiles
+static void __attribute__((optimize("O0"))) pypy__arm_has_vfp()
+{
+    asm volatile("VMOV s0, s1");
+}
+    """])
+
+def detect_hardfloat():
+    # http://gcc.gnu.org/ml/gcc-patches/2010-10/msg02419.html
+    if rffi_platform.getdefined('__ARM_PCS_VFP', ''):
+       return rffi_platform.getconstantinteger('__ARM_PCS_VFP', '')
+    return False
+
+def detect_float():
+    """Check for hardware float support
+    we try to compile a function containing a VFP instruction, and if the
+    compiler accepts it we assume we are fine
+    """
+    try:
+        rffi_platform.verify_eci(eci)
+        return True
+    except CompilationError:
+        return False
diff --git a/pypy/jit/backend/arm/opassembler.py b/pypy/jit/backend/arm/opassembler.py
--- a/pypy/jit/backend/arm/opassembler.py
+++ b/pypy/jit/backend/arm/opassembler.py
@@ -16,7 +16,6 @@
                                                 gen_emit_unary_float_op,
                                                 saved_registers,
                                                 count_reg_args)
-from pypy.jit.backend.arm.helper.regalloc import check_imm_arg
 from pypy.jit.backend.arm.codebuilder import ARMv7Builder, OverwritingBuilder
 from pypy.jit.backend.arm.jump import remap_frame_layout
 from pypy.jit.backend.arm.regalloc import TempInt, TempPtr
@@ -28,7 +27,7 @@
 from pypy.jit.metainterp.history import JitCellToken, TargetToken
 from pypy.jit.metainterp.resoperation import rop
 from pypy.rlib.objectmodel import we_are_translated
-from pypy.rpython.lltypesystem import lltype, rffi, rstr
+from pypy.rpython.lltypesystem import rstr
 
 NO_FORCE_INDEX = -1
 
@@ -50,7 +49,7 @@
 
     def emit_op_int_add(self, op, arglocs, regalloc, fcond):
         return self.int_add_impl(op, arglocs, regalloc, fcond)
- 
+
     def int_add_impl(self, op, arglocs, regalloc, fcond, flags=False):
         l0, l1, res = arglocs
         if flags:
@@ -94,6 +93,13 @@
         self.mc.MUL(res.value, reg1.value, reg2.value)
         return fcond
 
+    def emit_op_int_force_ge_zero(self, op, arglocs, regalloc, fcond):
+        arg, res = arglocs
+        self.mc.CMP_ri(arg.value, 0)
+        self.mc.MOV_ri(res.value, 0, cond=c.LT)
+        self.mc.MOV_rr(res.value, arg.value, cond=c.GE)
+        return fcond
+
     #ref: http://blogs.arm.com/software-enablement/detecting-overflow-from-mul/
     def emit_guard_int_mul_ovf(self, op, guard, arglocs, regalloc, fcond):
         reg1 = arglocs[0]
@@ -166,7 +172,6 @@
     emit_op_int_add_ovf = emit_op_int_add
     emit_op_int_sub_ovf = emit_op_int_sub
 
-
     emit_op_int_is_true = gen_emit_op_unary_cmp('int_is_true', c.NE)
     emit_op_int_is_zero = gen_emit_op_unary_cmp('int_is_zero', c.EQ)
 
@@ -184,7 +189,6 @@
         self.mc.RSB_ri(resloc.value, l0.value, imm=0)
         return fcond
 
-
     def _emit_guard(self, op, arglocs, fcond, save_exc,
                                     is_guard_not_invalidated=False):
         assert isinstance(save_exc, bool)
@@ -287,7 +291,6 @@
         return self._emit_guard(op, locs, fcond, save_exc=False,
                                             is_guard_not_invalidated=True)
 
-
     def emit_op_jump(self, op, arglocs, regalloc, fcond):
         # The backend's logic assumes that the target code is in a piece of
         # assembler that was also called with the same number of arguments,
@@ -355,7 +358,8 @@
         self.gen_func_epilog()
         return fcond
 
-    def emit_op_call(self, op, arglocs, regalloc, fcond, force_index=NO_FORCE_INDEX):
+    def emit_op_call(self, op, arglocs, regalloc, fcond,
+                                        force_index=NO_FORCE_INDEX):
         if force_index == NO_FORCE_INDEX:
             force_index = self.write_new_force_index()
         resloc = arglocs[0]
@@ -364,16 +368,18 @@
         descr = op.getdescr()
         size = descr.get_result_size()
         signed = descr.is_result_signed()
-        cond = self._emit_call(force_index, adr, arglist, 
+        cond = self._emit_call(force_index, adr, arglist,
                                             fcond, resloc, (size, signed))
         return cond
 
-    def _emit_call(self, force_index, adr, arglocs, fcond=c.AL, 
-                                                 resloc=None, result_info=(-1,-1)):
+    def _emit_call(self, force_index, adr, arglocs, fcond=c.AL,
+                                         resloc=None, result_info=(-1, -1)):
         if self.cpu.use_hf_abi:
-            stack_args, adr = self._setup_call_hf(force_index, adr, arglocs, fcond, resloc, result_info)
+            stack_args, adr = self._setup_call_hf(force_index, adr,
+                                        arglocs, fcond, resloc, result_info)
         else:
-            stack_args, adr = self._setup_call_sf(force_index, adr, arglocs, fcond, resloc, result_info)
+            stack_args, adr = self._setup_call_sf(force_index, adr,
+                                        arglocs, fcond, resloc, result_info)
 
         #the actual call
         #self.mc.BKPT()
@@ -409,7 +415,7 @@
                 else:
                     n += DOUBLE_WORD
             self._adjust_sp(-n, fcond=fcond)
-            assert n % 8 == 0 # sanity check
+            assert n % 8 == 0  # sanity check
 
     def _collect_stack_args_sf(self, arglocs):
         n_args = len(arglocs)
@@ -441,9 +447,8 @@
                 else:
                     self.regalloc_push(arg)
 
-    def _setup_call_sf(self, force_index, adr, arglocs, fcond=c.AL, 
-                                                 resloc=None, result_info=(-1,-1)):
-        n_args = len(arglocs)
+    def _setup_call_sf(self, force_index, adr, arglocs, fcond=c.AL,
+                                         resloc=None, result_info=(-1, -1)):
         reg_args = count_reg_args(arglocs)
         stack_args = self._collect_stack_args_sf(arglocs)
         self._push_stack_args(stack_args)
@@ -487,10 +492,8 @@
             self.mov_from_vfp_loc(loc, reg, r.all_regs[reg.value + 1])
         return stack_args, adr
 
-
-    def _setup_call_hf(self, force_index, adr, arglocs, fcond=c.AL, 
-                                                 resloc=None, result_info=(-1,-1)):
-        n_reg_args = n_vfp_args = 0
+    def _setup_call_hf(self, force_index, adr, arglocs, fcond=c.AL,
+                                         resloc=None, result_info=(-1, -1)):
         non_float_locs = []
         non_float_regs = []
         float_locs = []
@@ -500,24 +503,24 @@
         for arg in arglocs:
             if arg.type != FLOAT:
                 if len(non_float_regs) < len(r.argument_regs):
-		    reg = r.argument_regs[len(non_float_regs)]
+                    reg = r.argument_regs[len(non_float_regs)]
                     non_float_locs.append(arg)
                     non_float_regs.append(reg)
-                else: # non-float argument that needs to go on the stack 
+                else:  # non-float argument that needs to go on the stack
                     count += 1
                     stack_args.append(arg)
             else:
-                if len(float_regs) < len(r.vfp_argument_regs): 
-		    reg = r.vfp_argument_regs[len(float_regs)]
+                if len(float_regs) < len(r.vfp_argument_regs):
+                    reg = r.vfp_argument_regs[len(float_regs)]
                     float_locs.append(arg)
                     float_regs.append(reg)
-                else: # float argument that needs to go on the stack
+                else:  # float argument that needs to go on the stack
                     if count % 2 != 0:
                         stack_args.append(None)
-			count = 0
+                        count = 0
                     stack_args.append(arg)
         # align the stack
-	if count % 2 != 0:
+        if count % 2 != 0:
             stack_args.append(None)
         self._push_stack_args(stack_args)
         # Check that the address of the function we want to call is not
@@ -608,7 +611,7 @@
             # GCFLAG_CARDS_SET is in this byte at 0x80
             self.mc.TST_ri(r.ip.value, imm=0x80)
 
-            js_location = self.mc.currpos() # 
+            js_location = self.mc.currpos()
             self.mc.BKPT()
         else:
             js_location = 0
@@ -628,56 +631,56 @@
         #
         if loc_base is not r.r0:
             # push two registers to keep stack aligned
-	    self.mc.PUSH([r.r0.value, loc_base.value])
+            self.mc.PUSH([r.r0.value, loc_base.value])
             remap_frame_layout(self, [loc_base], [r.r0], r.ip)
         self.mc.BL(self.wb_slowpath[helper_num])
         if loc_base is not r.r0:
-	    self.mc.POP([r.r0.value, loc_base.value])
+            self.mc.POP([r.r0.value, loc_base.value])
 
         if card_marking:
-	    # The helper ends again with a check of the flag in the object.  So
-	    # here, we can simply write again a conditional jump, which will be
-	    # taken if GCFLAG_CARDS_SET is still not set.
+            # The helper ends again with a check of the flag in the object.  So
+            # here, we can simply write again a conditional jump, which will be
+            # taken if GCFLAG_CARDS_SET is still not set.
             jns_location = self.mc.currpos()
             self.mc.BKPT()
             #
             # patch the JS above
             offset = self.mc.currpos()
             pmc = OverwritingBuilder(self.mc, js_location, WORD)
-	    pmc.B_offs(offset, c.NE) # We want to jump if the z flag is not set
+            pmc.B_offs(offset, c.NE)  # We want to jump if the z flag isn't set
             #
             # case GCFLAG_CARDS_SET: emit a few instructions to do
             # directly the card flag setting
             loc_index = arglocs[1]
             assert loc_index.is_reg()
-	    # must save the register loc_index before it is mutated
-	    self.mc.PUSH([loc_index.value])
-	    tmp1 = loc_index
-	    tmp2 = arglocs[2] 
-	    # lr = byteofs
-	    s = 3 + descr.jit_wb_card_page_shift
-	    self.mc.MVN_rr(r.lr.value, loc_index.value,
-	    		imm=s, shifttype=shift.LSR)
-	    
-	    # tmp1 = byte_index
-	    self.mc.MOV_ri(r.ip.value, imm=7)
-	    self.mc.AND_rr(tmp1.value, r.ip.value, loc_index.value,
-	        imm=descr.jit_wb_card_page_shift, shifttype=shift.LSR)
-	    
-	    # set the bit
-	    self.mc.MOV_ri(tmp2.value, imm=1)
-	    self.mc.LDRB_rr(r.ip.value, loc_base.value, r.lr.value)
-	    self.mc.ORR_rr_sr(r.ip.value, r.ip.value, tmp2.value,
-	    			tmp1.value, shifttype=shift.LSL)
-	    self.mc.STRB_rr(r.ip.value, loc_base.value, r.lr.value)
-	    # done
-	    self.mc.POP([loc_index.value])
-	    #
+            # must save the register loc_index before it is mutated
+            self.mc.PUSH([loc_index.value])
+            tmp1 = loc_index
+            tmp2 = arglocs[2]
+            # lr = byteofs
+            s = 3 + descr.jit_wb_card_page_shift
+            self.mc.MVN_rr(r.lr.value, loc_index.value,
+                                       imm=s, shifttype=shift.LSR)
+
+            # tmp1 = byte_index
+            self.mc.MOV_ri(r.ip.value, imm=7)
+            self.mc.AND_rr(tmp1.value, r.ip.value, loc_index.value,
+            imm=descr.jit_wb_card_page_shift, shifttype=shift.LSR)
+
+            # set the bit
+            self.mc.MOV_ri(tmp2.value, imm=1)
+            self.mc.LDRB_rr(r.ip.value, loc_base.value, r.lr.value)
+            self.mc.ORR_rr_sr(r.ip.value, r.ip.value, tmp2.value,
+                                          tmp1.value, shifttype=shift.LSL)
+            self.mc.STRB_rr(r.ip.value, loc_base.value, r.lr.value)
+            # done
+            self.mc.POP([loc_index.value])
+            #
             #
             # patch the JNS above
             offset = self.mc.currpos()
             pmc = OverwritingBuilder(self.mc, jns_location, WORD)
-	    pmc.B_offs(offset, c.EQ) # We want to jump if the z flag is set
+            pmc.B_offs(offset, c.EQ)  # We want to jump if the z flag is set
 
         offset = self.mc.currpos()
         pmc = OverwritingBuilder(self.mc, jz_location, WORD)
@@ -686,7 +689,6 @@
 
     emit_op_cond_call_gc_wb_array = emit_op_cond_call_gc_wb
 
-
     def emit_op_setfield_gc(self, op, arglocs, regalloc, fcond):
         value_loc, base_loc, ofs, size = arglocs
         if size.value == 8:
@@ -809,7 +811,6 @@
             assert 0
 
         return fcond
-    emit_op_getinteriorfield_raw = emit_op_getinteriorfield_gc
 
     def emit_op_setinteriorfield_gc(self, op, arglocs, regalloc, fcond):
         (base_loc, index_loc, value_loc,
@@ -839,7 +840,6 @@
         return fcond
     emit_op_setinteriorfield_raw = emit_op_setinteriorfield_gc
 
-
     def emit_op_arraylen_gc(self, op, arglocs, regalloc, fcond):
         res, base_loc, ofs = arglocs
         self.mc.LDR_ri(res.value, base_loc.value, ofs.value)
@@ -849,81 +849,97 @@
         value_loc, base_loc, ofs_loc, scale, ofs = arglocs
         assert ofs_loc.is_reg()
         if scale.value > 0:
-            scale_loc = r.ip
             self.mc.LSL_ri(r.ip.value, ofs_loc.value, scale.value)
-        else:
-            scale_loc = ofs_loc
+            ofs_loc = r.ip
 
         # add the base offset
         if ofs.value > 0:
-            self.mc.ADD_ri(r.ip.value, scale_loc.value, imm=ofs.value)
-            scale_loc = r.ip
+            self.mc.ADD_ri(r.ip.value, ofs_loc.value, imm=ofs.value)
+            ofs_loc = r.ip
+        self._write_to_mem(value_loc, base_loc, ofs_loc, scale, fcond)
+        return fcond
 
+    def _write_to_mem(self, value_loc, base_loc, ofs_loc, scale, fcond=c.AL):
         if scale.value == 3:
             assert value_loc.is_vfp_reg()
-            assert scale_loc.is_reg()
-            self.mc.ADD_rr(r.ip.value, base_loc.value, scale_loc.value)
+            assert ofs_loc.is_reg()
+            self.mc.ADD_rr(r.ip.value, base_loc.value, ofs_loc.value)
             self.mc.VSTR(value_loc.value, r.ip.value, cond=fcond)
         elif scale.value == 2:
-            self.mc.STR_rr(value_loc.value, base_loc.value, scale_loc.value,
+            self.mc.STR_rr(value_loc.value, base_loc.value, ofs_loc.value,
                                                                     cond=fcond)
         elif scale.value == 1:
-            self.mc.STRH_rr(value_loc.value, base_loc.value, scale_loc.value,
+            self.mc.STRH_rr(value_loc.value, base_loc.value, ofs_loc.value,
                                                                     cond=fcond)
         elif scale.value == 0:
-            self.mc.STRB_rr(value_loc.value, base_loc.value, scale_loc.value,
+            self.mc.STRB_rr(value_loc.value, base_loc.value, ofs_loc.value,
                                                                     cond=fcond)
         else:
             assert 0
-        return fcond
 
     emit_op_setarrayitem_raw = emit_op_setarrayitem_gc
 
+    def emit_op_raw_store(self, op, arglocs, regalloc, fcond):
+        value_loc, base_loc, ofs_loc, scale, ofs = arglocs
+        assert ofs_loc.is_reg()
+        self._write_to_mem(value_loc, base_loc, ofs_loc, scale, fcond)
+        return fcond
+
     def emit_op_getarrayitem_gc(self, op, arglocs, regalloc, fcond):
-        res, base_loc, ofs_loc, scale, ofs = arglocs
+        res_loc, base_loc, ofs_loc, scale, ofs = arglocs
         assert ofs_loc.is_reg()
         signed = op.getdescr().is_item_signed()
+
+        # scale the offset as required
         if scale.value > 0:
-            scale_loc = r.ip
             self.mc.LSL_ri(r.ip.value, ofs_loc.value, scale.value)
-        else:
-            scale_loc = ofs_loc
-
+            ofs_loc = r.ip
         # add the base offset
         if ofs.value > 0:
-            self.mc.ADD_ri(r.ip.value, scale_loc.value, imm=ofs.value)
-            scale_loc = r.ip
+            self.mc.ADD_ri(r.ip.value, ofs_loc.value, imm=ofs.value)
+            ofs_loc = r.ip
+        #
+        self._load_from_mem(res_loc, base_loc, ofs_loc, scale, signed)
+        return fcond
 
+    def _load_from_mem(self, res_loc, base_loc, ofs_loc, scale,
+                                            signed=False, fcond=c.AL):
         if scale.value == 3:
-            assert res.is_vfp_reg()
-            assert scale_loc.is_reg()
-            self.mc.ADD_rr(r.ip.value, base_loc.value, scale_loc.value)
-            self.mc.VLDR(res.value, r.ip.value, cond=fcond)
+            assert res_loc.is_vfp_reg()
+            assert ofs_loc.is_reg()
+            self.mc.ADD_rr(r.ip.value, base_loc.value, ofs_loc.value)
+            self.mc.VLDR(res_loc.value, r.ip.value, cond=fcond)
         elif scale.value == 2:
-            self.mc.LDR_rr(res.value, base_loc.value,
-                                 scale_loc.value, cond=fcond)
+            self.mc.LDR_rr(res_loc.value, base_loc.value,
+                                 ofs_loc.value, cond=fcond)
         elif scale.value == 1:
             if signed:
-                self.mc.LDRSH_rr(res.value, base_loc.value,
-                                 scale_loc.value, cond=fcond)
+                self.mc.LDRSH_rr(res_loc.value, base_loc.value,
+                                 ofs_loc.value, cond=fcond)
             else:
-                self.mc.LDRH_rr(res.value, base_loc.value,
-                                 scale_loc.value, cond=fcond)
+                self.mc.LDRH_rr(res_loc.value, base_loc.value,
+                                 ofs_loc.value, cond=fcond)
         elif scale.value == 0:
             if signed:
-                self.mc.LDRSB_rr(res.value, base_loc.value,
-                                 scale_loc.value, cond=fcond)
+                self.mc.LDRSB_rr(res_loc.value, base_loc.value,
+                                 ofs_loc.value, cond=fcond)
             else:
-                self.mc.LDRB_rr(res.value, base_loc.value,
-                                 scale_loc.value, cond=fcond)
+                self.mc.LDRB_rr(res_loc.value, base_loc.value,
+                                 ofs_loc.value, cond=fcond)
         else:
             assert 0
 
-        return fcond
-
     emit_op_getarrayitem_raw = emit_op_getarrayitem_gc
     emit_op_getarrayitem_gc_pure = emit_op_getarrayitem_gc
 
+    def emit_op_raw_load(self, op, arglocs, regalloc, fcond):
+        res_loc, base_loc, ofs_loc, scale, ofs = arglocs
+        assert ofs_loc.is_reg()
+        # no base offset
+        assert ofs.value == 0
+        signed = op.getdescr().is_item_signed()
+        self._load_from_mem(res_loc, base_loc, ofs_loc, scale, signed)
+        return fcond
 
     def emit_op_strlen(self, op, arglocs, regalloc, fcond):
         l0, l1, res = arglocs
@@ -1010,7 +1026,8 @@
         # need the box here
         if isinstance(args[4], Box):
             length_box = args[4]
-            length_loc = regalloc._ensure_value_is_boxed(args[4], forbidden_vars)
+            length_loc = regalloc._ensure_value_is_boxed(args[4],
+                                                        forbidden_vars)
         else:
             length_box = TempInt()
             length_loc = regalloc.force_allocate_reg(length_box,
@@ -1072,7 +1089,6 @@
         else:
             raise AssertionError("bad unicode item size")
 
-
     emit_op_unicodelen = emit_op_strlen
 
     def emit_op_unicodegetitem(self, op, arglocs, regalloc, fcond):
@@ -1102,7 +1118,6 @@
 
         return fcond
 
-
     def emit_op_force_token(self, op, arglocs, regalloc, fcond):
         res_loc = arglocs[0]
         self.mc.MOV_rr(res_loc.value, r.fp.value)
@@ -1188,11 +1203,20 @@
             floats = r.caller_vfp_resp
         else:
             floats = []
-        with saved_registers(self.mc, r.caller_resp[1:] + [r.ip], floats):
+        # in case the call has a result we do not need to save the
+        # corresponding result register because it was already allocated for
+        # the result
+        core = r.caller_resp
+        if op.result:
+            if resloc.is_vfp_reg():
+                floats = r.caller_vfp_resp[1:]
+            else:
+                core = r.caller_resp[1:] + [r.ip]  # keep alignment
+        with saved_registers(self.mc, core, floats):
             # result of previous call is in r0
             self.mov_loc_loc(arglocs[0], r.r1)
             self.mc.BL(asm_helper_adr)
-            if op.result and resloc.is_vfp_reg():
+            if not self.cpu.use_hf_abi and op.result and resloc.is_vfp_reg():
                 # move result to the allocated register
                 self.mov_to_vfp_loc(r.r0, r.r1, resloc)
 
@@ -1237,7 +1261,7 @@
         size = descr.get_result_size()
         signed = descr.is_result_signed()
         #
-        self._emit_call(fail_index, adr, callargs, fcond, 
+        self._emit_call(fail_index, adr, callargs, fcond,
                                     resloc, (size, signed))
 
         self.mc.LDR_ri(r.ip.value, r.fp.value)
@@ -1266,7 +1290,7 @@
         size = descr.get_result_size()
         signed = descr.is_result_signed()
         #
-        self._emit_call(fail_index, adr, callargs, fcond, 
+        self._emit_call(fail_index, adr, callargs, fcond,
                                     resloc, (size, signed))
         # then reopen the stack
         if gcrootmap:
@@ -1288,7 +1312,8 @@
                 regs_to_save.append(reg)
         assert gcrootmap.is_shadow_stack
         with saved_registers(self.mc, regs_to_save):
-            self._emit_call(NO_FORCE_INDEX, imm(self.releasegil_addr), [], fcond)
+            self._emit_call(NO_FORCE_INDEX,
+                                imm(self.releasegil_addr), [], fcond)
 
     def call_reacquire_gil(self, gcrootmap, save_loc, fcond):
         # save the previous result into the stack temporarily.
@@ -1325,7 +1350,6 @@
         self.mc.gen_load_int(r.ip.value, fail_index)
         self.mc.STR_ri(r.ip.value, r.fp.value)
 
-
     def emit_op_call_malloc_gc(self, op, arglocs, regalloc, fcond):
         self.emit_op_call(op, arglocs, regalloc, fcond)
         self.propagate_memoryerror_if_r0_is_null()
@@ -1355,7 +1379,6 @@
         self.mc.BKPT()
         self.mc.NOP()
 
-
     emit_op_float_add = gen_emit_float_op('float_add', 'VADD')
     emit_op_float_sub = gen_emit_float_op('float_sub', 'VSUB')
     emit_op_float_mul = gen_emit_float_op('float_mul', 'VMUL')
@@ -1410,11 +1433,13 @@
         self.mc.VMOV_rc(res.value, r.ip.value, loc.value)
         return fcond
 
-    emit_op_convert_float_bytes_to_longlong = gen_emit_unary_float_op('float_bytes_to_longlong', 'VMOV_cc')
-    emit_op_convert_longlong_bytes_to_float = gen_emit_unary_float_op('longlong_bytes_to_float', 'VMOV_cc')
+    emit_op_convert_float_bytes_to_longlong = gen_emit_unary_float_op(
+                                    'float_bytes_to_longlong', 'VMOV_cc')
+    emit_op_convert_longlong_bytes_to_float = gen_emit_unary_float_op(
+                                    'longlong_bytes_to_float', 'VMOV_cc')
 
     def emit_op_read_timestamp(self, op, arglocs, regalloc, fcond):
-	assert 0, 'not supported'
+        assert 0, 'not supported'
         tmp = arglocs[0]
         res = arglocs[1]
         self.mc.MRC(15, 0, tmp.value, 15, 12, 1)
diff --git a/pypy/jit/backend/arm/regalloc.py b/pypy/jit/backend/arm/regalloc.py
--- a/pypy/jit/backend/arm/regalloc.py
+++ b/pypy/jit/backend/arm/regalloc.py
@@ -104,8 +104,8 @@
         which is in variable v.
         """
         self._check_type(v)
-        r = self.force_allocate_reg(v)
-        return r
+        reg = self.force_allocate_reg(v, selected_reg=r.d0)
+        return reg
 
     def ensure_value_is_boxed(self, thing, forbidden_vars=[]):
         loc = None
@@ -309,11 +309,16 @@
         # The first inputargs are passed in registers r0-r3
         # we relly on the soft-float calling convention so we need to move
         # float params to the coprocessor.
+        if self.cpu.use_hf_abi:
+            self._set_initial_bindings_hf(inputargs)
+        else:
+            self._set_initial_bindings_sf(inputargs)
+
+    def _set_initial_bindings_sf(self, inputargs):
 
         arg_index = 0
         count = 0
         n_register_args = len(r.argument_regs)
-        cur_frame_pos = - (self.assembler.STACK_FIXED_AREA / WORD) + 1
         cur_frame_pos = 1 - (self.assembler.STACK_FIXED_AREA // WORD)
         for box in inputargs:
             assert isinstance(box, Box)
@@ -328,7 +333,7 @@
                     vfpreg = self.try_allocate_reg(box)
                     # move soft-float argument to vfp
                     self.assembler.mov_to_vfp_loc(loc, loc2, vfpreg)
-                    arg_index += 2  # this argument used to argument registers
+                    arg_index += 2  # this argument used two argument registers
                 else:
                     loc = r.argument_regs[arg_index]
                     self.try_allocate_reg(box, selected_reg=loc)
@@ -346,6 +351,37 @@
                 loc = self.frame_manager.frame_pos(cur_frame_pos, box.type)
                 self.frame_manager.set_binding(box, loc)
 
+    def _set_initial_bindings_hf(self, inputargs):
+
+        arg_index = vfp_arg_index = 0
+        count = 0
+        n_reg_args = len(r.argument_regs)
+        n_vfp_reg_args = len(r.vfp_argument_regs)
+        cur_frame_pos = 1 - (self.assembler.STACK_FIXED_AREA // WORD)
+        for box in inputargs:
+            assert isinstance(box, Box)
+            # handle inputargs in argument registers
+            if box.type != FLOAT and arg_index < n_reg_args:
+                reg = r.argument_regs[arg_index]
+                self.try_allocate_reg(box, selected_reg=reg)
+                arg_index += 1
+            elif box.type == FLOAT and vfp_arg_index < n_vfp_reg_args:
+                reg = r.vfp_argument_regs[vfp_arg_index]
+                self.try_allocate_reg(box, selected_reg=reg)
+                vfp_arg_index += 1
+            else:
+                # treat stack args as stack locations with a negative offset
+                if box.type == FLOAT:
+                    cur_frame_pos -= 2
+                    if count % 2 != 0: # Stack argument alignment
+                        cur_frame_pos -= 1
+                        count = 0
+                else:
+                    cur_frame_pos -= 1
+                    count += 1
+                loc = self.frame_manager.frame_pos(cur_frame_pos, box.type)
+                self.frame_manager.set_binding(box, loc)
+
     def _update_bindings(self, locs, inputargs):
         used = {}
         i = 0
@@ -459,6 +495,11 @@
         res = self.force_allocate_reg(op.result)
         self.possibly_free_var(op.result)
         return [reg1, reg2, res]
+    
+    def prepare_op_int_force_ge_zero(self, op, fcond):
+        argloc = self._ensure_value_is_boxed(op.getarg(0))
+        resloc = self.force_allocate_reg(op.result, [op.getarg(0)])
+        return [argloc, resloc]
 
     def prepare_guard_int_mul_ovf(self, op, guard, fcond):
         boxes = op.getarglist()
@@ -843,7 +884,6 @@
         result_loc = self.force_allocate_reg(op.result)
         return [base_loc, index_loc, result_loc, ofs_loc, imm(ofs),
                                     imm(itemsize), imm(fieldsize)]
-    prepare_op_getinteriorfield_raw = prepare_op_getinteriorfield_gc
 
     def prepare_op_setinteriorfield_gc(self, op, fcond):
         t = unpack_interiorfielddescr(op.getdescr())
@@ -883,6 +923,7 @@
         assert check_imm_arg(ofs)
         return [value_loc, base_loc, ofs_loc, imm(scale), imm(ofs)]
     prepare_op_setarrayitem_raw = prepare_op_setarrayitem_gc
+    prepare_op_raw_store = prepare_op_setarrayitem_gc
 
     def prepare_op_getarrayitem_gc(self, op, fcond):
         boxes = op.getarglist()
@@ -897,7 +938,9 @@
         return [res, base_loc, ofs_loc, imm(scale), imm(ofs)]
 
     prepare_op_getarrayitem_raw = prepare_op_getarrayitem_gc
+    prepare_op_getarrayitem_raw_pure = prepare_op_getarrayitem_gc
     prepare_op_getarrayitem_gc_pure = prepare_op_getarrayitem_gc
+    prepare_op_raw_load = prepare_op_getarrayitem_gc
 
     def prepare_op_strlen(self, op, fcond):
         args = op.getarglist()
@@ -1045,27 +1088,15 @@
 
     def prepare_op_cond_call_gc_wb(self, op, fcond):
         assert op.result is None
-        N = op.numargs()
         # we force all arguments in a reg because it will be needed anyway by
         # the following setfield_gc or setarrayitem_gc. It avoids loading it
         # twice from the memory.
-        arglocs = []
+        N = op.numargs()
         args = op.getarglist()
-        for i in range(N):
-            loc = self._ensure_value_is_boxed(op.getarg(i), args)
-            arglocs.append(loc)
-        card_marking = False
-        if op.getopnum() == rop.COND_CALL_GC_WB_ARRAY:
-            descr = op.getdescr()
-            if we_are_translated():
-                cls = self.cpu.gc_ll_descr.has_write_barrier_class()
-                assert cls is not None and isinstance(descr, cls)
-            card_marking = descr.jit_wb_cards_set != 0
-        if card_marking:  # allocate scratch registers
-            tmp1 = self.get_scratch_reg(INT)
-            tmp2 = self.get_scratch_reg(INT)
-            arglocs.append(tmp1)
-            arglocs.append(tmp2)
+        arglocs = [self._ensure_value_is_boxed(op.getarg(i), args)
+                                                              for i in range(N)]
+        tmp = self.get_scratch_reg(INT)
+        arglocs.append(tmp)
         return arglocs
 
     prepare_op_cond_call_gc_wb_array = prepare_op_cond_call_gc_wb
diff --git a/pypy/jit/backend/arm/registers.py b/pypy/jit/backend/arm/registers.py
--- a/pypy/jit/backend/arm/registers.py
+++ b/pypy/jit/backend/arm/registers.py
@@ -26,7 +26,7 @@
 callee_saved_registers = callee_resp + [lr]
 callee_restored_registers = callee_resp + [pc]
 
-caller_vfp_resp = [d0, d1, d2, d3, d4, d5, d6, d7]
+vfp_argument_regs = caller_vfp_resp = [d0, d1, d2, d3, d4, d5, d6, d7]
 callee_vfp_resp = [d8, d9, d10, d11, d12, d13, d14, d15]
 
 callee_saved_vfp_registers = callee_vfp_resp
diff --git a/pypy/jit/backend/arm/runner.py b/pypy/jit/backend/arm/runner.py
old mode 100644
new mode 100755
--- a/pypy/jit/backend/arm/runner.py
+++ b/pypy/jit/backend/arm/runner.py
@@ -3,14 +3,17 @@
 from pypy.jit.backend.llsupport.llmodel import AbstractLLCPU
 from pypy.rpython.llinterp import LLInterpreter
 from pypy.rpython.lltypesystem import lltype, rffi, llmemory
+from pypy.rlib.jit_hooks import LOOP_RUN_CONTAINER
 from pypy.jit.backend.arm.arch import FORCE_INDEX_OFS
 
 
-class ArmCPU(AbstractLLCPU):
+class AbstractARMCPU(AbstractLLCPU):
 
     supports_floats = True
     supports_longlong = False # XXX requires an implementation of
                               # read_timestamp that works in user mode
+    
+    use_hf_abi = False        # use hard float abi flag
 
     def __init__(self, rtyper, stats, opts=None, translate_support_code=False,
                  gcdescr=None):
@@ -19,6 +22,9 @@
         AbstractLLCPU.__init__(self, rtyper, stats, opts,
                                translate_support_code, gcdescr)
 
+    def set_debug(self, flag):
+        return self.assembler.set_debug(flag)
+
     def setup(self):
         if self.opts is not None:
             failargs_limit = self.opts.failargs_limit
@@ -139,3 +145,23 @@
             mc.copy_to_raw_memory(jmp)
         # positions invalidated
         looptoken.compiled_loop_token.invalidate_positions = []
+
+    # should be combined with other ll backends
+    def get_all_loop_runs(self):
+        l = lltype.malloc(LOOP_RUN_CONTAINER,
+                          len(self.assembler.loop_run_counters))
+        for i, ll_s in enumerate(self.assembler.loop_run_counters):
+            l[i].type = ll_s.type
+            l[i].number = ll_s.number
+            l[i].counter = ll_s.i
+        return l
+
+class CPU_ARM(AbstractARMCPU):
+    """ARM v7 uses softfp ABI, requires vfp"""
+    pass
+ArmCPU = CPU_ARM
+
+class CPU_ARMHF(AbstractARMCPU):
+    """ARM v7 uses hardfp ABI, requires vfp"""
+    use_hf_abi = True
+    supports_floats = False
diff --git a/pypy/jit/backend/arm/test/conftest.py b/pypy/jit/backend/arm/test/conftest.py
--- a/pypy/jit/backend/arm/test/conftest.py
+++ b/pypy/jit/backend/arm/test/conftest.py
@@ -17,5 +17,5 @@
                     help="run tests that translate code")
 
 def pytest_runtest_setup(item):
-    if cpu != 'arm':
+    if cpu not in  ('arm', 'armhf'):
         py.test.skip("ARM(v7) tests skipped: cpu is %r" % (cpu,))
diff --git a/pypy/jit/backend/arm/test/test_basic.py b/pypy/jit/backend/arm/test/test_basic.py
--- a/pypy/jit/backend/arm/test/test_basic.py
+++ b/pypy/jit/backend/arm/test/test_basic.py
@@ -2,6 +2,9 @@
 from pypy.jit.metainterp.test import test_ajit
 from pypy.rlib.jit import JitDriver
 from pypy.jit.backend.arm.test.support import JitARMMixin
+from pypy.jit.backend.detect_cpu import getcpuclass
+
+CPU = getcpuclass()
 
 class TestBasic(JitARMMixin, test_ajit.BaseLLtypeTests):
     # for the individual tests see
@@ -31,5 +34,17 @@
     def test_free_object(self):
         py.test.skip("issue of freeing, probably with ll2ctypes")
 
+
+    if not CPU.supports_longlong:
+        for k in dir(test_ajit.BaseLLtypeTests):
+            if k.find('longlong') < 0:
+                continue
+            locals()[k] = lambda self: py.test.skip('requires longlong support')
+
     def test_read_timestamp(self):
         py.test.skip("The JIT on ARM does not support read_timestamp")
+
+  
+    if not CPU.supports_floats:
+        for k in ('test_float', 'test_residual_external_call'):
+            locals()[k] = lambda self: py.test.skip('requires float support')
diff --git a/pypy/jit/backend/arm/test/test_runner.py b/pypy/jit/backend/arm/test/test_runner.py
--- a/pypy/jit/backend/arm/test/test_runner.py
+++ b/pypy/jit/backend/arm/test/test_runner.py
@@ -26,7 +26,8 @@
     # for the individual tests see
     # ====> ../../test/runner_test.py
 
-    add_loop_instructions = ['mov', 'adds', 'cmp', 'beq', 'b']
+    add_loop_instructions = ['nop', # this is the same as mov r0, r0
+                             'adds', 'cmp', 'beq', 'b']
     bridge_loop_instructions = ['movw', 'movt', 'bx']
 
     def setup_method(self, meth):
diff --git a/pypy/jit/backend/arm/test/test_ztranslation.py b/pypy/jit/backend/arm/test/test_ztranslation.py
--- a/pypy/jit/backend/arm/test/test_ztranslation.py
+++ b/pypy/jit/backend/arm/test/test_ztranslation.py
@@ -3,12 +3,14 @@
 from pypy.rlib.jit import JitDriver, unroll_parameters, set_param
 from pypy.rlib.jit import PARAMETERS, dont_look_inside
 from pypy.rlib.jit import promote
+from pypy.rlib import jit_hooks
 from pypy.jit.metainterp.jitprof import Profiler
 from pypy.jit.backend.detect_cpu import getcpuclass
 from pypy.jit.backend.test.support import CCompiledMixin
 from pypy.jit.codewriter.policy import StopAtXPolicy
 from pypy.translator.translator import TranslationContext
 from pypy.config.translationoption import DEFL_GC
+from pypy.rlib import rgc
 from pypy.jit.backend.arm.test.support import skip_unless_run_slow_tests
 skip_unless_run_slow_tests()
 
@@ -173,6 +175,25 @@
         assert 1024 <= bound <= 131072
         assert bound & (bound-1) == 0       # a power of two
 
+    def test_jit_get_stats(self):
+        driver = JitDriver(greens = [], reds = ['i'])
+        
+        def f():
+            i = 0
+            while i < 100000:
+                driver.jit_merge_point(i=i)
+                i += 1
+
+        def main():
+            jit_hooks.stats_set_debug(None, True)
+            f()
+            ll_times = jit_hooks.stats_get_loop_run_times(None)
+            return len(ll_times)
+
+        res = self.meta_interp(main, [])
+        assert res == 3
+        # one for loop, one for entry point and one for the prologue
+
 class TestTranslationRemoveTypePtrARM(CCompiledMixin):
     CPUClass = getcpuclass()
 
diff --git a/pypy/jit/backend/detect_cpu.py b/pypy/jit/backend/detect_cpu.py
--- a/pypy/jit/backend/detect_cpu.py
+++ b/pypy/jit/backend/detect_cpu.py
@@ -61,8 +61,13 @@
             from pypy.jit.backend.x86.detect_sse2 import detect_sse2
             if not detect_sse2():
                 model = 'x86-without-sse2'
+    if model == 'arm':
+            from pypy.jit.backend.arm.detect import detect_hardfloat, detect_float
+            if detect_hardfloat():
+                model = 'armhf'
+            assert detect_float(), 'the JIT-compiler requires a vfp unit'
     return model
-
+    
 def getcpuclassname(backend_name="auto"):
     if backend_name == "auto":
         backend_name = autodetect()
@@ -77,7 +82,9 @@
     elif backend_name == 'llvm':
         return "pypy.jit.backend.llvm.runner", "LLVMCPU"
     elif backend_name == 'arm':
-        return "pypy.jit.backend.arm.runner", "ArmCPU"
+        return "pypy.jit.backend.arm.runner", "CPU_ARM"
+    elif backend_name == 'armhf':
+        return "pypy.jit.backend.arm.runner", "CPU_ARMHF"
     elif backend_name.startswith("ppc"):
         return "pypy.jit.backend.ppc.runner", "PPC_CPU"
     else:
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -22,7 +22,6 @@
 from pypy.jit.codewriter import longlong
 from pypy.jit.codewriter.effectinfo import EffectInfo
 
-from pypy.rlib import libffi, clibffi
 from pypy.rlib.objectmodel import ComputedIntSymbolic, we_are_translated
 from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rlib.rarithmetic import r_longlong, r_ulonglong, r_uint
@@ -65,7 +64,8 @@
 
 FLOAT_ARRAY_TP = lltype.Ptr(lltype.Array(lltype.Float, hints={"nolength": True}))
 def maybe_uncast(TP, array):
-    if array._TYPE.TO._hints.get("uncast_on_llgraph"):
+    if array._TYPE.TO.OF != lltype.Float:
+        # array._TYPE.TO._hints.get("uncast_on_llgraph"):
         array = rffi.cast(TP, array)
     return array
 
@@ -97,6 +97,7 @@
     'int_add_ovf'     : (('int', 'int'), 'int'),
     'int_sub_ovf'     : (('int', 'int'), 'int'),
     'int_mul_ovf'     : (('int', 'int'), 'int'),
+    'int_force_ge_zero':(('int',), 'int'),
     'uint_add'        : (('int', 'int'), 'int'),
     'uint_sub'        : (('int', 'int'), 'int'),
     'uint_mul'        : (('int', 'int'), 'int'),
@@ -803,7 +804,7 @@
         if arraydescr.typeinfo == REF:
             raise NotImplementedError("getarrayitem_raw -> gcref")
         elif arraydescr.typeinfo == INT:
-            return do_getarrayitem_raw_int(array, index)
+            return do_getarrayitem_raw_int(array, index, arraydescr.ofs)
         elif arraydescr.typeinfo == FLOAT:
             return do_getarrayitem_raw_float(array, index)
         else:
@@ -824,9 +825,7 @@
     op_getfield_gc_pure = op_getfield_gc
 
     def op_getfield_raw(self, fielddescr, struct):
-        if fielddescr.arg_types == 'dynamic': # abuse of .arg_types
-            return do_getfield_raw_dynamic(struct, fielddescr)
-        elif fielddescr.typeinfo == REF:
+        if fielddescr.typeinfo == REF:
             return do_getfield_raw_ptr(struct, fielddescr.ofs)
         elif fielddescr.typeinfo == INT:
             return do_getfield_raw_int(struct, fielddescr.ofs)
@@ -837,6 +836,26 @@
 
     op_getfield_raw_pure = op_getfield_raw
 
+    def op_raw_store(self, arraydescr, addr, offset, value):
+        if arraydescr.typeinfo == REF:
+            raise AssertionError("cannot store GC pointer in raw storage")
+        elif arraydescr.typeinfo == INT:
+            do_raw_store_int(addr, offset, arraydescr.ofs, value)
+        elif arraydescr.typeinfo == FLOAT:
+            do_raw_store_float(addr, offset, value)
+        else:
+            raise NotImplementedError
+
+    def op_raw_load(self, arraydescr, addr, offset):
+        if arraydescr.typeinfo == REF: 
+            raise AssertionError("cannot store GC pointer in raw storage")
+        elif arraydescr.typeinfo == INT:
+            return do_raw_load_int(addr, offset, arraydescr.ofs)
+        elif arraydescr.typeinfo == FLOAT:
+            return do_raw_load_float(addr, offset)
+        else:
+            raise NotImplementedError
+
     def op_new(self, size):
         return do_new(size.ofs)
 
@@ -862,7 +881,7 @@
         if arraydescr.typeinfo == REF:
             raise NotImplementedError("setarrayitem_raw <- gcref")
         elif arraydescr.typeinfo == INT:
-            do_setarrayitem_raw_int(array, index, newvalue)
+            do_setarrayitem_raw_int(array, index, newvalue, arraydescr.ofs)
         elif arraydescr.typeinfo == FLOAT:
             do_setarrayitem_raw_float(array, index, newvalue)
         else:
@@ -922,9 +941,7 @@
             raise NotImplementedError
 
     def op_setfield_raw(self, fielddescr, struct, newvalue):
-        if fielddescr.arg_types == 'dynamic': # abuse of .arg_types
-            do_setfield_raw_dynamic(struct, fielddescr, newvalue)
-        elif fielddescr.typeinfo == REF:
+        if fielddescr.typeinfo == REF:
             do_setfield_raw_ptr(struct, fielddescr.ofs, newvalue)
         elif fielddescr.typeinfo == INT:
             do_setfield_raw_int(struct, fielddescr.ofs, newvalue)
@@ -1438,9 +1455,13 @@
     array = array._obj.container
     return cast_to_int(array.getitem(index))
 
-def do_getarrayitem_raw_int(array, index):
-    array = array.adr.ptr._obj
-    return cast_to_int(array.getitem(index))
+def do_getarrayitem_raw_int(array, index, itemsize):
+    array = array.adr.ptr
+    ITEMTYPE = lltype.typeOf(array).TO.OF
+    TYPE = symbolic.Size2Type[itemsize]
+    if TYPE.OF != ITEMTYPE:
+        array = rffi.cast(lltype.Ptr(TYPE), array)
+    return cast_to_int(array._obj.getitem(index))
 
 def do_getarrayitem_gc_float(array, index):
     array = array._obj.container
@@ -1484,18 +1505,6 @@
     struct = array._obj.container.getitem(index)
     return cast_to_ptr(_getinteriorfield_gc(struct, fieldnum))
 
-def _getinteriorfield_raw(ffitype, array, index, width, ofs):
-    addr = rffi.cast(rffi.VOIDP, array)
-    return libffi.array_getitem(ffitype, width, addr, index, ofs)
-
-def do_getinteriorfield_raw_int(array, index, width, ofs):
-    res = _getinteriorfield_raw(libffi.types.slong, array, index, width, ofs)
-    return res
-
-def do_getinteriorfield_raw_float(array, index, width, ofs):
-    res = _getinteriorfield_raw(libffi.types.double, array, index, width, ofs)
-    return res
-
 def _getfield_raw(struct, fieldnum):
     STRUCT, fieldname = symbolic.TokenToField[fieldnum]
     ptr = cast_from_int(lltype.Ptr(STRUCT), struct)
@@ -1510,16 +1519,31 @@
 def do_getfield_raw_ptr(struct, fieldnum):
     return cast_to_ptr(_getfield_raw(struct, fieldnum))
 
-def do_getfield_raw_dynamic(struct, fielddescr):
-    from pypy.rlib import libffi
-    addr = cast_from_int(rffi.VOIDP, struct)
-    ofs = fielddescr.ofs
-    if fielddescr.is_pointer_field():
-        assert False, 'fixme'
-    elif fielddescr.is_float_field():
-        assert False, 'fixme'
-    else:
-        return libffi._struct_getfield(lltype.Signed, addr, ofs)
+def do_raw_load_int(struct, offset, descrofs):
+    TYPE = symbolic.Size2Type[descrofs]
+    ll_p = rffi.cast(rffi.CCHARP, struct)
+    ll_p = rffi.cast(lltype.Ptr(TYPE), rffi.ptradd(ll_p, offset))
+    value = ll_p[0]
+    return rffi.cast(lltype.Signed, value)
+
+def do_raw_load_float(struct, offset):
+    ll_p = rffi.cast(rffi.CCHARP, struct)
+    ll_p = rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE),
+                     rffi.ptradd(ll_p, offset))
+    value = ll_p[0]
+    return value
+
+def do_raw_store_int(struct, offset, descrofs, value):
+    TYPE = symbolic.Size2Type[descrofs]
+    ll_p = rffi.cast(rffi.CCHARP, struct)
+    ll_p = rffi.cast(lltype.Ptr(TYPE), rffi.ptradd(ll_p, offset))
+    ll_p[0] = rffi.cast(TYPE.OF, value)
+
+def do_raw_store_float(struct, offset, value):
+    ll_p = rffi.cast(rffi.CCHARP, struct)
+    ll_p = rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE),
+                     rffi.ptradd(ll_p, offset))
+    ll_p[0] = value
 
 def do_new(size):
     TYPE = symbolic.Size2Type[size]
@@ -1528,6 +1552,7 @@
 
 def do_new_array(arraynum, count):
     TYPE = symbolic.Size2Type[arraynum]
+    assert count >= 0 # explode if it's not
     x = lltype.malloc(TYPE, count, zero=True)
     return cast_to_ptr(x)
 
@@ -1537,10 +1562,13 @@
     newvalue = cast_from_int(ITEMTYPE, newvalue)
     array.setitem(index, newvalue)
 
-def do_setarrayitem_raw_int(array, index, newvalue):
+def do_setarrayitem_raw_int(array, index, newvalue, itemsize):
     array = array.adr.ptr
     ITEMTYPE = lltype.typeOf(array).TO.OF
-    newvalue = cast_from_int(ITEMTYPE, newvalue)
+    TYPE = symbolic.Size2Type[itemsize]
+    if TYPE.OF != ITEMTYPE:
+        array = rffi.cast(lltype.Ptr(TYPE), array)
+    newvalue = cast_from_int(TYPE.OF, newvalue)
     array._obj.setitem(index, newvalue)
 
 def do_setarrayitem_gc_float(array, index, newvalue):
@@ -1585,18 +1613,6 @@
 do_setinteriorfield_gc_float = new_setinteriorfield_gc(cast_from_floatstorage)
 do_setinteriorfield_gc_ptr = new_setinteriorfield_gc(cast_from_ptr)
 
-def new_setinteriorfield_raw(cast_func, ffitype):
-    def do_setinteriorfield_raw(array, index, newvalue, width, ofs):
-        addr = rffi.cast(rffi.VOIDP, array)
-        for TYPE, ffitype2 in clibffi.ffitype_map:
-            if ffitype2 is ffitype:
-                newvalue = cast_func(TYPE, newvalue)
-                break
-        return libffi.array_setitem(ffitype, width, addr, index, ofs, newvalue)
-    return do_setinteriorfield_raw
-do_setinteriorfield_raw_int = new_setinteriorfield_raw(cast_from_int, libffi.types.slong)
-do_setinteriorfield_raw_float = new_setinteriorfield_raw(cast_from_floatstorage, libffi.types.double)
-
 def do_setfield_raw_int(struct, fieldnum, newvalue):
     STRUCT, fieldname = symbolic.TokenToField[fieldnum]
     ptr = cast_from_int(lltype.Ptr(STRUCT), struct)
@@ -1618,17 +1634,6 @@
     newvalue = cast_from_ptr(FIELDTYPE, newvalue)
     setattr(ptr, fieldname, newvalue)
 
-def do_setfield_raw_dynamic(struct, fielddescr, newvalue):
-    from pypy.rlib import libffi
-    addr = cast_from_int(rffi.VOIDP, struct)
-    ofs = fielddescr.ofs
-    if fielddescr.is_pointer_field():
-        assert False, 'fixme'
-    elif fielddescr.is_float_field():
-        assert False, 'fixme'
-    else:
-        libffi._struct_setfield(lltype.Signed, addr, ofs, newvalue)
-
 def do_newstr(length):
     x = rstr.mallocstr(length)
     return cast_to_ptr(x)
@@ -1933,6 +1938,7 @@
 setannotation(do_getinteriorfield_gc_int, annmodel.SomeInteger())
 setannotation(do_getinteriorfield_gc_ptr, annmodel.SomePtr(llmemory.GCREF))
 setannotation(do_getinteriorfield_gc_float, s_FloatStorage)
+setannotation(do_raw_load_int, annmodel.SomeInteger())
 setannotation(do_new, annmodel.SomePtr(llmemory.GCREF))
 setannotation(do_new_array, annmodel.SomePtr(llmemory.GCREF))
 setannotation(do_setarrayitem_gc_int, annmodel.s_None)
@@ -1949,6 +1955,7 @@
 setannotation(do_setinteriorfield_gc_int, annmodel.s_None)
 setannotation(do_setinteriorfield_gc_ptr, annmodel.s_None)
 setannotation(do_setinteriorfield_gc_float, annmodel.s_None)
+setannotation(do_raw_store_int, annmodel.s_None)
 setannotation(do_newstr, annmodel.SomePtr(llmemory.GCREF))
 setannotation(do_strsetitem, annmodel.s_None)
 setannotation(do_newunicode, annmodel.SomePtr(llmemory.GCREF))
diff --git a/pypy/jit/backend/llgraph/runner.py b/pypy/jit/backend/llgraph/runner.py
--- a/pypy/jit/backend/llgraph/runner.py
+++ b/pypy/jit/backend/llgraph/runner.py
@@ -4,6 +4,7 @@
 
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.jit_hooks import LOOP_RUN_CONTAINER
 from pypy.rpython.lltypesystem import lltype, llmemory, rclass
 from pypy.rpython.ootypesystem import ootype
 from pypy.rpython.llinterp import LLInterpreter
@@ -33,6 +34,10 @@
         self.arg_types = arg_types
         self.count_fields_if_immut = count_fields_if_immut
         self.ffi_flags = ffi_flags
+        self._debug = False
+
+    def set_debug(self, v):
+        self._debug = True
 
     def get_arg_types(self):
         return self.arg_types
@@ -336,16 +341,6 @@
         token = history.getkind(getattr(S, fieldname))
         return self.getdescr(ofs, token[0], name=fieldname)
 
-    def fielddescrof_dynamic(self, offset, fieldsize, is_pointer, is_float, is_signed):
-        if is_pointer:
-            typeinfo = REF
-        elif is_float:
-            typeinfo = FLOAT
-        else:
-            typeinfo = INT
-        # we abuse the arg_types field to distinguish dynamic and static descrs
-        return self.getdescr(offset, typeinfo, arg_types='dynamic', name='<dynamic field>')
-
     def interiorfielddescrof(self, A, fieldname):
         S = A.OF
         width = symbolic.get_size(A)
@@ -353,18 +348,6 @@
         token = history.getkind(getattr(S, fieldname))
         return self.getdescr(ofs, token[0], name=fieldname, width=width)
 
-    def interiorfielddescrof_dynamic(self, offset, width, fieldsize,
-        is_pointer, is_float, is_signed):
-
-        if is_pointer:
-            typeinfo = REF
-        elif is_float:
-            typeinfo = FLOAT
-        else:
-            typeinfo = INT
-        # we abuse the arg_types field to distinguish dynamic and static descrs
-        return Descr(offset, typeinfo, arg_types='dynamic', name='<dynamic interior field>', width=width)
-
     def calldescrof(self, FUNC, ARGS, RESULT, extrainfo):
         arg_types = []
         for ARG in ARGS:
@@ -379,22 +362,27 @@
         return self.getdescr(0, token[0], extrainfo=extrainfo,
                              arg_types=''.join(arg_types))
 
-    def calldescrof_dynamic(self, ffi_args, ffi_result, extrainfo, ffi_flags):
+    def calldescrof_dynamic(self, cif_description, extrainfo):
         from pypy.jit.backend.llsupport.ffisupport import get_ffi_type_kind
         from pypy.jit.backend.llsupport.ffisupport import UnsupportedKind
         arg_types = []
         try:
-            for arg in ffi_args:
+            for arg in cif_description.atypes:
                 kind = get_ffi_type_kind(self, arg)
                 if kind != history.VOID:
                     arg_types.append(kind)
-            reskind = get_ffi_type_kind(self, ffi_result)
+            reskind = get_ffi_type_kind(self, cif_description.rtype)
         except UnsupportedKind:
             return None
         return self.getdescr(0, reskind, extrainfo=extrainfo,
                              arg_types=''.join(arg_types),
-                             ffi_flags=ffi_flags)
+                             ffi_flags=cif_description.abi)
 
+    def _calldescr_dynamic_for_tests(self, atypes, rtype,
+                                     abiname='FFI_DEFAULT_ABI'):
+        from pypy.jit.backend.llsupport import ffisupport
+        return ffisupport.calldescr_dynamic_for_tests(self, atypes, rtype,
+                                                      abiname)
 
     def grab_exc_value(self):
         return llimpl.grab_exc_value()
@@ -430,7 +418,7 @@
         return llimpl.do_getarrayitem_gc_int(array, index)
     def bh_getarrayitem_raw_i(self, arraydescr, array, index):
         assert isinstance(arraydescr, Descr)
-        return llimpl.do_getarrayitem_raw_int(array, index)
+        return llimpl.do_getarrayitem_raw_int(array, index, arraydescr.ofs)
     def bh_getarrayitem_gc_r(self, arraydescr, array, index):
         assert isinstance(arraydescr, Descr)
         return llimpl.do_getarrayitem_gc_ptr(array, index)
@@ -484,6 +472,19 @@
         return llimpl.do_setinteriorfield_gc_float(array, index, descr.ofs,
                                                    value)
 
+    def bh_raw_store_i(self, struct, offset, descr, newvalue):
+        assert isinstance(descr, Descr)
+        return llimpl.do_raw_store_int(struct, offset, descr.ofs, newvalue)
+    def bh_raw_store_f(self, struct, offset, descr, newvalue):
+        assert isinstance(descr, Descr)
+        return llimpl.do_raw_store_float(struct, offset, newvalue)
+    def bh_raw_load_i(self, struct, offset, descr):
+        assert isinstance(descr, Descr)
+        return llimpl.do_raw_load_int(struct, offset, descr.ofs)
+    def bh_raw_load_f(self, struct, offset, descr):
+        assert isinstance(descr, Descr)
+        return llimpl.do_raw_load_float(struct, offset)
+
     def bh_new(self, sizedescr):
         assert isinstance(sizedescr, Descr)
         return llimpl.do_new(sizedescr.ofs)
@@ -513,7 +514,7 @@
 
     def bh_setarrayitem_raw_i(self, arraydescr, array, index, newvalue):
         assert isinstance(arraydescr, Descr)
-        llimpl.do_setarrayitem_raw_int(array, index, newvalue)
+        llimpl.do_setarrayitem_raw_int(array, index, newvalue, arraydescr.ofs)
 
     def bh_setarrayitem_gc_r(self, arraydescr, array, index, newvalue):
         assert isinstance(arraydescr, Descr)
@@ -585,6 +586,9 @@
             for x in args_f:
                 llimpl.do_call_pushfloat(x)
 
+    def get_all_loop_runs(self):
+        return lltype.malloc(LOOP_RUN_CONTAINER, 0)
+
     def force(self, force_token):
         token = llmemory.cast_int_to_adr(force_token)
         frame = llimpl.get_forced_token_frame(token)
diff --git a/pypy/jit/backend/llgraph/symbolic.py b/pypy/jit/backend/llgraph/symbolic.py
--- a/pypy/jit/backend/llgraph/symbolic.py
+++ b/pypy/jit/backend/llgraph/symbolic.py
@@ -1,8 +1,7 @@
-import ctypes
 from pypy.rpython.lltypesystem import lltype, rffi, rclass
 
 
-Size2Type = [None]
+Size2Type = [None] * 100
 Type2Size = {}
 
 def get_size(TYPE):
@@ -14,7 +13,7 @@
         Type2Size[TYPE] = size
         return size
 
-TokenToField = [None]
+TokenToField = [None] * 100
 FieldToToken = {}
 
 def get_field_token(STRUCT, fieldname):
@@ -26,21 +25,3 @@
         FieldToToken[STRUCT, fieldname] = token
         return token
 get_field_token(rclass.OBJECT, 'typeptr')     # force the index 1 for this
-
-def get_array_token(T):
-    # T can be an array or a var-sized structure
-    if isinstance(T, lltype.Struct):
-        assert T._arrayfld is not None, "%r is not variable-sized" % (T,)
-        cstruct = ll2ctypes.get_ctypes_type(T)
-        cfield = getattr(cstruct, T._arrayfld)
-        before_array_part = cfield.offset
-        T = getattr(T, T._arrayfld)
-    else:
-        before_array_part = 0
-    carray = ll2ctypes.get_ctypes_type(T)
-    assert carray.length.size == 4
-    ofs_length = before_array_part + carray.length.offset
-    basesize = before_array_part + carray.items.offset
-    carrayitem = ll2ctypes.get_ctypes_type(T.OF)
-    itemsize = ctypes.sizeof(carrayitem)
-    return basesize, itemsize, ofs_length
diff --git a/pypy/jit/backend/llsupport/descr.py b/pypy/jit/backend/llsupport/descr.py
--- a/pypy/jit/backend/llsupport/descr.py
+++ b/pypy/jit/backend/llsupport/descr.py
@@ -237,29 +237,6 @@
         cache[(ARRAY, name)] = descr
         return descr
 
-def compute_flag(is_pointer, is_float, is_signed):
-    if is_pointer:
-        assert not is_float
-        return FLAG_POINTER
-    elif is_float:
-        return FLAG_FLOAT
-    elif is_signed:
-        return FLAG_SIGNED
-    else:
-        return FLAG_UNSIGNED
-
-def get_dynamic_field_descr(offset, fieldsize, is_pointer, is_float, is_signed):
-    flag = compute_flag(is_pointer, is_float, is_signed)
-    return FieldDescr('dynamic', offset, fieldsize, flag)
-
-def get_dynamic_interiorfield_descr(gc_ll_descr, offset, width, fieldsize,
-                                    is_pointer, is_float, is_signed):
-    arraydescr = ArrayDescr(0, width, None, FLAG_STRUCT)
-    flag = compute_flag(is_pointer, is_float, is_signed)
-    fielddescr = FieldDescr('dynamic', offset, fieldsize, flag)
-    return InteriorFieldDescr(arraydescr, fielddescr)
-
-
 # ____________________________________________________________
 # CallDescrs
 
diff --git a/pypy/jit/backend/llsupport/ffisupport.py b/pypy/jit/backend/llsupport/ffisupport.py
--- a/pypy/jit/backend/llsupport/ffisupport.py
+++ b/pypy/jit/backend/llsupport/ffisupport.py
@@ -1,43 +1,97 @@
 from pypy.rlib.rarithmetic import intmask
-from pypy.jit.metainterp import history
-from pypy.rpython.lltypesystem import rffi
+from pypy.rlib.objectmodel import specialize
+from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.jit.backend.llsupport.descr import CallDescr
 
 class UnsupportedKind(Exception):
     pass
 
-def get_call_descr_dynamic(cpu, ffi_args, ffi_result, extrainfo, ffi_flags):
-    """Get a call descr: the types of result and args are represented by
-    rlib.libffi.types.*"""
+def get_call_descr_dynamic(cpu, cif_description, extrainfo):
+    """Get a call descr from the given CIF_DESCRIPTION"""
+    ffi_result = cif_description.rtype
     try:
         reskind = get_ffi_type_kind(cpu, ffi_result)
-        argkinds = [get_ffi_type_kind(cpu, arg) for arg in ffi_args]
+        argkinds = [get_ffi_type_kind(cpu, cif_description.atypes[i])
+                    for i in range(cif_description.nargs)]
     except UnsupportedKind:
         return None
-    if reskind == history.VOID:
+    if reskind == 'v':
         result_size = 0
     else:
         result_size = intmask(ffi_result.c_size)
     argkinds = ''.join(argkinds)
     return CallDescr(argkinds, reskind, is_ffi_type_signed(ffi_result),
-                     result_size, extrainfo, ffi_flags=ffi_flags)
+                     result_size, extrainfo, ffi_flags=cif_description.abi)
 
 def get_ffi_type_kind(cpu, ffi_type):
-    from pypy.rlib.libffi import types
+    from pypy.rlib.jit_libffi import types
+    kind = types.getkind(ffi_type)
+    if ((not cpu.supports_floats and kind == 'f') or
+        (not cpu.supports_longlong and kind == 'L') or
+        (not cpu.supports_singlefloats and kind == 'S') or
+        kind == '*' or kind == '?'):
+        raise UnsupportedKind("Unsupported kind '%s'" % kind)
+    if kind == 'u':
+        kind = 'i'
+    return kind
+
+def is_ffi_type_signed(ffi_type):
+    from pypy.rlib.jit_libffi import types
+    kind = types.getkind(ffi_type)
+    return kind != 'u'
+
+ at specialize.memo()
+def _get_ffi2descr_dict(cpu):
+    d = {('v', 0): ('v', None)}
+    if cpu.supports_floats:
+        d[('f', 0)] = ('f', cpu.arraydescrof(rffi.CArray(lltype.Float)))
+    if cpu.supports_singlefloats:
+        d[('S', 0)] = ('i', cpu.arraydescrof(rffi.CArray(lltype.SingleFloat)))
+    for SIGNED_TYPE in [rffi.SIGNEDCHAR,
+                        rffi.SHORT,
+                        rffi.INT,
+                        rffi.LONG,
+                        rffi.LONGLONG]:
+        key = ('i', rffi.sizeof(SIGNED_TYPE))
+        kind = 'i'
+        if key[1] > rffi.sizeof(lltype.Signed):
+            if not cpu.supports_longlong:
+                continue
+            key = ('L', 0)
+            kind = 'f'
+        d[key] = (kind, cpu.arraydescrof(rffi.CArray(SIGNED_TYPE)))
+    for UNSIGNED_TYPE in [rffi.UCHAR,
+                          rffi.USHORT,
+                          rffi.UINT,
+                          rffi.ULONG,
+                          rffi.ULONGLONG]:
+        key = ('u', rffi.sizeof(UNSIGNED_TYPE))
+        if key[1] > rffi.sizeof(lltype.Signed):
+            continue
+        d[key] = ('i', cpu.arraydescrof(rffi.CArray(UNSIGNED_TYPE)))
+    return d
+
+def get_arg_descr(cpu, ffi_type):
+    from pypy.rlib.jit_libffi import types
     kind = types.getkind(ffi_type)
     if kind == 'i' or kind == 'u':
-        return history.INT
-    elif cpu.supports_floats and kind == 'f':
-        return history.FLOAT
-    elif kind == 'v':
-        return history.VOID
-    elif cpu.supports_longlong and (kind == 'I' or kind == 'U'):     # longlong
-        return 'L'
-    elif cpu.supports_singlefloats and kind == 's':    # singlefloat
-        return 'S'
-    raise UnsupportedKind("Unsupported kind '%s'" % kind)
+        size = rffi.getintfield(ffi_type, 'c_size')
+    else:
+        size = 0
+    return _get_ffi2descr_dict(cpu)[kind, size]
 
-def is_ffi_type_signed(ffi_type):
-    from pypy.rlib.libffi import types
-    kind = types.getkind(ffi_type)
-    return kind != 'u'
+def calldescr_dynamic_for_tests(cpu, atypes, rtype, abiname='FFI_DEFAULT_ABI'):
+    from pypy.rlib import clibffi
+    from pypy.rlib.jit_libffi import CIF_DESCRIPTION, FFI_TYPE_PP
+    from pypy.jit.codewriter.effectinfo import EffectInfo
+    #
+    p = lltype.malloc(CIF_DESCRIPTION, len(atypes),
+                      flavor='raw', immortal=True)
+    p.abi = getattr(clibffi, abiname)
+    p.nargs = len(atypes)
+    p.rtype = rtype
+    p.atypes = lltype.malloc(FFI_TYPE_PP.TO, len(atypes),
+                             flavor='raw', immortal=True)
+    for i in range(len(atypes)):
+        p.atypes[i] = atypes[i]
+    return cpu.calldescrof_dynamic(p, EffectInfo.MOST_GENERAL)
diff --git a/pypy/jit/backend/llsupport/llmodel.py b/pypy/jit/backend/llsupport/llmodel.py
--- a/pypy/jit/backend/llsupport/llmodel.py
+++ b/pypy/jit/backend/llsupport/llmodel.py
@@ -10,8 +10,8 @@
 from pypy.jit.backend.llsupport.symbolic import WORD, unroll_basic_sizes
 from pypy.jit.backend.llsupport.descr import (
     get_size_descr, get_field_descr, get_array_descr,
-    get_call_descr, get_interiorfield_descr, get_dynamic_interiorfield_descr,
-    FieldDescr, ArrayDescr, CallDescr, InteriorFieldDescr, get_dynamic_field_descr)
+    get_call_descr, get_interiorfield_descr,
+    FieldDescr, ArrayDescr, CallDescr, InteriorFieldDescr)
 from pypy.jit.backend.llsupport.asmmemmgr import AsmMemoryManager
 
 
@@ -247,9 +247,6 @@
     def fielddescrof(self, STRUCT, fieldname):
         return get_field_descr(self.gc_ll_descr, STRUCT, fieldname)
 
-    def fielddescrof_dynamic(self, offset, fieldsize, is_pointer, is_float, is_signed):
-        return get_dynamic_field_descr(offset, fieldsize, is_pointer, is_float, is_signed)
-
     def unpack_fielddescr(self, fielddescr):
         assert isinstance(fielddescr, FieldDescr)
         return fielddescr.offset
@@ -269,12 +266,6 @@
     def interiorfielddescrof(self, A, fieldname):
         return get_interiorfield_descr(self.gc_ll_descr, A, fieldname)
 
-    def interiorfielddescrof_dynamic(self, offset, width, fieldsize,
-                                     is_pointer, is_float, is_signed):
-        return get_dynamic_interiorfield_descr(self.gc_ll_descr,
-                                               offset, width, fieldsize,
-                                               is_pointer, is_float, is_signed)
-
     def unpack_arraydescr(self, arraydescr):
         assert isinstance(arraydescr, ArrayDescr)
         return arraydescr.basesize
@@ -291,10 +282,16 @@
     def calldescrof(self, FUNC, ARGS, RESULT, extrainfo):
         return get_call_descr(self.gc_ll_descr, ARGS, RESULT, extrainfo)
 
-    def calldescrof_dynamic(self, ffi_args, ffi_result, extrainfo, ffi_flags):
+    def calldescrof_dynamic(self, cif_description, extrainfo):
         from pypy.jit.backend.llsupport import ffisupport
-        return ffisupport.get_call_descr_dynamic(self, ffi_args, ffi_result,
-                                                 extrainfo, ffi_flags)
+        return ffisupport.get_call_descr_dynamic(self, cif_description,
+                                                 extrainfo)
+
+    def _calldescr_dynamic_for_tests(self, atypes, rtype,
+                                     abiname='FFI_DEFAULT_ABI'):
+        from pypy.jit.backend.llsupport import ffisupport
+        return ffisupport.calldescr_dynamic_for_tests(self, atypes, rtype,
+                                                      abiname)
 
     def get_overflow_error(self):
         ovf_vtable = self.cast_adr_to_int(self._ovf_error_vtable)
@@ -591,6 +588,32 @@
     bh_setfield_raw_r = _base_do_setfield_r
     bh_setfield_raw_f = _base_do_setfield_f
 
+    def bh_raw_store_i(self, addr, offset, descr, newvalue):
+        ofs, size, sign = self.unpack_arraydescr_size(descr)
+        items = addr + offset
+        for TYPE, _, itemsize in unroll_basic_sizes:
+            if size == itemsize:
+                items = rffi.cast(rffi.CArrayPtr(TYPE), items)
+                items[0] = rffi.cast(TYPE, newvalue)
+                break
+
+    def bh_raw_store_f(self, addr, offset, descr, newvalue):
+        items = rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), addr + offset)
+        items[0] = newvalue
+
+    def bh_raw_load_i(self, addr, offset, descr):
+        ofs, size, sign = self.unpack_arraydescr_size(descr)
+        items = addr + offset
+        for TYPE, _, itemsize in unroll_basic_sizes:
+            if size == itemsize:
+                items = rffi.cast(rffi.CArrayPtr(TYPE), items)
+                return rffi.cast(lltype.Signed, items[0])
+        assert False # unreachable code
+
+    def bh_raw_load_f(self, addr, offset, descr):
+        items = rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), addr + offset)
+        return items[0]
+
     def bh_new(self, sizedescr):
         return self.gc_ll_descr.gc_malloc(sizedescr)
 
diff --git a/pypy/jit/backend/llsupport/test/test_ffisupport.py b/pypy/jit/backend/llsupport/test/test_ffisupport.py
--- a/pypy/jit/backend/llsupport/test/test_ffisupport.py
+++ b/pypy/jit/backend/llsupport/test/test_ffisupport.py
@@ -1,4 +1,6 @@
-from pypy.rlib.libffi import types
+from pypy.rlib.jit_libffi import types, CIF_DESCRIPTION, FFI_TYPE_PP
+from pypy.rlib.clibffi import FFI_DEFAULT_ABI
+from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.jit.codewriter.longlong import is_64_bit
 from pypy.jit.backend.llsupport.descr import *
 from pypy.jit.backend.llsupport.ffisupport import *
@@ -11,56 +13,55 @@
         self.supports_floats = supports_floats
         self.supports_longlong = supports_longlong
         self.supports_singlefloats = supports_singlefloats
-
+    def calldescrof_dynamic(self, cif_descr, effectinfo):
+        return get_call_descr_dynamic(self, cif_descr, effectinfo)
 
 def test_call_descr_dynamic():
     args = [types.sint, types.pointer]
-    descr = get_call_descr_dynamic(FakeCPU(), args, types.sint, None,
-                                   ffi_flags=42)
+    descr = calldescr_dynamic_for_tests(FakeCPU(), args, types.sint)
     assert isinstance(descr, CallDescr)
     assert descr.result_type == 'i'
     assert descr.result_flag == FLAG_SIGNED
     assert descr.arg_classes == 'ii'
-    assert descr.get_ffi_flags() == 42
+    assert descr.get_ffi_flags() == FFI_DEFAULT_ABI
 
     args = [types.sint, types.double, types.pointer]
-    descr = get_call_descr_dynamic(FakeCPU(), args, types.void, None, 42)
+    descr = calldescr_dynamic_for_tests(FakeCPU(), args, types.void)
     assert descr is None    # missing floats
-    descr = get_call_descr_dynamic(FakeCPU(supports_floats=True),
-                                   args, types.void, None, ffi_flags=43)
+    descr = calldescr_dynamic_for_tests(FakeCPU(supports_floats=True),
+                                        args, types.void)
     assert descr.result_type == 'v'
     assert descr.result_flag == FLAG_VOID
     assert descr.arg_classes == 'ifi'
-    assert descr.get_ffi_flags() == 43
+    assert descr.get_ffi_flags() == FFI_DEFAULT_ABI
 
-    descr = get_call_descr_dynamic(FakeCPU(), [], types.sint8, None, 42)
+    descr = calldescr_dynamic_for_tests(FakeCPU(), [], types.sint8)
     assert descr.get_result_size() == 1
     assert descr.result_flag == FLAG_SIGNED
     assert descr.is_result_signed() == True
 
-    descr = get_call_descr_dynamic(FakeCPU(), [], types.uint8, None, 42)
+    descr = calldescr_dynamic_for_tests(FakeCPU(), [], types.uint8)
     assert isinstance(descr, CallDescr)
     assert descr.get_result_size() == 1
     assert descr.result_flag == FLAG_UNSIGNED
     assert descr.is_result_signed() == False
 
     if not is_64_bit or is_emulated_long:
-        descr = get_call_descr_dynamic(FakeCPU(), [], types.slonglong,
-                                       None, 42)
+        descr = calldescr_dynamic_for_tests(FakeCPU(), [], types.slonglong)
         assert descr is None   # missing longlongs
-        descr = get_call_descr_dynamic(FakeCPU(supports_longlong=True),
-                                       [], types.slonglong, None, ffi_flags=43)
+        descr = calldescr_dynamic_for_tests(FakeCPU(supports_longlong=True),
+                                            [], types.slonglong)
         assert isinstance(descr, CallDescr)
         assert descr.result_flag == FLAG_FLOAT
         assert descr.result_type == 'L'
-        assert descr.get_ffi_flags() == 43
+        assert descr.get_ffi_flags() == FFI_DEFAULT_ABI
     else:
         assert types.slonglong is types.slong
 
-    descr = get_call_descr_dynamic(FakeCPU(), [], types.float, None, 42)
+    descr = calldescr_dynamic_for_tests(FakeCPU(), [], types.float)
     assert descr is None   # missing singlefloats
-    descr = get_call_descr_dynamic(FakeCPU(supports_singlefloats=True),
-                                   [], types.float, None, ffi_flags=44)
+    descr = calldescr_dynamic_for_tests(FakeCPU(supports_singlefloats=True),
+                                        [], types.float)
     assert descr.result_flag == FLAG_UNSIGNED
     assert descr.result_type == 'S'
-    assert descr.get_ffi_flags() == 44
+    assert descr.get_ffi_flags() == FFI_DEFAULT_ABI
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -60,6 +60,21 @@
         """Called once by the front-end when the program stops."""
         pass
 
+    def get_all_loop_runs(self):
+        """ Function that will return number of times all the loops were run.
+        Requires earlier setting of set_debug(True), otherwise you won't
+        get the information.
+
+        Returns an instance of LOOP_RUN_CONTAINER from rlib.jit_hooks
+        """
+        raise NotImplementedError
+
+    def set_debug(self, value):
+        """ Enable or disable debugging info. Does nothing by default. Returns
+        the previous setting.
+        """
+        return False
+
     def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
         """Assemble the given loop.
         Should create and attach a fresh CompiledLoopToken to
@@ -198,10 +213,6 @@
     def interiorfielddescrof(self, A, fieldname):
         raise NotImplementedError
 
-    def interiorfielddescrof_dynamic(self, offset, width, fieldsize, is_pointer,
-        is_float, is_signed):
-        raise NotImplementedError
-
     def arraydescrof(self, A):
         raise NotImplementedError
 
diff --git a/pypy/jit/backend/test/calling_convention_test.py b/pypy/jit/backend/test/calling_convention_test.py
--- a/pypy/jit/backend/test/calling_convention_test.py
+++ b/pypy/jit/backend/test/calling_convention_test.py
@@ -60,7 +60,6 @@
         return ConstInt(heaptracker.adr2int(addr))
 
     def test_call_aligned_with_spilled_values(self):
-        from pypy.rlib.libffi import types
         cpu = self.cpu
         if not cpu.supports_floats:
             py.test.skip('requires floats')
@@ -119,7 +118,6 @@
             assert abs(x - expected_result) < 0.0001
 
     def test_call_aligned_with_imm_values(self):
-        from pypy.rlib.libffi import types
         cpu = self.cpu
         if not cpu.supports_floats:
             py.test.skip('requires floats')
@@ -162,7 +160,6 @@
             assert abs(res.getfloat() - result) < 0.0001
 
     def test_call_aligned_with_args_on_the_stack(self):
-        from pypy.rlib.libffi import types
         cpu = self.cpu
         if not cpu.supports_floats:
             py.test.skip('requires floats')
@@ -205,7 +202,6 @@
             assert abs(res.getfloat() - result) < 0.0001
 
     def test_call_alignment_call_assembler(self):
-        from pypy.rlib.libffi import types
         cpu = self.cpu
         if not cpu.supports_floats:
             py.test.skip('requires floats')
@@ -332,7 +328,6 @@
             py.test.skip('requires floats and singlefloats')
 
         import random
-        from pypy.rlib.libffi import types
         from pypy.rlib.rarithmetic import r_singlefloat
 
         def func(*args):
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -611,7 +611,7 @@
             assert longlong.getrealfloat(x) == 3.5 - 42
 
     def test_call(self):
-        from pypy.rlib.libffi import types, FUNCFLAG_CDECL
+        from pypy.rlib.jit_libffi import types
 
         def func_int(a, b):
             return a + b
@@ -639,9 +639,8 @@
                                          'int', descr=calldescr)
             assert res.value == 2 * num
             # then, try it with the dynamic calldescr
-            dyn_calldescr = cpu.calldescrof_dynamic([ffi_type, ffi_type], ffi_type,
-                                                    EffectInfo.MOST_GENERAL,
-                                                    ffi_flags=FUNCFLAG_CDECL)
+            dyn_calldescr = cpu._calldescr_dynamic_for_tests(
+                [ffi_type, ffi_type], ffi_type)
             res = self.execute_operation(rop.CALL,
                                          [funcbox, BoxInt(num), BoxInt(num)],
                                          'int', descr=dyn_calldescr)
@@ -1845,6 +1844,7 @@
         if not self.cpu.supports_longlong:
             py.test.skip("longlong test")
         if sys.platform == 'win32':
+            # windows quite often is very inexact (like the old Intel 8259 PIC),
             # so we stretch the time a little bit.
             # On my virtual Parallels machine in a 2GHz Core i7 Mac Mini,
             # the test starts working at delay == 21670 and stops at 20600000.
@@ -1923,7 +1923,6 @@
         return BoxPtr(lltype.nullptr(llmemory.GCREF.TO))
 
     def alloc_array_of(self, ITEM, length):
-        cpu = self.cpu
         A = lltype.GcArray(ITEM)
         a = lltype.malloc(A, length)
         a_box = BoxPtr(lltype.cast_opaque_ptr(llmemory.GCREF, a))
@@ -1964,6 +1963,10 @@
         assert res == -19
 
     def test_convert_float_bytes(self):
+        if not self.cpu.supports_floats:
+            py.test.skip("requires floats")
+        if not self.cpu.supports_longlong:
+            py.test.skip("longlong test")
         t = 'int' if longlong.is_64_bit else 'float'
         res = self.execute_operation(rop.CONVERT_FLOAT_BYTES_TO_LONGLONG,
                                      [boxfloat(2.5)], t).value
@@ -2000,39 +2003,6 @@
         assert s.x == chr(190)
         assert s.y == chr(150)
 
-    def test_fielddescrof_dynamic(self):
-        S = lltype.Struct('S',
-                          ('x', lltype.Signed),
-                          ('y', lltype.Signed),
-                          )
-        longsize = rffi.sizeof(lltype.Signed)
-        y_ofs = longsize
-        s = lltype.malloc(S, flavor='raw')
-        sa = llmemory.cast_ptr_to_adr(s)
-        s_box = BoxInt(heaptracker.adr2int(sa))
-        #
-        field = self.cpu.fielddescrof(S, 'y')
-        field_dyn = self.cpu.fielddescrof_dynamic(offset=y_ofs,
-                                                  fieldsize=longsize,
-                                                  is_pointer=False,
-                                                  is_float=False,
-                                                  is_signed=True)
-        assert field.is_pointer_field() == field_dyn.is_pointer_field()
-        assert field.is_float_field()   == field_dyn.is_float_field()
-        if 'llgraph' not in str(self.cpu):
-            assert field.is_field_signed()  == field_dyn.is_field_signed()
-
-        #
-        for get_op, set_op in ((rop.GETFIELD_RAW, rop.SETFIELD_RAW),
-                               (rop.GETFIELD_RAW_PURE, rop.SETFIELD_RAW)):
-            for descr in (field, field_dyn):
-                self.execute_operation(set_op, [s_box, BoxInt(32)], 'void',
-                                       descr=descr)
-                res = self.execute_operation(get_op, [s_box], 'int', descr=descr)
-                assert res.getint()  == 32
-
-        lltype.free(s, flavor='raw')
-
     def test_new_with_vtable(self):
         cpu = self.cpu
         t_box, T_box = self.alloc_instance(self.T)
@@ -2469,9 +2439,7 @@
         cpu = self.cpu
         func_adr = llmemory.cast_ptr_to_adr(c_tolower.funcsym)
         funcbox = ConstInt(heaptracker.adr2int(func_adr))
-        calldescr = cpu.calldescrof_dynamic([types.uchar], types.sint,
-                                            EffectInfo.MOST_GENERAL,
-                                            ffi_flags=FUNCFLAG_CDECL)
+        calldescr = cpu._calldescr_dynamic_for_tests([types.uchar], types.sint)
         i1 = BoxInt()
         i2 = BoxInt()
         tok = BoxInt()
@@ -2524,11 +2492,9 @@
         cpu = self.cpu
         func_adr = llmemory.cast_ptr_to_adr(c_qsort.funcsym)
         funcbox = ConstInt(heaptracker.adr2int(func_adr))
-        calldescr = cpu.calldescrof_dynamic([types.pointer, types_size_t,
-                                             types_size_t, types.pointer],
-                                            types.void,
-                                            EffectInfo.MOST_GENERAL,
-                                            ffi_flags=clibffi.FUNCFLAG_CDECL)
+        calldescr = cpu._calldescr_dynamic_for_tests(
+            [types.pointer, types_size_t, types_size_t, types.pointer],
+            types.void)
         i0 = BoxInt()
         i1 = BoxInt()
         i2 = BoxInt()
@@ -2577,10 +2543,10 @@
         cpu = self.cpu
         func_adr = llmemory.cast_ptr_to_adr(c_GetCurrentDir.funcsym)
         funcbox = ConstInt(heaptracker.adr2int(func_adr))
-        calldescr = cpu.calldescrof_dynamic([types.ulong, types.pointer],
-                                            types.ulong,
-                                            EffectInfo.MOST_GENERAL,
-                                            ffi_flags=FUNCFLAG_STDCALL)
+        calldescr = cpu._calldescr_dynamic_for_tests(
+            [types.ulong, types.pointer],
+            types.ulong,
+            abiname='FFI_STDCALL')
         i1 = BoxInt()
         i2 = BoxInt()
         faildescr = BasicFailDescr(1)
@@ -2863,13 +2829,14 @@
         assert str.chars[4] == '/'
 
     def test_sorting_of_fields(self):
-        S = self.S
+        S = lltype.GcStruct('S', ('parent', rclass.OBJECT),
+                                  ('value', lltype.Signed),
+                                  ('chr1', lltype.Char),
+                                  ('chr2', lltype.Char))
+        chr1 = self.cpu.fielddescrof(S, 'chr1').sort_key()
         value = self.cpu.fielddescrof(S, 'value').sort_key()
-        chr1 = self.cpu.fielddescrof(S, 'chr1').sort_key()
         chr2 = self.cpu.fielddescrof(S, 'chr2').sort_key()
-        assert (sorted([chr2, chr1, value]) ==
-                [value, chr1, chr2])
-        assert len(dict.fromkeys([value, chr1, chr2]).keys()) == 3
+        assert len(set([value, chr1, chr2])) == 3
 
     def test_guards_nongc(self):
         x = lltype.malloc(lltype.Struct('x'), flavor='raw')
@@ -3570,6 +3537,20 @@
         res = self.cpu.get_latest_value_int(0)
         assert res == -10
 
+    def test_int_force_ge_zero(self):
+        ops = """
+        [i0]
+        i1 = int_force_ge_zero(i0)    # but forced to be in a register
+        finish(i1, descr=1)
+        """
+        loop = parse(ops, self.cpu, namespace=locals())
+        descr = loop.operations[-1].getdescr()
+        looptoken = JitCellToken()
+        self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+        for inp, outp in [(2,2), (-3, 0)]:
+            self.cpu.execute_token(looptoken, inp)
+            assert outp == self.cpu.get_latest_value_int(0)
+
     def test_compile_asmlen(self):
         from pypy.jit.backend.llsupport.llmodel import AbstractLLCPU
         if not isinstance(self.cpu, AbstractLLCPU):
@@ -3577,7 +3558,7 @@
         from pypy.jit.backend.tool.viewcode import machine_code_dump
         import ctypes
         ops = """
-        [i3, i2]
+        [i2]
         i0 = same_as(i2)    # but forced to be in a register
         label(i0, descr=1)
         i1 = int_add(i0, i0)
@@ -3732,6 +3713,108 @@
         fail = self.cpu.execute_token(looptoken, null_box.getref_base())
         assert fail.identifier == 99
 
+    def test_raw_load_int(self):
+        from pypy.rlib import rawstorage
+        for T in [rffi.UCHAR, rffi.SIGNEDCHAR,
+                  rffi.USHORT, rffi.SHORT,
+                  rffi.UINT, rffi.INT,
+                  rffi.ULONG, rffi.LONG]:
+            ops = """
+            [i0, i1]
+            i2 = raw_load(i0, i1, descr=arraydescr)
+            finish(i2)
+            """
+            arraydescr = self.cpu.arraydescrof(rffi.CArray(T))
+            p = rawstorage.alloc_raw_storage(31)
+            for i in range(31):
+                p[i] = '\xDD'
+            value = rffi.cast(T, 0x4243444546474849)
+            rawstorage.raw_storage_setitem(p, 16, value)
+            loop = parse(ops, self.cpu, namespace=locals())
+            looptoken = JitCellToken()
+            self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+            self.cpu.execute_token(looptoken,
+                                   rffi.cast(lltype.Signed, p), 16)
+            result = self.cpu.get_latest_value_int(0)
+            assert result == rffi.cast(lltype.Signed, value)
+            rawstorage.free_raw_storage(p)
+
+    def test_raw_load_float(self):
+        if not self.cpu.supports_floats:
+            py.test.skip("requires floats")
+        from pypy.rlib import rawstorage
+        for T in [rffi.DOUBLE]:
+            ops = """
+            [i0, i1]
+            f2 = raw_load(i0, i1, descr=arraydescr)
+            finish(f2)
+            """
+            arraydescr = self.cpu.arraydescrof(rffi.CArray(T))
+            p = rawstorage.alloc_raw_storage(31)
+            for i in range(31):
+                p[i] = '\xDD'
+            value = rffi.cast(T, 1.12e20)
+            rawstorage.raw_storage_setitem(p, 16, value)
+            loop = parse(ops, self.cpu, namespace=locals())
+            looptoken = JitCellToken()
+            self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+            self.cpu.execute_token(looptoken,
+                                   rffi.cast(lltype.Signed, p), 16)
+            result = self.cpu.get_latest_value_float(0)
+            result = longlong.getrealfloat(result)
+            assert result == rffi.cast(lltype.Float, value)
+            rawstorage.free_raw_storage(p)
+
+    def test_raw_store_int(self):
+        from pypy.rlib import rawstorage
+        for T in [rffi.UCHAR, rffi.SIGNEDCHAR,
+                  rffi.USHORT, rffi.SHORT,
+                  rffi.UINT, rffi.INT,
+                  rffi.ULONG, rffi.LONG]:
+            ops = """
+            [i0, i1, i2]
+            raw_store(i0, i1, i2, descr=arraydescr)
+            finish()
+            """
+            arraydescr = self.cpu.arraydescrof(rffi.CArray(T))
+            p = rawstorage.alloc_raw_storage(31)
+            for i in range(31):
+                p[i] = '\xDD'
+            value = 0x4243444546474849 & sys.maxint
+            loop = parse(ops, self.cpu, namespace=locals())
+            looptoken = JitCellToken()
+            self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+            self.cpu.execute_token(looptoken,
+                                   rffi.cast(lltype.Signed, p), 16, value)
+            result = rawstorage.raw_storage_getitem(T, p, 16)
+            assert result == rffi.cast(T, value)
+            rawstorage.free_raw_storage(p)
+
+    def test_raw_store_float(self):
+        if not self.cpu.supports_floats:
+            py.test.skip("requires floats")
+        from pypy.rlib import rawstorage
+        for T in [rffi.DOUBLE]:
+            ops = """
+            [i0, i1, f2]
+            raw_store(i0, i1, f2, descr=arraydescr)
+            finish()
+            """
+            arraydescr = self.cpu.arraydescrof(rffi.CArray(T))
+            p = rawstorage.alloc_raw_storage(31)
+            for i in range(31):
+                p[i] = '\xDD'
+            value = 1.23e20
+            loop = parse(ops, self.cpu, namespace=locals())
+            looptoken = JitCellToken()
+            self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+            self.cpu.execute_token(looptoken,
+                                   rffi.cast(lltype.Signed, p), 16,
+                                   longlong.getfloatstorage(value))
+            result = rawstorage.raw_storage_getitem(T, p, 16)
+            assert result == rffi.cast(T, value)
+            rawstorage.free_raw_storage(p)
+
     def test_forcing_op_with_fail_arg_in_reg(self):
         values = []
         def maybe_force(token, flag):
diff --git a/pypy/jit/backend/test/test_zll_stress.py b/pypy/jit/backend/test/test_zll_stress.py
--- a/pypy/jit/backend/test/test_zll_stress.py
+++ b/pypy/jit/backend/test/test_zll_stress.py
@@ -1,12 +1,18 @@
 from pypy.jit.backend.test.test_random import check_random_function, Random
 from pypy.jit.backend.test.test_ll_random import LLtypeOperationBuilder
 from pypy.jit.backend.detect_cpu import getcpuclass
+import platform
 
 CPU = getcpuclass()
 
+iterations = 1000
+if platform.machine().startswith('arm'):
+    iterations = 100
+
+
 def test_stress():
     cpu = CPU(None, None)
     cpu.setup_once()
     r = Random()
-    for i in range(1000):
-        check_random_function(cpu, LLtypeOperationBuilder, r, i, 1000)
+    for i in range(iterations):
+        check_random_function(cpu, LLtypeOperationBuilder, r, i, iterations)
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -101,7 +101,9 @@
                                       llmemory.cast_ptr_to_adr(ptrs))
 
     def set_debug(self, v):
+        r = self._debug
         self._debug = v
+        return r
 
     def setup_once(self):
         # the address of the function called by 'new'
@@ -125,9 +127,13 @@
         self._build_stack_check_slowpath()
         if gc_ll_descr.gcrootmap:
             self._build_release_gil(gc_ll_descr.gcrootmap)
-        debug_start('jit-backend-counts')
-        self.set_debug(have_debug_prints())
-        debug_stop('jit-backend-counts')
+        if not self._debug:
+            # if self._debug is already set it means that someone called
+            # set_debug by hand before initializing the assembler. Leave it
+            # as it is
+            debug_start('jit-backend-counts')
+            self.set_debug(have_debug_prints())
+            debug_stop('jit-backend-counts')
 
     def setup(self, looptoken):
         assert self.memcpy_addr != 0, "setup_once() not called?"
@@ -750,7 +756,6 @@
     @specialize.argtype(1)
     def _inject_debugging_code(self, looptoken, operations, tp, number):
         if self._debug:
-            # before doing anything, let's increase a counter
             s = 0
             for op in operations:
                 s += op.getopnum()
@@ -997,6 +1002,24 @@
             getattr(self.mc, asmop)(arglocs[0], arglocs[1])
         return genop_binary
 
+    def _binaryop_or_lea(asmop, is_add):
+        def genop_binary_or_lea(self, op, arglocs, result_loc):
+            # use a regular ADD or SUB if result_loc is arglocs[0],
+            # and a LEA only if different.
+            if result_loc is arglocs[0]:
+                getattr(self.mc, asmop)(arglocs[0], arglocs[1])
+            else:
+                loc = arglocs[0]
+                argloc = arglocs[1]
+                assert isinstance(loc, RegLoc)
+                assert isinstance(argloc, ImmedLoc)
+                assert isinstance(result_loc, RegLoc)
+                delta = argloc.value
+                if not is_add:    # subtraction
+                    delta = -delta
+                self.mc.LEA_rm(result_loc.value, (loc.value, delta))
+        return genop_binary_or_lea
+
     def _cmpop(cond, rev_cond):
         def genop_cmp(self, op, arglocs, result_loc):
             rl = result_loc.lowest8bits()
@@ -1223,8 +1246,8 @@
 
     genop_int_neg = _unaryop("NEG")
     genop_int_invert = _unaryop("NOT")
-    genop_int_add = _binaryop("ADD", True)
-    genop_int_sub = _binaryop("SUB")
+    genop_int_add = _binaryop_or_lea("ADD", True)
+    genop_int_sub = _binaryop_or_lea("SUB", False)
     genop_int_mul = _binaryop("IMUL", True)
     genop_int_and = _binaryop("AND", True)
     genop_int_or  = _binaryop("OR", True)
@@ -1374,6 +1397,11 @@
     genop_cast_ptr_to_int = genop_same_as
     genop_cast_int_to_ptr = genop_same_as
 
+    def genop_int_force_ge_zero(self, op, arglocs, resloc):
+        self.mc.TEST(arglocs[0], arglocs[0])
+        self.mov(imm0, resloc)
+        self.mc.CMOVNS(resloc, arglocs[0])
+
     def genop_int_mod(self, op, arglocs, resloc):
         if IS_X86_32:
             self.mc.CDQ()
@@ -1544,6 +1572,13 @@
 
     genop_getarrayitem_gc_pure = genop_getarrayitem_gc
     genop_getarrayitem_raw = genop_getarrayitem_gc
+    genop_getarrayitem_raw_pure = genop_getarrayitem_gc
+
+    def genop_raw_load(self, op, arglocs, resloc):
+        base_loc, ofs_loc, size_loc, ofs, sign_loc = arglocs
+        assert isinstance(ofs, ImmedLoc)
+        src_addr = addr_add(base_loc, ofs_loc, ofs.value, 0)
+        self.load_from_mem(resloc, src_addr, size_loc, sign_loc)
 
     def _get_interiorfield_addr(self, temp_loc, index_loc, itemsize_loc,
                                 base_loc, ofs_loc):
@@ -1570,9 +1605,6 @@
                                                 ofs_loc)
         self.load_from_mem(resloc, src_addr, fieldsize_loc, sign_loc)
 
-    genop_getinteriorfield_raw = genop_getinteriorfield_gc
-
-
     def genop_discard_setfield_gc(self, op, arglocs):
         base_loc, ofs_loc, size_loc, value_loc = arglocs
         assert isinstance(size_loc, ImmedLoc)
@@ -1597,6 +1629,12 @@
         dest_addr = AddressLoc(base_loc, ofs_loc, scale, baseofs.value)
         self.save_into_mem(dest_addr, value_loc, size_loc)
 
+    def genop_discard_raw_store(self, op, arglocs):
+        base_loc, ofs_loc, value_loc, size_loc, baseofs = arglocs
+        assert isinstance(baseofs, ImmedLoc)
+        dest_addr = AddressLoc(base_loc, ofs_loc, 0, baseofs.value)
+        self.save_into_mem(dest_addr, value_loc, size_loc)
+
     def genop_discard_strsetitem(self, op, arglocs):
         base_loc, ofs_loc, val_loc = arglocs
         basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
@@ -1705,15 +1743,15 @@
                             guard_op.getopname())
 
     def genop_guard_int_add_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
-        self.genop_int_add(op, arglocs, result_loc)
+        self.mc.ADD(arglocs[0], arglocs[1])
         return self._gen_guard_overflow(guard_op, guard_token)
 
     def genop_guard_int_sub_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
-        self.genop_int_sub(op, arglocs, result_loc)
+        self.mc.SUB(arglocs[0], arglocs[1])
         return self._gen_guard_overflow(guard_op, guard_token)
 
     def genop_guard_int_mul_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
-        self.genop_int_mul(op, arglocs, result_loc)
+        self.mc.IMUL(arglocs[0], arglocs[1])
         return self._gen_guard_overflow(guard_op, guard_token)
 
     def genop_guard_guard_false(self, ign_1, guard_op, guard_token, locs, ign_2):
@@ -2629,13 +2667,13 @@
     return AddressLoc(reg_or_imm1, reg_or_imm2, scale, offset)
 
 def addr_add_const(reg_or_imm1, offset):
-    return AddressLoc(reg_or_imm1, ImmedLoc(0), 0, offset)
+    return AddressLoc(reg_or_imm1, imm0, 0, offset)
 
 def mem(loc, offset):
-    return AddressLoc(loc, ImmedLoc(0), 0, offset)
+    return AddressLoc(loc, imm0, 0, offset)
 
 def heap(addr):
-    return AddressLoc(ImmedLoc(addr), ImmedLoc(0), 0, 0)
+    return AddressLoc(ImmedLoc(addr), imm0, 0, 0)
 
 def not_implemented(msg):
     os.write(2, '[x86/asm] %s\n' % msg)
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -26,6 +26,7 @@
      TempBox, compute_vars_longevity, is_comparison_or_ovf_op
 from pypy.jit.backend.x86.arch import WORD, FRAME_FIXED_SIZE
 from pypy.jit.backend.x86.arch import IS_X86_32, IS_X86_64, MY_COPY_OF_REGS
+from pypy.jit.backend.x86 import rx86
 from pypy.rlib.rarithmetic import r_longlong
 
 class X86RegisterManager(RegisterManager):
@@ -552,9 +553,31 @@
         loc, argloc = self._consider_binop_part(op)
         self.Perform(op, [loc, argloc], loc)
 
-    consider_int_add = _consider_binop
+    def _consider_lea(self, op, loc):
+        argloc = self.loc(op.getarg(1))
+        self.rm.possibly_free_var(op.getarg(0))
+        resloc = self.force_allocate_reg(op.result)
+        self.Perform(op, [loc, argloc], resloc)
+
+    def consider_int_add(self, op):
+        loc = self.loc(op.getarg(0))
+        y = op.getarg(1)
+        if (isinstance(loc, RegLoc) and
+            isinstance(y, ConstInt) and rx86.fits_in_32bits(y.value)):
+            self._consider_lea(op, loc)
+        else:
+            self._consider_binop(op)
+
+    def consider_int_sub(self, op):
+        loc = self.loc(op.getarg(0))
+        y = op.getarg(1)
+        if (isinstance(loc, RegLoc) and
+            isinstance(y, ConstInt) and rx86.fits_in_32bits(-y.value)):
+            self._consider_lea(op, loc)
+        else:
+            self._consider_binop(op)
+
     consider_int_mul = _consider_binop
-    consider_int_sub = _consider_binop
     consider_int_and = _consider_binop
     consider_int_or  = _consider_binop
     consider_int_xor = _consider_binop
@@ -1022,6 +1045,7 @@
                                  imm(itemsize), imm(ofs)])
 
     consider_setarrayitem_raw = consider_setarrayitem_gc
+    consider_raw_store = consider_setarrayitem_gc
 
     def consider_getfield_gc(self, op):
         ofs, size, sign = unpack_fielddescr(op.getdescr())
@@ -1057,6 +1081,8 @@
 
     consider_getarrayitem_raw = consider_getarrayitem_gc
     consider_getarrayitem_gc_pure = consider_getarrayitem_gc
+    consider_getarrayitem_raw_pure = consider_getarrayitem_gc
+    consider_raw_load = consider_getarrayitem_gc
 
     def consider_getinteriorfield_gc(self, op):
         t = unpack_interiorfielddescr(op.getdescr())
@@ -1088,8 +1114,6 @@
         self.Perform(op, [base_loc, ofs, itemsize, fieldsize,
                           index_loc, temp_loc, sign_loc], result_loc)
 
-    consider_getinteriorfield_raw = consider_getinteriorfield_gc
-
     def consider_int_is_true(self, op, guard_op):
         # doesn't need arg to be in a register
         argloc = self.loc(op.getarg(0))
@@ -1110,6 +1134,12 @@
     consider_cast_ptr_to_int = consider_same_as
     consider_cast_int_to_ptr = consider_same_as
 
+    def consider_int_force_ge_zero(self, op):
+        argloc = self.make_sure_var_in_reg(op.getarg(0))
+        resloc = self.force_allocate_reg(op.result, [op.getarg(0)])
+        self.possibly_free_var(op.getarg(0))
+        self.Perform(op, [argloc], resloc)
+
     def consider_strlen(self, op):
         args = op.getarglist()
         base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py
--- a/pypy/jit/backend/x86/regloc.py
+++ b/pypy/jit/backend/x86/regloc.py
@@ -548,6 +548,7 @@
     # Avoid XCHG because it always implies atomic semantics, which is
     # slower and does not pair well for dispatch.
     #XCHG = _binaryop('XCHG')
+    CMOVNS = _binaryop('CMOVNS')
 
     PUSH = _unaryop('PUSH')
     POP = _unaryop('POP')
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -3,6 +3,7 @@
 from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rpython.llinterp import LLInterpreter
 from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.jit_hooks import LOOP_RUN_CONTAINER
 from pypy.jit.codewriter import longlong
 from pypy.jit.metainterp import history, compile
 from pypy.jit.backend.x86.assembler import Assembler386
@@ -44,6 +45,9 @@
 
         self.profile_agent = profile_agent
 
+    def set_debug(self, flag):
+        return self.assembler.set_debug(flag)
+
     def setup(self):
         if self.opts is not None:
             failargs_limit = self.opts.failargs_limit
@@ -181,6 +185,14 @@
         # positions invalidated
         looptoken.compiled_loop_token.invalidate_positions = []
 
+    def get_all_loop_runs(self):
+        l = lltype.malloc(LOOP_RUN_CONTAINER,
+                          len(self.assembler.loop_run_counters))
+        for i, ll_s in enumerate(self.assembler.loop_run_counters):
+            l[i].type = ll_s.type
+            l[i].number = ll_s.number
+            l[i].counter = ll_s.i
+        return l
 
 class CPU386(AbstractX86CPU):
     backend_name = 'x86'
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -530,6 +530,8 @@
     NOT_r = insn(rex_w, '\xF7', register(1), '\xD0')
     NOT_b = insn(rex_w, '\xF7', orbyte(2<<3), stack_bp(1))
 
+    CMOVNS_rr = insn(rex_w, '\x0F\x49', register(1, 8), register(2), '\xC0')
+
     # ------------------------------ Misc stuff ------------------------------
 
     NOP = insn('\x90')
diff --git a/pypy/jit/backend/x86/test/test_assembler.py b/pypy/jit/backend/x86/test/test_assembler.py
--- a/pypy/jit/backend/x86/test/test_assembler.py
+++ b/pypy/jit/backend/x86/test/test_assembler.py
@@ -10,8 +10,11 @@
 from pypy.jit.backend.x86.regalloc import X86RegisterManager, X86_64_RegisterManager, X86XMMRegisterManager, X86_64_XMMRegisterManager
 from pypy.jit.codewriter import longlong
 import ctypes
+import py
 
 ACTUAL_CPU = getcpuclass()
+if not hasattr(ACTUAL_CPU, 'NUM_REGS'):
+    py.test.skip('unsupported CPU')
 
 class FakeCPU:
     rtyper = None
diff --git a/pypy/jit/backend/x86/test/test_fficall.py b/pypy/jit/backend/x86/test/test_fficall.py
--- a/pypy/jit/backend/x86/test/test_fficall.py
+++ b/pypy/jit/backend/x86/test/test_fficall.py
@@ -2,7 +2,7 @@
 from pypy.jit.metainterp.test import test_fficall
 from pypy.jit.backend.x86.test.test_basic import Jit386Mixin
 
-class TestFfiLookups(Jit386Mixin, test_fficall.FfiLookupTests):
+class TestFfiCall(Jit386Mixin, test_fficall.FfiCallTests):
     # for the individual tests see
     # ====> ../../../metainterp/test/test_fficall.py
-    supports_all = True
+    pass
diff --git a/pypy/jit/backend/x86/test/test_rawmem.py b/pypy/jit/backend/x86/test/test_rawmem.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/x86/test/test_rawmem.py
@@ -0,0 +1,9 @@
+
+from pypy.jit.backend.x86.test.test_basic import Jit386Mixin
+from pypy.jit.metainterp.test.test_rawmem import RawMemTests
+
+
+class TestRawMem(Jit386Mixin, RawMemTests):
+    # for the individual tests see
+    # ====> ../../../metainterp/test/test_rawmem.py
+    pass
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -458,10 +458,8 @@
                 mc.RET16_i(40)
             rawstart = mc.materialize(cpu.asmmemmgr, [])
             #
-            calldescr = cpu.calldescrof_dynamic([types.slong] * 10,
-                                                types.slong,
-                                                EffectInfo.MOST_GENERAL,
-                                                ffi_flags=-1)
+            calldescr = cpu._calldescr_dynamic_for_tests([types.slong] * 10,
+                                                         types.slong)
             calldescr.get_call_conv = lambda: ffi      # <==== hack
             # ^^^ we patch get_call_conv() so that the test also makes sense
             #     on Linux, because clibffi.get_call_conv() would always
diff --git a/pypy/jit/backend/x86/test/test_ztranslation.py b/pypy/jit/backend/x86/test/test_ztranslation.py
--- a/pypy/jit/backend/x86/test/test_ztranslation.py
+++ b/pypy/jit/backend/x86/test/test_ztranslation.py
@@ -3,6 +3,7 @@
 from pypy.rlib.jit import JitDriver, unroll_parameters, set_param
 from pypy.rlib.jit import PARAMETERS, dont_look_inside
 from pypy.rlib.jit import promote
+from pypy.rlib import jit_hooks
 from pypy.jit.metainterp.jitprof import Profiler
 from pypy.jit.backend.detect_cpu import getcpuclass
 from pypy.jit.backend.test.support import CCompiledMixin
@@ -170,6 +171,24 @@
         assert 1024 <= bound <= 131072
         assert bound & (bound-1) == 0       # a power of two
 
+    def test_jit_get_stats(self):
+        driver = JitDriver(greens = [], reds = ['i'])
+        
+        def f():
+            i = 0
+            while i < 100000:
+                driver.jit_merge_point(i=i)
+                i += 1
+
+        def main():
+            jit_hooks.stats_set_debug(None, True)
+            f()
+            ll_times = jit_hooks.stats_get_loop_run_times(None)
+            return len(ll_times)
+
+        res = self.meta_interp(main, [])
+        assert res == 3
+        # one for loop, one for entry point and one for the prologue
 
 class TestTranslationRemoveTypePtrX86(CCompiledMixin):
     CPUClass = getcpuclass()
diff --git a/pypy/jit/backend/x86/tool/test/test_viewcode.py b/pypy/jit/backend/x86/tool/test/test_viewcode.py
--- a/pypy/jit/backend/x86/tool/test/test_viewcode.py
+++ b/pypy/jit/backend/x86/tool/test/test_viewcode.py
@@ -1,5 +1,10 @@
 from cStringIO import StringIO
 from pypy.jit.backend.x86.tool.viewcode import format_code_dump_with_labels
+from pypy.jit.backend.x86.tool.viewcode import find_objdump
+import os
+import py
+import tempfile
+from pypy.tool.udir import udir
 
 def test_format_code_dump_with_labels():
     lines = StringIO("""
@@ -53,3 +58,16 @@
     lines = format_code_dump_with_labels(0xAA00, lines, label_list=None)
     out = ''.join(lines)
     assert out.strip() == input
+
+def test_find_objdump():
+    old = os.environ['PATH']
+    os.environ['PATH'] = ''
+    py.test.raises(find_objdump)
+
+    #
+    path = udir.join('objdump')
+    print >>path, 'hello world'
+    os.environ['PATH'] = path.dirname
+    assert find_objdump() == 'objdump'
+    #
+    os.environ['PATH'] = old
diff --git a/pypy/jit/backend/x86/tool/viewcode.py b/pypy/jit/backend/x86/tool/viewcode.py
--- a/pypy/jit/backend/x86/tool/viewcode.py
+++ b/pypy/jit/backend/x86/tool/viewcode.py
@@ -8,9 +8,9 @@
     ./viewcode.py log               # also includes a pygame viewer
 """
 
-import autopath
 import new
 import operator
+import os
 import py
 import re
 import sys
@@ -36,6 +36,17 @@
 if sys.platform == "win32":
     pass   # lots more in Psyco
 
+def find_objdump():
+    exe = ('objdump', 'gobjdump')
+    path = os.environ['PATH'].split(os.pathsep)
+    for e in exe:
+        for p in path:
+            path_to = os.path.join(p, e)
+            if not os.path.exists(path_to):
+                continue
+            return e
+    raise AssertionError('(g)objdump was not found in PATH')
+
 def machine_code_dump(data, originaddr, backend_name, label_list=None):
     objdump_backend_option = {
         'x86': 'i386',
@@ -43,7 +54,8 @@
         'x86_64': 'x86-64',
         'i386': 'i386',
     }
-    objdump = ('objdump -M %(backend)s -b binary -m i386 '
+    cmd = find_objdump()
+    objdump = ('%(command)s -M %(backend)s -b binary -m i386 '
                '--disassembler-options=intel-mnemonics '
                '--adjust-vma=%(origin)d -D %(file)s')
     #
@@ -51,6 +63,7 @@
     f.write(data)
     f.close()
     p = subprocess.Popen(objdump % {
+        'command': cmd,
         'file': tmpfile,
         'origin': originaddr,
         'backend': objdump_backend_option[backend_name],
diff --git a/pypy/jit/codewriter/call.py b/pypy/jit/codewriter/call.py
--- a/pypy/jit/codewriter/call.py
+++ b/pypy/jit/codewriter/call.py
@@ -16,6 +16,7 @@
 
 class CallControl(object):
     virtualref_info = None     # optionally set from outside
+    has_libffi_call = False    # default value
 
     def __init__(self, cpu=None, jitdrivers_sd=[]):
         assert isinstance(jitdrivers_sd, list)   # debugging
diff --git a/pypy/jit/codewriter/effectinfo.py b/pypy/jit/codewriter/effectinfo.py
--- a/pypy/jit/codewriter/effectinfo.py
+++ b/pypy/jit/codewriter/effectinfo.py
@@ -45,13 +45,7 @@
     OS_UNIEQ_LENGTHOK           = 51   #
     _OS_offset_uni              = OS_UNI_CONCAT - OS_STR_CONCAT
     #
-    OS_LIBFFI_PREPARE           = 60
-    OS_LIBFFI_PUSH_ARG          = 61
     OS_LIBFFI_CALL              = 62
-    OS_LIBFFI_STRUCT_GETFIELD   = 63
-    OS_LIBFFI_STRUCT_SETFIELD   = 64
-    OS_LIBFFI_GETARRAYITEM      = 65
-    OS_LIBFFI_SETARRAYITEM      = 66
     #
     OS_LLONG_INVERT             = 69
     OS_LLONG_ADD                = 70
@@ -81,9 +75,13 @@
     OS_LLONG_U_TO_FLOAT         = 94
     #
     OS_MATH_SQRT                = 100
+    #
+    OS_RAW_MALLOC_VARSIZE       = 110
+    OS_RAW_FREE                 = 111
 
     # for debugging:
-    _OS_CANRAISE = set([OS_NONE, OS_STR2UNICODE, OS_LIBFFI_CALL])
+    _OS_CANRAISE = set([OS_NONE, OS_STR2UNICODE, OS_LIBFFI_CALL,
+                        OS_RAW_MALLOC_VARSIZE])
 
     def __new__(cls, readonly_descrs_fields, readonly_descrs_arrays,
                 write_descrs_fields, write_descrs_arrays,
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -11,6 +11,7 @@
 from pypy.objspace.flow.model import SpaceOperation, Variable, Constant, c_last_exception
 from pypy.rlib import objectmodel
 from pypy.rlib.jit import _we_are_jitted
+from pypy.rlib.rgc import lltype_is_gc
 from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rclass, rffi
 from pypy.rpython.rclass import IR_QUASIIMMUTABLE, IR_QUASIIMMUTABLE_ARRAY
 from pypy.translator.simplify import get_funcobj
@@ -208,6 +209,10 @@
         if op.args[0] in self.vable_array_vars:
             self.vable_array_vars[op.result]= self.vable_array_vars[op.args[0]]
 
+    def rewrite_op_cast_ptr_to_adr(self, op):
+        if lltype_is_gc(op.args[0].concretetype):
+            raise Exception("cast_ptr_to_adr for GC types unsupported")
+
     def rewrite_op_cast_pointer(self, op):
         newop = self.rewrite_op_same_as(op)
         assert newop is None
@@ -223,6 +228,9 @@
                 return [None, # hack, do the right renaming from op.args[0] to op.result
                         SpaceOperation("record_known_class", [op.args[0], const_vtable], None)]
 
+    def rewrite_op_raw_malloc_usage(self, op):
+        pass
+
     def rewrite_op_jit_record_known_class(self, op):
         return SpaceOperation("record_known_class", [op.args[0], op.args[1]], None)
 
@@ -520,9 +528,12 @@
             name += '_add_memory_pressure'
         if not track_allocation:
             name += '_no_track_allocation'
-        return self._do_builtin_call(op, name, args,
-                                     extra = (TYPE,),
-                                     extrakey = TYPE)
+        op1 = self.prepare_builtin_call(op, name, args, (TYPE,), TYPE)
+        if name == 'raw_malloc_varsize':
+            return self._handle_oopspec_call(op1, args,
+                                             EffectInfo.OS_RAW_MALLOC_VARSIZE,
+                                             EffectInfo.EF_CAN_RAISE)
+        return self.rewrite_op_direct_call(op1)
 
     def rewrite_op_malloc_varsize(self, op):
         if op.args[1].value['flavor'] == 'raw':
@@ -550,8 +561,13 @@
         name = 'raw_free'
         if not track_allocation:
             name += '_no_track_allocation'
-        return self._do_builtin_call(op, name, [op.args[0]],
-                                     extra = (STRUCT,), extrakey = STRUCT)
+        op1 = self.prepare_builtin_call(op, name, [op.args[0]], (STRUCT,),
+                                        STRUCT)
+        if name == 'raw_free':
+            return self._handle_oopspec_call(op1, [op.args[0]],
+                                             EffectInfo.OS_RAW_FREE,
+                                             EffectInfo.EF_CANNOT_RAISE)
+        return self.rewrite_op_direct_call(op1)
 
     def rewrite_op_getarrayitem(self, op):
         ARRAY = op.args[0].concretetype.TO
@@ -566,9 +582,14 @@
                                    [v_base, arrayfielddescr, arraydescr,
                                     op.args[1]], op.result)]
         # normal case follows
+        pure = ''
+        immut = ARRAY._immutable_field(None)
+        if immut:
+            pure = '_pure'
         arraydescr = self.cpu.arraydescrof(ARRAY)
         kind = getkind(op.result.concretetype)
-        return SpaceOperation('getarrayitem_%s_%s' % (ARRAY._gckind, kind[0]),
+        return SpaceOperation('getarrayitem_%s_%s%s' % (ARRAY._gckind,
+                                                        kind[0], pure),
                               [op.args[0], arraydescr, op.args[1]],
                               op.result)
 
@@ -691,6 +712,16 @@
                               [v_inst, descr, v_value],
                               None)
 
+    def rewrite_op_getsubstruct(self, op):
+        STRUCT = op.args[0].concretetype.TO
+        argname = getattr(STRUCT, '_gckind', 'gc')
+        if argname != 'raw':
+            raise Exception("%r: only supported for gckind=raw" % (op,))
+        ofs = llmemory.offsetof(STRUCT, op.args[1].value)
+        return SpaceOperation('int_add',
+                              [op.args[0], Constant(ofs, lltype.Signed)],
+                              op.result)
+
     def is_typeptr_getset(self, op):
         return (op.args[1].value == 'typeptr' and
                 op.args[0].concretetype.TO._hints.get('typeptr'))
@@ -840,6 +871,23 @@
             return SpaceOperation('setinteriorfield_gc_%s' % kind, args,
                                   op.result)
 
+    def rewrite_op_raw_store(self, op):
+        T = op.args[2].concretetype
+        kind = getkind(T)[0]
+        assert kind != 'r'
+        descr = self.cpu.arraydescrof(rffi.CArray(T))
+        return SpaceOperation('raw_store_%s' % kind,
+                              [op.args[0], op.args[1], descr, op.args[2]],
+                              None)
+
+    def rewrite_op_raw_load(self, op):
+        T = op.result.concretetype
+        kind = getkind(T)[0]
+        assert kind != 'r'
+        descr = self.cpu.arraydescrof(rffi.CArray(T))
+        return SpaceOperation('raw_load_%s' % kind,
+                              [op.args[0], op.args[1], descr], op.result)
+
     def _rewrite_equality(self, op, opname):
         arg0, arg1 = op.args
         if isinstance(arg0, Constant) and not arg0.value:
@@ -850,7 +898,7 @@
             return self._rewrite_symmetric(op)
 
     def _is_gc(self, v):
-        return getattr(getattr(v.concretetype, "TO", None), "_gckind", "?") == 'gc'
+        return lltype_is_gc(v.concretetype)
 
     def _is_rclass_instance(self, v):
         return lltype._castdepth(v.concretetype.TO, rclass.OBJECT) >= 0
@@ -1228,6 +1276,8 @@
                        ('uint_or', 'int_or'),
                        ('uint_lshift', 'int_lshift'),
                        ('uint_xor', 'int_xor'),
+
+                       ('adr_add', 'int_add'),
                        ]:
         assert _old not in locals()
         exec py.code.Source('''
@@ -1430,7 +1480,19 @@
 
     def do_fixed_newlist(self, op, args, arraydescr):
         v_length = self._get_initial_newlist_length(op, args)
-        return SpaceOperation('new_array', [arraydescr, v_length], op.result)
+        assert v_length.concretetype is lltype.Signed
+        ops = []
+        if isinstance(v_length, Constant):
+            if v_length.value >= 0:
+                v = v_length
+            else:
+                v = Constant(0, lltype.Signed)
+        else:
+            v = Variable('new_length')
+            v.concretetype = lltype.Signed
+            ops.append(SpaceOperation('int_force_ge_zero', [v_length], v))
+        ops.append(SpaceOperation('new_array', [arraydescr, v], op.result))
+        return ops
 
     def do_fixed_list_len(self, op, args, arraydescr):
         if args[0] in self.vable_array_vars:     # virtualizable array
@@ -1457,7 +1519,7 @@
                                                      'check_neg_index')
         extra = getkind(op.result.concretetype)[0]
         if pure:
-            extra = 'pure_' + extra
+            extra += '_pure'
         op = SpaceOperation('getarrayitem_gc_%s' % extra,
                             [args[0], arraydescr, v_index], op.result)
         return extraop + [op]
@@ -1666,27 +1728,10 @@
     # rlib.libffi
 
     def _handle_libffi_call(self, op, oopspec_name, args):
-        if oopspec_name == 'libffi_prepare_call':
-            oopspecindex = EffectInfo.OS_LIBFFI_PREPARE
-            extraeffect = EffectInfo.EF_CANNOT_RAISE
-        elif oopspec_name.startswith('libffi_push_'):
-            oopspecindex = EffectInfo.OS_LIBFFI_PUSH_ARG
-            extraeffect = EffectInfo.EF_CANNOT_RAISE
-        elif oopspec_name.startswith('libffi_call_'):
+        if oopspec_name == 'libffi_call':
             oopspecindex = EffectInfo.OS_LIBFFI_CALL
             extraeffect = EffectInfo.EF_RANDOM_EFFECTS
-        elif oopspec_name == 'libffi_struct_getfield':
-            oopspecindex = EffectInfo.OS_LIBFFI_STRUCT_GETFIELD
-            extraeffect = EffectInfo.EF_CANNOT_RAISE
-        elif oopspec_name == 'libffi_struct_setfield':
-            oopspecindex = EffectInfo.OS_LIBFFI_STRUCT_SETFIELD
-            extraeffect = EffectInfo.EF_CANNOT_RAISE
-        elif oopspec_name == 'libffi_array_getitem':
-            oopspecindex = EffectInfo.OS_LIBFFI_GETARRAYITEM
-            extraeffect = EffectInfo.EF_CANNOT_RAISE
-        elif oopspec_name == 'libffi_array_setitem':
-            oopspecindex = EffectInfo.OS_LIBFFI_SETARRAYITEM
-            extraeffect = EffectInfo.EF_CANNOT_RAISE
+            self.callcontrol.has_libffi_call = True
         else:
             assert False, 'unsupported oopspec: %s' % oopspec_name
         return self._handle_oopspec_call(op, args, oopspecindex, extraeffect)
diff --git a/pypy/jit/codewriter/longlong.py b/pypy/jit/codewriter/longlong.py
--- a/pypy/jit/codewriter/longlong.py
+++ b/pypy/jit/codewriter/longlong.py
@@ -9,13 +9,14 @@
 import sys
 from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.rlib import rarithmetic, longlong2float
+from pypy.jit.backend.arm.detect import detect_hardfloat
+from pypy.rlib.objectmodel import compute_hash
 
 
 if sys.maxint > 2147483647:
     # ---------- 64-bit platform ----------
     # the type FloatStorage is just a float
 
-    from pypy.rlib.objectmodel import compute_hash
 
     is_64_bit = True
     supports_longlong = False
@@ -28,6 +29,22 @@
     is_longlong     = lambda TYPE: False
 
     # -------------------------------------
+elif detect_hardfloat():
+    # ---------- ARM 32-bit platform ----------
+    # the type FloatStorage is float
+
+    is_64_bit = False
+    supports_longlong = False
+    r_float_storage = float
+    FLOATSTORAGE = lltype.Float
+
+    getfloatstorage = lambda x: x
+    getrealfloat    = lambda x: x
+    gethash         = compute_hash
+    is_longlong     = lambda TYPE: False
+
+    # -------------------------------------
+
 else:
     # ---------- 32-bit platform ----------
     # the type FloatStorage is r_longlong, and conversion is needed
diff --git a/pypy/jit/codewriter/policy.py b/pypy/jit/codewriter/policy.py
--- a/pypy/jit/codewriter/policy.py
+++ b/pypy/jit/codewriter/policy.py
@@ -63,11 +63,10 @@
             contains_loop = contains_loop and not getattr(
                     func, '_jit_unroll_safe_', False)
 
-        unsupported = contains_unsupported_variable_type(graph,
+        res = see_function and not contains_unsupported_variable_type(graph,
                             self.supports_floats,
                             self.supports_longlong,
                             self.supports_singlefloats)
-        res = see_function and not unsupported
         if res and contains_loop:
             self.unsafe_loopy_graphs.add(graph)
         res = res and not contains_loop
diff --git a/pypy/jit/codewriter/support.py b/pypy/jit/codewriter/support.py
--- a/pypy/jit/codewriter/support.py
+++ b/pypy/jit/codewriter/support.py
@@ -431,31 +431,6 @@
     return llop.uint_mod(lltype.Unsigned, xll, yll)
 
 
-# libffi support
-# --------------
-
-def func(llfunc):
-    from pypy.rlib.libffi import Func
-    return cast_base_ptr_to_instance(Func, llfunc)
-
-def _ll_1_libffi_prepare_call(llfunc):
-    return func(llfunc)._prepare()
-
-def _ll_4_libffi_push_int(llfunc, value, ll_args, i):
-    return func(llfunc)._push_int(value, ll_args, i)
-
-def _ll_4_libffi_push_float(llfunc, value, ll_args, i):
-    return func(llfunc)._push_float(value, ll_args, i)
-
-def _ll_3_libffi_call_int(llfunc, funcsym, ll_args):
-    return func(llfunc)._do_call(funcsym, ll_args, rffi.LONG)
-
-def _ll_3_libffi_call_float(llfunc, funcsym, ll_args):
-    return func(llfunc)._do_call(funcsym, ll_args, rffi.DOUBLE)
-
-def _ll_3_libffi_call_void(llfunc, funcsym, ll_args):
-    return func(llfunc)._do_call(funcsym, ll_args, lltype.Void)
-
 # in the following calls to builtins, the JIT is allowed to look inside:
 inline_calls_to = [
     ('int_floordiv_ovf_zer', [lltype.Signed, lltype.Signed], lltype.Signed),
diff --git a/pypy/jit/codewriter/test/test_codewriter.py b/pypy/jit/codewriter/test/test_codewriter.py
--- a/pypy/jit/codewriter/test/test_codewriter.py
+++ b/pypy/jit/codewriter/test/test_codewriter.py
@@ -221,3 +221,17 @@
     assert 'setarrayitem_raw_i' in s
     assert 'getarrayitem_raw_i' in s
     assert 'residual_call_ir_v $<* fn _ll_1_raw_free__arrayPtr>' in s
+
+def test_newlist_negativ():
+    def f(n):
+        l = [0] * n
+        return len(l)
+
+    rtyper = support.annotate(f, [-1])
+    jitdriver_sd = FakeJitDriverSD(rtyper.annotator.translator.graphs[0])
+    cw = CodeWriter(FakeCPU(rtyper), [jitdriver_sd])
+    cw.find_all_graphs(FakePolicy())
+    cw.make_jitcodes(verbose=True)
+    s = jitdriver_sd.mainjitcode.dump()
+    assert 'int_force_ge_zero' in s
+    assert 'new_array' in s
diff --git a/pypy/jit/codewriter/test/test_jtransform.py b/pypy/jit/codewriter/test/test_jtransform.py
--- a/pypy/jit/codewriter/test/test_jtransform.py
+++ b/pypy/jit/codewriter/test/test_jtransform.py
@@ -123,6 +123,7 @@
             INT = lltype.Signed
             UNICHAR = lltype.UniChar
             FLOAT = lltype.Float
+            ARRAYPTR = rffi.CArrayPtr(lltype.Signed)
             argtypes = {
              EI.OS_MATH_SQRT:  ([FLOAT], FLOAT),
              EI.OS_STR2UNICODE:([PSTR], PUNICODE),
@@ -139,16 +140,26 @@
              EI.OS_UNIEQ_NONNULL_CHAR:   ([PUNICODE, UNICHAR], INT),
              EI.OS_UNIEQ_CHECKNULL_CHAR: ([PUNICODE, UNICHAR], INT),
              EI.OS_UNIEQ_LENGTHOK:       ([PUNICODE, PUNICODE], INT),
+             EI.OS_RAW_MALLOC_VARSIZE:   ([INT], ARRAYPTR),
+             EI.OS_RAW_FREE:             ([ARRAYPTR], lltype.Void),
             }
             argtypes = argtypes[oopspecindex]
             assert argtypes[0] == [v.concretetype for v in op.args[1:]]
             assert argtypes[1] == op.result.concretetype
             if oopspecindex == EI.OS_STR2UNICODE:
                 assert extraeffect == EI.EF_ELIDABLE_CAN_RAISE
+            elif oopspecindex == EI.OS_RAW_MALLOC_VARSIZE:
+                assert extraeffect == EI.EF_CAN_RAISE
+            elif oopspecindex == EI.OS_RAW_FREE:
+                assert extraeffect == EI.EF_CANNOT_RAISE
             else:
                 assert extraeffect == EI.EF_ELIDABLE_CANNOT_RAISE
         return 'calldescr-%d' % oopspecindex
+
     def calldescr_canraise(self, calldescr):
+        EI = effectinfo.EffectInfo
+        if calldescr == 'calldescr-%d' % EI.OS_RAW_MALLOC_VARSIZE:
+            return True
         return False
 
 
@@ -547,10 +558,13 @@
     flags = Constant({'flavor': 'raw'}, lltype.Void)
     op = SpaceOperation('malloc_varsize', [Constant(S, lltype.Void), flags,
                                            v1], v)
-    tr = Transformer(FakeCPU(), FakeResidualCallControl())
+    tr = Transformer(FakeCPU(), FakeBuiltinCallControl())
     op0, op1 = tr.rewrite_operation(op)
     assert op0.opname == 'residual_call_ir_i'
     assert op0.args[0].value == 'raw_malloc_varsize' # pseudo-function as a str
+    assert (op0.args[1] == 'calldescr-%d' %
+            effectinfo.EffectInfo.OS_RAW_MALLOC_VARSIZE)
+
     assert op1.opname == '-live-'
     assert op1.args == []
 
@@ -591,21 +605,28 @@
     assert op1.args == []
 
 def test_raw_free():
-    S = lltype.Struct('dummy', ('x', lltype.Signed))
-    for flag in [True, False]:
-        flags = Constant({'flavor': 'raw', 'track_allocation': flag},
-                         lltype.Void)
-        op = SpaceOperation('free', [varoftype(lltype.Ptr(S)), flags],
-                            varoftype(lltype.Void))
-        tr = Transformer(FakeCPU(), FakeResidualCallControl())
-        op0, op1 = tr.rewrite_operation(op)
-        assert op0.opname == 'residual_call_ir_v'
-        if flag:
-            pseudo_op_name = 'raw_free'
-        else:
-            pseudo_op_name = 'raw_free_no_track_allocation'
-        assert op0.args[0].value == pseudo_op_name   # pseudo-function as a str
-        assert op1.opname == '-live-'
+    S = rffi.CArray(lltype.Signed)
+    flags = Constant({'flavor': 'raw', 'track_allocation': True},
+                     lltype.Void)
+    op = SpaceOperation('free', [varoftype(lltype.Ptr(S)), flags],
+                        varoftype(lltype.Void))
+    tr = Transformer(FakeCPU(), FakeBuiltinCallControl())
+    op0 = tr.rewrite_operation(op)
+    assert op0.opname == 'residual_call_ir_v'
+    assert op0.args[0].value == 'raw_free'
+    assert op0.args[1] == 'calldescr-%d' % effectinfo.EffectInfo.OS_RAW_FREE
+
+def test_raw_free_no_track_allocation():
+    S = rffi.CArray(lltype.Signed)
+    flags = Constant({'flavor': 'raw', 'track_allocation': False},
+                     lltype.Void)
+    op = SpaceOperation('free', [varoftype(lltype.Ptr(S)), flags],
+                        varoftype(lltype.Void))
+    tr = Transformer(FakeCPU(), FakeResidualCallControl())
+    op0, op1 = tr.rewrite_operation(op)
+    assert op0.opname == 'residual_call_ir_v'
+    assert op0.args[0].value == 'raw_free_no_track_allocation'
+    assert op1.opname == '-live-'
 
 def test_rename_on_links():
     v1 = Variable()
@@ -621,6 +642,13 @@
     assert block.exits[0].target is block2
     assert block.exits[0].args == [v1]
 
+def test_cast_ptr_to_adr():
+    t = Transformer(FakeCPU(), None)
+    v = varoftype(lltype.Ptr(lltype.Array()))
+    v2 = varoftype(llmemory.Address)
+    op1 = t.rewrite_operation(SpaceOperation('cast_ptr_to_adr', [v], v2))
+    assert op1 is None
+
 def test_int_eq():
     v1 = varoftype(lltype.Signed)
     v2 = varoftype(lltype.Signed)
@@ -830,6 +858,30 @@
     op1 = Transformer(FakeCPU()).rewrite_operation(op)
     assert not op1
 
+def test_raw_store():
+    v_storage = varoftype(llmemory.Address)
+    v_index = varoftype(lltype.Signed)
+    v_item = varoftype(lltype.Signed) # for example
+    op = SpaceOperation('raw_store', [v_storage, v_index, v_item], None)
+    op1 = Transformer(FakeCPU()).rewrite_operation(op)
+    assert op1.opname == 'raw_store_i'
+    assert op1.args[0] == v_storage
+    assert op1.args[1] == v_index
+    assert op1.args[2] == ('arraydescr', rffi.CArray(lltype.Signed))
+    assert op1.args[3] == v_item
+
+def test_raw_load():
+    v_storage = varoftype(llmemory.Address)
+    v_index = varoftype(lltype.Signed)
+    v_res = varoftype(lltype.Signed) # for example
+    op = SpaceOperation('raw_load', [v_storage, v_index], v_res)
+    op1 = Transformer(FakeCPU()).rewrite_operation(op)
+    assert op1.opname == 'raw_load_i'
+    assert op1.args[0] == v_storage
+    assert op1.args[1] == v_index
+    assert op1.args[2] == ('arraydescr', rffi.CArray(lltype.Signed))
+    assert op1.result == v_res
+
 def test_promote_1():
     v1 = varoftype(lltype.Signed)
     v2 = varoftype(lltype.Signed)
diff --git a/pypy/jit/codewriter/test/test_list.py b/pypy/jit/codewriter/test/test_list.py
--- a/pypy/jit/codewriter/test/test_list.py
+++ b/pypy/jit/codewriter/test/test_list.py
@@ -85,8 +85,11 @@
                  """new_array <ArrayDescr>, $0 -> %r0""")
     builtin_test('newlist', [Constant(5, lltype.Signed)], FIXEDLIST,
                  """new_array <ArrayDescr>, $5 -> %r0""")
+    builtin_test('newlist', [Constant(-2, lltype.Signed)], FIXEDLIST,
+                 """new_array <ArrayDescr>, $0 -> %r0""")
     builtin_test('newlist', [varoftype(lltype.Signed)], FIXEDLIST,
-                 """new_array <ArrayDescr>, %i0 -> %r0""")
+                 """int_force_ge_zero %i0 -> %i1\n"""
+                 """new_array <ArrayDescr>, %i1 -> %r0""")
     builtin_test('newlist', [Constant(5, lltype.Signed),
                              Constant(0, lltype.Signed)], FIXEDLIST,
                  """new_array <ArrayDescr>, $5 -> %r0""")
@@ -126,14 +129,14 @@
     builtin_test('list.getitem_foldable/NONNEG',
                  [varoftype(FIXEDLIST), varoftype(lltype.Signed)],
                  lltype.Signed, """
-                     getarrayitem_gc_pure_i %r0, <ArrayDescr>, %i0 -> %i1
+                     getarrayitem_gc_i_pure %r0, <ArrayDescr>, %i0 -> %i1
                  """)
     builtin_test('list.getitem_foldable/NEG',
                  [varoftype(FIXEDLIST), varoftype(lltype.Signed)],
                  lltype.Signed, """
                      -live-
                      check_neg_index %r0, <ArrayDescr>, %i0 -> %i1
-                     getarrayitem_gc_pure_i %r0, <ArrayDescr>, %i1 -> %i2
+                     getarrayitem_gc_i_pure %r0, <ArrayDescr>, %i1 -> %i2
                  """)
 
 def test_fixed_setitem():
diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -477,6 +477,11 @@
     @arguments("i", "i", "i", returns="i")
     def bhimpl_int_between(a, b, c):
         return a <= b < c
+    @arguments("i", returns="i")
+    def bhimpl_int_force_ge_zero(i):
+        if i < 0:
+            return 0
+        return i
 
     @arguments("i", "i", returns="i")
     def bhimpl_uint_lt(a, b):
@@ -1124,9 +1129,9 @@
     def bhimpl_getarrayitem_gc_f(cpu, array, arraydescr, index):
         return cpu.bh_getarrayitem_gc_f(arraydescr, array, index)
 
-    bhimpl_getarrayitem_gc_pure_i = bhimpl_getarrayitem_gc_i
-    bhimpl_getarrayitem_gc_pure_r = bhimpl_getarrayitem_gc_r
-    bhimpl_getarrayitem_gc_pure_f = bhimpl_getarrayitem_gc_f
+    bhimpl_getarrayitem_gc_i_pure = bhimpl_getarrayitem_gc_i
+    bhimpl_getarrayitem_gc_r_pure = bhimpl_getarrayitem_gc_r
+    bhimpl_getarrayitem_gc_f_pure = bhimpl_getarrayitem_gc_f
 
     @arguments("cpu", "i", "d", "i", returns="i")
     def bhimpl_getarrayitem_raw_i(cpu, array, arraydescr, index):
@@ -1135,6 +1140,9 @@
     def bhimpl_getarrayitem_raw_f(cpu, array, arraydescr, index):
         return cpu.bh_getarrayitem_raw_f(arraydescr, array, index)
 
+    bhimpl_getarrayitem_raw_i_pure = bhimpl_getarrayitem_raw_i
+    bhimpl_getarrayitem_raw_f_pure = bhimpl_getarrayitem_raw_f
+
     @arguments("cpu", "r", "d", "i", "i")
     def bhimpl_setarrayitem_gc_i(cpu, array, arraydescr, index, newvalue):
         cpu.bh_setarrayitem_gc_i(arraydescr, array, index, newvalue)
@@ -1269,6 +1277,20 @@
     def bhimpl_setfield_raw_f(cpu, struct, fielddescr, newvalue):
         cpu.bh_setfield_raw_f(struct, fielddescr, newvalue)
 
+    @arguments("cpu", "i", "i", "d", "i")
+    def bhimpl_raw_store_i(cpu, addr, offset, arraydescr, newvalue):
+        cpu.bh_raw_store_i(addr, offset, arraydescr, newvalue)
+    @arguments("cpu", "i", "i", "d", "f")
+    def bhimpl_raw_store_f(cpu, addr, offset, arraydescr, newvalue):
+        cpu.bh_raw_store_f(addr, offset, arraydescr, newvalue)
+
+    @arguments("cpu", "i", "i", "d", returns="i")
+    def bhimpl_raw_load_i(cpu, addr, offset, arraydescr):
+        return cpu.bh_raw_load_i(addr, offset, arraydescr)
+    @arguments("cpu", "i", "i", "d", returns="f")
+    def bhimpl_raw_load_f(cpu, addr, offset, arraydescr):
+        return cpu.bh_raw_load_f(addr, offset, arraydescr)
+
     @arguments("r", "d", "d")
     def bhimpl_record_quasiimmut_field(struct, fielddescr, mutatefielddescr):
         pass
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -5,7 +5,7 @@
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
 from pypy.rlib import rstack
-from pypy.rlib.jit import JitDebugInfo
+from pypy.rlib.jit import JitDebugInfo, Counters
 from pypy.conftest import option
 from pypy.tool.sourcetools import func_with_new_name
 
@@ -22,8 +22,7 @@
 
 def giveup():
     from pypy.jit.metainterp.pyjitpl import SwitchToBlackhole
-    from pypy.jit.metainterp.jitprof import ABORT_BRIDGE
-    raise SwitchToBlackhole(ABORT_BRIDGE)
+    raise SwitchToBlackhole(Counters.ABORT_BRIDGE)
 
 def show_procedures(metainterp_sd, procedure=None, error=None):
     # debugging
@@ -226,6 +225,8 @@
     assert isinstance(target_token, TargetToken)
     assert loop_jitcell_token.target_tokens
     loop_jitcell_token.target_tokens.append(target_token)
+    if target_token.short_preamble:
+        metainterp_sd.logger_ops.log_short_preamble([], target_token.short_preamble)
 
     loop = partial_trace
     loop.operations = loop.operations[:-1] + part.operations
diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -180,6 +180,26 @@
     else:
         cpu.bh_setfield_raw_i(struct, fielddescr, itembox.getint())
 
+def do_raw_store(cpu, _, addrbox, offsetbox, valuebox, arraydescr):
+    addr = addrbox.getint()
+    offset = offsetbox.getint()
+    if arraydescr.is_array_of_pointers():
+        raise AssertionError("cannot store GC pointers in raw store")
+    elif arraydescr.is_array_of_floats():
+        cpu.bh_raw_store_f(addr, offset, arraydescr,valuebox.getfloatstorage())
+    else:
+        cpu.bh_raw_store_i(addr, offset, arraydescr, valuebox.getint())
+
+def do_raw_load(cpu, _, addrbox, offsetbox, arraydescr):
+    addr = addrbox.getint()
+    offset = offsetbox.getint()
+    if arraydescr.is_array_of_pointers():
+        raise AssertionError("cannot store GC pointers in raw store")
+    elif arraydescr.is_array_of_floats():
+        return BoxFloat(cpu.bh_raw_load_f(addr, offset, arraydescr))
+    else:
+        return BoxInt(cpu.bh_raw_load_i(addr, offset, arraydescr))
+
 def exec_new_with_vtable(cpu, clsbox):
     from pypy.jit.codewriter import heaptracker
     vtable = clsbox.getint()
@@ -277,19 +297,6 @@
 
 
 def _make_execute_list():
-    if 0:     # enable this to trace calls to do_xxx
-        def wrap(fn):
-            def myfn(*args):
-                print '<<<', fn.__name__
-                try:
-                    return fn(*args)
-                finally:
-                    print fn.__name__, '>>>'
-            return myfn
-    else:
-        def wrap(fn):
-            return fn
-    #
     execute_by_num_args = {}
     for key, value in rop.__dict__.items():
         if not key.startswith('_'):
@@ -343,7 +350,6 @@
                          rop.DEBUG_MERGE_POINT,
                          rop.JIT_DEBUG,
                          rop.SETARRAYITEM_RAW,
-                         rop.GETINTERIORFIELD_RAW,
                          rop.SETINTERIORFIELD_RAW,
                          rop.CALL_RELEASE_GIL,
                          rop.QUASIIMMUT_FIELD,
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -39,7 +39,7 @@
         # XXX fix this for oo...
         if (TYPE != llmemory.Address and
             rffi.sizeof(TYPE) > rffi.sizeof(lltype.Signed)):
-            if supports_longlong:
+            if supports_longlong and TYPE is not lltype.LongFloat:
                 assert rffi.sizeof(TYPE) == 8
                 return 'float'
             raise NotImplementedError("type %s is too large" % TYPE)
@@ -706,6 +706,7 @@
 
         self.virtual_state = None
         self.exported_state = None
+        self.short_preamble = None
 
     def repr_of_descr(self):
         return 'TargetToken(%d)' % compute_unique_id(self)
diff --git a/pypy/jit/metainterp/jitprof.py b/pypy/jit/metainterp/jitprof.py
--- a/pypy/jit/metainterp/jitprof.py
+++ b/pypy/jit/metainterp/jitprof.py
@@ -6,42 +6,11 @@
 from pypy.rlib.debug import debug_print, debug_start, debug_stop
 from pypy.rlib.debug import have_debug_prints
 from pypy.jit.metainterp.jitexc import JitException
+from pypy.rlib.jit import Counters
 
-counters="""
-TRACING
-BACKEND
-OPS
-RECORDED_OPS
-GUARDS
-OPT_OPS
-OPT_GUARDS
-OPT_FORCINGS
-ABORT_TOO_LONG
-ABORT_BRIDGE
-ABORT_BAD_LOOP
-ABORT_ESCAPE
-ABORT_FORCE_QUASIIMMUT
-NVIRTUALS
-NVHOLES
-NVREUSED
-TOTAL_COMPILED_LOOPS
-TOTAL_COMPILED_BRIDGES
-TOTAL_FREED_LOOPS
-TOTAL_FREED_BRIDGES
-"""
 
-counter_names = []
-
-def _setup():
-    names = counters.split()
-    for i, name in enumerate(names):
-        globals()[name] = i
-        counter_names.append(name)
-    global ncounters
-    ncounters = len(names)
-_setup()
-
-JITPROF_LINES = ncounters + 1 + 1 # one for TOTAL, 1 for calls, update if needed
+JITPROF_LINES = Counters.ncounters + 1 + 1
+# one for TOTAL, 1 for calls, update if needed
 _CPU_LINES = 4       # the last 4 lines are stored on the cpu
 
 class BaseProfiler(object):
@@ -71,9 +40,12 @@
     def count(self, kind, inc=1):
         pass
 
-    def count_ops(self, opnum, kind=OPS):
+    def count_ops(self, opnum, kind=Counters.OPS):
         pass
 
+    def get_counter(self, num):
+        return -1.0
+
 class Profiler(BaseProfiler):
     initialized = False
     timer = time.time
@@ -89,7 +61,7 @@
         self.starttime = self.timer()
         self.t1 = self.starttime
         self.times = [0, 0]
-        self.counters = [0] * (ncounters - _CPU_LINES)
+        self.counters = [0] * (Counters.ncounters - _CPU_LINES)
         self.calls = 0
         self.current = []
 
@@ -117,19 +89,30 @@
             return
         self.times[ev1] += self.t1 - t0
 
-    def start_tracing(self):   self._start(TRACING)
-    def end_tracing(self):     self._end  (TRACING)
+    def start_tracing(self):   self._start(Counters.TRACING)
+    def end_tracing(self):     self._end  (Counters.TRACING)
 
-    def start_backend(self):   self._start(BACKEND)
-    def end_backend(self):     self._end  (BACKEND)
+    def start_backend(self):   self._start(Counters.BACKEND)
+    def end_backend(self):     self._end  (Counters.BACKEND)
 
     def count(self, kind, inc=1):
         self.counters[kind] += inc        
-    
-    def count_ops(self, opnum, kind=OPS):
+
+    def get_counter(self, num):
+        if num == Counters.TOTAL_COMPILED_LOOPS:
+            return self.cpu.total_compiled_loops
+        elif num == Counters.TOTAL_COMPILED_BRIDGES:
+            return self.cpu.total_compiled_bridges
+        elif num == Counters.TOTAL_FREED_LOOPS:
+            return self.cpu.total_freed_loops
+        elif num == Counters.TOTAL_FREED_BRIDGES:
+            return self.cpu.total_freed_bridges
+        return self.counters[num]
+
+    def count_ops(self, opnum, kind=Counters.OPS):
         from pypy.jit.metainterp.resoperation import rop
         self.counters[kind] += 1
-        if opnum == rop.CALL and kind == RECORDED_OPS:# or opnum == rop.OOSEND:
+        if opnum == rop.CALL and kind == Counters.RECORDED_OPS:# or opnum == rop.OOSEND:
             self.calls += 1
 
     def print_stats(self):
@@ -142,26 +125,29 @@
         cnt = self.counters
         tim = self.times
         calls = self.calls
-        self._print_line_time("Tracing", cnt[TRACING],   tim[TRACING])
-        self._print_line_time("Backend", cnt[BACKEND],   tim[BACKEND])
+        self._print_line_time("Tracing", cnt[Counters.TRACING],
+                              tim[Counters.TRACING])
+        self._print_line_time("Backend", cnt[Counters.BACKEND],
+                              tim[Counters.BACKEND])
         line = "TOTAL:      \t\t%f" % (self.tk - self.starttime, )
         debug_print(line)
-        self._print_intline("ops", cnt[OPS])
-        self._print_intline("recorded ops", cnt[RECORDED_OPS])
+        self._print_intline("ops", cnt[Counters.OPS])
+        self._print_intline("recorded ops", cnt[Counters.RECORDED_OPS])
         self._print_intline("  calls", calls)
-        self._print_intline("guards", cnt[GUARDS])
-        self._print_intline("opt ops", cnt[OPT_OPS])
-        self._print_intline("opt guards", cnt[OPT_GUARDS])
-        self._print_intline("forcings", cnt[OPT_FORCINGS])
-        self._print_intline("abort: trace too long", cnt[ABORT_TOO_LONG])
-        self._print_intline("abort: compiling", cnt[ABORT_BRIDGE])
-        self._print_intline("abort: vable escape", cnt[ABORT_ESCAPE])
-        self._print_intline("abort: bad loop", cnt[ABORT_BAD_LOOP])
+        self._print_intline("guards", cnt[Counters.GUARDS])
+        self._print_intline("opt ops", cnt[Counters.OPT_OPS])
+        self._print_intline("opt guards", cnt[Counters.OPT_GUARDS])
+        self._print_intline("forcings", cnt[Counters.OPT_FORCINGS])
+        self._print_intline("abort: trace too long",
+                            cnt[Counters.ABORT_TOO_LONG])
+        self._print_intline("abort: compiling", cnt[Counters.ABORT_BRIDGE])
+        self._print_intline("abort: vable escape", cnt[Counters.ABORT_ESCAPE])
+        self._print_intline("abort: bad loop", cnt[Counters.ABORT_BAD_LOOP])
         self._print_intline("abort: force quasi-immut",
-                                               cnt[ABORT_FORCE_QUASIIMMUT])
-        self._print_intline("nvirtuals", cnt[NVIRTUALS])
-        self._print_intline("nvholes", cnt[NVHOLES])
-        self._print_intline("nvreused", cnt[NVREUSED])
+                            cnt[Counters.ABORT_FORCE_QUASIIMMUT])
+        self._print_intline("nvirtuals", cnt[Counters.NVIRTUALS])
+        self._print_intline("nvholes", cnt[Counters.NVHOLES])
+        self._print_intline("nvreused", cnt[Counters.NVREUSED])
         cpu = self.cpu
         if cpu is not None:   # for some tests
             self._print_intline("Total # of loops",
diff --git a/pypy/jit/metainterp/optimizeopt/__init__.py b/pypy/jit/metainterp/optimizeopt/__init__.py
--- a/pypy/jit/metainterp/optimizeopt/__init__.py
+++ b/pypy/jit/metainterp/optimizeopt/__init__.py
@@ -5,7 +5,6 @@
 from pypy.jit.metainterp.optimizeopt.heap import OptHeap
 from pypy.jit.metainterp.optimizeopt.vstring import OptString
 from pypy.jit.metainterp.optimizeopt.unroll import optimize_unroll
-from pypy.jit.metainterp.optimizeopt.fficall import OptFfiCall
 from pypy.jit.metainterp.optimizeopt.simplify import OptSimplify
 from pypy.jit.metainterp.optimizeopt.pure import OptPure
 from pypy.jit.metainterp.optimizeopt.earlyforce import OptEarlyForce
@@ -21,7 +20,6 @@
             ('earlyforce', OptEarlyForce),
             ('pure', OptPure),
             ('heap', OptHeap),
-            ('ffi', None),
             ('unroll', None)]
 # no direct instantiation of unroll
 unroll_all_opts = unrolling_iterable(ALL_OPTS)
@@ -42,11 +40,6 @@
             if opt is not None:
                 o = opt()
                 optimizations.append(o)
-            elif name == 'ffi' and config.translation.jit_ffi:
-                # we cannot put the class directly in the unrolling_iterable,
-                # because we do not want it to be seen at all (to avoid to
-                # introduce a dependency on libffi in case we do not need it)
-                optimizations.append(OptFfiCall())
 
     if ('rewrite' not in enable_opts or 'virtualize' not in enable_opts
         or 'heap' not in enable_opts or 'unroll' not in enable_opts
diff --git a/pypy/jit/metainterp/optimizeopt/fficall.py b/pypy/jit/metainterp/optimizeopt/fficall.py
deleted file mode 100644
--- a/pypy/jit/metainterp/optimizeopt/fficall.py
+++ /dev/null
@@ -1,307 +0,0 @@
-from pypy.jit.codewriter.effectinfo import EffectInfo
-from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
-from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
-from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.rlib import clibffi, libffi
-from pypy.rlib.debug import debug_print
-from pypy.rlib.libffi import Func
-from pypy.rlib.objectmodel import we_are_translated
-from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
-from pypy.rpython.lltypesystem import lltype, llmemory, rffi
-from pypy.rlib.objectmodel import we_are_translated
-from pypy.rlib.rarithmetic import intmask
-
-
-class FuncInfo(object):
-
-    argtypes = None
-    restype = None
-    descr = None
-    prepare_op = None
-
-    def __init__(self, funcval, cpu, prepare_op):
-        self.funcval = funcval
-        self.opargs = []
-        argtypes, restype, flags = self._get_signature(funcval)
-        self.descr = cpu.calldescrof_dynamic(argtypes, restype,
-                                             EffectInfo.MOST_GENERAL,
-                                             ffi_flags=flags)
-        # ^^^ may be None if unsupported
-        self.prepare_op = prepare_op
-        self.delayed_ops = []
-
-    def _get_signature(self, funcval):
-        """
-        given the funcval, return a tuple (argtypes, restype, flags), where
-        the actuall types are libffi.types.*
-
-        The implementation is tricky because we have three possible cases:
-
-        - translated: the easiest case, we can just cast back the pointer to
-          the original Func instance and read .argtypes, .restype and .flags
-
-        - completely untranslated: this is what we get from test_optimizeopt
-          tests. funcval contains a FakeLLObject whose _fake_class is Func,
-          and we can just get .argtypes, .restype and .flags
-
-        - partially translated: this happens when running metainterp tests:
-          funcval contains the low-level equivalent of a Func, and thus we
-          have to fish inst_argtypes and inst_restype by hand.  Note that
-          inst_argtypes is actually a low-level array, but we can use it
-          directly since the only thing we do with it is to read its items
-        """
-
-        llfunc = funcval.box.getref_base()
-        if we_are_translated():
-            func = cast_base_ptr_to_instance(Func, llfunc)
-            return func.argtypes, func.restype, func.flags
-        elif getattr(llfunc, '_fake_class', None) is Func:
-            # untranslated
-            return llfunc.argtypes, llfunc.restype, llfunc.flags
-        else:
-            # partially translated
-            # llfunc contains an opaque pointer to something like the following:
-            # <GcStruct pypy.rlib.libffi.Func { super, inst_argtypes, inst_funcptr,
-            #                                   inst_funcsym, inst_restype }>
-            #
-            # Unfortunately, we cannot use the proper lltype.cast_opaque_ptr,
-            # because we don't have the exact TYPE to cast to.  Instead, we
-            # just fish it manually :-(
-            f = llfunc._obj.container
-            return f.inst_argtypes, f.inst_restype, f.inst_flags
-
-
-class OptFfiCall(Optimization):
-
-    def setup(self):
-        self.funcinfo = None
-        if self.optimizer.loop is not None:
-            self.logops = self.optimizer.loop.logops
-        else:
-            self.logops = None
-
-    def new(self):
-        return OptFfiCall()
-
-    def begin_optimization(self, funcval, op):
-        self.rollback_maybe('begin_optimization', op)
-        self.funcinfo = FuncInfo(funcval, self.optimizer.cpu, op)
-
-    def commit_optimization(self):
-        self.funcinfo = None
-
-    def rollback_maybe(self, msg, op):
-        if self.funcinfo is None:
-            return # nothing to rollback
-        #
-        # we immediately set funcinfo to None to prevent recursion when
-        # calling emit_op
-        if self.logops is not None:
-            debug_print('rollback: ' + msg + ': ', self.logops.repr_of_resop(op))
-        funcinfo = self.funcinfo
-        self.funcinfo = None
-        self.emit_operation(funcinfo.prepare_op)
-        for op in funcinfo.opargs:
-            self.emit_operation(op)
-        for delayed_op in funcinfo.delayed_ops:
-            self.emit_operation(delayed_op)
-
-    def emit_operation(self, op):
-        # we cannot emit any operation during the optimization
-        self.rollback_maybe('invalid op', op)
-        Optimization.emit_operation(self, op)
-
-    def optimize_CALL(self, op):
-        oopspec = self._get_oopspec(op)
-        ops = [op]
-        if oopspec == EffectInfo.OS_LIBFFI_PREPARE:
-            ops = self.do_prepare_call(op)
-        elif oopspec == EffectInfo.OS_LIBFFI_PUSH_ARG:
-            ops = self.do_push_arg(op)
-        elif oopspec == EffectInfo.OS_LIBFFI_CALL:
-            ops = self.do_call(op)
-        elif (oopspec == EffectInfo.OS_LIBFFI_STRUCT_GETFIELD or
-              oopspec == EffectInfo.OS_LIBFFI_STRUCT_SETFIELD):
-            ops = self.do_struct_getsetfield(op, oopspec)
-        elif (oopspec == EffectInfo.OS_LIBFFI_GETARRAYITEM or
-            oopspec == EffectInfo.OS_LIBFFI_SETARRAYITEM):
-            ops = self.do_getsetarrayitem(op, oopspec)
-        #
-        for op in ops:
-            self.emit_operation(op)
-
-    optimize_CALL_MAY_FORCE = optimize_CALL
-
-    def optimize_FORCE_TOKEN(self, op):
-        # The handling of force_token needs a bit of explanation.
-        # The original trace which is getting optimized looks like this:
-        #    i1 = force_token()
-        #    setfield_gc(p0, i1, ...)
-        #    call_may_force(...)
-        #
-        # In theory, fficall should take care of both force_token and
-        # setfield_gc.  However, the lazy setfield optimization in heap.py
-        # delays the setfield_gc, with the effect that fficall.py sees them in
-        # this order:
-        #    i1 = force_token()
-        #    call_may_force(...)
-        #    setfield_gc(p0, i1, ...)
-        #
-        # This means that see the setfield_gc only the call_may_force, when
-        # the optimization has already been done, and thus we need to take
-        # special care just of force_token.
-        #
-        # Finally, the method force_lazy_setfield in heap.py reorders the
-        # call_may_force and the setfield_gc, so the final result we get is
-        # again force_token/setfield_gc/call_may_force.
-        #
-        # However, note that nowadays we also allow to have any setfield_gc
-        # between libffi_prepare and libffi_call, so while the comment above
-        # it's a bit superfluous, it has been left there for future reference.
-        if self.funcinfo is None:
-            self.emit_operation(op)
-        else:
-            self.funcinfo.delayed_ops.append(op)
-
-    optimize_SETFIELD_GC = optimize_FORCE_TOKEN
-
-    def do_prepare_call(self, op):
-        self.rollback_maybe('prepare call', op)
-        funcval = self._get_funcval(op)
-        if not funcval.is_constant():
-            return [op] # cannot optimize
-        self.begin_optimization(funcval, op)
-        return []
-
-    def do_push_arg(self, op):
-        funcval = self._get_funcval(op)
-        if not self.funcinfo or self.funcinfo.funcval is not funcval:
-            return [op] # cannot optimize
-        self.funcinfo.opargs.append(op)
-        return []
-
-    def do_call(self, op):
-        funcval = self._get_funcval(op)
-        funcinfo = self.funcinfo
-        if (not funcinfo or funcinfo.funcval is not funcval or
-            funcinfo.descr is None):
-            return [op] # cannot optimize
-        funcsymval = self.getvalue(op.getarg(2))
-        arglist = [funcsymval.get_key_box()]
-        for push_op in funcinfo.opargs:
-            argval = self.getvalue(push_op.getarg(2))
-            arglist.append(argval.get_key_box())
-        newop = ResOperation(rop.CALL_RELEASE_GIL, arglist, op.result,
-                             descr=funcinfo.descr)
-        self.commit_optimization()
-        ops = []
-        for delayed_op in funcinfo.delayed_ops:
-            ops.append(delayed_op)
-        ops.append(newop)
-        return ops
-
-    def do_struct_getsetfield(self, op, oopspec):
-        ffitypeval = self.getvalue(op.getarg(1))
-        addrval = self.getvalue(op.getarg(2))
-        offsetval = self.getvalue(op.getarg(3))
-        if not ffitypeval.is_constant() or not offsetval.is_constant():
-            return [op]
-        #
-        ffitypeaddr = ffitypeval.box.getaddr()
-        ffitype = llmemory.cast_adr_to_ptr(ffitypeaddr, clibffi.FFI_TYPE_P)
-        offset = offsetval.box.getint()
-        descr = self._get_field_descr(ffitype, offset)
-        #
-        arglist = [addrval.force_box(self.optimizer)]
-        if oopspec == EffectInfo.OS_LIBFFI_STRUCT_GETFIELD:
-            opnum = rop.GETFIELD_RAW
-        else:
-            opnum = rop.SETFIELD_RAW
-            newval = self.getvalue(op.getarg(4))
-            arglist.append(newval.force_box(self.optimizer))
-        #
-        newop = ResOperation(opnum, arglist, op.result, descr=descr)
-        return [newop]
-
-    def _get_field_descr(self, ffitype, offset):
-        kind = libffi.types.getkind(ffitype)
-        is_pointer = is_float = is_signed = False
-        if ffitype is libffi.types.pointer:
-            is_pointer = True
-        elif kind == 'i':
-            is_signed = True
-        elif kind == 'f' or kind == 'I' or kind == 'U':
-            # longlongs are treated as floats, see e.g. llsupport/descr.py:getDescrClass
-            is_float = True
-        else:
-            assert False, "unsupported ffitype or kind"
-        #
-        fieldsize = intmask(ffitype.c_size)
-        return self.optimizer.cpu.fielddescrof_dynamic(offset, fieldsize,
-                                                       is_pointer, is_float, is_signed)
-    
-    def do_getsetarrayitem(self, op, oopspec):
-        ffitypeval = self.getvalue(op.getarg(1))
-        widthval = self.getvalue(op.getarg(2))
-        offsetval = self.getvalue(op.getarg(5))
-        if not ffitypeval.is_constant() or not widthval.is_constant() or not offsetval.is_constant():
-            return [op]
-
-        ffitypeaddr = ffitypeval.box.getaddr()
-        ffitype = llmemory.cast_adr_to_ptr(ffitypeaddr, clibffi.FFI_TYPE_P)
-        offset = offsetval.box.getint()
-        width = widthval.box.getint()
-        descr = self._get_interior_descr(ffitype, width, offset)
-
-        arglist = [
-            self.getvalue(op.getarg(3)).force_box(self.optimizer),
-            self.getvalue(op.getarg(4)).force_box(self.optimizer),
-        ]
-        if oopspec == EffectInfo.OS_LIBFFI_GETARRAYITEM:
-            opnum = rop.GETINTERIORFIELD_RAW
-        elif oopspec == EffectInfo.OS_LIBFFI_SETARRAYITEM:
-            opnum = rop.SETINTERIORFIELD_RAW
-            arglist.append(self.getvalue(op.getarg(6)).force_box(self.optimizer))
-        else:
-            assert False
-        return [
-            ResOperation(opnum, arglist, op.result, descr=descr),
-        ]
-
-    def _get_interior_descr(self, ffitype, width, offset):
-        kind = libffi.types.getkind(ffitype)
-        is_pointer = is_float = is_signed = False
-        if ffitype is libffi.types.pointer:
-            is_pointer = True
-        elif kind == 'i':
-            is_signed = True
-        elif kind == 'f' or kind == 'I' or kind == 'U':
-            # longlongs are treated as floats, see
-            # e.g. llsupport/descr.py:getDescrClass
-            is_float = True
-        elif kind == 'u' or kind == 's':
-            # they're all False
-            pass
-        else:
-            raise NotImplementedError("unsupported ffitype or kind: %s" % kind)
-        #
-        fieldsize = rffi.getintfield(ffitype, 'c_size')
-        return self.optimizer.cpu.interiorfielddescrof_dynamic(
-            offset, width, fieldsize, is_pointer, is_float, is_signed
-        )
-
-
-    def propagate_forward(self, op):
-        if self.logops is not None:
-            debug_print(self.logops.repr_of_resop(op))
-        dispatch_opt(self, op)
-
-    def _get_oopspec(self, op):
-        effectinfo = op.getdescr().get_extra_info()
-        return effectinfo.oopspecindex
-
-    def _get_funcval(self, op):
-        return self.getvalue(op.getarg(1))
-
-dispatch_opt = make_dispatcher_method(OptFfiCall, 'optimize_',
-        default=OptFfiCall.emit_operation)
diff --git a/pypy/jit/metainterp/optimizeopt/heap.py b/pypy/jit/metainterp/optimizeopt/heap.py
--- a/pypy/jit/metainterp/optimizeopt/heap.py
+++ b/pypy/jit/metainterp/optimizeopt/heap.py
@@ -1,7 +1,7 @@
 import os
 
 from pypy.jit.metainterp.jitexc import JitException
-from pypy.jit.metainterp.optimizeopt.optimizer import Optimization, MODE_ARRAY
+from pypy.jit.metainterp.optimizeopt.optimizer import Optimization, MODE_ARRAY, LEVEL_KNOWNCLASS
 from pypy.jit.metainterp.history import ConstInt, Const
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from pypy.jit.metainterp.resoperation import rop, ResOperation
@@ -128,8 +128,12 @@
             op = self._cached_fields_getfield_op[structvalue]
             if not op:
                 continue
-            if optimizer.getvalue(op.getarg(0)) in optimizer.opaque_pointers:
-                continue
+            value = optimizer.getvalue(op.getarg(0))
+            if value in optimizer.opaque_pointers:
+                if value.level < LEVEL_KNOWNCLASS:
+                    continue
+                if op.getopnum() != rop.SETFIELD_GC and op.getopnum() != rop.GETFIELD_GC:
+                    continue
             if structvalue in self._cached_fields:
                 if op.getopnum() == rop.SETFIELD_GC:
                     result = op.getarg(1)
@@ -251,6 +255,7 @@
             opnum == rop.SETARRAYITEM_GC or      # handled specially
             opnum == rop.SETARRAYITEM_RAW or     # no effect on GC struct
             opnum == rop.SETINTERIORFIELD_RAW or # no effect on GC struct
+            opnum == rop.RAW_STORE or            # no effect on GC struct
             opnum == rop.STRSETITEM or           # no effect on GC struct/array
             opnum == rop.UNICODESETITEM or       # no effect on GC struct/array
             opnum == rop.DEBUG_MERGE_POINT or    # no effect whatsoever
diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -401,7 +401,7 @@
             o.turned_constant(value)
 
     def forget_numberings(self, virtualbox):
-        self.metainterp_sd.profiler.count(jitprof.OPT_FORCINGS)
+        self.metainterp_sd.profiler.count(jitprof.Counters.OPT_FORCINGS)
         self.resumedata_memo.forget_numberings(virtualbox)
 
     def getinterned(self, box):
@@ -535,9 +535,9 @@
             else:
                 self.ensure_imported(value)
                 op.setarg(i, value.force_box(self))
-        self.metainterp_sd.profiler.count(jitprof.OPT_OPS)
+        self.metainterp_sd.profiler.count(jitprof.Counters.OPT_OPS)
         if op.is_guard():
-            self.metainterp_sd.profiler.count(jitprof.OPT_GUARDS)
+            self.metainterp_sd.profiler.count(jitprof.Counters.OPT_GUARDS)
             if self.replaces_guard and op in self.replaces_guard:
                 self.replace_op(self.replaces_guard[op], op)
                 del self.replaces_guard[op]
diff --git a/pypy/jit/metainterp/optimizeopt/rewrite.py b/pypy/jit/metainterp/optimizeopt/rewrite.py
--- a/pypy/jit/metainterp/optimizeopt/rewrite.py
+++ b/pypy/jit/metainterp/optimizeopt/rewrite.py
@@ -241,6 +241,16 @@
             # guard_nonnull_class on this value, which is rather silly.
             # replace the original guard with a guard_value
             old_guard_op = value.last_guard
+            if old_guard_op.getopnum() != rop.GUARD_NONNULL:
+                # This is only safe if the class of the guard_value matches the
+                # class of the guard_*_class, otherwise the intermediate ops might
+                # be executed with wrong classes.
+                previous_classbox = value.get_constant_class(self.optimizer.cpu)            
+                expected_classbox = self.optimizer.cpu.ts.cls_of_box(op.getarg(1))
+                assert previous_classbox is not None
+                assert expected_classbox is not None
+                if not previous_classbox.same_constant(expected_classbox):
+                    raise InvalidLoop('A GUARD_VALUE was proven to always fail')
             op = old_guard_op.copy_and_change(rop.GUARD_VALUE,
                                       args = [old_guard_op.getarg(0), op.getarg(1)])
             self.optimizer.replaces_guard[op] = old_guard_op
@@ -251,6 +261,8 @@
             assert isinstance(descr, compile.ResumeGuardDescr)
             descr.guard_opnum = rop.GUARD_VALUE
             descr.make_a_counter_per_value(op)
+            # to be safe
+            value.last_guard = None
         constbox = op.getarg(1)
         assert isinstance(constbox, Const)
         self.optimize_guard(op, constbox)
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
@@ -431,7 +431,53 @@
         jump(i55, i81)
         """
         self.optimize_loop(ops, expected)
-        
+
+    def test_boxed_opaque_unknown_class(self):
+        ops = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        mark_opaque_ptr(p2)        
+        i3 = getfield_gc(p2, descr=otherdescr)
+        label(p1)
+        i4 = getfield_gc(p1, descr=otherdescr)
+        label(p1)
+        p5 = getfield_gc(p1, descr=nextdescr) 
+        mark_opaque_ptr(p5)        
+        i6 = getfield_gc(p5, descr=otherdescr)
+        i7 = call(i6, descr=nonwritedescr)
+        """
+        expected = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        i3 = getfield_gc(p2, descr=otherdescr)
+        label(p1)
+        i4 = getfield_gc(p1, descr=otherdescr)
+        label(p1)
+        p5 = getfield_gc(p1, descr=nextdescr) 
+        i6 = getfield_gc(p5, descr=otherdescr)
+        i7 = call(i6, descr=nonwritedescr)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_opaque_pointer_fails_to_close_loop(self):
+        ops = """
+        [p1, p11]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        guard_class(p2, ConstClass(node_vtable)) []
+        mark_opaque_ptr(p2)        
+        i3 = getfield_gc(p2, descr=otherdescr)
+        label(p1, p11)
+        p12 = getfield_gc(p1, descr=nextdescr) 
+        i13 = getfield_gc(p2, descr=otherdescr)
+        i14 = call(i13, descr=nonwritedescr)        
+        jump(p11, p1)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+
+            
+
+
 class OptRenameStrlen(Optimization):
     def propagate_forward(self, op):
         dispatch_opt(self, op)
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizefficall.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizefficall.py
deleted file mode 100644
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizefficall.py
+++ /dev/null
@@ -1,315 +0,0 @@
-from pypy.rpython.lltypesystem import llmemory
-from pypy.rlib.libffi import Func, types
-from pypy.jit.metainterp.history import AbstractDescr
-from pypy.jit.codewriter.effectinfo import EffectInfo
-from pypy.jit.metainterp.optimizeopt.test.test_optimizebasic import BaseTestBasic
-from pypy.jit.metainterp.optimizeopt.test.test_optimizebasic import LLtypeMixin
-
-class MyCallDescr(AbstractDescr):
-    """
-    Fake calldescr to be used inside the tests.
-
-    The particularity is that it provides an __eq__ method, so that it
-    comparses by value by comparing the arg_types and typeinfo fields, so you
-    can check that the signature of a call is really what you want.
-    """
-
-    def __init__(self, arg_types, typeinfo, flags):
-        self.arg_types = arg_types
-        self.typeinfo = typeinfo   # return type
-        self.flags = flags
-
-    def __eq__(self, other):
-        return (self.arg_types == other.arg_types and
-                self.typeinfo == other.typeinfo and
-                self.flags == other.get_ffi_flags())
-
-class FakeLLObject(object):
-
-    def __init__(self, **kwds):
-        self.__dict__.update(kwds)
-        self._TYPE = llmemory.GCREF
-
-    def _identityhash(self):
-        return id(self)
-
-
-class TestFfiCall(BaseTestBasic, LLtypeMixin):
-
-    enable_opts = "intbounds:rewrite:virtualize:string:pure:earlyforce:heap:ffi"
-
-    class namespace:
-        cpu = LLtypeMixin.cpu
-        FUNC = LLtypeMixin.FUNC
-        vable_token_descr = LLtypeMixin.valuedescr
-        valuedescr = LLtypeMixin.valuedescr
-
-        int_float__int_42 = MyCallDescr('if', 'i', 42)
-        int_float__int_43 = MyCallDescr('if', 'i', 43)
-        funcptr = FakeLLObject()
-        func = FakeLLObject(_fake_class=Func,
-                            argtypes=[types.sint, types.double],
-                            restype=types.sint,
-                            flags=42)
-        func2 = FakeLLObject(_fake_class=Func,
-                             argtypes=[types.sint, types.double],
-                             restype=types.sint,
-                             flags=43)
-        #
-        ffi_slong = types.slong
-        dyn_123_field = cpu.fielddescrof_dynamic(offset=123,
-                                                 fieldsize=types.slong.c_size,
-                                                 is_pointer=False,
-                                                 is_float=False,
-                                                 is_signed=True)
-        #
-        def calldescr(cpu, FUNC, oopspecindex, extraeffect=None):
-            if extraeffect == EffectInfo.EF_RANDOM_EFFECTS:
-                f = None   # means "can force all" really
-            else:
-                f = []
-            einfo = EffectInfo(f, f, f, f, oopspecindex=oopspecindex,
-                               extraeffect=extraeffect)
-            return cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT, einfo)
-        #
-        libffi_prepare =  calldescr(cpu, FUNC, EffectInfo.OS_LIBFFI_PREPARE)
-        libffi_push_arg = calldescr(cpu, FUNC, EffectInfo.OS_LIBFFI_PUSH_ARG)
-        libffi_call =     calldescr(cpu, FUNC, EffectInfo.OS_LIBFFI_CALL,
-                                    EffectInfo.EF_RANDOM_EFFECTS)
-        libffi_struct_getfield = calldescr(cpu, FUNC, EffectInfo.OS_LIBFFI_STRUCT_GETFIELD)
-        libffi_struct_setfield = calldescr(cpu, FUNC, EffectInfo.OS_LIBFFI_STRUCT_SETFIELD)
-    
-    namespace = namespace.__dict__
-
-    # ----------------------------------------------------------------------
-    # this group of tests is the most important, as they represent the "real"
-    # cases you actually get when using rlib.libffi
-    
-    def test_ffi_call_opt(self):
-        ops = """
-        [i0, f1]
-        call(0, ConstPtr(func),                       descr=libffi_prepare)
-        call(0, ConstPtr(func), i0,                   descr=libffi_push_arg)
-        call(0, ConstPtr(func), f1,                   descr=libffi_push_arg)
-        i3 = call_may_force(0, ConstPtr(func), 12345, descr=libffi_call)
-        guard_not_forced() []
-        guard_no_exception() []
-        jump(i3, f1)
-        """
-        expected = """
-        [i0, f1]
-        i3 = call_release_gil(12345, i0, f1, descr=int_float__int_42)
-        guard_not_forced() []
-        guard_no_exception() []
-        jump(i3, f1)
-        """
-        loop = self.optimize_loop(ops, expected)
-
-    def test_ffi_call_nonconst(self):
-        ops = """
-        [i0, f1, p2]
-        call(0, p2,                       descr=libffi_prepare)
-        call(0, p2, i0,                   descr=libffi_push_arg)
-        call(0, p2, f1,                   descr=libffi_push_arg)
-        i3 = call_may_force(0, p2, 12345, descr=libffi_call)
-        guard_not_forced() []
-        guard_no_exception() []
-        jump(i3, f1, p2)
-        """
-        expected = ops
-        loop = self.optimize_loop(ops, expected)
-
-    def test_handle_virtualizables(self):
-        # this test needs an explanation to understand what goes on: see the
-        # comment in optimize_FORCE_TOKEN
-        ops = """
-        [i0, f1, p2]
-        call(0, ConstPtr(func),                       descr=libffi_prepare)
-        call(0, ConstPtr(func), i0,                   descr=libffi_push_arg)
-        call(0, ConstPtr(func), f1,                   descr=libffi_push_arg)
-        i4 = force_token()
-        setfield_gc(p2, i4, descr=vable_token_descr)
-        i3 = call_may_force(0, ConstPtr(func), 12345, descr=libffi_call)
-        guard_not_forced() [p2]
-        guard_no_exception() [p2]
-        jump(i3, f1, p2)
-        """
-        expected = """
-        [i0, f1, p2]
-        i4 = force_token()
-        setfield_gc(p2, i4, descr=vable_token_descr)
-        i3 = call_release_gil(12345, i0, f1, descr=int_float__int_42)
-        guard_not_forced() [p2]
-        guard_no_exception() [p2]
-        jump(i3, f1, p2)
-        """
-        loop = self.optimize_loop(ops, expected)
-
-    # ----------------------------------------------------------------------
-    # in pratice, the situations described in these tests should never happen,
-    # but we still want to ensure correctness
-
-    def test_rollback_if_op_in_between(self):
-        ops = """
-        [i0, f1]
-        call(0, ConstPtr(func),                       descr=libffi_prepare)
-        call(0, ConstPtr(func), i0,                   descr=libffi_push_arg)
-        i1 = int_add(i0, 1)
-        call(0, ConstPtr(func), f1,                   descr=libffi_push_arg)
-        i3 = call_may_force(0, ConstPtr(func), 12345, descr=libffi_call)
-        guard_not_forced() []
-        guard_no_exception() []
-        jump(i3, f1)
-        """
-        expected = ops
-        loop = self.optimize_loop(ops, expected)
-
-    def test_rollback_multiple_calls(self):
-        ops = """
-        [i0, i2, f1]
-        call(0, ConstPtr(func),                        descr=libffi_prepare)
-        call(0, ConstPtr(func),  i0,                   descr=libffi_push_arg)
-        #
-        # this is the culprit!
-        call(0, ConstPtr(func2),                       descr=libffi_prepare)
-        #
-        call(0, ConstPtr(func),  f1,                   descr=libffi_push_arg)
-        i3 = call_may_force(0, ConstPtr(func),  12345, descr=libffi_call)
-        guard_not_forced() []
-        guard_no_exception() []
-        call(0, ConstPtr(func2), i0,                   descr=libffi_push_arg)
-        call(0, ConstPtr(func2), f1,                   descr=libffi_push_arg)
-        i4 = call_may_force(0, ConstPtr(func2), 67890, descr=libffi_call)
-        guard_not_forced() []
-        guard_no_exception() []
-        jump(i3, i4, f1)
-        """
-        expected = ops
-        loop = self.optimize_loop(ops, expected)
-
-    def test_rollback_multiple_prepare(self):
-        ops = """
-        [i0, i2, f1]
-        call(0, ConstPtr(func),                        descr=libffi_prepare)
-        #
-        # this is the culprit!
-        call(0, ConstPtr(func2),                       descr=libffi_prepare)
-        #
-        call(0, ConstPtr(func),  i0,                   descr=libffi_push_arg)
-        call(0, ConstPtr(func),  f1,                   descr=libffi_push_arg)
-        i3 = call_may_force(0, ConstPtr(func),  12345, descr=libffi_call)
-        guard_not_forced() []
-        guard_no_exception() []
-        call(0, ConstPtr(func2), i0,                   descr=libffi_push_arg)
-        call(0, ConstPtr(func2), f1,                   descr=libffi_push_arg)
-        i4 = call_may_force(0, ConstPtr(func2), 67890, descr=libffi_call)
-        guard_not_forced() []
-        guard_no_exception() []
-        jump(i3, i4, f1)
-        """
-        expected = ops
-        loop = self.optimize_loop(ops, expected)
-
-    def test_optimize_nested_call(self):
-        ops = """
-        [i0, i2, f1]
-        call(0, ConstPtr(func),                        descr=libffi_prepare)
-        #
-        # this "nested" call is nicely optimized
-        call(0, ConstPtr(func2),                       descr=libffi_prepare)
-        call(0, ConstPtr(func2), i0,                   descr=libffi_push_arg)
-        call(0, ConstPtr(func2), f1,                   descr=libffi_push_arg)
-        i4 = call_may_force(0, ConstPtr(func2), 67890, descr=libffi_call)
-        guard_not_forced() []
-        guard_no_exception() []
-        #
-        call(0, ConstPtr(func),  i0,                   descr=libffi_push_arg)
-        call(0, ConstPtr(func),  f1,                   descr=libffi_push_arg)
-        i3 = call_may_force(0, ConstPtr(func),  12345, descr=libffi_call)
-        guard_not_forced() []
-        guard_no_exception() []
-        jump(i3, i4, f1)
-        """
-        expected = """
-        [i0, i2, f1]
-        call(0, ConstPtr(func),                        descr=libffi_prepare)
-        #
-        # this "nested" call is nicely optimized
-        i4 = call_release_gil(67890, i0, f1, descr=int_float__int_43)
-        guard_not_forced() []
-        guard_no_exception() []
-        #
-        call(0, ConstPtr(func),  i0,                   descr=libffi_push_arg)
-        call(0, ConstPtr(func),  f1,                   descr=libffi_push_arg)
-        i3 = call_may_force(0, ConstPtr(func),  12345, descr=libffi_call)
-        guard_not_forced() []
-        guard_no_exception() []
-        jump(i3, i4, f1)
-        """
-        loop = self.optimize_loop(ops, expected)
-
-    def test_rollback_force_token(self):
-        ops = """
-        [i0, f1, p2]
-        call(0, ConstPtr(func),                       descr=libffi_prepare)
-        call(0, ConstPtr(func), i0,                   descr=libffi_push_arg)
-        call(0, ConstPtr(func), f1,                   descr=libffi_push_arg)
-        i4 = force_token()
-        i5 = int_add(i0, 1) # culprit!
-        setfield_gc(p2, i4, descr=vable_token_descr)
-        i3 = call_may_force(0, ConstPtr(func), 12345, descr=libffi_call)
-        guard_not_forced() [p2]
-        guard_no_exception() [p2]
-        jump(i3, f1, p2)
-        """
-        expected = ops
-        loop = self.optimize_loop(ops, expected)
-
-    def test_allow_setfields_in_between(self):
-        ops = """
-        [i0, f1, p2]
-        call(0, ConstPtr(func),                       descr=libffi_prepare)
-        call(0, ConstPtr(func), i0,                   descr=libffi_push_arg)
-        call(0, ConstPtr(func), f1,                   descr=libffi_push_arg)
-        setfield_gc(p2, i0,                           descr=valuedescr)
-        i3 = call_may_force(0, ConstPtr(func), 12345, descr=libffi_call)
-        guard_not_forced() []
-        guard_no_exception() []
-        jump(i3, f1, p2)
-        """
-        expected = """
-        [i0, f1, p2]
-        setfield_gc(p2, i0, descr=valuedescr)
-        i3 = call_release_gil(12345, i0, f1, descr=int_float__int_42)
-        guard_not_forced() []
-        guard_no_exception() []
-        jump(i3, f1, p2)
-        """
-        loop = self.optimize_loop(ops, expected)
-
-    def test_ffi_struct_fields(self):
-        ops = """
-        [i0]
-        i1 = call(0, ConstClass(ffi_slong), i0, 123, descr=libffi_struct_getfield)
-        i2 = int_add(i1, 1)
-        call(0, ConstClass(ffi_slong), i0, 123, i2, descr=libffi_struct_setfield)
-        jump(i1)
-        """
-        expected = """
-        [i0]
-        i1 = getfield_raw(i0, descr=dyn_123_field)
-        i2 = int_add(i1, 1)
-        setfield_raw(i0, i2, descr=dyn_123_field)
-        jump(i1)
-        """
-        loop = self.optimize_loop(ops, expected)
-
-    def test_ffi_struct_fields_nonconst(self):
-        ops = """
-        [i0, i1]
-        i2 = call(0, ConstClass(ffi_slong), i0, i1,  descr=libffi_struct_getfield)
-        i3 = call(0, i1                   , i0, 123, descr=libffi_struct_getfield)
-        jump(i1)
-        """
-        expected = ops
-        loop = self.optimize_loop(ops, expected)
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -41,14 +41,6 @@
     #
     chain, _ = build_opt_chain(metainterp_sd, "aaa:bbb")
     check(chain, ["OptSimplify"])
-    #
-    chain, _ = build_opt_chain(metainterp_sd, "ffi")
-    check(chain, ["OptFfiCall", "OptSimplify"])
-    #
-    metainterp_sd.config = get_pypy_config(translating=True)
-    assert not metainterp_sd.config.translation.jit_ffi
-    chain, _ = build_opt_chain(metainterp_sd, "ffi")
-    check(chain, ["OptSimplify"])
 
 
 # ____________________________________________________________
@@ -7862,6 +7854,84 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_only_strengthen_guard_if_class_matches(self):
+        ops = """
+        [p1]
+        guard_class(p1, ConstClass(node_vtable2)) []
+        guard_value(p1, ConstPtr(myptr)) []
+        jump(p1)
+        """
+        self.raises(InvalidLoop, self.optimize_loop,
+                       ops, ops)
+
+    def test_licm_boxed_opaque_getitem(self):
+        ops = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        mark_opaque_ptr(p2)        
+        guard_class(p2,  ConstClass(node_vtable)) []
+        i3 = getfield_gc(p2, descr=otherdescr)
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p1)
+        """
+        expected = """
+        [p1, i3]
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p1, i3)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_licm_boxed_opaque_getitem_unknown_class(self):
+        ops = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        mark_opaque_ptr(p2)        
+        i3 = getfield_gc(p2, descr=otherdescr)
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p1)
+        """
+        expected = """
+        [p1, p2]
+        i3 = getfield_gc(p2, descr=otherdescr)
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p1, p2)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_licm_unboxed_opaque_getitem(self):
+        ops = """
+        [p2]
+        mark_opaque_ptr(p2)        
+        guard_class(p2,  ConstClass(node_vtable)) []
+        i3 = getfield_gc(p2, descr=otherdescr)
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p2)
+        """
+        expected = """
+        [p1, i3]
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p1, i3)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_licm_unboxed_opaque_getitem_unknown_class(self):
+        ops = """
+        [p2]
+        mark_opaque_ptr(p2)        
+        i3 = getfield_gc(p2, descr=otherdescr)
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p2)
+        """
+        expected = """
+        [p2]
+        i3 = getfield_gc(p2, descr=otherdescr) 
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p2)
+        """
+        self.optimize_loop(ops, expected)
+
+
+
 class TestLLtype(OptimizeOptTest, LLtypeMixin):
     pass
 
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_util.py b/pypy/jit/metainterp/optimizeopt/test/test_util.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_util.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_util.py
@@ -346,7 +346,6 @@
         self.options = Fake()
         self.globaldata = Fake()
         self.config = get_pypy_config(translating=True)
-        self.config.translation.jit_ffi = True
 
     class logger_noopt:
         @classmethod
diff --git a/pypy/jit/metainterp/optimizeopt/unroll.py b/pypy/jit/metainterp/optimizeopt/unroll.py
--- a/pypy/jit/metainterp/optimizeopt/unroll.py
+++ b/pypy/jit/metainterp/optimizeopt/unroll.py
@@ -120,9 +120,9 @@
                 limit = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.retrace_limit
                 if cell_token.retraced_count < limit:
                     cell_token.retraced_count += 1
-                    #debug_print('Retracing (%d/%d)' % (cell_token.retraced_count, limit))
+                    debug_print('Retracing (%d/%d)' % (cell_token.retraced_count, limit))
                 else:
-                    #debug_print("Retrace count reached, jumping to preamble")
+                    debug_print("Retrace count reached, jumping to preamble")
                     assert cell_token.target_tokens[0].virtual_state is None
                     jumpop.setdescr(cell_token.target_tokens[0])
                     self.optimizer.send_extra_operation(jumpop)
@@ -341,6 +341,12 @@
             op = self.short[i]
             newop = self.short_inliner.inline_op(op)
             self.optimizer.send_extra_operation(newop)
+            if op.result in self.short_boxes.assumed_classes:
+                classbox = self.getvalue(newop.result).get_constant_class(self.optimizer.cpu)
+                assumed_classbox = self.short_boxes.assumed_classes[op.result]
+                if not classbox or not classbox.same_constant(assumed_classbox):
+                    raise InvalidLoop('Class of opaque pointer needed in short ' +
+                                      'preamble unknown at end of loop')
             i += 1
 
         # Import boxes produced in the preamble but used in the loop
@@ -432,9 +438,13 @@
                 newargs[i] = a.clonebox()
                 boxmap[a] = newargs[i]
         inliner = Inliner(short_inputargs, newargs)
+        target_token.assumed_classes = {}
         for i in range(len(short)):
-            short[i] = inliner.inline_op(short[i])
-
+            op = short[i]
+            newop = inliner.inline_op(op)
+            if op.result and op.result in self.short_boxes.assumed_classes:
+                target_token.assumed_classes[newop.result] = self.short_boxes.assumed_classes[op.result]
+            short[i] = newop
         target_token.resume_at_jump_descr = target_token.resume_at_jump_descr.clone_if_mutable()
         inliner.inline_descr_inplace(target_token.resume_at_jump_descr)
 
@@ -588,6 +598,12 @@
                     for shop in target.short_preamble[1:]:
                         newop = inliner.inline_op(shop)
                         self.optimizer.send_extra_operation(newop)
+                        if shop.result in target.assumed_classes:
+                            classbox = self.getvalue(newop.result).get_constant_class(self.optimizer.cpu)
+                            if not classbox or not classbox.same_constant(target.assumed_classes[shop.result]):
+                                raise InvalidLoop('The class of an opaque pointer at the end ' +
+                                                  'of the bridge does not mach the class ' + 
+                                                  'it has at the start of the target loop')
                 except InvalidLoop:
                     #debug_print("Inlining failed unexpectedly",
                     #            "jumping to preamble instead")
diff --git a/pypy/jit/metainterp/optimizeopt/virtualstate.py b/pypy/jit/metainterp/optimizeopt/virtualstate.py
--- a/pypy/jit/metainterp/optimizeopt/virtualstate.py
+++ b/pypy/jit/metainterp/optimizeopt/virtualstate.py
@@ -288,7 +288,8 @@
 
 
 class NotVirtualStateInfo(AbstractVirtualStateInfo):
-    def __init__(self, value):
+    def __init__(self, value, is_opaque=False):
+        self.is_opaque = is_opaque
         self.known_class = value.known_class
         self.level = value.level
         if value.intbound is None:
@@ -357,6 +358,9 @@
         if self.lenbound or other.lenbound:
             raise InvalidLoop('The array length bounds does not match.')
 
+        if self.is_opaque:
+            raise InvalidLoop('Generating guards for opaque pointers is not safe')
+
         if self.level == LEVEL_KNOWNCLASS and \
            box.nonnull() and \
            self.known_class.same_constant(cpu.ts.cls_of_box(box)):
@@ -560,7 +564,8 @@
         return VirtualState([self.state(box) for box in jump_args])
 
     def make_not_virtual(self, value):
-        return NotVirtualStateInfo(value)
+        is_opaque = value in self.optimizer.opaque_pointers
+        return NotVirtualStateInfo(value, is_opaque)
 
     def make_virtual(self, known_class, fielddescrs):
         return VirtualStateInfo(known_class, fielddescrs)
@@ -585,6 +590,7 @@
         self.rename = {}
         self.optimizer = optimizer
         self.availible_boxes = availible_boxes
+        self.assumed_classes = {}
 
         if surviving_boxes is not None:
             for box in surviving_boxes:
@@ -678,6 +684,12 @@
             raise BoxNotProducable
 
     def add_potential(self, op, synthetic=False):
+        if op.result and op.result in self.optimizer.values:
+            value = self.optimizer.values[op.result]
+            if value in self.optimizer.opaque_pointers:
+                classbox = value.get_constant_class(self.optimizer.cpu)
+                if classbox:
+                    self.assumed_classes[op.result] = classbox
         if op.result not in self.potential_ops:
             self.potential_ops[op.result] = op
         else:
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -13,9 +13,7 @@
 from pypy.jit.metainterp import executor
 from pypy.jit.metainterp.logger import Logger
 from pypy.jit.metainterp.jitprof import EmptyProfiler
-from pypy.jit.metainterp.jitprof import GUARDS, RECORDED_OPS, ABORT_ESCAPE
-from pypy.jit.metainterp.jitprof import ABORT_TOO_LONG, ABORT_BRIDGE, \
-                                        ABORT_FORCE_QUASIIMMUT, ABORT_BAD_LOOP
+from pypy.rlib.jit import Counters
 from pypy.jit.metainterp.jitexc import JitException, get_llexception
 from pypy.jit.metainterp.heapcache import HeapCache
 from pypy.rlib.objectmodel import specialize
@@ -224,7 +222,7 @@
                     'float_neg', 'float_abs',
                     'cast_ptr_to_int', 'cast_int_to_ptr',
                     'convert_float_bytes_to_longlong',
-                    'convert_longlong_bytes_to_float',
+                    'convert_longlong_bytes_to_float', 'int_force_ge_zero',
                     ]:
         exec py.code.Source('''
             @arguments("box")
@@ -453,12 +451,27 @@
     opimpl_getarrayitem_raw_f = _opimpl_getarrayitem_raw_any
 
     @arguments("box", "descr", "box")
+    def _opimpl_getarrayitem_raw_pure_any(self, arraybox,arraydescr, indexbox):
+        return self.execute_with_descr(rop.GETARRAYITEM_RAW_PURE,
+                                       arraydescr, arraybox, indexbox)
+
+    opimpl_getarrayitem_raw_i_pure = _opimpl_getarrayitem_raw_pure_any
+    opimpl_getarrayitem_raw_f_pure = _opimpl_getarrayitem_raw_pure_any
+
+    @arguments("box", "descr", "box")
     def _opimpl_getarrayitem_gc_pure_any(self, arraybox, arraydescr, indexbox):
+        if isinstance(arraybox, ConstPtr) and isinstance(indexbox, ConstInt):
+            # if the arguments are directly constants, bypass the heapcache
+            # completely
+            resbox = executor.execute(self.metainterp.cpu, self.metainterp,
+                                      rop.GETARRAYITEM_GC_PURE, arraydescr,
+                                      arraybox, indexbox)
+            return resbox.constbox()
         return self._do_getarrayitem_gc_any(rop.GETARRAYITEM_GC_PURE, arraybox, arraydescr, indexbox)
 
-    opimpl_getarrayitem_gc_pure_i = _opimpl_getarrayitem_gc_pure_any
-    opimpl_getarrayitem_gc_pure_r = _opimpl_getarrayitem_gc_pure_any
-    opimpl_getarrayitem_gc_pure_f = _opimpl_getarrayitem_gc_pure_any
+    opimpl_getarrayitem_gc_i_pure = _opimpl_getarrayitem_gc_pure_any
+    opimpl_getarrayitem_gc_r_pure = _opimpl_getarrayitem_gc_pure_any
+    opimpl_getarrayitem_gc_f_pure = _opimpl_getarrayitem_gc_pure_any
 
     @arguments("box", "descr", "box", "box")
     def _opimpl_setarrayitem_gc_any(self, arraybox, arraydescr,
@@ -565,6 +578,11 @@
 
     @arguments("box", "descr")
     def _opimpl_getfield_gc_pure_any(self, box, fielddescr):
+        if isinstance(box, ConstPtr):
+            # if 'box' is directly a ConstPtr, bypass the heapcache completely
+            resbox = executor.execute(self.metainterp.cpu, self.metainterp,
+                                      rop.GETFIELD_GC_PURE, fielddescr, box)
+            return resbox.constbox()
         return self._opimpl_getfield_gc_any_pureornot(
                 rop.GETFIELD_GC_PURE, box, fielddescr)
     opimpl_getfield_gc_i_pure = _opimpl_getfield_gc_pure_any
@@ -649,6 +667,20 @@
     opimpl_setfield_raw_r = _opimpl_setfield_raw_any
     opimpl_setfield_raw_f = _opimpl_setfield_raw_any
 
+    @arguments("box", "box", "descr", "box")
+    def _opimpl_raw_store(self, addrbox, offsetbox, arraydescr, valuebox):
+        self.execute_with_descr(rop.RAW_STORE, arraydescr,
+                                addrbox, offsetbox, valuebox)
+    opimpl_raw_store_i = _opimpl_raw_store
+    opimpl_raw_store_f = _opimpl_raw_store
+
+    @arguments("box", "box", "descr")
+    def _opimpl_raw_load(self, addrbox, offsetbox, arraydescr):
+        return self.execute_with_descr(rop.RAW_LOAD, arraydescr,
+                                       addrbox, offsetbox)
+    opimpl_raw_load_i = _opimpl_raw_load
+    opimpl_raw_load_f = _opimpl_raw_load
+
     @arguments("box", "descr", "descr", "orgpc")
     def opimpl_record_quasiimmut_field(self, box, fielddescr,
                                        mutatefielddescr, orgpc):
@@ -675,7 +707,7 @@
             from pypy.jit.metainterp.quasiimmut import do_force_quasi_immutable
             do_force_quasi_immutable(self.metainterp.cpu, box.getref_base(),
                                      mutatefielddescr)
-            raise SwitchToBlackhole(ABORT_FORCE_QUASIIMMUT)
+            raise SwitchToBlackhole(Counters.ABORT_FORCE_QUASIIMMUT)
         self.generate_guard(rop.GUARD_ISNULL, mutatebox, resumepc=orgpc)
 
     def _nonstandard_virtualizable(self, pc, box):
@@ -1255,7 +1287,7 @@
         guard_op = metainterp.history.record(opnum, moreargs, None,
                                              descr=resumedescr)
         self.capture_resumedata(resumedescr, resumepc)
-        self.metainterp.staticdata.profiler.count_ops(opnum, GUARDS)
+        self.metainterp.staticdata.profiler.count_ops(opnum, Counters.GUARDS)
         # count
         metainterp.attach_debug_info(guard_op)
         return guard_op
@@ -1370,6 +1402,8 @@
             if vablebox is not None:
                 self.metainterp.history.record(rop.KEEPALIVE, [vablebox], None)
             self.metainterp.handle_possible_exception()
+            if effectinfo.oopspecindex == effectinfo.OS_LIBFFI_CALL:
+                self.metainterp.direct_libffi_call()
             return resbox
         else:
             effect = effectinfo.extraeffect
@@ -1464,6 +1498,7 @@
         self.jitdrivers_sd = codewriter.callcontrol.jitdrivers_sd
         self.virtualref_info = codewriter.callcontrol.virtualref_info
         self.callinfocollection = codewriter.callcontrol.callinfocollection
+        self.has_libffi_call = codewriter.callcontrol.has_libffi_call
         #
         # store this information for fastpath of call_assembler
         # (only the paths that can actually be taken)
@@ -1776,7 +1811,7 @@
             return resbox.constbox()
         # record the operation
         profiler = self.staticdata.profiler
-        profiler.count_ops(opnum, RECORDED_OPS)
+        profiler.count_ops(opnum, Counters.RECORDED_OPS)
         self.heapcache.invalidate_caches(opnum, descr, argboxes)
         op = self.history.record(opnum, argboxes, resbox, descr)
         self.attach_debug_info(op)
@@ -1837,7 +1872,7 @@
             if greenkey_of_huge_function is not None:
                 warmrunnerstate.disable_noninlinable_function(
                     greenkey_of_huge_function)
-            raise SwitchToBlackhole(ABORT_TOO_LONG)
+            raise SwitchToBlackhole(Counters.ABORT_TOO_LONG)
 
     def _interpret(self):
         # Execute the frames forward until we raise a DoneWithThisFrame,
@@ -1921,7 +1956,7 @@
         try:
             self.prepare_resume_from_failure(key.guard_opnum, dont_change_position)
             if self.resumekey_original_loop_token is None:   # very rare case
-                raise SwitchToBlackhole(ABORT_BRIDGE)
+                raise SwitchToBlackhole(Counters.ABORT_BRIDGE)
             self.interpret()
         except SwitchToBlackhole, stb:
             self.run_blackhole_interp_to_cancel_tracing(stb)
@@ -1996,7 +2031,7 @@
                 # raises in case it works -- which is the common case
                 if self.partial_trace:
                     if  start != self.retracing_from:
-                        raise SwitchToBlackhole(ABORT_BAD_LOOP) # For now
+                        raise SwitchToBlackhole(Counters.ABORT_BAD_LOOP) # For now
                 self.compile_loop(original_boxes, live_arg_boxes, start, resumedescr)
                 # creation of the loop was cancelled!
                 self.cancel_count += 1
@@ -2005,7 +2040,7 @@
                     if memmgr:
                         if self.cancel_count > memmgr.max_unroll_loops:
                             self.staticdata.log('cancelled too many times!')
-                            raise SwitchToBlackhole(ABORT_BAD_LOOP)
+                            raise SwitchToBlackhole(Counters.ABORT_BAD_LOOP)
                 self.staticdata.log('cancelled, tracing more...')
 
         # Otherwise, no loop found so far, so continue tracing.
@@ -2299,7 +2334,8 @@
             if vinfo.tracing_after_residual_call(virtualizable):
                 # the virtualizable escaped during CALL_MAY_FORCE.
                 self.load_fields_from_virtualizable()
-                raise SwitchToBlackhole(ABORT_ESCAPE, raising_exception=True)
+                raise SwitchToBlackhole(Counters.ABORT_ESCAPE,
+                                        raising_exception=True)
                 # ^^^ we set 'raising_exception' to True because we must still
                 # have the eventual exception raised (this is normally done
                 # after the call to vable_after_residual_call()).
@@ -2512,6 +2548,89 @@
         else:
             return None
 
+    def direct_libffi_call(self):
+        """Generate a direct call to C code, patching the CALL_MAY_FORCE
+        to jit_ffi_call() that occurred just now.
+        """
+        # an 'assert' that constant-folds away the rest of this function
+        # if the codewriter didn't produce any OS_LIBFFI_CALL at all.
+        assert self.staticdata.has_libffi_call
+        #
+        from pypy.rpython.lltypesystem import llmemory
+        from pypy.rlib.jit_libffi import CIF_DESCRIPTION_P
+        from pypy.jit.backend.llsupport.ffisupport import get_arg_descr
+        #
+        num_extra_guards = 0
+        while True:
+            op = self.history.operations[-1-num_extra_guards]
+            if op.getopnum() == rop.CALL_MAY_FORCE:
+                break
+            assert op.is_guard()
+            num_extra_guards += 1
+        #
+        box_cif_description = op.getarg(1)
+        if not isinstance(box_cif_description, ConstInt):
+            return
+        cif_description = box_cif_description.getint()
+        cif_description = llmemory.cast_int_to_adr(cif_description)
+        cif_description = llmemory.cast_adr_to_ptr(cif_description,
+                                                   CIF_DESCRIPTION_P)
+        extrainfo = op.getdescr().get_extra_info()
+        calldescr = self.cpu.calldescrof_dynamic(cif_description, extrainfo)
+        if calldescr is None:
+            return
+        #
+        extra_guards = []
+        for i in range(num_extra_guards):
+            extra_guards.append(self.history.operations.pop())
+        extra_guards.reverse()
+        #
+        box_exchange_buffer = op.getarg(3)
+        self.history.operations.pop()
+        arg_boxes = []
+        for i in range(cif_description.nargs):
+            kind, descr = get_arg_descr(self.cpu, cif_description.atypes[i])
+            if kind == 'i':
+                box_arg = history.BoxInt()
+            elif kind == 'f':
+                box_arg = history.BoxFloat()
+            else:
+                assert kind == 'v'
+                continue
+            ofs = cif_description.exchange_args[i]
+            box_argpos = history.BoxInt()
+            self.history.record(rop.INT_ADD,
+                                [box_exchange_buffer, ConstInt(ofs)],
+                                box_argpos)
+            self.history.record(rop.GETARRAYITEM_RAW,
+                                [box_argpos, ConstInt(0)],
+                                box_arg, descr)
+            arg_boxes.append(box_arg)
+        #
+        kind, descr = get_arg_descr(self.cpu, cif_description.rtype)
+        if kind == 'i':
+            box_result = history.BoxInt()
+        elif kind == 'f':
+            box_result = history.BoxFloat()
+        else:
+            assert kind == 'v'
+            box_result = None
+        self.history.record(rop.CALL_RELEASE_GIL,
+                            [op.getarg(2)] + arg_boxes,
+                            box_result, calldescr)
+        #
+        self.history.operations.extend(extra_guards)
+        #
+        if box_result is not None:
+            ofs = cif_description.exchange_result
+            box_resultpos = history.BoxInt()
+            self.history.record(rop.INT_ADD,
+                                [box_exchange_buffer, ConstInt(ofs)],
+                                box_resultpos)
+            self.history.record(rop.SETARRAYITEM_RAW,
+                                [box_resultpos, ConstInt(0), box_result],
+                                None, descr)
+
 # ____________________________________________________________
 
 class ChangeFrame(JitException):
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -443,6 +443,7 @@
     'INT_IS_TRUE/1b',
     'INT_NEG/1',
     'INT_INVERT/1',
+    'INT_FORCE_GE_ZERO/1',
     #
     'SAME_AS/1',      # gets a Const or a Box, turns it into another Box
     'CAST_PTR_TO_INT/1',
@@ -459,6 +460,7 @@
     'GETFIELD_GC_PURE/1d',
     'GETFIELD_RAW_PURE/1d',
     'GETARRAYITEM_GC_PURE/2d',
+    'GETARRAYITEM_RAW_PURE/2d',
     'UNICODELEN/1',
     'UNICODEGETITEM/2',
     #
@@ -471,7 +473,7 @@
     'GETARRAYITEM_GC/2d',
     'GETARRAYITEM_RAW/2d',
     'GETINTERIORFIELD_GC/2d',
-    'GETINTERIORFIELD_RAW/2d',
+    'RAW_LOAD/2d',
     'GETFIELD_GC/1d',
     'GETFIELD_RAW/1d',
     '_MALLOC_FIRST',
@@ -490,7 +492,8 @@
     'SETARRAYITEM_GC/3d',
     'SETARRAYITEM_RAW/3d',
     'SETINTERIORFIELD_GC/3d',
-    'SETINTERIORFIELD_RAW/3d',
+    'SETINTERIORFIELD_RAW/3d',    # only used by llsupport/rewrite.py
+    'RAW_STORE/3d',
     'SETFIELD_GC/2d',
     'SETFIELD_RAW/2d',
     'STRSETITEM/3',
diff --git a/pypy/jit/metainterp/resume.py b/pypy/jit/metainterp/resume.py
--- a/pypy/jit/metainterp/resume.py
+++ b/pypy/jit/metainterp/resume.py
@@ -10,6 +10,7 @@
 from pypy.rpython import annlowlevel
 from pypy.rlib import rarithmetic, rstack
 from pypy.rlib.objectmodel import we_are_translated, specialize
+from pypy.rlib.objectmodel import compute_unique_id
 from pypy.rlib.debug import have_debug_prints, ll_assert
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
 from pypy.jit.metainterp.optimize import InvalidLoop
@@ -254,9 +255,9 @@
         self.cached_virtuals.clear()
 
     def update_counters(self, profiler):
-        profiler.count(jitprof.NVIRTUALS, self.nvirtuals)
-        profiler.count(jitprof.NVHOLES, self.nvholes)
-        profiler.count(jitprof.NVREUSED, self.nvreused)
+        profiler.count(jitprof.Counters.NVIRTUALS, self.nvirtuals)
+        profiler.count(jitprof.Counters.NVHOLES, self.nvholes)
+        profiler.count(jitprof.Counters.NVREUSED, self.nvreused)
 
 _frame_info_placeholder = (None, 0, 0)
 
@@ -493,7 +494,7 @@
         return self.setfields(decoder, struct)
 
     def debug_prints(self):
-        debug_print("\tvirtualinfo", self.known_class.repr_rpython())
+        debug_print("\tvirtualinfo", self.known_class.repr_rpython(), " at ",  compute_unique_id(self))
         AbstractVirtualStructInfo.debug_prints(self)
 
 
@@ -509,7 +510,7 @@
         return self.setfields(decoder, struct)
 
     def debug_prints(self):
-        debug_print("\tvstructinfo", self.typedescr.repr_rpython())
+        debug_print("\tvstructinfo", self.typedescr.repr_rpython(), " at ",  compute_unique_id(self))
         AbstractVirtualStructInfo.debug_prints(self)
 
 class VArrayInfo(AbstractVirtualInfo):
@@ -539,7 +540,7 @@
         return array
 
     def debug_prints(self):
-        debug_print("\tvarrayinfo", self.arraydescr)
+        debug_print("\tvarrayinfo", self.arraydescr, " at ",  compute_unique_id(self))
         for i in self.fieldnums:
             debug_print("\t\t", str(untag(i)))
 
@@ -550,7 +551,7 @@
         self.fielddescrs = fielddescrs
 
     def debug_prints(self):
-        debug_print("\tvarraystructinfo", self.arraydescr)
+        debug_print("\tvarraystructinfo", self.arraydescr, " at ",  compute_unique_id(self))
         for i in self.fieldnums:
             debug_print("\t\t", str(untag(i)))
 
@@ -581,7 +582,7 @@
         return string
 
     def debug_prints(self):
-        debug_print("\tvstrplaininfo length", len(self.fieldnums))
+        debug_print("\tvstrplaininfo length", len(self.fieldnums), " at ",  compute_unique_id(self))
 
 
 class VStrConcatInfo(AbstractVirtualInfo):
@@ -599,7 +600,7 @@
         return string
 
     def debug_prints(self):
-        debug_print("\tvstrconcatinfo")
+        debug_print("\tvstrconcatinfo at ",  compute_unique_id(self))
         for i in self.fieldnums:
             debug_print("\t\t", str(untag(i)))
 
@@ -615,7 +616,7 @@
         return string
 
     def debug_prints(self):
-        debug_print("\tvstrsliceinfo")
+        debug_print("\tvstrsliceinfo at ",  compute_unique_id(self))
         for i in self.fieldnums:
             debug_print("\t\t", str(untag(i)))
 
@@ -636,7 +637,7 @@
         return string
 
     def debug_prints(self):
-        debug_print("\tvuniplaininfo length", len(self.fieldnums))
+        debug_print("\tvuniplaininfo length", len(self.fieldnums), " at ",  compute_unique_id(self))
 
 
 class VUniConcatInfo(AbstractVirtualInfo):
@@ -654,7 +655,7 @@
         return string
 
     def debug_prints(self):
-        debug_print("\tvuniconcatinfo")
+        debug_print("\tvuniconcatinfo at ",  compute_unique_id(self))
         for i in self.fieldnums:
             debug_print("\t\t", str(untag(i)))
 
@@ -671,7 +672,7 @@
         return string
 
     def debug_prints(self):
-        debug_print("\tvunisliceinfo")
+        debug_print("\tvunisliceinfo at ",  compute_unique_id(self))
         for i in self.fieldnums:
             debug_print("\t\t", str(untag(i)))
 
@@ -1280,7 +1281,6 @@
 
 def dump_storage(storage, liveboxes):
     "For profiling only."
-    from pypy.rlib.objectmodel import compute_unique_id
     debug_start("jit-resume")
     if have_debug_prints():
         debug_print('Log storage', compute_unique_id(storage))
@@ -1313,4 +1313,13 @@
                     debug_print('\t\t', 'None')
                 else:
                     virtual.debug_prints()
+        if storage.rd_pendingfields:
+            debug_print('\tpending setfields')
+            for i in range(len(storage.rd_pendingfields)):
+                lldescr  = storage.rd_pendingfields[i].lldescr
+                num      = storage.rd_pendingfields[i].num
+                fieldnum = storage.rd_pendingfields[i].fieldnum
+                itemindex= storage.rd_pendingfields[i].itemindex
+                debug_print("\t\t", str(lldescr), str(untag(num)), str(untag(fieldnum)), itemindex)
+
     debug_stop("jit-resume")
diff --git a/pypy/jit/metainterp/test/support.py b/pypy/jit/metainterp/test/support.py
--- a/pypy/jit/metainterp/test/support.py
+++ b/pypy/jit/metainterp/test/support.py
@@ -42,6 +42,9 @@
         trace_limit = sys.maxint
         enable_opts = ALL_OPTS_DICT
 
+    if kwds.pop('disable_optimizations', False):
+        FakeWarmRunnerState.enable_opts = {}
+
     func._jit_unroll_safe_ = True
     rtyper = support.annotate(func, values, type_system=type_system,
                               translationoptions=translationoptions)
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -3797,6 +3797,7 @@
         assert res == 3
 
     def test_float_bytes(self):
+        from pypy.rlib.rfloat import isnan
         def f(n):
             ll = float2longlong(n)
             return longlong2float(ll)
@@ -3804,7 +3805,7 @@
         for x in [2.5, float("nan"), -2.5, float("inf")]:
             # There are tests elsewhere to verify the correctness of this.
             res = self.interp_operations(f, [x])
-            assert res == x or math.isnan(x) and math.isnan(res)
+            assert res == x or isnan(x) and isnan(res)
 
 
 class TestLLtype(BaseLLtypeTests, LLJitMixin):
diff --git a/pypy/jit/metainterp/test/test_dict.py b/pypy/jit/metainterp/test/test_dict.py
--- a/pypy/jit/metainterp/test/test_dict.py
+++ b/pypy/jit/metainterp/test/test_dict.py
@@ -161,6 +161,22 @@
                            'guard_no_exception': 8, 'new': 2,
                            'guard_false': 2, 'int_is_true': 2})
 
+    def test_unrolling_of_dict_iter(self):
+        driver = JitDriver(greens = [], reds = ['n'])
+        
+        def f(n):
+            while n > 0:
+                driver.jit_merge_point(n=n)
+                d = {1: 1}
+                for elem in d:
+                    n -= elem
+            return n
+
+        res = self.meta_interp(f, [10], listops=True)
+        assert res == 0
+        self.check_simple_loop({'int_sub': 1, 'int_gt': 1, 'guard_true': 1,
+                                'jump': 1})
+
 
 class TestOOtype(DictTests, OOJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_fficall.py b/pypy/jit/metainterp/test/test_fficall.py
--- a/pypy/jit/metainterp/test/test_fficall.py
+++ b/pypy/jit/metainterp/test/test_fficall.py
@@ -1,210 +1,106 @@
-from __future__ import with_statement
 import py
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.jit.metainterp.test.support import LLJitMixin
+from pypy.rlib import jit
+from pypy.rlib.jit_libffi import types, CIF_DESCRIPTION, FFI_TYPE_PP
+from pypy.rlib.unroll import unrolling_iterable
 
-from pypy.jit.metainterp.test.support import LLJitMixin
-from pypy.rlib.jit import JitDriver, promote, dont_look_inside
-from pypy.rlib.libffi import (ArgChain, IS_32_BIT, array_getitem, array_setitem,
-    types, struct_setfield_int, struct_getfield_int)
-from pypy.rlib.objectmodel import specialize
-from pypy.rlib.rarithmetic import r_singlefloat, r_longlong, r_ulonglong
-from pypy.rlib.test.test_libffi import TestLibffiCall as _TestLibffiCall
-from pypy.rlib.unroll import unrolling_iterable
-from pypy.rpython.lltypesystem import lltype, rffi
-from pypy.tool.sourcetools import func_with_new_name
 
+def get_description(atypes, rtype):
+    p = lltype.malloc(CIF_DESCRIPTION, len(atypes),
+                      flavor='raw', immortal=True)
+    p.abi = 42
+    p.nargs = len(atypes)
+    p.rtype = rtype
+    p.atypes = lltype.malloc(FFI_TYPE_PP.TO, len(atypes),
+                             flavor='raw', immortal=True)
+    for i in range(len(atypes)):
+        p.atypes[i] = atypes[i]
+    return p
 
-class FfiCallTests(_TestLibffiCall):
-    # ===> ../../../rlib/test/test_libffi.py
 
-    def call(self, funcspec, args, RESULT, is_struct=False, jitif=[]):
-        """
-        Call the function specified by funcspec in a loop, and let the jit to
-        see and optimize it.
-        """
-        #
-        lib, name, argtypes, restype = funcspec
-        method_and_args = []
-        for argval in args:
-            if isinstance(argval, tuple):
-                method_name, argval = argval
+class FfiCallTests(object):
+
+    def _run(self, atypes, rtype, avalues, rvalue):
+        cif_description = get_description(atypes, rtype)
+
+        def verify(*args):
+            assert args == tuple(avalues)
+            return rvalue
+        FUNC = lltype.FuncType([lltype.typeOf(avalue) for avalue in avalues],
+                               lltype.typeOf(rvalue))
+        func = lltype.functionptr(FUNC, 'verify', _callable=verify)
+        func_addr = rffi.cast(rffi.VOIDP, func)
+
+        for i in range(len(avalues)):
+            cif_description.exchange_args[i] = (i+1) * 16
+        cif_description.exchange_result = (len(avalues)+1) * 16
+
+        unroll_avalues = unrolling_iterable(avalues)
+
+        @jit.oopspec("libffi_call(cif_description,func_addr,exchange_buffer)")
+        def fake_call(cif_description, func_addr, exchange_buffer):
+            ofs = 16
+            for avalue in unroll_avalues:
+                TYPE = rffi.CArray(lltype.typeOf(avalue))
+                data = rffi.ptradd(exchange_buffer, ofs)
+                assert rffi.cast(lltype.Ptr(TYPE), data)[0] == avalue
+                ofs += 16
+            if rvalue is not None:
+                write_rvalue = rvalue
             else:
-                method_name = 'arg'
-            method_and_args.append((method_name, argval))
-        method_and_args = unrolling_iterable(method_and_args)
-        #
-        reds = ['n', 'res', 'func']
-        if (RESULT is rffi.DOUBLE or
-            IS_32_BIT and RESULT in [rffi.LONGLONG, rffi.ULONGLONG]):
-            reds = ['n', 'func', 'res'] # 'double' floats must be *after* refs
-        driver = JitDriver(reds=reds, greens=[])
-        init_result = rffi.cast(RESULT, 0)
-        #
-        def g(func):
-            # a different function, which is marked as "dont_look_inside"
-            # in case it uses an unsupported argument
-            argchain = ArgChain()
-            # this loop is unrolled
-            for method_name, argval in method_and_args:
-                getattr(argchain, method_name)(argval)
-            return func.call(argchain, RESULT, is_struct=is_struct)
-        #
-        def f(n):
-            func = lib.getpointer(name, argtypes, restype)
-            res = init_result
-            while n < 10:
-                driver.jit_merge_point(n=n, res=res, func=func)
-                promote(func)
-                res = g(func)
-                n += 1
+                write_rvalue = 12923  # ignored
+            TYPE = rffi.CArray(lltype.typeOf(write_rvalue))
+            data = rffi.ptradd(exchange_buffer, ofs)
+            rffi.cast(lltype.Ptr(TYPE), data)[0] = write_rvalue
+
+        def f():
+            exbuf = lltype.malloc(rffi.CCHARP.TO, (len(avalues)+2) * 16,
+                                  flavor='raw', zero=True)
+            ofs = 16
+            for avalue in unroll_avalues:
+                TYPE = rffi.CArray(lltype.typeOf(avalue))
+                data = rffi.ptradd(exbuf, ofs)
+                rffi.cast(lltype.Ptr(TYPE), data)[0] = avalue
+                ofs += 16
+
+            fake_call(cif_description, func_addr, exbuf)
+
+            if rvalue is None:
+                res = 654321
+            else:
+                TYPE = rffi.CArray(lltype.typeOf(rvalue))
+                data = rffi.ptradd(exbuf, ofs)
+                res = rffi.cast(lltype.Ptr(TYPE), data)[0]
+            lltype.free(exbuf, flavor='raw')
             return res
-        #
-        res = self.meta_interp(f, [0], backendopt=True,
-                               supports_floats       = self.supports_all,
-                               supports_longlong     = self.supports_all,
-                               supports_singlefloats = self.supports_all)
-        d = {'floats': self.supports_all,
-             'longlong': self.supports_all or not IS_32_BIT,
-             'singlefloats': self.supports_all,
-             'byval': False}
-        supported = all(d[check] for check in jitif)
-        if supported:
-            self.check_resops(
-                call_release_gil=2,   # a CALL_RELEASE_GIL, and no other CALLs
-                call=0,
-                call_may_force=0,
-                guard_no_exception=2,
-                guard_not_forced=2,
-                int_add=2,
-                int_lt=2,
-                guard_true=2,
-                jump=1)
-        else:
-            self.check_resops(
-                call_release_gil=0,   # no CALL_RELEASE_GIL
-                int_add=2,
-                int_lt=2,
-                guard_true=2,
-                jump=1)
-        return res
 
-    def test_byval_result(self):
-        _TestLibffiCall.test_byval_result(self)
-    test_byval_result.__doc__ = _TestLibffiCall.test_byval_result.__doc__
-    test_byval_result.dont_track_allocations = True
+        res = f()
+        assert res == rvalue or (res, rvalue) == (654321, None)
+        res = self.interp_operations(f, [])
+        assert res == rvalue or (res, rvalue) == (654321, None)
+        self.check_operations_history(call_may_force=0,
+                                      call_release_gil=1)
 
-class FfiLookupTests(object):
-    def test_array_fields(self):
-        myjitdriver = JitDriver(
-            greens = [],
-            reds = ["n", "i", "points", "result_point"],
-        )
+    def test_simple_call(self):
+        self._run([types.signed] * 2, types.signed, [456, 789], -42)
 
-        POINT = lltype.Struct("POINT",
-            ("x", lltype.Signed),
-            ("y", lltype.Signed),
-        )
-        def f(points, result_point, n):
-            i = 0
-            while i < n:
-                myjitdriver.jit_merge_point(i=i, points=points, n=n,
-                                            result_point=result_point)
-                x = array_getitem(
-                    types.slong, rffi.sizeof(lltype.Signed) * 2, points, i, 0
-                )
-                y = array_getitem(
-                    types.slong, rffi.sizeof(lltype.Signed) * 2, points, i, rffi.sizeof(lltype.Signed)
-                )
+    def test_many_arguments(self):
+        for i in [0, 6, 20]:
+            self._run([types.signed] * i, types.signed,
+                      [-123456*j for j in range(i)],
+                      -42434445)
 
-                cur_x = array_getitem(
-                    types.slong, rffi.sizeof(lltype.Signed) * 2, result_point, 0, 0
-                )
-                cur_y = array_getitem(
-                    types.slong, rffi.sizeof(lltype.Signed) * 2, result_point, 0, rffi.sizeof(lltype.Signed)
-                )
+    def test_simple_call_float(self):
+        self._run([types.double] * 2, types.double, [45.6, 78.9], -4.2)
 
-                array_setitem(
-                    types.slong, rffi.sizeof(lltype.Signed) * 2, result_point, 0, 0, cur_x + x
-                )
-                array_setitem(
-                    types.slong, rffi.sizeof(lltype.Signed) * 2, result_point, 0, rffi.sizeof(lltype.Signed), cur_y + y
-                )
-                i += 1
+    def test_returns_none(self):
+        self._run([types.signed] * 2, types.void, [456, 789], None)
 
-        def main(n):
-            with lltype.scoped_alloc(rffi.CArray(POINT), n) as points:
-                with lltype.scoped_alloc(rffi.CArray(POINT), 1) as result_point:
-                    for i in xrange(n):
-                        points[i].x = i * 2
-                        points[i].y = i * 2 + 1
-                    points = rffi.cast(rffi.CArrayPtr(lltype.Char), points)
-                    result_point[0].x = 0
-                    result_point[0].y = 0
-                    result_point = rffi.cast(rffi.CArrayPtr(lltype.Char), result_point)
-                    f(points, result_point, n)
-                    result_point = rffi.cast(rffi.CArrayPtr(POINT), result_point)
-                    return result_point[0].x * result_point[0].y
-
-        assert self.meta_interp(main, [10]) == main(10) == 9000
-        self.check_resops({'jump': 1, 'int_lt': 2, 'setinteriorfield_raw': 4,
-                           'getinteriorfield_raw': 8, 'int_add': 6, 'guard_true': 2})
-
-    def _test_getitem_type(self, TYPE, ffitype, COMPUTE_TYPE):
-        reds = ["n", "i", "s", "data"]
-        if COMPUTE_TYPE is lltype.Float:
-            # Move the float var to the back.
-            reds.remove("s")
-            reds.append("s")
-        myjitdriver = JitDriver(
-            greens = [],
-            reds = reds,
-        )
-        def f(data, n):
-            i = 0
-            s = rffi.cast(COMPUTE_TYPE, 0)
-            while i < n:
-                myjitdriver.jit_merge_point(n=n, i=i, s=s, data=data)
-                s += rffi.cast(COMPUTE_TYPE, array_getitem(ffitype, rffi.sizeof(TYPE), data, 0, 0))
-                i += 1
-            return s
-        def main(n):
-            with lltype.scoped_alloc(rffi.CArray(TYPE), 1) as data:
-                data[0] = rffi.cast(TYPE, 200)
-                return f(data, n)
-        assert self.meta_interp(main, [10]) == 2000
-
-    def test_array_getitem_uint8(self):
-        self._test_getitem_type(rffi.UCHAR, types.uchar, lltype.Signed)
-        self.check_resops({'jump': 1, 'int_lt': 2, 'getinteriorfield_raw': 2,
-                           'guard_true': 2, 'int_add': 4})
-
-    def test_array_getitem_float(self):
-        self._test_getitem_type(rffi.FLOAT, types.float, lltype.Float)
+    def test_returns_signedchar(self):
+        self._run([types.signed], types.sint8, [456],
+                  rffi.cast(rffi.SIGNEDCHAR, -42))
 
 
 class TestFfiCall(FfiCallTests, LLJitMixin):
-    supports_all = False
-
-class TestFfiCallSupportAll(FfiCallTests, LLJitMixin):
-    supports_all = True     # supports_{floats,longlong,singlefloats}
-
-    def test_struct_getfield(self):
-        myjitdriver = JitDriver(greens = [], reds = ['n', 'i', 'addr'])
-
-        def f(n):
-            i = 0
-            addr = lltype.malloc(rffi.VOIDP.TO, 10, flavor='raw')
-            while i < n:
-                myjitdriver.jit_merge_point(n=n, i=i, addr=addr)
-                struct_setfield_int(types.slong, addr, 0, 1)
-                i += struct_getfield_int(types.slong, addr, 0)
-            lltype.free(addr, flavor='raw')
-            return i
-        assert self.meta_interp(f, [20]) == f(20)
-        self.check_resops(
-            setfield_raw=2,
-            getfield_raw=2,
-            call=0)
-
-
-class TestFfiLookup(FfiLookupTests, LLJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_immutable.py b/pypy/jit/metainterp/test/test_immutable.py
--- a/pypy/jit/metainterp/test/test_immutable.py
+++ b/pypy/jit/metainterp/test/test_immutable.py
@@ -89,6 +89,92 @@
                             int_add=3)
 
 
+    def test_raw_field_and_array(self):
+        from pypy.rpython.lltypesystem import lltype
+        X = lltype.Struct('X',
+            ('a', lltype.Signed),
+            ('b', lltype.Array(lltype.Signed,
+                               hints={'nolength': True, 'immutable': True})),
+            hints={'immutable': True})
+
+        x = lltype.malloc(X, 4, flavor='raw', immortal=True)
+        x.a = 6
+        x.b[2] = 7
+        xlist = [x, lltype.nullptr(X)]
+        def g(num):
+            if num < 0:
+                num = 0
+            return num
+        g._dont_inline_ = True
+        def f(num):
+            num = g(num)
+            x = xlist[num]
+            return x.a * x.b[2]
+        #
+        res = self.interp_operations(f, [0], disable_optimizations=True)
+        assert res == 42
+        self.check_operations_history(getfield_raw_pure=1,
+                                      getarrayitem_raw_pure=1,
+                                      int_mul=1)
+        #
+        # second try, in which we get num=0 constant-folded through f()
+        res = self.interp_operations(f, [-1], disable_optimizations=True)
+        assert res == 42
+        self.check_operations_history(getfield_raw_pure=0,
+                                      getarrayitem_raw_pure=0,
+                                      int_mul=0)
+
+    def test_read_on_promoted(self):
+        # this test used to fail because the n = f.n was staying alive
+        # in a box (not a const, as it was read before promote), and
+        # thus the second f.n was returning the same box, although it
+        # could now return a const.
+        class Foo(object):
+            _immutable_fields_ = ['n']
+            def __init__(self, n):
+                self.n = n
+        f1 = Foo(42); f2 = Foo(43)
+        @jit.dont_look_inside
+        def some(m):
+            return [f1, f2][m]
+        @jit.dont_look_inside
+        def do_stuff_with(n):
+            print n
+        def main(m):
+            f = some(m)
+            n = f.n
+            f = jit.hint(f, promote=True)
+            res = f.n * 6
+            do_stuff_with(n)
+            return res
+        res = self.interp_operations(main, [1])
+        assert res == 43 * 6
+        self.check_operations_history(int_mul=0)   # constant-folded
+
+    def test_read_on_promoted_array(self):
+        class Foo(object):
+            _immutable_fields_ = ['lst[*]']
+            def __init__(self, lst):
+                self.lst = lst
+        f1 = Foo([42]); f2 = Foo([43])
+        @jit.dont_look_inside
+        def some(m):
+            return [f1, f2][m]
+        @jit.dont_look_inside
+        def do_stuff_with(n):
+            print n
+        def main(m):
+            f = some(m)
+            n = f.lst[0]
+            f = jit.hint(f, promote=True)
+            res = f.lst[0] * 6
+            do_stuff_with(n)
+            return res
+        res = self.interp_operations(main, [1])
+        assert res == 43 * 6
+        self.check_operations_history(int_mul=0)   # constant-folded
+
+
 class TestLLtypeImmutableFieldsTests(ImmutableFieldsTests, LLJitMixin):
     pass
 
diff --git a/pypy/jit/metainterp/test/test_jitiface.py b/pypy/jit/metainterp/test/test_jitiface.py
--- a/pypy/jit/metainterp/test/test_jitiface.py
+++ b/pypy/jit/metainterp/test/test_jitiface.py
@@ -1,13 +1,15 @@
 
-from pypy.rlib.jit import JitDriver, JitHookInterface
+from pypy.rlib.jit import JitDriver, JitHookInterface, Counters
 from pypy.rlib import jit_hooks
 from pypy.jit.metainterp.test.support import LLJitMixin
 from pypy.jit.codewriter.policy import JitPolicy
-from pypy.jit.metainterp.jitprof import ABORT_FORCE_QUASIIMMUT
 from pypy.jit.metainterp.resoperation import rop
 from pypy.rpython.annlowlevel import hlstr
+from pypy.jit.metainterp.jitprof import Profiler
 
-class TestJitHookInterface(LLJitMixin):
+class JitHookInterfaceTests(object):
+    # !!!note!!! - don't subclass this from the backend. Subclass the LL
+    # class later instead
     def test_abort_quasi_immut(self):
         reasons = []
         
@@ -41,7 +43,7 @@
         assert f(100, 7) == 721
         res = self.meta_interp(f, [100, 7], policy=JitPolicy(iface))
         assert res == 721
-        assert reasons == [ABORT_FORCE_QUASIIMMUT] * 2
+        assert reasons == [Counters.ABORT_FORCE_QUASIIMMUT] * 2
 
     def test_on_compile(self):
         called = []
@@ -146,3 +148,74 @@
             assert jit_hooks.resop_getresult(op) == box5
 
         self.meta_interp(main, [])
+
+    def test_get_stats(self):
+        driver = JitDriver(greens = [], reds = ['i', 's'])
+
+        def loop(i):
+            s = 0
+            while i > 0:
+                driver.jit_merge_point(i=i, s=s)
+                if i % 2:
+                    s += 1
+                i -= 1
+                s+= 2
+            return s
+
+        def main():
+            loop(30)
+            assert jit_hooks.stats_get_counter_value(None,
+                                           Counters.TOTAL_COMPILED_LOOPS) == 1
+            assert jit_hooks.stats_get_counter_value(None,
+                                           Counters.TOTAL_COMPILED_BRIDGES) == 1
+            assert jit_hooks.stats_get_counter_value(None,
+                                                     Counters.TRACING) == 2
+            assert jit_hooks.stats_get_times_value(None, Counters.TRACING) >= 0
+
+        self.meta_interp(main, [], ProfilerClass=Profiler)
+
+class LLJitHookInterfaceTests(JitHookInterfaceTests):
+    # use this for any backend, instead of the super class
+    
+    def test_ll_get_stats(self):
+        driver = JitDriver(greens = [], reds = ['i', 's'])
+
+        def loop(i):
+            s = 0
+            while i > 0:
+                driver.jit_merge_point(i=i, s=s)
+                if i % 2:
+                    s += 1
+                i -= 1
+                s+= 2
+            return s
+
+        def main(b):
+            jit_hooks.stats_set_debug(None, b)
+            loop(30)
+            l = jit_hooks.stats_get_loop_run_times(None)
+            if b:
+                assert len(l) == 4
+                # completely specific test that would fail each time
+                # we change anything major. for now it's 4
+                # (loop, bridge, 2 entry points)
+                assert l[0].type == 'e'
+                assert l[0].number == 0
+                assert l[0].counter == 4
+                assert l[1].type == 'l'
+                assert l[1].counter == 4
+                assert l[2].type == 'l'
+                assert l[2].counter == 23
+                assert l[3].type == 'b'
+                assert l[3].number == 4
+                assert l[3].counter == 11
+            else:
+                assert len(l) == 0
+        self.meta_interp(main, [True], ProfilerClass=Profiler)
+        # this so far does not work because of the way setup_once is done,
+        # but fine, it's only about untranslated version anyway
+        #self.meta_interp(main, [False], ProfilerClass=Profiler)
+        
+
+class TestJitHookInterface(JitHookInterfaceTests, LLJitMixin):
+    pass
diff --git a/pypy/jit/metainterp/test/test_jitprof.py b/pypy/jit/metainterp/test/test_jitprof.py
--- a/pypy/jit/metainterp/test/test_jitprof.py
+++ b/pypy/jit/metainterp/test/test_jitprof.py
@@ -1,9 +1,9 @@
 
 from pypy.jit.metainterp.warmspot import ll_meta_interp
-from pypy.rlib.jit import JitDriver, dont_look_inside, elidable
+from pypy.rlib.jit import JitDriver, dont_look_inside, elidable, Counters
 from pypy.jit.metainterp.test.support import LLJitMixin
 from pypy.jit.metainterp import pyjitpl
-from pypy.jit.metainterp.jitprof import *
+from pypy.jit.metainterp.jitprof import Profiler
 
 class FakeProfiler(Profiler):
     def start(self):
@@ -46,10 +46,10 @@
         assert res == 84
         profiler = pyjitpl._warmrunnerdesc.metainterp_sd.profiler
         expected = [
-            TRACING,
-            BACKEND,
-            ~ BACKEND,
-            ~ TRACING,
+            Counters.TRACING,
+            Counters.BACKEND,
+            ~ Counters.BACKEND,
+            ~ Counters.TRACING,
             ]
         assert profiler.events == expected
         assert profiler.times == [2, 1]
diff --git a/pypy/jit/metainterp/test/test_list.py b/pypy/jit/metainterp/test/test_list.py
--- a/pypy/jit/metainterp/test/test_list.py
+++ b/pypy/jit/metainterp/test/test_list.py
@@ -251,6 +251,16 @@
         self.meta_interp(f, [10], listops=True)
         self.check_resops(new_array=0, call=0)
 
+    def test_list_mul(self):
+        def f(i):
+            l = [0] * i
+            return len(l)
+
+        r = self.interp_operations(f, [3])
+        assert r == 3
+        r = self.interp_operations(f, [-1])
+        assert r == 0
+
 class TestOOtype(ListTests, OOJitMixin):
     pass
 
diff --git a/pypy/jit/metainterp/test/test_loop.py b/pypy/jit/metainterp/test/test_loop.py
--- a/pypy/jit/metainterp/test/test_loop.py
+++ b/pypy/jit/metainterp/test/test_loop.py
@@ -871,6 +871,42 @@
         res = self.meta_interp(f, [20, 10, 1])
         assert res == f(20, 10, 1)
 
+    def test_boxed_unerased_pointers_in_short_preamble(self):
+        from pypy.rlib.rerased import new_erasing_pair
+        from pypy.rpython.lltypesystem import lltype
+        class A(object):
+            def __init__(self, val):
+                self.val = val
+            def tst(self):
+                return self.val
+
+        class Box(object):
+            def __init__(self, val):
+                self.val = val
+
+        erase_A, unerase_A = new_erasing_pair('A')
+        erase_TP, unerase_TP = new_erasing_pair('TP')
+        TP = lltype.GcArray(lltype.Signed)
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'm', 'i', 'sa', 'p'])
+        def f(n, m):
+            i = sa = 0
+            p = Box(erase_A(A(7)))
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, m=m, i=i, sa=sa, p=p)
+                if i < m:
+                    sa += unerase_A(p.val).tst()
+                elif i == m:
+                    a = lltype.malloc(TP, 5)
+                    a[0] = 42
+                    p = Box(erase_TP(a))
+                else:
+                    sa += unerase_TP(p.val)[0]
+                sa -= A(i).val
+                i += 1
+            return sa
+        res = self.meta_interp(f, [20, 10])
+        assert res == f(20, 10)
+
 class TestOOtype(LoopTest, OOJitMixin):
     pass
 
diff --git a/pypy/jit/metainterp/test/test_rawmem.py b/pypy/jit/metainterp/test/test_rawmem.py
--- a/pypy/jit/metainterp/test/test_rawmem.py
+++ b/pypy/jit/metainterp/test/test_rawmem.py
@@ -1,8 +1,9 @@
 from pypy.jit.metainterp.test.support import LLJitMixin
 from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rlib.rawstorage import (alloc_raw_storage, raw_storage_setitem,
+                                  free_raw_storage, raw_storage_getitem)
 
-
-class TestJITRawMem(LLJitMixin):
+class RawMemTests(object):
     def test_cast_void_ptr(self):
         TP = lltype.Array(lltype.Float, hints={"nolength": True})
         VOID_TP = lltype.Array(lltype.Void, hints={"nolength": True, "uncast_on_llgraph": True})
@@ -18,7 +19,7 @@
             s += rffi.cast(lltype.Ptr(TP), a.storage)[0]
             lltype.free(x, flavor="raw")
             return s
-        res = self.interp_operations(f, [10])
+        self.interp_operations(f, [10])
 
     def test_fixed_size_malloc(self):
         TIMEVAL = lltype.Struct('dummy', ('tv_sec', rffi.LONG), ('tv_usec', rffi.LONG))
@@ -30,3 +31,32 @@
         assert res == 42
         self.check_operations_history({'call': 2, 'guard_no_exception': 1,
                                        'finish': 1})
+
+    def test_raw_storage_int(self):
+        def f():
+            p = alloc_raw_storage(15)
+            raw_storage_setitem(p, 3, 24)
+            res = raw_storage_getitem(lltype.Signed, p, 3)
+            free_raw_storage(p)
+            return res
+        res = self.interp_operations(f, [])
+        assert res == 24
+        self.check_operations_history({'call': 2, 'guard_no_exception': 1,
+                                       'raw_store': 1, 'raw_load': 1,
+                                       'finish': 1})
+
+    def test_raw_storage_float(self):
+        def f():
+            p = alloc_raw_storage(15)
+            raw_storage_setitem(p, 3, 2.4e15)
+            res = raw_storage_getitem(lltype.Float, p, 3)
+            free_raw_storage(p)
+            return res
+        res = self.interp_operations(f, [])
+        assert res == 2.4e15
+        self.check_operations_history({'call': 2, 'guard_no_exception': 1,
+                                       'raw_store': 1, 'raw_load': 1,
+                                       'finish': 1})
+
+class TestRawMem(RawMemTests, LLJitMixin):
+    pass
diff --git a/pypy/jit/metainterp/test/test_virtualstate.py b/pypy/jit/metainterp/test/test_virtualstate.py
--- a/pypy/jit/metainterp/test/test_virtualstate.py
+++ b/pypy/jit/metainterp/test/test_virtualstate.py
@@ -908,6 +908,141 @@
         """
         self.optimize_bridge(loop, bridge, expected, p5=self.myptr, p6=self.myptr2)
 
+    def test_licm_boxed_opaque_getitem(self):
+        loop = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        mark_opaque_ptr(p2)        
+        guard_class(p2,  ConstClass(node_vtable)) []
+        i3 = getfield_gc(p2, descr=otherdescr)
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p1)
+        """
+        bridge = """
+        [p1]
+        guard_nonnull(p1) []
+        jump(p1)
+        """
+        expected = """
+        [p1]
+        guard_nonnull(p1) []
+        p2 = getfield_gc(p1, descr=nextdescr)
+        jump(p1)
+        """        
+        self.optimize_bridge(loop, bridge, expected, 'Preamble')
+        
+        bridge = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        guard_class(p2,  ConstClass(node_vtable2)) []
+        jump(p1)
+        """
+        expected = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        guard_class(p2,  ConstClass(node_vtable2)) []
+        jump(p1)
+        """
+        self.optimize_bridge(loop, bridge, expected, 'Preamble')
+
+        bridge = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        guard_class(p2,  ConstClass(node_vtable)) []
+        jump(p1)
+        """
+        expected = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        guard_class(p2,  ConstClass(node_vtable)) []
+        i3 = getfield_gc(p2, descr=otherdescr)
+        jump(p1, i3)
+        """
+        self.optimize_bridge(loop, bridge, expected, 'Loop')
+
+    def test_licm_unboxed_opaque_getitem(self):
+        loop = """
+        [p2]
+        mark_opaque_ptr(p2)        
+        guard_class(p2,  ConstClass(node_vtable)) []
+        i3 = getfield_gc(p2, descr=otherdescr)
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p2)
+        """
+        bridge = """
+        [p1]
+        guard_nonnull(p1) []
+        jump(p1)
+        """
+        self.optimize_bridge(loop, bridge, 'RETRACE', p1=self.myptr)
+        self.optimize_bridge(loop, bridge, 'RETRACE', p1=self.myptr2)
+        
+        bridge = """
+        [p2]
+        guard_class(p2,  ConstClass(node_vtable2)) []
+        jump(p2)
+        """
+        self.optimize_bridge(loop, bridge, 'RETRACE')
+
+        bridge = """
+        [p2]
+        guard_class(p2,  ConstClass(node_vtable)) []
+        jump(p2)
+        """
+        expected = """
+        [p2]
+        guard_class(p2,  ConstClass(node_vtable)) []
+        i3 = getfield_gc(p2, descr=otherdescr)
+        jump(p2, i3)
+        """
+        self.optimize_bridge(loop, bridge, expected, 'Loop')
+
+    def test_licm_virtual_opaque_getitem(self):
+        loop = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        mark_opaque_ptr(p2)        
+        guard_class(p2,  ConstClass(node_vtable)) []
+        i3 = getfield_gc(p2, descr=otherdescr)
+        i4 = call(i3, descr=nonwritedescr)
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p3, p2, descr=nextdescr)
+        jump(p3)
+        """
+        bridge = """
+        [p1]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p3, p1, descr=nextdescr)
+        jump(p3)
+        """
+        self.optimize_bridge(loop, bridge, 'RETRACE', p1=self.myptr)
+        self.optimize_bridge(loop, bridge, 'RETRACE', p1=self.myptr2)
+
+        bridge = """
+        [p1]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        guard_class(p1,  ConstClass(node_vtable2)) []
+        setfield_gc(p3, p1, descr=nextdescr)
+        jump(p3)
+        """
+        self.optimize_bridge(loop, bridge, 'RETRACE')
+
+        bridge = """
+        [p1]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        guard_class(p1,  ConstClass(node_vtable)) []
+        setfield_gc(p3, p1, descr=nextdescr)
+        jump(p3)
+        """
+        expected = """
+        [p1]
+        guard_class(p1,  ConstClass(node_vtable)) []
+        i3 = getfield_gc(p1, descr=otherdescr)
+        jump(p1, i3)
+        """
+        self.optimize_bridge(loop, bridge, expected)
+
+
 class TestLLtypeGuards(BaseTestGenerateGuards, LLtypeMixin):
     pass
 
@@ -915,6 +1050,9 @@
     pass
 
 class FakeOptimizer:
+    def __init__(self):
+        self.opaque_pointers = {}
+        self.values = {}
     def make_equal_to(*args):
         pass
     def getvalue(*args):
diff --git a/pypy/jit/metainterp/test/test_warmspot.py b/pypy/jit/metainterp/test/test_warmspot.py
--- a/pypy/jit/metainterp/test/test_warmspot.py
+++ b/pypy/jit/metainterp/test/test_warmspot.py
@@ -260,6 +260,33 @@
                     pass   # other case
         self.meta_interp(f1, [18])
 
+    def test_bug_constant_int(self):
+        py.test.skip("crashes because a is a constant")
+        from pypy.rpython.lltypesystem import lltype, rffi
+        mydriver = JitDriver(greens=['a'], reds=['m'])
+        def f1(m, a):
+            while m > 0:
+                mydriver.jit_merge_point(a=a, m=m)
+                m = m - 1
+        def entry(m):
+            f1(m, 42)
+        self.meta_interp(entry, [18])
+
+    def test_bug_constant_instance(self):
+        py.test.skip("crashes because a is a constant")
+        from pypy.rpython.lltypesystem import lltype, rffi
+        mydriver = JitDriver(greens=['a'], reds=['m'])
+        class A(object):
+            pass
+        a1 = A()
+        def f1(m, a):
+            while m > 0:
+                mydriver.jit_merge_point(a=a, m=m)
+                m = m - 1
+        def entry(m):
+            f1(m, a1)
+        self.meta_interp(entry, [18])
+
     def test_bug_constant_rawptrs(self):
         py.test.skip("crashes because a is a constant")
         from pypy.rpython.lltypesystem import lltype, rffi
diff --git a/pypy/jit/metainterp/warmspot.py b/pypy/jit/metainterp/warmspot.py
--- a/pypy/jit/metainterp/warmspot.py
+++ b/pypy/jit/metainterp/warmspot.py
@@ -6,6 +6,7 @@
 from pypy.annotation import model as annmodel
 from pypy.rpython.llinterp import LLException
 from pypy.rpython.test.test_llinterp import get_interpreter, clear_tcache
+from pypy.rpython.annlowlevel import cast_instance_to_base_ptr
 from pypy.objspace.flow.model import SpaceOperation, Variable, Constant
 from pypy.objspace.flow.model import checkgraph, Link, copygraph
 from pypy.rlib.objectmodel import we_are_translated
@@ -78,10 +79,6 @@
         translator.config.translation.list_comprehension_operations = True
     except ConfigError:
         pass
-    try:
-        translator.config.translation.jit_ffi = True
-    except ConfigError:
-        pass
     warmrunnerdesc = WarmRunnerDesc(translator, backendopt=backendopt, **kwds)
     for jd in warmrunnerdesc.jitdrivers_sd:
         jd.warmstate.set_param_threshold(3)          # for tests
@@ -221,7 +218,7 @@
         self.rewrite_access_helpers()
         self.codewriter.make_jitcodes(verbose=verbose)
         self.rewrite_can_enter_jits()
-        self.rewrite_set_param()
+        self.rewrite_set_param_and_get_stats()
         self.rewrite_force_virtual(vrefinfo)
         self.rewrite_force_quasi_immutable()
         self.add_finish()
@@ -632,14 +629,22 @@
             self.rewrite_access_helper(op)
 
     def rewrite_access_helper(self, op):
-        ARGS = [arg.concretetype for arg in op.args[2:]]
-        RESULT = op.result.concretetype
-        FUNCPTR = lltype.Ptr(lltype.FuncType(ARGS, RESULT))
         # make sure we make a copy of function so it no longer belongs
         # to extregistry
         func = op.args[1].value
-        func = func_with_new_name(func, func.func_name + '_compiled')
-        ptr = self.helper_func(FUNCPTR, func)
+        if func.func_name.startswith('stats_'):
+            # get special treatment since we rewrite it to a call that accepts
+            # jit driver
+            func = func_with_new_name(func, func.func_name + '_compiled')
+            def new_func(ignored, *args):
+                return func(self, *args)
+            ARGS = [lltype.Void] + [arg.concretetype for arg in op.args[3:]]
+        else:
+            ARGS = [arg.concretetype for arg in op.args[2:]]
+            new_func = func_with_new_name(func, func.func_name + '_compiled')
+        RESULT = op.result.concretetype
+        FUNCPTR = lltype.Ptr(lltype.FuncType(ARGS, RESULT))
+        ptr = self.helper_func(FUNCPTR, new_func)
         op.opname = 'direct_call'
         op.args = [Constant(ptr, FUNCPTR)] + op.args[2:]
 
@@ -859,7 +864,7 @@
             call_final_function(self.translator, finish,
                                 annhelper = self.annhelper)
 
-    def rewrite_set_param(self):
+    def rewrite_set_param_and_get_stats(self):
         from pypy.rpython.lltypesystem.rstr import STR
 
         closures = {}
diff --git a/pypy/jit/tl/pypyjit_demo.py b/pypy/jit/tl/pypyjit_demo.py
--- a/pypy/jit/tl/pypyjit_demo.py
+++ b/pypy/jit/tl/pypyjit_demo.py
@@ -1,19 +1,27 @@
 import pypyjit
 pypyjit.set_param(threshold=200)
 
+kwargs = {"z": 1}
 
-def g(*args):
-    return len(args)
+def f(*args, **kwargs):
+    result = g(1, *args, **kwargs)
+    return result + 2
 
-def f(n):
-    s = 0
-    for i in range(n):
-        l = [i, n, 2]
-        s += g(*l)
-    return s
+def g(x, y, z=2):
+    return x - y + z
+
+def main():
+    res = 0
+    i = 0
+    while i < 10000:
+        res = f(res, z=i)
+        g(1, res, **kwargs)
+        i += 1
+    return res
+
 
 try:
-    print f(301)
+    print main()
 
 except Exception, e:
     print "Exception: ", type(e)
diff --git a/pypy/module/__pypy__/__init__.py b/pypy/module/__pypy__/__init__.py
--- a/pypy/module/__pypy__/__init__.py
+++ b/pypy/module/__pypy__/__init__.py
@@ -43,6 +43,8 @@
         'do_what_I_mean'            : 'interp_magic.do_what_I_mean',
         'list_strategy'             : 'interp_magic.list_strategy',
         'validate_fd'               : 'interp_magic.validate_fd',
+        'newdict'                   : 'interp_dict.newdict',
+        'dictstrategy'              : 'interp_dict.dictstrategy',
     }
     if sys.platform == 'win32':
         interpleveldefs['get_console_cp'] = 'interp_magic.get_console_cp'
diff --git a/pypy/module/__pypy__/interp_dict.py b/pypy/module/__pypy__/interp_dict.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/__pypy__/interp_dict.py
@@ -0,0 +1,24 @@
+
+from pypy.interpreter.gateway import unwrap_spec
+from pypy.interpreter.error import operationerrfmt, OperationError
+from pypy.objspace.std.dictmultiobject import W_DictMultiObject
+
+ at unwrap_spec(type=str)
+def newdict(space, type):
+    if type == 'module':
+        return space.newdict(module=True)
+    elif type == 'instance':
+        return space.newdict(instance=True)
+    elif type == 'kwargs':
+        return space.newdict(kwargs=True)
+    elif type == 'strdict':
+        return space.newdict(strdict=True)
+    else:
+        raise operationerrfmt(space.w_TypeError, "unknown type of dict %s",
+                              type)
+
+def dictstrategy(space, w_obj):
+    if not isinstance(w_obj, W_DictMultiObject):
+        raise OperationError(space.w_TypeError,
+                             space.wrap("expecting dict object"))
+    return space.wrap('%r' % (w_obj.strategy,))
diff --git a/pypy/module/__pypy__/interp_time.py b/pypy/module/__pypy__/interp_time.py
--- a/pypy/module/__pypy__/interp_time.py
+++ b/pypy/module/__pypy__/interp_time.py
@@ -1,5 +1,5 @@
 from __future__ import with_statement
-import os
+import sys
 
 from pypy.interpreter.error import exception_from_errno
 from pypy.interpreter.gateway import unwrap_spec
@@ -7,10 +7,11 @@
 from pypy.rpython.tool import rffi_platform
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 
-if os.name == 'nt':
+if sys.platform == 'linux2':
+    libraries = ["rt"]
+else:
     libraries = []
-else:
-    libraries = ["rt"]
+
 
 class CConfig:
     _compilation_info_ = ExternalCompilationInfo(
diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_cffi_backend/__init__.py
@@ -0,0 +1,42 @@
+from pypy.interpreter.mixedmodule import MixedModule
+
+class Module(MixedModule):
+
+    appleveldefs = {
+        }
+    interpleveldefs = {
+        '__version__': 'space.wrap("0.3")',
+
+        'nonstandard_integer_types': 'misc.nonstandard_integer_types',
+
+        'load_library': 'libraryobj.load_library',
+
+        'new_primitive_type': 'newtype.new_primitive_type',
+        'new_pointer_type': 'newtype.new_pointer_type',
+        'new_array_type': 'newtype.new_array_type',
+        'new_struct_type': 'newtype.new_struct_type',
+        'new_union_type': 'newtype.new_union_type',
+        'complete_struct_or_union': 'newtype.complete_struct_or_union',
+        'new_void_type': 'newtype.new_void_type',
+        'new_enum_type': 'newtype.new_enum_type',
+        'new_function_type': 'newtype.new_function_type',
+
+        'newp': 'func.newp',
+        'cast': 'func.cast',
+        'callback': 'func.callback',
+        'alignof': 'func.alignof',
+        'sizeof': 'func.sizeof',
+        'typeof': 'func.typeof',
+        'offsetof': 'func.offsetof',
+        '_getfields': 'func._getfields',
+        'getcname': 'func.getcname',
+
+        'string': 'func.string',
+        'buffer': 'cbuffer.buffer',
+
+        'get_errno': 'cerrno.get_errno',
+        'set_errno': 'cerrno.set_errno',
+
+        'FFI_DEFAULT_ABI': 'ctypefunc._get_abi(space, "FFI_DEFAULT_ABI")',
+        'FFI_CDECL': 'ctypefunc._get_abi(space,"FFI_DEFAULT_ABI")',#win32 name
+        }
diff --git a/pypy/module/_cffi_backend/cbuffer.py b/pypy/module/_cffi_backend/cbuffer.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_cffi_backend/cbuffer.py
@@ -0,0 +1,55 @@
+from pypy.interpreter.error import operationerrfmt
+from pypy.interpreter.buffer import RWBuffer
+from pypy.interpreter.gateway import unwrap_spec
+from pypy.rpython.lltypesystem import rffi
+from pypy.module._cffi_backend import cdataobj, ctypeptr, ctypearray
+
+
+class LLBuffer(RWBuffer):
+    _immutable_ = True
+
+    def __init__(self, raw_cdata, size):
+        self.raw_cdata = raw_cdata
+        self.size = size
+
+    def getlength(self):
+        return self.size
+
+    def getitem(self, index):
+        return self.raw_cdata[index]
+
+    def setitem(self, index, char):
+        self.raw_cdata[index] = char
+
+    def get_raw_address(self):
+        return self.raw_cdata
+
+    def getslice(self, start, stop, step, size):
+        if step == 1:
+            return rffi.charpsize2str(rffi.ptradd(self.raw_cdata, start), size)
+        return RWBuffer.getslice(self, start, stop, step, size)
+
+    def setslice(self, start, string):
+        raw_cdata = rffi.ptradd(self.raw_cdata, start)
+        for i in range(len(string)):
+            raw_cdata[i] = string[i]
+
+
+ at unwrap_spec(cdata=cdataobj.W_CData, size=int)
+def buffer(space, cdata, size=-1):
+    ctype = cdata.ctype
+    if isinstance(ctype, ctypeptr.W_CTypePointer):
+        if size < 0:
+            size = ctype.ctitem.size
+    elif isinstance(ctype, ctypearray.W_CTypeArray):
+        if size < 0:
+            size = cdata._sizeof()
+    else:
+        raise operationerrfmt(space.w_TypeError,
+                              "expected a pointer or array cdata, got '%s'",
+                              ctype.name)
+    if size < 0:
+        raise operationerrfmt(space.w_TypeError,
+                              "don't know the size pointed to by '%s'",
+                              ctype.name)
+    return space.wrap(LLBuffer(cdata._cdata, size))
diff --git a/pypy/module/_cffi_backend/ccallback.py b/pypy/module/_cffi_backend/ccallback.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_cffi_backend/ccallback.py
@@ -0,0 +1,200 @@
+"""
+Callbacks.
+"""
+import os
+from pypy.interpreter.error import OperationError, operationerrfmt
+from pypy.rpython.lltypesystem import lltype, llmemory, rffi
+from pypy.rlib.objectmodel import compute_unique_id, keepalive_until_here
+from pypy.rlib import clibffi, rweakref, rgc
+from pypy.rlib.rarithmetic import r_ulonglong
+
+from pypy.module._cffi_backend.cdataobj import W_CData
+from pypy.module._cffi_backend.ctypefunc import SIZE_OF_FFI_ARG, BIG_ENDIAN
+from pypy.module._cffi_backend.ctypefunc import W_CTypeFunc
+from pypy.module._cffi_backend.ctypeprim import W_CTypePrimitiveSigned
+from pypy.module._cffi_backend.ctypevoid import W_CTypeVoid
+from pypy.module._cffi_backend import cerrno, misc
+
+# ____________________________________________________________
+
+
+class W_CDataCallback(W_CData):
+    #_immutable_fields_ = ...
+    ll_error = lltype.nullptr(rffi.CCHARP.TO)
+
+    def __init__(self, space, ctype, w_callable, w_error):
+        raw_closure = rffi.cast(rffi.CCHARP, clibffi.closureHeap.alloc())
+        W_CData.__init__(self, space, raw_closure, ctype)
+        #
+        if not space.is_true(space.callable(w_callable)):
+            raise operationerrfmt(space.w_TypeError,
+                                  "expected a callable object, not %s",
+                                  space.type(w_callable).getname(space))
+        self.w_callable = w_callable
+        self.w_error = w_error
+        #
+        fresult = self.getfunctype().ctitem
+        size = fresult.size
+        if size > 0:
+            if fresult.is_primitive_integer and size < SIZE_OF_FFI_ARG:
+                size = SIZE_OF_FFI_ARG
+            self.ll_error = lltype.malloc(rffi.CCHARP.TO, size, flavor='raw',
+                                          zero=True)
+        if not space.is_w(w_error, space.w_None):
+            convert_from_object_fficallback(fresult, self.ll_error, w_error)
+        #
+        self.unique_id = compute_unique_id(self)
+        global_callback_mapping.set(self.unique_id, self)
+        #
+        cif_descr = self.getfunctype().cif_descr
+        if not cif_descr:
+            raise OperationError(space.w_NotImplementedError,
+                                 space.wrap("callbacks with '...'"))
+        res = clibffi.c_ffi_prep_closure(self.get_closure(), cif_descr.cif,
+                                         invoke_callback,
+                                         rffi.cast(rffi.VOIDP, self.unique_id))
+        if rffi.cast(lltype.Signed, res) != clibffi.FFI_OK:
+            raise OperationError(space.w_SystemError,
+                space.wrap("libffi failed to build this callback"))
+
+    def get_closure(self):
+        return rffi.cast(clibffi.FFI_CLOSUREP, self._cdata)
+
+    #@rgc.must_be_light_finalizer
+    def __del__(self):
+        clibffi.closureHeap.free(self.get_closure())
+        if self.ll_error:
+            lltype.free(self.ll_error, flavor='raw')
+
+    def _repr_extra(self):
+        space = self.space
+        return 'calling ' + space.str_w(space.repr(self.w_callable))
+
+    def getfunctype(self):
+        ctype = self.ctype
+        if not isinstance(ctype, W_CTypeFunc):
+            space = self.space
+            raise OperationError(space.w_TypeError,
+                                 space.wrap("expected a function ctype"))
+        return ctype
+
+    def invoke(self, ll_args, ll_res):
+        space = self.space
+        ctype = self.getfunctype()
+        args_w = []
+        for i, farg in enumerate(ctype.fargs):
+            ll_arg = rffi.cast(rffi.CCHARP, ll_args[i])
+            args_w.append(farg.convert_to_object(ll_arg))
+        fresult = ctype.ctitem
+        #
+        w_res = space.call(self.w_callable, space.newtuple(args_w))
+        #
+        convert_from_object_fficallback(fresult, ll_res, w_res)
+
+    def print_error(self, operr):
+        space = self.space
+        operr.write_unraisable(space, "cffi callback", self.w_callable)
+
+    def write_error_return_value(self, ll_res):
+        fresult = self.getfunctype().ctitem
+        if fresult.size > 0:
+            misc._raw_memcopy(self.ll_error, ll_res, fresult.size)
+            keepalive_until_here(self)
+
+
+global_callback_mapping = rweakref.RWeakValueDictionary(int, W_CDataCallback)
+
+
+def convert_from_object_fficallback(fresult, ll_res, w_res):
+    space = fresult.space
+    small_result = fresult.size < SIZE_OF_FFI_ARG
+    if small_result and isinstance(fresult, W_CTypeVoid):
+        if not space.is_w(w_res, space.w_None):
+            raise OperationError(space.w_TypeError,
+                    space.wrap("callback with the return type 'void'"
+                               " must return None"))
+        return
+    #
+    if small_result and fresult.is_primitive_integer:
+        # work work work around a libffi irregularity: for integer return
+        # types we have to fill at least a complete 'ffi_arg'-sized result
+        # buffer.
+        if type(fresult) is W_CTypePrimitiveSigned:
+            # It's probably fine to always zero-extend, but you never
+            # know: maybe some code somewhere expects a negative
+            # 'short' result to be returned into EAX as a 32-bit
+            # negative number.  Better safe than sorry.  This code
+            # is about that case.  Let's ignore this for enums.
+            #
+            # do a first conversion only to detect overflows.  This
+            # conversion produces stuff that is otherwise ignored.
+            fresult.convert_from_object(ll_res, w_res)
+            #
+            # manual inlining and tweaking of
+            # W_CTypePrimitiveSigned.convert_from_object() in order
+            # to write a whole 'ffi_arg'.
+            value = misc.as_long_long(space, w_res)
+            value = r_ulonglong(value)
+            misc.write_raw_integer_data(ll_res, value, SIZE_OF_FFI_ARG)
+            return
+        else:
+            # zero extension: fill the '*result' with zeros, and (on big-
+            # endian machines) correct the 'result' pointer to write to
+            misc._raw_memclear(ll_res, SIZE_OF_FFI_ARG)
+            if BIG_ENDIAN:
+                diff = SIZE_OF_FFI_ARG - fresult.size
+                ll_res = rffi.ptradd(ll_res, diff)
+    #
+    fresult.convert_from_object(ll_res, w_res)
+
+
+# ____________________________________________________________
+
+STDERR = 2
+
+def invoke_callback(ffi_cif, ll_res, ll_args, ll_userdata):
+    """ Callback specification.
+    ffi_cif - something ffi specific, don't care
+    ll_args - rffi.VOIDPP - pointer to array of pointers to args
+    ll_restype - rffi.VOIDP - pointer to result
+    ll_userdata - a special structure which holds necessary information
+                  (what the real callback is for example), casted to VOIDP
+    """
+    e = cerrno.get_real_errno()
+    ll_res = rffi.cast(rffi.CCHARP, ll_res)
+    unique_id = rffi.cast(lltype.Signed, ll_userdata)
+    callback = global_callback_mapping.get(unique_id)
+    if callback is None:
+        # oups!
+        try:
+            os.write(STDERR, "SystemError: invoking a callback "
+                             "that was already freed\n")
+        except OSError:
+            pass
+        # In this case, we don't even know how big ll_res is.  Let's assume
+        # it is just a 'ffi_arg', and store 0 there.
+        misc._raw_memclear(ll_res, SIZE_OF_FFI_ARG)
+        return
+    #
+    ec = None
+    try:
+        ec = cerrno.get_errno_container(callback.space)
+        cerrno.save_errno_into(ec, e)
+        try:
+            callback.invoke(ll_args, ll_res)
+        except OperationError, e:
+            # got an app-level exception
+            callback.print_error(e)
+            callback.write_error_return_value(ll_res)
+        #
+    except Exception, e:
+        # oups! last-level attempt to recover.
+        try:
+            os.write(STDERR, "SystemError: callback raised ")
+            os.write(STDERR, str(e))
+            os.write(STDERR, "\n")
+        except OSError:
+            pass
+        callback.write_error_return_value(ll_res)
+    if ec is not None:
+        cerrno.restore_errno_from(ec)
diff --git a/pypy/module/_cffi_backend/cdataobj.py b/pypy/module/_cffi_backend/cdataobj.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_cffi_backend/cdataobj.py
@@ -0,0 +1,309 @@
+import operator
+from pypy.interpreter.error import OperationError, operationerrfmt
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.interpreter.typedef import TypeDef, make_weakref_descr
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rlib.objectmodel import keepalive_until_here
+from pypy.rlib import objectmodel, rgc
+from pypy.tool.sourcetools import func_with_new_name
+
+from pypy.module._cffi_backend import misc
+
+
+class W_CData(Wrappable):
+    _attrs_ = ['space', '_cdata', 'ctype', '_lifeline_']
+    _immutable_fields_ = ['_cdata', 'ctype']
+    _cdata = lltype.nullptr(rffi.CCHARP.TO)
+
+    def __init__(self, space, cdata, ctype):
+        from pypy.module._cffi_backend import ctypeprim
+        assert lltype.typeOf(cdata) == rffi.CCHARP
+        assert isinstance(ctype, ctypeprim.W_CType)
+        self.space = space
+        self._cdata = cdata    # don't forget keepalive_until_here!
+        self.ctype = ctype
+
+    def _repr_extra(self):
+        extra = self.ctype.extra_repr(self._cdata)
+        keepalive_until_here(self)
+        return extra
+
+    def _repr_extra_owning(self):
+        from pypy.module._cffi_backend.ctypeptr import W_CTypePointer
+        ctype = self.ctype
+        if isinstance(ctype, W_CTypePointer):
+            num_bytes = ctype.ctitem.size
+        else:
+            num_bytes = self._sizeof()
+        return 'owning %d bytes' % num_bytes
+
+    def repr(self):
+        extra2 = self._repr_extra()
+        extra1 = ''
+        if not isinstance(self, W_CDataNewOwning):
+            # it's slightly confusing to get "<cdata 'struct foo' 0x...>"
+            # because the struct foo is not owned.  Trying to make it
+            # clearer, write in this case "<cdata 'struct foo &' 0x...>".
+            from pypy.module._cffi_backend import ctypestruct
+            if isinstance(self.ctype, ctypestruct.W_CTypeStructOrUnion):
+                extra1 = ' &'
+        return self.space.wrap("<cdata '%s%s' %s>" % (
+            self.ctype.name, extra1, extra2))
+
+    def nonzero(self):
+        return self.space.wrap(bool(self._cdata))
+
+    def int(self):
+        w_result = self.ctype.int(self._cdata)
+        keepalive_until_here(self)
+        return w_result
+
+    def long(self):
+        w_result = self.int()
+        space = self.space
+        if space.is_w(space.type(w_result), space.w_int):
+            w_result = space.newlong(space.int_w(w_result))
+        return w_result
+
+    def float(self):
+        w_result = self.ctype.float(self._cdata)
+        keepalive_until_here(self)
+        return w_result
+
+    def len(self):
+        from pypy.module._cffi_backend import ctypearray
+        space = self.space
+        if isinstance(self.ctype, ctypearray.W_CTypeArray):
+            return space.wrap(self.get_array_length())
+        raise operationerrfmt(space.w_TypeError,
+                              "cdata of type '%s' has no len()",
+                              self.ctype.name)
+
+    def _make_comparison(name):
+        op = getattr(operator, name)
+        requires_ordering = name not in ('eq', 'ne')
+        #
+        def _cmp(self, w_other):
+            from pypy.module._cffi_backend.ctypeprim import W_CTypePrimitive
+            space = self.space
+            cdata1 = self._cdata
+            other = space.interpclass_w(w_other)
+            if isinstance(other, W_CData):
+                cdata2 = other._cdata
+            else:
+                return space.w_NotImplemented
+
+            if requires_ordering:
+                if (isinstance(self.ctype, W_CTypePrimitive) or
+                    isinstance(other.ctype, W_CTypePrimitive)):
+                    raise OperationError(space.w_TypeError,
+                        space.wrap("cannot do comparison on a primitive cdata"))
+                cdata1 = rffi.cast(lltype.Unsigned, cdata1)
+                cdata2 = rffi.cast(lltype.Unsigned, cdata2)
+            return space.newbool(op(cdata1, cdata2))
+        #
+        return func_with_new_name(_cmp, name)
+
+    lt = _make_comparison('lt')
+    le = _make_comparison('le')
+    eq = _make_comparison('eq')
+    ne = _make_comparison('ne')
+    gt = _make_comparison('gt')
+    ge = _make_comparison('ge')
+
+    def hash(self):
+        h = (objectmodel.compute_identity_hash(self.ctype) ^
+             rffi.cast(lltype.Signed, self._cdata))
+        return self.space.wrap(h)
+
+    def getitem(self, w_index):
+        space = self.space
+        i = space.getindex_w(w_index, space.w_IndexError)
+        ctype = self.ctype._check_subscript_index(self, i)
+        w_o = self._do_getitem(ctype, i)
+        keepalive_until_here(self)
+        return w_o
+
+    def _do_getitem(self, ctype, i):
+        ctitem = ctype.ctitem
+        return ctitem.convert_to_object(
+            rffi.ptradd(self._cdata, i * ctitem.size))
+
+    def setitem(self, w_index, w_value):
+        space = self.space
+        i = space.getindex_w(w_index, space.w_IndexError)
+        ctype = self.ctype._check_subscript_index(self, i)
+        ctitem = ctype.ctitem
+        ctitem.convert_from_object(
+            rffi.ptradd(self._cdata, i * ctitem.size),
+            w_value)
+        keepalive_until_here(self)
+
+    def _add_or_sub(self, w_other, sign):
+        space = self.space
+        i = sign * space.getindex_w(w_other, space.w_OverflowError)
+        return self.ctype.add(self._cdata, i)
+
+    def add(self, w_other):
+        return self._add_or_sub(w_other, +1)
+
+    def sub(self, w_other):
+        space = self.space
+        ob = space.interpclass_w(w_other)
+        if isinstance(ob, W_CData):
+            from pypy.module._cffi_backend import ctypeptr, ctypearray
+            ct = ob.ctype
+            if isinstance(ct, ctypearray.W_CTypeArray):
+                ct = ct.ctptr
+            #
+            if (ct is not self.ctype or
+                   not isinstance(ct, ctypeptr.W_CTypePointer) or
+                   ct.ctitem.size <= 0):
+                raise operationerrfmt(space.w_TypeError,
+                    "cannot subtract cdata '%s' and cdata '%s'",
+                    self.ctype.name, ct.name)
+            #
+            diff = (rffi.cast(lltype.Signed, self._cdata) -
+                    rffi.cast(lltype.Signed, ob._cdata)) // ct.ctitem.size
+            return space.wrap(diff)
+        #
+        return self._add_or_sub(w_other, -1)
+
+    def getcfield(self, w_attr):
+        return self.ctype.getcfield(self.space.str_w(w_attr))
+
+    def getattr(self, w_attr):
+        w_res = self.getcfield(w_attr).read(self._cdata)
+        keepalive_until_here(self)
+        return w_res
+
+    def setattr(self, w_attr, w_value):
+        self.getcfield(w_attr).write(self._cdata, w_value)
+        keepalive_until_here(self)
+
+    def call(self, args_w):
+        w_result = self.ctype.call(self._cdata, args_w)
+        keepalive_until_here(self)
+        return w_result
+
+    def iter(self):
+        return self.ctype.iter(self)
+
+    def write_raw_integer_data(self, source):
+        misc.write_raw_integer_data(self._cdata, source, self.ctype.size)
+        keepalive_until_here(self)
+
+    def write_raw_float_data(self, source):
+        misc.write_raw_float_data(self._cdata, source, self.ctype.size)
+        keepalive_until_here(self)
+
+    def convert_to_object(self):
+        w_obj = self.ctype.convert_to_object(self._cdata)
+        keepalive_until_here(self)
+        return w_obj
+
+    def get_array_length(self):
+        from pypy.module._cffi_backend import ctypearray
+        ctype = self.ctype
+        assert isinstance(ctype, ctypearray.W_CTypeArray)
+        length = ctype.length
+        assert length >= 0
+        return length
+
+    def _sizeof(self):
+        return self.ctype.size
+
+
+class W_CDataMem(W_CData):
+    """This is the base class used for cdata objects that own and free
+    their memory.  Used directly by the results of cffi.cast('int', x)
+    or other primitive explicitly-casted types.  It is further subclassed
+    by W_CDataNewOwning."""
+    _attrs_ = []
+
+    def __init__(self, space, size, ctype):
+        cdata = lltype.malloc(rffi.CCHARP.TO, size, flavor='raw', zero=True)
+        W_CData.__init__(self, space, cdata, ctype)
+
+    @rgc.must_be_light_finalizer
+    def __del__(self):
+        lltype.free(self._cdata, flavor='raw')
+
+
+class W_CDataNewOwning(W_CDataMem):
+    """This is the class used for the cdata objects created by newp()."""
+    _attrs_ = []
+
+    def _repr_extra(self):
+        return self._repr_extra_owning()
+
+
+class W_CDataNewOwningLength(W_CDataNewOwning):
+    """Subclass with an explicit length, for allocated instances of
+    the C type 'foo[]'."""
+    _attrs_ = ['length']
+    _immutable_fields_ = ['length']
+
+    def __init__(self, space, size, ctype, length):
+        W_CDataNewOwning.__init__(self, space, size, ctype)
+        self.length = length
+
+    def _sizeof(self):
+        from pypy.module._cffi_backend import ctypearray
+        ctype = self.ctype
+        assert isinstance(ctype, ctypearray.W_CTypeArray)
+        return self.length * ctype.ctitem.size
+
+    def get_array_length(self):
+        return self.length
+
+
+class W_CDataPtrToStructOrUnion(W_CData):
+    """This subclass is used for the pointer returned by new('struct foo').
+    It has a strong reference to a W_CDataNewOwning that really owns the
+    struct, which is the object returned by the app-level expression 'p[0]'.
+    But it is not itself owning any memory, although its repr says so;
+    it is merely a co-owner."""
+    _attrs_ = ['structobj']
+    _immutable_fields_ = ['structobj']
+
+    def __init__(self, space, cdata, ctype, structobj):
+        W_CData.__init__(self, space, cdata, ctype)
+        self.structobj = structobj
+
+    def _repr_extra(self):
+        return self._repr_extra_owning()
+
+    def _do_getitem(self, ctype, i):
+        assert i == 0
+        return self.structobj
+
+
+W_CData.typedef = TypeDef(
+    'CData',
+    __module__ = '_cffi_backend',
+    __repr__ = interp2app(W_CData.repr),
+    __nonzero__ = interp2app(W_CData.nonzero),
+    __int__ = interp2app(W_CData.int),
+    __long__ = interp2app(W_CData.long),
+    __float__ = interp2app(W_CData.float),
+    __len__ = interp2app(W_CData.len),
+    __lt__ = interp2app(W_CData.lt),
+    __le__ = interp2app(W_CData.le),
+    __eq__ = interp2app(W_CData.eq),
+    __ne__ = interp2app(W_CData.ne),
+    __gt__ = interp2app(W_CData.gt),
+    __ge__ = interp2app(W_CData.ge),
+    __hash__ = interp2app(W_CData.hash),
+    __getitem__ = interp2app(W_CData.getitem),
+    __setitem__ = interp2app(W_CData.setitem),
+    __add__ = interp2app(W_CData.add),
+    __sub__ = interp2app(W_CData.sub),
+    __getattr__ = interp2app(W_CData.getattr),
+    __setattr__ = interp2app(W_CData.setattr),
+    __call__ = interp2app(W_CData.call),
+    __iter__ = interp2app(W_CData.iter),
+    __weakref__ = make_weakref_descr(W_CData),
+    )
+W_CData.typedef.acceptable_as_base_class = False
diff --git a/pypy/module/_cffi_backend/cerrno.py b/pypy/module/_cffi_backend/cerrno.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_cffi_backend/cerrno.py
@@ -0,0 +1,29 @@
+from pypy.rlib import rposix
+from pypy.interpreter.executioncontext import ExecutionContext
+from pypy.interpreter.gateway import unwrap_spec
+
+
+ExecutionContext._cffi_saved_errno = 0
+
+
+def get_errno_container(space):
+    return space.getexecutioncontext()
+
+get_real_errno = rposix.get_errno
+
+
+def restore_errno_from(ec):
+    rposix.set_errno(ec._cffi_saved_errno)
+
+def save_errno_into(ec, errno):
+    ec._cffi_saved_errno = errno


More information about the pypy-commit mailing list