[pypy-commit] pypy jvm-improvements: Merge with default

Tue Jul 31 21:23:50 CEST 2012

Author: Michal Bendowski <michal at bendowski.pl>
Branch: jvm-improvements
Changeset: r56516:4d28cf3b14f8
Date: 2012-07-31 17:59 +0200
http://bitbucket.org/pypy/pypy/changeset/4d28cf3b14f8/

Log:	Merge with default

diff too long, truncating to 10000 out of 29790 lines

diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -20,6 +20,16 @@
 ^pypy/module/cpyext/test/.+\.obj$
 ^pypy/module/cpyext/test/.+\.manifest$
 ^pypy/module/test_lib_pypy/ctypes_tests/.+\.o$
+^pypy/module/cppyy/src/.+\.o$
+^pypy/module/cppyy/bench/.+\.so$
+^pypy/module/cppyy/bench/.+\.root$
+^pypy/module/cppyy/bench/.+\.d$
+^pypy/module/cppyy/src/.+\.errors$
+^pypy/module/cppyy/test/.+_rflx\.cpp$
+^pypy/module/cppyy/test/.+\.so$
+^pypy/module/cppyy/test/.+\.rootmap$
+^pypy/module/cppyy/test/.+\.exe$
+^pypy/module/cppyy/test/.+_cint.h$
 ^pypy/doc/.+\.html$
 ^pypy/doc/config/.+\.rst$
 ^pypy/doc/basicblock\.asc$
diff --git a/LICENSE b/LICENSE
--- a/LICENSE
+++ b/LICENSE
@@ -216,6 +216,7 @@
     DFKI GmbH, Germany 
     Impara, Germany
     Change Maker, Sweden 
+    University of California Berkeley, USA
 
 The PyPy Logo as used by http://speed.pypy.org and others was created
 by Samuel Reis and is distributed on terms of Creative Commons Share Alike
diff --git a/ctypes_configure/cbuild.py b/ctypes_configure/cbuild.py
--- a/ctypes_configure/cbuild.py
+++ b/ctypes_configure/cbuild.py
@@ -372,7 +372,7 @@
         self.library_dirs = list(eci.library_dirs)
         self.compiler_exe = compiler_exe
         self.profbased = profbased
-        if not sys.platform in ('win32', 'darwin'): # xxx
+        if not sys.platform in ('win32', 'darwin', 'cygwin'): # xxx
             if 'm' not in self.libraries:
                 self.libraries.append('m')
             if 'pthread' not in self.libraries:
diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py
--- a/lib-python/2.7/ctypes/__init__.py
+++ b/lib-python/2.7/ctypes/__init__.py
@@ -351,7 +351,10 @@
         self._FuncPtr = _FuncPtr
 
         if handle is None:
-            self._handle = _ffi.CDLL(name, mode)
+            if flags & _FUNCFLAG_CDECL:
+                self._handle = _ffi.CDLL(name, mode)
+            else:
+                self._handle = _ffi.WinDLL(name, mode)
         else:
             self._handle = handle
 
diff --git a/lib-python/2.7/distutils/sysconfig_pypy.py b/lib-python/2.7/distutils/sysconfig_pypy.py
--- a/lib-python/2.7/distutils/sysconfig_pypy.py
+++ b/lib-python/2.7/distutils/sysconfig_pypy.py
@@ -39,11 +39,10 @@
     If 'prefix' is supplied, use it instead of sys.prefix or
     sys.exec_prefix -- i.e., ignore 'plat_specific'.
     """
-    if standard_lib:
-        raise DistutilsPlatformError(
-            "calls to get_python_lib(standard_lib=1) cannot succeed")
     if prefix is None:
         prefix = PREFIX
+    if standard_lib:
+        return os.path.join(prefix, "lib-python", get_python_version())
     return os.path.join(prefix, 'site-packages')
 
 
diff --git a/lib-python/2.7/pickle.py b/lib-python/2.7/pickle.py
--- a/lib-python/2.7/pickle.py
+++ b/lib-python/2.7/pickle.py
@@ -638,7 +638,7 @@
             # else tmp is empty, and we're done
 
     def save_dict(self, obj):
-        modict_saver = self._pickle_moduledict(obj)
+        modict_saver = self._pickle_maybe_moduledict(obj)
         if modict_saver is not None:
             return self.save_reduce(*modict_saver)
 
@@ -691,26 +691,20 @@
                 write(SETITEM)
             # else tmp is empty, and we're done
 
-    def _pickle_moduledict(self, obj):
+    def _pickle_maybe_moduledict(self, obj):
         # save module dictionary as "getattr(module, '__dict__')"
+        try:
+            name = obj['__name__']
+            if type(name) is not str:
+                return None
+            themodule = sys.modules[name]
+            if type(themodule) is not ModuleType:
+                return None
+            if themodule.__dict__ is not obj:
+                return None
+        except (AttributeError, KeyError, TypeError):
+            return None
 
-        # build index of module dictionaries
-        try:
-            modict = self.module_dict_ids
-        except AttributeError:
-            modict = {}
-            from sys import modules
-            for mod in modules.values():
-                if isinstance(mod, ModuleType):
-                    modict[id(mod.__dict__)] = mod
-            self.module_dict_ids = modict
-
-        thisid = id(obj)
-        try:
-            themodule = modict[thisid]
-        except KeyError:
-            return None
-        from __builtin__ import getattr
         return getattr, (themodule, '__dict__')
 
 
diff --git a/lib-python/stdlib-upgrade.txt b/lib-python/stdlib-upgrade.txt
new file mode 100644
--- /dev/null
+++ b/lib-python/stdlib-upgrade.txt
@@ -0,0 +1,19 @@
+Process for upgrading the stdlib to a new cpython version
+==========================================================
+
+.. note::
+
+    overly detailed
+
+1. check out the branch vendor/stdlib
+2. upgrade the files there
+3. update stdlib-versions.txt with the output of ``hg id -i`` from the cpython repo
+4. commit
+5. update to default/py3k
+6. create an integration branch for the new stdlib
+   (just hg branch stdlib-$version)
+7. merge vendor/stdlib
+8. commit
+9. fix issues
+10. commit --close-branch
+11. merge to default
diff --git a/lib_pypy/PyQt4.py b/lib_pypy/PyQt4.py
deleted file mode 100644
--- a/lib_pypy/PyQt4.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from _rpyc_support import proxy_sub_module, remote_eval
-
-
-for name in ("QtCore", "QtGui", "QtWebKit"):
-    proxy_sub_module(globals(), name)
-
-s = "__import__('PyQt4').QtGui.QDialogButtonBox."
-QtGui.QDialogButtonBox.Cancel = remote_eval("%sCancel | %sCancel" % (s, s))
-QtGui.QDialogButtonBox.Ok = remote_eval("%sOk | %sOk" % (s, s))
diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py
--- a/lib_pypy/_ctypes/basics.py
+++ b/lib_pypy/_ctypes/basics.py
@@ -47,10 +47,6 @@
         else:
             return self.from_param(as_parameter)
 
-    def get_ffi_param(self, value):
-        cdata = self.from_param(value)
-        return cdata, cdata._to_ffi_param()
-
     def get_ffi_argtype(self):
         if self._ffiargtype:
             return self._ffiargtype
diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py
--- a/lib_pypy/_ctypes/function.py
+++ b/lib_pypy/_ctypes/function.py
@@ -391,7 +391,7 @@
         address = self._get_address()
         ffiargs = [argtype.get_ffi_argtype() for argtype in argtypes]
         ffires = restype.get_ffi_argtype()
-        return _ffi.FuncPtr.fromaddr(address, '', ffiargs, ffires)
+        return _ffi.FuncPtr.fromaddr(address, '', ffiargs, ffires, self._flags_)
 
     def _getfuncptr(self, argtypes, restype, thisarg=None):
         if self._ptr is not None and (argtypes is self._argtypes_ or argtypes == self._argtypes_):
@@ -412,7 +412,7 @@
             ptr = thisarg[0][self._com_index - 0x1000]
             ffiargs = [argtype.get_ffi_argtype() for argtype in argtypes]
             ffires = restype.get_ffi_argtype()
-            return _ffi.FuncPtr.fromaddr(ptr, '', ffiargs, ffires)
+            return _ffi.FuncPtr.fromaddr(ptr, '', ffiargs, ffires, self._flags_)
         
         cdll = self.dll._handle
         try:
@@ -444,10 +444,6 @@
 
     @classmethod
     def _conv_param(cls, argtype, arg):
-        if isinstance(argtype, _CDataMeta):
-            cobj, ffiparam = argtype.get_ffi_param(arg)
-            return cobj, ffiparam, argtype
-        
         if argtype is not None:
             arg = argtype.from_param(arg)
         if hasattr(arg, '_as_parameter_'):
diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py
--- a/lib_pypy/_ctypes/primitive.py
+++ b/lib_pypy/_ctypes/primitive.py
@@ -249,6 +249,13 @@
                 self._buffer[0] = value
             result.value = property(_getvalue, _setvalue)
 
+        elif tp == '?':  # regular bool
+            def _getvalue(self):
+                return bool(self._buffer[0])
+            def _setvalue(self, value):
+                self._buffer[0] = bool(value)
+            result.value = property(_getvalue, _setvalue)
+
         elif tp == 'v': # VARIANT_BOOL type
             def _getvalue(self):
                 return bool(self._buffer[0])
diff --git a/lib_pypy/_rpyc_support.py b/lib_pypy/_rpyc_support.py
deleted file mode 100644
--- a/lib_pypy/_rpyc_support.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import sys
-import socket
-
-from rpyc import connect, SlaveService
-from rpyc.utils.classic import DEFAULT_SERVER_PORT
-
-try:
-    conn = connect("localhost", DEFAULT_SERVER_PORT, SlaveService,
-           config=dict(call_by_value_for_builtin_mutable_types=True))
-except socket.error, e:
-    raise ImportError("Error while connecting: " + str(e))
-
-
-remote_eval = conn.eval
-
-
-def proxy_module(globals):
-    module = getattr(conn.modules, globals["__name__"])
-    for name in module.__dict__.keys():
-        globals[name] = getattr(module, name)
-
-def proxy_sub_module(globals, name):
-    fullname = globals["__name__"] + "." + name
-    sys.modules[fullname] = globals[name] = conn.modules[fullname]
diff --git a/lib_pypy/ctypes_support.py b/lib_pypy/ctypes_support.py
--- a/lib_pypy/ctypes_support.py
+++ b/lib_pypy/ctypes_support.py
@@ -12,6 +12,8 @@
 if sys.platform == 'win32':
     import _ffi
     standard_c_lib = ctypes.CDLL('msvcrt', handle=_ffi.get_libc())
+elif sys.platform == 'cygwin':
+    standard_c_lib = ctypes.CDLL(ctypes.util.find_library('cygwin'))
 else:
     standard_c_lib = ctypes.CDLL(ctypes.util.find_library('c'))
 
diff --git a/lib_pypy/disassembler.py b/lib_pypy/disassembler.py
--- a/lib_pypy/disassembler.py
+++ b/lib_pypy/disassembler.py
@@ -24,6 +24,11 @@
         self.lineno = lineno
         self.line_starts_here = False
 
+    def __str__(self):
+        if self.arg is None:
+            return "%s" % (self.__class__.__name__,)
+        return "%s (%s)" % (self.__class__.__name__, self.arg)
+
     def __repr__(self):
         if self.arg is None:
             return "<%s at %d>" % (self.__class__.__name__, self.pos)
diff --git a/lib_pypy/distributed/__init__.py b/lib_pypy/distributed/__init__.py
deleted file mode 100644
--- a/lib_pypy/distributed/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-
-try:
-    from protocol import RemoteProtocol, test_env, remote_loop, ObjectNotFound
-except ImportError:
-    # XXX fix it
-    # UGH. This is needed for tests
-    pass
diff --git a/lib_pypy/distributed/demo/sockdemo.py b/lib_pypy/distributed/demo/sockdemo.py
deleted file mode 100644
--- a/lib_pypy/distributed/demo/sockdemo.py
+++ /dev/null
@@ -1,42 +0,0 @@
-
-from distributed import RemoteProtocol, remote_loop
-from distributed.socklayer import Finished, socket_listener, socket_connecter
-
-PORT = 12122
-
-class X:
-    def __init__(self, z):
-        self.z = z
-        
-    def meth(self, x):
-        return self.z + x()
-
-    def raising(self):
-        1/0
-
-x = X(3)
-
-def remote():
-    send, receive = socket_listener(address=('', PORT))
-    remote_loop(RemoteProtocol(send, receive, globals()))
-
-def local():
-    send, receive = socket_connecter(('localhost', PORT))
-    return RemoteProtocol(send, receive)
-
-import sys
-if __name__ == '__main__':
-    if len(sys.argv) > 1 and sys.argv[1] == '-r':
-        try:
-            remote()
-        except Finished:
-            print "Finished"
-    else:
-        rp = local()
-        x = rp.get_remote("x")
-        try:
-            x.raising()
-        except:
-            import sys
-            import pdb
-            pdb.post_mortem(sys.exc_info()[2])
diff --git a/lib_pypy/distributed/faker.py b/lib_pypy/distributed/faker.py
deleted file mode 100644
--- a/lib_pypy/distributed/faker.py
+++ /dev/null
@@ -1,89 +0,0 @@
-
-""" This file is responsible for faking types
-"""
-
-class GetSetDescriptor(object):
-    def __init__(self, protocol, name):
-        self.protocol = protocol
-        self.name = name
-
-    def __get__(self, obj, type=None):
-        return self.protocol.get(self.name, obj, type)
-
-    def __set__(self, obj, value):
-        self.protocol.set(self.name, obj, value)
-
-class GetDescriptor(object):
-    def __init__(self, protocol, name):
-        self.protocol = protocol
-        self.name = name
-
-    def __get__(self, obj, type=None):
-        return self.protocol.get(self.name, obj, type)
-
-# these are one-go functions for wrapping/unwrapping types,
-# note that actual caching is defined in other files,
-# this is only the case when we *need* to wrap/unwrap
-# type
-
-from types import MethodType, FunctionType
-
-def not_ignore(name):
-    # we don't want to fake some default descriptors, because
-    # they'll alter the way we set attributes
-    l = ['__dict__', '__weakref__', '__class__', '__bases__',
-         '__getattribute__', '__getattr__', '__setattr__',
-         '__delattr__']
-    return not name in dict.fromkeys(l)
-
-def wrap_type(protocol, tp, tp_id):
-    """ Wrap type to transpotable entity, taking
-    care about descriptors
-    """
-    dict_w = {}
-    for item in tp.__dict__.keys():
-        value = getattr(tp, item)
-        if not_ignore(item):
-            # we've got shortcut for method
-            if hasattr(value, '__get__') and not type(value) is MethodType:
-                if hasattr(value, '__set__'):
-                    dict_w[item] = ('get', item)
-                else:
-                    dict_w[item] = ('set', item)
-            else:
-                dict_w[item] = protocol.wrap(value)
-    bases_w = [protocol.wrap(i) for i in tp.__bases__ if i is not object]
-    return tp_id, tp.__name__, dict_w, bases_w
-
-def unwrap_descriptor_gen(desc_class):
-    def unwrapper(protocol, data):
-        name = data
-        obj = desc_class(protocol, name)
-        obj.__name__ = name
-        return obj
-    return unwrapper
-
-unwrap_get_descriptor = unwrap_descriptor_gen(GetDescriptor)
-unwrap_getset_descriptor = unwrap_descriptor_gen(GetSetDescriptor)
-
-def unwrap_type(objkeeper, protocol, type_id, name_, dict_w, bases_w):
-    """ Unwrap remote type, based on it's description
-    """
-    if bases_w == []:
-        bases = (object,)
-    else:
-        bases = tuple([protocol.unwrap(i) for i in bases_w])
-    d = dict.fromkeys(dict_w)
-    # XXX we do it in two steps to avoid cyclic dependencies,
-    #     probably there is some smarter way of doing this
-    if '__doc__' in dict_w:
-        d['__doc__'] = protocol.unwrap(dict_w['__doc__'])
-    tp = type(name_, bases, d)
-    objkeeper.register_remote_type(tp, type_id)
-    for key, value in dict_w.items():
-        if key != '__doc__':
-            v = protocol.unwrap(value)
-            if isinstance(v, FunctionType):
-                setattr(tp, key, staticmethod(v))
-            else:
-                setattr(tp, key, v)
diff --git a/lib_pypy/distributed/objkeeper.py b/lib_pypy/distributed/objkeeper.py
deleted file mode 100644
--- a/lib_pypy/distributed/objkeeper.py
+++ /dev/null
@@ -1,63 +0,0 @@
-
-""" objkeeper - Storage for remoteprotocol
-"""
-
-from types import FunctionType
-from distributed import faker
-
-class ObjKeeper(object):
-    def __init__(self, exported_names = {}):
-        self.exported_objects = [] # list of object that we've exported outside
-        self.exported_names = exported_names # dictionary of visible objects
-        self.exported_types = {} # dict of exported types
-        self.remote_types = {}
-        self.reverse_remote_types = {}
-        self.remote_objects = {}
-        self.exported_types_id = 0 # unique id of exported types
-        self.exported_types_reverse = {} # reverse dict of exported types
-    
-    def register_object(self, obj):
-        # XXX: At some point it makes sense not to export them again and again...
-        self.exported_objects.append(obj)
-        return len(self.exported_objects) - 1
-    
-    def ignore(self, key, value):
-        # there are some attributes, which cannot be modified later, nor
-        # passed into default values, ignore them
-        if key in ('__dict__', '__weakref__', '__class__',
-                   '__dict__', '__bases__'):
-            return True
-        return False
-    
-    def register_type(self, protocol, tp):
-        try:
-            return self.exported_types[tp]
-        except KeyError:
-            self.exported_types[tp] = self.exported_types_id
-            self.exported_types_reverse[self.exported_types_id] = tp
-            tp_id = self.exported_types_id
-            self.exported_types_id += 1
-
-        protocol.send(('type_reg', faker.wrap_type(protocol, tp, tp_id)))
-        return tp_id
-    
-    def fake_remote_type(self, protocol, tp_data):
-        type_id, name_, dict_w, bases_w = tp_data
-        tp = faker.unwrap_type(self, protocol, type_id, name_, dict_w, bases_w)
-
-    def register_remote_type(self, tp, type_id):
-        self.remote_types[type_id] = tp
-        self.reverse_remote_types[tp] = type_id
-    
-    def get_type(self, id):
-        return self.remote_types[id]
-
-    def get_object(self, id):
-        return self.exported_objects[id]
-    
-    def register_remote_object(self, controller, id):
-        self.remote_objects[controller] = id
-
-    def get_remote_object(self, controller):
-        return self.remote_objects[controller]
-        
diff --git a/lib_pypy/distributed/protocol.py b/lib_pypy/distributed/protocol.py
deleted file mode 100644
--- a/lib_pypy/distributed/protocol.py
+++ /dev/null
@@ -1,447 +0,0 @@
-
-""" Distributed controller(s) for use with transparent proxy objects
-
-First idea:
-
-1. We use py.execnet to create a connection to wherever
-2. We run some code there (RSync in advance makes some sense)
-3. We access remote objects like normal ones, with a special protocol
-
-Local side:
-  - Request an object from remote side from global namespace as simple
-    --- request(name) --->
-  - Receive an object which is in protocol described below which is
-    constructed as shallow copy of the remote type.
-
-    Shallow copy is defined as follows:
-
-    - for interp-level object that we know we can provide transparent proxy
-      we just do that
-
-    - for others we fake or fail depending on object
-
-    - for user objects, we create a class which fakes all attributes of
-      a class as transparent proxies of remote objects, we create an instance
-      of that class and populate __dict__
-
-    - for immutable types, we just copy that
-
-Remote side:
-  - we run code, whatever we like
-  - additionally, we've got thread exporting stuff (or just exporting
-    globals, whatever)
-  - for every object, we just send an object, or provide a protocol for
-    sending it in a different way.
-
-"""
-
-try:
-    from __pypy__ import tproxy as proxy
-    from __pypy__ import get_tproxy_controller
-except ImportError:
-    raise ImportError("Cannot work without transparent proxy functionality")
-
-from distributed.objkeeper import ObjKeeper
-from distributed import faker
-import sys
-
-class ObjectNotFound(Exception):
-    pass
-
-# XXX We do not make any garbage collection. We'll need it at some point
-
-"""
-TODO list:
-
-1. Garbage collection - we would like probably to use weakrefs, but
-   since they're not perfectly working in pypy, let's leave it alone for now
-2. Some error handling - exceptions are working, there are still some
-   applications where it all explodes.
-3. Support inheritance and recursive types
-"""
-
-from __pypy__ import internal_repr
-
-import types
-from marshal import dumps
-import exceptions
-
-# just placeholders for letter_types value
-class RemoteBase(object):
-    pass
-
-class DataDescriptor(object):
-    pass
-
-class NonDataDescriptor(object):
-    pass
-# end of placeholders
-
-class AbstractProtocol(object):
-    immutable_primitives = (str, int, float, long, unicode, bool, types.NotImplementedType)
-    mutable_primitives = (list, dict, types.FunctionType, types.FrameType, types.TracebackType,
-        types.CodeType)
-    exc_dir = dict((val, name) for name, val in exceptions.__dict__.iteritems())
-    
-    letter_types = {
-        'l' : list,
-        'd' : dict,
-        'c' : types.CodeType,
-        't' : tuple,
-        'e' : Exception,
-        'ex': exceptions, # for instances
-        'i' : int,
-        'b' : bool,
-        'f' : float,
-        'u' : unicode,
-        'l' : long,
-        's' : str,
-        'ni' : types.NotImplementedType,
-        'n' : types.NoneType,
-        'lst' : list,
-        'fun' : types.FunctionType,
-        'cus' : object,
-        'meth' : types.MethodType,
-        'type' : type,
-        'tp' : None,
-        'fr' : types.FrameType,
-        'tb' : types.TracebackType,
-        'reg' : RemoteBase,
-        'get' : NonDataDescriptor,
-        'set' : DataDescriptor,
-    }
-    type_letters = dict([(value, key) for key, value in letter_types.items()])
-    assert len(type_letters) == len(letter_types)
-    
-    def __init__(self, exported_names={}):
-        self.keeper = ObjKeeper(exported_names)
-        #self.remote_objects = {} # a dictionary controller --> id
-        #self.objs = [] # we just store everything, maybe later
-        #   # we'll need some kind of garbage collection
-
-    def wrap(self, obj):
-        """ Wrap an object as sth prepared for sending
-        """
-        def is_element(x, iterable):
-            try:
-                return x in iterable
-            except (TypeError, ValueError):
-                return False
-        
-        tp = type(obj)
-        ctrl = get_tproxy_controller(obj)
-        if ctrl:
-            return "tp", self.keeper.get_remote_object(ctrl)
-        elif obj is None:
-            return self.type_letters[tp]
-        elif tp in self.immutable_primitives:
-            # simple, immutable object, just copy
-            return (self.type_letters[tp], obj)
-        elif hasattr(obj, '__class__') and obj.__class__ in self.exc_dir:
-            return (self.type_letters[Exception], (self.exc_dir[obj.__class__], \
-                self.wrap(obj.args)))
-        elif is_element(obj, self.exc_dir): # weird hashing problems
-            return (self.type_letters[exceptions], self.exc_dir[obj])
-        elif tp is tuple:
-            # we just pack all of the items
-            return ('t', tuple([self.wrap(elem) for elem in obj]))
-        elif tp in self.mutable_primitives:
-            id = self.keeper.register_object(obj)
-            return (self.type_letters[tp], id)
-        elif tp is type:
-            try:
-                return "reg", self.keeper.reverse_remote_types[obj]
-            except KeyError:
-                pass
-            try:
-                return self.type_letters[tp], self.type_letters[obj]
-            except KeyError:
-                id = self.register_type(obj)
-                return (self.type_letters[tp], id)
-        elif tp is types.MethodType:
-            w_class = self.wrap(obj.im_class)
-            w_func = self.wrap(obj.im_func)
-            w_self = self.wrap(obj.im_self)
-            return (self.type_letters[tp], (w_class, \
-                self.wrap(obj.im_func.func_name), w_func, w_self))
-        else:
-            id = self.keeper.register_object(obj)
-            w_tp = self.wrap(tp)
-            return ("cus", (w_tp, id))
-    
-    def unwrap(self, data):
-        """ Unwrap an object
-        """
-        if data == 'n':
-            return None
-        tp_letter, obj_data = data
-        tp = self.letter_types[tp_letter]
-        if tp is None:
-            return self.keeper.get_object(obj_data)
-        elif tp is RemoteBase:
-            return self.keeper.exported_types_reverse[obj_data]
-        elif tp in self.immutable_primitives:
-            return obj_data # this is the object
-        elif tp is tuple:
-            return tuple([self.unwrap(i) for i in obj_data])
-        elif tp in self.mutable_primitives:
-            id = obj_data
-            ro = RemoteBuiltinObject(self, id)
-            self.keeper.register_remote_object(ro.perform, id)
-            p = proxy(tp, ro.perform)
-            ro.obj = p
-            return p
-        elif tp is Exception:
-            cls_name, w_args = obj_data
-            return getattr(exceptions, cls_name)(self.unwrap(w_args))
-        elif tp is exceptions:
-            cls_name = obj_data
-            return getattr(exceptions, cls_name)
-        elif tp is types.MethodType:
-            w_class, w_name, w_func, w_self = obj_data
-            tp = self.unwrap(w_class)
-            name = self.unwrap(w_name)
-            self_ = self.unwrap(w_self)
-            if self_ is not None:
-                if tp is None:
-                    setattr(self_, name, classmethod(self.unwrap(w_func)))
-                    return getattr(self_, name)
-                return getattr(tp, name).__get__(self_, tp)
-            func = self.unwrap(w_func)
-            setattr(tp, name, func)
-            return getattr(tp, name)
-        elif tp is type:
-            if isinstance(obj_data, str):
-                return self.letter_types[obj_data]
-            id = obj_data
-            return self.get_type(obj_data)
-        elif tp is DataDescriptor:            
-            return faker.unwrap_getset_descriptor(self, obj_data)
-        elif tp is NonDataDescriptor:
-            return faker.unwrap_get_descriptor(self, obj_data)
-        elif tp is object:
-            # we need to create a proper type
-            w_tp, id = obj_data
-            real_tp = self.unwrap(w_tp)
-            ro = RemoteObject(self, id)
-            self.keeper.register_remote_object(ro.perform, id)
-            p = proxy(real_tp, ro.perform)
-            ro.obj = p
-            return p
-        else:
-            raise NotImplementedError("Cannot unwrap %s" % (data,))
-    
-    def perform(self, *args, **kwargs):
-        raise NotImplementedError("Abstract only protocol")
-    
-    # some simple wrappers
-    def pack_args(self, args, kwargs):
-        return self.pack_list(args), self.pack_dict(kwargs)
-    
-    def pack_list(self, lst):
-        return [self.wrap(i) for i in lst]
-    
-    def pack_dict(self, d):
-        return dict([(self.wrap(key), self.wrap(val)) for key, val in d.items()])
-    
-    def unpack_args(self, args, kwargs):
-        return self.unpack_list(args), self.unpack_dict(kwargs)
-    
-    def unpack_list(self, lst):
-        return [self.unwrap(i) for i in lst]
-    
-    def unpack_dict(self, d):
-        return dict([(self.unwrap(key), self.unwrap(val)) for key, val in d.items()])
-    
-    def register_type(self, tp):
-        return self.keeper.register_type(self, tp)
-    
-    def get_type(self, id):
-        return self.keeper.get_type(id)
-    
-class LocalProtocol(AbstractProtocol):
-    """ This is stupid protocol for testing purposes only
-    """
-    def __init__(self):
-        super(LocalProtocol, self).__init__()
-        self.types = []
-   
-    def perform(self, id, name, *args, **kwargs):
-        obj = self.keeper.get_object(id)
-        # we pack and than unpack, for tests
-        args, kwargs = self.pack_args(args, kwargs)
-        assert isinstance(name, str)
-        dumps((args, kwargs))
-        args, kwargs = self.unpack_args(args, kwargs)
-        return getattr(obj, name)(*args, **kwargs)
-    
-    def register_type(self, tp):
-        self.types.append(tp)
-        return len(self.types) - 1
-    
-    def get_type(self, id):
-        return self.types[id]
-
-def remote_loop(protocol):
-    # the simplest version possible, without any concurrency and such
-    wrap = protocol.wrap
-    unwrap = protocol.unwrap
-    send = protocol.send
-    receive = protocol.receive
-    # we need this for wrap/unwrap
-    while 1:
-        command, data = receive()
-        if command == 'get':
-            try:
-                item = protocol.keeper.exported_names[data]
-            except KeyError:
-                send(("finished_error",data))
-            else:
-                # XXX wrapping problems catching? do we have any?
-                send(("finished", wrap(item)))
-        elif command == 'call':
-            id, name, args, kwargs = data
-            args, kwargs = protocol.unpack_args(args, kwargs)
-            try:
-                retval = getattr(protocol.keeper.get_object(id), name)(*args, **kwargs)
-            except:
-                send(("raised", wrap(sys.exc_info())))
-            else:
-                send(("finished", wrap(retval)))
-        elif command == 'finished':
-            return unwrap(data)
-        elif command == 'finished_error':
-            raise ObjectNotFound("Cannot find name %s" % (data,))
-        elif command == 'raised':
-            exc, val, tb = unwrap(data)
-            raise exc, val, tb
-        elif command == 'type_reg':
-            protocol.keeper.fake_remote_type(protocol, data)
-        elif command == 'force':
-            obj = protocol.keeper.get_object(data)
-            w_obj = protocol.pack(obj)
-            send(("forced", w_obj))
-        elif command == 'forced':
-            obj = protocol.unpack(data)
-            return obj
-        elif command == 'desc_get':
-            name, w_obj, w_type = data
-            obj = protocol.unwrap(w_obj)
-            type_ = protocol.unwrap(w_type)
-            if obj:
-                type__ = type(obj)
-            else:
-                type__ = type_
-            send(('finished', protocol.wrap(getattr(type__, name).__get__(obj, type_))))
-
-        elif command == 'desc_set':
-            name, w_obj, w_value = data
-            obj = protocol.unwrap(w_obj)
-            value = protocol.unwrap(w_value)
-            getattr(type(obj), name).__set__(obj, value)
-            send(('finished', protocol.wrap(None)))
-        elif command == 'remote_keys':
-            keys = protocol.keeper.exported_names.keys()
-            send(('finished', protocol.wrap(keys)))
-        else:
-            raise NotImplementedError("command %s" % command)
-
-class RemoteProtocol(AbstractProtocol):
-    #def __init__(self, gateway, remote_code):
-    #    self.gateway = gateway
-    def __init__(self, send, receive, exported_names={}):
-        super(RemoteProtocol, self).__init__(exported_names)
-        #self.exported_names = exported_names
-        self.send = send
-        self.receive = receive
-        #self.type_cache = {}
-        #self.type_id = 0
-        #self.remote_types = {}
-    
-    def perform(self, id, name, *args, **kwargs):
-        args, kwargs = self.pack_args(args, kwargs)
-        self.send(('call', (id, name, args, kwargs)))
-        try:
-            retval = remote_loop(self)
-        except:
-            e, val, tb = sys.exc_info()
-            raise e, val, tb.tb_next.tb_next
-        return retval
-    
-    def get_remote(self, name):
-        self.send(("get", name))
-        retval = remote_loop(self)
-        return retval
-    
-    def force(self, id):
-        self.send(("force", id))
-        retval = remote_loop(self)
-        return retval
-    
-    def pack(self, obj):
-        if isinstance(obj, list):
-            return "l", self.pack_list(obj)
-        elif isinstance(obj, dict):
-            return "d", self.pack_dict(obj)
-        else:
-            raise NotImplementedError("Cannot pack %s" % obj)
-        
-    def unpack(self, data):
-        letter, w_obj = data
-        if letter == 'l':
-            return self.unpack_list(w_obj)
-        elif letter == 'd':
-            return self.unpack_dict(w_obj)
-        else:
-            raise NotImplementedError("Cannot unpack %s" % (data,))
-
-    def get(self, name, obj, type):
-        self.send(("desc_get", (name, self.wrap(obj), self.wrap(type))))
-        return remote_loop(self)
-
-    def set(self, obj, value):
-        self.send(("desc_set", (name, self.wrap(obj), self.wrap(value))))
-
-    def remote_keys(self):
-        self.send(("remote_keys",None))
-        return remote_loop(self)
-
-class RemoteObject(object):
-    def __init__(self, protocol, id):
-        self.id = id
-        self.protocol = protocol
-    
-    def perform(self, name, *args, **kwargs):
-        return self.protocol.perform(self.id, name, *args, **kwargs)
-
-class RemoteBuiltinObject(RemoteObject):
-    def __init__(self, protocol, id):
-        self.id = id
-        self.protocol = protocol
-        self.forced = False
-    
-    def perform(self, name, *args, **kwargs):
-        # XXX: Check who really goes here
-        if self.forced:
-            return getattr(self.obj, name)(*args, **kwargs)
-        if name in ('__eq__', '__ne__', '__lt__', '__gt__', '__ge__', '__le__',
-            '__cmp__'):
-            self.obj = self.protocol.force(self.id)
-            return getattr(self.obj, name)(*args, **kwargs)
-        return self.protocol.perform(self.id, name, *args, **kwargs)
-
-def test_env(exported_names):
-    from stackless import channel, tasklet, run
-    inp, out = channel(), channel()
-    remote_protocol = RemoteProtocol(inp.send, out.receive, exported_names)
-    t = tasklet(remote_loop)(remote_protocol)
-    
-    #def send_trace(data):
-    #    print "Sending %s" % (data,)
-    #    out.send(data)
-
-    #def receive_trace():
-    #    data = inp.receive()
-    #    print "Received %s" % (data,)
-    #    return data
-    return RemoteProtocol(out.send, inp.receive)
diff --git a/lib_pypy/distributed/socklayer.py b/lib_pypy/distributed/socklayer.py
deleted file mode 100644
--- a/lib_pypy/distributed/socklayer.py
+++ /dev/null
@@ -1,83 +0,0 @@
-
-import py
-from socket import socket
-
-raise ImportError("XXX needs import adaptation as 'green' is removed from py lib for years")
-from py.impl.green.msgstruct import decodemessage, message
-from socket import socket, AF_INET, SOCK_STREAM
-import marshal
-import sys
-
-TRACE = False
-def trace(msg):
-    if TRACE:
-        print >>sys.stderr, msg
-
-class Finished(Exception):
-    pass
-
-class SocketWrapper(object):
-    def __init__(self, conn):
-        self.buffer = ""
-        self.conn = conn
-
-class ReceiverWrapper(SocketWrapper):
-    def receive(self):
-        msg, self.buffer = decodemessage(self.buffer)
-        while msg is None:
-            data = self.conn.recv(8192)
-            if not data:
-                raise Finished()
-            self.buffer += data
-            msg, self.buffer = decodemessage(self.buffer)
-        assert msg[0] == 'c'
-        trace("received %s" % msg[1])
-        return marshal.loads(msg[1])
-
-class SenderWrapper(SocketWrapper):
-    def send(self, data):
-        trace("sending %s" % (data,))
-        self.conn.sendall(message('c', marshal.dumps(data)))
-        trace("done")
-
-def socket_listener(address, socket=socket):
-    s = socket(AF_INET, SOCK_STREAM)
-    s.bind(address)
-    s.listen(1)
-    print "Waiting for connection on %s" % (address,)
-    conn, addr = s.accept()
-    print "Connected from %s" % (addr,)
-
-    return SenderWrapper(conn).send, ReceiverWrapper(conn).receive
-
-def socket_loop(address, to_export, socket=socket):
-    from distributed import RemoteProtocol, remote_loop
-    try:
-        send, receive = socket_listener(address, socket)
-        remote_loop(RemoteProtocol(send, receive, to_export))
-    except Finished:
-        pass
-
-def socket_connecter(address, socket=socket):
-    s = socket(AF_INET, SOCK_STREAM)
-    print "Connecting %s" % (address,)
-    s.connect(address)
-    
-    return SenderWrapper(s).send, ReceiverWrapper(s).receive
-
-def connect(address, socket=socket):
-    from distributed.support import RemoteView
-    from distributed import RemoteProtocol
-    return RemoteView(RemoteProtocol(*socket_connecter(address, socket)))
-
-def spawn_remote_side(code, gw):
-    """ A very simple wrapper around greenexecnet to allow
-    spawning a remote side of lib/distributed
-    """
-    from distributed import RemoteProtocol
-    extra = str(py.code.Source("""
-    from distributed import remote_loop, RemoteProtocol
-    remote_loop(RemoteProtocol(channel.send, channel.receive, globals()))
-    """))
-    channel = gw.remote_exec(code + "\n" + extra)
-    return RemoteProtocol(channel.send, channel.receive)
diff --git a/lib_pypy/distributed/support.py b/lib_pypy/distributed/support.py
deleted file mode 100644
--- a/lib_pypy/distributed/support.py
+++ /dev/null
@@ -1,17 +0,0 @@
-
-""" Some random support functions
-"""
-
-from distributed.protocol import ObjectNotFound
-
-class RemoteView(object):
-    def __init__(self, protocol):
-        self.__dict__['__protocol'] = protocol
-
-    def __getattr__(self, name):
-        if name == '__dict__':
-            return super(RemoteView, self).__getattr__(name)
-        try:
-            return self.__dict__['__protocol'].get_remote(name)
-        except ObjectNotFound:
-            raise AttributeError(name)
diff --git a/lib_pypy/distributed/test/__init__.py b/lib_pypy/distributed/test/__init__.py
deleted file mode 100644
diff --git a/lib_pypy/distributed/test/test_distributed.py b/lib_pypy/distributed/test/test_distributed.py
deleted file mode 100644
--- a/lib_pypy/distributed/test/test_distributed.py
+++ /dev/null
@@ -1,301 +0,0 @@
-
-""" Controllers tests
-"""
-
-from pypy.conftest import gettestobjspace
-import sys
-import pytest
-
-class AppTestDistributed(object):
-    def setup_class(cls):
-        cls.space = gettestobjspace(**{"objspace.std.withtproxy": True,
-            "usemodules":("_continuation",)})
-
-    def test_init(self):
-        import distributed
-
-    def test_protocol(self):
-        from distributed.protocol import AbstractProtocol
-        protocol = AbstractProtocol()
-        for item in ("aaa", 3, u"aa", 344444444444444444L, 1.2, (1, "aa")):
-            assert protocol.unwrap(protocol.wrap(item)) == item
-        assert type(protocol.unwrap(protocol.wrap([1,2,3]))) is list
-        assert type(protocol.unwrap(protocol.wrap({"a":3}))) is dict
-        
-        def f():
-            pass
-        
-        assert type(protocol.unwrap(protocol.wrap(f))) is type(f)
-
-    def test_method_of_false_obj(self):
-        from distributed.protocol import AbstractProtocol
-        protocol = AbstractProtocol()
-        lst = []
-        m = lst.append
-        assert type(protocol.unwrap(protocol.wrap(m))) is type(m)
-
-    def test_protocol_run(self):
-        l = [1,2,3]
-        from distributed.protocol import LocalProtocol
-        protocol = LocalProtocol()
-        wrap = protocol.wrap
-        unwrap = protocol.unwrap
-        item = unwrap(wrap(l))
-        assert len(item) == 3
-        assert item[2] == 3
-        item += [1,1,1]
-        assert len(item) == 6
-
-    def test_protocol_call(self):
-        def f(x, y):
-            return x + y
-        
-        from distributed.protocol import LocalProtocol
-        protocol = LocalProtocol()
-        wrap = protocol.wrap
-        unwrap = protocol.unwrap
-        item = unwrap(wrap(f))
-        assert item(3, 2) == 5
-
-    def test_simulation_call(self):
-        def f(x, y):
-            return x + y
-        
-        import types
-        from distributed import RemoteProtocol
-        import sys
-
-        data = []
-        result = []
-        protocol = RemoteProtocol(result.append, data.pop)
-        data += [("finished", protocol.wrap(5)), ("finished", protocol.wrap(f))]
-        fun = protocol.get_remote("f")
-        assert isinstance(fun, types.FunctionType)
-        assert fun(2, 3) == 5
-
-    def test_local_obj(self):
-        class A(object):
-            def __init__(self, x):
-                self.x = x
-            
-            def __len__(self):
-                return self.x + 8
-        
-        from distributed.protocol import LocalProtocol
-        protocol = LocalProtocol()
-        wrap = protocol.wrap
-        unwrap = protocol.unwrap
-        item = unwrap(wrap(A(3)))
-        assert item.x == 3
-        assert len(item) == 11
-
-class AppTestDistributedTasklets(object):
-    spaceconfig = {"objspace.std.withtproxy": True,
-                   "objspace.usemodules._continuation": True}
-    def setup_class(cls):
-        cls.w_test_env = cls.space.appexec([], """():
-        from distributed import test_env
-        return test_env
-        """)
-        cls.reclimit = sys.getrecursionlimit()
-        sys.setrecursionlimit(100000)
-
-    def teardown_class(cls):
-        sys.setrecursionlimit(cls.reclimit)
-    
-    def test_remote_protocol_call(self):
-        def f(x, y):
-            return x + y
-        
-        protocol = self.test_env({"f": f})
-        fun = protocol.get_remote("f")
-        assert fun(2, 3) == 5
-
-    def test_callback(self):
-        def g():
-            return 8
-        
-        def f(x):
-            return x + g()
-        
-        protocol = self.test_env({"f":f})
-        fun = protocol.get_remote("f")
-        assert fun(8) == 16
-    
-    def test_remote_dict(self):
-        #skip("Land of infinite recursion")
-        d = {'a':3}
-        protocol = self.test_env({'d':d})
-        xd = protocol.get_remote('d')
-        #assert d['a'] == xd['a']
-        assert d.keys() == xd.keys()
-        assert d.values() == xd.values()
-        assert d == xd
-        
-    def test_remote_obj(self):
-        class A(object):
-            def __init__(self, x):
-                self.x = x
-            
-            def __len__(self):
-                return self.x + 8
-        a = A(3)
-        
-        protocol = self.test_env({'a':a})
-        xa = protocol.get_remote("a")
-        assert xa.x == 3
-        assert len(xa) == 11
-    
-    def test_remote_doc_and_callback(self):
-        class A(object):
-            """xxx"""
-            def __init__(self):
-                pass
-
-            def meth(self, x):
-                return x() + 3
-        
-        def x():
-            return 1
-        
-        a = A()
-        
-        protocol = self.test_env({'a':a})
-        xa = protocol.get_remote('a')
-        assert xa.__class__.__doc__ == 'xxx'
-        assert xa.meth(x) == 4
-
-    def test_double_reference(self):
-        class A(object):
-            def meth(self, one):
-                self.one = one
-            
-            def perform(self):
-                return 1 + len(self.one())
-        
-        class B(object):
-            def __call__(self):
-                return [1,2,3]
-        
-        a = A()
-        protocol = self.test_env({'a': a})
-        xa = protocol.get_remote('a')
-        xa.meth(B())
-        assert xa.perform() == 4
-
-    def test_frame(self):
-        #skip("Land of infinite recursion")
-        import sys
-        f = sys._getframe()
-        protocol = self.test_env({'f':f})
-        xf = protocol.get_remote('f')
-        assert f.f_globals.keys() == xf.f_globals.keys()
-        assert f.f_locals.keys() == xf.f_locals.keys()
-
-    def test_remote_exception(self):
-        def raising():
-            1/0
-        
-        protocol = self.test_env({'raising':raising})
-        xr = protocol.get_remote('raising')
-        try:
-            xr()
-        except ZeroDivisionError:
-            import sys
-            exc_info, val, tb  = sys.exc_info()
-            #assert tb.tb_next is None
-        else:
-            raise AssertionError("Did not raise")
-
-    def test_remote_classmethod(self):
-        class A(object):
-            z = 8
-
-            @classmethod
-            def x(cls):
-                return cls.z
-
-        a = A()
-        protocol = self.test_env({'a':a})
-        xa = protocol.get_remote("a")
-        res = xa.x()
-        assert res == 8
-
-    def test_types_reverse_mapping(self):
-        class A(object):
-            def m(self, tp):
-                assert type(self) is tp
-
-        a = A()
-        protocol = self.test_env({'a':a, 'A':A})
-        xa = protocol.get_remote('a')
-        xA = protocol.get_remote('A')
-        xa.m(xA)
-
-    def test_instantiate_remote_type(self):
-        class C(object):
-            def __init__(self, y):
-                self.y = y
-            
-            def x(self):
-                return self.y
-
-        protocol = self.test_env({'C':C})
-        xC = protocol.get_remote('C')
-        xc = xC(3)
-        res = xc.x()
-        assert res == 3
-
-    def test_remote_sys(self):
-        import sys
-
-        protocol = self.test_env({'sys':sys})
-        s = protocol.get_remote('sys')
-        l = dir(s)
-        assert l
-
-    def test_remote_file_access(self):
-        skip("Descriptor logic seems broken")
-        protocol = self.test_env({'f':open})
-        xf = protocol.get_remote('f')
-        data = xf('/etc/passwd').read()
-        assert data
-
-    def test_real_descriptor(self):
-        class getdesc(object):
-            def __get__(self, obj, val=None):
-                if obj is not None:
-                    assert type(obj) is X
-                return 3
-
-        class X(object):
-            x = getdesc()
-
-        x = X()
-
-        protocol = self.test_env({'x':x})
-        xx = protocol.get_remote('x')
-        assert xx.x == 3
-    
-    def test_bases(self):
-        class X(object):
-            pass
-
-        class Y(X):
-            pass
-
-        y = Y()
-        protocol = self.test_env({'y':y, 'X':X})
-        xy = protocol.get_remote('y')
-        xX = protocol.get_remote('X')
-        assert isinstance(xy, xX)
-
-    def test_key_error(self):
-        from distributed import ObjectNotFound
-        protocol = self.test_env({})
-        raises(ObjectNotFound, "protocol.get_remote('x')")
-
-    def test_list_items(self):
-        protocol = self.test_env({'x':3, 'y':8})
-        assert sorted(protocol.remote_keys()) == ['x', 'y']
-
diff --git a/lib_pypy/distributed/test/test_greensock.py b/lib_pypy/distributed/test/test_greensock.py
deleted file mode 100644
--- a/lib_pypy/distributed/test/test_greensock.py
+++ /dev/null
@@ -1,62 +0,0 @@
-
-import py
-from pypy.conftest import gettestobjspace, option
-
-def setup_module(mod):
-    py.test.importorskip("pygreen")   # found e.g. in py/trunk/contrib 
-
-class AppTestDistributedGreensock(object):
-    def setup_class(cls):
-        if not option.runappdirect:
-            py.test.skip("Cannot run this on top of py.py because of PopenGateway")
-        cls.space = gettestobjspace(**{"objspace.std.withtproxy": True,
-                                       "usemodules":("_continuation",)})
-        cls.w_remote_side_code = cls.space.appexec([], """():
-        import sys
-        sys.path.insert(0, '%s')
-        remote_side_code = '''
-class A:
-   def __init__(self, x):
-       self.x = x
-            
-   def __len__(self):
-       return self.x + 8
-
-   def raising(self):
-       1/0
-
-   def method(self, x):
-       return x() + self.x
-
-a = A(3)
-
-def count():
-    x = 10
-    # naive counting :)
-    result = 1
-    for i in range(x):
-        result += 1
-    return result
-'''
-        return remote_side_code
-        """ % str(py.path.local(__file__).dirpath().dirpath().dirpath().dirpath()))
-
-    def test_remote_call(self):
-        from distributed import socklayer
-        import sys
-        from pygreen.greenexecnet import PopenGateway
-        gw = PopenGateway()
-        rp = socklayer.spawn_remote_side(self.remote_side_code, gw)
-        a = rp.get_remote("a")
-        assert a.method(lambda : 13) == 16
-    
-    def test_remote_counting(self):
-        from distributed import socklayer
-        from pygreen.greensock2 import allof
-        from pygreen.greenexecnet import PopenGateway
-        gws = [PopenGateway() for i in range(3)]
-        rps = [socklayer.spawn_remote_side(self.remote_side_code, gw)
-               for gw in gws]
-        counters = [rp.get_remote("count") for rp in rps]
-        assert allof(*counters) == (11, 11, 11)
-
diff --git a/lib_pypy/distributed/test/test_socklayer.py b/lib_pypy/distributed/test/test_socklayer.py
deleted file mode 100644
--- a/lib_pypy/distributed/test/test_socklayer.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import py
-from pypy.conftest import gettestobjspace
-
-def setup_module(mod):
-    py.test.importorskip("pygreen")   # found e.g. in py/trunk/contrib 
-
-# XXX think how to close the socket
-
-class AppTestSocklayer:
-    def setup_class(cls):
-        cls.space = gettestobjspace(**{"objspace.std.withtproxy": True,
-                                       "usemodules":("_continuation",
-                                                     "_socket", "select")})
-    
-    def test_socklayer(self):
-        class X(object):
-            z = 3
-
-        x = X()
-
-        try:
-            import py
-        except ImportError:
-            skip("pylib not importable")
-        from pygreen.pipe.gsocke import GreenSocket
-        from distributed.socklayer import socket_loop, connect
-        from pygreen.greensock2 import oneof, allof
-
-        def one():
-            socket_loop(('127.0.0.1', 21211), {'x':x}, socket=GreenSocket)
-
-        def two():
-            rp = connect(('127.0.0.1', 21211), GreenSocket)
-            assert rp.x.z == 3
-
-        oneof(one, two)
diff --git a/lib_pypy/pyrepl/readline.py b/lib_pypy/pyrepl/readline.py
--- a/lib_pypy/pyrepl/readline.py
+++ b/lib_pypy/pyrepl/readline.py
@@ -194,7 +194,7 @@
         except _error:
             return _old_raw_input(prompt)
         reader.ps1 = prompt
-        return reader.readline(reader, startup_hook=self.startup_hook)
+        return reader.readline(startup_hook=self.startup_hook)
 
     def multiline_input(self, more_lines, ps1, ps2, returns_unicode=False):
         """Read an input on possibly multiple lines, asking for more
diff --git a/lib_pypy/sip.py b/lib_pypy/sip.py
deleted file mode 100644
--- a/lib_pypy/sip.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from _rpyc_support import proxy_module
-
-proxy_module(globals())
-del proxy_module
diff --git a/pypy/annotation/annrpython.py b/pypy/annotation/annrpython.py
--- a/pypy/annotation/annrpython.py
+++ b/pypy/annotation/annrpython.py
@@ -133,44 +133,6 @@
         self.build_graph_types(graph, inputcells, complete_now=False)
         self.complete_helpers(policy)
         return graph
-    
-    def annotate_helper_method(self, _class, attr, args_s, policy=None):
-        """ Warning! this method is meant to be used between
-        annotation and rtyping
-        """
-        if policy is None:
-            from pypy.annotation.policy import AnnotatorPolicy
-            policy = AnnotatorPolicy()
-        
-        assert attr != '__class__'
-        classdef = self.bookkeeper.getuniqueclassdef(_class)
-        attrdef = classdef.find_attribute(attr)
-        s_result = attrdef.getvalue()
-        classdef.add_source_for_attribute(attr, classdef.classdesc)
-        self.bookkeeper
-        assert isinstance(s_result, annmodel.SomePBC)
-        olddesc = s_result.any_description()
-        desc = olddesc.bind_self(classdef)
-        args = self.bookkeeper.build_args("simple_call", args_s[:])
-        desc.consider_call_site(self.bookkeeper, desc.getcallfamily(), [desc],
-            args, annmodel.s_ImpossibleValue, None)
-        result = []
-        def schedule(graph, inputcells):
-            result.append((graph, inputcells))
-            return annmodel.s_ImpossibleValue
-
-        prevpolicy = self.policy
-        self.policy = policy
-        self.bookkeeper.enter(None)
-        try:
-            desc.pycall(schedule, args, annmodel.s_ImpossibleValue)
-        finally:
-            self.bookkeeper.leave()
-            self.policy = prevpolicy
-        [(graph, inputcells)] = result
-        self.build_graph_types(graph, inputcells, complete_now=False)
-        self.complete_helpers(policy)
-        return graph
 
     def complete_helpers(self, policy):
         saved = self.policy, self.added_blocks
diff --git a/pypy/annotation/binaryop.py b/pypy/annotation/binaryop.py
--- a/pypy/annotation/binaryop.py
+++ b/pypy/annotation/binaryop.py
@@ -7,7 +7,7 @@
 from pypy.tool.pairtype import pair, pairtype
 from pypy.annotation.model import SomeObject, SomeInteger, SomeBool, s_Bool, SomeOOBoundMeth
 from pypy.annotation.model import SomeString, SomeChar, SomeList, SomeDict
-from pypy.annotation.model import SomeUnicodeCodePoint
+from pypy.annotation.model import SomeUnicodeCodePoint, SomeStringOrUnicode
 from pypy.annotation.model import SomeTuple, SomeImpossibleValue, s_ImpossibleValue
 from pypy.annotation.model import SomeInstance, SomeBuiltin, SomeIterator
 from pypy.annotation.model import SomePBC, SomeFloat, s_None
@@ -470,30 +470,37 @@
             "string formatting mixing strings and unicode not supported")
 
 
-class __extend__(pairtype(SomeString, SomeTuple)):
-    def mod((str, s_tuple)):
+class __extend__(pairtype(SomeString, SomeTuple),
+                 pairtype(SomeUnicodeString, SomeTuple)):
+    def mod((s_string, s_tuple)):
+        is_string = isinstance(s_string, SomeString)
+        is_unicode = isinstance(s_string, SomeUnicodeString)
+        assert is_string or is_unicode
         for s_item in s_tuple.items:
-            if isinstance(s_item, (SomeUnicodeCodePoint, SomeUnicodeString)):
+            if (is_unicode and isinstance(s_item, (SomeChar, SomeString)) or
+                is_string and isinstance(s_item, (SomeUnicodeCodePoint,
+                                                  SomeUnicodeString))):
                 raise NotImplementedError(
                     "string formatting mixing strings and unicode not supported")
-        getbookkeeper().count('strformat', str, s_tuple)
-        no_nul = str.no_nul
+        getbookkeeper().count('strformat', s_string, s_tuple)
+        no_nul = s_string.no_nul
         for s_item in s_tuple.items:
             if isinstance(s_item, SomeFloat):
                 pass   # or s_item is a subclass, like SomeInteger
-            elif isinstance(s_item, SomeString) and s_item.no_nul:
+            elif isinstance(s_item, SomeStringOrUnicode) and s_item.no_nul:
                 pass
             else:
                 no_nul = False
                 break
-        return SomeString(no_nul=no_nul)
+        return s_string.__class__(no_nul=no_nul)
 
 
-class __extend__(pairtype(SomeString, SomeObject)):
+class __extend__(pairtype(SomeString, SomeObject),
+                 pairtype(SomeUnicodeString, SomeObject)):
 
-    def mod((str, args)):
-        getbookkeeper().count('strformat', str, args)
-        return SomeString()
+    def mod((s_string, args)):
+        getbookkeeper().count('strformat', s_string, args)
+        return s_string.__class__()
 
 class __extend__(pairtype(SomeFloat, SomeFloat)):
     
@@ -659,7 +666,7 @@
 
     def mul((str1, int2)): # xxx do we want to support this
         getbookkeeper().count("str_mul", str1, int2)
-        return SomeString()
+        return SomeString(no_nul=str1.no_nul)
 
 class __extend__(pairtype(SomeUnicodeString, SomeInteger)):
     def getitem((str1, int2)):
diff --git a/pypy/annotation/bookkeeper.py b/pypy/annotation/bookkeeper.py
--- a/pypy/annotation/bookkeeper.py
+++ b/pypy/annotation/bookkeeper.py
@@ -201,6 +201,7 @@
                     for op in block.operations:
                         if op.opname in ('simple_call', 'call_args'):
                             yield op
+
                         # some blocks are partially annotated
                         if binding(op.result, None) is None:
                             break   # ignore the unannotated part
diff --git a/pypy/annotation/description.py b/pypy/annotation/description.py
--- a/pypy/annotation/description.py
+++ b/pypy/annotation/description.py
@@ -514,9 +514,9 @@
                     continue
                 self.add_source_attribute(name, value, mixin=True)
 
-    def add_sources_for_class(self, cls, mixin=False):
+    def add_sources_for_class(self, cls):
         for name, value in cls.__dict__.items():
-            self.add_source_attribute(name, value, mixin)
+            self.add_source_attribute(name, value)
 
     def getallclassdefs(self):
         return self._classdefs.values()
diff --git a/pypy/annotation/test/test_annrpython.py b/pypy/annotation/test/test_annrpython.py
--- a/pypy/annotation/test/test_annrpython.py
+++ b/pypy/annotation/test/test_annrpython.py
@@ -2138,6 +2138,15 @@
         assert isinstance(s, annmodel.SomeString)
         assert s.no_nul
 
+    def test_mul_str0(self):
+        def f(s):
+            return s*10
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [annmodel.SomeString(no_nul=True)])
+        assert isinstance(s, annmodel.SomeString)
+        assert s.no_nul
+        
+
     def test_non_none_and_none_with_isinstance(self):
         class A(object):
             pass
@@ -2738,20 +2747,6 @@
         s = a.build_types(f, [])
         assert s.knowntype == int
 
-    def test_helper_method_annotator(self):
-        def fun():
-            return 21
-
-        class A(object):
-            def helper(self):
-                return 42
-
-        a = self.RPythonAnnotator()
-        a.build_types(fun, [])
-        a.annotate_helper_method(A, "helper", [])
-        assert a.bookkeeper.getdesc(A.helper).getuniquegraph()
-        assert a.bookkeeper.getdesc(A().helper).getuniquegraph()
-
     def test_chr_out_of_bounds(self):
         def g(n, max):
             if n < max:
@@ -3394,6 +3389,22 @@
         s = a.build_types(f, [str])
         assert isinstance(s, annmodel.SomeString)
 
+    def test_unicodeformatting(self):
+        def f(x):
+            return u'%s' % x
+
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [unicode])
+        assert isinstance(s, annmodel.SomeUnicodeString)
+
+    def test_unicodeformatting_tuple(self):
+        def f(x):
+            return u'%s' % (x,)
+
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [unicode])
+        assert isinstance(s, annmodel.SomeUnicodeString)
+
 
     def test_negative_slice(self):
         def f(s, e):
@@ -3780,6 +3791,56 @@
         e = py.test.raises(Exception, a.build_types, f, [])
         assert 'object with a __call__ is not RPython' in str(e.value)
 
+    def test_os_getcwd(self):
+        import os
+        def fn():
+            return os.getcwd()
+        a = self.RPythonAnnotator()
+        s = a.build_types(fn, [])
+        assert isinstance(s, annmodel.SomeString)
+        assert s.no_nul
+
+    def test_os_getenv(self):
+        import os
+        def fn():
+            return os.environ.get('PATH')
+        a = self.RPythonAnnotator()
+        s = a.build_types(fn, [])
+        assert isinstance(s, annmodel.SomeString)
+        assert s.no_nul
+
+    def test_base_iter(self):
+        class A(object):
+            def __iter__(self):
+                return self
+        
+        def fn():
+            return iter(A())
+
+        a = self.RPythonAnnotator()
+        s = a.build_types(fn, [])
+        assert isinstance(s, annmodel.SomeInstance)
+        assert s.classdef.name.endswith('.A')
+
+    def test_iter_next(self):
+        class A(object):
+            def __iter__(self):
+                return self
+
+            def next(self):
+                return 1
+        
+        def fn():
+            s = 0
+            for x in A():
+                s += x
+            return s
+
+        a = self.RPythonAnnotator()
+        s = a.build_types(fn, [])
+        assert len(a.translator.graphs) == 3 # fn, __iter__, next
+        assert isinstance(s, annmodel.SomeInteger)
+
 def g(n):
     return [0,1,2,n]
 
diff --git a/pypy/annotation/unaryop.py b/pypy/annotation/unaryop.py
--- a/pypy/annotation/unaryop.py
+++ b/pypy/annotation/unaryop.py
@@ -609,33 +609,36 @@
 
 class __extend__(SomeInstance):
 
+    def _true_getattr(ins, attr):
+        if attr == '__class__':
+            return ins.classdef.read_attr__class__()
+        attrdef = ins.classdef.find_attribute(attr)
+        position = getbookkeeper().position_key
+        attrdef.read_locations[position] = True
+        s_result = attrdef.getvalue()
+        # hack: if s_result is a set of methods, discard the ones
+        #       that can't possibly apply to an instance of ins.classdef.
+        # XXX do it more nicely
+        if isinstance(s_result, SomePBC):
+            s_result = ins.classdef.lookup_filter(s_result, attr,
+                                                  ins.flags)
+        elif isinstance(s_result, SomeImpossibleValue):
+            ins.classdef.check_missing_attribute_update(attr)
+            # blocking is harmless if the attribute is explicitly listed
+            # in the class or a parent class.
+            for basedef in ins.classdef.getmro():
+                if basedef.classdesc.all_enforced_attrs is not None:
+                    if attr in basedef.classdesc.all_enforced_attrs:
+                        raise HarmlesslyBlocked("get enforced attr")
+        elif isinstance(s_result, SomeList):
+            s_result = ins.classdef.classdesc.maybe_return_immutable_list(
+                attr, s_result)
+        return s_result
+
     def getattr(ins, s_attr):
         if s_attr.is_constant() and isinstance(s_attr.const, str):
             attr = s_attr.const
-            if attr == '__class__':
-                return ins.classdef.read_attr__class__()
-            attrdef = ins.classdef.find_attribute(attr)
-            position = getbookkeeper().position_key
-            attrdef.read_locations[position] = True
-            s_result = attrdef.getvalue()
-            # hack: if s_result is a set of methods, discard the ones
-            #       that can't possibly apply to an instance of ins.classdef.
-            # XXX do it more nicely
-            if isinstance(s_result, SomePBC):
-                s_result = ins.classdef.lookup_filter(s_result, attr,
-                                                      ins.flags)
-            elif isinstance(s_result, SomeImpossibleValue):
-                ins.classdef.check_missing_attribute_update(attr)
-                # blocking is harmless if the attribute is explicitly listed
-                # in the class or a parent class.
-                for basedef in ins.classdef.getmro():
-                    if basedef.classdesc.all_enforced_attrs is not None:
-                        if attr in basedef.classdesc.all_enforced_attrs:
-                            raise HarmlesslyBlocked("get enforced attr")
-            elif isinstance(s_result, SomeList):
-                s_result = ins.classdef.classdesc.maybe_return_immutable_list(
-                    attr, s_result)
-            return s_result
+            return ins._true_getattr(attr)
         return SomeObject()
     getattr.can_only_throw = []
 
@@ -657,6 +660,19 @@
         if not ins.can_be_None:
             s.const = True
 
+    def iter(ins):
+        s_iterable = ins._true_getattr('__iter__')
+        bk = getbookkeeper()
+        # record for calltables
+        bk.emulate_pbc_call(bk.position_key, s_iterable, [])
+        return s_iterable.call(bk.build_args("simple_call", []))
+
+    def next(ins):
+        s_next = ins._true_getattr('next')
+        bk = getbookkeeper()
+        # record for calltables
+        bk.emulate_pbc_call(bk.position_key, s_next, [])
+        return s_next.call(bk.build_args("simple_call", []))
 
 class __extend__(SomeBuiltin):
     def _can_only_throw(bltn, *args):
diff --git a/pypy/bin/py.py b/pypy/bin/py.py
--- a/pypy/bin/py.py
+++ b/pypy/bin/py.py
@@ -89,12 +89,12 @@
     space.setitem(space.sys.w_dict, space.wrap('executable'),
                   space.wrap(argv[0]))
 
-    # call pypy_initial_path: the side-effect is that it sets sys.prefix and
+    # call pypy_find_stdlib: the side-effect is that it sets sys.prefix and
     # sys.exec_prefix
-    srcdir = os.path.dirname(os.path.dirname(pypy.__file__))
-    space.appexec([space.wrap(srcdir)], """(srcdir):
+    executable = argv[0]
+    space.appexec([space.wrap(executable)], """(executable):
         import sys
-        sys.pypy_initial_path(srcdir)
+        sys.pypy_find_stdlib(executable)
     """)
 
     # set warning control options (if any)
diff --git a/pypy/bin/rpython b/pypy/bin/rpython
old mode 100644
new mode 100755
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -41,6 +41,7 @@
 translation_modules.update(dict.fromkeys(
     ["fcntl", "rctime", "select", "signal", "_rawffi", "zlib",
      "struct", "_md5", "cStringIO", "array", "_ffi",
+     "binascii",
      # the following are needed for pyrepl (and hence for the
      # interactive prompt/pdb)
      "termios", "_minimal_curses",
@@ -79,6 +80,7 @@
 module_dependencies = {
     '_multiprocessing': [('objspace.usemodules.rctime', True),
                          ('objspace.usemodules.thread', True)],
+    'cpyext': [('objspace.usemodules.array', True)],
     }
 module_suggests = {
     # the reason you want _rawffi is for ctypes, which
diff --git a/pypy/config/test/test_pypyoption.py b/pypy/config/test/test_pypyoption.py
--- a/pypy/config/test/test_pypyoption.py
+++ b/pypy/config/test/test_pypyoption.py
@@ -71,7 +71,7 @@
         c = Config(descr)
         for path in c.getpaths(include_groups=True):
             fn = prefix + "." + path + ".txt"
-            yield check_file_exists, fn
+            yield fn, check_file_exists, fn
 
 def test__ffi_opt():
     config = get_pypy_config(translating=True)
diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst
--- a/pypy/doc/coding-guide.rst
+++ b/pypy/doc/coding-guide.rst
@@ -255,7 +255,12 @@
   code if the translator can prove that they are non-negative.  When
   slicing a string it is necessary to prove that the slice start and
   stop indexes are non-negative. There is no implicit str-to-unicode cast
-  anywhere.
+  anywhere. Simple string formatting using the ``%`` operator works, as long
+  as the format string is known at translation time; the only supported
+  formatting specifiers are ``%s``, ``%d``, ``%x``, ``%o``, ``%f``, plus
+  ``%r`` but only for user-defined instances. Modifiers such as conversion
+  flags, precision, length etc. are not supported. Moreover, it is forbidden
+  to mix unicode and strings when formatting.
 
 **tuples**
 
@@ -341,8 +346,8 @@
 
 **objects**
 
-  Normal rules apply. Special methods are not honoured, except ``__init__`` and
-  ``__del__``.
+  Normal rules apply. Special methods are not honoured, except ``__init__``,
+  ``__del__`` and ``__iter__``.
 
 This layout makes the number of types to take care about quite limited.
 
@@ -610,10 +615,6 @@
     >>>> cPickle.__file__
     '/home/hpk/pypy-dist/lib_pypy/cPickle..py'
 
-    >>>> import opcode
-    >>>> opcode.__file__
-    '/home/hpk/pypy-dist/lib-python/modified-2.7/opcode.py'
-
     >>>> import os
     >>>> os.__file__
     '/home/hpk/pypy-dist/lib-python/2.7/os.py'
@@ -639,13 +640,9 @@
 
     contains pure Python reimplementation of modules.
 
-*lib-python/modified-2.7/*
-
-    The files and tests that we have modified from the CPython library.
-
 *lib-python/2.7/*
 
-    The unmodified CPython library. **Never ever check anything in there**.
+    The modified CPython library.
 
 .. _`modify modules`:
 
@@ -658,16 +655,9 @@
 by default and CPython has a number of places where it relies
 on some classes being old-style.
 
-If you want to change a module or test contained in ``lib-python/2.7``
-then make sure that you copy the file to our ``lib-python/modified-2.7``
-directory first.  In mercurial commandline terms this reads::
-
-    $ hg cp lib-python/2.7/somemodule.py lib-python/modified-2.7/
-
-and subsequently you edit and commit
-``lib-python/modified-2.7/somemodule.py``.  This copying operation is
-important because it keeps the original CPython tree clean and makes it
-obvious what we had to change.
+We just maintain those changes in place;
+to see what is changed we have a branch called `vendor/stdlib`
+which contains the unmodified CPython stdlib.
 
 .. _`mixed module mechanism`:
 .. _`mixed modules`:
diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py
--- a/pypy/doc/conf.py
+++ b/pypy/doc/conf.py
@@ -45,9 +45,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '1.8'
+version = '1.9'
 # The full version, including alpha/beta/rc tags.
-release = '1.8'
+release = '1.9'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/pypy/doc/config/objspace.usemodules.cppyy.txt b/pypy/doc/config/objspace.usemodules.cppyy.txt
new file mode 100644
--- /dev/null
+++ b/pypy/doc/config/objspace.usemodules.cppyy.txt
@@ -0,0 +1,1 @@
+Use the 'cppyy' module
diff --git a/pypy/doc/cppyy.rst b/pypy/doc/cppyy.rst
--- a/pypy/doc/cppyy.rst
+++ b/pypy/doc/cppyy.rst
@@ -5,8 +5,10 @@
 The cppyy module provides C++ bindings for PyPy by using the reflection
 information extracted from C++ header files by means of the
 `Reflex package`_.
-For this to work, you have to both install Reflex and build PyPy from the
-reflex-support branch.
+For this to work, you have to both install Reflex and build PyPy from source,
+as the cppyy module is not enabled by default.
+Note that the development version of cppyy lives in the reflex-support
+branch.
 As indicated by this being a branch, support for Reflex is still
 experimental.
 However, it is functional enough to put it in the hands of those who want
@@ -71,23 +73,33 @@
 .. _`recent snapshot`: http://cern.ch/wlav/reflex-2012-05-02.tar.bz2
 .. _`gccxml`: http://www.gccxml.org
 
-Next, get the `PyPy sources`_, select the reflex-support branch, and build
-pypy-c.
+Next, get the `PyPy sources`_, optionally select the reflex-support branch,
+and build it.
 For the build to succeed, the ``$ROOTSYS`` environment variable must point to
-the location of your ROOT (or standalone Reflex) installation::
+the location of your ROOT (or standalone Reflex) installation, or the
+``root-config`` utility must be accessible through ``PATH`` (e.g. by adding
+``$ROOTSYS/bin`` to ``PATH``).
+In case of the former, include files are expected under ``$ROOTSYS/include``
+and libraries under ``$ROOTSYS/lib``.
+Then run the translation to build ``pypy-c``::
 
     $ hg clone https://bitbucket.org/pypy/pypy
     $ cd pypy
-    $ hg up reflex-support
+    $ hg up reflex-support         # optional
     $ cd pypy/translator/goal
+    
+    # This example shows python, but using pypy-c is faster and uses less memory
     $ python translate.py -O jit --gcrootfinder=shadowstack targetpypystandalone.py --withmod-cppyy
 
 This will build a ``pypy-c`` that includes the cppyy module, and through that,
 Reflex support.
 Of course, if you already have a pre-built version of the ``pypy`` interpreter,
 you can use that for the translation rather than ``python``.
+If not, you may want `to obtain a binary distribution`_ to speed up the
+translation step.
 
 .. _`PyPy sources`: https://bitbucket.org/pypy/pypy/overview
+.. _`to obtain a binary distribution`: http://doc.pypy.org/en/latest/getting-started.html#download-a-pre-built-pypy
 
 
 Basic example
@@ -115,7 +127,7 @@
 code::
 
     $ genreflex MyClass.h
-    $ g++ -fPIC -rdynamic -O2 -shared -I$ROOTSYS/include MyClass_rflx.cpp -o libMyClassDict.so
+    $ g++ -fPIC -rdynamic -O2 -shared -I$ROOTSYS/include MyClass_rflx.cpp -o libMyClassDict.so -L$ROOTSYS/lib -lReflex
 
 Now you're ready to use the bindings.
 Since the bindings are designed to look pythonistic, it should be
@@ -139,8 +151,57 @@
 That's all there is to it!
 
 
+Automatic class loader
+======================
+
+There is one big problem in the code above, that prevents its use in a (large
+scale) production setting: the explicit loading of the reflection library.
+Clearly, if explicit load statements such as these show up in code downstream
+from the ``MyClass`` package, then that prevents the ``MyClass`` author from
+repackaging or even simply renaming the dictionary library.
+
+The solution is to make use of an automatic class loader, so that downstream
+code never has to call ``load_reflection_info()`` directly.
+The class loader makes use of so-called rootmap files, which ``genreflex``
+can produce.
+These files contain the list of available C++ classes and specify the library
+that needs to be loaded for their use (as an aside, this listing allows for a
+cross-check to see whether reflection info is generated for all classes that
+you expect).
+By convention, the rootmap files should be located next to the reflection info
+libraries, so that they can be found through the normal shared library search
+path.
+They can be concatenated together, or consist of a single rootmap file per
+library.
+For example::
+
+    $ genreflex MyClass.h --rootmap=libMyClassDict.rootmap --rootmap-lib=libMyClassDict.so
+    $ g++ -fPIC -rdynamic -O2 -shared -I$ROOTSYS/include MyClass_rflx.cpp -o libMyClassDict.so -L$ROOTSYS/lib -lReflex
+
+where the first option (``--rootmap``) specifies the output file name, and the
+second option (``--rootmap-lib``) the name of the reflection library where
+``MyClass`` will live.
+It is necessary to provide that name explicitly, since it is only in the
+separate linking step where this name is fixed.
+If the second option is not given, the library is assumed to be libMyClass.so,
+a name that is derived from the name of the header file.
+
+With the rootmap file in place, the above example can be rerun without explicit
+loading of the reflection info library::
+
+    $ pypy-c
+    >>>> import cppyy
+    >>>> myinst = cppyy.gbl.MyClass(42)
+    >>>> print myinst.GetMyInt()
+    42
+    >>>> # etc. ...
+
+As a caveat, note that the class loader is currently limited to classes only.
+
+
 Advanced example
 ================
+
 The following snippet of C++ is very contrived, to allow showing that such
 pathological code can be handled and to show how certain features play out in
 practice::
@@ -171,7 +232,7 @@
         std::string m_name;
     };
 
-    Base1* BaseFactory(const std::string& name, int i, double d) {
+    Base2* BaseFactory(const std::string& name, int i, double d) {
         return new Derived(name, i, d);
     }
 
@@ -196,6 +257,9 @@
 With the aid of a selection file, a large project can be easily managed:
 simply ``#include`` all relevant headers into a single header file that is
 handed to ``genreflex``.
+In fact, if you hand multiple header files to ``genreflex``, then a selection
+file is almost obligatory: without it, only classes from the last header will
+be selected.
 Then, apply a selection file to pick up all the relevant classes.
 For our purposes, the following rather straightforward selection will do
 (the name ``lcgdict`` for the root is historical, but required)::
@@ -213,7 +277,7 @@
 Now the reflection info can be generated and compiled::
 
     $ genreflex MyAdvanced.h --selection=MyAdvanced.xml
-    $ g++ -fPIC -rdynamic -O2 -shared -I$ROOTSYS/include MyAdvanced_rflx.cpp -o libAdvExDict.so
+    $ g++ -fPIC -rdynamic -O2 -shared -I$ROOTSYS/include MyAdvanced_rflx.cpp -o libAdvExDict.so -L$ROOTSYS/lib -lReflex
 
 and subsequently be used from PyPy::
 
@@ -237,7 +301,7 @@
 
 A couple of things to note, though.
 If you look back at the C++ definition of the ``BaseFactory`` function,
-you will see that it declares the return type to be a ``Base1``, yet the
+you will see that it declares the return type to be a ``Base2``, yet the
 bindings return an object of the actual type ``Derived``?
 This choice is made for a couple of reasons.
 First, it makes method dispatching easier: if bound objects are always their
@@ -268,15 +332,43 @@
 (active memory management is one such case), but by and large, if the use of a
 feature does not strike you as obvious, it is more likely to simply be a bug.
 That is a strong statement to make, but also a worthy goal.
+For the C++ side of the examples, refer to this `example code`_, which was
+bound using::
+
+    $ genreflex example.h --deep --rootmap=libexampleDict.rootmap --rootmap-lib=libexampleDict.so
+    $ g++ -fPIC -rdynamic -O2 -shared -I$ROOTSYS/include example_rflx.cpp -o libexampleDict.so -L$ROOTSYS/lib -lReflex
+
+.. _`example code`: cppyy_example.html
 
 * **abstract classes**: Are represented as python classes, since they are
   needed to complete the inheritance hierarchies, but will raise an exception
   if an attempt is made to instantiate from them.
+  Example::
+
+    >>>> from cppyy.gbl import AbstractClass, ConcreteClass
+    >>>> a = AbstractClass()
+    Traceback (most recent call last):
+      File "<console>", line 1, in <module>
+    TypeError: cannot instantiate abstract class 'AbstractClass'
+    >>>> issubclass(ConcreteClass, AbstractClass)
+    True
+    >>>> c = ConcreteClass()
+    >>>> isinstance(c, AbstractClass)
+    True
+    >>>>
 
 * **arrays**: Supported for builtin data types only, as used from module
   ``array``.
   Out-of-bounds checking is limited to those cases where the size is known at
   compile time (and hence part of the reflection info).
+  Example::
+
+    >>>> from cppyy.gbl import ConcreteClass
+    >>>> from array import array
+    >>>> c = ConcreteClass()
+    >>>> c.array_method(array('d', [1., 2., 3., 4.]), 4)
+    1 2 3 4
+    >>>> 
 
 * **builtin data types**: Map onto the expected equivalent python types, with
   the caveat that there may be size differences, and thus it is possible that
@@ -287,23 +379,77 @@
   in the hierarchy of the object being returned.
   This is important to preserve object identity as well as to make casting,
   a pure C++ feature after all, superfluous.
+  Example::
+
+    >>>> from cppyy.gbl import AbstractClass, ConcreteClass
+    >>>> c = ConcreteClass()
+    >>>> ConcreteClass.show_autocast.__doc__
+    'AbstractClass* ConcreteClass::show_autocast()'
+    >>>> d = c.show_autocast()
+    >>>> type(d)
+    <class '__main__.ConcreteClass'>
+    >>>>
+
+  However, if need be, you can perform C++-style reinterpret_casts (i.e.
+  without taking offsets into account), by taking and rebinding the address
+  of an object::
+
+    >>>> from cppyy import addressof, bind_object
+    >>>> e = bind_object(addressof(d), AbstractClass)
+    >>>> type(e)
+    <class '__main__.AbstractClass'>
+    >>>>
 
 * **classes and structs**: Get mapped onto python classes, where they can be
   instantiated as expected.
   If classes are inner classes or live in a namespace, their naming and
   location will reflect that.
+  Example::
+
+    >>>> from cppyy.gbl import ConcreteClass, Namespace
+    >>>> ConcreteClass == Namespace.ConcreteClass
+    False
+    >>>> n = Namespace.ConcreteClass.NestedClass()
+    >>>> type(n)
+    <class '__main__.Namespace::ConcreteClass::NestedClass'>
+    >>>> 
 
 * **data members**: Public data members are represented as python properties
   and provide read and write access on instances as expected.
+  Private and protected data members are not accessible.
+  Example::
+
+    >>>> from cppyy.gbl import ConcreteClass
+    >>>> c = ConcreteClass()
+    >>>> c.m_int
+    42
+    >>>>
 
 * **default arguments**: C++ default arguments work as expected, but python
   keywords are not supported.
   It is technically possible to support keywords, but for the C++ interface,
   the formal argument names have no meaning and are not considered part of the
   API, hence it is not a good idea to use keywords.
+  Example::
+
+    >>>> from cppyy.gbl import ConcreteClass
+    >>>> c = ConcreteClass()       # uses default argument
+    >>>> c.m_int
+    42
+    >>>> c = ConcreteClass(13)
+    >>>> c.m_int
+    13
+    >>>>
 
 * **doc strings**: The doc string of a method or function contains the C++
   arguments and return types of all overloads of that name, as applicable.
+  Example::
+
+    >>>> from cppyy.gbl import ConcreteClass
+    >>>> print ConcreteClass.array_method.__doc__
+    void ConcreteClass::array_method(int*, int)
+    void ConcreteClass::array_method(double*, int)
+    >>>> 
 
 * **enums**: Are translated as ints with no further checking.
 
@@ -318,6 +464,40 @@
   This is a current, not a fundamental, limitation.
   The C++ side will not see any overridden methods on the python side, as
   cross-inheritance is planned but not yet supported.
+  Example::
+
+    >>>> from cppyy.gbl import ConcreteClass
+    >>>> help(ConcreteClass)
+    Help on class ConcreteClass in module __main__:
+
+    class ConcreteClass(AbstractClass)
+     |  Method resolution order:
+     |      ConcreteClass
+     |      AbstractClass
+     |      cppyy.CPPObject
+     |      __builtin__.CPPInstance
+     |      __builtin__.object
+     |  
+     |  Methods defined here:
+     |  
+     |  ConcreteClass(self, *args)
+     |      ConcreteClass::ConcreteClass(const ConcreteClass&)
+     |      ConcreteClass::ConcreteClass(int)
+     |      ConcreteClass::ConcreteClass()
+     |
+     etc. ....
+
+* **memory**: C++ instances created by calling their constructor from python
+  are owned by python.
+  You can check/change the ownership with the _python_owns flag that every
+  bound instance carries.
+  Example::
+
+    >>>> from cppyy.gbl import ConcreteClass
+    >>>> c = ConcreteClass()
+    >>>> c._python_owns            # True: object created in Python
+    True
+    >>>> 
 
 * **methods**: Are represented as python methods and work as expected.
   They are first class objects and can be bound to an instance.
@@ -333,23 +513,34 @@
   Namespaces are more open-ended than classes, so sometimes initial access may
   result in updates as data and functions are looked up and constructed
   lazily.
-  Thus the result of ``dir()`` on a namespace should not be relied upon: it
-  only shows the already accessed members. (TODO: to be fixed by implementing
-  __dir__.)
+  Thus the result of ``dir()`` on a namespace shows the classes available,
+  even if they may not have been created yet.
+  It does not show classes that could potentially be loaded by the class
+  loader.
+  Once created, namespaces are registered as modules, to allow importing from
+  them.
+  Namespaces currently do not work with the class loader.
+  Fixing these bootstrap problems is on the TODO list.
   The global namespace is ``cppyy.gbl``.
 
 * **operator conversions**: If defined in the C++ class and a python
   equivalent exists (i.e. all builtin integer and floating point types, as well
   as ``bool``), it will map onto that python conversion.
   Note that ``char*`` is mapped onto ``__str__``.
+  Example::
+
+    >>>> from cppyy.gbl import ConcreteClass
+    >>>> print ConcreteClass()
+    Hello operator const char*!
+    >>>> 
 
 * **operator overloads**: If defined in the C++ class and if a python
   equivalent is available (not always the case, think e.g. of ``operator||``),
   then they work as expected.
   Special care needs to be taken for global operator overloads in C++: first,
   make sure that they are actually reflected, especially for the global
-  overloads for ``operator==`` and ``operator!=`` of STL iterators in the case
-  of gcc.
+  overloads for ``operator==`` and ``operator!=`` of STL vector iterators in
+  the case of gcc (note that they are not needed to iterate over a vector).
   Second, make sure that reflection info is loaded in the proper order.
   I.e. that these global overloads are available before use.
 
@@ -361,6 +552,11 @@
   If a pointer is a global variable, the C++ side can replace the underlying
   object and the python side will immediately reflect that.
 
+* **PyObject***: Arguments and return types of ``PyObject*`` can be used, and
+  passed on to CPython API calls.
+  Since these CPython-like objects need to be created and tracked (this all
+  happens through ``cpyext``) this interface is not particularly fast.
+
 * **static data members**: Are represented as python property objects on the
   class and the meta-class.
   Both read and write access is as expected.
@@ -374,17 +570,30 @@
   will be returned if the return type is ``const char*``.
 
 * **templated classes**: Are represented in a meta-class style in python.
-  This looks a little bit confusing, but conceptually is rather natural.
+  This may look a little bit confusing, but conceptually is rather natural.
   For example, given the class ``std::vector<int>``, the meta-class part would
-  be ``std.vector`` in python.
+  be ``std.vector``.
   Then, to get the instantiation on ``int``, do ``std.vector(int)`` and to
-  create an instance of that class, do ``std.vector(int)()``.
+  create an instance of that class, do ``std.vector(int)()``::
+
+    >>>> import cppyy
+    >>>> cppyy.load_reflection_info('libexampleDict.so')
+    >>>> cppyy.gbl.std.vector                # template metatype
+    <cppyy.CppyyTemplateType object at 0x00007fcdd330f1a0>
+    >>>> cppyy.gbl.std.vector(int)           # instantiates template -> class
+    <class '__main__.std::vector<int>'>
+    >>>> cppyy.gbl.std.vector(int)()         # instantiates class -> object
+    <__main__.std::vector<int> object at 0x00007fe480ba4bc0>
+    >>>> 
+
   Note that templates can be build up by handing actual types to the class
   instantiation (as done in this vector example), or by passing in the list of
   template arguments as a string.
   The former is a lot easier to work with if you have template instantiations
-  using classes that themselves are templates (etc.) in the arguments.
-  All classes must already exist in the loaded reflection info.
+  using classes that themselves are templates in the arguments (think e.g. a
+  vector of vectors).
+  All template classes must already exist in the loaded reflection info, they
+  do not work (yet) with the class loader.
 
 * **typedefs**: Are simple python references to the actual classes to which
   they refer.
@@ -429,19 +638,30 @@
         int m_i;
     };
 
-    template class std::vector<MyClass>;
+    #ifdef __GCCXML__
+    template class std::vector<MyClass>;   // explicit instantiation
+    #endif
 
 If you know for certain that all symbols will be linked in from other sources,
 you can also declare the explicit template instantiation ``extern``.
+An alternative is to add an object to an unnamed namespace::
 
-Unfortunately, this is not enough for gcc.
-The iterators, if they are going to be used, need to be instantiated as well,
-as do the comparison operators on those iterators, as these live in an
-internal namespace, rather than in the iterator classes.
+    namespace {
+        std::vector<MyClass> vmc;
+    } // unnamed namespace
+
+Unfortunately, this is not always enough for gcc.
+The iterators of vectors, if they are going to be used, need to be
+instantiated as well, as do the comparison operators on those iterators, as
+these live in an internal namespace, rather than in the iterator classes.
+Note that you do NOT need these iterators to iterate over a vector.
+You only need them if you plan to explicitly call e.g. ``begin`` and ``end``
+methods, and do comparisons of iterators.
 One way to handle this, is to deal with this once in a macro, then reuse that
 macro for all ``vector`` classes.
-Thus, the header above needs this, instead of just the explicit instantiation
-of the ``vector<MyClass>``::
+Thus, the header above needs this (again protected with
+``#ifdef __GCCXML__``), instead of just the explicit instantiation of the
+``vector<MyClass>``::
 
     #define STLTYPES_EXPLICIT_INSTANTIATION_DECL(STLTYPE, TTYPE)                      \
     template class std::STLTYPE< TTYPE >;                                             \
@@ -462,11 +682,7 @@
     $ cat MyTemplate.xml
     <lcgdict>
         <class pattern="std::vector<*>" />
-        <class pattern="__gnu_cxx::__normal_iterator<*>" />
-        <class pattern="__gnu_cxx::new_allocator<*>" />
-        <class pattern="std::_Vector_base<*>" />
-        <class pattern="std::_Vector_base<*>::_Vector_impl" />
-        <class pattern="std::allocator<*>" />
+        <class pattern="std::vector<*>::iterator" />
         <function name="__gnu_cxx::operator=="/>
         <function name="__gnu_cxx::operator!="/>
 
@@ -475,13 +691,13 @@
 
 Run the normal ``genreflex`` and compilation steps::
 
-    $ genreflex MyTemplate.h --selection=MyTemplate.xm
-    $ g++ -fPIC -rdynamic -O2 -shared -I$ROOTSYS/include MyTemplate_rflx.cpp -o libTemplateDict.so
+    $ genreflex MyTemplate.h --selection=MyTemplate.xml
+    $ g++ -fPIC -rdynamic -O2 -shared -I$ROOTSYS/include MyTemplate_rflx.cpp -o libTemplateDict.so -L$ROOTSYS/lib -lReflex
 
 Note: this is a dirty corner that clearly could do with some automation,
 even if the macro already helps.
 Such automation is planned.
-In fact, in the cling world, the backend can perform the template
+In fact, in the Cling world, the backend can perform the template
 instantations and generate the reflection info on the fly, and none of the
 above will any longer be necessary.
 
@@ -500,7 +716,8 @@
     1 2 3
     >>>>
 
-Other templates work similarly.
+Other templates work similarly, but are typically simpler, as there are no
+similar issues with iterators for e.g. ``std::list``.
 The arguments to the template instantiation can either be a string with the
 full list of arguments, or the explicit classes.
 The latter makes for easier code writing if the classes passed to the
@@ -550,7 +767,9 @@
 There are a couple of minor differences between PyCintex and cppyy, most to do
 with naming.
 The one that you will run into directly, is that PyCintex uses a function
-called ``loadDictionary`` rather than ``load_reflection_info``.
+called ``loadDictionary`` rather than ``load_reflection_info`` (it has the
+same rootmap-based class loader functionality, though, making this point
+somewhat moot).
 The reason for this is that Reflex calls the shared libraries that contain
 reflection info "dictionaries."
 However, in python, the name `dictionary` already has a well-defined meaning,
@@ -585,3 +804,15 @@
 In that wrapper script you can rename methods exactly the way you need it.
 
 In the cling world, all these differences will be resolved.
+
+
+Python3
+=======
+
+To change versions of CPython (to Python3, another version of Python, or later
+to the `Py3k`_ version of PyPy), the only part that requires recompilation is
+the bindings module, be it ``cppyy`` or ``libPyROOT.so`` (in PyCintex).
+Although ``genreflex`` is indeed a Python tool, the generated reflection
+information is completely independent of Python.
+
+.. _`Py3k`: https://bitbucket.org/pypy/pypy/src/py3k
diff --git a/pypy/doc/cppyy_example.rst b/pypy/doc/cppyy_example.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/cppyy_example.rst
@@ -0,0 +1,56 @@
+// File: example.h::
+
+    #include <iostream>
+    #include <vector>
+
+    class AbstractClass {
+    public:
+        virtual ~AbstractClass() {}
+        virtual void abstract_method() = 0;
+    };
+
+    class ConcreteClass : AbstractClass {
+    public:
+        ConcreteClass(int n=42) : m_int(n) {}
+        ~ConcreteClass() {}
+
+        virtual void abstract_method() {
+            std::cout << "called concrete method" << std::endl;
+        }
+
+        void array_method(int* ad, int size) {
+            for (int i=0; i < size; ++i)
+                std::cout << ad[i] << ' ';
+            std::cout << std::endl;
+        }
+
+        void array_method(double* ad, int size) {
+            for (int i=0; i < size; ++i)
+                std::cout << ad[i] << ' ';
+            std::cout << std::endl;
+        }
+
+        AbstractClass* show_autocast() {
+            return this;
+        }
+
+        operator const char*() {
+            return "Hello operator const char*!";
+        }
+
+    public:
+        int m_int;
+    };
+
+    namespace Namespace {
+
+       class ConcreteClass {
+       public:
+          class NestedClass {
+          public:
+             std::vector<int> m_v;
+          };
+
+       };
+
+    } // namespace Namespace
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -85,13 +85,6 @@
 
     _winreg
 
-  Note that only some of these modules are built-in in a typical
-  CPython installation, and the rest is from non built-in extension
-  modules.  This means that e.g. ``import parser`` will, on CPython,
-  find a local file ``parser.py``, while ``import sys`` will not find a
-  local file ``sys.py``.  In PyPy the difference does not exist: all
-  these modules are built-in.
-
 * Supported by being rewritten in pure Python (possibly using ``ctypes``):
   see the `lib_pypy/`_ directory.  Examples of modules that we
   support this way: ``ctypes``, ``cPickle``, ``cmath``, ``dbm``, ``datetime``...
@@ -324,5 +317,10 @@
   type and vice versa. For builtin types, a dictionary will be returned that
   cannot be changed (but still looks and behaves like a normal dictionary).
 
+* the ``__len__`` or ``__length_hint__`` special methods are sometimes
+  called by CPython to get a length estimate to preallocate internal arrays.
+  So far, PyPy never calls ``__len__`` for this purpose, and never calls
+  ``__length_hint__`` at all.
+
 
 .. include:: _ref.txt
diff --git a/pypy/doc/extending.rst b/pypy/doc/extending.rst
--- a/pypy/doc/extending.rst
+++ b/pypy/doc/extending.rst
@@ -23,7 +23,7 @@
 
 * Write them in RPython as mixedmodule_, using *rffi* as bindings.
 
-* Write them in C++ and bind them through Reflex_ (EXPERIMENTAL)
+* Write them in C++ and bind them through Reflex_
 
 .. _ctypes: #CTypes
 .. _\_ffi: #LibFFI
diff --git a/pypy/doc/getting-started-python.rst b/pypy/doc/getting-started-python.rst
--- a/pypy/doc/getting-started-python.rst
+++ b/pypy/doc/getting-started-python.rst
@@ -103,10 +103,12 @@
 executable. The executable behaves mostly like a normal Python interpreter::
 
     $ ./pypy-c
-    Python 2.7.2 (0e28b379d8b3, Feb 09 2012, 19:41:03)
-    [PyPy 1.8.0 with GCC 4.4.3] on linux2
+    Python 2.7.2 (341e1e3821ff, Jun 07 2012, 15:40:31)
+    [PyPy 1.9.0 with GCC 4.4.3] on linux2
     Type "help", "copyright", "credits" or "license" for more information.
-    And now for something completely different: ``this sentence is false''
+    And now for something completely different: ``RPython magically makes you rich
+    and famous (says so on the tin)''
+
     >>>> 46 - 4
     42
     >>>> from test import pystone
@@ -220,7 +222,6 @@
    ./include/
    ./lib_pypy/
    ./lib-python/2.7
-   ./lib-python/modified-2.7
    ./site-packages/
 
 The hierarchy shown above is relative to a PREFIX directory.  PREFIX is
diff --git a/pypy/doc/getting-started.rst b/pypy/doc/getting-started.rst
--- a/pypy/doc/getting-started.rst
+++ b/pypy/doc/getting-started.rst
@@ -53,10 +53,10 @@
 PyPy is ready to be executed as soon as you unpack the tarball or the zip
 file, with no need to install it in any specific location::
 
-    $ tar xf pypy-1.8-linux.tar.bz2
-    $ ./pypy-1.8/bin/pypy
-    Python 2.7.2 (0e28b379d8b3, Feb 09 2012, 19:41:03)
-    [PyPy 1.8.0 with GCC 4.4.3] on linux2
+    $ tar xf pypy-1.9-linux.tar.bz2
+    $ ./pypy-1.9/bin/pypy
+    Python 2.7.2 (341e1e3821ff, Jun 07 2012, 15:40:31)
+    [PyPy 1.9.0 with GCC 4.4.3] on linux2
     Type "help", "copyright", "credits" or "license" for more information.
     And now for something completely different: ``it seems to me that once you
     settle on an execution / object model and / or bytecode format, you've already
@@ -76,14 +76,14 @@
 
     $ curl -O https://raw.github.com/pypa/pip/master/contrib/get-pip.py
 
-    $ ./pypy-1.8/bin/pypy distribute_setup.py
+    $ ./pypy-1.9/bin/pypy distribute_setup.py
 
-    $ ./pypy-1.8/bin/pypy get-pip.py
+    $ ./pypy-1.9/bin/pypy get-pip.py
 
-    $ ./pypy-1.8/bin/pip install pygments  # for example
+    $ ./pypy-1.9/bin/pip install pygments  # for example
 
-3rd party libraries will be installed in ``pypy-1.8/site-packages``, and
-the scripts in ``pypy-1.8/bin``.
+3rd party libraries will be installed in ``pypy-1.9/site-packages``, and
+the scripts in ``pypy-1.9/bin``.
 
 Installing using virtualenv
 ---------------------------
diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst
--- a/pypy/doc/how-to-release.rst
+++ b/pypy/doc/how-to-release.rst
@@ -23,7 +23,9 @@
   some of the next updates may be done before or after branching; make
   sure things are ported back to the trunk and to the branch as
   necessary
-* update pypy/doc/contributor.txt (and possibly LICENSE)
+* update pypy/doc/contributor.rst (and possibly LICENSE)
+* rename pypy/doc/whatsnew_head.rst to whatsnew_VERSION.rst
+  and create a fresh whatsnew_head.rst after the release
 * update README
 * change the tracker to have a new release tag to file bugs against
 * go to pypy/tool/release and run:
diff --git a/pypy/doc/image/agile-talk.jpg b/pypy/doc/image/agile-talk.jpg
deleted file mode 100644
Binary file pypy/doc/image/agile-talk.jpg has changed
diff --git a/pypy/doc/image/architecture-session.jpg b/pypy/doc/image/architecture-session.jpg
deleted file mode 100644
Binary file pypy/doc/image/architecture-session.jpg has changed
diff --git a/pypy/doc/image/bram.jpg b/pypy/doc/image/bram.jpg
deleted file mode 100644
Binary file pypy/doc/image/bram.jpg has changed
diff --git a/pypy/doc/image/coding-discussion.jpg b/pypy/doc/image/coding-discussion.jpg
deleted file mode 100644
Binary file pypy/doc/image/coding-discussion.jpg has changed
diff --git a/pypy/doc/image/guido.jpg b/pypy/doc/image/guido.jpg
deleted file mode 100644
Binary file pypy/doc/image/guido.jpg has changed
diff --git a/pypy/doc/image/interview-bobippolito.jpg b/pypy/doc/image/interview-bobippolito.jpg
deleted file mode 100644
Binary file pypy/doc/image/interview-bobippolito.jpg has changed
diff --git a/pypy/doc/image/interview-timpeters.jpg b/pypy/doc/image/interview-timpeters.jpg
deleted file mode 100644
Binary file pypy/doc/image/interview-timpeters.jpg has changed
diff --git a/pypy/doc/image/introductory-student-talk.jpg b/pypy/doc/image/introductory-student-talk.jpg
deleted file mode 100644
Binary file pypy/doc/image/introductory-student-talk.jpg has changed
diff --git a/pypy/doc/image/introductory-talk-pycon.jpg b/pypy/doc/image/introductory-talk-pycon.jpg
deleted file mode 100644
Binary file pypy/doc/image/introductory-talk-pycon.jpg has changed
diff --git a/pypy/doc/image/ironpython.jpg b/pypy/doc/image/ironpython.jpg
deleted file mode 100644
Binary file pypy/doc/image/ironpython.jpg has changed
diff --git a/pypy/doc/image/mallorca-trailer.jpg b/pypy/doc/image/mallorca-trailer.jpg
deleted file mode 100644
Binary file pypy/doc/image/mallorca-trailer.jpg has changed
diff --git a/pypy/doc/image/pycon-trailer.jpg b/pypy/doc/image/pycon-trailer.jpg
deleted file mode 100644
Binary file pypy/doc/image/pycon-trailer.jpg has changed
diff --git a/pypy/doc/image/sprint-tutorial.jpg b/pypy/doc/image/sprint-tutorial.jpg
deleted file mode 100644
Binary file pypy/doc/image/sprint-tutorial.jpg has changed
diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst
--- a/pypy/doc/index.rst
+++ b/pypy/doc/index.rst
@@ -15,7 +15,7 @@
 
 * `FAQ`_: some frequently asked questions.
 
-* `Release 1.8`_: the latest official release
+* `Release 1.9`_: the latest official release
 
 * `PyPy Blog`_: news and status info about PyPy 
 
@@ -75,7 +75,7 @@
 .. _`Getting Started`: getting-started.html
 .. _`Papers`: extradoc.html
 .. _`Videos`: video-index.html
-.. _`Release 1.8`: http://pypy.org/download.html
+.. _`Release 1.9`: http://pypy.org/download.html
 .. _`speed.pypy.org`: http://speed.pypy.org
 .. _`RPython toolchain`: translation.html
 .. _`potential project ideas`: project-ideas.html
@@ -120,9 +120,9 @@
 Windows, on top of .NET, and on top of Java.
 To dig into PyPy it is recommended to try out the current
 Mercurial default branch, which is always working or mostly working,
-instead of the latest release, which is `1.8`__.
+instead of the latest release, which is `1.9`__.
 
-.. __: release-1.8.0.html
+.. __: release-1.9.0.html
 
 PyPy is mainly developed on Linux and Mac OS X.  Windows is supported,
 but platform-specific bugs tend to take longer before we notice and fix
diff --git a/pypy/doc/release-1.9.0.rst b/pypy/doc/release-1.9.0.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/release-1.9.0.rst
@@ -0,0 +1,111 @@
+====================
+PyPy 1.9 - Yard Wolf
+====================
+
+We're pleased to announce the 1.9 release of PyPy. This release brings mostly
+bugfixes, performance improvements, other small improvements and overall
+progress on the `numpypy`_ effort.
+It also brings an improved situation on Windows and OS X.
+
+You can download the PyPy 1.9 release here:
+
+    http://pypy.org/download.html 
+
+.. _`numpypy`: http://pypy.org/numpydonate.html
+
+
+What is PyPy?
+=============
+
+PyPy is a very compliant Python interpreter, almost a drop-in replacement for
+CPython 2.7. It's fast (`pypy 1.9 and cpython 2.7.2`_ performance comparison)
+due to its integrated tracing JIT compiler.
+
+This release supports x86 machines running Linux 32/64, Mac OS X 64 or
+Windows 32.  Windows 64 work is still stalling; we would welcome a volunteer
+to handle that.
+
+.. _`pypy 1.9 and cpython 2.7.2`: http://speed.pypy.org
+
+
+Thanks to our donors
+====================
+
+But first of all, we would like to say thank you to all people who
+donated some money to one of our four calls:
+
+  * `NumPy in PyPy`_ (got so far $44502 out of $60000, 74%)
+
+  * `Py3k (Python 3)`_ (got so far $43563 out of $105000, 41%)
+
+  * `Software Transactional Memory`_ (got so far $21791 of $50400, 43%)
+
+  * as well as our general PyPy pot.
+
+Thank you all for proving that it is indeed possible for a small team of
+programmers to get funded like that, at least for some
+time.  We want to include this thank you in the present release
+announcement even though most of the work is not finished yet.  More
+precisely, neither Py3k nor STM is ready to make it into an official release
+yet: people interested in them need to grab and (attempt to) translate
+PyPy from the corresponding branches (respectively ``py3k`` and
+``stm-thread``).
+
+.. _`NumPy in PyPy`: http://pypy.org/numpydonate.html
+.. _`Py3k (Python 3)`: http://pypy.org/py3donate.html
+.. _`Software Transactional Memory`: http://pypy.org/tmdonate.html
+
+Highlights
+==========
+
+* This release still implements Python 2.7.2.
+
+* Many bugs were corrected for Windows 32 bit.  This includes new
+  functionality to test the validity of file descriptors; and
+  correct handling of the calling conventions for ctypes.  (Still not
+  much progress on Win64.) A lot of work on this has been done by Matti Picus
+  and Amaury Forgeot d'Arc.
+
+* Improvements in ``cpyext``, our emulator for CPython C extension modules.
+  For example PyOpenSSL should now work.  We thank various people for help.
+
+* Sets now have strategies just like dictionaries. This means for example
+  that a set containing only ints will be more compact (and faster).
+
+* A lot of progress on various aspects of ``numpypy``. See the `numpy-status`_
+  page for the automatic report.
+
+* It is now possible to create and manipulate C-like structures using the
+  PyPy-only ``_ffi`` module.  The advantage over using e.g. ``ctypes`` is that
+  ``_ffi`` is very JIT-friendly, and getting/setting of fields is translated
+  to few assembler instructions by the JIT. However, this is mostly intended
+  as a low-level backend to be used by more user-friendly FFI packages, and
+  the API might change in the future. Use it at your own risk.
+
+* The non-x86 backends for the JIT are progressing but are still not
+  merged (ARMv7 and PPC64).
+
+* JIT hooks for inspecting the created assembler code have been improved.
+  See `JIT hooks documentation`_ for details.
+
+* ``select.kqueue`` has been added (BSD).
+
+* Handling of keyword arguments has been drastically improved in the best-case
+  scenario: proxy functions which simply forward ``*args`` and ``**kwargs``
+  to another function now perform much better with the JIT.
+
+* List comprehension has been improved.
+
+.. _`numpy-status`: http://buildbot.pypy.org/numpy-status/latest.html
+.. _`JIT hooks documentation`: http://doc.pypy.org/en/latest/jit-hooks.html
+
+JitViewer
+=========
+
+There will be a corresponding 1.9 release of JitViewer which is guaranteed
+to work with PyPy 1.9. See the `JitViewer docs`_ for details.
+
+.. _`JitViewer docs`: http://bitbucket.org/pypy/jitviewer
+
+Cheers,
+The PyPy Team
diff --git a/pypy/doc/test/test_whatsnew.py b/pypy/doc/test/test_whatsnew.py
--- a/pypy/doc/test/test_whatsnew.py
+++ b/pypy/doc/test/test_whatsnew.py
@@ -16,6 +16,7 @@
             startrev = parseline(line)
         elif line.startswith('.. branch:'):
             branches.add(parseline(line))
+    branches.discard('default')
     return startrev, branches
 
 def get_merged_branches(path, startrev, endrev):
@@ -51,6 +52,10 @@
 .. branch: hello
 
 qqq www ttt
+
+.. branch: default
+
+"default" should be ignored and not put in the set of documented branches
 """
     startrev, branches = parse_doc(s)
     assert startrev == '12345'
diff --git a/pypy/doc/video-index.rst b/pypy/doc/video-index.rst
--- a/pypy/doc/video-index.rst
+++ b/pypy/doc/video-index.rst
@@ -2,39 +2,11 @@
 PyPy video documentation 
 =========================
 
-Requirements to download and view
----------------------------------
-
-In order to download the videos you need to point a
-BitTorrent client at the torrent files provided below. 
-We do not provide any other download method at this
-time.  Please get a BitTorrent client (such as bittorrent). 
-For a list of clients please 
-see http://en.wikipedia.org/wiki/Category:Free_BitTorrent_clients or 
-http://en.wikipedia.org/wiki/Comparison_of_BitTorrent_clients. 
-For more information about Bittorrent see 
-http://en.wikipedia.org/wiki/Bittorrent.
-
-In order to view the downloaded movies you need to 
-have a video player that supports DivX AVI files (DivX 5, mp3 audio)
-such as `mplayer`_, `xine`_, `vlc`_ or the windows media player.
-
-.. _`mplayer`: http://www.mplayerhq.hu/design7/dload.html
-.. _`xine`: http://www.xine-project.org
-.. _`vlc`: http://www.videolan.org/vlc/
-
-You can find the necessary codecs in the ffdshow-library:
-http://sourceforge.net/projects/ffdshow/
-
-or use the original divx codec (for Windows):
-http://www.divx.com/software/divx-plus
-
-
 Copyrights and Licensing 
 ----------------------------
 
-The following videos are copyrighted by merlinux gmbh and 
-published under the Creative Commons Attribution License 2.0 Germany: http://creativecommons.org/licenses/by/2.0/de/
+The following videos are copyrighted by merlinux gmbh and available on
+YouTube.
 
 If you need another license, don't hesitate to contact us. 
 
@@ -42,255 +14,202 @@
 Trailer: PyPy at the PyCon 2006
 -------------------------------
 
-130mb: http://buildbot.pypy.org/misc/torrent/pycon-trailer.avi.torrent
+This trailer shows the PyPy team at PyCon 2006: a behind-the-scenes look at
+sprints, talks and everywhere else.
 
-71mb: http://buildbot.pypy.org/misc/torrent/pycon-trailer-medium.avi.torrent
+.. raw:: html
 
-50mb: http://buildbot.pypy.org/misc/torrent/pycon-trailer-320x240.avi.torrent
-
-.. image:: image/pycon-trailer.jpg
-   :scale: 100
-   :alt: Trailer PyPy at PyCon
-   :align: left
-
-This trailer shows the PyPy team at the PyCon 2006, a behind-the-scenes at sprints, talks and everywhere else.
-
-PAL, 9 min, DivX AVI
-
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/WfGszrRUdtc?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Interview with Tim Peters
 -------------------------
 
-440mb: http://buildbot.pypy.org/misc/torrent/interview-timpeters-v2.avi.torrent
+Interview with CPython core developer Tim Peters at PyCon 2006, Dallas,
+US. (2006-03-02)
 
-138mb: http://buildbot.pypy.org/misc/torrent/interview-timpeters-320x240.avi.torrent
+Tim Peters, a longtime CPython core developer, talks about how he got into
+Python, what he thinks about the PyPy project and why he thinks it would have
+never been possible in the US.
 
-.. image:: image/interview-timpeters.jpg
-   :scale: 100
-   :alt: Interview with Tim Peters
-   :align: left
+.. raw:: html
 
-Interview with CPython core developer Tim Peters at PyCon 2006, Dallas, US. (2006-03-02)
-
-PAL, 23 min, DivX AVI
-
-Tim Peters, a longtime CPython core developer talks about how he got into Python, what he thinks about the PyPy project and why he thinks it would have never been possible in the US.
-
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/1wAOy88WxmY?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Interview with Bob Ippolito
 ---------------------------
 
-155mb: http://buildbot.pypy.org/misc/torrent/interview-bobippolito-v2.avi.torrent
+What do you think about PyPy? Interview with American software developer Bob
+Ippolito at PyCon 2006, Dallas, US. (2006-03-01)
 
-50mb: http://buildbot.pypy.org/misc/torrent/interview-bobippolito-320x240.avi.torrent
+Bob Ippolito is an Open Source software developer from San Francisco and has
+been to two PyPy sprints. In this interview he is giving his opinion on the
+project.
 
-.. image:: image/interview-bobippolito.jpg
-   :scale: 100
-   :alt: Interview with Bob Ippolito
-   :align: left
+.. raw:: html
 
-What do you think about PyPy? Interview with American software developer Bob Ippolito at tPyCon 2006, Dallas, US. (2006-03-01)
-
-PAL 8 min, DivX AVI
-
-Bob Ippolito is an Open Source software developer from San Francisco and has been to two PyPy sprints. In this interview he is giving his opinion on the project.
-
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/c5rq4Q03zgg?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Introductory talk on PyPy
 -------------------------
 
-430mb: http://buildbot.pypy.org/misc/torrent/introductory-talk-pycon-v1.avi.torrent
-
-166mb: http://buildbot.pypy.org/misc/torrent/introductory-talk-pycon-320x240.avi.torrent
-
-.. image:: image/introductory-talk-pycon.jpg
-   :scale: 100
-   :alt: Introductory talk at PyCon 2006
-   :align: left
-
-This introductory talk is given by core developers Michael Hudson and Christian Tismer at PyCon 2006, Dallas, US. (2006-02-26)
-
-PAL, 28 min, divx AVI
+This introductory talk is given by core developers Michael Hudson and
+Christian Tismer at PyCon 2006, Dallas, US. (2006-02-26)
 
 Michael Hudson talks about the basic building blocks of Python, the currently
 available back-ends, and the status of PyPy in general. Christian Tismer takes
-over to explain how co-routines can be used to implement things like
-Stackless and Greenlets in PyPy.
+over to explain how co-routines can be used to implement things like Stackless
+and Greenlets in PyPy.
 
+.. raw:: html
+
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/AWUhXW2pLDE?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Talk on Agile Open Source Methods in the PyPy project
 -----------------------------------------------------
 
-395mb: http://buildbot.pypy.org/misc/torrent/agile-talk-v1.avi.torrent
-
-153mb: http://buildbot.pypy.org/misc/torrent/agile-talk-320x240.avi.torrent
-
-.. image:: image/agile-talk.jpg
-   :scale: 100
-   :alt: Agile talk
-   :align: left
-
-Core developer Holger Krekel and project manager Beatrice During are giving a talk on the agile open source methods used in the PyPy project at PyCon 2006, Dallas, US. (2006-02-26)
-
-PAL, 26 min, divx AVI
+Core developer Holger Krekel and project manager Beatrice During are giving a
+talk on the agile open source methods used in the PyPy project at PyCon 2006,
+Dallas, US. (2006-02-26)
 
 Holger Krekel explains more about the goals and history of PyPy, and the
 structure and organization behind it. Bea During describes the intricacies of
 driving a distributed community in an agile way, and how to combine that with
 the formalities required for EU funding.
 
+.. raw:: html
+
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/ed-zAxZtGlY?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 PyPy Architecture session
 -------------------------
 
-744mb: http://buildbot.pypy.org/misc/torrent/architecture-session-v1.avi.torrent
-
-288mb: http://buildbot.pypy.org/misc/torrent/architecture-session-320x240.avi.torrent
-
-.. image:: image/architecture-session.jpg
-   :scale: 100
-   :alt: Architecture session
-   :align: left
-
-This architecture session is given by core developers Holger Krekel and Armin Rigo at PyCon 2006, Dallas, US. (2006-02-26)
-
-PAL, 48 min, divx AVI
+This architecture session is given by core developers Holger Krekel and Armin
+Rigo at PyCon 2006, Dallas, US. (2006-02-26)
 
 Holger Krekel and Armin Rigo talk about the basic implementation,
-implementation level aspects and the RPython translation toolchain. This
-talk also gives an insight into how a developer works with these tools on
-a daily basis, and pays special attention to flow graphs.
+implementation level aspects and the RPython translation toolchain. This talk
+also gives an insight into how a developer works with these tools on a daily
+basis, and pays special attention to flow graphs.
 
+.. raw:: html
+
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/7opXGaQUUA4?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Sprint tutorial
 ---------------
 
-680mb: http://buildbot.pypy.org/misc/torrent/sprint-tutorial-v2.avi.torrent
+Sprint tutorial by core developer Michael Hudson at PyCon 2006, Dallas,
+US. (2006-02-27)
 
-263mb: http://buildbot.pypy.org/misc/torrent/sprint-tutorial-320x240.avi.torrent
+Michael Hudson gives an in-depth, very technical introduction to a PyPy
+sprint. The film provides a detailed and hands-on overview about the
+architecture of PyPy, especially the RPython translation toolchain.
 
-.. image:: image/sprint-tutorial.jpg
-   :scale: 100
-   :alt: Sprint Tutorial
-   :align: left
+.. raw:: html
 
-Sprint tutorial by core developer Michael Hudson at PyCon 2006, Dallas, US. (2006-02-27)
-
-PAL, 44 min, divx AVI
-
-Michael Hudson gives an in-depth, very technical introduction to a PyPy sprint. The film provides a detailed and hands-on overview about the architecture of PyPy, especially the RPython translation toolchain.
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/1YV7J74xrMI?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Scripting .NET with IronPython by Jim Hugunin
 ---------------------------------------------
 
-372mb: http://buildbot.pypy.org/misc/torrent/ironpython-talk-v2.avi.torrent
+Talk by Jim Hugunin (Microsoft) on the IronPython implementation on the .NET
+framework at the PyCon 2006, Dallas, US.
 
-270mb: http://buildbot.pypy.org/misc/torrent/ironpython-talk-320x240.avi.torrent
+Jim Hugunin talks about regression tests, the code generation and the object
+layout, the new-style instance and gives a CLS interop demo.
 
-.. image:: image/ironpython.jpg
-   :scale: 100
-   :alt: Jim Hugunin on IronPython
-   :align: left
+.. raw:: html
 
-Talk by Jim Hugunin (Microsoft) on the IronPython implementation on the .NET framework at this years PyCon, Dallas, US.
-
-PAL, 44 min, DivX AVI
-
-Jim Hugunin talks about regression tests, the code generation and the object layout, the new-style instance and gives a CLS interop demo.
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/bq9ZGN3-o80?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Bram Cohen, founder and developer of BitTorrent
 -----------------------------------------------
 
-509mb: http://buildbot.pypy.org/misc/torrent/bram-cohen-interview-v1.avi.torrent
+Bram Cohen is interviewed by Steve Holden at the PyCon 2006, Dallas, US.
 
-370mb: http://buildbot.pypy.org/misc/torrent/bram-cohen-interview-320x240.avi.torrent
+.. raw:: html
 
-.. image:: image/bram.jpg
-   :scale: 100
-   :alt: Bram Cohen on BitTorrent
-   :align: left
-
-Bram Cohen is interviewed by Steve Holden at this years PyCon, Dallas, US.
-
-PAL, 60 min, DivX AVI
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/EopmJWrLmWI?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Keynote speech by Guido van Rossum on the new Python 2.5 features
 -----------------------------------------------------------------
 
-695mb: http://buildbot.pypy.org/misc/torrent/keynote-speech_guido-van-rossum_v1.avi.torrent
+Guido van Rossum explains the new Python 2.5 features at the PyCon 2006,
+Dallas, US.
 
-430mb: http://buildbot.pypy.org/misc/torrent/keynote-speech_guido-van-rossum_320x240.avi.torrent
+.. raw:: html
 
-.. image:: image/guido.jpg
-   :scale: 100
-   :alt: Guido van Rossum on Python 2.5
-   :align: left
-
-Guido van Rossum explains the new Python 2.5 features at this years PyCon, Dallas, US.
-
-PAL, 70 min, DivX AVI
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/RR2sX8tFGsI?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Trailer: PyPy sprint at the University of Palma de Mallorca
 -----------------------------------------------------------
 
-166mb: http://buildbot.pypy.org/misc/torrent/mallorca-trailer-v1.avi.torrent
+This trailer shows the PyPy team at the sprint in Mallorca, a
+behind-the-scenes look at a typical PyPy coding sprint and talk as well as
+everything else.
 
-88mb: http://buildbot.pypy.org/misc/torrent/mallorca-trailer-medium.avi.torrent
+.. raw:: html
 
-64mb: http://buildbot.pypy.org/misc/torrent/mallorca-trailer-320x240.avi.torrent
-
-.. image:: image/mallorca-trailer.jpg
-   :scale: 100
-   :alt: Trailer PyPy sprint in Mallorca
-   :align: left
-
-This trailer shows the PyPy team at the sprint in Mallorca, a behind-the-scenes of a typical PyPy coding sprint and talk as well as everything else.
-
-PAL, 11 min, DivX AVI
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/swsnRfj_cek?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 Coding discussion of core developers Armin Rigo and Samuele Pedroni
 -------------------------------------------------------------------
 
-620mb: http://buildbot.pypy.org/misc/torrent/coding-discussion-v1.avi.torrent
+Coding discussion between Armin Rigo and Samuele Pedroni during the PyPy
+sprint at the University of Palma de Mallorca, Spain. 27.1.2006
 
-240mb: http://buildbot.pypy.org/misc/torrent/coding-discussion-320x240.avi.torrent
+.. raw:: html
 
-.. image:: image/coding-discussion.jpg
-   :scale: 100
-   :alt: Coding discussion
-   :align: left
-
-Coding discussion between Armin Rigo and Samuele Pedroni during the PyPy sprint at the University of Palma de Mallorca, Spain. 27.1.2006
-
-PAL 40 min, DivX AVI
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/H_IgK9qmEss?rel=0"
+   frameborder="0" allowfullscreen></iframe>
 
 
 PyPy technical talk at the University of Palma de Mallorca
 ----------------------------------------------------------
 
-865mb: http://buildbot.pypy.org/misc/torrent/introductory-student-talk-v2.avi.torrent
-
-437mb: http://buildbot.pypy.org/misc/torrent/introductory-student-talk-320x240.avi.torrent
-
-.. image:: image/introductory-student-talk.jpg
-   :scale: 100
-   :alt: Introductory student talk
-   :align: left
-
 Technical talk on the PyPy project at the University of Palma de Mallorca, Spain. 27.1.2006
 
-PAL 72 min, DivX AVI
+Core developers Armin Rigo, Samuele Pedroni and Carl Friedrich Bolz are giving
+an overview of the PyPy architecture, the standard interpreter, the RPython
+translation toolchain and the just-in-time compiler.
 
-Core developers Armin Rigo, Samuele Pedroni and Carl Friedrich Bolz are giving an overview of the PyPy architecture, the standard interpreter, the RPython translation toolchain and the just-in-time compiler.
+.. raw:: html
 
+   <iframe width="420" height="315"
+   src="http://www.youtube.com/embed/6dnUzVQaSlg?rel=0"
+   frameborder="0" allowfullscreen></iframe>
+
diff --git a/pypy/doc/whatsnew-1.9.rst b/pypy/doc/whatsnew-1.9.rst
--- a/pypy/doc/whatsnew-1.9.rst
+++ b/pypy/doc/whatsnew-1.9.rst
@@ -5,8 +5,12 @@
 .. this is the revision just after the creation of the release-1.8.x branch
 .. startrev: a4261375b359
 
+.. branch: default
+* Working hash function for numpy types.
+
 .. branch: array_equal
 .. branch: better-jit-hooks-2
+Improved jit hooks
 .. branch: faster-heapcache
 .. branch: faster-str-decode-escape
 .. branch: float-bytes
@@ -16,9 +20,14 @@
 .. branch: jit-frame-counter
 Put more debug info into resops.
 .. branch: kill-geninterp
+Kill "geninterp", an old attempt to statically turn some fixed
+app-level code to interp-level.
 .. branch: kqueue
 Finished select.kqueue.
 .. branch: kwargsdict-strategy
+Special dictionary strategy for dealing with \*\*kwds. Now having a simple
+proxy ``def f(*args, **kwds): return x(*args, **kwds)`` should not make
+any allocations at all.
 .. branch: matrixmath-dot
 numpypy can now handle matrix multiplication.
 .. branch: merge-2.7.2
@@ -29,13 +38,19 @@
 cpyext: Better support for PyEval_SaveThread and other PyTreadState_*
 functions.
 .. branch: numppy-flatitter
+flatiter for numpy
 .. branch: numpy-back-to-applevel
+reuse more of original numpy
 .. branch: numpy-concatenate
+concatenation support for numpy
 .. branch: numpy-indexing-by-arrays-bool
+indexing by bool arrays
 .. branch: numpy-record-dtypes
+record dtypes on numpy has been started
 .. branch: numpy-single-jitdriver
 .. branch: numpy-ufuncs2
 .. branch: numpy-ufuncs3
+various refactorings regarding numpy
 .. branch: numpypy-issue1137
 .. branch: numpypy-out
 The "out" argument was added to most of the numypypy functions.
@@ -43,8 +58,13 @@
 .. branch: numpypy-ufuncs
 .. branch: pytest
 .. branch: safe-getargs-freelist
+CPyext improvements. For example PyOpenSSL should now work
 .. branch: set-strategies
+Sets now have strategies just like dictionaries. This means a set
+containing only ints will be more compact (and faster)
 .. branch: speedup-list-comprehension
+The simplest case of list comprehension is preallocating the correct size
+of the list. This speeds up select benchmarks quite significantly.
 .. branch: stdlib-unification
 The directory "lib-python/modified-2.7" has been removed, and its
 content merged into "lib-python/2.7".
@@ -64,8 +84,11 @@
 _invalid_parameter_handler
 .. branch: win32-kill
 Add os.kill to windows even if translating python does not have os.kill
+.. branch: win_ffi
+Handle calling conventions for the _ffi and ctypes modules
 .. branch: win64-stage1
 .. branch: zlib-mem-pressure
+Memory "leaks" associated with zlib are fixed.
 
 .. branch: ffistruct
 The ``ffistruct`` branch adds a very low level way to express C structures
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/whatsnew-head.rst
@@ -0,0 +1,31 @@
+======================
+What's new in PyPy xxx
+======================
+
+.. this is the revision of the last merge from default to release-1.9.x
+.. startrev: 8d567513d04d
+
+.. branch: default
+.. branch: app_main-refactor
+.. branch: win-ordinal
+.. branch: reflex-support
+Provides cppyy module (disabled by default) for access to C++ through Reflex.
+See doc/cppyy.rst for full details and functionality.
+.. branch: nupypy-axis-arg-check
+Check that axis arg is valid in _numpypy
+
+.. branch: iterator-in-rpython
+.. branch: numpypy_count_nonzero
+.. branch: even-more-jit-hooks
+Implement better JIT hooks
+.. branch: virtual-arguments
+Improve handling of \*\*kwds greatly, making them virtual sometimes.
+.. branch: improve-rbigint
+Introduce __int128 on systems where it's supported and improve the speed of
+rlib/rbigint.py greatly.
+
+.. "uninteresting" branches that we should just ignore for the whatsnew:
+.. branch: slightly-shorter-c
+.. branch: better-enforceargs
+.. branch: rpython-unicode-formatting
+.. branch: jit-opaque-licm
diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py
--- a/pypy/interpreter/argument.py
+++ b/pypy/interpreter/argument.py
@@ -110,12 +110,10 @@
             make_sure_not_resized(self.keywords_w)
 
         make_sure_not_resized(self.arguments_w)
-        if w_stararg is not None:
-            self._combine_starargs_wrapped(w_stararg)
-        # if we have a call where **args are used at the callsite
-        # we shouldn't let the JIT see the argument matching
-        self._dont_jit = (w_starstararg is not None and
-                          self._combine_starstarargs_wrapped(w_starstararg))
+        self._combine_wrapped(w_stararg, w_starstararg)
+        # a flag that specifies whether the JIT can unroll loops that operate
+        # on the keywords
+        self._jit_few_keywords = self.keywords is None or jit.isconstant(len(self.keywords))
 
     def __repr__(self):
         """ NOT_RPYTHON """
@@ -129,7 +127,7 @@
 
     ###  Manipulation  ###
 
-    @jit.look_inside_iff(lambda self: not self._dont_jit)
+    @jit.look_inside_iff(lambda self: self._jit_few_keywords)
     def unpack(self): # slowish
         "Return a ([w1,w2...], {'kw':w3...}) pair."
         kwds_w = {}
@@ -176,13 +174,14 @@
         keywords, values_w = space.view_as_kwargs(w_starstararg)
         if keywords is not None: # this path also taken for empty dicts
             if self.keywords is None:
-                self.keywords = keywords[:] # copy to make non-resizable
-                self.keywords_w = values_w[:]
+                self.keywords = keywords
+                self.keywords_w = values_w
             else:
-                self._check_not_duplicate_kwargs(keywords, values_w)
+                _check_not_duplicate_kwargs(
+                    self.space, self.keywords, keywords, values_w)
                 self.keywords = self.keywords + keywords
                 self.keywords_w = self.keywords_w + values_w
-            return not jit.isconstant(len(self.keywords))
+            return
         if space.isinstance_w(w_starstararg, space.w_dict):
             keys_w = space.unpackiterable(w_starstararg)
         else:
@@ -198,57 +197,17 @@
                                    "a mapping, not %s" % (typename,)))
                 raise
             keys_w = space.unpackiterable(w_keys)
-        self._do_combine_starstarargs_wrapped(keys_w, w_starstararg)
-        return True
-
-    def _do_combine_starstarargs_wrapped(self, keys_w, w_starstararg):
-        space = self.space
         keywords_w = [None] * len(keys_w)
         keywords = [None] * len(keys_w)
-        i = 0
-        for w_key in keys_w:
-            try:
-                key = space.str_w(w_key)
-            except OperationError, e:
-                if e.match(space, space.w_TypeError):
-                    raise OperationError(
-                        space.w_TypeError,
-                        space.wrap("keywords must be strings"))
-                if e.match(space, space.w_UnicodeEncodeError):
-                    # Allow this to pass through
-                    key = None
-                else:
-                    raise
-            else:
-                if self.keywords and key in self.keywords:
-                    raise operationerrfmt(self.space.w_TypeError,
-                                          "got multiple values "
-                                          "for keyword argument "
-                                          "'%s'", key)
-            keywords[i] = key
-            keywords_w[i] = space.getitem(w_starstararg, w_key)
-            i += 1
+        _do_combine_starstarargs_wrapped(space, keys_w, w_starstararg, keywords, keywords_w, self.keywords)
+        self.keyword_names_w = keys_w
         if self.keywords is None:
             self.keywords = keywords
             self.keywords_w = keywords_w
         else:
             self.keywords = self.keywords + keywords
             self.keywords_w = self.keywords_w + keywords_w
-        self.keyword_names_w = keys_w
 
-    @jit.look_inside_iff(lambda self, keywords, keywords_w:
-            jit.isconstant(len(keywords) and
-            jit.isconstant(self.keywords)))
-    def _check_not_duplicate_kwargs(self, keywords, keywords_w):
-        # looks quadratic, but the JIT should remove all of it nicely.
-        # Also, all the lists should be small
-        for key in keywords:
-            for otherkey in self.keywords:
-                if otherkey == key:
-                    raise operationerrfmt(self.space.w_TypeError,
-                                          "got multiple values "
-                                          "for keyword argument "
-                                          "'%s'", key)
 
     def fixedunpack(self, argcount):
         """The simplest argument parsing: get the 'argcount' arguments,
@@ -269,34 +228,14 @@
 
     ###  Parsing for function calls  ###
 
-    # XXX: this should be @jit.look_inside_iff, but we need key word arguments,
-    # and it doesn't support them for now.
+    @jit.unroll_safe
     def _match_signature(self, w_firstarg, scope_w, signature, defaults_w=None,
                          blindargs=0):
         """Parse args and kwargs according to the signature of a code object,
         or raise an ArgErr in case of failure.
-        Return the number of arguments filled in.
         """
-        if jit.we_are_jitted() and self._dont_jit:
-            return self._match_signature_jit_opaque(w_firstarg, scope_w,
-                                                    signature, defaults_w,
-                                                    blindargs)
-        return self._really_match_signature(w_firstarg, scope_w, signature,
-                                            defaults_w, blindargs)
-
-    @jit.dont_look_inside
-    def _match_signature_jit_opaque(self, w_firstarg, scope_w, signature,
-                                    defaults_w, blindargs):
-        return self._really_match_signature(w_firstarg, scope_w, signature,
-                                            defaults_w, blindargs)
-
-    @jit.unroll_safe
-    def _really_match_signature(self, w_firstarg, scope_w, signature,
-                                defaults_w=None, blindargs=0):
-        #
+        #   w_firstarg = a first argument to be inserted (e.g. self) or None
         #   args_w = list of the normal actual parameters, wrapped
-        #   kwds_w = real dictionary {'keyword': wrapped parameter}
-        #   argnames = list of formal parameter names
         #   scope_w = resulting list of wrapped values
         #
 
@@ -304,38 +243,29 @@
         # so all values coming from there can be assumed constant. It assumes
         # that the length of the defaults_w does not vary too much.
         co_argcount = signature.num_argnames() # expected formal arguments, without */**
-        has_vararg = signature.has_vararg()
-        has_kwarg = signature.has_kwarg()
-        extravarargs = None
-        input_argcount =  0
 
+        # put the special w_firstarg into the scope, if it exists
         if w_firstarg is not None:
             upfront = 1
             if co_argcount > 0:
                 scope_w[0] = w_firstarg
-                input_argcount = 1
-            else:
-                extravarargs = [w_firstarg]
         else:
             upfront = 0
 
         args_w = self.arguments_w
         num_args = len(args_w)
+        avail = num_args + upfront
 
         keywords = self.keywords
-        keywords_w = self.keywords_w
         num_kwds = 0
         if keywords is not None:
             num_kwds = len(keywords)
 
-        avail = num_args + upfront
 
+        # put as many positional input arguments into place as available
+        input_argcount = upfront
         if input_argcount < co_argcount:
-            # put as many positional input arguments into place as available
-            if avail > co_argcount:
-                take = co_argcount - input_argcount
-            else:
-                take = num_args
+            take = min(num_args, co_argcount - upfront)
 
             # letting the JIT unroll this loop is safe, because take is always
             # smaller than co_argcount
@@ -344,11 +274,10 @@
             input_argcount += take
 
         # collect extra positional arguments into the *vararg
-        if has_vararg:
+        if signature.has_vararg():
             args_left = co_argcount - upfront
             if args_left < 0:  # check required by rpython
-                assert extravarargs is not None
-                starargs_w = extravarargs
+                starargs_w = [w_firstarg]
                 if num_args:
                     starargs_w = starargs_w + args_w
             elif num_args > args_left:
@@ -357,86 +286,68 @@
                 starargs_w = []
             scope_w[co_argcount] = self.space.newtuple(starargs_w)
         elif avail > co_argcount:
-            raise ArgErrCount(avail, num_kwds,
-                              co_argcount, has_vararg, has_kwarg,
-                              defaults_w, 0)
+            raise ArgErrCount(avail, num_kwds, signature, defaults_w, 0)
 
-        # the code assumes that keywords can potentially be large, but that
-        # argnames is typically not too large
-        num_remainingkwds = num_kwds
-        used_keywords = None
-        if keywords:
-            # letting JIT unroll the loop is *only* safe if the callsite didn't
-            # use **args because num_kwds can be arbitrarily large otherwise.
-            used_keywords = [False] * num_kwds
-            for i in range(num_kwds):
-                name = keywords[i]
-                # If name was not encoded as a string, it could be None. In that
-                # case, it's definitely not going to be in the signature.
-                if name is None:
-                    continue
-                j = signature.find_argname(name)
-                if j < 0:
-                    continue
-                elif j < input_argcount:
-                    # check that no keyword argument conflicts with these. note
-                    # that for this purpose we ignore the first blindargs,
-                    # which were put into place by prepend().  This way,
-                    # keywords do not conflict with the hidden extra argument
-                    # bound by methods.
-                    if blindargs <= j:
-                        raise ArgErrMultipleValues(name)
+        # if a **kwargs argument is needed, create the dict
+        w_kwds = None
+        if signature.has_kwarg():
+            w_kwds = self.space.newdict(kwargs=True)
+            scope_w[co_argcount + signature.has_vararg()] = w_kwds
+
+        # handle keyword arguments
+        num_remainingkwds = 0
+        keywords_w = self.keywords_w
+        kwds_mapping = None
+        if num_kwds:
+            # kwds_mapping maps target indexes in the scope (minus input_argcount)
+            # to positions in the keywords_w list
+            cnt = (co_argcount - input_argcount)
+            if cnt < 0:
+                cnt = 0
+            kwds_mapping = [0] * cnt
+            # initialize manually, for the JIT :-(
+            for i in range(len(kwds_mapping)):
+                kwds_mapping[i] = -1
+            # match the keywords given at the call site to the argument names
+            # the called function takes
+            # this function must not take a scope_w, to make the scope not
+            # escape
+            num_remainingkwds = _match_keywords(
+                    signature, blindargs, input_argcount, keywords,
+                    kwds_mapping, self._jit_few_keywords)
+            if num_remainingkwds:
+                if w_kwds is not None:
+                    # collect extra keyword arguments into the **kwarg
+                    _collect_keyword_args(
+                            self.space, keywords, keywords_w, w_kwds,
+                            kwds_mapping, self.keyword_names_w, self._jit_few_keywords)
                 else:
-                    assert scope_w[j] is None
-                    scope_w[j] = keywords_w[i]
-                    used_keywords[i] = True # mark as used
-                    num_remainingkwds -= 1
+                    if co_argcount == 0:
+                        raise ArgErrCount(avail, num_kwds, signature, defaults_w, 0)
+                    raise ArgErrUnknownKwds(self.space, num_remainingkwds, keywords,
+                                            kwds_mapping, self.keyword_names_w)
+
+        # check for missing arguments and fill them from the kwds,
+        # or with defaults, if available
         missing = 0
         if input_argcount < co_argcount:
             def_first = co_argcount - (0 if defaults_w is None else len(defaults_w))
+            j = 0
+            kwds_index = -1
             for i in range(input_argcount, co_argcount):
-                if scope_w[i] is not None:
-                    continue
+                if kwds_mapping is not None:
+                    kwds_index = kwds_mapping[j]
+                    j += 1
+                    if kwds_index >= 0:
+                        scope_w[i] = keywords_w[kwds_index]
+                        continue
                 defnum = i - def_first
                 if defnum >= 0:
                     scope_w[i] = defaults_w[defnum]
                 else:
-                    # error: not enough arguments.  Don't signal it immediately
-                    # because it might be related to a problem with */** or
-                    # keyword arguments, which will be checked for below.
                     missing += 1
-
-        # collect extra keyword arguments into the **kwarg
-        if has_kwarg:
-            w_kwds = self.space.newdict(kwargs=True)
-            if num_remainingkwds:
-                #
-                limit = len(keywords)
-                if self.keyword_names_w is not None:
-                    limit -= len(self.keyword_names_w)
-                for i in range(len(keywords)):
-                    if not used_keywords[i]:
-                        if i < limit:
-                            w_key = self.space.wrap(keywords[i])
-                        else:
-                            w_key = self.keyword_names_w[i - limit]
-                        self.space.setitem(w_kwds, w_key, keywords_w[i])
-                #
-            scope_w[co_argcount + has_vararg] = w_kwds
-        elif num_remainingkwds:
-            if co_argcount == 0:
-                raise ArgErrCount(avail, num_kwds,
-                              co_argcount, has_vararg, has_kwarg,
-                              defaults_w, missing)
-            raise ArgErrUnknownKwds(self.space, num_remainingkwds, keywords,
-                                    used_keywords, self.keyword_names_w)
-
-        if missing:
-            raise ArgErrCount(avail, num_kwds,
-                              co_argcount, has_vararg, has_kwarg,
-                              defaults_w, missing)
-
-        return co_argcount + has_vararg + has_kwarg
+            if missing:
+                raise ArgErrCount(avail, num_kwds, signature, defaults_w, missing)
 
 
 
@@ -448,11 +359,12 @@
         scope_w must be big enough for signature.
         """
         try:
-            return self._match_signature(w_firstarg,
-                                         scope_w, signature, defaults_w, 0)
+            self._match_signature(w_firstarg,
+                                  scope_w, signature, defaults_w, 0)
         except ArgErr, e:
             raise operationerrfmt(self.space.w_TypeError,
                                   "%s() %s", fnname, e.getmsg())
+        return signature.scope_length()
 
     def _parse(self, w_firstarg, signature, defaults_w, blindargs=0):
         """Parse args and kwargs according to the signature of a code object,
@@ -499,6 +411,102 @@
                 space.setitem(w_kwds, w_key, self.keywords_w[i])
         return w_args, w_kwds
 
+# JIT helper functions
+# these functions contain functionality that the JIT is not always supposed to
+# look at. They should not take a self argument, which makes the number of
+# arguments annoying :-(
+
+@jit.look_inside_iff(lambda space, existingkeywords, keywords, keywords_w:
+        jit.isconstant(len(keywords) and
+        jit.isconstant(existingkeywords)))
+def _check_not_duplicate_kwargs(space, existingkeywords, keywords, keywords_w):
+    # looks quadratic, but the JIT should remove all of it nicely.
+    # Also, all the lists should be small
+    for key in keywords:
+        for otherkey in existingkeywords:
+            if otherkey == key:
+                raise operationerrfmt(space.w_TypeError,
+                                      "got multiple values "
+                                      "for keyword argument "
+                                      "'%s'", key)
+
+def _do_combine_starstarargs_wrapped(space, keys_w, w_starstararg, keywords,
+        keywords_w, existingkeywords):
+    i = 0
+    for w_key in keys_w:
+        try:
+            key = space.str_w(w_key)
+        except OperationError, e:
+            if e.match(space, space.w_TypeError):
+                raise OperationError(
+                    space.w_TypeError,
+                    space.wrap("keywords must be strings"))
+            if e.match(space, space.w_UnicodeEncodeError):
+                # Allow this to pass through
+                key = None
+            else:
+                raise
+        else:
+            if existingkeywords and key in existingkeywords:
+                raise operationerrfmt(space.w_TypeError,
+                                      "got multiple values "
+                                      "for keyword argument "
+                                      "'%s'", key)
+        keywords[i] = key
+        keywords_w[i] = space.getitem(w_starstararg, w_key)
+        i += 1
+
+@jit.look_inside_iff(
+    lambda signature, blindargs, input_argcount,
+           keywords, kwds_mapping, jiton: jiton)
+def _match_keywords(signature, blindargs, input_argcount,
+                    keywords, kwds_mapping, _):
+    # letting JIT unroll the loop is *only* safe if the callsite didn't
+    # use **args because num_kwds can be arbitrarily large otherwise.
+    num_kwds = num_remainingkwds = len(keywords)
+    for i in range(num_kwds):
+        name = keywords[i]
+        # If name was not encoded as a string, it could be None. In that
+        # case, it's definitely not going to be in the signature.
+        if name is None:
+            continue
+        j = signature.find_argname(name)
+        # if j == -1 nothing happens, because j < input_argcount and
+        # blindargs > j
+        if j < input_argcount:
+            # check that no keyword argument conflicts with these. note
+            # that for this purpose we ignore the first blindargs,
+            # which were put into place by prepend().  This way,
+            # keywords do not conflict with the hidden extra argument
+            # bound by methods.
+            if blindargs <= j:
+                raise ArgErrMultipleValues(name)
+        else:
+            kwds_mapping[j - input_argcount] = i # map to the right index
+            num_remainingkwds -= 1
+    return num_remainingkwds
+
+@jit.look_inside_iff(
+    lambda space, keywords, keywords_w, w_kwds, kwds_mapping,
+        keyword_names_w, jiton: jiton)
+def _collect_keyword_args(space, keywords, keywords_w, w_kwds, kwds_mapping,
+                          keyword_names_w, _):
+    limit = len(keywords)
+    if keyword_names_w is not None:
+        limit -= len(keyword_names_w)
+    for i in range(len(keywords)):
+        # again a dangerous-looking loop that either the JIT unrolls
+        # or that is not too bad, because len(kwds_mapping) is small
+        for j in kwds_mapping:
+            if i == j:
+                break
+        else:
+            if i < limit:
+                w_key = space.wrap(keywords[i])
+            else:
+                w_key = keyword_names_w[i - limit]
+            space.setitem(w_kwds, w_key, keywords_w[i])
+
 class ArgumentsForTranslation(Arguments):
     def __init__(self, space, args_w, keywords=None, keywords_w=None,
                  w_stararg=None, w_starstararg=None):
@@ -654,11 +662,9 @@
 
 class ArgErrCount(ArgErr):
 
-    def __init__(self, got_nargs, nkwds, expected_nargs, has_vararg, has_kwarg,
+    def __init__(self, got_nargs, nkwds, signature,
                  defaults_w, missing_args):
-        self.expected_nargs = expected_nargs
-        self.has_vararg = has_vararg
-        self.has_kwarg = has_kwarg
+        self.signature = signature
 
         self.num_defaults = 0 if defaults_w is None else len(defaults_w)
         self.missing_args = missing_args
@@ -666,16 +672,16 @@
         self.num_kwds = nkwds
 
     def getmsg(self):
-        n = self.expected_nargs
+        n = self.signature.num_argnames()
         if n == 0:
             msg = "takes no arguments (%d given)" % (
                 self.num_args + self.num_kwds)
         else:
             defcount = self.num_defaults
-            has_kwarg = self.has_kwarg
+            has_kwarg = self.signature.has_kwarg()
             num_args = self.num_args
             num_kwds = self.num_kwds
-            if defcount == 0 and not self.has_vararg:
+            if defcount == 0 and not self.signature.has_vararg():
                 msg1 = "exactly"
                 if not has_kwarg:
                     num_args += num_kwds
@@ -714,13 +720,13 @@
 
 class ArgErrUnknownKwds(ArgErr):
 
-    def __init__(self, space, num_remainingkwds, keywords, used_keywords,
+    def __init__(self, space, num_remainingkwds, keywords, kwds_mapping,
                  keyword_names_w):
         name = ''
         self.num_kwds = num_remainingkwds
         if num_remainingkwds == 1:
             for i in range(len(keywords)):
-                if not used_keywords[i]:
+                if i not in kwds_mapping:
                     name = keywords[i]
                     if name is None:
                         # We'll assume it's unicode. Encode it.
diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py b/pypy/interpreter/astcompiler/test/test_astbuilder.py
--- a/pypy/interpreter/astcompiler/test/test_astbuilder.py
+++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py
@@ -1105,6 +1105,17 @@
         assert isinstance(s, ast.Str)
         assert space.eq_w(s.s, space.wrap(sentence))
 
+    def test_string_bug(self):
+        space = self.space
+        source = '# -*- encoding: utf8 -*-\nstuff = "x \xc3\xa9 \\n"\n'
+        info = pyparse.CompileInfo("<test>", "exec")
+        tree = self.parser.parse_source(source, info)
+        assert info.encoding == "utf8"
+        s = ast_from_node(space, tree, info).body[0].value
+        assert isinstance(s, ast.Str)
+        expected = ['x', ' ', chr(0xc3), chr(0xa9), ' ', '\n']
+        assert space.eq_w(s.s, space.wrap(''.join(expected)))
+
     def test_number(self):
         def get_num(s):
             node = self.get_first_expr(s)
diff --git a/pypy/interpreter/buffer.py b/pypy/interpreter/buffer.py
--- a/pypy/interpreter/buffer.py
+++ b/pypy/interpreter/buffer.py
@@ -44,6 +44,9 @@
         # May be overridden.  No bounds checks.
         return ''.join([self.getitem(i) for i in range(start, stop, step)])
 
+    def get_raw_address(self):
+        raise ValueError("no raw buffer")
+
     # __________ app-level support __________
 
     def descr_len(self, space):
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -496,7 +496,12 @@
 
     # apply kw_spec
     for name, spec in kw_spec.items():
-        unwrap_spec[argnames.index(name)] = spec
+        try:
+            unwrap_spec[argnames.index(name)] = spec
+        except ValueError:
+            raise ValueError("unwrap_spec() got a keyword %r but it is not "
+                             "the name of an argument of the following "
+                             "function" % (name,))
 
     return unwrap_spec
 
diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -97,7 +97,8 @@
         return space.wrap(v)
 
     need_encoding = (encoding is not None and
-                     encoding != "utf-8" and encoding != "iso-8859-1")
+                     encoding != "utf-8" and encoding != "utf8" and
+                     encoding != "iso-8859-1")
     assert 0 <= ps <= q
     substr = s[ps : q]
     if rawmode or '\\' not in s[ps:]:
@@ -129,19 +130,18 @@
     builder = StringBuilder(len(s))
     ps = 0
     end = len(s)
-    while 1:
-        ps2 = ps
-        while ps < end and s[ps] != '\\':
+    while ps < end:
+        if s[ps] != '\\':
+            # note that the C code has a label here.
+            # the logic is the same.
             if recode_encoding and ord(s[ps]) & 0x80:
                 w, ps = decode_utf8(space, s, ps, end, recode_encoding)
+                # Append bytes to output buffer.
                 builder.append(w)
-                ps2 = ps
             else:
+                builder.append(s[ps])
                 ps += 1
-        if ps > ps2:
-            builder.append_slice(s, ps2, ps)
-        if ps == end:
-            break
+            continue
 
         ps += 1
         if ps == end:
diff --git a/pypy/interpreter/pyparser/test/test_parsestring.py b/pypy/interpreter/pyparser/test/test_parsestring.py
--- a/pypy/interpreter/pyparser/test/test_parsestring.py
+++ b/pypy/interpreter/pyparser/test/test_parsestring.py
@@ -84,3 +84,10 @@
         s = '"""' + '\\' + '\n"""'
         w_ret = parsestring.parsestr(space, None, s)
         assert space.str_w(w_ret) == ''
+
+    def test_bug1(self):
+        space = self.space
+        expected = ['x', ' ', chr(0xc3), chr(0xa9), ' ', '\n']
+        input = ["'", 'x', ' ', chr(0xc3), chr(0xa9), ' ', chr(92), 'n', "'"]
+        w_ret = parsestring.parsestr(space, 'utf8', ''.join(input))
+        assert space.str_w(w_ret) == ''.join(expected)
diff --git a/pypy/interpreter/test/test_argument.py b/pypy/interpreter/test/test_argument.py
--- a/pypy/interpreter/test/test_argument.py
+++ b/pypy/interpreter/test/test_argument.py
@@ -57,6 +57,9 @@
     def __nonzero__(self):
         raise NotImplementedError
 
+class kwargsdict(dict):
+    pass
+
 class DummySpace(object):
     def newtuple(self, items):
         return tuple(items)
@@ -76,9 +79,13 @@
         return list(it)
 
     def view_as_kwargs(self, x):
+        if len(x) == 0:
+            return [], []
         return None, None
 
     def newdict(self, kwargs=False):
+        if kwargs:
+            return kwargsdict()
         return {}
 
     def newlist(self, l=[]):
@@ -299,6 +306,22 @@
             args._match_signature(None, l, Signature(["a", "b", "c"], None, "**"))
             assert l == [1, 2, 3, {'d': 4}]
 
+    def test_match_kwds_creates_kwdict(self):
+        space = DummySpace()
+        kwds = [("c", 3), ('d', 4)]
+        for i in range(4):
+            kwds_w = dict(kwds[:i])
+            keywords = kwds_w.keys()
+            keywords_w = kwds_w.values()
+            w_kwds = dummy_wrapped_dict(kwds[i:])
+            if i == 3:
+                w_kwds = None
+            args = Arguments(space, [1, 2], keywords, keywords_w, w_starstararg=w_kwds)
+            l = [None, None, None, None]
+            args._match_signature(None, l, Signature(["a", "b", "c"], None, "**"))
+            assert l == [1, 2, 3, {'d': 4}]
+            assert isinstance(l[-1], kwargsdict)
+
     def test_duplicate_kwds(self):
         space = DummySpace()
         excinfo = py.test.raises(OperationError, Arguments, space, [], ["a"],
@@ -546,34 +569,47 @@
     def test_missing_args(self):
         # got_nargs, nkwds, expected_nargs, has_vararg, has_kwarg,
         # defaults_w, missing_args
-        err = ArgErrCount(1, 0, 0, False, False, None, 0)
+        sig = Signature([], None, None)
+        err = ArgErrCount(1, 0, sig, None, 0)
         s = err.getmsg()
         assert s == "takes no arguments (1 given)"
-        err = ArgErrCount(0, 0, 1, False, False, [], 1)
+
+        sig = Signature(['a'], None, None)
+        err = ArgErrCount(0, 0, sig, [], 1)
         s = err.getmsg()
         assert s == "takes exactly 1 argument (0 given)"
-        err = ArgErrCount(3, 0, 2, False, False, [], 0)
+
+        sig = Signature(['a', 'b'], None, None)
+        err = ArgErrCount(3, 0, sig, [], 0)
         s = err.getmsg()
         assert s == "takes exactly 2 arguments (3 given)"
-        err = ArgErrCount(3, 0, 2, False, False, ['a'], 0)
+        err = ArgErrCount(3, 0, sig, ['a'], 0)
         s = err.getmsg()
         assert s == "takes at most 2 arguments (3 given)"
-        err = ArgErrCount(1, 0, 2, True, False, [], 1)
+
+        sig = Signature(['a', 'b'], '*', None)
+        err = ArgErrCount(1, 0, sig, [], 1)
         s = err.getmsg()
         assert s == "takes at least 2 arguments (1 given)"
-        err = ArgErrCount(0, 1, 2, True, False, ['a'], 1)
+        err = ArgErrCount(0, 1, sig, ['a'], 1)
         s = err.getmsg()
         assert s == "takes at least 1 non-keyword argument (0 given)"
-        err = ArgErrCount(2, 1, 1, False, True, [], 0)
+
+        sig = Signature(['a'], None, '**')
+        err = ArgErrCount(2, 1, sig, [], 0)
         s = err.getmsg()
         assert s == "takes exactly 1 non-keyword argument (2 given)"
-        err = ArgErrCount(0, 1, 1, False, True, [], 1)
+        err = ArgErrCount(0, 1, sig, [], 1)
         s = err.getmsg()
         assert s == "takes exactly 1 non-keyword argument (0 given)"
-        err = ArgErrCount(0, 1, 1, True, True, [], 1)
+
+        sig = Signature(['a'], '*', '**')
+        err = ArgErrCount(0, 1, sig, [], 1)
         s = err.getmsg()
         assert s == "takes at least 1 non-keyword argument (0 given)"
-        err = ArgErrCount(2, 1, 1, False, True, ['a'], 0)
+
+        sig = Signature(['a'], None, '**')
+        err = ArgErrCount(2, 1, sig, ['a'], 0)
         s = err.getmsg()
         assert s == "takes at most 1 non-keyword argument (2 given)"
 
@@ -596,11 +632,14 @@
 
     def test_unknown_keywords(self):
         space = DummySpace()
-        err = ArgErrUnknownKwds(space, 1, ['a', 'b'], [True, False], None)
+        err = ArgErrUnknownKwds(space, 1, ['a', 'b'], [0], None)
         s = err.getmsg()
         assert s == "got an unexpected keyword argument 'b'"
+        err = ArgErrUnknownKwds(space, 1, ['a', 'b'], [1], None)
+        s = err.getmsg()
+        assert s == "got an unexpected keyword argument 'a'"
         err = ArgErrUnknownKwds(space, 2, ['a', 'b', 'c'],
-                                [True, False, False], None)
+                                [0], None)
         s = err.getmsg()
         assert s == "got 2 unexpected keyword arguments"
 
@@ -610,7 +649,7 @@
                 defaultencoding = 'utf-8'
         space = DummySpaceUnicode()
         err = ArgErrUnknownKwds(space, 1, ['a', None, 'b', 'c'],
-                                [True, False, True, True],
+                                [0, 3, 2],
                                 [unichr(0x1234), u'b', u'c'])
         s = err.getmsg()
         assert s == "got an unexpected keyword argument '\xe1\x88\xb4'"
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -96,6 +96,7 @@
     'int_add_ovf'     : (('int', 'int'), 'int'),
     'int_sub_ovf'     : (('int', 'int'), 'int'),
     'int_mul_ovf'     : (('int', 'int'), 'int'),
+    'int_force_ge_zero':(('int',), 'int'),
     'uint_add'        : (('int', 'int'), 'int'),
     'uint_sub'        : (('int', 'int'), 'int'),
     'uint_mul'        : (('int', 'int'), 'int'),
@@ -1522,6 +1523,7 @@
 
 def do_new_array(arraynum, count):
     TYPE = symbolic.Size2Type[arraynum]
+    assert count >= 0 # explode if it's not
     x = lltype.malloc(TYPE, count, zero=True)
     return cast_to_ptr(x)
 
diff --git a/pypy/jit/backend/llgraph/runner.py b/pypy/jit/backend/llgraph/runner.py
--- a/pypy/jit/backend/llgraph/runner.py
+++ b/pypy/jit/backend/llgraph/runner.py
@@ -4,6 +4,7 @@
 
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.jit_hooks import LOOP_RUN_CONTAINER
 from pypy.rpython.lltypesystem import lltype, llmemory, rclass
 from pypy.rpython.ootypesystem import ootype
 from pypy.rpython.llinterp import LLInterpreter
@@ -33,6 +34,10 @@
         self.arg_types = arg_types
         self.count_fields_if_immut = count_fields_if_immut
         self.ffi_flags = ffi_flags
+        self._debug = False
+
+    def set_debug(self, v):
+        self._debug = True
 
     def get_arg_types(self):
         return self.arg_types
@@ -583,6 +588,9 @@
             for x in args_f:
                 llimpl.do_call_pushfloat(x)
 
+    def get_all_loop_runs(self):
+        return lltype.malloc(LOOP_RUN_CONTAINER, 0)
+
     def force(self, force_token):
         token = llmemory.cast_int_to_adr(force_token)
         frame = llimpl.get_forced_token_frame(token)
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -577,7 +577,6 @@
     def __init__(self, gc_ll_descr):
         self.llop1 = gc_ll_descr.llop1
         self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR
-        self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR
         self.fielddescr_tid = gc_ll_descr.fielddescr_tid
         #
         GCClass = gc_ll_descr.GCClass
@@ -592,6 +591,11 @@
             self.jit_wb_card_page_shift = GCClass.JIT_WB_CARD_PAGE_SHIFT
             self.jit_wb_cards_set_byteofs, self.jit_wb_cards_set_singlebyte = (
                 self.extract_flag_byte(self.jit_wb_cards_set))
+            #
+            # the x86 backend uses the following "accidental" facts to
+            # avoid one instruction:
+            assert self.jit_wb_cards_set_byteofs == self.jit_wb_if_flag_byteofs
+            assert self.jit_wb_cards_set_singlebyte == -0x80
         else:
             self.jit_wb_cards_set = 0
 
@@ -615,7 +619,7 @@
         # returns a function with arguments [array, index, newvalue]
         llop1 = self.llop1
         funcptr = llop1.get_write_barrier_from_array_failing_case(
-            self.WB_ARRAY_FUNCPTR)
+            self.WB_FUNCPTR)
         funcaddr = llmemory.cast_ptr_to_adr(funcptr)
         return cpu.cast_adr_to_int(funcaddr)    # this may return 0
 
@@ -655,10 +659,11 @@
 
     def _check_valid_gc(self):
         # we need the hybrid or minimark GC for rgc._make_sure_does_not_move()
-        # to work
-        if self.gcdescr.config.translation.gc not in ('hybrid', 'minimark'):
+        # to work.  Additionally, 'hybrid' is missing some stuff like
+        # jit_remember_young_pointer() for now.
+        if self.gcdescr.config.translation.gc not in ('minimark',):
             raise NotImplementedError("--gc=%s not implemented with the JIT" %
-                                      (gcdescr.config.translation.gc,))
+                                      (self.gcdescr.config.translation.gc,))
 
     def _make_gcrootmap(self):
         # to find roots in the assembler, make a GcRootMap
@@ -699,9 +704,7 @@
 
     def _setup_write_barrier(self):
         self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType(
-            [llmemory.Address, llmemory.Address], lltype.Void))
-        self.WB_ARRAY_FUNCPTR = lltype.Ptr(lltype.FuncType(
-            [llmemory.Address, lltype.Signed, llmemory.Address], lltype.Void))
+            [llmemory.Address], lltype.Void))
         self.write_barrier_descr = WriteBarrierDescr(self)
 
     def _make_functions(self, really_not_translated):
@@ -859,8 +862,7 @@
             # the GC, and call it immediately
             llop1 = self.llop1
             funcptr = llop1.get_write_barrier_failing_case(self.WB_FUNCPTR)
-            funcptr(llmemory.cast_ptr_to_adr(gcref_struct),
-                    llmemory.cast_ptr_to_adr(gcref_newptr))
+            funcptr(llmemory.cast_ptr_to_adr(gcref_struct))
 
     def can_use_nursery_malloc(self, size):
         return size < self.max_size_of_young_obj
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -276,8 +276,8 @@
                             repr(offset_to_length), p))
         return p
 
-    def _write_barrier_failing_case(self, adr_struct, adr_newptr):
-        self.record.append(('barrier', adr_struct, adr_newptr))
+    def _write_barrier_failing_case(self, adr_struct):
+        self.record.append(('barrier', adr_struct))
 
     def get_write_barrier_failing_case(self, FPTRTYPE):
         return llhelper(FPTRTYPE, self._write_barrier_failing_case)
@@ -296,7 +296,7 @@
 
 
 class TestFramework(object):
-    gc = 'hybrid'
+    gc = 'minimark'
 
     def setup_method(self, meth):
         class config_(object):
@@ -402,7 +402,7 @@
         #
         s_hdr.tid |= gc_ll_descr.GCClass.JIT_WB_IF_FLAG
         gc_ll_descr.do_write_barrier(s_gcref, r_gcref)
-        assert self.llop1.record == [('barrier', s_adr, r_adr)]
+        assert self.llop1.record == [('barrier', s_adr)]
 
     def test_gen_write_barrier(self):
         gc_ll_descr = self.gc_ll_descr
diff --git a/pypy/jit/backend/llsupport/test/test_rewrite.py b/pypy/jit/backend/llsupport/test/test_rewrite.py
--- a/pypy/jit/backend/llsupport/test/test_rewrite.py
+++ b/pypy/jit/backend/llsupport/test/test_rewrite.py
@@ -205,7 +205,7 @@
     def setup_method(self, meth):
         class config_(object):
             class translation(object):
-                gc = 'hybrid'
+                gc = 'minimark'
                 gcrootfinder = 'asmgcc'
                 gctransformer = 'framework'
                 gcremovetypeptr = False
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -55,6 +55,21 @@
         """Called once by the front-end when the program stops."""
         pass
 
+    def get_all_loop_runs(self):
+        """ Function that will return the number of times each loop was run.
+        Requires an earlier call to set_debug(True), otherwise you won't
+        get the information.
+
+        Returns an instance of LOOP_RUN_CONTAINER from rlib.jit_hooks
+        """
+        raise NotImplementedError
+
+    def set_debug(self, value):
+        """ Enable or disable debugging info. Does nothing by default. Returns
+        the previous setting.
+        """
+        return False
+
     def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
         """Assemble the given loop.
         Should create and attach a fresh CompiledLoopToken to
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -1110,6 +1110,79 @@
     def test_virtual_ref_finish(self):
         pass   # VIRTUAL_REF_FINISH must not reach the backend nowadays
 
+    def test_arguments_to_execute_token(self):
+        # this test checks that execute_token() can be called with any
+        # variant of ints and floats as arguments
+        if self.cpu.supports_floats:
+            numkinds = 2
+        else:
+            numkinds = 1
+        seed = random.randrange(0, 10000)
+        print 'Seed is', seed    # or choose it by changing the previous line
+        r = random.Random()
+        r.seed(seed)
+        for nb_args in range(50):
+            print 'Passing %d arguments to execute_token...' % nb_args
+            #
+            inputargs = []
+            values = []
+            for k in range(nb_args):
+                kind = r.randrange(0, numkinds)
+                if kind == 0:
+                    inputargs.append(BoxInt())
+                    values.append(r.randrange(-100000, 100000))
+                else:
+                    inputargs.append(BoxFloat())
+                    values.append(longlong.getfloatstorage(r.random()))
+            #
+            looptoken = JitCellToken()
+            faildescr = BasicFailDescr(42)
+            operations = []
+            retboxes = []
+            retvalues = []
+            #
+            ks = range(nb_args)
+            random.shuffle(ks)
+            for k in ks:
+                if isinstance(inputargs[k], BoxInt):
+                    newbox = BoxInt()
+                    x = r.randrange(-100000, 100000)
+                    operations.append(
+                        ResOperation(rop.INT_ADD, [inputargs[k],
+                                                   ConstInt(x)], newbox)
+                        )
+                    y = values[k] + x
+                else:
+                    newbox = BoxFloat()
+                    x = r.random()
+                    operations.append(
+                        ResOperation(rop.FLOAT_ADD, [inputargs[k],
+                                                     constfloat(x)], newbox)
+                        )
+                    y = longlong.getrealfloat(values[k]) + x
+                    y = longlong.getfloatstorage(y)
+                kk = r.randrange(0, len(retboxes)+1)
+                retboxes.insert(kk, newbox)
+                retvalues.insert(kk, y)
+            #
+            operations.append(
+                ResOperation(rop.FINISH, retboxes, None, descr=faildescr)
+                )
+            print inputargs
+            for op in operations:
+                print op
+            self.cpu.compile_loop(inputargs, operations, looptoken)
+            #
+            fail = self.cpu.execute_token(looptoken, *values)
+            assert fail.identifier == 42
+            #
+            for k in range(len(retvalues)):
+                if isinstance(retboxes[k], BoxInt):
+                    got = self.cpu.get_latest_value_int(k)
+                else:
+                    got = self.cpu.get_latest_value_float(k)
+                assert got == retvalues[k]
+
     def test_jump(self):
         # this test generates small loops where the JUMP passes many
         # arguments of various types, shuffling them around.
@@ -1835,12 +1908,12 @@
         assert not excvalue
 
     def test_cond_call_gc_wb(self):
-        def func_void(a, b):
-            record.append((a, b))
+        def func_void(a):
+            record.append(a)
         record = []
         #
         S = lltype.GcStruct('S', ('tid', lltype.Signed))
-        FUNC = self.FuncType([lltype.Ptr(S), lltype.Ptr(S)], lltype.Void)
+        FUNC = self.FuncType([lltype.Ptr(S)], lltype.Void)
         func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
         funcbox = self.get_funcbox(self.cpu, func_ptr)
         class WriteBarrierDescr(AbstractDescr):
@@ -1866,26 +1939,25 @@
                                    [BoxPtr(sgcref), ConstPtr(tgcref)],
                                    'void', descr=WriteBarrierDescr())
             if cond:
-                assert record == [(s, t)]
+                assert record == [s]
             else:
                 assert record == []
 
     def test_cond_call_gc_wb_array(self):
-        def func_void(a, b, c):
-            record.append((a, b, c))
+        def func_void(a):
+            record.append(a)
         record = []
         #
         S = lltype.GcStruct('S', ('tid', lltype.Signed))
-        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed, lltype.Ptr(S)],
-                             lltype.Void)
+        FUNC = self.FuncType([lltype.Ptr(S)], lltype.Void)
         func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
         funcbox = self.get_funcbox(self.cpu, func_ptr)
         class WriteBarrierDescr(AbstractDescr):
             jit_wb_if_flag = 4096
             jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
             jit_wb_if_flag_singlebyte = 0x10
-            jit_wb_cards_set = 0
-            def get_write_barrier_from_array_fn(self, cpu):
+            jit_wb_cards_set = 0       # <= without card marking
+            def get_write_barrier_fn(self, cpu):
                 return funcbox.getint()
         #
         for cond in [False, True]:
@@ -1902,13 +1974,15 @@
                        [BoxPtr(sgcref), ConstInt(123), BoxPtr(sgcref)],
                        'void', descr=WriteBarrierDescr())
             if cond:
-                assert record == [(s, 123, s)]
+                assert record == [s]
             else:
                 assert record == []
 
     def test_cond_call_gc_wb_array_card_marking_fast_path(self):
-        def func_void(a, b, c):
-            record.append((a, b, c))
+        def func_void(a):
+            record.append(a)
+            if cond == 1:      # the write barrier sets the flag
+                s.data.tid |= 32768
         record = []
         #
         S = lltype.Struct('S', ('tid', lltype.Signed))
@@ -1922,34 +1996,40 @@
                                      ('card6', lltype.Char),
                                      ('card7', lltype.Char),
                                      ('data',  S))
-        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed, lltype.Ptr(S)],
-                             lltype.Void)
+        FUNC = self.FuncType([lltype.Ptr(S)], lltype.Void)
         func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
         funcbox = self.get_funcbox(self.cpu, func_ptr)
         class WriteBarrierDescr(AbstractDescr):
             jit_wb_if_flag = 4096
             jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
             jit_wb_if_flag_singlebyte = 0x10
-            jit_wb_cards_set = 8192
-            jit_wb_cards_set_byteofs = struct.pack("i", 8192).index('\x20')
-            jit_wb_cards_set_singlebyte = 0x20
+            jit_wb_cards_set = 32768
+            jit_wb_cards_set_byteofs = struct.pack("i", 32768).index('\x80')
+            jit_wb_cards_set_singlebyte = -0x80
             jit_wb_card_page_shift = 7
             def get_write_barrier_from_array_fn(self, cpu):
                 return funcbox.getint()
         #
-        for BoxIndexCls in [BoxInt, ConstInt]:
-            for cond in [False, True]:
+        for BoxIndexCls in [BoxInt, ConstInt]*3:
+            for cond in [-1, 0, 1, 2]:
+                # cond=-1:GCFLAG_TRACK_YOUNG_PTRS, GCFLAG_CARDS_SET are not set
+                # cond=0: GCFLAG_CARDS_SET is never set
+                # cond=1: GCFLAG_CARDS_SET is not set, but the wb sets it
+                # cond=2: GCFLAG_CARDS_SET is already set
                 print
                 print '_'*79
                 print 'BoxIndexCls =', BoxIndexCls
-                print 'JIT_WB_CARDS_SET =', cond
+                print 'testing cond =', cond
                 print
                 value = random.randrange(-sys.maxint, sys.maxint)
-                value |= 4096
-                if cond:
-                    value |= 8192
+                if cond >= 0:
+                    value |= 4096
                 else:
-                    value &= ~8192
+                    value &= ~4096
+                if cond == 2:
+                    value |= 32768
+                else:
+                    value &= ~32768
                 s = lltype.malloc(S_WITH_CARDS, immortal=True, zero=True)
                 s.data.tid = value
                 sgcref = rffi.cast(llmemory.GCREF, s.data)
@@ -1958,11 +2038,13 @@
                 self.execute_operation(rop.COND_CALL_GC_WB_ARRAY,
                            [BoxPtr(sgcref), box_index, BoxPtr(sgcref)],
                            'void', descr=WriteBarrierDescr())
-                if cond:
+                if cond in [0, 1]:
+                    assert record == [s.data]
+                else:
                     assert record == []
+                if cond in [1, 2]:
                     assert s.card6 == '\x02'
                 else:
-                    assert record == [(s.data, (9<<7) + 17, s.data)]
                     assert s.card6 == '\x00'
                 assert s.card0 == '\x00'
                 assert s.card1 == '\x00'
@@ -1971,6 +2053,9 @@
                 assert s.card4 == '\x00'
                 assert s.card5 == '\x00'
                 assert s.card7 == '\x00'
+                if cond == 1:
+                    value |= 32768
+                assert s.data.tid == value
 
     def test_force_operations_returning_void(self):
         values = []
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -10,7 +10,7 @@
 from pypy.rlib.jit import AsmInfo
 from pypy.jit.backend.model import CompiledLoopToken
 from pypy.jit.backend.x86.regalloc import (RegAlloc, get_ebp_ofs, _get_scale,
-    gpr_reg_mgr_cls, _valid_addressing_size)
+    gpr_reg_mgr_cls, xmm_reg_mgr_cls, _valid_addressing_size)
 
 from pypy.jit.backend.x86.arch import (FRAME_FIXED_SIZE, FORCE_INDEX_OFS, WORD,
                                        IS_X86_32, IS_X86_64)
@@ -83,6 +83,7 @@
         self.float_const_abs_addr = 0
         self.malloc_slowpath1 = 0
         self.malloc_slowpath2 = 0
+        self.wb_slowpath = [0, 0, 0, 0]
         self.memcpy_addr = 0
         self.setup_failure_recovery()
         self._debug = False
@@ -100,7 +101,9 @@
                                       llmemory.cast_ptr_to_adr(ptrs))
 
     def set_debug(self, v):
+        r = self._debug
         self._debug = v
+        return r
 
     def setup_once(self):
         # the address of the function called by 'new'
@@ -109,9 +112,13 @@
         self.memcpy_addr = self.cpu.cast_ptr_to_int(support.memcpy_fn)
         self._build_failure_recovery(False)
         self._build_failure_recovery(True)
+        self._build_wb_slowpath(False)
+        self._build_wb_slowpath(True)
         if self.cpu.supports_floats:
             self._build_failure_recovery(False, withfloats=True)
             self._build_failure_recovery(True, withfloats=True)
+            self._build_wb_slowpath(False, withfloats=True)
+            self._build_wb_slowpath(True, withfloats=True)
             support.ensure_sse2_floats()
             self._build_float_constants()
         self._build_propagate_exception_path()
@@ -344,6 +351,82 @@
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.stack_check_slowpath = rawstart
 
+    def _build_wb_slowpath(self, withcards, withfloats=False):
+        descr = self.cpu.gc_ll_descr.write_barrier_descr
+        if descr is None:
+            return
+        if not withcards:
+            func = descr.get_write_barrier_fn(self.cpu)
+        else:
+            if descr.jit_wb_cards_set == 0:
+                return
+            func = descr.get_write_barrier_from_array_fn(self.cpu)
+            if func == 0:
+                return
+        #
+        # This builds a helper function called from the slow path of
+        # write barriers.  It must save all registers, and optionally
+        # all XMM registers.  It takes a single argument just pushed
+        # on the stack even on X86_64.  It must restore stack alignment
+        # accordingly.
+        mc = codebuf.MachineCodeBlockWrapper()
+        #
+        frame_size = (1 +     # my argument, considered part of my frame
+                      1 +     # my return address
+                      len(gpr_reg_mgr_cls.save_around_call_regs))
+        if withfloats:
+            frame_size += 16     # X86_32: 16 words for 8 registers;
+                                 # X86_64: just 16 registers
+        if IS_X86_32:
+            frame_size += 1      # argument to pass to the call
+        #
+        # align to a multiple of 16 bytes
+        frame_size = (frame_size + (CALL_ALIGN-1)) & ~(CALL_ALIGN-1)
+        #
+        correct_esp_by = (frame_size - 2) * WORD
+        mc.SUB_ri(esp.value, correct_esp_by)
+        #
+        ofs = correct_esp_by
+        if withfloats:
+            for reg in xmm_reg_mgr_cls.save_around_call_regs:
+                ofs -= 8
+                mc.MOVSD_sx(ofs, reg.value)
+        for reg in gpr_reg_mgr_cls.save_around_call_regs:
+            ofs -= WORD
+            mc.MOV_sr(ofs, reg.value)
+        #
+        if IS_X86_32:
+            mc.MOV_rs(eax.value, (frame_size - 1) * WORD)
+            mc.MOV_sr(0, eax.value)
+        elif IS_X86_64:
+            mc.MOV_rs(edi.value, (frame_size - 1) * WORD)
+        mc.CALL(imm(func))
+        #
+        if withcards:
+            # A final TEST8 before the RET, for the caller.  Careful to
+            # not follow this instruction with another one that changes
+            # the status of the CPU flags!
+            mc.MOV_rs(eax.value, (frame_size - 1) * WORD)
+            mc.TEST8(addr_add_const(eax, descr.jit_wb_if_flag_byteofs),
+                     imm(-0x80))
+        #
+        ofs = correct_esp_by
+        if withfloats:
+            for reg in xmm_reg_mgr_cls.save_around_call_regs:
+                ofs -= 8
+                mc.MOVSD_xs(reg.value, ofs)
+        for reg in gpr_reg_mgr_cls.save_around_call_regs:
+            ofs -= WORD
+            mc.MOV_rs(reg.value, ofs)
+        #
+        # ADD esp, correct_esp_by --- but cannot use ADD, because
+        # of its effects on the CPU flags
+        mc.LEA_rs(esp.value, correct_esp_by)
+        mc.RET16_i(WORD)
+        #
+        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+        self.wb_slowpath[withcards + 2 * withfloats] = rawstart
+
     @staticmethod
     @rgc.no_collect
     def _release_gil_asmgcc(css):
@@ -669,7 +752,6 @@
     @specialize.argtype(1)
     def _inject_debugging_code(self, looptoken, operations, tp, number):
         if self._debug:
-            # before doing anything, let's increase a counter
             s = 0
             for op in operations:
                 s += op.getopnum()
@@ -1293,6 +1375,11 @@
     genop_cast_ptr_to_int = genop_same_as
     genop_cast_int_to_ptr = genop_same_as
 
+    def genop_int_force_ge_zero(self, op, arglocs, resloc):
+        self.mc.TEST(arglocs[0], arglocs[0])
+        self.mov(imm0, resloc)
+        self.mc.CMOVNS(arglocs[0], resloc)
+
     def genop_int_mod(self, op, arglocs, resloc):
         if IS_X86_32:
             self.mc.CDQ()
@@ -2324,102 +2411,83 @@
 
     def genop_discard_cond_call_gc_wb(self, op, arglocs):
         # Write code equivalent to write_barrier() in the GC: it checks
-        # a flag in the object at arglocs[0], and if set, it calls the
-        # function remember_young_pointer() from the GC.  The arguments
-        # to the call are in arglocs[:N].  The rest, arglocs[N:], contains
-        # registers that need to be saved and restored across the call.
-        # N is either 2 (regular write barrier) or 3 (array write barrier).
+        # a flag in the object at arglocs[0], and if set, it calls a
+        # helper piece of assembler.  The latter saves registers as needed
+        # and calls the function jit_remember_young_pointer() from the GC.
         descr = op.getdescr()
         if we_are_translated():
             cls = self.cpu.gc_ll_descr.has_write_barrier_class()
             assert cls is not None and isinstance(descr, cls)
         #
         opnum = op.getopnum()
-        if opnum == rop.COND_CALL_GC_WB:
-            N = 2
-            func = descr.get_write_barrier_fn(self.cpu)
-            card_marking = False
-        elif opnum == rop.COND_CALL_GC_WB_ARRAY:
-            N = 3
-            func = descr.get_write_barrier_from_array_fn(self.cpu)
-            assert func != 0
-            card_marking = descr.jit_wb_cards_set != 0
-        else:
-            raise AssertionError(opnum)
+        card_marking = False
+        mask = descr.jit_wb_if_flag_singlebyte
+        if opnum == rop.COND_CALL_GC_WB_ARRAY and descr.jit_wb_cards_set != 0:
+            # assumptions the rest of the function depends on:
+            assert (descr.jit_wb_cards_set_byteofs ==
+                    descr.jit_wb_if_flag_byteofs)
+            assert descr.jit_wb_cards_set_singlebyte == -0x80
+            card_marking = True
+            mask = descr.jit_wb_if_flag_singlebyte | -0x80
         #
         loc_base = arglocs[0]
         self.mc.TEST8(addr_add_const(loc_base, descr.jit_wb_if_flag_byteofs),
-                      imm(descr.jit_wb_if_flag_singlebyte))
+                      imm(mask))
         self.mc.J_il8(rx86.Conditions['Z'], 0) # patched later
         jz_location = self.mc.get_relative_pos()
 
         # for cond_call_gc_wb_array, also add another fast path:
         # if GCFLAG_CARDS_SET, then we can just set one bit and be done
         if card_marking:
-            self.mc.TEST8(addr_add_const(loc_base,
-                                         descr.jit_wb_cards_set_byteofs),
-                          imm(descr.jit_wb_cards_set_singlebyte))
-            self.mc.J_il8(rx86.Conditions['NZ'], 0) # patched later
-            jnz_location = self.mc.get_relative_pos()
+            # GCFLAG_CARDS_SET is in this byte at 0x80, so this fact can
+            # be checked by the status flags of the previous TEST8
+            self.mc.J_il8(rx86.Conditions['S'], 0) # patched later
+            js_location = self.mc.get_relative_pos()
         else:
-            jnz_location = 0
+            js_location = 0
 
-        # the following is supposed to be the slow path, so whenever possible
-        # we choose the most compact encoding over the most efficient one.
-        if IS_X86_32:
-            limit = -1      # push all arglocs on the stack
-        elif IS_X86_64:
-            limit = N - 1   # push only arglocs[N:] on the stack
-        for i in range(len(arglocs)-1, limit, -1):
-            loc = arglocs[i]
-            if isinstance(loc, RegLoc):
-                self.mc.PUSH_r(loc.value)
-            else:
-                assert not IS_X86_64 # there should only be regs in arglocs[N:]
-                self.mc.PUSH_i32(loc.getint())
-        if IS_X86_64:
-            # We clobber these registers to pass the arguments, but that's
-            # okay, because consider_cond_call_gc_wb makes sure that any
-            # caller-save registers with values in them are present in
-            # arglocs[N:] too, so they are saved on the stack above and
-            # restored below.
-            if N == 2:
-                callargs = [edi, esi]
-            else:
-                callargs = [edi, esi, edx]
-            remap_frame_layout(self, arglocs[:N], callargs,
-                               X86_64_SCRATCH_REG)
+        # Write only a CALL to the helper prepared in advance, passing it as
+        # argument the address of the structure we are writing into
+        # (the first argument to COND_CALL_GC_WB).
+        helper_num = card_marking
+        if self._regalloc.xrm.reg_bindings:
+            helper_num += 2
+        if self.wb_slowpath[helper_num] == 0:    # tests only
+            assert not we_are_translated()
+            self.cpu.gc_ll_descr.write_barrier_descr = descr
+            self._build_wb_slowpath(card_marking,
+                                    bool(self._regalloc.xrm.reg_bindings))
+            assert self.wb_slowpath[helper_num] != 0
         #
-        # misaligned stack in the call, but it's ok because the write barrier
-        # is not going to call anything more.  Also, this assumes that the
-        # write barrier does not touch the xmm registers.  (Slightly delicate
-        # assumption, given that the write barrier can end up calling the
-        # platform's malloc() from AddressStack.append().  XXX may need to
-        # be done properly)
-        self.mc.CALL(imm(func))
-        if IS_X86_32:
-            self.mc.ADD_ri(esp.value, N*WORD)
-        for i in range(N, len(arglocs)):
-            loc = arglocs[i]
-            assert isinstance(loc, RegLoc)
-            self.mc.POP_r(loc.value)
+        self.mc.PUSH(loc_base)
+        self.mc.CALL(imm(self.wb_slowpath[helper_num]))
 
-        # if GCFLAG_CARDS_SET, then we can do the whole thing that would
-        # be done in the CALL above with just four instructions, so here
-        # is an inline copy of them
         if card_marking:
-            self.mc.JMP_l8(0) # jump to the exit, patched later
-            jmp_location = self.mc.get_relative_pos()
-            # patch the JNZ above
-            offset = self.mc.get_relative_pos() - jnz_location
+            # The helper ends again with a check of the flag in the object.
+            # So here, we can simply write again a 'JNS', which will be
+            # taken if GCFLAG_CARDS_SET is still not set.
+            self.mc.J_il8(rx86.Conditions['NS'], 0) # patched later
+            jns_location = self.mc.get_relative_pos()
+            #
+            # patch the JS above
+            offset = self.mc.get_relative_pos() - js_location
             assert 0 < offset <= 127
-            self.mc.overwrite(jnz_location-1, chr(offset))
+            self.mc.overwrite(js_location-1, chr(offset))
             #
+            # case GCFLAG_CARDS_SET: emit a few instructions that set
+            # the card flag directly
             loc_index = arglocs[1]
             if isinstance(loc_index, RegLoc):
-                # choose a scratch register
-                tmp1 = loc_index
-                self.mc.PUSH_r(tmp1.value)
+                if IS_X86_64 and isinstance(loc_base, RegLoc):
+                    # copy loc_index into r11
+                    tmp1 = X86_64_SCRATCH_REG
+                    self.mc.MOV_rr(tmp1.value, loc_index.value)
+                    final_pop = False
+                else:
+                    # must save the register loc_index before it is mutated
+                    self.mc.PUSH_r(loc_index.value)
+                    tmp1 = loc_index
+                    final_pop = True
                 # SHR tmp, card_page_shift
                 self.mc.SHR_ri(tmp1.value, descr.jit_wb_card_page_shift)
                 # XOR tmp, -8
@@ -2427,7 +2495,9 @@
                 # BTS [loc_base], tmp
                 self.mc.BTS(addr_add_const(loc_base, 0), tmp1)
                 # done
-                self.mc.POP_r(tmp1.value)
+                if final_pop:
+                    self.mc.POP_r(loc_index.value)
+                #
             elif isinstance(loc_index, ImmedLoc):
                 byte_index = loc_index.value >> descr.jit_wb_card_page_shift
                 byte_ofs = ~(byte_index >> 3)
@@ -2435,11 +2505,12 @@
                 self.mc.OR8(addr_add_const(loc_base, byte_ofs), imm(byte_val))
             else:
                 raise AssertionError("index is neither RegLoc nor ImmedLoc")
-            # patch the JMP above
-            offset = self.mc.get_relative_pos() - jmp_location
+            #
+            # patch the JNS above
+            offset = self.mc.get_relative_pos() - jns_location
             assert 0 < offset <= 127
-            self.mc.overwrite(jmp_location-1, chr(offset))
-        #
+            self.mc.overwrite(jns_location-1, chr(offset))
+
         # patch the JZ above
         offset = self.mc.get_relative_pos() - jz_location
         assert 0 < offset <= 127
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -980,16 +980,6 @@
         # or setarrayitem_gc. It avoids loading it twice from the memory.
         arglocs = [self.rm.make_sure_var_in_reg(op.getarg(i), args)
                    for i in range(N)]
-        # add eax, ecx and edx as extra "arguments" to ensure they are
-        # saved and restored.  Fish in self.rm to know which of these
-        # registers really need to be saved (a bit of a hack).  Moreover,
-        # we don't save and restore any SSE register because the called
-        # function, a GC write barrier, is known not to touch them.
-        # See remember_young_pointer() in rpython/memory/gc/generation.py.
-        for v, reg in self.rm.reg_bindings.items():
-            if (reg in self.rm.save_around_call_regs
-                and self.rm.stays_alive(v)):
-                arglocs.append(reg)
         self.PerformDiscard(op, arglocs)
         self.rm.possibly_free_vars_for_op(op)
 
@@ -1198,6 +1188,12 @@
     consider_cast_ptr_to_int = consider_same_as
     consider_cast_int_to_ptr = consider_same_as
 
+    def consider_int_force_ge_zero(self, op):
+        argloc = self.make_sure_var_in_reg(op.getarg(0))
+        resloc = self.force_allocate_reg(op.result, [op.getarg(0)])
+        self.possibly_free_var(op.getarg(0))
+        self.Perform(op, [argloc], resloc)
+
     def consider_strlen(self, op):
         args = op.getarglist()
         base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py
--- a/pypy/jit/backend/x86/regloc.py
+++ b/pypy/jit/backend/x86/regloc.py
@@ -548,6 +548,7 @@
     # Avoid XCHG because it always implies atomic semantics, which is
     # slower and does not pair well for dispatch.
     #XCHG = _binaryop('XCHG')
+    CMOVNS = _binaryop('CMOVNS')
 
     PUSH = _unaryop('PUSH')
     POP = _unaryop('POP')
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -3,6 +3,7 @@
 from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rpython.llinterp import LLInterpreter
 from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.jit_hooks import LOOP_RUN_CONTAINER
 from pypy.jit.codewriter import longlong
 from pypy.jit.metainterp import history, compile
 from pypy.jit.backend.x86.assembler import Assembler386
@@ -44,6 +45,9 @@
 
         self.profile_agent = profile_agent
 
+    def set_debug(self, flag):
+        return self.assembler.set_debug(flag)
+
     def setup(self):
         if self.opts is not None:
             failargs_limit = self.opts.failargs_limit
@@ -181,6 +185,14 @@
         # positions invalidated
         looptoken.compiled_loop_token.invalidate_positions = []
 
+    def get_all_loop_runs(self):
+        l = lltype.malloc(LOOP_RUN_CONTAINER,
+                          len(self.assembler.loop_run_counters))
+        for i, ll_s in enumerate(self.assembler.loop_run_counters):
+            l[i].type = ll_s.type
+            l[i].number = ll_s.number
+            l[i].counter = ll_s.i
+        return l
 
 class CPU386(AbstractX86CPU):
     backend_name = 'x86'
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -316,6 +316,13 @@
         assert rexbyte == 0
     return 0
 
+# REX prefixes: 'rex_w' generates a REX_W, forcing the instruction
+# to operate on 64-bit.  'rex_nw' doesn't, so the instruction operates
+# on 32-bit or less; the complete REX prefix is omitted if unnecessary.
+# 'rex_fw' is a special case which doesn't generate a REX_W but forces
+# the REX prefix in all cases.  It is only useful on instructions which
+# have an 8-bit register argument, to force access to the "sil" or "dil"
+# registers (as opposed to "ah-dh").
 rex_w  = encode_rex, 0, (0x40 | REX_W), None      # a REX.W prefix
 rex_nw = encode_rex, 0, 0, None                   # an optional REX prefix
 rex_fw = encode_rex, 0, 0x40, None                # a forced REX prefix
@@ -496,9 +503,9 @@
     AND8_rr = insn(rex_fw, '\x20', byte_register(1), byte_register(2,8), '\xC0')
 
     OR8_rr = insn(rex_fw, '\x08', byte_register(1), byte_register(2,8), '\xC0')
-    OR8_mi = insn(rex_fw, '\x80', orbyte(1<<3), mem_reg_plus_const(1),
+    OR8_mi = insn(rex_nw, '\x80', orbyte(1<<3), mem_reg_plus_const(1),
                   immediate(2, 'b'))
-    OR8_ji = insn(rex_fw, '\x80', orbyte(1<<3), abs_, immediate(1),
+    OR8_ji = insn(rex_nw, '\x80', orbyte(1<<3), abs_, immediate(1),
                   immediate(2, 'b'))
 
     NEG_r = insn(rex_w, '\xF7', register(1), '\xD8')
@@ -523,6 +530,8 @@
     NOT_r = insn(rex_w, '\xF7', register(1), '\xD0')
     NOT_b = insn(rex_w, '\xF7', orbyte(2<<3), stack_bp(1))
 
+    CMOVNS_rr = insn(rex_w, '\x0F\x49', register(2, 8), register(1), '\xC0')
+
     # ------------------------------ Misc stuff ------------------------------
 
     NOP = insn('\x90')
@@ -531,7 +540,13 @@
 
     PUSH_r = insn(rex_nw, register(1), '\x50')
     PUSH_b = insn(rex_nw, '\xFF', orbyte(6<<3), stack_bp(1))
+    PUSH_i8 = insn('\x6A', immediate(1, 'b'))
     PUSH_i32 = insn('\x68', immediate(1, 'i'))
+    def PUSH_i(mc, immed):
+        if single_byte(immed):
+            mc.PUSH_i8(immed)
+        else:
+            mc.PUSH_i32(immed)
 
     POP_r = insn(rex_nw, register(1), '\x58')
     POP_b = insn(rex_nw, '\x8F', orbyte(0<<3), stack_bp(1))
diff --git a/pypy/jit/backend/x86/test/test_rx86.py b/pypy/jit/backend/x86/test/test_rx86.py
--- a/pypy/jit/backend/x86/test/test_rx86.py
+++ b/pypy/jit/backend/x86/test/test_rx86.py
@@ -183,7 +183,8 @@
 
 def test_push32():
     cb = CodeBuilder32
-    assert_encodes_as(cb, 'PUSH_i32', (9,), '\x68\x09\x00\x00\x00')
+    assert_encodes_as(cb, 'PUSH_i', (0x10009,), '\x68\x09\x00\x01\x00')
+    assert_encodes_as(cb, 'PUSH_i', (9,), '\x6A\x09')
 
 def test_sub_ji8():
     cb = CodeBuilder32
diff --git a/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
--- a/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
+++ b/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
@@ -317,7 +317,9 @@
                 # CALL_j is actually relative, so tricky to test
                 (instrname == 'CALL' and argmodes == 'j') or
                 # SET_ir must be tested manually
-                (instrname == 'SET' and argmodes == 'ir')
+                (instrname == 'SET' and argmodes == 'ir') or
+                # asm gets CMOVNS args the wrong way
+                (instrname.startswith('CMOV'))
         )
 
 
diff --git a/pypy/jit/backend/x86/test/test_ztranslation.py b/pypy/jit/backend/x86/test/test_ztranslation.py
--- a/pypy/jit/backend/x86/test/test_ztranslation.py
+++ b/pypy/jit/backend/x86/test/test_ztranslation.py
@@ -3,6 +3,7 @@
 from pypy.rlib.jit import JitDriver, unroll_parameters, set_param
 from pypy.rlib.jit import PARAMETERS, dont_look_inside
 from pypy.rlib.jit import promote
+from pypy.rlib import jit_hooks
 from pypy.jit.metainterp.jitprof import Profiler
 from pypy.jit.backend.detect_cpu import getcpuclass
 from pypy.jit.backend.test.support import CCompiledMixin
@@ -69,7 +70,7 @@
         #
         from pypy.rpython.lltypesystem import lltype, rffi
         from pypy.rlib.libffi import types, CDLL, ArgChain
-        from pypy.rlib.test.test_libffi import get_libm_name
+        from pypy.rlib.test.test_clibffi import get_libm_name
         libm_name = get_libm_name(sys.platform)
         jitdriver2 = JitDriver(greens=[], reds = ['i', 'func', 'res', 'x'])
         def libffi_stuff(i, j):
@@ -170,6 +171,23 @@
         assert 1024 <= bound <= 131072
         assert bound & (bound-1) == 0       # a power of two
 
+    def test_jit_get_stats(self):
+        driver = JitDriver(greens = [], reds = ['i'])
+        
+        def f():
+            i = 0
+            while i < 100000:
+                driver.jit_merge_point(i=i)
+                i += 1
+
+        def main():
+            jit_hooks.stats_set_debug(None, True)
+            f()
+            ll_times = jit_hooks.stats_get_loop_run_times(None)
+            return len(ll_times)
+
+        res = self.meta_interp(main, [])
+        assert res == 1
 
 class TestTranslationRemoveTypePtrX86(CCompiledMixin):
     CPUClass = getcpuclass()
diff --git a/pypy/jit/backend/x86/tool/viewcode.py b/pypy/jit/backend/x86/tool/viewcode.py
--- a/pypy/jit/backend/x86/tool/viewcode.py
+++ b/pypy/jit/backend/x86/tool/viewcode.py
@@ -253,7 +253,7 @@
                 self.logentries[addr] = pieces[3]
             elif line.startswith('SYS_EXECUTABLE '):
                 filename = line[len('SYS_EXECUTABLE '):].strip()
-                if filename != self.executable_name:
+                if filename != self.executable_name and filename != '??':
                     self.symbols.update(load_symbols(filename))
                     self.executable_name = filename
 
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -1430,7 +1430,19 @@
 
     def do_fixed_newlist(self, op, args, arraydescr):
         v_length = self._get_initial_newlist_length(op, args)
-        return SpaceOperation('new_array', [arraydescr, v_length], op.result)
+        assert v_length.concretetype is lltype.Signed
+        ops = []
+        if isinstance(v_length, Constant):
+            if v_length.value >= 0:
+                v = v_length
+            else:
+                v = Constant(0, lltype.Signed)
+        else:
+            v = Variable('new_length')
+            v.concretetype = lltype.Signed
+            ops.append(SpaceOperation('int_force_ge_zero', [v_length], v))
+        ops.append(SpaceOperation('new_array', [arraydescr, v], op.result))
+        return ops
 
     def do_fixed_list_len(self, op, args, arraydescr):
         if args[0] in self.vable_array_vars:     # virtualizable array
diff --git a/pypy/jit/codewriter/policy.py b/pypy/jit/codewriter/policy.py
--- a/pypy/jit/codewriter/policy.py
+++ b/pypy/jit/codewriter/policy.py
@@ -48,8 +48,6 @@
         mod = func.__module__ or '?'
         if mod.startswith('pypy.rpython.module.'):
             return True
-        if mod == 'pypy.translator.goal.nanos':    # more helpers
-            return True
         return False
 
     def look_inside_graph(self, graph):
diff --git a/pypy/jit/codewriter/test/test_codewriter.py b/pypy/jit/codewriter/test/test_codewriter.py
--- a/pypy/jit/codewriter/test/test_codewriter.py
+++ b/pypy/jit/codewriter/test/test_codewriter.py
@@ -221,3 +221,17 @@
     assert 'setarrayitem_raw_i' in s
     assert 'getarrayitem_raw_i' in s
     assert 'residual_call_ir_v $<* fn _ll_1_raw_free__arrayPtr>' in s
+
+def test_newlist_negativ():
+    def f(n):
+        l = [0] * n
+        return len(l)
+
+    rtyper = support.annotate(f, [-1])
+    jitdriver_sd = FakeJitDriverSD(rtyper.annotator.translator.graphs[0])
+    cw = CodeWriter(FakeCPU(rtyper), [jitdriver_sd])
+    cw.find_all_graphs(FakePolicy())
+    cw.make_jitcodes(verbose=True)
+    s = jitdriver_sd.mainjitcode.dump()
+    assert 'int_force_ge_zero' in s
+    assert 'new_array' in s
diff --git a/pypy/jit/codewriter/test/test_list.py b/pypy/jit/codewriter/test/test_list.py
--- a/pypy/jit/codewriter/test/test_list.py
+++ b/pypy/jit/codewriter/test/test_list.py
@@ -85,8 +85,11 @@
                  """new_array <ArrayDescr>, $0 -> %r0""")
     builtin_test('newlist', [Constant(5, lltype.Signed)], FIXEDLIST,
                  """new_array <ArrayDescr>, $5 -> %r0""")
+    builtin_test('newlist', [Constant(-2, lltype.Signed)], FIXEDLIST,
+                 """new_array <ArrayDescr>, $0 -> %r0""")
     builtin_test('newlist', [varoftype(lltype.Signed)], FIXEDLIST,
-                 """new_array <ArrayDescr>, %i0 -> %r0""")
+                 """int_force_ge_zero %i0 -> %i1\n"""
+                 """new_array <ArrayDescr>, %i1 -> %r0""")
     builtin_test('newlist', [Constant(5, lltype.Signed),
                              Constant(0, lltype.Signed)], FIXEDLIST,
                  """new_array <ArrayDescr>, $5 -> %r0""")
diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -477,6 +477,11 @@
     @arguments("i", "i", "i", returns="i")
     def bhimpl_int_between(a, b, c):
         return a <= b < c
+    @arguments("i", returns="i")
+    def bhimpl_int_force_ge_zero(i):
+        if i < 0:
+            return 0
+        return i
 
     @arguments("i", "i", returns="i")
     def bhimpl_uint_lt(a, b):
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -5,7 +5,7 @@
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
 from pypy.rlib import rstack
-from pypy.rlib.jit import JitDebugInfo
+from pypy.rlib.jit import JitDebugInfo, Counters
 from pypy.conftest import option
 from pypy.tool.sourcetools import func_with_new_name
 
@@ -22,8 +22,7 @@
 
 def giveup():
     from pypy.jit.metainterp.pyjitpl import SwitchToBlackhole
-    from pypy.jit.metainterp.jitprof import ABORT_BRIDGE
-    raise SwitchToBlackhole(ABORT_BRIDGE)
+    raise SwitchToBlackhole(Counters.ABORT_BRIDGE)
 
 def show_procedures(metainterp_sd, procedure=None, error=None):
     # debugging
@@ -226,6 +225,8 @@
     assert isinstance(target_token, TargetToken)
     assert loop_jitcell_token.target_tokens
     loop_jitcell_token.target_tokens.append(target_token)
+    if target_token.short_preamble:
+        metainterp_sd.logger_ops.log_short_preamble([], target_token.short_preamble)
 
     loop = partial_trace
     loop.operations = loop.operations[:-1] + part.operations
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -706,6 +706,7 @@
 
         self.virtual_state = None
         self.exported_state = None
+        self.short_preamble = None
 
     def repr_of_descr(self):
         return 'TargetToken(%d)' % compute_unique_id(self)
diff --git a/pypy/jit/metainterp/jitprof.py b/pypy/jit/metainterp/jitprof.py
--- a/pypy/jit/metainterp/jitprof.py
+++ b/pypy/jit/metainterp/jitprof.py
@@ -6,42 +6,11 @@
 from pypy.rlib.debug import debug_print, debug_start, debug_stop
 from pypy.rlib.debug import have_debug_prints
 from pypy.jit.metainterp.jitexc import JitException
+from pypy.rlib.jit import Counters
 
-counters="""
-TRACING
-BACKEND
-OPS
-RECORDED_OPS
-GUARDS
-OPT_OPS
-OPT_GUARDS
-OPT_FORCINGS
-ABORT_TOO_LONG
-ABORT_BRIDGE
-ABORT_BAD_LOOP
-ABORT_ESCAPE
-ABORT_FORCE_QUASIIMMUT
-NVIRTUALS
-NVHOLES
-NVREUSED
-TOTAL_COMPILED_LOOPS
-TOTAL_COMPILED_BRIDGES
-TOTAL_FREED_LOOPS
-TOTAL_FREED_BRIDGES
-"""
 
-counter_names = []
-
-def _setup():
-    names = counters.split()
-    for i, name in enumerate(names):
-        globals()[name] = i
-        counter_names.append(name)
-    global ncounters
-    ncounters = len(names)
-_setup()
-
-JITPROF_LINES = ncounters + 1 + 1 # one for TOTAL, 1 for calls, update if needed
+JITPROF_LINES = Counters.ncounters + 1 + 1
+# +1 for the TOTAL line, +1 for the calls line; update if lines are added
 _CPU_LINES = 4       # the last 4 lines are stored on the cpu
 
 class BaseProfiler(object):
@@ -71,9 +40,12 @@
     def count(self, kind, inc=1):
         pass
 
-    def count_ops(self, opnum, kind=OPS):
+    def count_ops(self, opnum, kind=Counters.OPS):
         pass
 
+    def get_counter(self, num):
+        return -1.0
+
 class Profiler(BaseProfiler):
     initialized = False
     timer = time.time
@@ -89,7 +61,7 @@
         self.starttime = self.timer()
         self.t1 = self.starttime
         self.times = [0, 0]
-        self.counters = [0] * (ncounters - _CPU_LINES)
+        self.counters = [0] * (Counters.ncounters - _CPU_LINES)
         self.calls = 0
         self.current = []
 
@@ -117,19 +89,30 @@
             return
         self.times[ev1] += self.t1 - t0
 
-    def start_tracing(self):   self._start(TRACING)
-    def end_tracing(self):     self._end  (TRACING)
+    def start_tracing(self):   self._start(Counters.TRACING)
+    def end_tracing(self):     self._end  (Counters.TRACING)
 
-    def start_backend(self):   self._start(BACKEND)
-    def end_backend(self):     self._end  (BACKEND)
+    def start_backend(self):   self._start(Counters.BACKEND)
+    def end_backend(self):     self._end  (Counters.BACKEND)
 
     def count(self, kind, inc=1):
         self.counters[kind] += inc        
-    
-    def count_ops(self, opnum, kind=OPS):
+
+    def get_counter(self, num):
+        if num == Counters.TOTAL_COMPILED_LOOPS:
+            return self.cpu.total_compiled_loops
+        elif num == Counters.TOTAL_COMPILED_BRIDGES:
+            return self.cpu.total_compiled_bridges
+        elif num == Counters.TOTAL_FREED_LOOPS:
+            return self.cpu.total_freed_loops
+        elif num == Counters.TOTAL_FREED_BRIDGES:
+            return self.cpu.total_freed_bridges
+        return self.counters[num]
+
+    def count_ops(self, opnum, kind=Counters.OPS):
         from pypy.jit.metainterp.resoperation import rop
         self.counters[kind] += 1
-        if opnum == rop.CALL and kind == RECORDED_OPS:# or opnum == rop.OOSEND:
+        if opnum == rop.CALL and kind == Counters.RECORDED_OPS:# or opnum == rop.OOSEND:
             self.calls += 1
 
     def print_stats(self):
@@ -142,26 +125,29 @@
         cnt = self.counters
         tim = self.times
         calls = self.calls
-        self._print_line_time("Tracing", cnt[TRACING],   tim[TRACING])
-        self._print_line_time("Backend", cnt[BACKEND],   tim[BACKEND])
+        self._print_line_time("Tracing", cnt[Counters.TRACING],
+                              tim[Counters.TRACING])
+        self._print_line_time("Backend", cnt[Counters.BACKEND],
+                              tim[Counters.BACKEND])
         line = "TOTAL:      \t\t%f" % (self.tk - self.starttime, )
         debug_print(line)
-        self._print_intline("ops", cnt[OPS])
-        self._print_intline("recorded ops", cnt[RECORDED_OPS])
+        self._print_intline("ops", cnt[Counters.OPS])
+        self._print_intline("recorded ops", cnt[Counters.RECORDED_OPS])
         self._print_intline("  calls", calls)
-        self._print_intline("guards", cnt[GUARDS])
-        self._print_intline("opt ops", cnt[OPT_OPS])
-        self._print_intline("opt guards", cnt[OPT_GUARDS])
-        self._print_intline("forcings", cnt[OPT_FORCINGS])
-        self._print_intline("abort: trace too long", cnt[ABORT_TOO_LONG])
-        self._print_intline("abort: compiling", cnt[ABORT_BRIDGE])
-        self._print_intline("abort: vable escape", cnt[ABORT_ESCAPE])
-        self._print_intline("abort: bad loop", cnt[ABORT_BAD_LOOP])
+        self._print_intline("guards", cnt[Counters.GUARDS])
+        self._print_intline("opt ops", cnt[Counters.OPT_OPS])
+        self._print_intline("opt guards", cnt[Counters.OPT_GUARDS])
+        self._print_intline("forcings", cnt[Counters.OPT_FORCINGS])
+        self._print_intline("abort: trace too long",
+                            cnt[Counters.ABORT_TOO_LONG])
+        self._print_intline("abort: compiling", cnt[Counters.ABORT_BRIDGE])
+        self._print_intline("abort: vable escape", cnt[Counters.ABORT_ESCAPE])
+        self._print_intline("abort: bad loop", cnt[Counters.ABORT_BAD_LOOP])
         self._print_intline("abort: force quasi-immut",
-                                               cnt[ABORT_FORCE_QUASIIMMUT])
-        self._print_intline("nvirtuals", cnt[NVIRTUALS])
-        self._print_intline("nvholes", cnt[NVHOLES])
-        self._print_intline("nvreused", cnt[NVREUSED])
+                            cnt[Counters.ABORT_FORCE_QUASIIMMUT])
+        self._print_intline("nvirtuals", cnt[Counters.NVIRTUALS])
+        self._print_intline("nvholes", cnt[Counters.NVHOLES])
+        self._print_intline("nvreused", cnt[Counters.NVREUSED])
         cpu = self.cpu
         if cpu is not None:   # for some tests
             self._print_intline("Total # of loops",
diff --git a/pypy/jit/metainterp/optimizeopt/fficall.py b/pypy/jit/metainterp/optimizeopt/fficall.py
--- a/pypy/jit/metainterp/optimizeopt/fficall.py
+++ b/pypy/jit/metainterp/optimizeopt/fficall.py
@@ -133,7 +133,7 @@
     optimize_CALL_MAY_FORCE = optimize_CALL
 
     def optimize_FORCE_TOKEN(self, op):
-        # The handling of force_token needs a bit of exaplanation.
+        # The handling of force_token needs a bit of explanation.
         # The original trace which is getting optimized looks like this:
         #    i1 = force_token()
         #    setfield_gc(p0, i1, ...)
diff --git a/pypy/jit/metainterp/optimizeopt/heap.py b/pypy/jit/metainterp/optimizeopt/heap.py
--- a/pypy/jit/metainterp/optimizeopt/heap.py
+++ b/pypy/jit/metainterp/optimizeopt/heap.py
@@ -1,7 +1,7 @@
 import os
 
 from pypy.jit.metainterp.jitexc import JitException
-from pypy.jit.metainterp.optimizeopt.optimizer import Optimization, MODE_ARRAY
+from pypy.jit.metainterp.optimizeopt.optimizer import Optimization, MODE_ARRAY, LEVEL_KNOWNCLASS
 from pypy.jit.metainterp.history import ConstInt, Const
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from pypy.jit.metainterp.resoperation import rop, ResOperation
@@ -128,8 +128,12 @@
             op = self._cached_fields_getfield_op[structvalue]
             if not op:
                 continue
-            if optimizer.getvalue(op.getarg(0)) in optimizer.opaque_pointers:
-                continue
+            value = optimizer.getvalue(op.getarg(0))
+            if value in optimizer.opaque_pointers:
+                if value.level < LEVEL_KNOWNCLASS:
+                    continue
+                if op.getopnum() != rop.SETFIELD_GC and op.getopnum() != rop.GETFIELD_GC:
+                    continue
             if structvalue in self._cached_fields:
                 if op.getopnum() == rop.SETFIELD_GC:
                     result = op.getarg(1)
diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -401,7 +401,7 @@
             o.turned_constant(value)
 
     def forget_numberings(self, virtualbox):
-        self.metainterp_sd.profiler.count(jitprof.OPT_FORCINGS)
+        self.metainterp_sd.profiler.count(jitprof.Counters.OPT_FORCINGS)
         self.resumedata_memo.forget_numberings(virtualbox)
 
     def getinterned(self, box):
@@ -535,9 +535,9 @@
             else:
                 self.ensure_imported(value)
                 op.setarg(i, value.force_box(self))
-        self.metainterp_sd.profiler.count(jitprof.OPT_OPS)
+        self.metainterp_sd.profiler.count(jitprof.Counters.OPT_OPS)
         if op.is_guard():
-            self.metainterp_sd.profiler.count(jitprof.OPT_GUARDS)
+            self.metainterp_sd.profiler.count(jitprof.Counters.OPT_GUARDS)
             if self.replaces_guard and op in self.replaces_guard:
                 self.replace_op(self.replaces_guard[op], op)
                 del self.replaces_guard[op]
diff --git a/pypy/jit/metainterp/optimizeopt/rewrite.py b/pypy/jit/metainterp/optimizeopt/rewrite.py
--- a/pypy/jit/metainterp/optimizeopt/rewrite.py
+++ b/pypy/jit/metainterp/optimizeopt/rewrite.py
@@ -241,6 +241,16 @@
             # guard_nonnull_class on this value, which is rather silly.
             # replace the original guard with a guard_value
             old_guard_op = value.last_guard
+            if old_guard_op.getopnum() != rop.GUARD_NONNULL:
+                # This is only safe if the class of the guard_value matches the
+                # class of the guard_*_class, otherwise the intermediate ops might
+                # be executed with wrong classes.
+                previous_classbox = value.get_constant_class(self.optimizer.cpu)            
+                expected_classbox = self.optimizer.cpu.ts.cls_of_box(op.getarg(1))
+                assert previous_classbox is not None
+                assert expected_classbox is not None
+                if not previous_classbox.same_constant(expected_classbox):
+                    raise InvalidLoop('A GUARD_VALUE was proven to always fail')
             op = old_guard_op.copy_and_change(rop.GUARD_VALUE,
                                       args = [old_guard_op.getarg(0), op.getarg(1)])
             self.optimizer.replaces_guard[op] = old_guard_op
@@ -251,6 +261,8 @@
             assert isinstance(descr, compile.ResumeGuardDescr)
             descr.guard_opnum = rop.GUARD_VALUE
             descr.make_a_counter_per_value(op)
+            # to be safe
+            value.last_guard = None
         constbox = op.getarg(1)
         assert isinstance(constbox, Const)
         self.optimize_guard(op, constbox)
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
@@ -431,7 +431,53 @@
         jump(i55, i81)
         """
         self.optimize_loop(ops, expected)
-        
+
+    def test_boxed_opaque_unknown_class(self):
+        ops = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        mark_opaque_ptr(p2)        
+        i3 = getfield_gc(p2, descr=otherdescr)
+        label(p1)
+        i4 = getfield_gc(p1, descr=otherdescr)
+        label(p1)
+        p5 = getfield_gc(p1, descr=nextdescr) 
+        mark_opaque_ptr(p5)        
+        i6 = getfield_gc(p5, descr=otherdescr)
+        i7 = call(i6, descr=nonwritedescr)
+        """
+        expected = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        i3 = getfield_gc(p2, descr=otherdescr)
+        label(p1)
+        i4 = getfield_gc(p1, descr=otherdescr)
+        label(p1)
+        p5 = getfield_gc(p1, descr=nextdescr) 
+        i6 = getfield_gc(p5, descr=otherdescr)
+        i7 = call(i6, descr=nonwritedescr)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_opaque_pointer_fails_to_close_loop(self):
+        ops = """
+        [p1, p11]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        guard_class(p2, ConstClass(node_vtable)) []
+        mark_opaque_ptr(p2)        
+        i3 = getfield_gc(p2, descr=otherdescr)
+        label(p1, p11)
+        p12 = getfield_gc(p1, descr=nextdescr) 
+        i13 = getfield_gc(p2, descr=otherdescr)
+        i14 = call(i13, descr=nonwritedescr)        
+        jump(p11, p1)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+
+            
+
+
 class OptRenameStrlen(Optimization):
     def propagate_forward(self, op):
         dispatch_opt(self, op)
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -7862,6 +7862,84 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_only_strengthen_guard_if_class_matches(self):
+        ops = """
+        [p1]
+        guard_class(p1, ConstClass(node_vtable2)) []
+        guard_value(p1, ConstPtr(myptr)) []
+        jump(p1)
+        """
+        self.raises(InvalidLoop, self.optimize_loop,
+                       ops, ops)
+
+    def test_licm_boxed_opaque_getitem(self):
+        ops = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        mark_opaque_ptr(p2)        
+        guard_class(p2,  ConstClass(node_vtable)) []
+        i3 = getfield_gc(p2, descr=otherdescr)
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p1)
+        """
+        expected = """
+        [p1, i3]
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p1, i3)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_licm_boxed_opaque_getitem_unknown_class(self):
+        ops = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        mark_opaque_ptr(p2)        
+        i3 = getfield_gc(p2, descr=otherdescr)
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p1)
+        """
+        expected = """
+        [p1, p2]
+        i3 = getfield_gc(p2, descr=otherdescr)
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p1, p2)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_licm_unboxed_opaque_getitem(self):
+        ops = """
+        [p2]
+        mark_opaque_ptr(p2)        
+        guard_class(p2,  ConstClass(node_vtable)) []
+        i3 = getfield_gc(p2, descr=otherdescr)
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p2)
+        """
+        expected = """
+        [p1, i3]
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p1, i3)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_licm_unboxed_opaque_getitem_unknown_class(self):
+        ops = """
+        [p2]
+        mark_opaque_ptr(p2)        
+        i3 = getfield_gc(p2, descr=otherdescr)
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p2)
+        """
+        expected = """
+        [p2]
+        i3 = getfield_gc(p2, descr=otherdescr) 
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p2)
+        """
+        self.optimize_loop(ops, expected)
+
+
+
 class TestLLtype(OptimizeOptTest, LLtypeMixin):
     pass
 
diff --git a/pypy/jit/metainterp/optimizeopt/unroll.py b/pypy/jit/metainterp/optimizeopt/unroll.py
--- a/pypy/jit/metainterp/optimizeopt/unroll.py
+++ b/pypy/jit/metainterp/optimizeopt/unroll.py
@@ -120,9 +120,9 @@
                 limit = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.retrace_limit
                 if cell_token.retraced_count < limit:
                     cell_token.retraced_count += 1
-                    #debug_print('Retracing (%d/%d)' % (cell_token.retraced_count, limit))
+                    debug_print('Retracing (%d/%d)' % (cell_token.retraced_count, limit))
                 else:
-                    #debug_print("Retrace count reached, jumping to preamble")
+                    debug_print("Retrace count reached, jumping to preamble")
                     assert cell_token.target_tokens[0].virtual_state is None
                     jumpop.setdescr(cell_token.target_tokens[0])
                     self.optimizer.send_extra_operation(jumpop)
@@ -341,6 +341,12 @@
             op = self.short[i]
             newop = self.short_inliner.inline_op(op)
             self.optimizer.send_extra_operation(newop)
+            if op.result in self.short_boxes.assumed_classes:
+                classbox = self.getvalue(newop.result).get_constant_class(self.optimizer.cpu)
+                assumed_classbox = self.short_boxes.assumed_classes[op.result]
+                if not classbox or not classbox.same_constant(assumed_classbox):
+                    raise InvalidLoop('Class of opaque pointer needed in short ' +
+                                      'preamble unknown at end of loop')
             i += 1
 
         # Import boxes produced in the preamble but used in the loop
@@ -432,9 +438,13 @@
                 newargs[i] = a.clonebox()
                 boxmap[a] = newargs[i]
         inliner = Inliner(short_inputargs, newargs)
+        target_token.assumed_classes = {}
         for i in range(len(short)):
-            short[i] = inliner.inline_op(short[i])
-
+            op = short[i]
+            newop = inliner.inline_op(op)
+            if op.result and op.result in self.short_boxes.assumed_classes:
+                target_token.assumed_classes[newop.result] = self.short_boxes.assumed_classes[op.result]
+            short[i] = newop
         target_token.resume_at_jump_descr = target_token.resume_at_jump_descr.clone_if_mutable()
         inliner.inline_descr_inplace(target_token.resume_at_jump_descr)
 
@@ -588,6 +598,12 @@
                     for shop in target.short_preamble[1:]:
                         newop = inliner.inline_op(shop)
                         self.optimizer.send_extra_operation(newop)
+                        if shop.result in target.assumed_classes:
+                            classbox = self.getvalue(newop.result).get_constant_class(self.optimizer.cpu)
+                            if not classbox or not classbox.same_constant(target.assumed_classes[shop.result]):
+                                raise InvalidLoop('The class of an opaque pointer at the end ' +
+                                                  'of the bridge does not mach the class ' + 
+                                                  'it has at the start of the target loop')
                 except InvalidLoop:
                     #debug_print("Inlining failed unexpectedly",
                     #            "jumping to preamble instead")
diff --git a/pypy/jit/metainterp/optimizeopt/virtualstate.py b/pypy/jit/metainterp/optimizeopt/virtualstate.py
--- a/pypy/jit/metainterp/optimizeopt/virtualstate.py
+++ b/pypy/jit/metainterp/optimizeopt/virtualstate.py
@@ -288,7 +288,8 @@
 
 
 class NotVirtualStateInfo(AbstractVirtualStateInfo):
-    def __init__(self, value):
+    def __init__(self, value, is_opaque=False):
+        self.is_opaque = is_opaque
         self.known_class = value.known_class
         self.level = value.level
         if value.intbound is None:
@@ -357,6 +358,9 @@
         if self.lenbound or other.lenbound:
             raise InvalidLoop('The array length bounds does not match.')
 
+        if self.is_opaque:
+            raise InvalidLoop('Generating guards for opaque pointers is not safe')
+
         if self.level == LEVEL_KNOWNCLASS and \
            box.nonnull() and \
            self.known_class.same_constant(cpu.ts.cls_of_box(box)):
@@ -560,7 +564,8 @@
         return VirtualState([self.state(box) for box in jump_args])
 
     def make_not_virtual(self, value):
-        return NotVirtualStateInfo(value)
+        is_opaque = value in self.optimizer.opaque_pointers
+        return NotVirtualStateInfo(value, is_opaque)
 
     def make_virtual(self, known_class, fielddescrs):
         return VirtualStateInfo(known_class, fielddescrs)
@@ -585,6 +590,7 @@
         self.rename = {}
         self.optimizer = optimizer
         self.availible_boxes = availible_boxes
+        self.assumed_classes = {}
 
         if surviving_boxes is not None:
             for box in surviving_boxes:
@@ -678,6 +684,12 @@
             raise BoxNotProducable
 
     def add_potential(self, op, synthetic=False):
+        if op.result and op.result in self.optimizer.values:
+            value = self.optimizer.values[op.result]
+            if value in self.optimizer.opaque_pointers:
+                classbox = value.get_constant_class(self.optimizer.cpu)
+                if classbox:
+                    self.assumed_classes[op.result] = classbox
         if op.result not in self.potential_ops:
             self.potential_ops[op.result] = op
         else:
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -13,9 +13,7 @@
 from pypy.jit.metainterp import executor
 from pypy.jit.metainterp.logger import Logger
 from pypy.jit.metainterp.jitprof import EmptyProfiler
-from pypy.jit.metainterp.jitprof import GUARDS, RECORDED_OPS, ABORT_ESCAPE
-from pypy.jit.metainterp.jitprof import ABORT_TOO_LONG, ABORT_BRIDGE, \
-                                        ABORT_FORCE_QUASIIMMUT, ABORT_BAD_LOOP
+from pypy.rlib.jit import Counters
 from pypy.jit.metainterp.jitexc import JitException, get_llexception
 from pypy.jit.metainterp.heapcache import HeapCache
 from pypy.rlib.objectmodel import specialize
@@ -224,7 +222,7 @@
                     'float_neg', 'float_abs',
                     'cast_ptr_to_int', 'cast_int_to_ptr',
                     'convert_float_bytes_to_longlong',
-                    'convert_longlong_bytes_to_float',
+                    'convert_longlong_bytes_to_float', 'int_force_ge_zero',
                     ]:
         exec py.code.Source('''
             @arguments("box")
@@ -675,7 +673,7 @@
             from pypy.jit.metainterp.quasiimmut import do_force_quasi_immutable
             do_force_quasi_immutable(self.metainterp.cpu, box.getref_base(),
                                      mutatefielddescr)
-            raise SwitchToBlackhole(ABORT_FORCE_QUASIIMMUT)
+            raise SwitchToBlackhole(Counters.ABORT_FORCE_QUASIIMMUT)
         self.generate_guard(rop.GUARD_ISNULL, mutatebox, resumepc=orgpc)
 
     def _nonstandard_virtualizable(self, pc, box):
@@ -1255,7 +1253,7 @@
         guard_op = metainterp.history.record(opnum, moreargs, None,
                                              descr=resumedescr)
         self.capture_resumedata(resumedescr, resumepc)
-        self.metainterp.staticdata.profiler.count_ops(opnum, GUARDS)
+        self.metainterp.staticdata.profiler.count_ops(opnum, Counters.GUARDS)
         # count
         metainterp.attach_debug_info(guard_op)
         return guard_op
@@ -1776,7 +1774,7 @@
             return resbox.constbox()
         # record the operation
         profiler = self.staticdata.profiler
-        profiler.count_ops(opnum, RECORDED_OPS)
+        profiler.count_ops(opnum, Counters.RECORDED_OPS)
         self.heapcache.invalidate_caches(opnum, descr, argboxes)
         op = self.history.record(opnum, argboxes, resbox, descr)
         self.attach_debug_info(op)
@@ -1837,7 +1835,7 @@
             if greenkey_of_huge_function is not None:
                 warmrunnerstate.disable_noninlinable_function(
                     greenkey_of_huge_function)
-            raise SwitchToBlackhole(ABORT_TOO_LONG)
+            raise SwitchToBlackhole(Counters.ABORT_TOO_LONG)
 
     def _interpret(self):
         # Execute the frames forward until we raise a DoneWithThisFrame,
@@ -1921,7 +1919,7 @@
         try:
             self.prepare_resume_from_failure(key.guard_opnum, dont_change_position)
             if self.resumekey_original_loop_token is None:   # very rare case
-                raise SwitchToBlackhole(ABORT_BRIDGE)
+                raise SwitchToBlackhole(Counters.ABORT_BRIDGE)
             self.interpret()
         except SwitchToBlackhole, stb:
             self.run_blackhole_interp_to_cancel_tracing(stb)
@@ -1996,7 +1994,7 @@
                 # raises in case it works -- which is the common case
                 if self.partial_trace:
                     if  start != self.retracing_from:
-                        raise SwitchToBlackhole(ABORT_BAD_LOOP) # For now
+                        raise SwitchToBlackhole(Counters.ABORT_BAD_LOOP) # For now
                 self.compile_loop(original_boxes, live_arg_boxes, start, resumedescr)
                 # creation of the loop was cancelled!
                 self.cancel_count += 1
@@ -2005,7 +2003,7 @@
                     if memmgr:
                         if self.cancel_count > memmgr.max_unroll_loops:
                             self.staticdata.log('cancelled too many times!')
-                            raise SwitchToBlackhole(ABORT_BAD_LOOP)
+                            raise SwitchToBlackhole(Counters.ABORT_BAD_LOOP)
                 self.staticdata.log('cancelled, tracing more...')
 
         # Otherwise, no loop found so far, so continue tracing.
@@ -2299,7 +2297,8 @@
             if vinfo.tracing_after_residual_call(virtualizable):
                 # the virtualizable escaped during CALL_MAY_FORCE.
                 self.load_fields_from_virtualizable()
-                raise SwitchToBlackhole(ABORT_ESCAPE, raising_exception=True)
+                raise SwitchToBlackhole(Counters.ABORT_ESCAPE,
+                                        raising_exception=True)
                 # ^^^ we set 'raising_exception' to True because we must still
                 # have the eventual exception raised (this is normally done
                 # after the call to vable_after_residual_call()).
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -443,6 +443,7 @@
     'INT_IS_TRUE/1b',
     'INT_NEG/1',
     'INT_INVERT/1',
+    'INT_FORCE_GE_ZERO/1',
     #
     'SAME_AS/1',      # gets a Const or a Box, turns it into another Box
     'CAST_PTR_TO_INT/1',
diff --git a/pypy/jit/metainterp/resume.py b/pypy/jit/metainterp/resume.py
--- a/pypy/jit/metainterp/resume.py
+++ b/pypy/jit/metainterp/resume.py
@@ -10,6 +10,7 @@
 from pypy.rpython import annlowlevel
 from pypy.rlib import rarithmetic, rstack
 from pypy.rlib.objectmodel import we_are_translated, specialize
+from pypy.rlib.objectmodel import compute_unique_id
 from pypy.rlib.debug import have_debug_prints, ll_assert
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
 from pypy.jit.metainterp.optimize import InvalidLoop
@@ -254,9 +255,9 @@
         self.cached_virtuals.clear()
 
     def update_counters(self, profiler):
-        profiler.count(jitprof.NVIRTUALS, self.nvirtuals)
-        profiler.count(jitprof.NVHOLES, self.nvholes)
-        profiler.count(jitprof.NVREUSED, self.nvreused)
+        profiler.count(jitprof.Counters.NVIRTUALS, self.nvirtuals)
+        profiler.count(jitprof.Counters.NVHOLES, self.nvholes)
+        profiler.count(jitprof.Counters.NVREUSED, self.nvreused)
 
 _frame_info_placeholder = (None, 0, 0)
 
@@ -493,7 +494,7 @@
         return self.setfields(decoder, struct)
 
     def debug_prints(self):
-        debug_print("\tvirtualinfo", self.known_class.repr_rpython())
+        debug_print("\tvirtualinfo", self.known_class.repr_rpython(), " at ",  compute_unique_id(self))
         AbstractVirtualStructInfo.debug_prints(self)
 
 
@@ -509,7 +510,7 @@
         return self.setfields(decoder, struct)
 
     def debug_prints(self):
-        debug_print("\tvstructinfo", self.typedescr.repr_rpython())
+        debug_print("\tvstructinfo", self.typedescr.repr_rpython(), " at ",  compute_unique_id(self))
         AbstractVirtualStructInfo.debug_prints(self)
 
 class VArrayInfo(AbstractVirtualInfo):
@@ -539,7 +540,7 @@
         return array
 
     def debug_prints(self):
-        debug_print("\tvarrayinfo", self.arraydescr)
+        debug_print("\tvarrayinfo", self.arraydescr, " at ",  compute_unique_id(self))
         for i in self.fieldnums:
             debug_print("\t\t", str(untag(i)))
 
@@ -550,7 +551,7 @@
         self.fielddescrs = fielddescrs
 
     def debug_prints(self):
-        debug_print("\tvarraystructinfo", self.arraydescr)
+        debug_print("\tvarraystructinfo", self.arraydescr, " at ",  compute_unique_id(self))
         for i in self.fieldnums:
             debug_print("\t\t", str(untag(i)))
 
@@ -581,7 +582,7 @@
         return string
 
     def debug_prints(self):
-        debug_print("\tvstrplaininfo length", len(self.fieldnums))
+        debug_print("\tvstrplaininfo length", len(self.fieldnums), " at ",  compute_unique_id(self))
 
 
 class VStrConcatInfo(AbstractVirtualInfo):
@@ -599,7 +600,7 @@
         return string
 
     def debug_prints(self):
-        debug_print("\tvstrconcatinfo")
+        debug_print("\tvstrconcatinfo at ",  compute_unique_id(self))
         for i in self.fieldnums:
             debug_print("\t\t", str(untag(i)))
 
@@ -615,7 +616,7 @@
         return string
 
     def debug_prints(self):
-        debug_print("\tvstrsliceinfo")
+        debug_print("\tvstrsliceinfo at ",  compute_unique_id(self))
         for i in self.fieldnums:
             debug_print("\t\t", str(untag(i)))
 
@@ -636,7 +637,7 @@
         return string
 
     def debug_prints(self):
-        debug_print("\tvuniplaininfo length", len(self.fieldnums))
+        debug_print("\tvuniplaininfo length", len(self.fieldnums), " at ",  compute_unique_id(self))
 
 
 class VUniConcatInfo(AbstractVirtualInfo):
@@ -654,7 +655,7 @@
         return string
 
     def debug_prints(self):
-        debug_print("\tvuniconcatinfo")
+        debug_print("\tvuniconcatinfo at ",  compute_unique_id(self))
         for i in self.fieldnums:
             debug_print("\t\t", str(untag(i)))
 
@@ -671,7 +672,7 @@
         return string
 
     def debug_prints(self):
-        debug_print("\tvunisliceinfo")
+        debug_print("\tvunisliceinfo at ",  compute_unique_id(self))
         for i in self.fieldnums:
             debug_print("\t\t", str(untag(i)))
 
@@ -1280,7 +1281,6 @@
 
 def dump_storage(storage, liveboxes):
     "For profiling only."
-    from pypy.rlib.objectmodel import compute_unique_id
     debug_start("jit-resume")
     if have_debug_prints():
         debug_print('Log storage', compute_unique_id(storage))
@@ -1313,4 +1313,13 @@
                     debug_print('\t\t', 'None')
                 else:
                     virtual.debug_prints()
+        if storage.rd_pendingfields:
+            debug_print('\tpending setfields')
+            for i in range(len(storage.rd_pendingfields)):
+                lldescr  = storage.rd_pendingfields[i].lldescr
+                num      = storage.rd_pendingfields[i].num
+                fieldnum = storage.rd_pendingfields[i].fieldnum
+                itemindex= storage.rd_pendingfields[i].itemindex
+                debug_print("\t\t", str(lldescr), str(untag(num)), str(untag(fieldnum)), itemindex)
+
     debug_stop("jit-resume")
diff --git a/pypy/jit/metainterp/test/test_dict.py b/pypy/jit/metainterp/test/test_dict.py
--- a/pypy/jit/metainterp/test/test_dict.py
+++ b/pypy/jit/metainterp/test/test_dict.py
@@ -161,6 +161,22 @@
                            'guard_no_exception': 8, 'new': 2,
                            'guard_false': 2, 'int_is_true': 2})
 
+    def test_unrolling_of_dict_iter(self):
+        driver = JitDriver(greens = [], reds = ['n'])
+        
+        def f(n):
+            while n > 0:
+                driver.jit_merge_point(n=n)
+                d = {1: 1}
+                for elem in d:
+                    n -= elem
+            return n
+
+        res = self.meta_interp(f, [10], listops=True)
+        assert res == 0
+        self.check_simple_loop({'int_sub': 1, 'int_gt': 1, 'guard_true': 1,
+                                'jump': 1})
+
 
 class TestOOtype(DictTests, OOJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_jitiface.py b/pypy/jit/metainterp/test/test_jitiface.py
--- a/pypy/jit/metainterp/test/test_jitiface.py
+++ b/pypy/jit/metainterp/test/test_jitiface.py
@@ -1,13 +1,15 @@
 
-from pypy.rlib.jit import JitDriver, JitHookInterface
+from pypy.rlib.jit import JitDriver, JitHookInterface, Counters
 from pypy.rlib import jit_hooks
 from pypy.jit.metainterp.test.support import LLJitMixin
 from pypy.jit.codewriter.policy import JitPolicy
-from pypy.jit.metainterp.jitprof import ABORT_FORCE_QUASIIMMUT
 from pypy.jit.metainterp.resoperation import rop
 from pypy.rpython.annlowlevel import hlstr
+from pypy.jit.metainterp.jitprof import Profiler
 
-class TestJitHookInterface(LLJitMixin):
+class JitHookInterfaceTests(object):
+    # !!!note!!! - don't subclass this from the backend. Subclass the LL
+    # class later instead
     def test_abort_quasi_immut(self):
         reasons = []
         
@@ -41,7 +43,7 @@
         assert f(100, 7) == 721
         res = self.meta_interp(f, [100, 7], policy=JitPolicy(iface))
         assert res == 721
-        assert reasons == [ABORT_FORCE_QUASIIMMUT] * 2
+        assert reasons == [Counters.ABORT_FORCE_QUASIIMMUT] * 2
 
     def test_on_compile(self):
         called = []
@@ -146,3 +148,74 @@
             assert jit_hooks.resop_getresult(op) == box5
 
         self.meta_interp(main, [])
+
+    def test_get_stats(self):
+        driver = JitDriver(greens = [], reds = ['i', 's'])
+
+        def loop(i):
+            s = 0
+            while i > 0:
+                driver.jit_merge_point(i=i, s=s)
+                if i % 2:
+                    s += 1
+                i -= 1
+                s+= 2
+            return s
+
+        def main():
+            loop(30)
+            assert jit_hooks.stats_get_counter_value(None,
+                                           Counters.TOTAL_COMPILED_LOOPS) == 1
+            assert jit_hooks.stats_get_counter_value(None,
+                                           Counters.TOTAL_COMPILED_BRIDGES) == 1
+            assert jit_hooks.stats_get_counter_value(None,
+                                                     Counters.TRACING) == 2
+            assert jit_hooks.stats_get_times_value(None, Counters.TRACING) >= 0
+
+        self.meta_interp(main, [], ProfilerClass=Profiler)
+
+class LLJitHookInterfaceTests(JitHookInterfaceTests):
+    # use this for any backend, instead of the super class
+    
+    def test_ll_get_stats(self):
+        driver = JitDriver(greens = [], reds = ['i', 's'])
+
+        def loop(i):
+            s = 0
+            while i > 0:
+                driver.jit_merge_point(i=i, s=s)
+                if i % 2:
+                    s += 1
+                i -= 1
+                s+= 2
+            return s
+
+        def main(b):
+            jit_hooks.stats_set_debug(None, b)
+            loop(30)
+            l = jit_hooks.stats_get_loop_run_times(None)
+            if b:
+                assert len(l) == 4
+                # completely specific test that would fail each time
+                # we change anything major. for now it's 4
+                # (loop, bridge, 2 entry points)
+                assert l[0].type == 'e'
+                assert l[0].number == 0
+                assert l[0].counter == 4
+                assert l[1].type == 'l'
+                assert l[1].counter == 4
+                assert l[2].type == 'l'
+                assert l[2].counter == 23
+                assert l[3].type == 'b'
+                assert l[3].number == 4
+                assert l[3].counter == 11
+            else:
+                assert len(l) == 0
+        self.meta_interp(main, [True], ProfilerClass=Profiler)
+        # this so far does not work because of the way setup_once is done,
+        # but fine, it's only about untranslated version anyway
+        #self.meta_interp(main, [False], ProfilerClass=Profiler)
+        
+
+class TestJitHookInterface(JitHookInterfaceTests, LLJitMixin):
+    pass
diff --git a/pypy/jit/metainterp/test/test_jitprof.py b/pypy/jit/metainterp/test/test_jitprof.py
--- a/pypy/jit/metainterp/test/test_jitprof.py
+++ b/pypy/jit/metainterp/test/test_jitprof.py
@@ -1,9 +1,9 @@
 
 from pypy.jit.metainterp.warmspot import ll_meta_interp
-from pypy.rlib.jit import JitDriver, dont_look_inside, elidable
+from pypy.rlib.jit import JitDriver, dont_look_inside, elidable, Counters
 from pypy.jit.metainterp.test.support import LLJitMixin
 from pypy.jit.metainterp import pyjitpl
-from pypy.jit.metainterp.jitprof import *
+from pypy.jit.metainterp.jitprof import Profiler
 
 class FakeProfiler(Profiler):
     def start(self):
@@ -46,10 +46,10 @@
         assert res == 84
         profiler = pyjitpl._warmrunnerdesc.metainterp_sd.profiler
         expected = [
-            TRACING,
-            BACKEND,
-            ~ BACKEND,
-            ~ TRACING,
+            Counters.TRACING,
+            Counters.BACKEND,
+            ~ Counters.BACKEND,
+            ~ Counters.TRACING,
             ]
         assert profiler.events == expected
         assert profiler.times == [2, 1]
diff --git a/pypy/jit/metainterp/test/test_list.py b/pypy/jit/metainterp/test/test_list.py
--- a/pypy/jit/metainterp/test/test_list.py
+++ b/pypy/jit/metainterp/test/test_list.py
@@ -251,6 +251,16 @@
         self.meta_interp(f, [10], listops=True)
         self.check_resops(new_array=0, call=0)
 
+    def test_list_mul(self):
+        def f(i):
+            l = [0] * i
+            return len(l)
+
+        r = self.interp_operations(f, [3])
+        assert r == 3
+        r = self.interp_operations(f, [-1])
+        assert r == 0
+
 class TestOOtype(ListTests, OOJitMixin):
     pass
 
diff --git a/pypy/jit/metainterp/test/test_loop.py b/pypy/jit/metainterp/test/test_loop.py
--- a/pypy/jit/metainterp/test/test_loop.py
+++ b/pypy/jit/metainterp/test/test_loop.py
@@ -871,6 +871,42 @@
         res = self.meta_interp(f, [20, 10, 1])
         assert res == f(20, 10, 1)
 
+    def test_boxed_unerased_pointers_in_short_preamble(self):
+        from pypy.rlib.rerased import new_erasing_pair
+        from pypy.rpython.lltypesystem import lltype
+        class A(object):
+            def __init__(self, val):
+                self.val = val
+            def tst(self):
+                return self.val
+
+        class Box(object):
+            def __init__(self, val):
+                self.val = val
+
+        erase_A, unerase_A = new_erasing_pair('A')
+        erase_TP, unerase_TP = new_erasing_pair('TP')
+        TP = lltype.GcArray(lltype.Signed)
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'm', 'i', 'sa', 'p'])
+        def f(n, m):
+            i = sa = 0
+            p = Box(erase_A(A(7)))
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, m=m, i=i, sa=sa, p=p)
+                if i < m:
+                    sa += unerase_A(p.val).tst()
+                elif i == m:
+                    a = lltype.malloc(TP, 5)
+                    a[0] = 42
+                    p = Box(erase_TP(a))
+                else:
+                    sa += unerase_TP(p.val)[0]
+                sa -= A(i).val
+                i += 1
+            return sa
+        res = self.meta_interp(f, [20, 10])
+        assert res == f(20, 10)
+
 class TestOOtype(LoopTest, OOJitMixin):
     pass
 
diff --git a/pypy/jit/metainterp/test/test_virtualstate.py b/pypy/jit/metainterp/test/test_virtualstate.py
--- a/pypy/jit/metainterp/test/test_virtualstate.py
+++ b/pypy/jit/metainterp/test/test_virtualstate.py
@@ -908,6 +908,141 @@
         """
         self.optimize_bridge(loop, bridge, expected, p5=self.myptr, p6=self.myptr2)
 
+    def test_licm_boxed_opaque_getitem(self):
+        loop = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        mark_opaque_ptr(p2)        
+        guard_class(p2,  ConstClass(node_vtable)) []
+        i3 = getfield_gc(p2, descr=otherdescr)
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p1)
+        """
+        bridge = """
+        [p1]
+        guard_nonnull(p1) []
+        jump(p1)
+        """
+        expected = """
+        [p1]
+        guard_nonnull(p1) []
+        p2 = getfield_gc(p1, descr=nextdescr)
+        jump(p1)
+        """        
+        self.optimize_bridge(loop, bridge, expected, 'Preamble')
+        
+        bridge = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        guard_class(p2,  ConstClass(node_vtable2)) []
+        jump(p1)
+        """
+        expected = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        guard_class(p2,  ConstClass(node_vtable2)) []
+        jump(p1)
+        """
+        self.optimize_bridge(loop, bridge, expected, 'Preamble')
+
+        bridge = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        guard_class(p2,  ConstClass(node_vtable)) []
+        jump(p1)
+        """
+        expected = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        guard_class(p2,  ConstClass(node_vtable)) []
+        i3 = getfield_gc(p2, descr=otherdescr)
+        jump(p1, i3)
+        """
+        self.optimize_bridge(loop, bridge, expected, 'Loop')
+
+    def test_licm_unboxed_opaque_getitem(self):
+        loop = """
+        [p2]
+        mark_opaque_ptr(p2)        
+        guard_class(p2,  ConstClass(node_vtable)) []
+        i3 = getfield_gc(p2, descr=otherdescr)
+        i4 = call(i3, descr=nonwritedescr)
+        jump(p2)
+        """
+        bridge = """
+        [p1]
+        guard_nonnull(p1) []
+        jump(p1)
+        """
+        self.optimize_bridge(loop, bridge, 'RETRACE', p1=self.myptr)
+        self.optimize_bridge(loop, bridge, 'RETRACE', p1=self.myptr2)
+        
+        bridge = """
+        [p2]
+        guard_class(p2,  ConstClass(node_vtable2)) []
+        jump(p2)
+        """
+        self.optimize_bridge(loop, bridge, 'RETRACE')
+
+        bridge = """
+        [p2]
+        guard_class(p2,  ConstClass(node_vtable)) []
+        jump(p2)
+        """
+        expected = """
+        [p2]
+        guard_class(p2,  ConstClass(node_vtable)) []
+        i3 = getfield_gc(p2, descr=otherdescr)
+        jump(p2, i3)
+        """
+        self.optimize_bridge(loop, bridge, expected, 'Loop')
+
+    def test_licm_virtual_opaque_getitem(self):
+        loop = """
+        [p1]
+        p2 = getfield_gc(p1, descr=nextdescr) 
+        mark_opaque_ptr(p2)        
+        guard_class(p2,  ConstClass(node_vtable)) []
+        i3 = getfield_gc(p2, descr=otherdescr)
+        i4 = call(i3, descr=nonwritedescr)
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p3, p2, descr=nextdescr)
+        jump(p3)
+        """
+        bridge = """
+        [p1]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p3, p1, descr=nextdescr)
+        jump(p3)
+        """
+        self.optimize_bridge(loop, bridge, 'RETRACE', p1=self.myptr)
+        self.optimize_bridge(loop, bridge, 'RETRACE', p1=self.myptr2)
+
+        bridge = """
+        [p1]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        guard_class(p1,  ConstClass(node_vtable2)) []
+        setfield_gc(p3, p1, descr=nextdescr)
+        jump(p3)
+        """
+        self.optimize_bridge(loop, bridge, 'RETRACE')
+
+        bridge = """
+        [p1]
+        p3 = new_with_vtable(ConstClass(node_vtable))
+        guard_class(p1,  ConstClass(node_vtable)) []
+        setfield_gc(p3, p1, descr=nextdescr)
+        jump(p3)
+        """
+        expected = """
+        [p1]
+        guard_class(p1,  ConstClass(node_vtable)) []
+        i3 = getfield_gc(p1, descr=otherdescr)
+        jump(p1, i3)
+        """
+        self.optimize_bridge(loop, bridge, expected)
+
+
 class TestLLtypeGuards(BaseTestGenerateGuards, LLtypeMixin):
     pass
 
@@ -915,6 +1050,9 @@
     pass
 
 class FakeOptimizer:
+    def __init__(self):
+        self.opaque_pointers = {}
+        self.values = {}
     def make_equal_to(*args):
         pass
     def getvalue(*args):
diff --git a/pypy/jit/metainterp/warmspot.py b/pypy/jit/metainterp/warmspot.py
--- a/pypy/jit/metainterp/warmspot.py
+++ b/pypy/jit/metainterp/warmspot.py
@@ -6,6 +6,7 @@
 from pypy.annotation import model as annmodel
 from pypy.rpython.llinterp import LLException
 from pypy.rpython.test.test_llinterp import get_interpreter, clear_tcache
+from pypy.rpython.annlowlevel import cast_instance_to_base_ptr
 from pypy.objspace.flow.model import SpaceOperation, Variable, Constant
 from pypy.objspace.flow.model import checkgraph, Link, copygraph
 from pypy.rlib.objectmodel import we_are_translated
@@ -221,7 +222,7 @@
         self.rewrite_access_helpers()
         self.codewriter.make_jitcodes(verbose=verbose)
         self.rewrite_can_enter_jits()
-        self.rewrite_set_param()
+        self.rewrite_set_param_and_get_stats()
         self.rewrite_force_virtual(vrefinfo)
         self.rewrite_force_quasi_immutable()
         self.add_finish()
@@ -632,14 +633,22 @@
             self.rewrite_access_helper(op)
 
     def rewrite_access_helper(self, op):
-        ARGS = [arg.concretetype for arg in op.args[2:]]
-        RESULT = op.result.concretetype
-        FUNCPTR = lltype.Ptr(lltype.FuncType(ARGS, RESULT))
         # make sure we make a copy of function so it no longer belongs
         # to extregistry
         func = op.args[1].value
-        func = func_with_new_name(func, func.func_name + '_compiled')
-        ptr = self.helper_func(FUNCPTR, func)
+        if func.func_name.startswith('stats_'):
+            # get special treatment since we rewrite it to a call that accepts
+            # jit driver
+            func = func_with_new_name(func, func.func_name + '_compiled')
+            def new_func(ignored, *args):
+                return func(self, *args)
+            ARGS = [lltype.Void] + [arg.concretetype for arg in op.args[3:]]
+        else:
+            ARGS = [arg.concretetype for arg in op.args[2:]]
+            new_func = func_with_new_name(func, func.func_name + '_compiled')
+        RESULT = op.result.concretetype
+        FUNCPTR = lltype.Ptr(lltype.FuncType(ARGS, RESULT))
+        ptr = self.helper_func(FUNCPTR, new_func)
         op.opname = 'direct_call'
         op.args = [Constant(ptr, FUNCPTR)] + op.args[2:]
 
@@ -859,7 +868,7 @@
             call_final_function(self.translator, finish,
                                 annhelper = self.annhelper)
 
-    def rewrite_set_param(self):
+    def rewrite_set_param_and_get_stats(self):
         from pypy.rpython.lltypesystem.rstr import STR
 
         closures = {}
diff --git a/pypy/jit/tl/pypyjit.py b/pypy/jit/tl/pypyjit.py
--- a/pypy/jit/tl/pypyjit.py
+++ b/pypy/jit/tl/pypyjit.py
@@ -43,6 +43,7 @@
 config.objspace.usemodules._lsprof = False
 #
 config.objspace.usemodules._ffi = True
+#config.objspace.usemodules.cppyy = True
 config.objspace.usemodules.micronumpy = False
 #
 set_pypy_opt_level(config, level='jit')
diff --git a/pypy/jit/tl/pypyjit_demo.py b/pypy/jit/tl/pypyjit_demo.py
--- a/pypy/jit/tl/pypyjit_demo.py
+++ b/pypy/jit/tl/pypyjit_demo.py
@@ -1,19 +1,27 @@
 import pypyjit
 pypyjit.set_param(threshold=200)
 
+kwargs = {"z": 1}
 
-def g(*args):
-    return len(args)
+def f(*args, **kwargs):
+    result = g(1, *args, **kwargs)
+    return result + 2
 
-def f(n):
-    s = 0
-    for i in range(n):
-        l = [i, n, 2]
-        s += g(*l)
-    return s
+def g(x, y, z=2):
+    return x - y + z
+
+def main():
+    res = 0
+    i = 0
+    while i < 10000:
+        res = f(res, z=i)
+        g(1, res, **kwargs)
+        i += 1
+    return res
+
 
 try:
-    print f(301)
+    print main()
 
 except Exception, e:
     print "Exception: ", type(e)
diff --git a/pypy/module/__pypy__/__init__.py b/pypy/module/__pypy__/__init__.py
--- a/pypy/module/__pypy__/__init__.py
+++ b/pypy/module/__pypy__/__init__.py
@@ -43,7 +43,11 @@
         'do_what_I_mean'            : 'interp_magic.do_what_I_mean',
         'list_strategy'             : 'interp_magic.list_strategy',
         'validate_fd'               : 'interp_magic.validate_fd',
+        'newdict'                   : 'interp_dict.newdict',
+        'dictstrategy'              : 'interp_dict.dictstrategy',
     }
+    if sys.platform == 'win32':
+        interpleveldefs['get_console_cp'] = 'interp_magic.get_console_cp'
 
     submodules = {
         "builders": BuildersModule,
diff --git a/pypy/module/__pypy__/interp_dict.py b/pypy/module/__pypy__/interp_dict.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/__pypy__/interp_dict.py
@@ -0,0 +1,24 @@
+
+from pypy.interpreter.gateway import unwrap_spec
+from pypy.interpreter.error import operationerrfmt, OperationError
+from pypy.objspace.std.dictmultiobject import W_DictMultiObject
+
+@unwrap_spec(type=str)
+def newdict(space, type):
+    if type == 'module':
+        return space.newdict(module=True)
+    elif type == 'instance':
+        return space.newdict(instance=True)
+    elif type == 'kwargs':
+        return space.newdict(kwargs=True)
+    elif type == 'strdict':
+        return space.newdict(strdict=True)
+    else:
+        raise operationerrfmt(space.w_TypeError, "unknown type of dict %s",
+                              type)
+
+def dictstrategy(space, w_obj):
+    if not isinstance(w_obj, W_DictMultiObject):
+        raise OperationError(space.w_TypeError,
+                             space.wrap("expecting dict object"))
+    return space.wrap('%r' % (w_obj.strategy,))
diff --git a/pypy/module/__pypy__/interp_magic.py b/pypy/module/__pypy__/interp_magic.py
--- a/pypy/module/__pypy__/interp_magic.py
+++ b/pypy/module/__pypy__/interp_magic.py
@@ -88,3 +88,10 @@
         rposix.validate_fd(fd)
     except OSError, e:
         raise wrap_oserror(space, e)
+
+def get_console_cp(space):
+    from pypy.rlib import rwin32    # Windows only
+    return space.newtuple([
+        space.wrap('cp%d' % rwin32.GetConsoleCP()),
+        space.wrap('cp%d' % rwin32.GetConsoleOutputCP()),
+        ])
diff --git a/pypy/module/_ffi/__init__.py b/pypy/module/_ffi/__init__.py
--- a/pypy/module/_ffi/__init__.py
+++ b/pypy/module/_ffi/__init__.py
@@ -1,4 +1,5 @@
 from pypy.interpreter.mixedmodule import MixedModule
+import os
 
 class Module(MixedModule):
 
@@ -10,7 +11,8 @@
         '_StructDescr': 'interp_struct.W__StructDescr',
         'Field':     'interp_struct.W_Field',
     }
-
+    if os.name == 'nt':
+        interpleveldefs['WinDLL'] = 'interp_funcptr.W_WinDLL'
     appleveldefs = {
         'Structure': 'app_struct.Structure',
         }
diff --git a/pypy/module/_ffi/interp_funcptr.py b/pypy/module/_ffi/interp_funcptr.py
--- a/pypy/module/_ffi/interp_funcptr.py
+++ b/pypy/module/_ffi/interp_funcptr.py
@@ -9,11 +9,57 @@
 #
 from pypy.rlib import jit
 from pypy.rlib import libffi
+from pypy.rlib.clibffi import get_libc_name, StackCheckError
 from pypy.rlib.rdynload import DLOpenError
 from pypy.rlib.rarithmetic import intmask, r_uint
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.module._ffi.type_converter import FromAppLevelConverter, ToAppLevelConverter
 
+import os
+if os.name == 'nt':
+    def _getfunc(space, CDLL, w_name, w_argtypes, w_restype):
+        argtypes_w, argtypes, w_restype, restype = unpack_argtypes(
+            space, w_argtypes, w_restype)
+        if space.isinstance_w(w_name, space.w_str):
+            name = space.str_w(w_name)
+            try:
+                func = CDLL.cdll.getpointer(name, argtypes, restype, 
+                                            flags = CDLL.flags)
+            except KeyError:
+                raise operationerrfmt(
+                    space.w_AttributeError,
+                    "No symbol %s found in library %s", name, CDLL.name)
+
+            return W_FuncPtr(func, argtypes_w, w_restype)
+        elif space.isinstance_w(w_name, space.w_int):
+            ordinal = space.int_w(w_name)
+            try:
+                func = CDLL.cdll.getpointer_by_ordinal(
+                    ordinal, argtypes, restype, 
+                    flags = CDLL.flags)
+            except KeyError:
+                raise operationerrfmt(
+                    space.w_AttributeError,
+                    "No ordinal %d found in library %s", ordinal, CDLL.name)
+            return W_FuncPtr(func, argtypes_w, w_restype)
+        else:
+            raise OperationError(space.w_TypeError, space.wrap(
+                    'function name must be a string or integer'))
+else:    
+    @unwrap_spec(name=str)
+    def _getfunc(space, CDLL, w_name, w_argtypes, w_restype):
+        name = space.str_w(w_name)
+        argtypes_w, argtypes, w_restype, restype = unpack_argtypes(
+            space, w_argtypes, w_restype)
+        try:
+            func = CDLL.cdll.getpointer(name, argtypes, restype, 
+                                        flags = CDLL.flags)
+        except KeyError:
+            raise operationerrfmt(
+                space.w_AttributeError,
+                "No symbol %s found in library %s", name, CDLL.name)
+
+        return W_FuncPtr(func, argtypes_w, w_restype)
 
 def unwrap_ffitype(space, w_argtype, allow_void=False):
     res = w_argtype.get_ffitype()
@@ -59,7 +105,10 @@
         self = jit.promote(self)
         argchain = self.build_argchain(space, args_w)
         func_caller = CallFunctionConverter(space, self.func, argchain)
-        return func_caller.do_and_wrap(self.w_restype)
+        try:
+            return func_caller.do_and_wrap(self.w_restype)
+        except StackCheckError, e:
+            raise OperationError(space.w_ValueError, space.wrap(e.message))
         #return self._do_call(space, argchain)
 
     def free_temp_buffers(self, space):
@@ -230,13 +279,14 @@
     restype = unwrap_ffitype(space, w_restype, allow_void=True)
     return argtypes_w, argtypes, w_restype, restype
 
-@unwrap_spec(addr=r_uint, name=str)
-def descr_fromaddr(space, w_cls, addr, name, w_argtypes, w_restype):
+@unwrap_spec(addr=r_uint, name=str, flags=int)
+def descr_fromaddr(space, w_cls, addr, name, w_argtypes, 
+                    w_restype, flags=libffi.FUNCFLAG_CDECL):
     argtypes_w, argtypes, w_restype, restype = unpack_argtypes(space,
                                                                w_argtypes,
                                                                w_restype)
     addr = rffi.cast(rffi.VOIDP, addr)
-    func = libffi.Func(name, argtypes, restype, addr)
+    func = libffi.Func(name, argtypes, restype, addr, flags)
     return W_FuncPtr(func, argtypes_w, w_restype)
 
 
@@ -254,6 +304,7 @@
 
 class W_CDLL(Wrappable):
     def __init__(self, space, name, mode):
+        self.flags = libffi.FUNCFLAG_CDECL
         self.space = space
         if name is None:
             self.name = "<None>"
@@ -265,18 +316,8 @@
             raise operationerrfmt(space.w_OSError, '%s: %s', self.name,
                                   e.msg or 'unspecified error')
 
-    @unwrap_spec(name=str)
-    def getfunc(self, space, name, w_argtypes, w_restype):
-        argtypes_w, argtypes, w_restype, restype = unpack_argtypes(space,
-                                                                   w_argtypes,
-                                                                   w_restype)
-        try:
-            func = self.cdll.getpointer(name, argtypes, restype)
-        except KeyError:
-            raise operationerrfmt(space.w_AttributeError,
-                                  "No symbol %s found in library %s", name, self.name)
-
-        return W_FuncPtr(func, argtypes_w, w_restype)
+    def getfunc(self, space, w_name, w_argtypes, w_restype):
+        return _getfunc(space, self, w_name, w_argtypes, w_restype)
 
     @unwrap_spec(name=str)
     def getaddressindll(self, space, name):
@@ -284,8 +325,9 @@
             address_as_uint = rffi.cast(lltype.Unsigned,
                                         self.cdll.getaddressindll(name))
         except KeyError:
-            raise operationerrfmt(space.w_ValueError,
-                                  "No symbol %s found in library %s", name, self.name)
+            raise operationerrfmt(
+                space.w_ValueError,
+                "No symbol %s found in library %s", name, self.name)
         return space.wrap(address_as_uint)
 
 @unwrap_spec(name='str_or_None', mode=int)
@@ -300,10 +342,26 @@
     getaddressindll = interp2app(W_CDLL.getaddressindll),
     )
 
+class W_WinDLL(W_CDLL):
+    def __init__(self, space, name, mode):
+        W_CDLL.__init__(self, space, name, mode)
+        self.flags = libffi.FUNCFLAG_STDCALL
+
+@unwrap_spec(name='str_or_None', mode=int)
+def descr_new_windll(space, w_type, name, mode=-1):
+    return space.wrap(W_WinDLL(space, name, mode))
+
+
+W_WinDLL.typedef = TypeDef(
+    '_ffi.WinDLL',
+    __new__     = interp2app(descr_new_windll),
+    getfunc     = interp2app(W_WinDLL.getfunc),
+    getaddressindll = interp2app(W_WinDLL.getaddressindll),
+    )
+
 # ========================================================================
 
 def get_libc(space):
-    from pypy.rlib.clibffi import get_libc_name
     try:
         return space.wrap(W_CDLL(space, get_libc_name(), -1))
     except OSError, e:
diff --git a/pypy/module/_ffi/interp_struct.py b/pypy/module/_ffi/interp_struct.py
--- a/pypy/module/_ffi/interp_struct.py
+++ b/pypy/module/_ffi/interp_struct.py
@@ -56,8 +56,7 @@
 
 class W__StructDescr(Wrappable):
 
-    def __init__(self, space, name):
-        self.space = space
+    def __init__(self, name):
         self.w_ffitype = W_FFIType('struct %s' % name, clibffi.FFI_TYPE_NULL,
                                    w_structdescr=self)
         self.fields_w = None
@@ -69,7 +68,6 @@
             raise operationerrfmt(space.w_ValueError,
                                   "%s's fields has already been defined",
                                   self.w_ffitype.name)
-        space = self.space
         fields_w = space.fixedview(w_fields)
         # note that the fields_w returned by compute_size_and_alignement has a
         # different annotation than the original: list(W_Root) vs list(W_Field)
@@ -104,11 +102,11 @@
         return W__StructInstance(self, allocate=False, autofree=True, rawmem=rawmem)
 
     @jit.elidable_promote('0')
-    def get_type_and_offset_for_field(self, name):
+    def get_type_and_offset_for_field(self, space, name):
         try:
             w_field = self.name2w_field[name]
         except KeyError:
-            raise operationerrfmt(self.space.w_AttributeError, '%s', name)
+            raise operationerrfmt(space.w_AttributeError, '%s', name)
 
         return w_field.w_ffitype, w_field.offset
 
@@ -116,7 +114,7 @@
 
 @unwrap_spec(name=str)
 def descr_new_structdescr(space, w_type, name, w_fields=None):
-    descr = W__StructDescr(space, name)
+    descr = W__StructDescr(name)
     if w_fields is not space.w_None:
         descr.define_fields(space, w_fields)
     return descr
@@ -185,13 +183,15 @@
 
     @unwrap_spec(name=str)
     def getfield(self, space, name):
-        w_ffitype, offset = self.structdescr.get_type_and_offset_for_field(name)
+        w_ffitype, offset = self.structdescr.get_type_and_offset_for_field(
+            space, name)
         field_getter = GetFieldConverter(space, self.rawmem, offset)
         return field_getter.do_and_wrap(w_ffitype)
 
     @unwrap_spec(name=str)
     def setfield(self, space, name, w_value):
-        w_ffitype, offset = self.structdescr.get_type_and_offset_for_field(name)
+        w_ffitype, offset = self.structdescr.get_type_and_offset_for_field(
+            space, name)
         field_setter = SetFieldConverter(space, self.rawmem, offset)
         field_setter.unwrap_and_do(w_ffitype, w_value)
 
diff --git a/pypy/module/_ffi/test/test_funcptr.py b/pypy/module/_ffi/test/test_funcptr.py
--- a/pypy/module/_ffi/test/test_funcptr.py
+++ b/pypy/module/_ffi/test/test_funcptr.py
@@ -1,11 +1,11 @@
 from pypy.conftest import gettestobjspace
-from pypy.translator.platform import platform
-from pypy.translator.tool.cbuild import ExternalCompilationInfo
-from pypy.module._rawffi.interp_rawffi import TYPEMAP
-from pypy.module._rawffi.tracker import Tracker
-from pypy.translator.platform import platform
+from pypy.rpython.lltypesystem import rffi
+from pypy.rlib.clibffi import get_libc_name
+from pypy.rlib.libffi import types
+from pypy.rlib.libffi import CDLL
+from pypy.rlib.test.test_clibffi import get_libm_name
 
-import os, sys, py
+import sys, py
 
 class BaseAppTestFFI(object):
 
@@ -37,9 +37,6 @@
         return str(platform.compile([c_file], eci, 'x', standalone=False))
 
     def setup_class(cls):
-        from pypy.rpython.lltypesystem import rffi
-        from pypy.rlib.libffi import get_libc_name, CDLL, types
-        from pypy.rlib.test.test_libffi import get_libm_name
         space = gettestobjspace(usemodules=('_ffi', '_rawffi'))
         cls.space = space
         cls.w_iswin32 = space.wrap(sys.platform == 'win32')
@@ -96,7 +93,7 @@
 
     def test_getaddressindll(self):
         import sys
-        from _ffi import CDLL, types
+        from _ffi import CDLL
         libm = CDLL(self.libm_name)
         pow_addr = libm.getaddressindll('pow')
         fff = sys.maxint*2-1
@@ -105,7 +102,6 @@
         assert pow_addr == self.pow_addr & fff
 
     def test_func_fromaddr(self):
-        import sys
         from _ffi import CDLL, types, FuncPtr
         libm = CDLL(self.libm_name)
         pow_addr = libm.getaddressindll('pow')
@@ -569,3 +565,79 @@
             skip("unix specific")
         libnone = CDLL(None)
         raises(AttributeError, "libnone.getfunc('I_do_not_exist', [], types.void)")
+
+    def test_calling_convention1(self):
+        if not self.iswin32:
+            skip("windows specific")
+        from _ffi import WinDLL, types
+        libm = WinDLL(self.libm_name)
+        pow = libm.getfunc('pow', [types.double, types.double], types.double)
+        try:
+            pow(2, 3)
+        except ValueError, e:
+            assert e.message.startswith('Procedure called with')
+        else:
+            assert 0, 'test must assert, wrong calling convention'
+
+    def test_calling_convention2(self):
+        if not self.iswin32:
+            skip("windows specific")
+        from _ffi import WinDLL, types
+        kernel = WinDLL('Kernel32.dll')
+        sleep = kernel.getfunc('Sleep', [types.uint], types.void)
+        sleep(10)
+
+    def test_calling_convention3(self):
+        if not self.iswin32:
+            skip("windows specific")
+        from _ffi import CDLL, types
+        wrong_kernel = CDLL('Kernel32.dll')
+        wrong_sleep = wrong_kernel.getfunc('Sleep', [types.uint], types.void)
+        try:
+            wrong_sleep(10)
+        except ValueError, e:
+            assert e.message.startswith('Procedure called with')
+        else:
+            assert 0, 'test must assert, wrong calling convention'
+
+    def test_func_fromaddr2(self):
+        if not self.iswin32:
+            skip("windows specific")
+        from _ffi import CDLL, types, FuncPtr
+        from _rawffi import FUNCFLAG_STDCALL
+        libm = CDLL(self.libm_name)
+        pow_addr = libm.getaddressindll('pow')
+        wrong_pow = FuncPtr.fromaddr(pow_addr, 'pow', 
+                [types.double, types.double], types.double, FUNCFLAG_STDCALL)
+        try:
+            wrong_pow(2, 3) == 8
+        except ValueError, e:
+            assert e.message.startswith('Procedure called with')
+        else:
+            assert 0, 'test must assert, wrong calling convention'
+
+    def test_func_fromaddr3(self):
+        if not self.iswin32:
+            skip("windows specific")
+        from _ffi import WinDLL, types, FuncPtr
+        from _rawffi import FUNCFLAG_STDCALL
+        kernel = WinDLL('Kernel32.dll')
+        sleep_addr = kernel.getaddressindll('Sleep')
+        sleep = FuncPtr.fromaddr(sleep_addr, 'sleep', [types.uint], 
+                            types.void, FUNCFLAG_STDCALL)
+        sleep(10)
+
+    def test_by_ordinal(self):
+        """
+            int DLLEXPORT AAA_first_ordinal_function()
+            {
+                return 42;
+            }
+        """
+        if not self.iswin32:
+            skip("windows specific")
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        f_name = libfoo.getfunc('AAA_first_ordinal_function', [], types.sint)
+        f_ordinal = libfoo.getfunc(1, [], types.sint)
+        assert f_name.getaddr() == f_ordinal.getaddr()
diff --git a/pypy/module/_ffi/test/test_type_converter.py b/pypy/module/_ffi/test/test_type_converter.py
--- a/pypy/module/_ffi/test/test_type_converter.py
+++ b/pypy/module/_ffi/test/test_type_converter.py
@@ -144,6 +144,7 @@
     get_unichar_p = get_all
     get_float = get_all
     get_singlefloat = get_all
+    get_unsigned_which_fits_into_a_signed = get_all
     
     def convert(self, w_ffitype, val):
         self.val = val
diff --git a/pypy/module/_ffi/test/test_ztranslation.py b/pypy/module/_ffi/test/test_ztranslation.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_ffi/test/test_ztranslation.py
@@ -0,0 +1,4 @@
+from pypy.objspace.fake.checkmodule import checkmodule
+
+def test__ffi_translates():
+    checkmodule('_ffi', '_rawffi')
diff --git a/pypy/module/_hashlib/interp_hashlib.py b/pypy/module/_hashlib/interp_hashlib.py
--- a/pypy/module/_hashlib/interp_hashlib.py
+++ b/pypy/module/_hashlib/interp_hashlib.py
@@ -96,6 +96,9 @@
         block_size = rffi.getintfield(digest_type, 'c_block_size')
         return space.wrap(block_size)
 
+    def get_name(self, space):
+        return space.wrap(self.name)
+
     def _digest(self, space):
         with lltype.scoped_alloc(ropenssl.EVP_MD_CTX.TO) as ctx:
             with self.lock:
@@ -118,6 +121,7 @@
     digest_size=GetSetProperty(W_Hash.get_digest_size),
     digestsize=GetSetProperty(W_Hash.get_digest_size),
     block_size=GetSetProperty(W_Hash.get_block_size),
+    name=GetSetProperty(W_Hash.get_name),
     )
 W_Hash.acceptable_as_base_class = False
 
diff --git a/pypy/module/_hashlib/test/test_hashlib.py b/pypy/module/_hashlib/test/test_hashlib.py
--- a/pypy/module/_hashlib/test/test_hashlib.py
+++ b/pypy/module/_hashlib/test/test_hashlib.py
@@ -20,6 +20,7 @@
                                     'sha512': 64,
                                     }.items():
             h = hashlib.new(name)
+            assert h.name == name
             assert h.digest_size == expected_size
             assert h.digestsize == expected_size
             #
diff --git a/pypy/module/_minimal_curses/fficurses.py b/pypy/module/_minimal_curses/fficurses.py
--- a/pypy/module/_minimal_curses/fficurses.py
+++ b/pypy/module/_minimal_curses/fficurses.py
@@ -8,11 +8,20 @@
 from pypy.rpython.extfunc import register_external
 from pypy.module._minimal_curses import interp_curses
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
+from sys import platform
 
-eci = ExternalCompilationInfo(
-    includes = ['curses.h', 'term.h'],
-    libraries = ['curses'],
-)
+_CYGWIN = platform == 'cygwin'
+
+if _CYGWIN:
+    eci = ExternalCompilationInfo(
+        includes = ['ncurses/curses.h', 'ncurses/term.h'],
+        libraries = ['curses'],
+    )
+else:
+    eci = ExternalCompilationInfo(
+        includes = ['curses.h', 'term.h'],
+        libraries = ['curses'],
+    )
 
 rffi_platform.verify_eci(eci)
 
diff --git a/pypy/module/_socket/test/test_sock_app.py b/pypy/module/_socket/test/test_sock_app.py
--- a/pypy/module/_socket/test/test_sock_app.py
+++ b/pypy/module/_socket/test/test_sock_app.py
@@ -618,9 +618,12 @@
         except timeout:
             pass
         t.recv(count)    
-        # test sendall() timeout, be sure to send data larger than the
-        # socket buffer
-        raises(timeout, cli.sendall, 'foobar' * 7000)
+        # test sendall() timeout
+        try:
+            while 1:
+                cli.sendall('foobar' * 70)
+        except timeout:
+            pass
         # done
         cli.close()
         t.close()
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -7,7 +7,7 @@
 from pypy.interpreter.error import OperationError
 from pypy.rlib.rarithmetic import intmask
 from pypy.tool.pairtype import extendabletype
-
+from pypy.rlib import jit
 
 # ____________________________________________________________
 #
@@ -344,6 +344,7 @@
         raise OperationError(space.w_TypeError,
                              space.wrap("cannot copy this match object"))
 
+    @jit.look_inside_iff(lambda self, args_w: jit.isconstant(len(args_w)))
     def group_w(self, args_w):
         space = self.space
         ctx = self.ctx
diff --git a/pypy/module/_ssl/__init__.py b/pypy/module/_ssl/__init__.py
--- a/pypy/module/_ssl/__init__.py
+++ b/pypy/module/_ssl/__init__.py
@@ -31,5 +31,6 @@
     def startup(self, space):
         from pypy.rlib.ropenssl import init_ssl
         init_ssl()
-        from pypy.module._ssl.interp_ssl import setup_ssl_threads
-        setup_ssl_threads()
+        if space.config.objspace.usemodules.thread:
+            from pypy.module._ssl.thread_lock import setup_ssl_threads
+            setup_ssl_threads()
diff --git a/pypy/module/_ssl/interp_ssl.py b/pypy/module/_ssl/interp_ssl.py
--- a/pypy/module/_ssl/interp_ssl.py
+++ b/pypy/module/_ssl/interp_ssl.py
@@ -789,7 +789,11 @@
 def _ssl_seterror(space, ss, ret):
     assert ret <= 0
 
-    if ss and ss.ssl:
+    if ss is None:
+        errval = libssl_ERR_peek_last_error()
+        errstr = rffi.charp2str(libssl_ERR_error_string(errval, None))
+        return ssl_error(space, errstr, errval)
+    elif ss.ssl:
         err = libssl_SSL_get_error(ss.ssl, ret)
     else:
         err = SSL_ERROR_SSL
@@ -880,38 +884,3 @@
             libssl_X509_free(x)
     finally:
         libssl_BIO_free(cert)
-
-# this function is needed to perform locking on shared data
-# structures. (Note that OpenSSL uses a number of global data
-# structures that will be implicitly shared whenever multiple threads
-# use OpenSSL.) Multi-threaded applications will crash at random if
-# it is not set.
-#
-# locking_function() must be able to handle up to CRYPTO_num_locks()
-# different mutex locks. It sets the n-th lock if mode & CRYPTO_LOCK, and
-# releases it otherwise.
-#
-# filename and line are the file number of the function setting the
-# lock. They can be useful for debugging.
-_ssl_locks = []
-
-def _ssl_thread_locking_function(mode, n, filename, line):
-    n = intmask(n)
-    if n < 0 or n >= len(_ssl_locks):
-        return
-
-    if intmask(mode) & CRYPTO_LOCK:
-        _ssl_locks[n].acquire(True)
-    else:
-        _ssl_locks[n].release()
-
-def _ssl_thread_id_function():
-    from pypy.module.thread import ll_thread
-    return rffi.cast(rffi.LONG, ll_thread.get_ident())
-
-def setup_ssl_threads():
-    from pypy.module.thread import ll_thread
-    for i in range(libssl_CRYPTO_num_locks()):
-        _ssl_locks.append(ll_thread.allocate_lock())
-    libssl_CRYPTO_set_locking_callback(_ssl_thread_locking_function)
-    libssl_CRYPTO_set_id_callback(_ssl_thread_id_function)
diff --git a/pypy/module/_ssl/test/test_ztranslation.py b/pypy/module/_ssl/test/test_ztranslation.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_ssl/test/test_ztranslation.py
@@ -0,0 +1,4 @@
+from pypy.objspace.fake.checkmodule import checkmodule
+
+def test__ffi_translates():
+    checkmodule('_ssl')
diff --git a/pypy/module/_ssl/thread_lock.py b/pypy/module/_ssl/thread_lock.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_ssl/thread_lock.py
@@ -0,0 +1,80 @@
+from pypy.rlib.ropenssl import *
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
+
+# CRYPTO_set_locking_callback:
+#
+# this function is needed to perform locking on shared data
+# structures. (Note that OpenSSL uses a number of global data
+# structures that will be implicitly shared whenever multiple threads
+# use OpenSSL.) Multi-threaded applications will crash at random if
+# it is not set.
+#
+# locking_function() must be able to handle up to CRYPTO_num_locks()
+# different mutex locks. It sets the n-th lock if mode & CRYPTO_LOCK, and
+# releases it otherwise.
+#
+# filename and line are the file name and line number of the function
+# setting the lock. They can be useful for debugging.
+
+
+# This logic is moved to C code so that the callbacks can be invoked
+# without caring about the GIL.
+
+separate_module_source = """
+
+#include <openssl/crypto.h>
+
+static unsigned int _ssl_locks_count = 0;
+static struct RPyOpaque_ThreadLock *_ssl_locks;
+
+static unsigned long _ssl_thread_id_function(void) {
+    return RPyThreadGetIdent();
+}
+
+static void _ssl_thread_locking_function(int mode, int n, const char *file,
+                                         int line) {
+    if ((_ssl_locks == NULL) ||
+        (n < 0) || ((unsigned)n >= _ssl_locks_count))
+        return;
+
+    if (mode & CRYPTO_LOCK) {
+        RPyThreadAcquireLock(_ssl_locks + n, 1);
+    } else {
+        RPyThreadReleaseLock(_ssl_locks + n);
+    }
+}
+
+int _PyPy_SSL_SetupThreads(void)
+{
+    unsigned int i;
+    _ssl_locks_count = CRYPTO_num_locks();
+    _ssl_locks = calloc(_ssl_locks_count, sizeof(struct RPyOpaque_ThreadLock));
+    if (_ssl_locks == NULL)
+        return 0;
+    for (i=0; i<_ssl_locks_count; i++) {
+        if (RPyThreadLockInit(_ssl_locks + i) == 0)
+            return 0;
+    }
+    CRYPTO_set_locking_callback(_ssl_thread_locking_function);
+    CRYPTO_set_id_callback(_ssl_thread_id_function);
+    return 1;
+}
+"""
+
+
+eci = ExternalCompilationInfo(
+    separate_module_sources=[separate_module_source],
+    post_include_bits=[
+        "int _PyPy_SSL_SetupThreads(void);"],
+    export_symbols=['_PyPy_SSL_SetupThreads'],
+)
+
+_PyPy_SSL_SetupThreads = rffi.llexternal('_PyPy_SSL_SetupThreads',
+                                         [], rffi.INT,
+                                         compilation_info=eci)
+
+def setup_ssl_threads():
+    result = _PyPy_SSL_SetupThreads()
+    if rffi.cast(lltype.Signed, result) == 0:
+        raise MemoryError
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -9,7 +9,7 @@
 from pypy.objspace.std.multimethod import FailedToImplement
 from pypy.objspace.std.stdtypedef import SMM, StdTypeDef
 from pypy.objspace.std.register_all import register_all
-from pypy.rlib.rarithmetic import ovfcheck
+from pypy.rlib.rarithmetic import ovfcheck, widen
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib.objectmodel import specialize, keepalive_until_here
 from pypy.rpython.lltypesystem import lltype, rffi
@@ -164,6 +164,8 @@
         data[index] = char
         array._charbuf_stop()
 
+    def get_raw_address(self):
+        return self.array._charbuf_start()
 
 def make_array(mytype):
     W_ArrayBase = globals()['W_ArrayBase']
@@ -225,20 +227,29 @@
             # length
             self.setlen(0)
 
-        def setlen(self, size):
+        def setlen(self, size, zero=False, overallocate=True):
             if size > 0:
                 if size > self.allocated or size < self.allocated / 2:
-                    if size < 9:
-                        some = 3
+                    if overallocate:
+                        if size < 9:
+                            some = 3
+                        else:
+                            some = 6
+                        some += size >> 3
                     else:
-                        some = 6
-                    some += size >> 3
+                        some = 0
                     self.allocated = size + some
-                    new_buffer = lltype.malloc(mytype.arraytype,
-                                               self.allocated, flavor='raw',
-                                               add_memory_pressure=True)
-                    for i in range(min(size, self.len)):
-                        new_buffer[i] = self.buffer[i]
+                    if zero:
+                        new_buffer = lltype.malloc(mytype.arraytype,
+                                                   self.allocated, flavor='raw',
+                                                   add_memory_pressure=True,
+                                                   zero=True)
+                    else:
+                        new_buffer = lltype.malloc(mytype.arraytype,
+                                                   self.allocated, flavor='raw',
+                                                   add_memory_pressure=True)
+                        for i in range(min(size, self.len)):
+                            new_buffer[i] = self.buffer[i]
                 else:
                     self.len = size
                     return
@@ -344,7 +355,7 @@
     def getitem__Array_Slice(space, self, w_slice):
         start, stop, step, size = space.decode_index4(w_slice, self.len)
         w_a = mytype.w_class(self.space)
-        w_a.setlen(size)
+        w_a.setlen(size, overallocate=False)
         assert step != 0
         j = 0
         for i in range(start, stop, step):
@@ -366,26 +377,18 @@
     def setitem__Array_Slice_Array(space, self, w_idx, w_item):
         start, stop, step, size = self.space.decode_index4(w_idx, self.len)
         assert step != 0
-        if w_item.len != size:
+        if w_item.len != size or self is w_item:
+            # XXX this is a giant slow hack
             w_lst = array_tolist__Array(space, self)
             w_item = space.call_method(w_item, 'tolist')
             space.setitem(w_lst, w_idx, w_item)
             self.setlen(0)
             self.fromsequence(w_lst)
         else:
-            if self is w_item:
-                with lltype.scoped_alloc(mytype.arraytype, self.allocated) as new_buffer:
-                    for i in range(self.len):
-                        new_buffer[i] = w_item.buffer[i]
-                    j = 0
-                    for i in range(start, stop, step):
-                        self.buffer[i] = new_buffer[j]
-                        j += 1
-            else:
-                j = 0
-                for i in range(start, stop, step):
-                    self.buffer[i] = w_item.buffer[j]
-                    j += 1
+            j = 0
+            for i in range(start, stop, step):
+                self.buffer[i] = w_item.buffer[j]
+                j += 1
 
     def setslice__Array_ANY_ANY_ANY(space, self, w_i, w_j, w_x):
         space.setitem(self, space.newslice(w_i, w_j, space.w_None), w_x)
@@ -457,6 +460,7 @@
         self.buffer[i] = val
 
     def delitem__Array_ANY(space, self, w_idx):
+        # XXX this is a giant slow hack
         w_lst = array_tolist__Array(space, self)
         space.delitem(w_lst, w_idx)
         self.setlen(0)
@@ -469,7 +473,7 @@
 
     def add__Array_Array(space, self, other):
         a = mytype.w_class(space)
-        a.setlen(self.len + other.len)
+        a.setlen(self.len + other.len, overallocate=False)
         for i in range(self.len):
             a.buffer[i] = self.buffer[i]
         for i in range(other.len):
@@ -485,46 +489,58 @@
         return self
 
     def mul__Array_ANY(space, self, w_repeat):
+        return _mul_helper(space, self, w_repeat, False)
+
+    def mul__ANY_Array(space, w_repeat, self):
+        return _mul_helper(space, self, w_repeat, False)
+
+    def inplace_mul__Array_ANY(space, self, w_repeat):
+        return _mul_helper(space, self, w_repeat, True)
+
+    def _mul_helper(space, self, w_repeat, is_inplace):
         try:
             repeat = space.getindex_w(w_repeat, space.w_OverflowError)
         except OperationError, e:
             if e.match(space, space.w_TypeError):
                 raise FailedToImplement
             raise
-        a = mytype.w_class(space)
         repeat = max(repeat, 0)
         try:
             newlen = ovfcheck(self.len * repeat)
         except OverflowError:
             raise MemoryError
-        a.setlen(newlen)
-        for r in range(repeat):
-            for i in range(self.len):
-                a.buffer[r * self.len + i] = self.buffer[i]
+        oldlen = self.len
+        if is_inplace:
+            a = self
+            start = 1
+        else:
+            a = mytype.w_class(space)
+            start = 0
+        # <a performance hack>
+        if oldlen == 1:
+            if mytype.unwrap == 'str_w' or mytype.unwrap == 'unicode_w':
+                zero = not ord(self.buffer[0])
+            elif mytype.unwrap == 'int_w' or mytype.unwrap == 'bigint_w':
+                zero = not widen(self.buffer[0])
+            #elif mytype.unwrap == 'float_w':
+            #    value = ...float(self.buffer[0])  xxx handle the case of -0.0
+            else:
+                zero = False
+            if zero:
+                a.setlen(newlen, zero=True, overallocate=False)
+                return a
+            a.setlen(newlen, overallocate=False)
+            item = self.buffer[0]
+            for r in range(start, repeat):
+                a.buffer[r] = item
+            return a
+        # </a performance hack>
+        a.setlen(newlen, overallocate=False)
+        for r in range(start, repeat):
+            for i in range(oldlen):
+                a.buffer[r * oldlen + i] = self.buffer[i]
         return a
 
-    def mul__ANY_Array(space, w_repeat, self):
-        return mul__Array_ANY(space, self, w_repeat)
-
-    def inplace_mul__Array_ANY(space, self, w_repeat):
-        try:
-            repeat = space.getindex_w(w_repeat, space.w_OverflowError)
-        except OperationError, e:
-            if e.match(space, space.w_TypeError):
-                raise FailedToImplement
-            raise
-        oldlen = self.len
-        repeat = max(repeat, 0)
-        try:
-            newlen = ovfcheck(self.len * repeat)
-        except OverflowError:
-            raise MemoryError
-        self.setlen(newlen)
-        for r in range(1, repeat):
-            for i in range(oldlen):
-                self.buffer[r * oldlen + i] = self.buffer[i]
-        return self
-
     # Convertions
 
     def array_tolist__Array(space, self):
@@ -600,6 +616,7 @@
     # Compare methods
     @specialize.arg(3)
     def _cmp_impl(space, self, other, space_fn):
+        # XXX this is a giant slow hack
         w_lst1 = array_tolist__Array(space, self)
         w_lst2 = space.call_method(other, 'tolist')
         return space_fn(w_lst1, w_lst2)
@@ -646,7 +663,7 @@
 
     def array_copy__Array(space, self):
         w_a = mytype.w_class(self.space)
-        w_a.setlen(self.len)
+        w_a.setlen(self.len, overallocate=False)
         rffi.c_memcpy(
             rffi.cast(rffi.VOIDP, w_a.buffer),
             rffi.cast(rffi.VOIDP, self.buffer),
diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py
--- a/pypy/module/array/test/test_array.py
+++ b/pypy/module/array/test/test_array.py
@@ -890,6 +890,54 @@
         a[::-1] = a
         assert a == self.array('b', [3, 2, 1, 0])
 
+    def test_array_multiply(self):
+        a = self.array('b', [0])
+        b = a * 13
+        assert b[12] == 0
+        b = 13 * a
+        assert b[12] == 0
+        a *= 13
+        assert a[12] == 0
+        a = self.array('b', [1])
+        b = a * 13
+        assert b[12] == 1
+        b = 13 * a
+        assert b[12] == 1
+        a *= 13
+        assert a[12] == 1
+        a = self.array('i', [0])
+        b = a * 13
+        assert b[12] == 0
+        b = 13 * a
+        assert b[12] == 0
+        a *= 13
+        assert a[12] == 0
+        a = self.array('i', [1])
+        b = a * 13
+        assert b[12] == 1
+        b = 13 * a
+        assert b[12] == 1
+        a *= 13
+        assert a[12] == 1
+        a = self.array('i', [0, 0])
+        b = a * 13
+        assert len(b) == 26
+        assert b[22] == 0
+        b = 13 * a
+        assert len(b) == 26
+        assert b[22] == 0
+        a *= 13
+        assert a[22] == 0
+        assert len(a) == 26
+        a = self.array('f', [-0.0])
+        b = a * 13
+        assert len(b) == 13
+        assert str(b[12]) == "-0.0"
+        a = self.array('d', [-0.0])
+        b = a * 13
+        assert len(b) == 13
+        assert str(b[12]) == "-0.0"
+
 
 class AppTestArrayBuiltinShortcut(AppTestArray):
     OPTIONS = {'objspace.std.builtinshortcut': True}
diff --git a/pypy/module/cStringIO/interp_stringio.py b/pypy/module/cStringIO/interp_stringio.py
--- a/pypy/module/cStringIO/interp_stringio.py
+++ b/pypy/module/cStringIO/interp_stringio.py
@@ -221,7 +221,8 @@
 }
 
 W_InputType.typedef = TypeDef(
-    "cStringIO.StringI",
+    "StringI",
+    __module__   = "cStringIO",
     __doc__      = "Simple type for treating strings as input file streams",
     closed       = GetSetProperty(descr_closed, cls=W_InputType),
     softspace    = GetSetProperty(descr_softspace,
@@ -232,7 +233,8 @@
     )
 
 W_OutputType.typedef = TypeDef(
-    "cStringIO.StringO",
+    "StringO",
+    __module__   = "cStringIO",
     __doc__      = "Simple type for output to strings.",
     truncate     = interp2app(W_OutputType.descr_truncate),
     write        = interp2app(W_OutputType.descr_write),
diff --git a/pypy/module/cppyy/__init__.py b/pypy/module/cppyy/__init__.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/cppyy/__init__.py
@@ -0,0 +1,33 @@
+from pypy.interpreter.mixedmodule import MixedModule
+
+class Module(MixedModule):
+    "This module provides runtime bindings to C++ code for which reflection\n\
+    info has been generated. Current supported back-ends are Reflex and CINT.\n\
+    See http://doc.pypy.org/en/latest/cppyy.html for full details."
+
+    interpleveldefs = {
+        '_load_dictionary'       : 'interp_cppyy.load_dictionary',
+        '_resolve_name'          : 'interp_cppyy.resolve_name',
+        '_scope_byname'          : 'interp_cppyy.scope_byname',
+        '_template_byname'       : 'interp_cppyy.template_byname',
+        '_set_class_generator'   : 'interp_cppyy.set_class_generator',
+        '_register_class'        : 'interp_cppyy.register_class',
+        'CPPInstance'            : 'interp_cppyy.W_CPPInstance',
+        'addressof'              : 'interp_cppyy.addressof',
+        'bind_object'            : 'interp_cppyy.bind_object',
+    }
+
+    appleveldefs = {
+        'gbl'                    : 'pythonify.gbl',
+        'load_reflection_info'   : 'pythonify.load_reflection_info',
+        'add_pythonization'      : 'pythonify.add_pythonization',
+    }
+
+    def __init__(self, space, *args):
+        "NOT_RPYTHON"
+        MixedModule.__init__(self, space, *args)
+
+        # pythonization functions may be written in RPython, but the interp2app
+        # code generation is not, so give it a chance to run now
+        from pypy.module.cppyy import capi
+        capi.register_pythonizations(space)
diff --git a/pypy/module/cppyy/bench/Makefile b/pypy/module/cppyy/bench/Makefile
new file mode 100644
--- /dev/null
+++ b/pypy/module/cppyy/bench/Makefile
@@ -0,0 +1,29 @@
+all: bench02Dict_reflex.so
+
+ROOTSYS := ${ROOTSYS}
+
+ifeq ($(ROOTSYS),)
+  genreflex=genreflex
+  cppflags=
+else
+  genreflex=$(ROOTSYS)/bin/genreflex
+  cppflags=-I$(ROOTSYS)/include -L$(ROOTSYS)/lib
+endif
+
+PLATFORM := $(shell uname -s)
+ifeq ($(PLATFORM),Darwin)
+  cppflags+=-dynamiclib -single_module -arch x86_64
+endif
+
+ifeq ($(shell $(genreflex) --help | grep -- --with-methptrgetter),)
+  genreflexflags=
+  cppflags2=-O3 -fPIC
+else
+  genreflexflags=--with-methptrgetter
+  cppflags2=-Wno-pmf-conversions -O3 -fPIC
+endif
+
+
+bench02Dict_reflex.so: bench02.h bench02.cxx bench02.xml
+	$(genreflex) bench02.h $(genreflexflags) --selection=bench02.xml -I$(ROOTSYS)/include
+	g++ -o $@ bench02.cxx bench02_rflx.cpp -I$(ROOTSYS)/include -shared -lReflex -lHistPainter `root-config --libs` $(cppflags) $(cppflags2)
diff --git a/pypy/module/cppyy/bench/bench02.cxx b/pypy/module/cppyy/bench/bench02.cxx
new file mode 100644
--- /dev/null
+++ b/pypy/module/cppyy/bench/bench02.cxx
@@ -0,0 +1,79 @@
+#include "bench02.h"
+
+#include "TROOT.h"
+#include "TApplication.h"
+#include "TDirectory.h"
+#include "TInterpreter.h"
+#include "TSystem.h"
+#include "TBenchmark.h"
+#include "TStyle.h"
+#include "TError.h"
+#include "Getline.h"
+#include "TVirtualX.h"
+
+#include "Api.h"
+
+#include <iostream>
+
+// Definition of TClass::GetClass(const char*, Bool_t, Bool_t) that ignores all
+// of its arguments and always returns the same placeholder TClass instance.
+// NOTE(review): presumably provided so the benchmark library avoids real class
+// lookups -- confirm against how this library is linked with ROOT.
+TClass *TClass::GetClass(const char*, Bool_t, Bool_t) {
+    static TClass* dummy = new TClass("__dummy__", kTRUE);
+    return dummy;  // is deleted by gROOT at shutdown
+}
+
+class TTestApplication : public TApplication {
+public:
+    TTestApplication(
+        const char* acn, Int_t* argc, char** argv, Bool_t bLoadLibs = kTRUE);
+    virtual ~TTestApplication();
+};
+
+// Constructs the application on top of TApplication(acn, argc, argv).
+// When do_load is true, a fixed set of standard headers is first passed to
+// ProcessLine. In every case the interpreter context is saved, the history is
+// initialised and SetReturnFromRun(kTRUE) is applied.
+TTestApplication::TTestApplication(
+        const char* acn, int* argc, char** argv, bool do_load) : TApplication(acn, argc, argv) {
+    if (do_load) {
+        // follow TRint to minimize differences with CINT
+        ProcessLine("#include <iostream>", kTRUE);
+        ProcessLine("#include <_string>",  kTRUE); // for std::string iostream.
+        ProcessLine("#include <vector>",   kTRUE); // needed because they're used within the
+        ProcessLine("#include <pair>",     kTRUE); //  core ROOT dicts and CINT won't be able
+                                                   //  to properly unload these files
+    }
+
+    // save current interpreter context
+    gInterpreter->SaveContext();
+    gInterpreter->SaveGlobalsContext();
+
+    // prevent crashes on accessing history
+    Gl_histinit((char*)"-");
+
+    // prevent ROOT from exiting python
+    SetReturnFromRun(kTRUE);
+}
+
+TTestApplication::~TTestApplication() {}
+
+static const char* appname = "pypy-cppyy";
+
+// Switches ROOT to batch mode and, when no global application object exists
+// yet, installs a TTestApplication created with header pre-loading disabled
+// (kFALSE is passed for the load flag).
+Bench02RootApp::Bench02RootApp() {
+    gROOT->SetBatch(kTRUE);
+    if (!gApplication) {
+        // minimal fake command line: the application name only
+        int argc = 1;
+        char* argv[1]; argv[0] = (char*)appname;
+        gApplication = new TTestApplication(appname, &argc, argv, kFALSE);
+    }
+}
+
+Bench02RootApp::~Bench02RootApp() {
+    // TODO: ROOT globals cleanup ... (?)
+}
+
+void Bench02RootApp::report() {
+    std::cout << "gROOT is: " << gROOT << std::endl;
+    std::cout << "gApplication is: " << gApplication << std::endl;
+}
+
+// Writes the pending contents of the given file and closes it, logging
+// progress on std::cout. A null pointer is reported and ignored instead of
+// being dereferenced, since this helper is callable from Python where passing
+// a null object is easy to do by accident.
+void Bench02RootApp::close_file(TFile* f) {
+    if (!f) {
+        std::cout << "no file to close" << std::endl;
+        return;
+    }
+    std::cout << "closing file " << f->GetName() << " ... " << std::endl;
+    f->Write();
+    f->Close();
+    std::cout << "... file closed" << std::endl;
+}
diff --git a/pypy/module/cppyy/bench/bench02.h b/pypy/module/cppyy/bench/bench02.h
new file mode 100644
--- /dev/null
+++ b/pypy/module/cppyy/bench/bench02.h
@@ -0,0 +1,72 @@
+#include "TString.h"
+
+#include "TCanvas.h"
+#include "TFile.h"
+#include "TProfile.h"
+#include "TNtuple.h"
+#include "TH1F.h"
+#include "TH2F.h"
+#include "TRandom.h"
+#include "TRandom3.h"
+
+#include "TROOT.h"
+#include "TApplication.h"
+#include "TSystem.h"
+
+#include "TArchiveFile.h"
+#include "TBasket.h"
+#include "TBenchmark.h"
+#include "TBox.h"
+#include "TBranchRef.h"
+#include "TBrowser.h"
+#include "TClassGenerator.h"
+#include "TClassRef.h"
+#include "TClassStreamer.h"
+#include "TContextMenu.h"
+#include "TEntryList.h"
+#include "TEventList.h"
+#include "TF1.h"
+#include "TFileCacheRead.h"
+#include "TFileCacheWrite.h"
+#include "TFileMergeInfo.h"
+#include "TFitResult.h"
+#include "TFolder.h"
+//#include "TFormulaPrimitive.h"
+#include "TFunction.h"
+#include "TFrame.h"
+#include "TGlobal.h"
+#include "THashList.h"
+#include "TInetAddress.h"
+#include "TInterpreter.h"
+#include "TKey.h"
+#include "TLegend.h"
+#include "TMethodCall.h"
+#include "TPluginManager.h"
+#include "TProcessUUID.h"
+#include "TSchemaRuleSet.h"
+#include "TStyle.h"
+#include "TSysEvtHandler.h"
+#include "TTimer.h"
+#include "TView.h"
+//#include "TVirtualCollectionProxy.h"
+#include "TVirtualFFT.h"
+#include "TVirtualHistPainter.h"
+#include "TVirtualIndex.h"
+#include "TVirtualIsAProxy.h"
+#include "TVirtualPadPainter.h"
+#include "TVirtualRefProxy.h"
+#include "TVirtualStreamerInfo.h"
+#include "TVirtualViewer3D.h"
+
+#include <typeinfo>
+#include <ostream>
+
+
+class Bench02RootApp {
+public:
+   Bench02RootApp();
+   ~Bench02RootApp();
+
+   void report();
+   void close_file(TFile* f);
+};
diff --git a/pypy/module/cppyy/bench/bench02.xml b/pypy/module/cppyy/bench/bench02.xml
new file mode 100644
--- /dev/null
+++ b/pypy/module/cppyy/bench/bench02.xml
@@ -0,0 +1,41 @@
+<lcgdict>
+
+  <selection>
+
+     <!-- ROOT classes -->
+     <class pattern="T[A-Z]*" />
+     <class pattern="ROOT::T[A-Z]*" />
+     <class pattern="ROOT::Fit::*" />
+
+     <!-- ROOT globals -->
+     <variable name="gROOT" />
+     <variable name="gSystem" />
+     <variable name="gRandom" />
+
+     <!-- STL classes actually used -->
+     <class name="std::string" />
+     <class name="std::ostream" />
+     <class name="std::type_info" />
+     <class pattern="std::vector<*>" />
+     <class pattern="std::_Vector_base<*>" />
+
+     <!-- helper -->
+     <class name="Bench02RootApp" />
+
+  </selection>
+
+  <exclusion>
+
+     <struct pattern="TString::*" />
+     <class name="TString" >
+         <field name="fRep" transient="true"/>
+     </class>
+
+     <class name="TUUID::uuid_time_t" />
+
+     <class name="TClass::TNameMapNode" />
+     <class name="TFileOpenHandle" />
+
+  </exclusion>
+
+</lcgdict>
diff --git a/pypy/module/cppyy/bench/hsimple.C b/pypy/module/cppyy/bench/hsimple.C
new file mode 100644
--- /dev/null
+++ b/pypy/module/cppyy/bench/hsimple.C
@@ -0,0 +1,109 @@
+#include <TFile.h>
+#include <TNtuple.h>
+#include <TH2.h>
+#include <TProfile.h>
+#include <TCanvas.h>
+#include <TFrame.h>
+#include <TROOT.h>
+#include <TSystem.h>
+#include <TRandom3.h>
+#include <TBenchmark.h>
+#include <TInterpreter.h>
+
+TFile *hsimple(Int_t get=0)
+{
+//  This program creates :
+//    - a one dimensional histogram
+//    - a two dimensional histogram
+//    - a profile histogram
+//    - a memory-resident ntuple
+//
+//  These objects are filled with some random numbers and saved on a file.
+//  If get=1 the macro returns a pointer to the TFile of "hsimple.root"
+//          if this file exists, otherwise it is created.
+//  The file "hsimple.root" is created in $ROOTSYS/tutorials if the caller has
+//  write access to this directory, otherwise the file is created in $PWD
+
+   TString filename = "hsimple.root";
+   TString dir = gSystem->UnixPathName(gInterpreter->GetCurrentMacroName());
+   dir.ReplaceAll("hsimple.C","");
+   dir.ReplaceAll("/./","/");
+   TFile *hfile = 0;
+   if (get) {
+      // if the argument get =1 return the file "hsimple.root"
+      // if the file does not exist, it is created
+      TString fullPath = dir+"hsimple.root";
+      if (!gSystem->AccessPathName(fullPath,kFileExists)) {
+	 hfile = TFile::Open(fullPath); //in $ROOTSYS/tutorials
+         if (hfile) return hfile;
+      }
+      //otherwise try $PWD/hsimple.root
+      if (!gSystem->AccessPathName("hsimple.root",kFileExists)) {
+         hfile = TFile::Open("hsimple.root"); //in current dir
+         if (hfile) return hfile;
+      }
+   }
+   //no hsimple.root file found. Must generate it !
+   //generate hsimple.root in $ROOTSYS/tutorials if we have write access
+   if (!gSystem->AccessPathName(dir,kWritePermission)) {
+      filename = dir+"hsimple.root";
+   } else if (!gSystem->AccessPathName(".",kWritePermission)) {
+      //otherwise generate hsimple.root in the current directory
+   } else {
+      printf("you must run the script in a directory with write access\n");
+      return 0;
+   }
+   hfile = (TFile*)gROOT->FindObject(filename); if (hfile) hfile->Close();
+   hfile = new TFile(filename,"RECREATE","Demo ROOT file with histograms");
+
+   // Create some histograms, a profile histogram and an ntuple
+   TH1F *hpx = new TH1F("hpx","This is the px distribution",100,-4,4);
+   hpx->SetFillColor(48);
+   TH2F *hpxpy = new TH2F("hpxpy","py vs px",40,-4,4,40,-4,4);
+   TProfile *hprof = new TProfile("hprof","Profile of pz versus px",100,-4,4,0,20);
+   TNtuple *ntuple = new TNtuple("ntuple","Demo ntuple","px:py:pz:random:i");
+
+   gBenchmark->Start("hsimple");
+  
+   // Create a new canvas.
+   TCanvas *c1 = new TCanvas("c1","Dynamic Filling Example",200,10,700,500);
+   c1->SetFillColor(42);
+   c1->GetFrame()->SetFillColor(21);
+   c1->GetFrame()->SetBorderSize(6);
+   c1->GetFrame()->SetBorderMode(-1);
+
+
+   // Fill histograms randomly
+   TRandom3 random;
+   Float_t px, py, pz;
+   const Int_t kUPDATE = 1000;
+   for (Int_t i = 0; i < 50000; i++) {
+   //      random.Rannor(px,py);
+      px = random.Gaus(0, 1);
+      py = random.Gaus(0, 1);
+      pz = px*px + py*py;
+      Float_t rnd = random.Rndm(1);
+      hpx->Fill(px);
+      hpxpy->Fill(px,py);
+      hprof->Fill(px,pz);
+      ntuple->Fill(px,py,pz,rnd,i);
+      if (i && (i%kUPDATE) == 0) {
+         if (i == kUPDATE) hpx->Draw();
+         c1->Modified();
+         c1->Update();
+         if (gSystem->ProcessEvents())
+            break;
+      }
+   }
+   gBenchmark->Show("hsimple");
+
+   // Save all objects in this file
+   hpx->SetFillColor(0);
+   hfile->Write();
+   hpx->SetFillColor(48);
+   c1->Modified();
+   return hfile;
+  
+// Note that the file is automatically closed when the application terminates
+// or when the file destructor is called.
+}
diff --git a/pypy/module/cppyy/bench/hsimple.py b/pypy/module/cppyy/bench/hsimple.py
new file mode 100755
--- /dev/null
+++ b/pypy/module/cppyy/bench/hsimple.py
@@ -0,0 +1,110 @@
+#*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
+#*-*
+#*-*  This program creates :
+#*-*    - a one dimensional histogram
+#*-*    - a two dimensional histogram
+#*-*    - a profile histogram
+#*-*    - a memory-resident ntuple
+#*-*
+#*-*  These objects are filled with some random numbers and saved on a file.
+#*-*
+#*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
+
+_reflex = True     # to keep things equal, set to False for full macro
+
+try:
+    import cppyy, random
+
+    if not hasattr(cppyy.gbl, 'gROOT'):
+        cppyy.load_reflection_info('bench02Dict_reflex.so')
+        _reflex = True
+
+    TCanvas  = cppyy.gbl.TCanvas
+    TFile    = cppyy.gbl.TFile
+    TProfile = cppyy.gbl.TProfile
+    TNtuple  = cppyy.gbl.TNtuple
+    TH1F     = cppyy.gbl.TH1F
+    TH2F     = cppyy.gbl.TH2F
+    TRandom3 = cppyy.gbl.TRandom3
+
+    gROOT      = cppyy.gbl.gROOT
+    gBenchmark = cppyy.gbl.TBenchmark()
+    gSystem    = cppyy.gbl.gSystem
+
+except ImportError:
+    from ROOT import TCanvas, TFile, TProfile, TNtuple, TH1F, TH2F, TRandom3
+    from ROOT import gROOT, gBenchmark, gSystem
+    import random
+
+# Put ROOT in batch mode whenever the _reflex flag is set.
+if _reflex:
+    gROOT.SetBatch(True)
+
+# Create a new ROOT binary machine independent file.
+# Note that this file may contain any kind of ROOT objects, histograms,
+# pictures, graphics objects, detector geometries, tracks, events, etc..
+# This file is now becoming the current directory.
+
+if not _reflex:
+    hfile = gROOT.FindObject('hsimple.root')
+    if hfile:
+        hfile.Close()
+    hfile = TFile('hsimple.root', 'RECREATE', 'Demo ROOT file with histograms' )
+
+# Create some histograms, a profile histogram and an ntuple
+hpx    = TH1F('hpx', 'This is the px distribution', 100, -4, 4)
+hpx.SetFillColor(48)
+hpxpy  = TH2F('hpxpy', 'py vs px', 40, -4, 4, 40, -4, 4)
+hprof  = TProfile('hprof', 'Profile of pz versus px', 100, -4, 4, 0, 20)
+if not _reflex:
+    ntuple = TNtuple('ntuple', 'Demo ntuple', 'px:py:pz:random:i')
+
+gBenchmark.Start('hsimple')
+
+# Create a new canvas, and customize it.
+c1 = TCanvas('c1', 'Dynamic Filling Example', 200, 10, 700, 500)
+c1.SetFillColor(42)
+c1.GetFrame().SetFillColor(21)
+c1.GetFrame().SetBorderSize(6)
+c1.GetFrame().SetBorderMode(-1)
+
+# Fill histograms randomly.
+random = TRandom3()
+kUPDATE = 1000
+for i in xrange(50000):
+    # Generate random numbers
+#    px, py = random.gauss(0, 1), random.gauss(0, 1)
+    px, py = random.Gaus(0, 1), random.Gaus(0, 1)
+    pz = px*px + py*py
+#    rnd = random.random()
+    rnd = random.Rndm(1)
+
+    # Fill histograms
+    hpx.Fill(px)
+    hpxpy.Fill(px, py)
+    hprof.Fill(px, pz)
+    if not _reflex:
+        ntuple.Fill(px, py, pz, rnd, i)
+
+    # Update display every kUPDATE events
+    if i and i%kUPDATE == 0:
+        if i == kUPDATE:
+            hpx.Draw()
+
+        c1.Modified(True)
+        c1.Update()
+
+        if gSystem.ProcessEvents():          # allow user interrupt
+            break
+
+gBenchmark.Show( 'hsimple' )
+
+# Save all objects in this file
+hpx.SetFillColor(0)
+if not _reflex:
+    hfile.Write()
+hpx.SetFillColor(48)
+c1.Modified(True)
+c1.Update()
+
+# Note that the file is automatically closed when application terminates
+# or when the file destructor is called.
diff --git a/pypy/module/cppyy/bench/hsimple_rflx.py b/pypy/module/cppyy/bench/hsimple_rflx.py
new file mode 100755
--- /dev/null
+++ b/pypy/module/cppyy/bench/hsimple_rflx.py
@@ -0,0 +1,120 @@
+#*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
+#*-*
+#*-*  This program creates :
+#*-*    - a one dimensional histogram
+#*-*    - a two dimensional histogram
+#*-*    - a profile histogram
+#*-*    - a memory-resident ntuple
+#*-*
+#*-*  These objects are filled with some random numbers and saved on a file.
+#*-*
+#*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
+
+try:
+    import warnings
+    warnings.simplefilter("ignore")
+
+    import cppyy, random
+    cppyy.load_reflection_info('bench02Dict_reflex.so')
+
+    app      = cppyy.gbl.Bench02RootApp()
+    TCanvas  = cppyy.gbl.TCanvas
+    TFile    = cppyy.gbl.TFile
+    TProfile = cppyy.gbl.TProfile
+    TNtuple  = cppyy.gbl.TNtuple
+    TH1F     = cppyy.gbl.TH1F
+    TH2F     = cppyy.gbl.TH2F
+    TRandom  = cppyy.gbl.TRandom
+except ImportError:
+    from ROOT import TCanvas, TFile, TProfile, TNtuple, TH1F, TH2F, TRandom
+    import random
+
+import math
+
+#gROOT      = cppyy.gbl.gROOT
+#gBenchmark = cppyy.gbl.gBenchmark
+#gRandom    = cppyy.gbl.gRandom
+#gSystem    = cppyy.gbl.gSystem
+
+#gROOT.Reset()
+
+# Create a new canvas, and customize it.
+#c1 = TCanvas( 'c1', 'Dynamic Filling Example', 200, 10, 700, 500 )
+#c1.SetFillColor( 42 )
+#c1.GetFrame().SetFillColor( 21 )
+#c1.GetFrame().SetBorderSize( 6 )
+#c1.GetFrame().SetBorderMode( -1 )
+
+# Create a new ROOT binary machine independent file.
+# Note that this file may contain any kind of ROOT objects, histograms,
+# pictures, graphics objects, detector geometries, tracks, events, etc..
+# This file is now becoming the current directory.
+
+#hfile = gROOT.FindObject( 'hsimple.root' )
+#if hfile:
+#   hfile.Close()
+#hfile = TFile( 'hsimple.root', 'RECREATE', 'Demo ROOT file with histograms' )
+
+# Create some histograms, a profile histogram and an ntuple
+hpx    = TH1F('hpx', 'This is the px distribution', 100, -4, 4)
+hpx.Print()
+#hpxpy  = TH2F( 'hpxpy', 'py vs px', 40, -4, 4, 40, -4, 4 )
+#hprof  = TProfile( 'hprof', 'Profile of pz versus px', 100, -4, 4, 0, 20 )
+#ntuple = TNtuple( 'ntuple', 'Demo ntuple', 'px:py:pz:random:i' )
+
+# Set canvas/frame attributes.
+#hpx.SetFillColor( 48 )
+
+#gBenchmark.Start( 'hsimple' )
+
+# Initialize random number generator.
+#gRandom.SetSeed()
+#rannor, rndm = gRandom.Rannor, gRandom.Rndm
+
+random = TRandom()
+random.SetSeed(0)
+
+# Fill histograms randomly.
+#px, py = Double(), Double()
+kUPDATE = 1000
+for i in xrange(2500000):
+ # Generate random values.
+#   px, py = random.gauss(0, 1), random.gauss(0, 1)
+   px, py = random.Gaus(0, 1), random.Gaus(0, 1)
+#   pt = (px*px + py*py)**0.5
+   pt = math.sqrt(px*px + py*py)
+#   pt = (px*px + py*py)
+#   random = rndm(1)
+
+ # Fill histograms.
+   hpx.Fill(pt)
+#   hpxpyFill( px, py )
+#   hprofFill( px, pz )
+#   ntupleFill( px, py, pz, random, i )
+
+ # Update display every kUPDATE events.
+#   if i and i%kUPDATE == 0:
+#      if i == kUPDATE:
+#         hpx.Draw()
+
+#      c1.Modified()
+#      c1.Update()
+
+#      if gSystem.ProcessEvents():            # allow user interrupt
+#         break
+
+#gBenchmark.Show( 'hsimple' )
+
+hpx.Print() 
+
+# Save all objects in this file.
+#hpx.SetFillColor( 0 )
+#hfile.Write()
+#hfile.Close()
+#hpx.SetFillColor( 48 )
+#c1.Modified()
+#c1.Update()
+#c1.Draw()
+
+# Note that the file is automatically closed when application terminates
+# or when the file destructor is called.
diff --git a/pypy/module/cppyy/capi/__init__.py b/pypy/module/cppyy/capi/__init__.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/cppyy/capi/__init__.py
@@ -0,0 +1,483 @@
+from pypy.rpython.lltypesystem import rffi, lltype
+from pypy.rlib import jit
+
+import reflex_capi as backend
+#import cint_capi as backend
+
+identify  = backend.identify
+pythonize = backend.pythonize
+register_pythonizations = backend.register_pythonizations
+
+ts_reflect = backend.ts_reflect
+ts_call    = backend.ts_call
+ts_memory  = backend.ts_memory
+ts_helper  = backend.ts_helper
+
+_C_OPAQUE_PTR = rffi.LONG
+_C_OPAQUE_NULL = lltype.nullptr(rffi.LONGP.TO)# ALT: _C_OPAQUE_PTR.TO
+
+C_SCOPE = _C_OPAQUE_PTR
+C_NULL_SCOPE = rffi.cast(C_SCOPE, _C_OPAQUE_NULL)
+
+C_TYPE = C_SCOPE
+C_NULL_TYPE = C_NULL_SCOPE
+
+C_OBJECT = _C_OPAQUE_PTR
+C_NULL_OBJECT = rffi.cast(C_OBJECT, _C_OPAQUE_NULL)
+
+C_METHOD = _C_OPAQUE_PTR
+C_INDEX = rffi.LONG
+WLAVC_INDEX = rffi.LONG
+
+C_METHPTRGETTER = lltype.FuncType([C_OBJECT], rffi.VOIDP)
+C_METHPTRGETTER_PTR = lltype.Ptr(C_METHPTRGETTER)
+
+def direct_ptradd(ptr, offset):
+    """Return ptr advanced by offset, as a C_OBJECT.
+
+    ptr must have the low-level type C_OBJECT (asserted below). offset is
+    cast to SIZE_T and promoted for the JIT before the addition.
+    """
+    offset = rffi.cast(rffi.SIZE_T, offset)
+    jit.promote(offset)
+    assert lltype.typeOf(ptr) == C_OBJECT
+    # go through a char pointer so that the addition is done in char units
+    address = rffi.cast(rffi.CCHARP, ptr)
+    return rffi.cast(C_OBJECT, lltype.direct_ptradd(address, offset))
+
+c_load_dictionary = backend.c_load_dictionary
+
+# name to opaque C++ scope representation ------------------------------------
+_c_num_scopes = rffi.llexternal(
+    "cppyy_num_scopes",
+    [C_SCOPE], rffi.INT,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_num_scopes(cppscope):
+    return _c_num_scopes(cppscope.handle)
+# Binding for the C function cppyy_scope_name(scope, index). It is declared
+# with threadsafe=ts_reflect like every other reflection lookup in this
+# module, so the backend's choice is honoured instead of the llexternal default.
+_c_scope_name = rffi.llexternal(
+    "cppyy_scope_name",
+    [C_SCOPE, rffi.INT], rffi.CCHARP,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_scope_name(cppscope, iscope):
+    """Return the result of cppyy_scope_name for cppscope.handle and iscope.
+
+    The returned C string is converted to a Python string and then released
+    through charp2str_free.
+    """
+    return charp2str_free(_c_scope_name(cppscope.handle, iscope))
+
+_c_resolve_name = rffi.llexternal(
+    "cppyy_resolve_name",
+    [rffi.CCHARP], rffi.CCHARP,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_resolve_name(name):
+    return charp2str_free(_c_resolve_name(name))
+c_get_scope_opaque = rffi.llexternal(
+    "cppyy_get_scope",
+    [rffi.CCHARP], C_SCOPE,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+c_get_template = rffi.llexternal(
+    "cppyy_get_template",
+    [rffi.CCHARP], C_TYPE,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+_c_actual_class = rffi.llexternal(
+    "cppyy_actual_class",
+    [C_TYPE, C_OBJECT], C_TYPE,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_actual_class(cppclass, cppobj):
+    return _c_actual_class(cppclass.handle, cppobj)
+
+# memory management ----------------------------------------------------------
+_c_allocate = rffi.llexternal(
+    "cppyy_allocate",
+    [C_TYPE], C_OBJECT,
+    threadsafe=ts_memory,
+    compilation_info=backend.eci)
+def c_allocate(cppclass):
+    return _c_allocate(cppclass.handle)
+_c_deallocate = rffi.llexternal(
+    "cppyy_deallocate",
+    [C_TYPE, C_OBJECT], lltype.Void,
+    threadsafe=ts_memory,
+    compilation_info=backend.eci)
+def c_deallocate(cppclass, cppobject):
+    _c_deallocate(cppclass.handle, cppobject)
+_c_destruct = rffi.llexternal(
+    "cppyy_destruct",
+    [C_TYPE, C_OBJECT], lltype.Void,
+    threadsafe=ts_call,
+    compilation_info=backend.eci)
+def c_destruct(cppclass, cppobject):
+    _c_destruct(cppclass.handle, cppobject)
+
+# method/function dispatching ------------------------------------------------
+c_call_v = rffi.llexternal(
+    "cppyy_call_v",
+    [C_METHOD, C_OBJECT, rffi.INT, rffi.VOIDP], lltype.Void,
+    threadsafe=ts_call,
+    compilation_info=backend.eci)
+c_call_b = rffi.llexternal(
+    "cppyy_call_b",
+    [C_METHOD, C_OBJECT, rffi.INT, rffi.VOIDP], rffi.UCHAR,
+    threadsafe=ts_call,
+    compilation_info=backend.eci)
+c_call_c = rffi.llexternal(
+    "cppyy_call_c",
+    [C_METHOD, C_OBJECT, rffi.INT, rffi.VOIDP], rffi.CHAR,
+    threadsafe=ts_call,
+    compilation_info=backend.eci)
+c_call_h = rffi.llexternal(
+    "cppyy_call_h",
+    [C_METHOD, C_OBJECT, rffi.INT, rffi.VOIDP], rffi.SHORT,
+    threadsafe=ts_call,
+    compilation_info=backend.eci)
+c_call_i = rffi.llexternal(
+    "cppyy_call_i",
+    [C_METHOD, C_OBJECT, rffi.INT, rffi.VOIDP], rffi.INT,
+    threadsafe=ts_call,
+    compilation_info=backend.eci)
+c_call_l = rffi.llexternal(
+    "cppyy_call_l",
+    [C_METHOD, C_OBJECT, rffi.INT, rffi.VOIDP], rffi.LONG,
+    threadsafe=ts_call,
+    compilation_info=backend.eci)
+c_call_ll = rffi.llexternal(
+    "cppyy_call_ll",
+    [C_METHOD, C_OBJECT, rffi.INT, rffi.VOIDP], rffi.LONGLONG,
+    threadsafe=ts_call,
+    compilation_info=backend.eci)
+c_call_f = rffi.llexternal(
+    "cppyy_call_f",
+    [C_METHOD, C_OBJECT, rffi.INT, rffi.VOIDP], rffi.FLOAT,
+    threadsafe=ts_call,
+    compilation_info=backend.eci)
+c_call_d = rffi.llexternal(
+    "cppyy_call_d",
+    [C_METHOD, C_OBJECT, rffi.INT, rffi.VOIDP], rffi.DOUBLE,
+    threadsafe=ts_call,
+    compilation_info=backend.eci)
+
+c_call_r = rffi.llexternal(
+    "cppyy_call_r",
+    [C_METHOD, C_OBJECT, rffi.INT, rffi.VOIDP], rffi.VOIDP,
+    threadsafe=ts_call,
+    compilation_info=backend.eci)
+c_call_s = rffi.llexternal(
+    "cppyy_call_s",
+    [C_METHOD, C_OBJECT, rffi.INT, rffi.VOIDP], rffi.CCHARP,
+    threadsafe=ts_call,
+    compilation_info=backend.eci)
+
+c_constructor = rffi.llexternal(
+    "cppyy_constructor",
+    [C_METHOD, C_OBJECT, rffi.INT, rffi.VOIDP], lltype.Void,
+    threadsafe=ts_call,
+    compilation_info=backend.eci)
+_c_call_o = rffi.llexternal(
+    "cppyy_call_o",
+    [C_METHOD, C_OBJECT, rffi.INT, rffi.VOIDP, C_TYPE], rffi.LONG,
+    threadsafe=ts_call,
+    compilation_info=backend.eci)
+def c_call_o(method, cppobj, nargs, args, cppclass):
+    return _c_call_o(method, cppobj, nargs, args, cppclass.handle)
+
+_c_get_methptr_getter = rffi.llexternal(
+    "cppyy_get_methptr_getter",
+    [C_SCOPE, C_INDEX], C_METHPTRGETTER_PTR,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci,
+    elidable_function=True)
+def c_get_methptr_getter(cppscope, index):
+    return _c_get_methptr_getter(cppscope.handle, index)
+
+# handling of function argument buffer ---------------------------------------
+c_allocate_function_args = rffi.llexternal(
+    "cppyy_allocate_function_args",
+    [rffi.SIZE_T], rffi.VOIDP,
+    threadsafe=ts_memory,
+    compilation_info=backend.eci)
+c_deallocate_function_args = rffi.llexternal(
+    "cppyy_deallocate_function_args",
+    [rffi.VOIDP], lltype.Void,
+    threadsafe=ts_memory,
+    compilation_info=backend.eci)
+c_function_arg_sizeof = rffi.llexternal(
+    "cppyy_function_arg_sizeof",
+    [], rffi.SIZE_T,
+    threadsafe=ts_memory,
+    compilation_info=backend.eci,
+    elidable_function=True)
+c_function_arg_typeoffset = rffi.llexternal(
+    "cppyy_function_arg_typeoffset",
+    [], rffi.SIZE_T,
+    threadsafe=ts_memory,
+    compilation_info=backend.eci,
+    elidable_function=True)
+
+# scope reflection information -----------------------------------------------
+c_is_namespace = rffi.llexternal(
+    "cppyy_is_namespace",
+    [C_SCOPE], rffi.INT,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+c_is_enum = rffi.llexternal(
+    "cppyy_is_enum",
+    [rffi.CCHARP], rffi.INT,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+
+# type/class reflection information ------------------------------------------
+_c_final_name = rffi.llexternal(
+    "cppyy_final_name",
+    [C_TYPE], rffi.CCHARP,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_final_name(cpptype):
+    return charp2str_free(_c_final_name(cpptype))
+_c_scoped_final_name = rffi.llexternal(
+    "cppyy_scoped_final_name",
+    [C_TYPE], rffi.CCHARP,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_scoped_final_name(cpptype):
+    return charp2str_free(_c_scoped_final_name(cpptype))
+c_has_complex_hierarchy = rffi.llexternal(
+    "cppyy_has_complex_hierarchy",
+    [C_TYPE], rffi.INT,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+_c_num_bases = rffi.llexternal(
+    "cppyy_num_bases",
+    [C_TYPE], rffi.INT,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_num_bases(cppclass):
+    return _c_num_bases(cppclass.handle)
+_c_base_name = rffi.llexternal(
+    "cppyy_base_name",
+    [C_TYPE, rffi.INT], rffi.CCHARP,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_base_name(cppclass, base_index):
+    return charp2str_free(_c_base_name(cppclass.handle, base_index))
+# Binding for the C function cppyy_is_subtype(derived, base).
+_c_is_subtype = rffi.llexternal(
+    "cppyy_is_subtype",
+    [C_TYPE, C_TYPE], rffi.INT,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci,
+    elidable_function=True)
+@jit.elidable_promote()
+def c_is_subtype(derived, base):
+    """Return 1 if derived and base compare equal, else ask the backend.
+
+    The equality shortcut avoids the call into C; otherwise the result of
+    cppyy_is_subtype on the two handles is returned unchanged.
+    """
+    if derived == base:
+        return 1
+    return _c_is_subtype(derived.handle, base.handle)
+
+# Binding for the C function cppyy_base_offset(derived, base, address, direction).
+_c_base_offset = rffi.llexternal(
+    "cppyy_base_offset",
+    [C_TYPE, C_TYPE, C_OBJECT, rffi.INT], rffi.SIZE_T,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci,
+    elidable_function=True)
+@jit.elidable_promote()
+def c_base_offset(derived, base, address, direction):
+    """Return 0 if derived and base compare equal, else ask the backend.
+
+    The equality shortcut avoids the call into C; otherwise the result of
+    cppyy_base_offset for the two handles, address and direction is returned
+    unchanged.
+    """
+    if derived == base:
+        return 0
+    return _c_base_offset(derived.handle, base.handle, address, direction)
+
+# method/function reflection information -------------------------------------
+_c_num_methods = rffi.llexternal(
+    "cppyy_num_methods",
+    [C_SCOPE], rffi.INT,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_num_methods(cppscope):
+    return _c_num_methods(cppscope.handle)
+_c_method_index_at = rffi.llexternal(
+    "cppyy_method_index_at",
+    [C_SCOPE, rffi.INT], C_INDEX,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_method_index_at(cppscope, imethod):
+    return _c_method_index_at(cppscope.handle, imethod)
+_c_method_index_from_name = rffi.llexternal(
+    "cppyy_method_index_from_name",
+    [C_SCOPE, rffi.CCHARP], C_INDEX,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_method_index_from_name(cppscope, name):
+    return _c_method_index_from_name(cppscope.handle, name)
+
+_c_method_name = rffi.llexternal(
+    "cppyy_method_name",
+    [C_SCOPE, C_INDEX], rffi.CCHARP,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_method_name(cppscope, index):
+    return charp2str_free(_c_method_name(cppscope.handle, index))
+_c_method_result_type = rffi.llexternal(
+    "cppyy_method_result_type",
+    [C_SCOPE, C_INDEX], rffi.CCHARP,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_method_result_type(cppscope, index):
+    return charp2str_free(_c_method_result_type(cppscope.handle, index))
+_c_method_num_args = rffi.llexternal(
+    "cppyy_method_num_args",
+    [C_SCOPE, C_INDEX], rffi.INT,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_method_num_args(cppscope, index):
+    return _c_method_num_args(cppscope.handle, index)
+_c_method_req_args = rffi.llexternal(
+    "cppyy_method_req_args",
+    [C_SCOPE, C_INDEX], rffi.INT,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_method_req_args(cppscope, index):
+    return _c_method_req_args(cppscope.handle, index)
+_c_method_arg_type = rffi.llexternal(
+    "cppyy_method_arg_type",
+    [C_SCOPE, C_INDEX, rffi.INT], rffi.CCHARP,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_method_arg_type(cppscope, index, arg_index):
+    return charp2str_free(_c_method_arg_type(cppscope.handle, index, arg_index))
+_c_method_arg_default = rffi.llexternal(
+    "cppyy_method_arg_default",
+    [C_SCOPE, C_INDEX, rffi.INT], rffi.CCHARP,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_method_arg_default(cppscope, index, arg_index):
+    return charp2str_free(_c_method_arg_default(cppscope.handle, index, arg_index))
+_c_method_signature = rffi.llexternal(
+    "cppyy_method_signature",
+    [C_SCOPE, C_INDEX], rffi.CCHARP,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_method_signature(cppscope, index):
+    return charp2str_free(_c_method_signature(cppscope.handle, index))
+
+_c_get_method = rffi.llexternal(
+    "cppyy_get_method",
+    [C_SCOPE, C_INDEX], C_METHOD,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_get_method(cppscope, index):
+    return _c_get_method(cppscope.handle, index)
+# Binding for the C function cppyy_get_global_operator, taking three scope
+# handles and an operator name.
+_c_get_global_operator = rffi.llexternal(
+    "cppyy_get_global_operator",
+    [C_SCOPE, C_SCOPE, C_SCOPE, rffi.CCHARP], WLAVC_INDEX,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_get_global_operator(nss, lc, rc, op):
+    """Call cppyy_get_global_operator with the handles of nss, lc and rc.
+
+    When nss is None no call into C is made and -1 (cast to WLAVC_INDEX)
+    is returned instead.
+    """
+    if nss is not None:
+        return _c_get_global_operator(nss.handle, lc.handle, rc.handle, op)
+    return rffi.cast(WLAVC_INDEX, -1)
+
+# method properties ----------------------------------------------------------
+_c_is_constructor = rffi.llexternal(
+    "cppyy_is_constructor",
+    [C_TYPE, C_INDEX], rffi.INT,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_is_constructor(cppclass, index):
+    return _c_is_constructor(cppclass.handle, index)
+_c_is_staticmethod = rffi.llexternal(
+    "cppyy_is_staticmethod",
+    [C_TYPE, C_INDEX], rffi.INT,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_is_staticmethod(cppclass, index):
+    return _c_is_staticmethod(cppclass.handle, index)
+
+# data member reflection information -----------------------------------------
+_c_num_datamembers = rffi.llexternal(
+    "cppyy_num_datamembers",
+    [C_SCOPE], rffi.INT,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_num_datamembers(cppscope):
+    return _c_num_datamembers(cppscope.handle)
+_c_datamember_name = rffi.llexternal(
+    "cppyy_datamember_name",
+    [C_SCOPE, rffi.INT], rffi.CCHARP,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_datamember_name(cppscope, datamember_index):
+    return charp2str_free(_c_datamember_name(cppscope.handle, datamember_index))
+_c_datamember_type = rffi.llexternal(
+    "cppyy_datamember_type",
+    [C_SCOPE, rffi.INT], rffi.CCHARP,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_datamember_type(cppscope, datamember_index):
+    return charp2str_free(_c_datamember_type(cppscope.handle, datamember_index))
+_c_datamember_offset = rffi.llexternal(
+    "cppyy_datamember_offset",
+    [C_SCOPE, rffi.INT], rffi.SIZE_T,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_datamember_offset(cppscope, datamember_index):
+    return _c_datamember_offset(cppscope.handle, datamember_index)
+
+_c_datamember_index = rffi.llexternal(
+    "cppyy_datamember_index",
+    [C_SCOPE, rffi.CCHARP], rffi.INT,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_datamember_index(cppscope, name):
+    return _c_datamember_index(cppscope.handle, name)
+
+# data member properties -----------------------------------------------------
+_c_is_publicdata = rffi.llexternal(
+    "cppyy_is_publicdata",
+    [C_SCOPE, rffi.INT], rffi.INT,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_is_publicdata(cppscope, datamember_index):
+    return _c_is_publicdata(cppscope.handle, datamember_index)
+_c_is_staticdata = rffi.llexternal(
+    "cppyy_is_staticdata",
+    [C_SCOPE, rffi.INT], rffi.INT,
+    threadsafe=ts_reflect,
+    compilation_info=backend.eci)
+def c_is_staticdata(cppscope, datamember_index):
+    return _c_is_staticdata(cppscope.handle, datamember_index)
+
+# misc helpers ---------------------------------------------------------------
+c_strtoll = rffi.llexternal(
+    "cppyy_strtoll",
+    [rffi.CCHARP], rffi.LONGLONG,
+    threadsafe=ts_helper,
+    compilation_info=backend.eci)
+c_strtoull = rffi.llexternal(
+    "cppyy_strtoull",
+    [rffi.CCHARP], rffi.ULONGLONG,
+    threadsafe=ts_helper,
+    compilation_info=backend.eci)
+c_free = rffi.llexternal(
+    "cppyy_free",
+    [rffi.VOIDP], lltype.Void,
+    threadsafe=ts_memory,
+    compilation_info=backend.eci)
+
+def charp2str_free(charp):
+    """Convert the C string charp to a Python string, then free charp.
+
+    The buffer is released with c_free (cppyy_free) after the copy has been
+    made, so the caller must not use charp afterwards.
+    """
+    result = rffi.charp2str(charp)
+    c_free(rffi.cast(rffi.VOIDP, charp))
+    return result
+
+c_charp2stdstring = rffi.llexternal(
+    "cppyy_charp2stdstring",
+    [rffi.CCHARP], C_OBJECT,
+    threadsafe=ts_helper,
+    compilation_info=backend.eci)
+c_stdstring2stdstring = rffi.llexternal(
+    "cppyy_stdstring2stdstring",
+    [C_OBJECT], C_OBJECT,
+    threadsafe=ts_helper,
+    compilation_info=backend.eci)
+c_assign2stdstring = rffi.llexternal(
+    "cppyy_assign2stdstring",
+    [C_OBJECT, rffi.CCHARP], lltype.Void,
+    threadsafe=ts_helper,
+    compilation_info=backend.eci)
+c_free_stdstring = rffi.llexternal(
+    "cppyy_free_stdstring",
+    [C_OBJECT], lltype.Void,
+    threadsafe=ts_helper,
+    compilation_info=backend.eci)
diff --git a/pypy/module/cppyy/capi/cint_capi.py b/pypy/module/cppyy/capi/cint_capi.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/cppyy/capi/cint_capi.py
@@ -0,0 +1,236 @@
+import py, os, sys
+
+from pypy.interpreter.error import OperationError
+from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.interpreter.typedef import TypeDef
+from pypy.interpreter.baseobjspace import Wrappable
+
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
+from pypy.rpython.lltypesystem import rffi
+from pypy.rlib import libffi, rdynload
+
+from pypy.module.itertools import interp_itertools
+
+
+__all__ = ['identify', 'eci', 'c_load_dictionary']
+
+pkgpath = py.path.local(__file__).dirpath().join(os.pardir)
+srcpath = pkgpath.join("src")
+incpath = pkgpath.join("include")
+
+if os.environ.get("ROOTSYS"):
+    import commands
+    (stat, incdir) = commands.getstatusoutput("root-config --incdir")
+    if stat != 0:        # presumably Reflex-only
+        rootincpath = [os.path.join(os.environ["ROOTSYS"], "include")]
+        rootlibpath = [os.path.join(os.environ["ROOTSYS"], "lib64"), os.path.join(os.environ["ROOTSYS"], "lib")]
+    else:
+        rootincpath = [incdir]
+        rootlibpath = commands.getoutput("root-config --libdir").split()
+else:
+    rootincpath = []
+    rootlibpath = []
+
+def identify():
+    return 'CINT'
+
+ts_reflect = False
+ts_call    = False
+ts_memory  = 'auto'
+ts_helper  = 'auto'
+
+# force loading in global mode of core libraries, rather than linking with
+# them as PyPy uses various versions of dlopen in various places; note that
+# this isn't going to fly on Windows (note that locking them in objects and
+# calling dlclose in __del__ seems to come too late, so this'll do for now)
+with rffi.scoped_str2charp('libCint.so') as ll_libname:
+    _cintdll = rdynload.dlopen(ll_libname, rdynload.RTLD_GLOBAL | rdynload.RTLD_NOW)
+with rffi.scoped_str2charp('libCore.so') as ll_libname:
+    _coredll = rdynload.dlopen(ll_libname, rdynload.RTLD_GLOBAL | rdynload.RTLD_NOW)
+
+eci = ExternalCompilationInfo(
+    separate_module_files=[srcpath.join("cintcwrapper.cxx")],
+    include_dirs=[incpath] + rootincpath,
+    includes=["cintcwrapper.h"],
+    library_dirs=rootlibpath,
+    link_extra=["-lCore", "-lCint"],
+    use_cpp_linker=True,
+)
+
+_c_load_dictionary = rffi.llexternal(
+    "cppyy_load_dictionary",
+    [rffi.CCHARP], rdynload.DLLHANDLE,
+    threadsafe=False,
+    compilation_info=eci)
+
+def c_load_dictionary(name):
+    result = _c_load_dictionary(name)
+    if not result:
+        err = rdynload.dlerror()
+        raise rdynload.DLOpenError(err)
+    return libffi.CDLL(name)       # should return handle to already open file
+
+
+# CINT-specific pythonizations ===============================================
+
+### TTree --------------------------------------------------------------------
+_ttree_Branch = rffi.llexternal(
+    "cppyy_ttree_Branch",
+    [rffi.VOIDP, rffi.CCHARP, rffi.CCHARP, rffi.VOIDP, rffi.INT, rffi.INT], rffi.LONG,
+    threadsafe=False,
+    compilation_info=eci)
+
+@unwrap_spec(args_w='args_w')
+def ttree_Branch(space, w_self, args_w):
+    """Pythonized version of TTree::Branch(): takes proxy objects and by-passes
+    the CINT-manual layer."""
+
+    from pypy.module.cppyy import interp_cppyy
+    tree_class = interp_cppyy.scope_byname(space, "TTree")
+
+    # sigs to modify (and by-pass CINT):
+    #  1. (const char*, const char*, T**,               Int_t=32000, Int_t=99)
+    #  2. (const char*, T**,                            Int_t=32000, Int_t=99)
+    argc = len(args_w)
+
+    # basic error handling of wrong arguments is best left to the original call,
+    # so that error messages etc. remain consistent in appearance: the following
+    # block may raise TypeError or IndexError to break out anytime
+
+    try:
+        if argc < 2 or 5 < argc:
+            raise TypeError("wrong number of arguments")
+
+        tree = space.interp_w(interp_cppyy.W_CPPInstance, w_self, can_be_None=True)
+        if (tree is None) or (tree.cppclass != tree_class):
+            raise TypeError("not a TTree")
+
+        # first argument must always be const char*
+        branchname = space.str_w(args_w[0])
+
+        # if args_w[1] is a classname, then case 1, else case 2
+        try:
+            classname = space.str_w(args_w[1])
+            addr_idx  = 2
+            w_address = args_w[addr_idx]
+        except OperationError:
+            addr_idx  = 1
+            w_address = args_w[addr_idx]
+
+        bufsize, splitlevel = 32000, 99
+        if addr_idx+1 < argc: bufsize = space.c_int_w(args_w[addr_idx+1])
+        if addr_idx+2 < argc: splitlevel = space.c_int_w(args_w[addr_idx+2])
+
+        # now retrieve the W_CPPInstance and build other stub arguments
+        space = tree.space    # holds the class cache in State
+        cppinstance = space.interp_w(interp_cppyy.W_CPPInstance, w_address)
+        address = rffi.cast(rffi.VOIDP, cppinstance.get_rawobject())
+        klassname = cppinstance.cppclass.full_name()
+        vtree = rffi.cast(rffi.VOIDP, tree.get_rawobject())
+
+        # call the helper stub to by-pass CINT
+        vbranch = _ttree_Branch(vtree, branchname, klassname, address, bufsize, splitlevel)
+        branch_class = interp_cppyy.scope_byname(space, "TBranch")
+        w_branch = interp_cppyy.wrap_cppobject(
+            space, space.w_None, branch_class, vbranch, isref=False, python_owns=False)
+        return w_branch
+    except (OperationError, TypeError, IndexError), e:
+        pass
+
+    # return control back to the original, unpythonized overload
+    return tree_class.get_overload("Branch").call(w_self, args_w)
+
+def activate_branch(space, w_branch):
+    w_branches = space.call_method(w_branch, "GetListOfBranches")
+    for i in range(space.int_w(space.call_method(w_branches, "GetEntriesFast"))):
+        w_b = space.call_method(w_branches, "At", space.wrap(i))
+        activate_branch(space, w_b)
+    space.call_method(w_branch, "SetStatus", space.wrap(1))
+    space.call_method(w_branch, "ResetReadEntry")
+
+@unwrap_spec(args_w='args_w')
+def ttree_getattr(space, w_self, args_w):
+    """Specialized __getattr__ for TTree's that allows switching on/off the
+    reading of individual branches."""
+
+    from pypy.module.cppyy import interp_cppyy
+    tree = space.interp_w(interp_cppyy.W_CPPInstance, w_self)
+
+    # setup branch as a data member and enable it for reading
+    space = tree.space            # holds the class cache in State
+    w_branch = space.call_method(w_self, "GetBranch", args_w[0])
+    w_klassname = space.call_method(w_branch, "GetClassName")
+    klass = interp_cppyy.scope_byname(space, space.str_w(w_klassname))
+    w_obj = klass.construct()
+    #space.call_method(w_branch, "SetStatus", space.wrap(1)) 
+    activate_branch(space, w_branch)
+    space.call_method(w_branch, "SetObject", w_obj)
+    space.call_method(w_branch, "GetEntry", space.wrap(0))
+    space.setattr(w_self, args_w[0], w_obj)
+    return w_obj
+
+class W_TTreeIter(Wrappable):
+    def __init__(self, space, w_tree):
+
+        from pypy.module.cppyy import interp_cppyy
+        tree = space.interp_w(interp_cppyy.W_CPPInstance, w_tree)
+        self.tree = tree.get_cppthis(tree.cppclass)
+        self.w_tree = w_tree
+
+        self.getentry = tree.cppclass.get_overload("GetEntry").functions[0]
+        self.current  = 0
+        self.maxentry = space.int_w(space.call_method(w_tree, "GetEntriesFast"))
+
+        space = self.space = tree.space          # holds the class cache in State
+        space.call_method(w_tree, "SetBranchStatus", space.wrap("*"), space.wrap(0))
+
+    def iter_w(self):
+        return self.space.wrap(self)
+
+    def next_w(self):
+        if self.current == self.maxentry:
+            raise OperationError(self.space.w_StopIteration, self.space.w_None)
+        # TODO: check bytes read?
+        self.getentry.call(self.tree, [self.space.wrap(self.current)])
+        self.current += 1 
+        return self.w_tree
+
+W_TTreeIter.typedef = TypeDef(
+    'TTreeIter',
+    __iter__ = interp2app(W_TTreeIter.iter_w),
+    next = interp2app(W_TTreeIter.next_w),
+)
+
+def ttree_iter(space, w_self):
+    """Allow iteration over TTree's. Also initializes branch data members and
+    sets addresses, if needed."""
+    w_treeiter = W_TTreeIter(space, w_self)
+    return w_treeiter
+
+# setup pythonizations for later use at run-time
+_pythonizations = {}
+def register_pythonizations(space):
+    "NOT_RPYTHON"
+
+    ### TTree
+    _pythonizations['ttree_Branch']  = space.wrap(interp2app(ttree_Branch))
+    _pythonizations['ttree_iter']    = space.wrap(interp2app(ttree_iter))
+    _pythonizations['ttree_getattr'] = space.wrap(interp2app(ttree_getattr))
+
+# callback coming in when app-level bound classes have been created
+def pythonize(space, name, w_pycppclass):
+
+    if name == 'TFile':
+        space.setattr(w_pycppclass, space.wrap("__getattr__"),
+                      space.getattr(w_pycppclass, space.wrap("Get")))
+
+    elif name == 'TTree':
+        space.setattr(w_pycppclass, space.wrap("_unpythonized_Branch"),
+                      space.getattr(w_pycppclass, space.wrap("Branch")))
+        space.setattr(w_pycppclass, space.wrap("Branch"), _pythonizations["ttree_Branch"])
+        space.setattr(w_pycppclass, space.wrap("__iter__"), _pythonizations["ttree_iter"])
+        space.setattr(w_pycppclass, space.wrap("__getattr__"), _pythonizations["ttree_getattr"])
+
+    elif name[0:8] == "TVectorT":    # TVectorT<> template
+        space.setattr(w_pycppclass, space.wrap("__len__"),
+                      space.getattr(w_pycppclass, space.wrap("GetNoElements")))
diff --git a/pypy/module/cppyy/capi/reflex_capi.py b/pypy/module/cppyy/capi/reflex_capi.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/cppyy/capi/reflex_capi.py
@@ -0,0 +1,52 @@
+import py, os
+
+from pypy.rlib import libffi
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
+
+__all__ = ['identify', 'eci', 'c_load_dictionary']
+
+pkgpath = py.path.local(__file__).dirpath().join(os.pardir)
+srcpath = pkgpath.join("src")
+incpath = pkgpath.join("include")
+
+if os.environ.get("ROOTSYS"):
+    import commands
+    (stat, incdir) = commands.getstatusoutput("root-config --incdir")
+    if stat != 0:        # presumably Reflex-only
+        rootincpath = [os.path.join(os.environ["ROOTSYS"], "include")]
+        rootlibpath = [os.path.join(os.environ["ROOTSYS"], "lib64"), os.path.join(os.environ["ROOTSYS"], "lib")]
+    else:
+        rootincpath = [incdir]
+        rootlibpath = commands.getoutput("root-config --libdir").split()
+else:
+    rootincpath = []
+    rootlibpath = []
+
+def identify():
+    return 'Reflex'
+
+ts_reflect = False
+ts_call    = 'auto'
+ts_memory  = 'auto'
+ts_helper  = 'auto'
+
+eci = ExternalCompilationInfo(
+    separate_module_files=[srcpath.join("reflexcwrapper.cxx")],
+    include_dirs=[incpath] + rootincpath,
+    includes=["reflexcwrapper.h"],
+    library_dirs=rootlibpath,
+    link_extra=["-lReflex"],
+    use_cpp_linker=True,
+)
+
+def c_load_dictionary(name):
+    return libffi.CDLL(name)
+
+
+# Reflex-specific pythonizations
+def register_pythonizations(space):
+    "NOT_RPYTHON"
+    pass
+
+def pythonize(space, name, w_pycppclass):
+    pass
diff --git a/pypy/module/cppyy/converter.py b/pypy/module/cppyy/converter.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/cppyy/converter.py
@@ -0,0 +1,747 @@
+import sys
+
+from pypy.interpreter.error import OperationError
+
+from pypy.rpython.lltypesystem import rffi, lltype
+from pypy.rlib.rarithmetic import r_singlefloat
+from pypy.rlib import libffi, clibffi, rfloat
+
+from pypy.module._rawffi.interp_rawffi import unpack_simple_shape
+from pypy.module._rawffi.array import W_Array
+
+from pypy.module.cppyy import helper, capi, ffitypes
+
+# Converter objects are used to translate between RPython and C++. They are
+# defined by the type name for which they provide conversion. Uses are for
+# function arguments, as well as for read and write access to data members.
+# All type conversions are fully checked.
+#
+# Converter instances are created by get_converter(<type name>), see below.
+# The name given should be qualified in case there is a specialised, exact
+# match for the qualified type.
+
+
+def get_rawobject(space, w_obj):
+    from pypy.module.cppyy.interp_cppyy import W_CPPInstance
+    cppinstance = space.interp_w(W_CPPInstance, w_obj, can_be_None=True)
+    if cppinstance:
+        rawobject = cppinstance.get_rawobject()
+        assert lltype.typeOf(rawobject) == capi.C_OBJECT
+        return rawobject
+    return capi.C_NULL_OBJECT
+
+def set_rawobject(space, w_obj, address):
+    from pypy.module.cppyy.interp_cppyy import W_CPPInstance
+    cppinstance = space.interp_w(W_CPPInstance, w_obj, can_be_None=True)
+    if cppinstance:
+        assert lltype.typeOf(cppinstance._rawobject) == capi.C_OBJECT
+        cppinstance._rawobject = rffi.cast(capi.C_OBJECT, address)
+
+def get_rawobject_nonnull(space, w_obj):
+    from pypy.module.cppyy.interp_cppyy import W_CPPInstance
+    cppinstance = space.interp_w(W_CPPInstance, w_obj, can_be_None=True)
+    if cppinstance:
+        cppinstance._nullcheck()
+        rawobject = cppinstance.get_rawobject()
+        assert lltype.typeOf(rawobject) == capi.C_OBJECT
+        return rawobject
+    return capi.C_NULL_OBJECT
+
+def get_rawbuffer(space, w_obj):
+    try:
+        buf = space.buffer_w(w_obj)
+        return rffi.cast(rffi.VOIDP, buf.get_raw_address())
+    except Exception:
+        pass
+    # special case: allow integer 0 as NULL
+    try:
+        buf = space.int_w(w_obj)
+        if buf == 0:
+            return rffi.cast(rffi.VOIDP, 0)
+    except Exception:
+        pass
+    # special case: allow None as NULL
+    if space.is_true(space.is_(w_obj, space.w_None)):
+        return rffi.cast(rffi.VOIDP, 0)
+    raise TypeError("not an addressable buffer")
+
+
+class TypeConverter(object):
+    _immutable_ = True
+    libffitype = lltype.nullptr(clibffi.FFI_TYPE_P.TO)
+    uses_local = False
+
+    name = ""
+
+    def __init__(self, space, extra):
+        pass
+
+    def _get_raw_address(self, space, w_obj, offset):
+        rawobject = get_rawobject_nonnull(space, w_obj)
+        assert lltype.typeOf(rawobject) == capi.C_OBJECT
+        if rawobject:
+            fieldptr = capi.direct_ptradd(rawobject, offset)
+        else:
+            fieldptr = rffi.cast(capi.C_OBJECT, offset)
+        return fieldptr
+
+    def _is_abstract(self, space):
+        raise OperationError(space.w_TypeError, space.wrap("no converter available for '%s'" % self.name))
+
+    def convert_argument(self, space, w_obj, address, call_local):
+        self._is_abstract(space)
+
+    def convert_argument_libffi(self, space, w_obj, argchain, call_local):
+        from pypy.module.cppyy.interp_cppyy import FastCallNotPossible
+        raise FastCallNotPossible
+
+    def default_argument_libffi(self, space, argchain):
+        from pypy.module.cppyy.interp_cppyy import FastCallNotPossible
+        raise FastCallNotPossible
+
+    def from_memory(self, space, w_obj, w_pycppclass, offset):
+        self._is_abstract(space)
+
+    def to_memory(self, space, w_obj, w_value, offset):
+        self._is_abstract(space)
+
+    def finalize_call(self, space, w_obj, call_local):
+        pass
+
+    def free_argument(self, space, arg, call_local):
+        pass
+
+
+class ArrayCache(object):
+    def __init__(self, space):
+        self.space = space
+    def __getattr__(self, name):
+        if name.startswith('array_'):
+            typecode = name[len('array_'):]
+            arr = self.space.interp_w(W_Array, unpack_simple_shape(self.space, self.space.wrap(typecode)))
+            setattr(self, name, arr)
+            return arr
+        raise AttributeError(name)
+
+    def _freeze_(self):
+        return True
+
+class ArrayTypeConverterMixin(object):
+    _mixin_ = True
+    _immutable_ = True
+
+    def __init__(self, space, array_size):
+        if array_size <= 0:
+            self.size = sys.maxint
+        else:
+            self.size = array_size
+
+    def from_memory(self, space, w_obj, w_pycppclass, offset):
+        if hasattr(space, "fake"):
+            raise NotImplementedError
+        # read access, so no copy needed
+        address_value = self._get_raw_address(space, w_obj, offset)
+        address = rffi.cast(rffi.ULONG, address_value)
+        cache = space.fromcache(ArrayCache)
+        arr = getattr(cache, 'array_' + self.typecode)
+        return arr.fromaddress(space, address, self.size)
+
+    def to_memory(self, space, w_obj, w_value, offset):
+        # copy the full array (uses byte copy for now)
+        address = rffi.cast(rffi.CCHARP, self._get_raw_address(space, w_obj, offset))
+        buf = space.buffer_w(w_value)
+        # TODO: report if too many items given?
+        for i in range(min(self.size*self.typesize, buf.getlength())):
+            address[i] = buf.getitem(i)
+
+
+class PtrTypeConverterMixin(object):
+    _mixin_ = True
+    _immutable_ = True
+
+    def __init__(self, space, array_size):
+        self.size = sys.maxint
+
+    def convert_argument(self, space, w_obj, address, call_local):
+        w_tc = space.findattr(w_obj, space.wrap('typecode'))
+        if w_tc is not None and space.str_w(w_tc) != self.typecode:
+            msg = "expected %s pointer type, but received %s" % (self.typecode, space.str_w(w_tc))
+            raise OperationError(space.w_TypeError, space.wrap(msg))
+        x = rffi.cast(rffi.LONGP, address)
+        try:
+            x[0] = rffi.cast(rffi.LONG, get_rawbuffer(space, w_obj))
+        except TypeError:
+            raise OperationError(space.w_TypeError,
+                                 space.wrap("raw buffer interface not supported"))
+        ba = rffi.cast(rffi.CCHARP, address)
+        ba[capi.c_function_arg_typeoffset()] = 'o'
+
+    def from_memory(self, space, w_obj, w_pycppclass, offset):
+        # read access, so no copy needed
+        address_value = self._get_raw_address(space, w_obj, offset)
+        address = rffi.cast(rffi.ULONGP, address_value)
+        cache = space.fromcache(ArrayCache)
+        arr = getattr(cache, 'array_' + self.typecode)
+        return arr.fromaddress(space, address[0], self.size)
+
+    def to_memory(self, space, w_obj, w_value, offset):
+        # copy only the pointer value
+        rawobject = get_rawobject_nonnull(space, w_obj)
+        byteptr = rffi.cast(rffi.CCHARPP, capi.direct_ptradd(rawobject, offset))
+        buf = space.buffer_w(w_value)
+        try:
+            byteptr[0] = buf.get_raw_address()
+        except ValueError:
+            raise OperationError(space.w_TypeError,
+                                 space.wrap("raw buffer interface not supported"))
+
+
+class NumericTypeConverterMixin(object):
+    _mixin_ = True
+    _immutable_ = True
+
+    def convert_argument_libffi(self, space, w_obj, argchain, call_local):
+        argchain.arg(self._unwrap_object(space, w_obj))
+
+    def default_argument_libffi(self, space, argchain):
+        argchain.arg(self.default)
+
+    def from_memory(self, space, w_obj, w_pycppclass, offset):
+        address = self._get_raw_address(space, w_obj, offset)
+        rffiptr = rffi.cast(self.c_ptrtype, address)
+        return space.wrap(rffiptr[0])
+
+    def to_memory(self, space, w_obj, w_value, offset):
+        address = self._get_raw_address(space, w_obj, offset)
+        rffiptr = rffi.cast(self.c_ptrtype, address)
+        rffiptr[0] = self._unwrap_object(space, w_value)
+
+class ConstRefNumericTypeConverterMixin(NumericTypeConverterMixin):
+    _mixin_ = True
+    _immutable_ = True
+    uses_local = True
+
+    def convert_argument_libffi(self, space, w_obj, argchain, call_local):
+        assert rffi.sizeof(self.c_type) <= 2*rffi.sizeof(rffi.VOIDP)  # see interp_cppyy.py
+        obj = self._unwrap_object(space, w_obj)
+        typed_buf = rffi.cast(self.c_ptrtype, call_local)
+        typed_buf[0] = obj
+        argchain.arg(call_local)
+
+class IntTypeConverterMixin(NumericTypeConverterMixin):
+    _mixin_ = True
+    _immutable_ = True
+
+    def convert_argument(self, space, w_obj, address, call_local):
+        x = rffi.cast(self.c_ptrtype, address)
+        x[0] = self._unwrap_object(space, w_obj)
+
+class FloatTypeConverterMixin(NumericTypeConverterMixin):
+    _mixin_ = True
+    _immutable_ = True
+
+    def convert_argument(self, space, w_obj, address, call_local):
+        x = rffi.cast(self.c_ptrtype, address)
+        x[0] = self._unwrap_object(space, w_obj)
+        ba = rffi.cast(rffi.CCHARP, address)
+        ba[capi.c_function_arg_typeoffset()] = self.typecode
+
+
+class VoidConverter(TypeConverter):
+    _immutable_ = True
+    libffitype = libffi.types.void
+
+    def __init__(self, space, name):
+        self.name = name
+
+    def convert_argument(self, space, w_obj, address, call_local):
+        raise OperationError(space.w_TypeError,
+                             space.wrap('no converter available for type "%s"' % self.name))
+
+
+class BoolConverter(ffitypes.typeid(bool), TypeConverter):
+    _immutable_ = True
+
+    def convert_argument(self, space, w_obj, address, call_local):
+        x = rffi.cast(rffi.LONGP, address)
+        x[0] = self._unwrap_object(space, w_obj)
+
+    def convert_argument_libffi(self, space, w_obj, argchain, call_local):
+        argchain.arg(self._unwrap_object(space, w_obj))
+
+    def from_memory(self, space, w_obj, w_pycppclass, offset):
+        address = rffi.cast(rffi.CCHARP, self._get_raw_address(space, w_obj, offset))
+        if address[0] == '\x01':
+            return space.w_True
+        return space.w_False
+
+    def to_memory(self, space, w_obj, w_value, offset):
+        address = rffi.cast(rffi.CCHARP, self._get_raw_address(space, w_obj, offset))
+        arg = self._unwrap_object(space, w_value)
+        if arg:
+            address[0] = '\x01'
+        else:
+            address[0] = '\x00'
+
+class CharConverter(ffitypes.typeid(rffi.CHAR), TypeConverter):
+    _immutable_ = True
+
+    def convert_argument(self, space, w_obj, address, call_local):
+        x = rffi.cast(rffi.CCHARP, address)
+        x[0] = self._unwrap_object(space, w_obj)
+
+    def convert_argument_libffi(self, space, w_obj, argchain, call_local):
+        argchain.arg(self._unwrap_object(space, w_obj))
+
+    def from_memory(self, space, w_obj, w_pycppclass, offset):
+        address = rffi.cast(rffi.CCHARP, self._get_raw_address(space, w_obj, offset))
+        return space.wrap(address[0])
+
+    def to_memory(self, space, w_obj, w_value, offset):
+        address = rffi.cast(rffi.CCHARP, self._get_raw_address(space, w_obj, offset))
+        address[0] = self._unwrap_object(space, w_value)
+
+class FloatConverter(ffitypes.typeid(rffi.FLOAT), FloatTypeConverterMixin, TypeConverter):
+    _immutable_ = True
+
+    def __init__(self, space, default):
+        if default:
+            fval = float(rfloat.rstring_to_float(default))
+        else:
+            fval = float(0.)
+        self.default = r_singlefloat(fval)
+
+    def from_memory(self, space, w_obj, w_pycppclass, offset):
+        address = self._get_raw_address(space, w_obj, offset)
+        rffiptr = rffi.cast(self.c_ptrtype, address)
+        return space.wrap(float(rffiptr[0]))
+
+class ConstFloatRefConverter(FloatConverter):
+    _immutable_ = True
+    libffitype = libffi.types.pointer
+    typecode = 'F'
+
+    def convert_argument_libffi(self, space, w_obj, argchain, call_local):
+        from pypy.module.cppyy.interp_cppyy import FastCallNotPossible
+        raise FastCallNotPossible
+
+class DoubleConverter(ffitypes.typeid(rffi.DOUBLE), FloatTypeConverterMixin, TypeConverter):
+    _immutable_ = True
+
+    def __init__(self, space, default):
+        if default:
+            self.default = rffi.cast(self.c_type, rfloat.rstring_to_float(default))
+        else:
+            self.default = rffi.cast(self.c_type, 0.)
+
+class ConstDoubleRefConverter(ConstRefNumericTypeConverterMixin, DoubleConverter):
+    _immutable_ = True
+    libffitype = libffi.types.pointer
+    typecode = 'D'
+
+
+class CStringConverter(TypeConverter):
+    _immutable_ = True
+
+    def convert_argument(self, space, w_obj, address, call_local):
+        x = rffi.cast(rffi.LONGP, address)
+        arg = space.str_w(w_obj)
+        x[0] = rffi.cast(rffi.LONG, rffi.str2charp(arg))
+        ba = rffi.cast(rffi.CCHARP, address)
+        ba[capi.c_function_arg_typeoffset()] = 'o'
+
+    def from_memory(self, space, w_obj, w_pycppclass, offset):
+        address = self._get_raw_address(space, w_obj, offset)
+        charpptr = rffi.cast(rffi.CCHARPP, address)
+        return space.wrap(rffi.charp2str(charpptr[0]))
+
+    def free_argument(self, space, arg, call_local):
+        lltype.free(rffi.cast(rffi.CCHARPP, arg)[0], flavor='raw')
+
+
+class VoidPtrConverter(TypeConverter):
+    _immutable_ = True
+
+    def convert_argument(self, space, w_obj, address, call_local):
+        x = rffi.cast(rffi.VOIDPP, address)
+        ba = rffi.cast(rffi.CCHARP, address)
+        try:
+            x[0] = get_rawbuffer(space, w_obj)
+        except TypeError:
+            x[0] = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj))
+        ba[capi.c_function_arg_typeoffset()] = 'o'
+
+    def convert_argument_libffi(self, space, w_obj, argchain, call_local):
+        argchain.arg(get_rawobject(space, w_obj))
+
+class VoidPtrPtrConverter(TypeConverter):
+    _immutable_ = True
+    uses_local = True
+
+    def convert_argument(self, space, w_obj, address, call_local):
+        x = rffi.cast(rffi.VOIDPP, address)
+        ba = rffi.cast(rffi.CCHARP, address)
+        r = rffi.cast(rffi.VOIDPP, call_local)
+        try:
+            r[0] = get_rawbuffer(space, w_obj)
+        except TypeError:
+            r[0] = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj))
+        x[0] = rffi.cast(rffi.VOIDP, call_local)
+        ba[capi.c_function_arg_typeoffset()] = 'a'
+
+    def finalize_call(self, space, w_obj, call_local):
+        r = rffi.cast(rffi.VOIDPP, call_local)
+        try:
+            set_rawobject(space, w_obj, r[0])
+        except OperationError:
+            pass             # no set on buffer/array/None
+
+class VoidPtrRefConverter(VoidPtrPtrConverter):
+    _immutable_ = True
+    uses_local = True
+
+class InstancePtrConverter(TypeConverter):
+    _immutable_ = True
+
+    def __init__(self, space, cppclass):
+        from pypy.module.cppyy.interp_cppyy import W_CPPClass
+        assert isinstance(cppclass, W_CPPClass)
+        self.cppclass = cppclass
+
+    def _unwrap_object(self, space, w_obj):
+        from pypy.module.cppyy.interp_cppyy import W_CPPInstance
+        obj = space.interpclass_w(w_obj)
+        if isinstance(obj, W_CPPInstance):
+            if capi.c_is_subtype(obj.cppclass, self.cppclass):
+                rawobject = obj.get_rawobject()
+                offset = capi.c_base_offset(obj.cppclass, self.cppclass, rawobject, 1)
+                obj_address = capi.direct_ptradd(rawobject, offset)
+                return rffi.cast(capi.C_OBJECT, obj_address)
+        raise OperationError(space.w_TypeError,
+                             space.wrap("cannot pass %s as %s" %
+                             (space.type(w_obj).getname(space, "?"), self.cppclass.name)))
+
+    def convert_argument(self, space, w_obj, address, call_local):
+        x = rffi.cast(rffi.VOIDPP, address)
+        x[0] = rffi.cast(rffi.VOIDP, self._unwrap_object(space, w_obj))
+        address = rffi.cast(capi.C_OBJECT, address)
+        ba = rffi.cast(rffi.CCHARP, address)
+        ba[capi.c_function_arg_typeoffset()] = 'o'
+
+    def convert_argument_libffi(self, space, w_obj, argchain, call_local):
+        argchain.arg(self._unwrap_object(space, w_obj))
+
+    def from_memory(self, space, w_obj, w_pycppclass, offset):
+        address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset))
+        from pypy.module.cppyy import interp_cppyy
+        return interp_cppyy.wrap_cppobject_nocast(
+            space, w_pycppclass, self.cppclass, address, isref=True, python_owns=False)
+
+    def to_memory(self, space, w_obj, w_value, offset):
+        address = rffi.cast(rffi.VOIDPP, self._get_raw_address(space, w_obj, offset))
+        address[0] = rffi.cast(rffi.VOIDP, self._unwrap_object(space, w_value))
+
+class InstanceConverter(InstancePtrConverter):
+    _immutable_ = True
+
+    def from_memory(self, space, w_obj, w_pycppclass, offset):
+        address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset))
+        from pypy.module.cppyy import interp_cppyy
+        return interp_cppyy.wrap_cppobject_nocast(
+            space, w_pycppclass, self.cppclass, address, isref=False, python_owns=False)
+
+    def to_memory(self, space, w_obj, w_value, offset):
+        self._is_abstract(space)
+
+class InstancePtrPtrConverter(InstancePtrConverter):
+    _immutable_ = True
+    uses_local = True
+
+    def convert_argument(self, space, w_obj, address, call_local):
+        r = rffi.cast(rffi.VOIDPP, call_local)
+        r[0] = rffi.cast(rffi.VOIDP, self._unwrap_object(space, w_obj))
+        x = rffi.cast(rffi.VOIDPP, address)
+        x[0] = rffi.cast(rffi.VOIDP, call_local)
+        address = rffi.cast(capi.C_OBJECT, address)
+        ba = rffi.cast(rffi.CCHARP, address)
+        ba[capi.c_function_arg_typeoffset()] = 'o'
+
+    def from_memory(self, space, w_obj, w_pycppclass, offset):
+        self._is_abstract(space)
+
+    def to_memory(self, space, w_obj, w_value, offset):
+        self._is_abstract(space)
+
+    def finalize_call(self, space, w_obj, call_local):
+        from pypy.module.cppyy.interp_cppyy import W_CPPInstance
+        obj = space.interpclass_w(w_obj)
+        assert isinstance(obj, W_CPPInstance)
+        r = rffi.cast(rffi.VOIDPP, call_local)
+        obj._rawobject = rffi.cast(capi.C_OBJECT, r[0])
+
+
+class StdStringConverter(InstanceConverter):
+    _immutable_ = True
+
+    def __init__(self, space, extra):
+        from pypy.module.cppyy import interp_cppyy
+        cppclass = interp_cppyy.scope_byname(space, "std::string")
+        InstanceConverter.__init__(self, space, cppclass)
+
+    def _unwrap_object(self, space, w_obj):
+        try:
+            charp = rffi.str2charp(space.str_w(w_obj))
+            arg = capi.c_charp2stdstring(charp)
+            rffi.free_charp(charp)
+            return arg
+        except OperationError:
+            arg = InstanceConverter._unwrap_object(self, space, w_obj)
+            return capi.c_stdstring2stdstring(arg)
+
+    def to_memory(self, space, w_obj, w_value, offset):
+        try:
+            address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset))
+            charp = rffi.str2charp(space.str_w(w_value))
+            capi.c_assign2stdstring(address, charp)
+            rffi.free_charp(charp)
+            return
+        except Exception:
+            pass
+        return InstanceConverter.to_memory(self, space, w_obj, w_value, offset)
+
+    def free_argument(self, space, arg, call_local):
+        capi.c_free_stdstring(rffi.cast(capi.C_OBJECT, rffi.cast(rffi.VOIDPP, arg)[0]))
+
+class StdStringRefConverter(InstancePtrConverter):
+    _immutable_ = True
+
+    def __init__(self, space, extra):
+        from pypy.module.cppyy import interp_cppyy
+        cppclass = interp_cppyy.scope_byname(space, "std::string")
+        InstancePtrConverter.__init__(self, space, cppclass)
+
+
+class PyObjectConverter(TypeConverter):
+    # Converter registered for "PyObject*": wrapped objects are turned into
+    # cpyext references with make_ref() and released again with Py_DecRef().
+    # None of the methods is available on a space with a 'fake' attribute.
+    _immutable_ = True
+
+    def convert_argument(self, space, w_obj, address, call_local):
+        if hasattr(space, "fake"):
+            raise NotImplementedError
+        # cpyext is requested from the space before its helpers are imported
+        space.getbuiltinmodule("cpyext")
+        from pypy.module.cpyext.pyobject import make_ref
+        pyobj = rffi.cast(rffi.VOIDP, make_ref(space, w_obj))
+        # store the reference in the argument slot ...
+        rffi.cast(rffi.VOIDPP, address)[0] = pyobj
+        # ... and write the type code 'a' at the type offset of that slot
+        typeoffset = capi.c_function_arg_typeoffset()
+        rffi.cast(rffi.CCHARP, address)[typeoffset] = 'a'
+
+    def convert_argument_libffi(self, space, w_obj, argchain, call_local):
+        if hasattr(space, "fake"):
+            raise NotImplementedError
+        space.getbuiltinmodule("cpyext")
+        from pypy.module.cpyext.pyobject import make_ref
+        pyobj = make_ref(space, w_obj)
+        argchain.arg(rffi.cast(rffi.VOIDP, pyobj))
+
+    def free_argument(self, space, arg, call_local):
+        if hasattr(space, "fake"):
+            raise NotImplementedError
+        from pypy.module.cpyext.pyobject import Py_DecRef, PyObject
+        # 'arg' points at the slot that holds the reference
+        slot = rffi.cast(rffi.VOIDPP, arg)
+        Py_DecRef(space, rffi.cast(PyObject, slot[0]))
+
+
+_converters = {}         # builtin and custom types
+_a_converters = {}       # array and ptr versions of above
+def get_converter(space, name, default):
+    # Return a converter instance for the C++ type called 'name'. 'default'
+    # is passed on to the converter classes looked up in _converters.
+    #
+    # The matching of the name to a converter should follow:
+    #   1) full, exact match
+    #       1a) const-removed match
+    #   2) match of decorated, unqualified type
+    #   3) accept ref as pointer (for the stubs, const& can be
+    #       by value, but that does not work for the ffi path)
+    #   4) generalized cases (covers basically all user classes)
+    #   5) void converter, which fails on use
+
+    name = capi.c_resolve_name(name)
+
+    #   1) full, exact match
+    try:
+        return _converters[name](space, default)
+    except KeyError:
+        pass
+
+    #   1a) const-removed match
+    try:
+        return _converters[helper.remove_const(name)](space, default)
+    except KeyError:
+        pass
+
+    #   2) match of decorated, unqualified type
+    compound = helper.compound(name)
+    clean_name = capi.c_resolve_name(helper.clean_type(name))
+    try:
+        # array_size may be negative to indicate no size or no size found
+        array_size = helper.array_size(name)
+        return _a_converters[clean_name+compound](space, array_size)
+    except KeyError:
+        pass
+
+    #   3) TODO: accept ref as pointer
+
+    #   4) generalized cases (covers basically all user classes)
+    from pypy.module.cppyy import interp_cppyy
+    cppclass = interp_cppyy.scope_byname(space, clean_name)
+    if cppclass:
+        # type check for the benefit of the annotator
+        from pypy.module.cppyy.interp_cppyy import W_CPPClass
+        cppclass = space.interp_w(W_CPPClass, cppclass, can_be_None=False)
+        # any other compound falls through to the void converter below
+        if compound == "*" or compound == "&":
+            return InstancePtrConverter(space, cppclass)
+        elif compound == "**":
+            return InstancePtrPtrConverter(space, cppclass)
+        elif compound == "":
+            return InstanceConverter(space, cppclass)
+    elif capi.c_is_enum(clean_name):
+        # enums use the converter registered under 'unsigned'
+        return _converters['unsigned'](space, default)
+
+    #   5) void converter, which fails on use
+    #
+    # return a void converter here, so that the class can be built even
+    # when some types are unknown; this overload will simply fail on use
+    return VoidConverter(space, name)
+
+
+# builtin types that have a dedicated, hand-written converter class
+_converters.update({
+    "bool":          BoolConverter,
+    "char":          CharConverter,
+    "float":         FloatConverter,
+    "const float&":  ConstFloatRefConverter,
+    "double":        DoubleConverter,
+    "const double&": ConstDoubleRefConverter,
+    "const char*":   CStringConverter,
+    "void*":         VoidPtrConverter,
+    "void**":        VoidPtrPtrConverter,
+    "void*&":        VoidPtrRefConverter,
+})
+
+# special cases (note: CINT backend requires the simple name 'string')
+_converters.update({
+    "std::basic_string<char>":        StdStringConverter,
+    "const std::basic_string<char>&": StdStringConverter,     # TODO: shouldn't copy
+    "std::basic_string<char>&":       StdStringRefConverter,
+})
+
+_converters.update({"PyObject*": PyObjectConverter})
+
+# add basic (builtin) converters
+def _build_basic_converters():
+    "NOT_RPYTHON"
+    # Creates, per integer C type, a by-value converter and a const-reference
+    # converter class and registers both in _converters under every listed
+    # spelling of the type name.
+
+    # signed types (use strtoll in setting of default in __init__)
+    type_info = (
+        (rffi.SHORT,      ("short", "short int")),
+        (rffi.INT,        ("int",)),
+    )
+
+    # constref converters exist only b/c the stubs take constref by value, whereas
+    # libffi takes them by pointer (hence it needs the fast-path in testing); note
+    # that this list is not complete, as some classes are specialized
+
+    for c_type, names in type_info:
+        class BasicConverter(ffitypes.typeid(c_type), IntTypeConverterMixin, TypeConverter):
+            _immutable_ = True
+            def __init__(self, space, default):
+                self.default = rffi.cast(self.c_type, capi.c_strtoll(default))
+        class ConstRefConverter(ConstRefNumericTypeConverterMixin, BasicConverter):
+            _immutable_ = True
+            libffitype = libffi.types.pointer
+        for name in names:
+            _converters[name] = BasicConverter
+            _converters["const "+name+"&"] = ConstRefConverter
+
+    # long and long long: same as above, except that the const-reference
+    # variant has its own convert_argument which stores the unwrapped value
+    # in the argument slot and tags the slot with typecode 'r'
+    type_info = (
+        (rffi.LONG,       ("long", "long int")),
+        (rffi.LONGLONG,   ("long long", "long long int")),
+    )
+
+    for c_type, names in type_info:
+        class BasicConverter(ffitypes.typeid(c_type), IntTypeConverterMixin, TypeConverter):
+            _immutable_ = True
+            def __init__(self, space, default):
+                self.default = rffi.cast(self.c_type, capi.c_strtoll(default))
+        class ConstRefConverter(ConstRefNumericTypeConverterMixin, BasicConverter):
+            _immutable_ = True
+            libffitype = libffi.types.pointer
+            typecode = 'r'
+            def convert_argument(self, space, w_obj, address, call_local):
+                x = rffi.cast(self.c_ptrtype, address)
+                x[0] = self._unwrap_object(space, w_obj)
+                ba = rffi.cast(rffi.CCHARP, address)
+                ba[capi.c_function_arg_typeoffset()] = self.typecode
+        for name in names:
+            _converters[name] = BasicConverter
+            _converters["const "+name+"&"] = ConstRefConverter
+
+    # unsigned integer types (use strtoull in setting of default in __init__)
+    type_info = (
+        (rffi.USHORT,     ("unsigned short", "unsigned short int")),
+        (rffi.UINT,       ("unsigned", "unsigned int")),
+        (rffi.ULONG,      ("unsigned long", "unsigned long int")),
+        (rffi.ULONGLONG,  ("unsigned long long", "unsigned long long int")),
+    )
+
+    for c_type, names in type_info:
+        class BasicConverter(ffitypes.typeid(c_type), IntTypeConverterMixin, TypeConverter):
+            _immutable_ = True
+            def __init__(self, space, default):
+                self.default = rffi.cast(self.c_type, capi.c_strtoull(default))
+        class ConstRefConverter(ConstRefNumericTypeConverterMixin, BasicConverter):
+            _immutable_ = True
+            libffitype = libffi.types.pointer
+        for name in names:
+            _converters[name] = BasicConverter
+            _converters["const "+name+"&"] = ConstRefConverter
+_build_basic_converters()
+
+# create the array and pointer converters; all real work is in the mixins
+def _build_array_converters():
+    "NOT_RPYTHON"
+    # Registers, for every listed type name, an array converter under
+    # '<name>[]' and a pointer converter under '<name>*' in _a_converters.
+
+    # each entry is (typecode, size of one element, spellings of the type)
+    array_info = (
+        ('b', rffi.sizeof(rffi.UCHAR),  ("bool",)),    # is debatable, but works ...
+        ('h', rffi.sizeof(rffi.SHORT),  ("short int", "short")),
+        ('H', rffi.sizeof(rffi.USHORT), ("unsigned short int", "unsigned short")),
+        ('i', rffi.sizeof(rffi.INT),    ("int",)),
+        ('I', rffi.sizeof(rffi.UINT),   ("unsigned int", "unsigned")),
+        ('l', rffi.sizeof(rffi.LONG),   ("long int", "long")),
+        ('L', rffi.sizeof(rffi.ULONG),  ("unsigned long int", "unsigned long")),
+        ('f', rffi.sizeof(rffi.FLOAT),  ("float",)),
+        ('d', rffi.sizeof(rffi.DOUBLE), ("double",)),
+    )
+
+    for tcode, tsize, names in array_info:
+        # a fresh pair of classes per entry; they only carry the typecode and
+        # element size as class attributes
+        class ArrayConverter(ArrayTypeConverterMixin, TypeConverter):
+            _immutable_ = True
+            typecode = tcode
+            typesize = tsize
+        class PtrConverter(PtrTypeConverterMixin, TypeConverter):
+            _immutable_ = True
+            typecode = tcode
+            typesize = tsize
+        for name in names:
+            _a_converters[name+'[]'] = ArrayConverter
+            _a_converters[name+'*']  = PtrConverter
+_build_array_converters()
+
+# add another set of aliased names
+def _add_aliased_converters():
+    "NOT_RPYTHON"
+    # Makes the converter of an already registered type name available
+    # under an additional spelling; each pair is (registered name, alias).
+    alias_pairs = [
+        ("char", "unsigned char"),
+        ("const char*", "char*"),
+        ("std::basic_string<char>", "string"),
+        ("const std::basic_string<char>&", "const string&"),
+        ("std::basic_string<char>&", "string&"),
+        ("PyObject*", "_object*"),
+    ]
+
+    for registered, spelling in alias_pairs:
+        _converters[spelling] = _converters[registered]
+_add_aliased_converters()
+
diff --git a/pypy/module/cppyy/executor.py b/pypy/module/cppyy/executor.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/cppyy/executor.py
@@ -0,0 +1,367 @@
+import sys
+
+from pypy.interpreter.error import OperationError
+
+from pypy.rpython.lltypesystem import rffi, lltype
+from pypy.rlib import libffi, clibffi
+
+from pypy.module._rawffi.interp_rawffi import unpack_simple_shape
+from pypy.module._rawffi.array import W_Array, W_ArrayInstance
+
+from pypy.module.cppyy import helper, capi, ffitypes
+
+# Executor objects are used to dispatch C++ methods. They are defined by their
+# return type only: arguments are converted by Converter objects, and Executors
+# only deal with arrays of memory that are either passed to a stub or libffi.
+# No argument checking or conversions are done.
+#
+# If a libffi function is not implemented, FastCallNotPossible is raised. If a
+# stub function is missing (e.g. if no reflection info is available for the
+# return type), an app-level TypeError is raised.
+#
+# Executor instances are created by get_executor(<return type name>), see
+# below. The name given should be qualified in case there is a specialised,
+# exact match for the qualified type.
+
+
+NULL = lltype.nullptr(clibffi.FFI_TYPE_P.TO)
+
+class FunctionExecutor(object):
+    # Base class of all executors; get_executor() also returns it as the
+    # default when no specific executor matches. Both call paths fail here.
+    _immutable_ = True
+    libffitype = NULL
+
+    def __init__(self, space, extra):
+        pass
+
+    def execute(self, space, cppmethod, cppthis, num_args, args):
+        # stub path: report an app-level TypeError
+        w_msg = space.wrap('return type not available or supported')
+        raise OperationError(space.w_TypeError, w_msg)
+
+    def execute_libffi(self, space, libffifunc, argchain):
+        # libffi path: signal that no fast call is possible
+        from pypy.module.cppyy import interp_cppyy
+        raise interp_cppyy.FastCallNotPossible
+
+
+class PtrTypeExecutor(FunctionExecutor):
+    # Executor for calls whose result is a raw address; the address is
+    # exposed as a _rawffi array whose item shape is given by 'typecode'.
+    _immutable_ = True
+    typecode = 'P'
+
+    def execute(self, space, cppmethod, cppthis, num_args, args):
+        # not available on a space that has a 'fake' attribute
+        if hasattr(space, "fake"):
+            raise NotImplementedError
+        lresult = capi.c_call_l(cppmethod, cppthis, num_args, args)
+        address = rffi.cast(rffi.ULONG, lresult)
+        arr = space.interp_w(W_Array, unpack_simple_shape(space, space.wrap(self.typecode)))
+        if address == 0:
+            # TODO: fix this hack; fromaddress() will allocate memory if address
+            # is null and there seems to be no way around it (ll_buffer can not
+            # be touched directly)
+            nullarr = arr.fromaddress(space, address, 0)
+            assert isinstance(nullarr, W_ArrayInstance)
+            nullarr.free(space)
+            return nullarr
+        # sys.maxint is used as the length, presumably because the real size
+        # of the memory behind the pointer is unknown here -- TODO confirm
+        return arr.fromaddress(space, address, sys.maxint)
+
+
+class VoidExecutor(FunctionExecutor):
+    # Executor registered for "void": performs the call and returns None on
+    # both the stub and the libffi path.
+    _immutable_ = True
+    libffitype = libffi.types.void
+
+    def execute(self, space, cppmethod, cppthis, num_args, args):
+        capi.c_call_v(cppmethod, cppthis, num_args, args)
+        return space.w_None
+
+    def execute_libffi(self, space, libffifunc, argchain):
+        libffifunc.call(argchain, lltype.Void)
+        return space.w_None
+
+
+class NumericExecutorMixin(object):
+    # Shared implementation for executors returning a numeric value. The
+    # class it is mixed into has to supply 'c_type' (used for the cast and
+    # as libffi result type) and 'c_stubcall' (performs the stub call).
+    _mixin_ = True
+    _immutable_ = True
+
+    def _wrap_object(self, space, obj):
+        return space.wrap(obj)
+
+    def execute(self, space, cppmethod, cppthis, num_args, args):
+        result = self.c_stubcall(cppmethod, cppthis, num_args, args)
+        # the stub result is cast to c_type before it is wrapped
+        return self._wrap_object(space, rffi.cast(self.c_type, result))
+
+    def execute_libffi(self, space, libffifunc, argchain):
+        result = libffifunc.call(argchain, self.c_type)
+        return self._wrap_object(space, result)
+
+class NumericRefExecutorMixin(object):
+    # Shared implementation for executors returning a reference to a numeric
+    # value. The class it is mixed into has to supply 'c_type', 'c_ptrtype'
+    # and '_unwrap_object'. A value stored with set_item() is written through
+    # the returned reference on the next call.
+    #
+    # NOTE(review): _immutable_ is set although 'do_assign' and 'item' are
+    # modified after construction -- confirm that this hint is intended here.
+    _mixin_ = True
+    _immutable_ = True
+
+    def __init__(self, space, extra):
+        FunctionExecutor.__init__(self, space, extra)
+        self.do_assign = False
+        self.item = rffi.cast(self.c_type, 0)
+
+    def set_item(self, space, w_item):
+        # remember the value; it is assigned by the next _wrap_reference()
+        self.item = self._unwrap_object(space, w_item)
+        self.do_assign = True
+
+    def _wrap_object(self, space, obj):
+        return space.wrap(rffi.cast(self.c_type, obj))
+
+    def _wrap_reference(self, space, rffiptr):
+        # perform a pending assignment, then always reset the flag
+        if self.do_assign:
+            rffiptr[0] = self.item
+        self.do_assign = False
+        return self._wrap_object(space, rffiptr[0])    # all paths, for rtyper
+
+    def execute(self, space, cppmethod, cppthis, num_args, args):
+        result = capi.c_call_r(cppmethod, cppthis, num_args, args)
+        return self._wrap_reference(space, rffi.cast(self.c_ptrtype, result))
+
+    def execute_libffi(self, space, libffifunc, argchain):
+        result = libffifunc.call(argchain, self.c_ptrtype)
+        return self._wrap_reference(space, result)
+
+
+class CStringExecutor(FunctionExecutor):
+    # Executor registered for "const char*": the returned C string is copied
+    # into a wrapped string; the C buffer itself is not freed here.
+    _immutable_ = True
+
+    def execute(self, space, cppmethod, cppthis, num_args, args):
+        raw = capi.c_call_l(cppmethod, cppthis, num_args, args)
+        # TODO: make it a choice to free
+        return space.wrap(rffi.charp2str(rffi.cast(rffi.CCHARP, raw)))
+
+
+class ConstructorExecutor(VoidExecutor):
+    # Executor registered under the name "constructor": runs the constructor
+    # through capi.c_constructor and returns None. The libffi path is
+    # inherited from VoidExecutor.
+    _immutable_ = True
+
+    def execute(self, space, cppmethod, cppthis, num_args, args):
+        capi.c_constructor(cppmethod, cppthis, num_args, args)
+        return space.w_None
+
+
+class InstancePtrExecutor(FunctionExecutor):
+    # Executor for calls returning the address of an instance of 'cppclass';
+    # the result is wrapped with isref=False and python_owns=False.
+    _immutable_ = True
+    libffitype = libffi.types.pointer
+
+    def __init__(self, space, cppclass):
+        FunctionExecutor.__init__(self, space, cppclass)
+        self.cppclass = cppclass
+
+    def execute(self, space, cppmethod, cppthis, num_args, args):
+        from pypy.module.cppyy.interp_cppyy import wrap_cppobject
+        # the stub hands the address back as a long
+        rawobj = rffi.cast(capi.C_OBJECT,
+                           capi.c_call_l(cppmethod, cppthis, num_args, args))
+        return wrap_cppobject(space, space.w_None, self.cppclass, rawobj,
+                              isref=False, python_owns=False)
+
+    def execute_libffi(self, space, libffifunc, argchain):
+        from pypy.module.cppyy.interp_cppyy import wrap_cppobject
+        rawobj = rffi.cast(capi.C_OBJECT,
+                           libffifunc.call(argchain, rffi.VOIDP))
+        return wrap_cppobject(space, space.w_None, self.cppclass, rawobj,
+                              isref=False, python_owns=False)
+
+class InstancePtrPtrExecutor(InstancePtrExecutor):
+    # Like InstancePtrExecutor, but the call result is the address of a
+    # pointer, which is dereferenced once before wrapping. There is no
+    # libffi fast path.
+    _immutable_ = True
+
+    def execute(self, space, cppmethod, cppthis, num_args, args):
+        from pypy.module.cppyy import interp_cppyy
+        voidp_result = capi.c_call_r(cppmethod, cppthis, num_args, args)
+        ref_address = rffi.cast(rffi.VOIDPP, voidp_result)
+        # ref_address[0] is the actual object address
+        ptr_result = rffi.cast(capi.C_OBJECT, ref_address[0])
+        return interp_cppyy.wrap_cppobject(
+            space, space.w_None, self.cppclass, ptr_result, isref=False, python_owns=False)
+
+    def execute_libffi(self, space, libffifunc, argchain):
+        from pypy.module.cppyy.interp_cppyy import FastCallNotPossible
+        raise FastCallNotPossible
+
+class InstanceExecutor(InstancePtrExecutor):
+    # Executor for calls returning an instance by value: the call goes
+    # through capi.c_call_o and the result is wrapped with python_owns=True.
+    # There is no libffi fast path.
+    _immutable_ = True
+
+    def execute(self, space, cppmethod, cppthis, num_args, args):
+        from pypy.module.cppyy import interp_cppyy
+        long_result = capi.c_call_o(cppmethod, cppthis, num_args, args, self.cppclass)
+        ptr_result = rffi.cast(capi.C_OBJECT, long_result)
+        return interp_cppyy.wrap_cppobject(
+            space, space.w_None, self.cppclass, ptr_result, isref=False, python_owns=True)
+
+    def execute_libffi(self, space, libffifunc, argchain):
+        from pypy.module.cppyy.interp_cppyy import FastCallNotPossible
+        raise FastCallNotPossible
+
+
+class StdStringExecutor(InstancePtrExecutor):
+    # Executor registered for std::string results: the result of
+    # capi.c_call_s is passed through capi.charp2str_free and wrapped.
+    # There is no libffi fast path.
+    _immutable_ = True
+
+    def execute(self, space, cppmethod, cppthis, num_args, args):
+        charp_result = capi.c_call_s(cppmethod, cppthis, num_args, args)
+        return space.wrap(capi.charp2str_free(charp_result))
+
+    def execute_libffi(self, space, libffifunc, argchain):
+        from pypy.module.cppyy.interp_cppyy import FastCallNotPossible
+        raise FastCallNotPossible
+
+
+class PyObjectExecutor(PtrTypeExecutor):
+    # Executor registered for "PyObject*" results: the returned reference is
+    # turned into a wrapped object by way of cpyext. Neither call path is
+    # available on a space that has a 'fake' attribute.
+    _immutable_ = True
+
+    def wrap_result(self, space, lresult):
+        # cpyext is requested from the space before its helpers are imported
+        space.getbuiltinmodule("cpyext")
+        from pypy.module.cpyext.pyobject import PyObject, from_ref, Py_DecRef
+        result = rffi.cast(PyObject, lresult)
+        w_obj = from_ref(space, result)
+        # drop the reference handed over by the callee, unless it is null
+        if result:
+            Py_DecRef(space, result)
+        return w_obj
+
+    def execute(self, space, cppmethod, cppthis, num_args, args):
+        if hasattr(space, "fake"):
+            raise NotImplementedError
+        lresult = capi.c_call_l(cppmethod, cppthis, num_args, args)
+        return self.wrap_result(space, lresult)
+
+    def execute_libffi(self, space, libffifunc, argchain):
+        if hasattr(space, "fake"):
+            raise NotImplementedError
+        lresult = libffifunc.call(argchain, rffi.LONG)
+        return self.wrap_result(space, lresult)
+
+
+_executors = {}
+def get_executor(space, name):
+    # Return an executor instance for the C++ return type called 'name'.
+    #
+    # Matching of 'name' to an executor factory goes through these levels:
+    #   1) full, qualified match
+    #       1a) match of the cleaned type name plus its compound
+    #   2) drop '&': by-ref is pretty much the same as by-value, python-wise
+    #   3) types/classes, either by ref/ptr or by value
+    #   4) additional special cases
+    #
+    # If all fails, a default is used, which can be ignored at least until use.
+
+    name = capi.c_resolve_name(name)
+
+    #   1) full, qualified match
+    try:
+        return _executors[name](space, None)
+    except KeyError:
+        pass
+
+    compound = helper.compound(name)
+    clean_name = capi.c_resolve_name(helper.clean_type(name))
+
+    #   1a) clean lookup
+    try:
+        return _executors[clean_name+compound](space, None)
+    except KeyError:
+        pass
+
+    #   2) drop '&': by-ref is pretty much the same as by-value, python-wise
+    if compound and compound[len(compound)-1] == "&":
+        # TODO: this does not actually work with Reflex (?)
+        try:
+            return _executors[clean_name](space, None)
+        except KeyError:
+            pass
+
+    #   3) types/classes, either by ref/ptr or by value
+    from pypy.module.cppyy import interp_cppyy
+    cppclass = interp_cppyy.scope_byname(space, clean_name)
+    if cppclass:
+        # type check for the benefit of the annotator
+        from pypy.module.cppyy.interp_cppyy import W_CPPClass
+        cppclass = space.interp_w(W_CPPClass, cppclass, can_be_None=False)
+        # any other compound falls through to the default executor below
+        if compound == "":
+            return InstanceExecutor(space, cppclass)
+        elif compound == "*" or compound == "&":
+            return InstancePtrExecutor(space, cppclass)
+        elif compound == "**" or compound == "*&":
+            return InstancePtrPtrExecutor(space, cppclass)
+    elif capi.c_is_enum(clean_name):
+        # enums use the executor registered under 'unsigned int'
+        return _executors['unsigned int'](space, None)
+
+    # 4) additional special cases
+    # ... none for now
+
+    # currently used until proper lazy instantiation available in interp_cppyy
+    return FunctionExecutor(space, None)
+ 
+
+# return types that have a dedicated, hand-written executor class
+_executors.update({
+    "void":        VoidExecutor,
+    "void*":       PtrTypeExecutor,
+    "const char*": CStringExecutor,
+})
+
+# special cases
+_executors.update({"constructor": ConstructorExecutor})
+
+_executors.update({
+    "std::basic_string<char>":        StdStringExecutor,
+    "const std::basic_string<char>&": StdStringExecutor,
+    "std::basic_string<char>&":       StdStringExecutor,    # TODO: shouldn't copy
+})
+
+_executors.update({"PyObject*": PyObjectExecutor})
+
+# add basic (builtin) executors
+def _build_basic_executors():
+    "NOT_RPYTHON"
+    type_info = (
+        (bool,            capi.c_call_b,   ("bool",)),
+        (rffi.CHAR,       capi.c_call_c,   ("char", "unsigned char")),
+        (rffi.SHORT,      capi.c_call_h,   ("short", "short int", "unsigned short", "unsigned short int")),
+        (rffi.INT,        capi.c_call_i,   ("int",)),
+        (rffi.UINT,       capi.c_call_l,   ("unsigned", "unsigned int")),
+        (rffi.LONG,       capi.c_call_l,   ("long", "long int")),
+        (rffi.ULONG,      capi.c_call_l,   ("unsigned long", "unsigned long int")),
+        (rffi.LONGLONG,   capi.c_call_ll,  ("long long", "long long int")),
+        (rffi.ULONGLONG,  capi.c_call_ll,  ("unsigned long long", "unsigned long long int")),