[pypy-commit] pypy list-strategies: merged default to list-strategies (test_all.py was not working anymore)

Fri Sep 23 13:14:39 CEST 2011

Author: Lukas Diekmann <lukas.diekmann at uni-duesseldorf.de>
Branch: list-strategies
Changeset: r47513:fe37239b1269
Date: 2011-07-06 17:39 +0200
http://bitbucket.org/pypy/pypy/changeset/fe37239b1269/

Log:	merged default to list-strategies (test_all.py was not working
	anymore)

diff too long, truncating to 10000 out of 37899 lines

diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -1,6 +1,7 @@
 syntax: glob
 *.py[co]
 *~
+.*.swp
 
 syntax: regexp
 ^testresult$
@@ -38,6 +39,8 @@
 ^pypy/translator/benchmark/shootout_benchmarks$
 ^pypy/translator/goal/pypy-translation-snapshot$
 ^pypy/translator/goal/pypy-c
+^pypy/translator/goal/pypy-jvm
+^pypy/translator/goal/pypy-jvm.jar
 ^pypy/translator/goal/.+\.exe$
 ^pypy/translator/goal/.+\.dll$
 ^pypy/translator/goal/target.+-c$
@@ -64,6 +67,7 @@
 ^pypy/doc/image/lattice3\.png$
 ^pypy/doc/image/stackless_informal\.png$
 ^pypy/doc/image/parsing_example.+\.png$
+^pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test\.o$
 ^compiled
 ^.git/
 ^release/
diff --git a/_pytest/__init__.py b/_pytest/__init__.py
--- a/_pytest/__init__.py
+++ b/_pytest/__init__.py
@@ -1,2 +1,2 @@
 #
-__version__ = '2.0.3'
+__version__ = '2.1.0.dev4'
diff --git a/_pytest/assertion.py b/_pytest/assertion.py
deleted file mode 100644
--- a/_pytest/assertion.py
+++ /dev/null
@@ -1,177 +0,0 @@
-"""
-support for presented detailed information in failing assertions.
-"""
-import py
-import sys
-from _pytest.monkeypatch import monkeypatch
-
-def pytest_addoption(parser):
-    group = parser.getgroup("debugconfig")
-    group._addoption('--no-assert', action="store_true", default=False,
-        dest="noassert",
-        help="disable python assert expression reinterpretation."),
-
-def pytest_configure(config):
-    # The _reprcompare attribute on the py.code module is used by
-    # py._code._assertionnew to detect this plugin was loaded and in
-    # turn call the hooks defined here as part of the
-    # DebugInterpreter.
-    m = monkeypatch()
-    config._cleanup.append(m.undo)
-    warn_about_missing_assertion()
-    if not config.getvalue("noassert") and not config.getvalue("nomagic"):
-        def callbinrepr(op, left, right):
-            hook_result = config.hook.pytest_assertrepr_compare(
-                config=config, op=op, left=left, right=right)
-            for new_expl in hook_result:
-                if new_expl:
-                    return '\n~'.join(new_expl)
-        m.setattr(py.builtin.builtins,
-                  'AssertionError', py.code._AssertionError)
-        m.setattr(py.code, '_reprcompare', callbinrepr)
-
-def warn_about_missing_assertion():
-    try:
-        assert False
-    except AssertionError:
-        pass
-    else:
-        sys.stderr.write("WARNING: failing tests may report as passing because "
-        "assertions are turned off!  (are you using python -O?)\n")
-
-# Provide basestring in python3
-try:
-    basestring = basestring
-except NameError:
-    basestring = str
-
-
-def pytest_assertrepr_compare(op, left, right):
-    """return specialised explanations for some operators/operands"""
-    width = 80 - 15 - len(op) - 2 # 15 chars indentation, 1 space around op
-    left_repr = py.io.saferepr(left, maxsize=int(width/2))
-    right_repr = py.io.saferepr(right, maxsize=width-len(left_repr))
-    summary = '%s %s %s' % (left_repr, op, right_repr)
-
-    issequence = lambda x: isinstance(x, (list, tuple))
-    istext = lambda x: isinstance(x, basestring)
-    isdict = lambda x: isinstance(x, dict)
-    isset = lambda x: isinstance(x, set)
-
-    explanation = None
-    try:
-        if op == '==':
-            if istext(left) and istext(right):
-                explanation = _diff_text(left, right)
-            elif issequence(left) and issequence(right):
-                explanation = _compare_eq_sequence(left, right)
-            elif isset(left) and isset(right):
-                explanation = _compare_eq_set(left, right)
-            elif isdict(left) and isdict(right):
-                explanation = _diff_text(py.std.pprint.pformat(left),
-                                         py.std.pprint.pformat(right))
-        elif op == 'not in':
-            if istext(left) and istext(right):
-                explanation = _notin_text(left, right)
-    except py.builtin._sysex:
-        raise
-    except:
-        excinfo = py.code.ExceptionInfo()
-        explanation = ['(pytest_assertion plugin: representation of '
-            'details failed. Probably an object has a faulty __repr__.)',
-            str(excinfo)
-            ]
-
-
-    if not explanation:
-        return None
-
-    # Don't include pageloads of data, should be configurable
-    if len(''.join(explanation)) > 80*8:
-        explanation = ['Detailed information too verbose, truncated']
-
-    return [summary] + explanation
-
-
-def _diff_text(left, right):
-    """Return the explanation for the diff between text
-
-    This will skip leading and trailing characters which are
-    identical to keep the diff minimal.
-    """
-    explanation = []
-    i = 0 # just in case left or right has zero length
-    for i in range(min(len(left), len(right))):
-        if left[i] != right[i]:
-            break
-    if i > 42:
-        i -= 10                 # Provide some context
-        explanation = ['Skipping %s identical '
-                       'leading characters in diff' % i]
-        left = left[i:]
-        right = right[i:]
-    if len(left) == len(right):
-        for i in range(len(left)):
-            if left[-i] != right[-i]:
-                break
-        if i > 42:
-            i -= 10     # Provide some context
-            explanation += ['Skipping %s identical '
-                            'trailing characters in diff' % i]
-            left = left[:-i]
-            right = right[:-i]
-    explanation += [line.strip('\n')
-                    for line in py.std.difflib.ndiff(left.splitlines(),
-                                                     right.splitlines())]
-    return explanation
-
-
-def _compare_eq_sequence(left, right):
-    explanation = []
-    for i in range(min(len(left), len(right))):
-        if left[i] != right[i]:
-            explanation += ['At index %s diff: %r != %r' %
-                            (i, left[i], right[i])]
-            break
-    if len(left) > len(right):
-        explanation += ['Left contains more items, '
-            'first extra item: %s' % py.io.saferepr(left[len(right)],)]
-    elif len(left) < len(right):
-        explanation += ['Right contains more items, '
-            'first extra item: %s' % py.io.saferepr(right[len(left)],)]
-    return explanation # + _diff_text(py.std.pprint.pformat(left),
-                       #             py.std.pprint.pformat(right))
-
-
-def _compare_eq_set(left, right):
-    explanation = []
-    diff_left = left - right
-    diff_right = right - left
-    if diff_left:
-        explanation.append('Extra items in the left set:')
-        for item in diff_left:
-            explanation.append(py.io.saferepr(item))
-    if diff_right:
-        explanation.append('Extra items in the right set:')
-        for item in diff_right:
-            explanation.append(py.io.saferepr(item))
-    return explanation
-
-
-def _notin_text(term, text):
-    index = text.find(term)
-    head = text[:index]
-    tail = text[index+len(term):]
-    correct_text = head + tail
-    diff = _diff_text(correct_text, text)
-    newdiff = ['%s is contained here:' % py.io.saferepr(term, maxsize=42)]
-    for line in diff:
-        if line.startswith('Skipping'):
-            continue
-        if line.startswith('- '):
-            continue
-        if line.startswith('+ '):
-            newdiff.append('  ' + line[2:])
-        else:
-            newdiff.append(line)
-    return newdiff
diff --git a/_pytest/assertion/__init__.py b/_pytest/assertion/__init__.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/__init__.py
@@ -0,0 +1,128 @@
+"""
+support for presenting detailed information in failing assertions.
+"""
+import py
+import imp
+import marshal
+import struct
+import sys
+import pytest
+from _pytest.monkeypatch import monkeypatch
+from _pytest.assertion import reinterpret, util
+
+try:
+    from _pytest.assertion.rewrite import rewrite_asserts
+except ImportError:
+    rewrite_asserts = None
+else:
+    import ast
+
+def pytest_addoption(parser):
+    group = parser.getgroup("debugconfig")
+    group.addoption('--assertmode', action="store", dest="assertmode",
+                    choices=("on", "old", "off", "default"), default="default",
+                    metavar="on|old|off",
+                    help="""control assertion debugging tools.
+'off' performs no assertion debugging.
+'old' reinterprets the expressions in asserts to glean information.
+'on' (the default) rewrites the assert statements in test modules to provide
+sub-expression results.""")
+    group.addoption('--no-assert', action="store_true", default=False,
+        dest="noassert", help="DEPRECATED equivalent to --assertmode=off")
+    group.addoption('--nomagic', action="store_true", default=False,
+        dest="nomagic", help="DEPRECATED equivalent to --assertmode=off")
+
+class AssertionState:
+    """State for the assertion plugin."""
+
+    def __init__(self, config, mode):
+        self.mode = mode
+        self.trace = config.trace.root.get("assertion")
+
+def pytest_configure(config):
+    warn_about_missing_assertion()
+    mode = config.getvalue("assertmode")
+    if config.getvalue("noassert") or config.getvalue("nomagic"):
+        if mode not in ("off", "default"):
+            raise pytest.UsageError("assertion options conflict")
+        mode = "off"
+    elif mode == "default":
+        mode = "on"
+    if mode != "off":
+        def callbinrepr(op, left, right):
+            hook_result = config.hook.pytest_assertrepr_compare(
+                config=config, op=op, left=left, right=right)
+            for new_expl in hook_result:
+                if new_expl:
+                    return '\n~'.join(new_expl)
+        m = monkeypatch()
+        config._cleanup.append(m.undo)
+        m.setattr(py.builtin.builtins, 'AssertionError',
+                  reinterpret.AssertionError)
+        m.setattr(util, '_reprcompare', callbinrepr)
+    if mode == "on" and rewrite_asserts is None:
+        mode = "old"
+    config._assertstate = AssertionState(config, mode)
+    config._assertstate.trace("configured with mode set to %r" % (mode,))
+
+def _write_pyc(co, source_path):
+    if hasattr(imp, "cache_from_source"):
+        # Handle PEP 3147 pycs.
+        pyc = py.path.local(imp.cache_from_source(str(source_path)))
+        pyc.ensure()
+    else:
+        pyc = source_path + "c"
+    mtime = int(source_path.mtime())
+    fp = pyc.open("wb")
+    try:
+        fp.write(imp.get_magic())
+        fp.write(struct.pack("<l", mtime))
+        marshal.dump(co, fp)
+    finally:
+        fp.close()
+    return pyc
+
+def before_module_import(mod):
+    if mod.config._assertstate.mode != "on":
+        return
+    # Some deep magic: load the source, rewrite the asserts, and write a
+    # fake pyc, so that it'll be loaded when the module is imported.
+    source = mod.fspath.read()
+    try:
+        tree = ast.parse(source)
+    except SyntaxError:
+        # Let this pop up again in the real import.
+        mod.config._assertstate.trace("failed to parse: %r" % (mod.fspath,))
+        return
+    rewrite_asserts(tree)
+    try:
+        co = compile(tree, str(mod.fspath), "exec")
+    except SyntaxError:
+        # It's possible that this error is from some bug in the assertion
+        # rewriting, but I don't know of a fast way to tell.
+        mod.config._assertstate.trace("failed to compile: %r" % (mod.fspath,))
+        return
+    mod._pyc = _write_pyc(co, mod.fspath)
+    mod.config._assertstate.trace("wrote pyc: %r" % (mod._pyc,))
+
+def after_module_import(mod):
+    if not hasattr(mod, "_pyc"):
+        return
+    state = mod.config._assertstate
+    try:
+        mod._pyc.remove()
+    except py.error.ENOENT:
+        state.trace("couldn't find pyc: %r" % (mod._pyc,))
+    else:
+        state.trace("removed pyc: %r" % (mod._pyc,))
+
+def warn_about_missing_assertion():
+    try:
+        assert False
+    except AssertionError:
+        pass
+    else:
+        sys.stderr.write("WARNING: failing tests may report as passing because "
+        "assertions are turned off!  (are you using python -O?)\n")
+
+pytest_assertrepr_compare = util.assertrepr_compare
diff --git a/_pytest/assertion/newinterpret.py b/_pytest/assertion/newinterpret.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/newinterpret.py
@@ -0,0 +1,333 @@
+"""
+Find intermediate evalutation results in assert statements through builtin AST.
+This should replace oldinterpret.py eventually.
+"""
+
+import sys
+import ast
+
+import py
+from _pytest.assertion import util
+from _pytest.assertion.reinterpret import BuiltinAssertionError
+
+
+if sys.platform.startswith("java") and sys.version_info < (2, 5, 2):
+    # See http://bugs.jython.org/issue1497
+    _exprs = ("BoolOp", "BinOp", "UnaryOp", "Lambda", "IfExp", "Dict",
+              "ListComp", "GeneratorExp", "Yield", "Compare", "Call",
+              "Repr", "Num", "Str", "Attribute", "Subscript", "Name",
+              "List", "Tuple")
+    _stmts = ("FunctionDef", "ClassDef", "Return", "Delete", "Assign",
+              "AugAssign", "Print", "For", "While", "If", "With", "Raise",
+              "TryExcept", "TryFinally", "Assert", "Import", "ImportFrom",
+              "Exec", "Global", "Expr", "Pass", "Break", "Continue")
+    _expr_nodes = set(getattr(ast, name) for name in _exprs)
+    _stmt_nodes = set(getattr(ast, name) for name in _stmts)
+    def _is_ast_expr(node):
+        return node.__class__ in _expr_nodes
+    def _is_ast_stmt(node):
+        return node.__class__ in _stmt_nodes
+else:
+    def _is_ast_expr(node):
+        return isinstance(node, ast.expr)
+    def _is_ast_stmt(node):
+        return isinstance(node, ast.stmt)
+
+
+class Failure(Exception):
+    """Error found while interpreting AST."""
+
+    def __init__(self, explanation=""):
+        self.cause = sys.exc_info()
+        self.explanation = explanation
+
+
+def interpret(source, frame, should_fail=False):
+    mod = ast.parse(source)
+    visitor = DebugInterpreter(frame)
+    try:
+        visitor.visit(mod)
+    except Failure:
+        failure = sys.exc_info()[1]
+        return getfailure(failure)
+    if should_fail:
+        return ("(assertion failed, but when it was re-run for "
+                "printing intermediate values, it did not fail.  Suggestions: "
+                "compute assert expression before the assert or use --no-assert)")
+
+def run(offending_line, frame=None):
+    if frame is None:
+        frame = py.code.Frame(sys._getframe(1))
+    return interpret(offending_line, frame)
+
+def getfailure(e):
+    explanation = util.format_explanation(e.explanation)
+    value = e.cause[1]
+    if str(value):
+        lines = explanation.split('\n')
+        lines[0] += "  << %s" % (value,)
+        explanation = '\n'.join(lines)
+    text = "%s: %s" % (e.cause[0].__name__, explanation)
+    if text.startswith('AssertionError: assert '):
+        text = text[16:]
+    return text
+
+operator_map = {
+    ast.BitOr : "|",
+    ast.BitXor : "^",
+    ast.BitAnd : "&",
+    ast.LShift : "<<",
+    ast.RShift : ">>",
+    ast.Add : "+",
+    ast.Sub : "-",
+    ast.Mult : "*",
+    ast.Div : "/",
+    ast.FloorDiv : "//",
+    ast.Mod : "%",
+    ast.Eq : "==",
+    ast.NotEq : "!=",
+    ast.Lt : "<",
+    ast.LtE : "<=",
+    ast.Gt : ">",
+    ast.GtE : ">=",
+    ast.Pow : "**",
+    ast.Is : "is",
+    ast.IsNot : "is not",
+    ast.In : "in",
+    ast.NotIn : "not in"
+}
+
+unary_map = {
+    ast.Not : "not %s",
+    ast.Invert : "~%s",
+    ast.USub : "-%s",
+    ast.UAdd : "+%s"
+}
+
+
+class DebugInterpreter(ast.NodeVisitor):
+    """Interpret AST nodes to gleam useful debugging information. """
+
+    def __init__(self, frame):
+        self.frame = frame
+
+    def generic_visit(self, node):
+        # Fallback when we don't have a special implementation.
+        if _is_ast_expr(node):
+            mod = ast.Expression(node)
+            co = self._compile(mod)
+            try:
+                result = self.frame.eval(co)
+            except Exception:
+                raise Failure()
+            explanation = self.frame.repr(result)
+            return explanation, result
+        elif _is_ast_stmt(node):
+            mod = ast.Module([node])
+            co = self._compile(mod, "exec")
+            try:
+                self.frame.exec_(co)
+            except Exception:
+                raise Failure()
+            return None, None
+        else:
+            raise AssertionError("can't handle %s" %(node,))
+
+    def _compile(self, source, mode="eval"):
+        return compile(source, "<assertion interpretation>", mode)
+
+    def visit_Expr(self, expr):
+        return self.visit(expr.value)
+
+    def visit_Module(self, mod):
+        for stmt in mod.body:
+            self.visit(stmt)
+
+    def visit_Name(self, name):
+        explanation, result = self.generic_visit(name)
+        # See if the name is local.
+        source = "%r in locals() is not globals()" % (name.id,)
+        co = self._compile(source)
+        try:
+            local = self.frame.eval(co)
+        except Exception:
+            # have to assume it isn't
+            local = None
+        if local is None or not self.frame.is_true(local):
+            return name.id, result
+        return explanation, result
+
+    def visit_Compare(self, comp):
+        left = comp.left
+        left_explanation, left_result = self.visit(left)
+        for op, next_op in zip(comp.ops, comp.comparators):
+            next_explanation, next_result = self.visit(next_op)
+            op_symbol = operator_map[op.__class__]
+            explanation = "%s %s %s" % (left_explanation, op_symbol,
+                                        next_explanation)
+            source = "__exprinfo_left %s __exprinfo_right" % (op_symbol,)
+            co = self._compile(source)
+            try:
+                result = self.frame.eval(co, __exprinfo_left=left_result,
+                                         __exprinfo_right=next_result)
+            except Exception:
+                raise Failure(explanation)
+            try:
+                if not self.frame.is_true(result):
+                    break
+            except KeyboardInterrupt:
+                raise
+            except:
+                break
+            left_explanation, left_result = next_explanation, next_result
+
+        if util._reprcompare is not None:
+            res = util._reprcompare(op_symbol, left_result, next_result)
+            if res:
+                explanation = res
+        return explanation, result
+
+    def visit_BoolOp(self, boolop):
+        is_or = isinstance(boolop.op, ast.Or)
+        explanations = []
+        for operand in boolop.values:
+            explanation, result = self.visit(operand)
+            explanations.append(explanation)
+            if result == is_or:
+                break
+        name = is_or and " or " or " and "
+        explanation = "(" + name.join(explanations) + ")"
+        return explanation, result
+
+    def visit_UnaryOp(self, unary):
+        pattern = unary_map[unary.op.__class__]
+        operand_explanation, operand_result = self.visit(unary.operand)
+        explanation = pattern % (operand_explanation,)
+        co = self._compile(pattern % ("__exprinfo_expr",))
+        try:
+            result = self.frame.eval(co, __exprinfo_expr=operand_result)
+        except Exception:
+            raise Failure(explanation)
+        return explanation, result
+
+    def visit_BinOp(self, binop):
+        left_explanation, left_result = self.visit(binop.left)
+        right_explanation, right_result = self.visit(binop.right)
+        symbol = operator_map[binop.op.__class__]
+        explanation = "(%s %s %s)" % (left_explanation, symbol,
+                                      right_explanation)
+        source = "__exprinfo_left %s __exprinfo_right" % (symbol,)
+        co = self._compile(source)
+        try:
+            result = self.frame.eval(co, __exprinfo_left=left_result,
+                                     __exprinfo_right=right_result)
+        except Exception:
+            raise Failure(explanation)
+        return explanation, result
+
+    def visit_Call(self, call):
+        func_explanation, func = self.visit(call.func)
+        arg_explanations = []
+        ns = {"__exprinfo_func" : func}
+        arguments = []
+        for arg in call.args:
+            arg_explanation, arg_result = self.visit(arg)
+            arg_name = "__exprinfo_%s" % (len(ns),)
+            ns[arg_name] = arg_result
+            arguments.append(arg_name)
+            arg_explanations.append(arg_explanation)
+        for keyword in call.keywords:
+            arg_explanation, arg_result = self.visit(keyword.value)
+            arg_name = "__exprinfo_%s" % (len(ns),)
+            ns[arg_name] = arg_result
+            keyword_source = "%s=%%s" % (keyword.arg)
+            arguments.append(keyword_source % (arg_name,))
+            arg_explanations.append(keyword_source % (arg_explanation,))
+        if call.starargs:
+            arg_explanation, arg_result = self.visit(call.starargs)
+            arg_name = "__exprinfo_star"
+            ns[arg_name] = arg_result
+            arguments.append("*%s" % (arg_name,))
+            arg_explanations.append("*%s" % (arg_explanation,))
+        if call.kwargs:
+            arg_explanation, arg_result = self.visit(call.kwargs)
+            arg_name = "__exprinfo_kwds"
+            ns[arg_name] = arg_result
+            arguments.append("**%s" % (arg_name,))
+            arg_explanations.append("**%s" % (arg_explanation,))
+        args_explained = ", ".join(arg_explanations)
+        explanation = "%s(%s)" % (func_explanation, args_explained)
+        args = ", ".join(arguments)
+        source = "__exprinfo_func(%s)" % (args,)
+        co = self._compile(source)
+        try:
+            result = self.frame.eval(co, **ns)
+        except Exception:
+            raise Failure(explanation)
+        pattern = "%s\n{%s = %s\n}"
+        rep = self.frame.repr(result)
+        explanation = pattern % (rep, rep, explanation)
+        return explanation, result
+
+    def _is_builtin_name(self, name):
+        pattern = "%r not in globals() and %r not in locals()"
+        source = pattern % (name.id, name.id)
+        co = self._compile(source)
+        try:
+            return self.frame.eval(co)
+        except Exception:
+            return False
+
+    def visit_Attribute(self, attr):
+        if not isinstance(attr.ctx, ast.Load):
+            return self.generic_visit(attr)
+        source_explanation, source_result = self.visit(attr.value)
+        explanation = "%s.%s" % (source_explanation, attr.attr)
+        source = "__exprinfo_expr.%s" % (attr.attr,)
+        co = self._compile(source)
+        try:
+            result = self.frame.eval(co, __exprinfo_expr=source_result)
+        except Exception:
+            raise Failure(explanation)
+        explanation = "%s\n{%s = %s.%s\n}" % (self.frame.repr(result),
+                                              self.frame.repr(result),
+                                              source_explanation, attr.attr)
+        # Check if the attr is from an instance.
+        source = "%r in getattr(__exprinfo_expr, '__dict__', {})"
+        source = source % (attr.attr,)
+        co = self._compile(source)
+        try:
+            from_instance = self.frame.eval(co, __exprinfo_expr=source_result)
+        except Exception:
+            from_instance = None
+        if from_instance is None or self.frame.is_true(from_instance):
+            rep = self.frame.repr(result)
+            pattern = "%s\n{%s = %s\n}"
+            explanation = pattern % (rep, rep, explanation)
+        return explanation, result
+
+    def visit_Assert(self, assrt):
+        test_explanation, test_result = self.visit(assrt.test)
+        explanation = "assert %s" % (test_explanation,)
+        if not self.frame.is_true(test_result):
+            try:
+                raise BuiltinAssertionError
+            except Exception:
+                raise Failure(explanation)
+        return explanation, test_result
+
+    def visit_Assign(self, assign):
+        value_explanation, value_result = self.visit(assign.value)
+        explanation = "... = %s" % (value_explanation,)
+        name = ast.Name("__exprinfo_expr", ast.Load(),
+                        lineno=assign.value.lineno,
+                        col_offset=assign.value.col_offset)
+        new_assign = ast.Assign(assign.targets, name, lineno=assign.lineno,
+                                col_offset=assign.col_offset)
+        mod = ast.Module([new_assign])
+        co = self._compile(mod, "exec")
+        try:
+            self.frame.exec_(co, __exprinfo_expr=value_result)
+        except Exception:
+            raise Failure(explanation)
+        return explanation, value_result
diff --git a/_pytest/assertion/oldinterpret.py b/_pytest/assertion/oldinterpret.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/oldinterpret.py
@@ -0,0 +1,552 @@
+import py
+import sys, inspect
+from compiler import parse, ast, pycodegen
+from _pytest.assertion.util import format_explanation
+from _pytest.assertion.reinterpret import BuiltinAssertionError
+
+passthroughex = py.builtin._sysex
+
+class Failure:
+    def __init__(self, node):
+        self.exc, self.value, self.tb = sys.exc_info()
+        self.node = node
+
+class View(object):
+    """View base class.
+
+    If C is a subclass of View, then C(x) creates a proxy object around
+    the object x.  The actual class of the proxy is not C in general,
+    but a *subclass* of C determined by the rules below.  To avoid confusion
+    we call view class the class of the proxy (a subclass of C, so of View)
+    and object class the class of x.
+
+    Attributes and methods not found in the proxy are automatically read on x.
+    Other operations like setting attributes are performed on the proxy, as
+    determined by its view class.  The object x is available from the proxy
+    as its __obj__ attribute.
+
+    The view class selection is determined by the __view__ tuples and the
+    optional __viewkey__ method.  By default, the selected view class is the
+    most specific subclass of C whose __view__ mentions the class of x.
+    If no such subclass is found, the search proceeds with the parent
+    object classes.  For example, C(True) will first look for a subclass
+    of C with __view__ = (..., bool, ...) and only if it doesn't find any
+    look for one with __view__ = (..., int, ...), and then ..., object,...
+    If everything fails the class C itself is considered to be the default.
+
+    Alternatively, the view class selection can be driven by another aspect
+    of the object x, instead of the class of x, by overriding __viewkey__.
+    See last example at the end of this module.
+    """
+
+    _viewcache = {}
+    __view__ = ()
+
+    def __new__(rootclass, obj, *args, **kwds):
+        self = object.__new__(rootclass)
+        self.__obj__ = obj
+        self.__rootclass__ = rootclass
+        key = self.__viewkey__()
+        try:
+            self.__class__ = self._viewcache[key]
+        except KeyError:
+            self.__class__ = self._selectsubclass(key)
+        return self
+
+    def __getattr__(self, attr):
+        # attributes not found in the normal hierarchy rooted on View
+        # are looked up in the object's real class
+        return getattr(self.__obj__, attr)
+
+    def __viewkey__(self):
+        return self.__obj__.__class__
+
+    def __matchkey__(self, key, subclasses):
+        if inspect.isclass(key):
+            keys = inspect.getmro(key)
+        else:
+            keys = [key]
+        for key in keys:
+            result = [C for C in subclasses if key in C.__view__]
+            if result:
+                return result
+        return []
+
+    def _selectsubclass(self, key):
+        subclasses = list(enumsubclasses(self.__rootclass__))
+        for C in subclasses:
+            if not isinstance(C.__view__, tuple):
+                C.__view__ = (C.__view__,)
+        choices = self.__matchkey__(key, subclasses)
+        if not choices:
+            return self.__rootclass__
+        elif len(choices) == 1:
+            return choices[0]
+        else:
+            # combine the multiple choices
+            return type('?', tuple(choices), {})
+
+    def __repr__(self):
+        return '%s(%r)' % (self.__rootclass__.__name__, self.__obj__)
+
+
+def enumsubclasses(cls):
+    for subcls in cls.__subclasses__():
+        for subsubclass in enumsubclasses(subcls):
+            yield subsubclass
+    yield cls
+
+
+class Interpretable(View):
+    """A parse tree node with a few extra methods."""
+    explanation = None
+
+    def is_builtin(self, frame):
+        return False
+
+    def eval(self, frame):
+        # fall-back for unknown expression nodes
+        try:
+            expr = ast.Expression(self.__obj__)
+            expr.filename = '<eval>'
+            self.__obj__.filename = '<eval>'
+            co = pycodegen.ExpressionCodeGenerator(expr).getCode()
+            result = frame.eval(co)
+        except passthroughex:
+            raise
+        except:
+            raise Failure(self)
+        self.result = result
+        self.explanation = self.explanation or frame.repr(self.result)
+
+    def run(self, frame):
+        # fall-back for unknown statement nodes
+        try:
+            expr = ast.Module(None, ast.Stmt([self.__obj__]))
+            expr.filename = '<run>'
+            co = pycodegen.ModuleCodeGenerator(expr).getCode()
+            frame.exec_(co)
+        except passthroughex:
+            raise
+        except:
+            raise Failure(self)
+
+    def nice_explanation(self):
+        return format_explanation(self.explanation)
+
+
+class Name(Interpretable):
+    __view__ = ast.Name
+
+    def is_local(self, frame):
+        source = '%r in locals() is not globals()' % self.name
+        try:
+            return frame.is_true(frame.eval(source))
+        except passthroughex:
+            raise
+        except:
+            return False
+
+    def is_global(self, frame):
+        source = '%r in globals()' % self.name
+        try:
+            return frame.is_true(frame.eval(source))
+        except passthroughex:
+            raise
+        except:
+            return False
+
+    def is_builtin(self, frame):
+        source = '%r not in locals() and %r not in globals()' % (
+            self.name, self.name)
+        try:
+            return frame.is_true(frame.eval(source))
+        except passthroughex:
+            raise
+        except:
+            return False
+
+    def eval(self, frame):
+        super(Name, self).eval(frame)
+        if not self.is_local(frame):
+            self.explanation = self.name
+
+class Compare(Interpretable):
+    __view__ = ast.Compare
+
+    def eval(self, frame):
+        expr = Interpretable(self.expr)
+        expr.eval(frame)
+        for operation, expr2 in self.ops:
+            if hasattr(self, 'result'):
+                # shortcutting in chained expressions
+                if not frame.is_true(self.result):
+                    break
+            expr2 = Interpretable(expr2)
+            expr2.eval(frame)
+            self.explanation = "%s %s %s" % (
+                expr.explanation, operation, expr2.explanation)
+            source = "__exprinfo_left %s __exprinfo_right" % operation
+            try:
+                self.result = frame.eval(source,
+                                         __exprinfo_left=expr.result,
+                                         __exprinfo_right=expr2.result)
+            except passthroughex:
+                raise
+            except:
+                raise Failure(self)
+            expr = expr2
+
+class And(Interpretable):
+    __view__ = ast.And
+
+    def eval(self, frame):
+        explanations = []
+        for expr in self.nodes:
+            expr = Interpretable(expr)
+            expr.eval(frame)
+            explanations.append(expr.explanation)
+            self.result = expr.result
+            if not frame.is_true(expr.result):
+                break
+        self.explanation = '(' + ' and '.join(explanations) + ')'
+
+class Or(Interpretable):
+    __view__ = ast.Or
+
+    def eval(self, frame):
+        explanations = []
+        for expr in self.nodes:
+            expr = Interpretable(expr)
+            expr.eval(frame)
+            explanations.append(expr.explanation)
+            self.result = expr.result
+            if frame.is_true(expr.result):
+                break
+        self.explanation = '(' + ' or '.join(explanations) + ')'
+
+
+# == Unary operations ==
+keepalive = []
+for astclass, astpattern in {
+    ast.Not    : 'not __exprinfo_expr',
+    ast.Invert : '(~__exprinfo_expr)',
+    }.items():
+
+    class UnaryArith(Interpretable):
+        __view__ = astclass
+
+        def eval(self, frame, astpattern=astpattern):
+            expr = Interpretable(self.expr)
+            expr.eval(frame)
+            self.explanation = astpattern.replace('__exprinfo_expr',
+                                                  expr.explanation)
+            try:
+                self.result = frame.eval(astpattern,
+                                         __exprinfo_expr=expr.result)
+            except passthroughex:
+                raise
+            except:
+                raise Failure(self)
+
+    keepalive.append(UnaryArith)
+
+# == Binary operations ==
+for astclass, astpattern in {
+    ast.Add    : '(__exprinfo_left + __exprinfo_right)',
+    ast.Sub    : '(__exprinfo_left - __exprinfo_right)',
+    ast.Mul    : '(__exprinfo_left * __exprinfo_right)',
+    ast.Div    : '(__exprinfo_left / __exprinfo_right)',
+    ast.Mod    : '(__exprinfo_left % __exprinfo_right)',
+    ast.Power  : '(__exprinfo_left ** __exprinfo_right)',
+    }.items():
+
+    class BinaryArith(Interpretable):
+        __view__ = astclass
+
+        def eval(self, frame, astpattern=astpattern):
+            left = Interpretable(self.left)
+            left.eval(frame)
+            right = Interpretable(self.right)
+            right.eval(frame)
+            self.explanation = (astpattern
+                                .replace('__exprinfo_left',  left .explanation)
+                                .replace('__exprinfo_right', right.explanation))
+            try:
+                self.result = frame.eval(astpattern,
+                                         __exprinfo_left=left.result,
+                                         __exprinfo_right=right.result)
+            except passthroughex:
+                raise
+            except:
+                raise Failure(self)
+
+    keepalive.append(BinaryArith)
+
+
+class CallFunc(Interpretable):
+    __view__ = ast.CallFunc
+
+    def is_bool(self, frame):
+        source = 'isinstance(__exprinfo_value, bool)'
+        try:
+            return frame.is_true(frame.eval(source,
+                                            __exprinfo_value=self.result))
+        except passthroughex:
+            raise
+        except:
+            return False
+
+    def eval(self, frame):
+        node = Interpretable(self.node)
+        node.eval(frame)
+        explanations = []
+        vars = {'__exprinfo_fn': node.result}
+        source = '__exprinfo_fn('
+        for a in self.args:
+            if isinstance(a, ast.Keyword):
+                keyword = a.name
+                a = a.expr
+            else:
+                keyword = None
+            a = Interpretable(a)
+            a.eval(frame)
+            argname = '__exprinfo_%d' % len(vars)
+            vars[argname] = a.result
+            if keyword is None:
+                source += argname + ','
+                explanations.append(a.explanation)
+            else:
+                source += '%s=%s,' % (keyword, argname)
+                explanations.append('%s=%s' % (keyword, a.explanation))
+        if self.star_args:
+            star_args = Interpretable(self.star_args)
+            star_args.eval(frame)
+            argname = '__exprinfo_star'
+            vars[argname] = star_args.result
+            source += '*' + argname + ','
+            explanations.append('*' + star_args.explanation)
+        if self.dstar_args:
+            dstar_args = Interpretable(self.dstar_args)
+            dstar_args.eval(frame)
+            argname = '__exprinfo_kwds'
+            vars[argname] = dstar_args.result
+            source += '**' + argname + ','
+            explanations.append('**' + dstar_args.explanation)
+        self.explanation = "%s(%s)" % (
+            node.explanation, ', '.join(explanations))
+        if source.endswith(','):
+            source = source[:-1]
+        source += ')'
+        try:
+            self.result = frame.eval(source, **vars)
+        except passthroughex:
+            raise
+        except:
+            raise Failure(self)
+        if not node.is_builtin(frame) or not self.is_bool(frame):
+            r = frame.repr(self.result)
+            self.explanation = '%s\n{%s = %s\n}' % (r, r, self.explanation)
+
+class Getattr(Interpretable):
+    __view__ = ast.Getattr
+
+    def eval(self, frame):
+        expr = Interpretable(self.expr)
+        expr.eval(frame)
+        source = '__exprinfo_expr.%s' % self.attrname
+        try:
+            self.result = frame.eval(source, __exprinfo_expr=expr.result)
+        except passthroughex:
+            raise
+        except:
+            raise Failure(self)
+        self.explanation = '%s.%s' % (expr.explanation, self.attrname)
+        # if the attribute comes from the instance, its value is interesting
+        source = ('hasattr(__exprinfo_expr, "__dict__") and '
+                  '%r in __exprinfo_expr.__dict__' % self.attrname)
+        try:
+            from_instance = frame.is_true(
+                frame.eval(source, __exprinfo_expr=expr.result))
+        except passthroughex:
+            raise
+        except:
+            from_instance = True
+        if from_instance:
+            r = frame.repr(self.result)
+            self.explanation = '%s\n{%s = %s\n}' % (r, r, self.explanation)
+
+# == Re-interpretation of full statements ==
+
+class Assert(Interpretable):
+    __view__ = ast.Assert
+
+    def run(self, frame):
+        test = Interpretable(self.test)
+        test.eval(frame)
+        # print the result as  'assert <explanation>'
+        self.result = test.result
+        self.explanation = 'assert ' + test.explanation
+        if not frame.is_true(test.result):
+            try:
+                raise BuiltinAssertionError
+            except passthroughex:
+                raise
+            except:
+                raise Failure(self)
+
+class Assign(Interpretable):
+    __view__ = ast.Assign
+
+    def run(self, frame):
+        expr = Interpretable(self.expr)
+        expr.eval(frame)
+        self.result = expr.result
+        self.explanation = '... = ' + expr.explanation
+        # fall-back-run the rest of the assignment
+        ass = ast.Assign(self.nodes, ast.Name('__exprinfo_expr'))
+        mod = ast.Module(None, ast.Stmt([ass]))
+        mod.filename = '<run>'
+        co = pycodegen.ModuleCodeGenerator(mod).getCode()
+        try:
+            frame.exec_(co, __exprinfo_expr=expr.result)
+        except passthroughex:
+            raise
+        except:
+            raise Failure(self)
+
+class Discard(Interpretable):
+    __view__ = ast.Discard
+
+    def run(self, frame):
+        expr = Interpretable(self.expr)
+        expr.eval(frame)
+        self.result = expr.result
+        self.explanation = expr.explanation
+
+class Stmt(Interpretable):
+    __view__ = ast.Stmt
+
+    def run(self, frame):
+        for stmt in self.nodes:
+            stmt = Interpretable(stmt)
+            stmt.run(frame)
+
+
+def report_failure(e):
+    explanation = e.node.nice_explanation()
+    if explanation:
+        explanation = ", in: " + explanation
+    else:
+        explanation = ""
+    sys.stdout.write("%s: %s%s\n" % (e.exc.__name__, e.value, explanation))
+
+def check(s, frame=None):
+    if frame is None:
+        frame = sys._getframe(1)
+        frame = py.code.Frame(frame)
+    expr = parse(s, 'eval')
+    assert isinstance(expr, ast.Expression)
+    node = Interpretable(expr.node)
+    try:
+        node.eval(frame)
+    except passthroughex:
+        raise
+    except Failure:
+        e = sys.exc_info()[1]
+        report_failure(e)
+    else:
+        if not frame.is_true(node.result):
+            sys.stderr.write("assertion failed: %s\n" % node.nice_explanation())
+
+
+###########################################################
+# API / Entry points
+# #########################################################
+
+def interpret(source, frame, should_fail=False):
+    module = Interpretable(parse(source, 'exec').node)
+    #print "got module", module
+    if isinstance(frame, py.std.types.FrameType):
+        frame = py.code.Frame(frame)
+    try:
+        module.run(frame)
+    except Failure:
+        e = sys.exc_info()[1]
+        return getfailure(e)
+    except passthroughex:
+        raise
+    except:
+        import traceback
+        traceback.print_exc()
+    if should_fail:
+        return ("(assertion failed, but when it was re-run for "
+                "printing intermediate values, it did not fail.  Suggestions: "
+                "compute assert expression before the assert or use --nomagic)")
+    else:
+        return None
+
+def getmsg(excinfo):
+    if isinstance(excinfo, tuple):
+        excinfo = py.code.ExceptionInfo(excinfo)
+    #frame, line = gettbline(tb)
+    #frame = py.code.Frame(frame)
+    #return interpret(line, frame)
+
+    tb = excinfo.traceback[-1]
+    source = str(tb.statement).strip()
+    x = interpret(source, tb.frame, should_fail=True)
+    if not isinstance(x, str):
+        raise TypeError("interpret returned non-string %r" % (x,))
+    return x
+
+def getfailure(e):
+    explanation = e.node.nice_explanation()
+    if str(e.value):
+        lines = explanation.split('\n')
+        lines[0] += "  << %s" % (e.value,)
+        explanation = '\n'.join(lines)
+    text = "%s: %s" % (e.exc.__name__, explanation)
+    if text.startswith('AssertionError: assert '):
+        text = text[16:]
+    return text
+
+def run(s, frame=None):
+    if frame is None:
+        frame = sys._getframe(1)
+        frame = py.code.Frame(frame)
+    module = Interpretable(parse(s, 'exec').node)
+    try:
+        module.run(frame)
+    except Failure:
+        e = sys.exc_info()[1]
+        report_failure(e)
+
+
+if __name__ == '__main__':
+    # example:
+    def f():
+        return 5
+    def g():
+        return 3
+    def h(x):
+        return 'never'
+    check("f() * g() == 5")
+    check("not f()")
+    check("not (f() and g() or 0)")
+    check("f() == g()")
+    i = 4
+    check("i == f()")
+    check("len(f()) == 0")
+    check("isinstance(2+3+4, float)")
+
+    run("x = i")
+    check("x == 5")
+
+    run("assert not f(), 'oops'")
+    run("a, b, c = 1, 2")
+    run("a, b, c = f()")
+
+    check("max([f(),g()]) == 4")
+    check("'hello'[g()] == 'h'")
+    run("'guk%d' % h(f())")
diff --git a/_pytest/assertion/reinterpret.py b/_pytest/assertion/reinterpret.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/reinterpret.py
@@ -0,0 +1,48 @@
+import sys
+import py
+
+BuiltinAssertionError = py.builtin.builtins.AssertionError
+
+class AssertionError(BuiltinAssertionError):
+    def __init__(self, *args):
+        BuiltinAssertionError.__init__(self, *args)
+        if args:
+            try:
+                self.msg = str(args[0])
+            except py.builtin._sysex:
+                raise
+            except:
+                self.msg = "<[broken __repr__] %s at %0xd>" %(
+                    args[0].__class__, id(args[0]))
+        else:
+            f = py.code.Frame(sys._getframe(1))
+            try:
+                source = f.code.fullsource
+                if source is not None:
+                    try:
+                        source = source.getstatement(f.lineno, assertion=True)
+                    except IndexError:
+                        source = None
+                    else:
+                        source = str(source.deindent()).strip()
+            except py.error.ENOENT:
+                source = None
+                # this can also occur during reinterpretation, when the
+                # co_filename is set to "<run>".
+            if source:
+                self.msg = reinterpret(source, f, should_fail=True)
+            else:
+                self.msg = "<could not determine information>"
+            if not self.args:
+                self.args = (self.msg,)
+
+if sys.version_info > (3, 0):
+    AssertionError.__module__ = "builtins"
+    reinterpret_old = "old reinterpretation not available for py3"
+else:
+    from _pytest.assertion.oldinterpret import interpret as reinterpret_old
+if sys.version_info >= (2, 6) or (sys.platform.startswith("java")):
+    from _pytest.assertion.newinterpret import interpret as reinterpret
+else:
+    reinterpret = reinterpret_old
+
diff --git a/_pytest/assertion/rewrite.py b/_pytest/assertion/rewrite.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/rewrite.py
@@ -0,0 +1,340 @@
+"""Rewrite assertion AST to produce nice error messages"""
+
+import ast
+import collections
+import itertools
+import sys
+
+import py
+from _pytest.assertion import util
+
+
+def rewrite_asserts(mod):
+    """Rewrite the assert statements in mod."""
+    AssertionRewriter().run(mod)
+
+
+_saferepr = py.io.saferepr
+from _pytest.assertion.util import format_explanation as _format_explanation
+
+def _format_boolop(operands, explanations, is_or):
+    show_explanations = []
+    for operand, expl in zip(operands, explanations):
+        show_explanations.append(expl)
+        if operand == is_or:
+            break
+    return "(" + (is_or and " or " or " and ").join(show_explanations) + ")"
+
+def _call_reprcompare(ops, results, expls, each_obj):
+    for i, res, expl in zip(range(len(ops)), results, expls):
+        try:
+            done = not res
+        except Exception:
+            done = True
+        if done:
+            break
+    if util._reprcompare is not None:
+        custom = util._reprcompare(ops[i], each_obj[i], each_obj[i + 1])
+        if custom is not None:
+            return custom
+    return expl
+
+
+unary_map = {
+    ast.Not : "not %s",
+    ast.Invert : "~%s",
+    ast.USub : "-%s",
+    ast.UAdd : "+%s"
+}
+
+binop_map = {
+    ast.BitOr : "|",
+    ast.BitXor : "^",
+    ast.BitAnd : "&",
+    ast.LShift : "<<",
+    ast.RShift : ">>",
+    ast.Add : "+",
+    ast.Sub : "-",
+    ast.Mult : "*",
+    ast.Div : "/",
+    ast.FloorDiv : "//",
+    ast.Mod : "%",
+    ast.Eq : "==",
+    ast.NotEq : "!=",
+    ast.Lt : "<",
+    ast.LtE : "<=",
+    ast.Gt : ">",
+    ast.GtE : ">=",
+    ast.Pow : "**",
+    ast.Is : "is",
+    ast.IsNot : "is not",
+    ast.In : "in",
+    ast.NotIn : "not in"
+}
+
+
+def set_location(node, lineno, col_offset):
+    """Set node location information recursively."""
+    def _fix(node, lineno, col_offset):
+        if "lineno" in node._attributes:
+            node.lineno = lineno
+        if "col_offset" in node._attributes:
+            node.col_offset = col_offset
+        for child in ast.iter_child_nodes(node):
+            _fix(child, lineno, col_offset)
+    _fix(node, lineno, col_offset)
+    return node
+
+
+class AssertionRewriter(ast.NodeVisitor):
+
+    def run(self, mod):
+        """Find all assert statements in *mod* and rewrite them."""
+        if not mod.body:
+            # Nothing to do.
+            return
+        # Insert some special imports at the top of the module but after any
+        # docstrings and __future__ imports.
+        aliases = [ast.alias(py.builtin.builtins.__name__, "@py_builtins"),
+                   ast.alias("_pytest.assertion.rewrite", "@pytest_ar")]
+        expect_docstring = True
+        pos = 0
+        lineno = 0
+        for item in mod.body:
+            if (expect_docstring and isinstance(item, ast.Expr) and
+                isinstance(item.value, ast.Str)):
+                doc = item.value.s
+                if "PYTEST_DONT_REWRITE" in doc:
+                    # The module has disabled assertion rewriting.
+                    return
+                lineno += len(doc) - 1
+                expect_docstring = False
+            elif (not isinstance(item, ast.ImportFrom) or item.level > 0 and
+                  item.identifier != "__future__"):
+                lineno = item.lineno
+                break
+            pos += 1
+        imports = [ast.Import([alias], lineno=lineno, col_offset=0)
+                   for alias in aliases]
+        mod.body[pos:pos] = imports
+        # Collect asserts.
+        nodes = collections.deque([mod])
+        while nodes:
+            node = nodes.popleft()
+            for name, field in ast.iter_fields(node):
+                if isinstance(field, list):
+                    new = []
+                    for i, child in enumerate(field):
+                        if isinstance(child, ast.Assert):
+                            # Transform assert.
+                            new.extend(self.visit(child))
+                        else:
+                            new.append(child)
+                            if isinstance(child, ast.AST):
+                                nodes.append(child)
+                    setattr(node, name, new)
+                elif (isinstance(field, ast.AST) and
+                      # Don't recurse into expressions as they can't contain
+                      # asserts.
+                      not isinstance(field, ast.expr)):
+                    nodes.append(field)
+
+    def variable(self):
+        """Get a new variable."""
+        # Use a character invalid in python identifiers to avoid clashing.
+        name = "@py_assert" + str(next(self.variable_counter))
+        self.variables.add(name)
+        return name
+
+    def assign(self, expr):
+        """Give *expr* a name."""
+        name = self.variable()
+        self.statements.append(ast.Assign([ast.Name(name, ast.Store())], expr))
+        return ast.Name(name, ast.Load())
+
+    def display(self, expr):
+        """Call py.io.saferepr on the expression."""
+        return self.helper("saferepr", expr)
+
+    def helper(self, name, *args):
+        """Call a helper in this module."""
+        py_name = ast.Name("@pytest_ar", ast.Load())
+        attr = ast.Attribute(py_name, "_" + name, ast.Load())
+        return ast.Call(attr, list(args), [], None, None)
+
+    def builtin(self, name):
+        """Return the builtin called *name*."""
+        builtin_name = ast.Name("@py_builtins", ast.Load())
+        return ast.Attribute(builtin_name, name, ast.Load())
+
+    def explanation_param(self, expr):
+        specifier = "py" + str(next(self.variable_counter))
+        self.explanation_specifiers[specifier] = expr
+        return "%(" + specifier + ")s"
+
+    def push_format_context(self):
+        self.explanation_specifiers = {}
+        self.stack.append(self.explanation_specifiers)
+
+    def pop_format_context(self, expl_expr):
+        current = self.stack.pop()
+        if self.stack:
+            self.explanation_specifiers = self.stack[-1]
+        keys = [ast.Str(key) for key in current.keys()]
+        format_dict = ast.Dict(keys, list(current.values()))
+        form = ast.BinOp(expl_expr, ast.Mod(), format_dict)
+        name = "@py_format" + str(next(self.variable_counter))
+        self.on_failure.append(ast.Assign([ast.Name(name, ast.Store())], form))
+        return ast.Name(name, ast.Load())
+
+    def generic_visit(self, node):
+        """Handle expressions we don't have custom code for."""
+        assert isinstance(node, ast.expr)
+        res = self.assign(node)
+        return res, self.explanation_param(self.display(res))
+
+    def visit_Assert(self, assert_):
+        if assert_.msg:
+            # There's already a message. Don't mess with it.
+            return [assert_]
+        self.statements = []
+        self.variables = set()
+        self.variable_counter = itertools.count()
+        self.stack = []
+        self.on_failure = []
+        self.push_format_context()
+        # Rewrite assert into a bunch of statements.
+        top_condition, explanation = self.visit(assert_.test)
+        # Create failure message.
+        body = self.on_failure
+        negation = ast.UnaryOp(ast.Not(), top_condition)
+        self.statements.append(ast.If(negation, body, []))
+        explanation = "assert " + explanation
+        template = ast.Str(explanation)
+        msg = self.pop_format_context(template)
+        fmt = self.helper("format_explanation", msg)
+        err_name = ast.Name("AssertionError", ast.Load())
+        exc = ast.Call(err_name, [fmt], [], None, None)
+        if sys.version_info[0] >= 3:
+            raise_ = ast.Raise(exc, None)
+        else:
+            raise_ = ast.Raise(exc, None, None)
+        body.append(raise_)
+        # Delete temporary variables.
+        names = [ast.Name(name, ast.Del()) for name in self.variables]
+        if names:
+            delete = ast.Delete(names)
+            self.statements.append(delete)
+        # Fix line numbers.
+        for stmt in self.statements:
+            set_location(stmt, assert_.lineno, assert_.col_offset)
+        return self.statements
+
+    def visit_Name(self, name):
+        # Check if the name is local or not.
+        locs = ast.Call(self.builtin("locals"), [], [], None, None)
+        globs = ast.Call(self.builtin("globals"), [], [], None, None)
+        ops = [ast.In(), ast.IsNot()]
+        test = ast.Compare(ast.Str(name.id), ops, [locs, globs])
+        expr = ast.IfExp(test, self.display(name), ast.Str(name.id))
+        return name, self.explanation_param(expr)
+
+    def visit_BoolOp(self, boolop):
+        operands = []
+        explanations = []
+        self.push_format_context()
+        for operand in boolop.values:
+            res, explanation = self.visit(operand)
+            operands.append(res)
+            explanations.append(explanation)
+        expls = ast.Tuple([ast.Str(expl) for expl in explanations], ast.Load())
+        is_or = ast.Num(isinstance(boolop.op, ast.Or))
+        expl_template = self.helper("format_boolop",
+                                    ast.Tuple(operands, ast.Load()), expls,
+                                    is_or)
+        expl = self.pop_format_context(expl_template)
+        res = self.assign(ast.BoolOp(boolop.op, operands))
+        return res, self.explanation_param(expl)
+
+    def visit_UnaryOp(self, unary):
+        pattern = unary_map[unary.op.__class__]
+        operand_res, operand_expl = self.visit(unary.operand)
+        res = self.assign(ast.UnaryOp(unary.op, operand_res))
+        return res, pattern % (operand_expl,)
+
+    def visit_BinOp(self, binop):
+        symbol = binop_map[binop.op.__class__]
+        left_expr, left_expl = self.visit(binop.left)
+        right_expr, right_expl = self.visit(binop.right)
+        explanation = "(%s %s %s)" % (left_expl, symbol, right_expl)
+        res = self.assign(ast.BinOp(left_expr, binop.op, right_expr))
+        return res, explanation
+
+    def visit_Call(self, call):
+        new_func, func_expl = self.visit(call.func)
+        arg_expls = []
+        new_args = []
+        new_kwargs = []
+        new_star = new_kwarg = None
+        for arg in call.args:
+            res, expl = self.visit(arg)
+            new_args.append(res)
+            arg_expls.append(expl)
+        for keyword in call.keywords:
+            res, expl = self.visit(keyword.value)
+            new_kwargs.append(ast.keyword(keyword.arg, res))
+            arg_expls.append(keyword.arg + "=" + expl)
+        if call.starargs:
+            new_star, expl = self.visit(call.starargs)
+            arg_expls.append("*" + expl)
+        if call.kwargs:
+            new_kwarg, expl = self.visit(call.kwarg)
+            arg_expls.append("**" + expl)
+        expl = "%s(%s)" % (func_expl, ', '.join(arg_expls))
+        new_call = ast.Call(new_func, new_args, new_kwargs, new_star, new_kwarg)
+        res = self.assign(new_call)
+        res_expl = self.explanation_param(self.display(res))
+        outer_expl = "%s\n{%s = %s\n}" % (res_expl, res_expl, expl)
+        return res, outer_expl
+
+    def visit_Attribute(self, attr):
+        if not isinstance(attr.ctx, ast.Load):
+            return self.generic_visit(attr)
+        value, value_expl = self.visit(attr.value)
+        res = self.assign(ast.Attribute(value, attr.attr, ast.Load()))
+        res_expl = self.explanation_param(self.display(res))
+        pat = "%s\n{%s = %s.%s\n}"
+        expl = pat % (res_expl, res_expl, value_expl, attr.attr)
+        return res, expl
+
+    def visit_Compare(self, comp):
+        self.push_format_context()
+        left_res, left_expl = self.visit(comp.left)
+        res_variables = [self.variable() for i in range(len(comp.ops))]
+        load_names = [ast.Name(v, ast.Load()) for v in res_variables]
+        store_names = [ast.Name(v, ast.Store()) for v in res_variables]
+        it = zip(range(len(comp.ops)), comp.ops, comp.comparators)
+        expls = []
+        syms = []
+        results = [left_res]
+        for i, op, next_operand in it:
+            next_res, next_expl = self.visit(next_operand)
+            results.append(next_res)
+            sym = binop_map[op.__class__]
+            syms.append(ast.Str(sym))
+            expl = "%s %s %s" % (left_expl, sym, next_expl)
+            expls.append(ast.Str(expl))
+            res_expr = ast.Compare(left_res, [op], [next_res])
+            self.statements.append(ast.Assign([store_names[i]], res_expr))
+            left_res, left_expl = next_res, next_expl
+        # Use py.code._reprcompare if that's available.
+        expl_call = self.helper("call_reprcompare",
+                                ast.Tuple(syms, ast.Load()),
+                                ast.Tuple(load_names, ast.Load()),
+                                ast.Tuple(expls, ast.Load()),
+                                ast.Tuple(results, ast.Load()))
+        if len(comp.ops) > 1:
+            res = ast.BoolOp(ast.And(), load_names)
+        else:
+            res = load_names[0]
+        return res, self.explanation_param(self.pop_format_context(expl_call))
diff --git a/_pytest/assertion/util.py b/_pytest/assertion/util.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/util.py
@@ -0,0 +1,213 @@
+"""Utilities for assertion debugging"""
+
+import py
+
+
+# The _reprcompare attribute on the util module is used by the new assertion
+# interpretation code and assertion rewriter to detect this plugin was
+# loaded and in turn call the hooks defined here as part of the
+# DebugInterpreter.
+_reprcompare = None
+
+def format_explanation(explanation):
+    """This formats an explanation
+
+    Normally all embedded newlines are escaped, however there are
+    three exceptions: \n{, \n} and \n~.  The first two are intended
+    cover nested explanations, see function and attribute explanations
+    for examples (.visit_Call(), visit_Attribute()).  The last one is
+    for when one explanation needs to span multiple lines, e.g. when
+    displaying diffs.
+    """
+    # simplify 'assert False where False = ...'
+    where = 0
+    while True:
+        start = where = explanation.find("False\n{False = ", where)
+        if where == -1:
+            break
+        level = 0
+        for i, c in enumerate(explanation[start:]):
+            if c == "{":
+                level += 1
+            elif c == "}":
+                level -= 1
+                if not level:
+                    break
+        else:
+            raise AssertionError("unbalanced braces: %r" % (explanation,))
+        end = start + i
+        where = end
+        if explanation[end - 1] == '\n':
+            explanation = (explanation[:start] + explanation[start+15:end-1] +
+                           explanation[end+1:])
+            where -= 17
+    raw_lines = (explanation or '').split('\n')
+    # escape newlines not followed by {, } and ~
+    lines = [raw_lines[0]]
+    for l in raw_lines[1:]:
+        if l.startswith('{') or l.startswith('}') or l.startswith('~'):
+            lines.append(l)
+        else:
+            lines[-1] += '\\n' + l
+
+    result = lines[:1]
+    stack = [0]
+    stackcnt = [0]
+    for line in lines[1:]:
+        if line.startswith('{'):
+            if stackcnt[-1]:
+                s = 'and   '
+            else:
+                s = 'where '
+            stack.append(len(result))
+            stackcnt[-1] += 1
+            stackcnt.append(0)
+            result.append(' +' + '  '*(len(stack)-1) + s + line[1:])
+        elif line.startswith('}'):
+            assert line.startswith('}')
+            stack.pop()
+            stackcnt.pop()
+            result[stack[-1]] += line[1:]
+        else:
+            assert line.startswith('~')
+            result.append('  '*len(stack) + line[1:])
+    assert len(stack) == 1
+    return '\n'.join(result)
+
+
+# Provide basestring in python3
+try:
+    basestring = basestring
+except NameError:
+    basestring = str
+
+
+def assertrepr_compare(op, left, right):
+    """return specialised explanations for some operators/operands"""
+    width = 80 - 15 - len(op) - 2 # 15 chars indentation, 1 space around op
+    left_repr = py.io.saferepr(left, maxsize=int(width/2))
+    right_repr = py.io.saferepr(right, maxsize=width-len(left_repr))
+    summary = '%s %s %s' % (left_repr, op, right_repr)
+
+    issequence = lambda x: isinstance(x, (list, tuple))
+    istext = lambda x: isinstance(x, basestring)
+    isdict = lambda x: isinstance(x, dict)
+    isset = lambda x: isinstance(x, set)
+
+    explanation = None
+    try:
+        if op == '==':
+            if istext(left) and istext(right):
+                explanation = _diff_text(left, right)
+            elif issequence(left) and issequence(right):
+                explanation = _compare_eq_sequence(left, right)
+            elif isset(left) and isset(right):
+                explanation = _compare_eq_set(left, right)
+            elif isdict(left) and isdict(right):
+                explanation = _diff_text(py.std.pprint.pformat(left),
+                                         py.std.pprint.pformat(right))
+        elif op == 'not in':
+            if istext(left) and istext(right):
+                explanation = _notin_text(left, right)
+    except py.builtin._sysex:
+        raise
+    except:
+        excinfo = py.code.ExceptionInfo()
+        explanation = ['(pytest_assertion plugin: representation of '
+            'details failed. Probably an object has a faulty __repr__.)',
+            str(excinfo)
+            ]
+
+
+    if not explanation:
+        return None
+
+    # Don't include pageloads of data, should be configurable
+    if len(''.join(explanation)) > 80*8:
+        explanation = ['Detailed information too verbose, truncated']
+
+    return [summary] + explanation
+
+
+def _diff_text(left, right):
+    """Return the explanation for the diff between text
+
+    This will skip leading and trailing characters which are
+    identical to keep the diff minimal.
+    """
+    explanation = []
+    i = 0 # just in case left or right has zero length
+    for i in range(min(len(left), len(right))):
+        if left[i] != right[i]:
+            break
+    if i > 42:
+        i -= 10                 # Provide some context
+        explanation = ['Skipping %s identical '
+                       'leading characters in diff' % i]
+        left = left[i:]
+        right = right[i:]
+    if len(left) == len(right):
+        for i in range(len(left)):
+            if left[-i] != right[-i]:
+                break
+        if i > 42:
+            i -= 10     # Provide some context
+            explanation += ['Skipping %s identical '
+                            'trailing characters in diff' % i]
+            left = left[:-i]
+            right = right[:-i]
+    explanation += [line.strip('\n')
+                    for line in py.std.difflib.ndiff(left.splitlines(),
+                                                     right.splitlines())]
+    return explanation
+
+
+def _compare_eq_sequence(left, right):
+    explanation = []
+    for i in range(min(len(left), len(right))):
+        if left[i] != right[i]:
+            explanation += ['At index %s diff: %r != %r' %
+                            (i, left[i], right[i])]
+            break
+    if len(left) > len(right):
+        explanation += ['Left contains more items, '
+            'first extra item: %s' % py.io.saferepr(left[len(right)],)]
+    elif len(left) < len(right):
+        explanation += ['Right contains more items, '
+            'first extra item: %s' % py.io.saferepr(right[len(left)],)]
+    return explanation # + _diff_text(py.std.pprint.pformat(left),
+                       #             py.std.pprint.pformat(right))
+
+
+def _compare_eq_set(left, right):
+    explanation = []
+    diff_left = left - right
+    diff_right = right - left
+    if diff_left:
+        explanation.append('Extra items in the left set:')
+        for item in diff_left:
+            explanation.append(py.io.saferepr(item))
+    if diff_right:
+        explanation.append('Extra items in the right set:')
+        for item in diff_right:
+            explanation.append(py.io.saferepr(item))
+    return explanation
+
+
+def _notin_text(term, text):
+    index = text.find(term)
+    head = text[:index]
+    tail = text[index+len(term):]
+    correct_text = head + tail
+    diff = _diff_text(correct_text, text)
+    newdiff = ['%s is contained here:' % py.io.saferepr(term, maxsize=42)]
+    for line in diff:
+        if line.startswith('Skipping'):
+            continue
+        if line.startswith('- '):
+            continue
+        if line.startswith('+ '):
+            newdiff.append('  ' + line[2:])
+        else:
+            newdiff.append(line)
+    return newdiff
diff --git a/_pytest/doctest.py b/_pytest/doctest.py
--- a/_pytest/doctest.py
+++ b/_pytest/doctest.py
@@ -59,7 +59,7 @@
                 inner_excinfo = py.code.ExceptionInfo(excinfo.value.exc_info)
                 lines += ["UNEXPECTED EXCEPTION: %s" %
                             repr(inner_excinfo.value)]
-
+                lines += py.std.traceback.format_exception(*excinfo.value.exc_info)
             return ReprFailDoctest(reprlocation, lines)
         else:
             return super(DoctestItem, self).repr_failure(excinfo)
diff --git a/_pytest/helpconfig.py b/_pytest/helpconfig.py
--- a/_pytest/helpconfig.py
+++ b/_pytest/helpconfig.py
@@ -16,9 +16,6 @@
     group.addoption('--traceconfig',
                action="store_true", dest="traceconfig", default=False,
                help="trace considerations of conftest.py files."),
-    group._addoption('--nomagic',
-               action="store_true", dest="nomagic", default=False,
-               help="don't reinterpret asserts, no traceback cutting. ")
     group.addoption('--debug',
                action="store_true", dest="debug", default=False,
                help="generate and show internal debugging information.")
diff --git a/_pytest/junitxml.py b/_pytest/junitxml.py
--- a/_pytest/junitxml.py
+++ b/_pytest/junitxml.py
@@ -65,7 +65,8 @@
 
 class LogXML(object):
     def __init__(self, logfile, prefix):
-        self.logfile = logfile
+        logfile = os.path.expanduser(os.path.expandvars(logfile))
+        self.logfile = os.path.normpath(logfile)
         self.prefix = prefix
         self.test_logs = []
         self.passed = self.skipped = 0
@@ -76,7 +77,7 @@
         names = report.nodeid.split("::")
         names[0] = names[0].replace("/", '.')
         names = tuple(names)
-        d = {'time': self._durations.pop(names, "0")}
+        d = {'time': self._durations.pop(report.nodeid, "0")}
         names = [x.replace(".py", "") for x in names if x != "()"]
         classnames = names[:-1]
         if self.prefix:
@@ -170,12 +171,11 @@
             self.append_skipped(report)
 
     def pytest_runtest_call(self, item, __multicall__):
-        names = tuple(item.listnames())
         start = time.time()
         try:
             return __multicall__.execute()
         finally:
-            self._durations[names] = time.time() - start
+            self._durations[item.nodeid] = time.time() - start
 
     def pytest_collectreport(self, report):
         if not report.passed:
diff --git a/_pytest/main.py b/_pytest/main.py
--- a/_pytest/main.py
+++ b/_pytest/main.py
@@ -46,23 +46,25 @@
 
 
 def pytest_namespace():
-    return dict(collect=dict(Item=Item, Collector=Collector, File=File))
+    collect = dict(Item=Item, Collector=Collector, File=File, Session=Session)
+    return dict(collect=collect)
         
 def pytest_configure(config):
     py.test.config = config # compatibiltiy
     if config.option.exitfirst:
         config.option.maxfail = 1
 
-def pytest_cmdline_main(config):
-    """ default command line protocol for initialization, session,
-    running tests and reporting. """
+def wrap_session(config, doit):
+    """Skeleton command line program"""
     session = Session(config)
     session.exitstatus = EXIT_OK
+    initstate = 0
     try:
         config.pluginmanager.do_configure(config)
+        initstate = 1
         config.hook.pytest_sessionstart(session=session)
-        config.hook.pytest_collection(session=session)
-        config.hook.pytest_runtestloop(session=session)
+        initstate = 2
+        doit(config, session)
     except pytest.UsageError:
         raise
     except KeyboardInterrupt:
@@ -77,18 +79,24 @@
             sys.stderr.write("mainloop: caught Spurious SystemExit!\n")
     if not session.exitstatus and session._testsfailed:
         session.exitstatus = EXIT_TESTSFAILED
-    config.hook.pytest_sessionfinish(session=session,
-        exitstatus=session.exitstatus)
-    config.pluginmanager.do_unconfigure(config)
+    if initstate >= 2:
+        config.hook.pytest_sessionfinish(session=session,
+            exitstatus=session.exitstatus)
+    if initstate >= 1:
+        config.pluginmanager.do_unconfigure(config)
     return session.exitstatus
 
+def pytest_cmdline_main(config):
+    return wrap_session(config, _main)
+
+def _main(config, session):
+    """ default command line protocol for initialization, session,
+    running tests and reporting. """
+    config.hook.pytest_collection(session=session)
+    config.hook.pytest_runtestloop(session=session)
+
 def pytest_collection(session):
-    session.perform_collect()
-    hook = session.config.hook
-    hook.pytest_collection_modifyitems(session=session,
-        config=session.config, items=session.items)
-    hook.pytest_collection_finish(session=session)
-    return True
+    return session.perform_collect()
 
 def pytest_runtestloop(session):
     if session.config.option.collectonly:
@@ -374,6 +382,16 @@
         return HookProxy(fspath, self.config)
 
     def perform_collect(self, args=None, genitems=True):
+        hook = self.config.hook
+        try:
+            items = self._perform_collect(args, genitems)
+            hook.pytest_collection_modifyitems(session=self,
+                config=self.config, items=items)
+        finally:
+            hook.pytest_collection_finish(session=self)
+        return items
+
+    def _perform_collect(self, args, genitems):
         if args is None:
             args = self.config.args
         self.trace("perform_collect", self, args)
diff --git a/_pytest/mark.py b/_pytest/mark.py
--- a/_pytest/mark.py
+++ b/_pytest/mark.py
@@ -153,7 +153,7 @@
 
     def __repr__(self):
         return "<MarkInfo %r args=%r kwargs=%r>" % (
-                self._name, self.args, self.kwargs)
+                self.name, self.args, self.kwargs)
 
 def pytest_itemcollected(item):
     if not isinstance(item, pytest.Function):
diff --git a/_pytest/pytester.py b/_pytest/pytester.py
--- a/_pytest/pytester.py
+++ b/_pytest/pytester.py
@@ -6,7 +6,7 @@
 import inspect
 import time
 from fnmatch import fnmatch
-from _pytest.main import Session
+from _pytest.main import Session, EXIT_OK
 from py.builtin import print_
 from _pytest.core import HookRelay
 
@@ -292,13 +292,19 @@
         assert '::' not in str(arg)
         p = py.path.local(arg)
         x = session.fspath.bestrelpath(p)
-        return session.perform_collect([x], genitems=False)[0]
+        config.hook.pytest_sessionstart(session=session)
+        res = session.perform_collect([x], genitems=False)[0]
+        config.hook.pytest_sessionfinish(session=session, exitstatus=EXIT_OK)
+        return res
 
     def getpathnode(self, path):
-        config = self.parseconfig(path)
+        config = self.parseconfigure(path)
         session = Session(config)
         x = session.fspath.bestrelpath(path)
-        return session.perform_collect([x], genitems=False)[0]
+        config.hook.pytest_sessionstart(session=session)
+        res = session.perform_collect([x], genitems=False)[0]
+        config.hook.pytest_sessionfinish(session=session, exitstatus=EXIT_OK)
+        return res
 
     def genitems(self, colitems):
         session = colitems[0].session
@@ -312,7 +318,9 @@
         config = self.parseconfigure(*args)
         rec = self.getreportrecorder(config)
         session = Session(config)
+        config.hook.pytest_sessionstart(session=session)
         session.perform_collect()
+        config.hook.pytest_sessionfinish(session=session, exitstatus=EXIT_OK)
         return session.items, rec
 
     def runitem(self, source):
@@ -382,6 +390,8 @@
             c.basetemp = py.path.local.make_numbered_dir(prefix="reparse",
                 keep=0, rootdir=self.tmpdir, lock_timeout=None)
             c.parse(args)
+            c.pluginmanager.do_configure(c)
+            self.request.addfinalizer(lambda: c.pluginmanager.do_unconfigure(c))
             return c
         finally:
             py.test.config = oldconfig
diff --git a/_pytest/python.py b/_pytest/python.py
--- a/_pytest/python.py
+++ b/_pytest/python.py
@@ -226,8 +226,13 @@
 
     def _importtestmodule(self):
         # we assume we are only called once per module
+        from _pytest import assertion
+        assertion.before_module_import(self)
         try:
-            mod = self.fspath.pyimport(ensuresyspath=True)
+            try:
+                mod = self.fspath.pyimport(ensuresyspath=True)
+            finally:
+                assertion.after_module_import(self)
         except SyntaxError:
             excinfo = py.code.ExceptionInfo()
             raise self.CollectError(excinfo.getrepr(style="short"))
@@ -374,7 +379,7 @@
         # test generators are seen as collectors but they also
         # invoke setup/teardown on popular request
         # (induced by the common "test_*" naming shared with normal tests)
-        self.config._setupstate.prepare(self)
+        self.session._setupstate.prepare(self)
         # see FunctionMixin.setup and test_setupstate_is_preserved_134
         self._preservedparent = self.parent.obj
         l = []
@@ -721,7 +726,7 @@
 
     def _addfinalizer(self, finalizer, scope):
         colitem = self._getscopeitem(scope)
-        self.config._setupstate.addfinalizer(
+        self._pyfuncitem.session._setupstate.addfinalizer(
             finalizer=finalizer, colitem=colitem)
 
     def __repr__(self):
@@ -742,8 +747,10 @@
         raise self.LookupError(msg)
 
 def showfuncargs(config):
-    from _pytest.main import Session
-    session = Session(config)
+    from _pytest.main import wrap_session
+    return wrap_session(config, _showfuncargs_main)
+
+def _showfuncargs_main(config, session):
     session.perform_collect()
     if session.items:
         plugins = session.items[0].getplugins()
diff --git a/_pytest/runner.py b/_pytest/runner.py
--- a/_pytest/runner.py
+++ b/_pytest/runner.py
@@ -14,17 +14,15 @@
 #
 # pytest plugin hooks
 
-# XXX move to pytest_sessionstart and fix py.test owns tests
-def pytest_configure(config):
-    config._setupstate = SetupState()
+def pytest_sessionstart(session):
+    session._setupstate = SetupState()
 
 def pytest_sessionfinish(session, exitstatus):
-    if hasattr(session.config, '_setupstate'):
-        hook = session.config.hook
-        rep = hook.pytest__teardown_final(session=session)
-        if rep:
-            hook.pytest__teardown_final_logerror(session=session, report=rep)
-            session.exitstatus = 1
+    hook = session.config.hook
+    rep = hook.pytest__teardown_final(session=session)
+    if rep:
+        hook.pytest__teardown_final_logerror(session=session, report=rep)
+        session.exitstatus = 1
 
 class NodeInfo:
     def __init__(self, location):
@@ -46,16 +44,16 @@
     return reports
 
 def pytest_runtest_setup(item):
-    item.config._setupstate.prepare(item)
+    item.session._setupstate.prepare(item)
 
 def pytest_runtest_call(item):
     item.runtest()
 
 def pytest_runtest_teardown(item):
-    item.config._setupstate.teardown_exact(item)
+    item.session._setupstate.teardown_exact(item)
 
 def pytest__teardown_final(session):
-    call = CallInfo(session.config._setupstate.teardown_all, when="teardown")
+    call = CallInfo(session._setupstate.teardown_all, when="teardown")
     if call.excinfo:
         ntraceback = call.excinfo.traceback .cut(excludepath=py._pydir)
         call.excinfo.traceback = ntraceback.filter()
diff --git a/lib-python/TODO b/lib-python/TODO
deleted file mode 100644
--- a/lib-python/TODO
+++ /dev/null
@@ -1,100 +0,0 @@
-TODO list for 2.7.0
-===================
-
-You can find the results of the most recent buildbot run at:
-http://buildbot.pypy.org/
-
-
-Probably easy tasks
--------------------
-
-- (unicode|bytearray).(index|find) should accept None as indices (see
-  test_unicode.py)
-
-- missing posix.confstr and posix.confstr_names
-
-- remove code duplication: bit_length() and _count_bits() in rlib/rbigint.py,
-  objspace/std/longobject.py and objspace/std/longtype.py.
-
-- missing module pyexpat.errors
-
-- support for PYTHONIOENCODING, this needs a way to update file.encoding
-
-- implement format__Complex_ANY() in pypy/objspace/std/complexobject.py
-
-- Code like this does not work, for two reasons::
-
-  \
-  from __future__ import (with_statement,
-                          unicode_literals)
-  assert type("") is unicode
-
-- Code like::
-
-  assert(x is not None, "error message")
-
-  should emit a SyntaxWarning when compiled (the tuple is always true)
-
-
-Medium tasks
-------------
-
-- socket module has a couple of changes (including AF_TIPC packet range)
-
-Longer tasks
-------------
-
-- Fix usage of __cmp__ in subclasses::
-
-    class badint(int):
-        def __cmp__(self, other):
-            raise RuntimeError
-    raises(RuntimeError, cmp, 0, badint(1))
-
-- Fix comparison of objects layout: if two classes have the same __slots__, it
-  should be possible to change the instances __class__::
-
-      class A(object): __slots__ = ('a', 'b')
-      class B(object): __slots__ = ('b', 'a')
-      a = A()
-      a.__class__ = B
-
-- Show a ResourceWarning when a file/socket is not explicitely closed, like
-  CPython did for 3.2: http://svn.python.org/view?view=rev&revision=85920
-  in PyPy this should be enabled by default
-
-Won't do for this release
--------------------------
-
-Note: when you give up with a missing feature, please mention it here, as well
-as the various skips added to the test suite.
-
-- py3k warnings
-
-  * the -3 flag is accepted on the command line, but displays a warning (see
-    `translator/goal/app_main.py`)
-
-- CJK codecs.
-
-  * In `./conftest.py`, skipped all `test_codecencodings_*.py` and
-    `test_codecmaps_*.py`.
-
-  * In test_codecs, commented out various items in `all_unicode_encodings`.
-
-- Error messages about ill-formed calls (like "argument after ** must be a
-  mapping") don't always show the function name.  That's hard to fix for
-  the case of errors raised when the Argument object is created (as opposed
-  to when parsing for a given target function, which occurs later).
-
-  * Some "..." were added to doctests in test_extcall.py
-
-- CPython's builtin methods are both functions and unbound methods (for
-  example, `str.upper is dict(str.__dict__)['upper']`). This is not the case
-  in pypy, and assertions like `object.__str__ is object.__str__` are False
-  with pypy.  Use the `==` operator instead.
-
-  * pprint.py, _threading_local.py
-
-- When importing a nested module fails, the ImportError message mentions the
-  name of the package up to the component that could not be imported (CPython
-  prefers to display the names starting with the failing part).
diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -569,7 +569,6 @@
 #
 import os
 import time
-import socket
 import getpass
 
 class ReallyRunFileExternal(py.test.collect.Item): 
diff --git a/lib-python/modified-2.7/ctypes/__init__.py b/lib-python/modified-2.7/ctypes/__init__.py
--- a/lib-python/modified-2.7/ctypes/__init__.py
+++ b/lib-python/modified-2.7/ctypes/__init__.py
@@ -7,6 +7,7 @@
 
 __version__ = "1.1.0"
 
+import _ffi
 from _ctypes import Union, Structure, Array
 from _ctypes import _Pointer
 from _ctypes import CFuncPtr as _CFuncPtr
@@ -350,7 +351,7 @@
         self._FuncPtr = _FuncPtr
 
         if handle is None:
-            self._handle = _dlopen(self._name, mode)
+            self._handle = _ffi.CDLL(name)
         else:
             self._handle = handle
 
diff --git a/lib-python/modified-2.7/ctypes/test/test_cfuncs.py b/lib-python/modified-2.7/ctypes/test/test_cfuncs.py
--- a/lib-python/modified-2.7/ctypes/test/test_cfuncs.py
+++ b/lib-python/modified-2.7/ctypes/test/test_cfuncs.py
@@ -3,8 +3,8 @@
 
 import unittest
 from ctypes import *
-
 import _ctypes_test
+from test.test_support import impl_detail
 
 class CFunctions(unittest.TestCase):
     _dll = CDLL(_ctypes_test.__file__)
@@ -158,12 +158,14 @@
         self.assertEqual(self._dll.tf_bd(0, 42.), 14.)
         self.assertEqual(self.S(), 42)
 
+    @impl_detail('long double not supported by PyPy', pypy=False)
     def test_longdouble(self):
         self._dll.tf_D.restype = c_longdouble
         self._dll.tf_D.argtypes = (c_longdouble,)
         self.assertEqual(self._dll.tf_D(42.), 14.)
         self.assertEqual(self.S(), 42)
-
+        
+    @impl_detail('long double not supported by PyPy', pypy=False)
     def test_longdouble_plus(self):
         self._dll.tf_bD.restype = c_longdouble
         self._dll.tf_bD.argtypes = (c_byte, c_longdouble)
diff --git a/lib-python/modified-2.7/ctypes/test/test_functions.py b/lib-python/modified-2.7/ctypes/test/test_functions.py
--- a/lib-python/modified-2.7/ctypes/test/test_functions.py
+++ b/lib-python/modified-2.7/ctypes/test/test_functions.py
@@ -8,6 +8,7 @@
 from ctypes import *
 import sys, unittest
 from ctypes.test import xfail
+from test.test_support import impl_detail
 
 try:
     WINFUNCTYPE
@@ -144,6 +145,7 @@
         self.assertEqual(result, -21)
         self.assertEqual(type(result), float)
 
+    @impl_detail('long double not supported by PyPy', pypy=False)
     def test_longdoubleresult(self):
         f = dll._testfunc_D_bhilfD
         f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_longdouble]
diff --git a/lib-python/modified-2.7/ctypes/test/test_libc.py b/lib-python/modified-2.7/ctypes/test/test_libc.py
--- a/lib-python/modified-2.7/ctypes/test/test_libc.py
+++ b/lib-python/modified-2.7/ctypes/test/test_libc.py
@@ -26,6 +26,7 @@
         self.assertEqual(chars.raw, "   ,,aaaadmmmnpppsss\x00")
 
     def test_no_more_xfail(self):
+        import socket
         import ctypes.test
         self.assertTrue(not hasattr(ctypes.test, 'xfail'),
                         "You should incrementally grep for '@xfail' and remove them, they are real failures")
diff --git a/lib-python/modified-2.7/distutils/cygwinccompiler.py b/lib-python/modified-2.7/distutils/cygwinccompiler.py
--- a/lib-python/modified-2.7/distutils/cygwinccompiler.py
+++ b/lib-python/modified-2.7/distutils/cygwinccompiler.py
@@ -75,6 +75,9 @@
         elif msc_ver == '1500':
             # VS2008 / MSVC 9.0
             return ['msvcr90']
+        elif msc_ver == '1600':
+            # VS2010 / MSVC 10.0
+            return ['msvcr100']
         else:
             raise ValueError("Unknown MS Compiler version %s " % msc_ver)
 
diff --git a/lib-python/modified-2.7/distutils/sysconfig.py b/lib-python/modified-2.7/distutils/sysconfig.py
--- a/lib-python/modified-2.7/distutils/sysconfig.py
+++ b/lib-python/modified-2.7/distutils/sysconfig.py
@@ -20,8 +20,10 @@
 if '__pypy__' in sys.builtin_module_names:
     from distutils.sysconfig_pypy import *
     from distutils.sysconfig_pypy import _config_vars # needed by setuptools
+    from distutils.sysconfig_pypy import _variable_rx # read_setup_file()
 else:
     from distutils.sysconfig_cpython import *
     from distutils.sysconfig_cpython import _config_vars # needed by setuptools
+    from distutils.sysconfig_cpython import _variable_rx # read_setup_file()
 
 
diff --git a/lib-python/modified-2.7/distutils/sysconfig_pypy.py b/lib-python/modified-2.7/distutils/sysconfig_pypy.py
--- a/lib-python/modified-2.7/distutils/sysconfig_pypy.py
+++ b/lib-python/modified-2.7/distutils/sysconfig_pypy.py
@@ -116,3 +116,7 @@
     if compiler.compiler_type == "unix":
         compiler.compiler_so.extend(['-fPIC', '-Wimplicit'])
         compiler.shared_lib_extension = get_config_var('SO')
+
+from sysconfig_cpython import (
+    parse_makefile, _variable_rx, expand_makefile_vars)
+
diff --git a/lib-python/modified-2.7/opcode.py b/lib-python/modified-2.7/opcode.py
--- a/lib-python/modified-2.7/opcode.py
+++ b/lib-python/modified-2.7/opcode.py
@@ -189,7 +189,6 @@
 def_op('MAP_ADD', 147)
 
 # pypy modification, experimental bytecode
-def_op('CALL_LIKELY_BUILTIN', 200)    # #args + (#kwargs << 8)
 def_op('LOOKUP_METHOD', 201)          # Index in name list
 hasname.append(201)
 def_op('CALL_METHOD', 202)            # #args not including 'self'
diff --git a/lib-python/modified-2.7/pickle.py b/lib-python/modified-2.7/pickle.py
--- a/lib-python/modified-2.7/pickle.py
+++ b/lib-python/modified-2.7/pickle.py
@@ -873,7 +873,7 @@
 
 # Unpickling machinery
 
-class Unpickler:
+class Unpickler(object):
 
     def __init__(self, file):
         """This takes a file-like object for reading a pickle data stream.
diff --git a/lib-python/modified-2.7/test/test_descr.py b/lib-python/modified-2.7/test/test_descr.py
--- a/lib-python/modified-2.7/test/test_descr.py
+++ b/lib-python/modified-2.7/test/test_descr.py
@@ -4399,13 +4399,10 @@
         self.assertTrue(l.__add__ != [5].__add__)
         self.assertTrue(l.__add__ != l.__mul__)
         self.assertTrue(l.__add__.__name__ == '__add__')
-        if hasattr(l.__add__, '__self__'):
-            # CPython
-            self.assertTrue(l.__add__.__self__ is l)
+        self.assertTrue(l.__add__.__self__ is l)
+        if hasattr(l.__add__, '__objclass__'):   # CPython
             self.assertTrue(l.__add__.__objclass__ is list)
-        else:
-            # Python implementations where [].__add__ is a normal bound method
-            self.assertTrue(l.__add__.im_self is l)
+        else:                                    # PyPy
             self.assertTrue(l.__add__.im_class is list)
         self.assertEqual(l.__add__.__doc__, list.__add__.__doc__)
         try:
diff --git a/lib-python/modified-2.7/test/test_dis.py b/lib-python/modified-2.7/test/test_dis.py
deleted file mode 100644
--- a/lib-python/modified-2.7/test/test_dis.py
+++ /dev/null
@@ -1,152 +0,0 @@
-# Minimal tests for dis module
-
-from test.test_support import run_unittest
-import unittest
-import sys
-import dis
-import StringIO
-
-
-def _f(a):
-    print a
-    return 1
-
-dis_f = """\
- %-4d         0 LOAD_FAST                0 (a)
-              3 PRINT_ITEM
-              4 PRINT_NEWLINE
-
- %-4d         5 LOAD_CONST               1 (1)
-              8 RETURN_VALUE
-"""%(_f.func_code.co_firstlineno + 1,
-     _f.func_code.co_firstlineno + 2)
-
-
-# we "call" rangexxx() instead of range() to disable the
-# pypy optimization that turns it into CALL_LIKELY_BUILTIN.
-def bug708901():
-    for res in rangexxx(1,
-                        10):
-        pass
-
-dis_bug708901 = """\
- %-4d         0 SETUP_LOOP              23 (to 26)
-              3 LOAD_GLOBAL              0 (rangexxx)
-              6 LOAD_CONST               1 (1)
-
- %-4d         9 LOAD_CONST               2 (10)
-             12 CALL_FUNCTION            2
-             15 GET_ITER
-        >>   16 FOR_ITER                 6 (to 25)
-             19 STORE_FAST               0 (res)
-
- %-4d        22 JUMP_ABSOLUTE           16
-        >>   25 POP_BLOCK
-        >>   26 LOAD_CONST               0 (None)
-             29 RETURN_VALUE
-"""%(bug708901.func_code.co_firstlineno + 1,
-     bug708901.func_code.co_firstlineno + 2,
-     bug708901.func_code.co_firstlineno + 3)
-
-
-def bug1333982(x=[]):
-    assert 0, ([s for s in x] +
-              1)
-    pass
-
-dis_bug1333982 = """\
- %-4d         0 LOAD_CONST               1 (0)
-              3 POP_JUMP_IF_TRUE        38
-              6 LOAD_GLOBAL              0 (AssertionError)
-              9 BUILD_LIST               0
-             12 LOAD_FAST                0 (x)
-             15 GET_ITER
-        >>   16 FOR_ITER                12 (to 31)
-             19 STORE_FAST               1 (s)
-             22 LOAD_FAST                1 (s)
-             25 LIST_APPEND              2
-             28 JUMP_ABSOLUTE           16
-
- %-4d   >>   31 LOAD_CONST               2 (1)
-             34 BINARY_ADD
-             35 RAISE_VARARGS            2
-
- %-4d   >>   38 LOAD_CONST               0 (None)
-             41 RETURN_VALUE
-"""%(bug1333982.func_code.co_firstlineno + 1,
-     bug1333982.func_code.co_firstlineno + 2,
-     bug1333982.func_code.co_firstlineno + 3)
-
-_BIG_LINENO_FORMAT = """\
-%3d           0 LOAD_GLOBAL              0 (spam)
-              3 POP_TOP
-              4 LOAD_CONST               0 (None)
-              7 RETURN_VALUE
-"""
-
-class DisTests(unittest.TestCase):
-    def do_disassembly_test(self, func, expected):
-        s = StringIO.StringIO()
-        save_stdout = sys.stdout
-        sys.stdout = s
-        dis.dis(func)
-        sys.stdout = save_stdout
-        got = s.getvalue()
-        # Trim trailing blanks (if any).
-        lines = got.split('\n')
-        lines = [line.rstrip() for line in lines]
-        expected = expected.split("\n")
-        import difflib
-        if expected != lines:
-            self.fail(
-                "events did not match expectation:\n" +
-                "\n".join(difflib.ndiff(expected,
-                                        lines)))
-
-    def test_opmap(self):
-        self.assertEqual(dis.opmap["STOP_CODE"], 0)
-        self.assertIn(dis.opmap["LOAD_CONST"], dis.hasconst)
-        self.assertIn(dis.opmap["STORE_NAME"], dis.hasname)
-
-    def test_opname(self):
-        self.assertEqual(dis.opname[dis.opmap["LOAD_FAST"]], "LOAD_FAST")
-
-    def test_boundaries(self):
-        self.assertEqual(dis.opmap["EXTENDED_ARG"], dis.EXTENDED_ARG)
-        self.assertEqual(dis.opmap["STORE_NAME"], dis.HAVE_ARGUMENT)
-
-    def test_dis(self):
-        self.do_disassembly_test(_f, dis_f)
-
-    def test_bug_708901(self):
-        self.do_disassembly_test(bug708901, dis_bug708901)
-
-    def test_bug_1333982(self):
-        # This one is checking bytecodes generated for an `assert` statement,
-        # so fails if the tests are run with -O.  Skip this test then.
-        if __debug__:
-            self.do_disassembly_test(bug1333982, dis_bug1333982)
-
-    def test_big_linenos(self):
-        def func(count):
-            namespace = {}
-            func = "def foo():\n " + "".join(["\n "] * count + ["spam\n"])
-            exec func in namespace
-            return namespace['foo']
-
-        # Test all small ranges
-        for i in xrange(1, 300):
-            expected = _BIG_LINENO_FORMAT % (i + 2)
-            self.do_disassembly_test(func(i), expected)
-
-        # Test some larger ranges too
-        for i in xrange(300, 5000, 10):
-            expected = _BIG_LINENO_FORMAT % (i + 2)
-            self.do_disassembly_test(func(i), expected)
-
-def test_main():
-    run_unittest(DisTests)
-
-
-if __name__ == "__main__":
-    test_main()
diff --git a/lib-python/modified-2.7/test/test_extcall.py b/lib-python/modified-2.7/test/test_extcall.py
--- a/lib-python/modified-2.7/test/test_extcall.py
+++ b/lib-python/modified-2.7/test/test_extcall.py
@@ -299,7 +299,7 @@
         def f(a):
             return a
         self.assertEqual(f(**{u'a': 4}), 4)
-        self.assertRaises(TypeError, lambda: f(**{u'st&#246;ren': 4}))
+        self.assertRaises(TypeError, f, **{u'st&#246;ren': 4})
         self.assertRaises(TypeError, f, **{u'someLongString':2})
         try:
             f(a=4, **{u'a': 4})
diff --git a/lib-python/2.7/test/test_multibytecodec.py b/lib-python/modified-2.7/test/test_multibytecodec.py
copy from lib-python/2.7/test/test_multibytecodec.py
copy to lib-python/modified-2.7/test/test_multibytecodec.py
--- a/lib-python/2.7/test/test_multibytecodec.py
+++ b/lib-python/modified-2.7/test/test_multibytecodec.py
@@ -42,7 +42,7 @@
         dec = codecs.getdecoder('euc-kr')
         myreplace  = lambda exc: (u'', sys.maxint+1)
         codecs.register_error('test.cjktest', myreplace)
-        self.assertRaises(IndexError, dec,
+        self.assertRaises((IndexError, OverflowError), dec,
                           'apple\x92ham\x93spam', 'test.cjktest')
 
     def test_codingspec(self):
diff --git a/lib-python/2.7/test/test_multibytecodec_support.py b/lib-python/modified-2.7/test/test_multibytecodec_support.py
copy from lib-python/2.7/test/test_multibytecodec_support.py
copy to lib-python/modified-2.7/test/test_multibytecodec_support.py
--- a/lib-python/2.7/test/test_multibytecodec_support.py
+++ b/lib-python/modified-2.7/test/test_multibytecodec_support.py
@@ -107,8 +107,8 @@
         def myreplace(exc):
             return (u'x', sys.maxint + 1)
         codecs.register_error("test.cjktest", myreplace)
-        self.assertRaises(IndexError, self.encode, self.unmappedunicode,
-                          'test.cjktest')
+        self.assertRaises((IndexError, OverflowError), self.encode,
+                          self.unmappedunicode, 'test.cjktest')
 
     def test_callback_None_index(self):
         def myreplace(exc):
diff --git a/lib-python/modified-2.7/test/test_support.py b/lib-python/modified-2.7/test/test_support.py
--- a/lib-python/modified-2.7/test/test_support.py
+++ b/lib-python/modified-2.7/test/test_support.py
@@ -1066,7 +1066,7 @@
         if '--pdb' in sys.argv:
             import pdb, traceback
             traceback.print_tb(exc_info[2])
-            pdb.post_mortem(exc_info[2], pdb.Pdb)
+            pdb.post_mortem(exc_info[2])
 
 # ----------------------------------
 
diff --git a/lib-python/modified-2.7/test/test_weakref.py b/lib-python/modified-2.7/test/test_weakref.py
--- a/lib-python/modified-2.7/test/test_weakref.py
+++ b/lib-python/modified-2.7/test/test_weakref.py
@@ -993,13 +993,13 @@
         self.assertTrue(len(weakdict) == 2)
         k, v = weakdict.popitem()
         self.assertTrue(len(weakdict) == 1)
-        if k is key1:
+        if k == key1:
             self.assertTrue(v is value1)
         else:
             self.assertTrue(v is value2)
         k, v = weakdict.popitem()
         self.assertTrue(len(weakdict) == 0)
-        if k is key1:
+        if k == key1:
             self.assertTrue(v is value1)
         else:
             self.assertTrue(v is value2)
diff --git a/lib_pypy/_ctypes/__init__.py b/lib_pypy/_ctypes/__init__.py
--- a/lib_pypy/_ctypes/__init__.py
+++ b/lib_pypy/_ctypes/__init__.py
@@ -18,7 +18,16 @@
 if _os.name in ("nt", "ce"):
     from _rawffi import FormatError
     from _rawffi import check_HRESULT as _check_HRESULT
-    CopyComPointer = None # XXX
+
+    def CopyComPointer(src, dst):
+        from ctypes import c_void_p, cast
+        if src:
+            hr = src[0][0].AddRef(src)
+            if hr & 0x80000000:
+                return hr
+        dst[0] = cast(src, c_void_p).value
+        return 0
+
     LoadLibrary = dlopen
 
 from _rawffi import FUNCFLAG_STDCALL, FUNCFLAG_CDECL, FUNCFLAG_PYTHONAPI
diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py
--- a/lib_pypy/_ctypes/array.py
+++ b/lib_pypy/_ctypes/array.py
@@ -208,6 +208,9 @@
     def _get_buffer_value(self):
         return self._buffer.buffer
 
+    def _to_ffi_param(self):
+        return self._get_buffer_value()
+
 ARRAY_CACHE = {}
 
 def create_array_type(base, length):
diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py
--- a/lib_pypy/_ctypes/basics.py
+++ b/lib_pypy/_ctypes/basics.py
@@ -1,5 +1,6 @@
 
 import _rawffi
+import _ffi
 import sys
 
 keepalive_key = str # XXX fix this when provided with test
@@ -46,6 +47,14 @@
         else:
             return self.from_param(as_parameter)
 
+    def get_ffi_param(self, value):
+        return self.from_param(value)._to_ffi_param()
+
+    def get_ffi_argtype(self):
+        if self._ffiargtype:
+            return self._ffiargtype
+        return _shape_to_ffi_type(self._ffiargshape)
+
     def _CData_output(self, resbuffer, base=None, index=-1):
         #assert isinstance(resbuffer, _rawffi.ArrayInstance)
         """Used when data exits ctypes and goes into user code.
@@ -99,6 +108,7 @@
     """
     __metaclass__ = _CDataMeta
     _objects = None
+    _ffiargtype = None
 
     def __init__(self, *args, **kwds):
         raise TypeError("%s has no type" % (type(self),))
@@ -119,11 +129,20 @@
     def _get_buffer_value(self):
         return self._buffer[0]
 
+    def _to_ffi_param(self):
+        if self.__class__._is_pointer_like():
+            return self._get_buffer_value()
+        else:
+            return self.value
+
     def __buffer__(self):
         return buffer(self._buffer)
 
     def _get_b_base(self):
-        return self._base
+        try:
+            return self._base
+        except AttributeError:
+            return None
     _b_base_ = property(_get_b_base)
     _b_needsfree_ = False
 
@@ -150,7 +169,7 @@
     return pointer(cdata)
 
 def cdata_from_address(self, address):
-    # fix the address, in case it's unsigned
+    # fix the address: turn it into as unsigned, in case it's a negative number
     address = address & (sys.maxint * 2 + 1)
     instance = self.__new__(self)
     lgt = getattr(self, '_length_', 1)
@@ -159,3 +178,50 @@
 
 def addressof(tp):
     return tp._buffer.buffer
+
+
+# ----------------------------------------------------------------------
+
+def is_struct_shape(shape):
+    # see the corresponding code to set the shape in
+    # _ctypes.structure._set_shape
+    return (isinstance(shape, tuple) and
+            len(shape) == 2 and
+            isinstance(shape[0], _rawffi.Structure) and
+            shape[1] == 1)
+
+def _shape_to_ffi_type(shape):
+    try:
+        return _shape_to_ffi_type.typemap[shape]
+    except KeyError:
+        pass
+    if is_struct_shape(shape):
+        return shape[0].get_ffi_type()
+    #
+    assert False, 'unknown shape %s' % (shape,)
+
+
+_shape_to_ffi_type.typemap =  {
+    'c' : _ffi.types.char,
+    'b' : _ffi.types.sbyte,
+    'B' : _ffi.types.ubyte,
+    'h' : _ffi.types.sshort,
+    'u' : _ffi.types.unichar,
+    'H' : _ffi.types.ushort,
+    'i' : _ffi.types.sint,
+    'I' : _ffi.types.uint,
+    'l' : _ffi.types.slong,
+    'L' : _ffi.types.ulong,
+    'q' : _ffi.types.slonglong,
+    'Q' : _ffi.types.ulonglong,
+    'f' : _ffi.types.float,
+    'd' : _ffi.types.double,
+    's' : _ffi.types.void_p,
+    'P' : _ffi.types.void_p,
+    'z' : _ffi.types.void_p,
+    'O' : _ffi.types.void_p,
+    'Z' : _ffi.types.void_p,
+    'X' : _ffi.types.void_p,
+    'v' : _ffi.types.sshort,
+    }
+
diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py
--- a/lib_pypy/_ctypes/function.py
+++ b/lib_pypy/_ctypes/function.py
@@ -1,12 +1,15 @@
+
+from _ctypes.basics import _CData, _CDataMeta, cdata_from_address
+from _ctypes.primitive import SimpleType, _SimpleCData
+from _ctypes.basics import ArgumentError, keepalive_key
+from _ctypes.basics import is_struct_shape
+from _ctypes.builtin import set_errno, set_last_error
 import _rawffi
+import _ffi
 import sys
 import traceback
 import warnings
 
-from _ctypes.basics import ArgumentError, keepalive_key
-from _ctypes.basics import _CData, _CDataMeta, cdata_from_address
-from _ctypes.builtin import set_errno, set_last_error
-from _ctypes.primitive import SimpleType
 
 # XXX this file needs huge refactoring I fear
 
@@ -24,6 +27,7 @@
 
 WIN64 = sys.platform == 'win32' and sys.maxint == 2**63 - 1
 
+
 def get_com_error(errcode, riid, pIunk):
     "Win32 specific: build a COM Error exception"
     # XXX need C support code
@@ -36,6 +40,7 @@
     funcptr.restype = int
     return funcptr(*args)
 
+
 class CFuncPtrType(_CDataMeta):
     # XXX write down here defaults and such things
 
@@ -50,6 +55,7 @@
 
     from_address = cdata_from_address
 
+
 class CFuncPtr(_CData):
     __metaclass__ = CFuncPtrType
 
@@ -65,10 +71,12 @@
     callable = None
     _ptr = None
     _buffer = None
+    _address = None
     # win32 COM properties
     _paramflags = None
     _com_index = None
     _com_iid = None
+    _is_fastpath = False
 
     __restype_set = False
 
@@ -85,8 +93,11 @@
                     raise TypeError(
                         "item %d in _argtypes_ has no from_param method" % (
                             i + 1,))
-            self._argtypes_ = argtypes
-
+            #
+            if all([hasattr(argtype, '_ffiargshape') for argtype in argtypes]):
+                fastpath_cls = make_fastpath_subclass(self.__class__)
+                fastpath_cls.enable_fastpath_maybe(self)
+            self._argtypes_ = list(argtypes)
     argtypes = property(_getargtypes, _setargtypes)
 
     def _getparamflags(self):
@@ -133,6 +144,7 @@
 
     paramflags = property(_getparamflags, _setparamflags)
 
+
     def _getrestype(self):
         return self._restype_
 
@@ -146,27 +158,24 @@
                 callable(restype)):
             raise TypeError("restype must be a type, a callable, or None")
         self._restype_ = restype
-
+        
     def _delrestype(self):
         self._ptr = None
         del self._restype_
-
+        
     restype = property(_getrestype, _setrestype, _delrestype)
 
     def _geterrcheck(self):
         return getattr(self, '_errcheck_', None)
-
     def _seterrcheck(self, errcheck):
         if not callable(errcheck):
             raise TypeError("The errcheck attribute must be callable")
         self._errcheck_ = errcheck
-
     def _delerrcheck(self):
         try:
             del self._errcheck_
         except AttributeError:
             pass
-
     errcheck = property(_geterrcheck, _seterrcheck, _delerrcheck)
 
     def _ffishapes(self, args, restype):
@@ -181,6 +190,14 @@
             restype = 'O' # void
         return argtypes, restype
 
+    def _set_address(self, address):
+        if not self._buffer:
+            self._buffer = _rawffi.Array('P')(1)
+        self._buffer[0] = address
+
+    def _get_address(self):
+        return self._buffer[0]
+
     def __init__(self, *args):
         self.name = None
         self._objects = {keepalive_key(0):self}
@@ -188,7 +205,7 @@
 
         # Empty function object -- this is needed for casts
         if not args:
-            self._buffer = _rawffi.Array('P')(1)
+            self._set_address(0)
             return
 
         argsl = list(args)
@@ -196,20 +213,24 @@
 
         # Direct construction from raw address
         if isinstance(argument, (int, long)) and not argsl:
-            ffiargs, ffires = self._ffishapes(self._argtypes_, self._restype_)
-            self._ptr = _rawffi.FuncPtr(argument, ffiargs, ffires, self._flags_)
-            self._buffer = self._ptr.byptr()
+            self._set_address(argument)
+            restype = self._restype_
+            if restype is None:
+                import ctypes
+                restype = ctypes.c_int
+            self._ptr = self._getfuncptr_fromaddress(self._argtypes_, restype)
             return
 
-        # A callback into Python
+        
+        # A callback into python
         if callable(argument) and not argsl:
             self.callable = argument
             ffiargs, ffires = self._ffishapes(self._argtypes_, self._restype_)
             if self._restype_ is None:
                 ffires = None
-            self._ptr = _rawffi.CallbackPtr(self._wrap_callable(
-                argument, self.argtypes
-                ), ffiargs, ffires, self._flags_)
+            self._ptr = _rawffi.CallbackPtr(self._wrap_callable(argument,
+                                                                self.argtypes),
+                                            ffiargs, ffires, self._flags_)
             self._buffer = self._ptr.byptr()
             return
 
@@ -218,7 +239,7 @@
             import ctypes
             self.name, dll = argument
             if isinstance(dll, str):
-                self.dll = ctypes.CDLL(dll)
+                self.dll = ctypes.CDLL(self.dll)
             else:
                 self.dll = dll
             if argsl:
@@ -227,7 +248,7 @@
                     raise TypeError("Unknown constructor %s" % (args,))
             # We need to check dll anyway
             ptr = self._getfuncptr([], ctypes.c_int)
-            self._buffer = ptr.byptr()
+            self._set_address(ptr.getaddr())
             return
 
         # A COM function call, by index
@@ -270,15 +291,15 @@
                     # than the length of the argtypes tuple.
                     args = args[:len(self._argtypes_)]
             else:
-                plural = len(argtypes) > 1 and "s" or ""
+                plural = len(self._argtypes_) > 1 and "s" or ""
                 raise TypeError(
                     "This function takes %d argument%s (%s given)"
-                    % (len(argtypes), plural, len(args)))
+                    % (len(self._argtypes_), plural, len(args)))
 
             # check that arguments are convertible
             ## XXX Not as long as ctypes.cast is a callback function with
             ## py_object arguments...
-            ## self._convert_args(argtypes, args, {})
+            ## self._convert_args(self._argtypes_, args, {})
 
             try:
                 res = self.callable(*args)
@@ -306,83 +327,73 @@
                 raise ValueError(
                     "native COM method call without 'this' parameter"
                     )
-            thisarg = cast(args[0], POINTER(POINTER(c_void_p))).contents
-            argtypes = [c_void_p] + list(argtypes)
-            args = list(args)
-            args[0] = args[0].value
+            thisarg = cast(args[0], POINTER(POINTER(c_void_p)))
+            newargs, argtypes, outargs = self._convert_args(argtypes, args[1:], kwargs)
+            newargs.insert(0, args[0].value)
+            argtypes.insert(0, c_void_p)
         else:
             thisarg = None
+            newargs, argtypes, outargs = self._convert_args(argtypes, args, kwargs)
 
-        args, outargs = self._convert_args(argtypes, args, kwargs)
-        argtypes = [type(arg) for arg in args]
+        funcptr = self._getfuncptr(argtypes, self._restype_, thisarg)
+        result = self._call_funcptr(funcptr, *newargs)
+        result = self._do_errcheck(result, args)
 
-        restype = self._restype_
-        funcptr = self._getfuncptr(argtypes, restype, thisarg)
+        if not outargs:
+            return result
+
+        simple_cdata = type(c_void_p()).__bases__[0]
+        outargs = [x.value if type(x).__bases__[0] is simple_cdata else x
+                   for x in outargs]
+
+        if len(outargs) == 1:
+            return outargs[0]
+        return tuple(outargs)
+
+    def _call_funcptr(self, funcptr, *newargs):
+
         if self._flags_ & _rawffi.FUNCFLAG_USE_ERRNO:
             set_errno(_rawffi.get_errno())
         if self._flags_ & _rawffi.FUNCFLAG_USE_LASTERROR:
             set_last_error(_rawffi.get_last_error())
         try:
-            resbuffer = funcptr(*[arg._get_buffer_for_param()._buffer
-                                  for arg in args])
+            result = funcptr(*newargs)
         finally:
             if self._flags_ & _rawffi.FUNCFLAG_USE_ERRNO:
                 set_errno(_rawffi.get_errno())
             if self._flags_ & _rawffi.FUNCFLAG_USE_LASTERROR:
                 set_last_error(_rawffi.get_last_error())
+        #
+        return self._build_result(self._restype_, result, newargs)
 
-        result = None
-        if self._com_index:
-            if resbuffer[0] & 0x80000000:
-                raise get_com_error(resbuffer[0],
-                                    self._com_iid, args[0])
-            else:
-                result = int(resbuffer[0])
-        elif restype is not None:
-            checker = getattr(self.restype, '_check_retval_', None)
-            if checker:
-                val = restype(resbuffer[0])
-                # the original ctypes seems to make the distinction between
-                # classes defining a new type, and their subclasses
-                if '_type_' in restype.__dict__:
-                    val = val.value
-                result = checker(val)
-            elif not isinstance(restype, _CDataMeta):
-                result = restype(resbuffer[0])
-            else:
-                result = restype._CData_retval(resbuffer)
-
+    def _do_errcheck(self, result, args):
         # The 'errcheck' protocol
         if self._errcheck_:
             v = self._errcheck_(result, self, args)
             # If the errcheck funtion failed, let it throw
-            # If the errcheck function returned callargs unchanged,
+            # If the errcheck function returned newargs unchanged,
             # continue normal processing.
             # If the errcheck function returned something else,
             # use that as result.
             if v is not args:
-                result = v
+                return v
+        return result
 
-        if not outargs:
-            return result
-
-        if len(outargs) == 1:
-            return outargs[0]
-
-        return tuple(outargs)
+    def _getfuncptr_fromaddress(self, argtypes, restype):
+        address = self._get_address()
+        ffiargs = [argtype.get_ffi_argtype() for argtype in argtypes]
+        ffires = restype.get_ffi_argtype()
+        return _ffi.FuncPtr.fromaddr(address, '', ffiargs, ffires)
 
     def _getfuncptr(self, argtypes, restype, thisarg=None):
-        if self._ptr is not None and argtypes is self._argtypes_:
+        if self._ptr is not None and (argtypes is self._argtypes_ or argtypes == self._argtypes_):
             return self._ptr
         if restype is None or not isinstance(restype, _CDataMeta):
             import ctypes
             restype = ctypes.c_int
-        argshapes = [arg._ffiargshape for arg in argtypes]
-        resshape = restype._ffiargshape
         if self._buffer is not None:
-            ptr = _rawffi.FuncPtr(self._buffer[0], argshapes, resshape,
-                                  self._flags_)
-            if argtypes is self._argtypes_:
+            ptr = self._getfuncptr_fromaddress(argtypes, restype)
+            if argtypes == self._argtypes_:
                 self._ptr = ptr
             return ptr
 
@@ -390,15 +401,21 @@
             # extract the address from the object's virtual table
             if not thisarg:
                 raise ValueError("COM method call without VTable")
-            ptr = thisarg[self._com_index - 0x1000]
-            return _rawffi.FuncPtr(ptr, argshapes, resshape, self._flags_)
-
+            ptr = thisarg[0][self._com_index - 0x1000]
+            ffiargs = [argtype.get_ffi_argtype() for argtype in argtypes]
+            ffires = restype.get_ffi_argtype()
+            return _ffi.FuncPtr.fromaddr(ptr, '', ffiargs, ffires)
+        
         cdll = self.dll._handle
         try:
-            return cdll.ptr(self.name, argshapes, resshape, self._flags_)
+            ffi_argtypes = [argtype.get_ffi_argtype() for argtype in argtypes]
+            ffi_restype = restype.get_ffi_argtype()
+            self._ptr = cdll.getfunc(self.name, ffi_argtypes, ffi_restype)
+            return self._ptr
         except AttributeError:
             if self._flags_ & _rawffi.FUNCFLAG_CDECL:
                 raise
+
             # Win64 has no stdcall calling conv, so it should also not have the
             # name mangling of it.
             if WIN64:
@@ -409,23 +426,33 @@
             for i in range(33):
                 mangled_name = "_%s@%d" % (self.name, i*4)
                 try:
-                    return cdll.ptr(mangled_name, argshapes, resshape,
-                                    self._flags_)
+                    return cdll.getfunc(mangled_name,
+                                        ffi_argtypes, ffi_restype,
+                                        # XXX self._flags_
+                                        )
                 except AttributeError:
                     pass
             raise
 
-    @staticmethod
-    def _conv_param(argtype, arg):
-        from ctypes import c_char_p, c_wchar_p, c_void_p, c_int
+    @classmethod
+    def _conv_param(cls, argtype, arg):
+        if isinstance(argtype, _CDataMeta):
+            #arg = argtype.from_param(arg)
+            arg = argtype.get_ffi_param(arg)
+            return arg, argtype
+        
         if argtype is not None:
             arg = argtype.from_param(arg)
         if hasattr(arg, '_as_parameter_'):
             arg = arg._as_parameter_
         if isinstance(arg, _CData):
-            # The usual case when argtype is defined
-            cobj = arg
-        elif isinstance(arg, str):
+            return arg._to_ffi_param(), type(arg)
+        #
+        # non-usual case: we do the import here to save a lot of code in the
+        # jit trace of the normal case
+        from ctypes import c_char_p, c_wchar_p, c_void_p, c_int
+        #
+        if isinstance(arg, str):
             cobj = c_char_p(arg)
         elif isinstance(arg, unicode):
             cobj = c_wchar_p(arg)
@@ -435,18 +462,16 @@
             cobj = c_int(arg)
         else:
             raise TypeError("Don't know how to handle %s" % (arg,))
-        return cobj
+
+        return cobj._to_ffi_param(), type(cobj)
 
     def _convert_args(self, argtypes, args, kwargs, marker=object()):
-        callargs = []
+        newargs = []
         outargs = []
+        newargtypes = []
         total = len(args)
         paramflags = self._paramflags
-
-        if self._com_index:
-            inargs_idx = 1
-        else:
-            inargs_idx = 0
+        inargs_idx = 0
 
         if not paramflags and total < len(argtypes):
             raise TypeError("not enough arguments")
@@ -470,8 +495,9 @@
                     val = defval
                     if val is marker:
                         val = 0
-                    wrapped = self._conv_param(argtype, val)
-                    callargs.append(wrapped)
+                    newarg, newargtype = self._conv_param(argtype, val)
+                    newargs.append(newarg)
+                    newargtypes.append(newargtype)
                 elif flag in (0, PARAMFLAG_FIN):
                     if inargs_idx < total:
                         val = args[inargs_idx]
@@ -485,38 +511,102 @@
                         raise TypeError("required argument '%s' missing" % name)
                     else:
                         raise TypeError("not enough arguments")
-                    wrapped = self._conv_param(argtype, val)
-                    callargs.append(wrapped)
+                    newarg, newargtype = self._conv_param(argtype, val)
+                    newargs.append(newarg)
+                    newargtypes.append(newargtype)
                 elif flag == PARAMFLAG_FOUT:
                     if defval is not marker:
                         outargs.append(defval)
-                        wrapped = self._conv_param(argtype, defval)
+                        newarg, newargtype = self._conv_param(argtype, defval)
                     else:
                         import ctypes
                         val = argtype._type_()
                         outargs.append(val)
-                        wrapped = ctypes.byref(val)
-                    callargs.append(wrapped)
+                        newarg = ctypes.byref(val)
+                        newargtype = type(newarg)
+                    newargs.append(newarg)
+                    newargtypes.append(newargtype)
                 else:
                     raise ValueError("paramflag %d not yet implemented" % flag)
             else:
                 try:
-                    wrapped = self._conv_param(argtype, args[i])
+                    newarg, newargtype = self._conv_param(argtype, args[i])
                 except (UnicodeError, TypeError, ValueError), e:
                     raise ArgumentError(str(e))
-                callargs.append(wrapped)
+                newargs.append(newarg)
+                newargtypes.append(newargtype)
                 inargs_idx += 1
 
-        if len(callargs) < total:
-            extra = args[len(callargs):]
+        if len(newargs) < len(args):
+            extra = args[len(newargs):]
             for i, arg in enumerate(extra):
                 try:
-                    wrapped = self._conv_param(None, arg)
+                    newarg, newargtype = self._conv_param(None, arg)
                 except (UnicodeError, TypeError, ValueError), e:
                     raise ArgumentError(str(e))
-                callargs.append(wrapped)
+                newargs.append(newarg)
+                newargtypes.append(newargtype)
+        return newargs, newargtypes, outargs
 
-        return callargs, outargs
+    
+    def _wrap_result(self, restype, result):
+        """
+        Convert from low-level repr of the result to the high-level python
+        one.
+        """
+        # hack for performance: if restype is a "simple" primitive type, don't
+        # allocate the buffer because it's going to be thrown away immediately
+        if restype.__bases__[0] is _SimpleCData and not restype._is_pointer_like():
+            return result
+        #
+        shape = restype._ffishape
+        if is_struct_shape(shape):
+            buf = result
+        else:
+            buf = _rawffi.Array(shape)(1, autofree=True)
+            buf[0] = result
+        retval = restype._CData_retval(buf)
+        return retval
+
+    def _build_result(self, restype, result, argsandobjs):
+        """Build the function result:
+           If there is no OUT parameter, return the actual function result
+           If there is one OUT parameter, return it
+           If there are many OUT parameters, return a tuple"""
+
+        # XXX: note for the future: the function used to take a "resbuffer",
+        # i.e. an array of ints. Now it takes a result, which is already a
+        # python object. All places that do "resbuffer[0]" should check that
+        # result is actually an int and just use it.
+        #
+        # Also, argsandobjs used to be "args" in __call__, now it's "newargs"
+        # (i.e., the already unwrapped objects). It's used only when we have a
+        # PARAMFLAG_FOUT and it's probably wrong, I'll fix it when I find a
+        # failing test
+
+        retval = None
+
+        if restype is not None:
+            checker = getattr(self.restype, '_check_retval_', None)
+            if checker:
+                val = restype(result)
+                # the original ctypes seems to make the distinction between
+                # classes defining a new type, and their subclasses
+                if '_type_' in restype.__dict__:
+                    val = val.value
+                # XXX Raise a COMError when restype is HRESULT and
+                # checker(val) fails.  How to check for restype == HRESULT?
+                if self._com_index:
+                    if result & 0x80000000:
+                        raise get_com_error(result, None, None)
+                else:
+                    retval = checker(val)
+            elif not isinstance(restype, _CDataMeta):
+                retval = restype(result)
+            else:
+                retval = self._wrap_result(restype, result)
+
+        return retval
 
     def __nonzero__(self):
         return self._com_index is not None or bool(self._buffer[0])
@@ -532,3 +622,61 @@
                 self._ptr.free()
                 self._ptr = None
             self._needs_free = False
+
+
+def make_fastpath_subclass(CFuncPtr):
+    if CFuncPtr._is_fastpath:
+        return CFuncPtr
+    #
+    try:
+        return make_fastpath_subclass.memo[CFuncPtr]
+    except KeyError:
+        pass
+
+    class CFuncPtrFast(CFuncPtr):
+
+        _is_fastpath = True
+        _slowpath_allowed = True # set to False by tests
+
+        @classmethod
+        def enable_fastpath_maybe(cls, obj):
+            if (obj.callable is None and
+                obj._com_index is None):
+                obj.__class__ = cls
+
+        def __rollback(self):
+            assert self._slowpath_allowed
+            self.__class__ = CFuncPtr
+
+        # disable the fast path if we reset argtypes
+        def _setargtypes(self, argtypes):
+            self.__rollback()
+            self._setargtypes(argtypes)
+        argtypes = property(CFuncPtr._getargtypes, _setargtypes)
+
+        def _setcallable(self, func):
+            self.__rollback()
+            self.callable = func
+        callable = property(lambda x: None, _setcallable)
+
+        def _setcom_index(self, idx):
+            self.__rollback()
+            self._com_index = idx
+        _com_index = property(lambda x: None, _setcom_index)
+
+        def __call__(self, *args):
+            thisarg = None
+            argtypes = self._argtypes_
+            restype = self._restype_
+            funcptr = self._getfuncptr(argtypes, restype, thisarg)
+            try:
+                result = self._call_funcptr(funcptr, *args)
+                result = self._do_errcheck(result, args)
+            except (TypeError, ArgumentError): # XXX, should be FFITypeError
+                assert self._slowpath_allowed
+                return CFuncPtr.__call__(self, *args)
+            return result
+
+    make_fastpath_subclass.memo[CFuncPtr] = CFuncPtrFast
+    return CFuncPtrFast
+make_fastpath_subclass.memo = {}
diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py
--- a/lib_pypy/_ctypes/pointer.py
+++ b/lib_pypy/_ctypes/pointer.py
@@ -1,6 +1,7 @@
 
 import _rawffi
-from _ctypes.basics import _CData, _CDataMeta, cdata_from_address
+import _ffi
+from _ctypes.basics import _CData, _CDataMeta, cdata_from_address, ArgumentError
 from _ctypes.basics import keepalive_key, store_reference, ensure_objects
 from _ctypes.basics import sizeof, byref
 from _ctypes.array import Array, array_get_slice_params, array_slice_getitem,\
@@ -19,7 +20,7 @@
             length     = 1,
             _ffiargshape = 'P',
             _ffishape  = 'P',
-            _fficompositesize = None
+            _fficompositesize = None,
         )
         # XXX check if typedict['_type_'] is any sane
         # XXX remember about paramfunc
@@ -66,6 +67,7 @@
         self._ffiarray = ffiarray
         self.__init__ = __init__
         self._type_ = TP
+        self._ffiargtype = _ffi.types.Pointer(TP.get_ffi_argtype())
 
     from_address = cdata_from_address
 
@@ -114,6 +116,17 @@
 
     contents = property(getcontents, setcontents)
 
+    def _as_ffi_pointer_(self, ffitype):
+        return as_ffi_pointer(self, ffitype)
+
+def as_ffi_pointer(value, ffitype):
+    my_ffitype = type(value).get_ffi_argtype()
+    # for now, we always allow types.pointer, else a lot of tests
+    # break. We need to rethink how pointers are represented, though
+    if my_ffitype is not ffitype and ffitype is not _ffi.types.void_p:
+        raise ArgumentError, "expected %s instance, got %s" % (type(value), ffitype)
+    return value._get_buffer_value()
+
 def _cast_addr(obj, _, tp):
     if not (isinstance(tp, _CDataMeta) and tp._is_pointer_like()):
         raise TypeError("cast() argument 2 must be a pointer type, not %s"
diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py
--- a/lib_pypy/_ctypes/primitive.py
+++ b/lib_pypy/_ctypes/primitive.py
@@ -1,3 +1,4 @@
+import _ffi
 import _rawffi
 import weakref
 import sys
@@ -8,7 +9,7 @@
      CArgObject
 from _ctypes.builtin import ConvMode
 from _ctypes.array import Array
-from _ctypes.pointer import _Pointer
+from _ctypes.pointer import _Pointer, as_ffi_pointer
 
 class NULL(object):
     pass
@@ -140,6 +141,8 @@
                     value = 0
                 self._buffer[0] = value
             result.value = property(_getvalue, _setvalue)
+            result._ffiargtype = _ffi.types.Pointer(_ffi.types.char)
+
         elif tp == 'Z':
             # c_wchar_p
             def _getvalue(self):
@@ -162,6 +165,7 @@
                     value = 0
                 self._buffer[0] = value
             result.value = property(_getvalue, _setvalue)
+            result._ffiargtype = _ffi.types.Pointer(_ffi.types.unichar)
 
         elif tp == 'P':
             # c_void_p
@@ -212,10 +216,15 @@
             result.value = property(_getvalue, _setvalue)
 
         elif tp == 'X':
-            from ctypes import windll
-            SysAllocStringLen = windll.oleaut32.SysAllocStringLen
-            SysStringLen = windll.oleaut32.SysStringLen
-            SysFreeString = windll.oleaut32.SysFreeString
+            from ctypes import WinDLL
+            # Use WinDLL("oleaut32") instead of windll.oleaut32
+            # because the latter is a shared (cached) object; and
+            # other code may set their own restypes. We need out own
+            # restype here.
+            oleaut32 = WinDLL("oleaut32")
+            SysAllocStringLen = oleaut32.SysAllocStringLen
+            SysStringLen = oleaut32.SysStringLen
+            SysFreeString = oleaut32.SysFreeString
             def _getvalue(self):
                 addr = self._buffer[0]
                 if addr == 0:
@@ -248,6 +257,12 @@
                     self._buffer[0] = 0  # VARIANT_FALSE
             result.value = property(_getvalue, _setvalue)
 
+        # make pointer-types compatible with the _ffi fast path
+        if result._is_pointer_like():
+            def _as_ffi_pointer_(self, ffitype):
+                return as_ffi_pointer(self, ffitype)
+            result._as_ffi_pointer_ = _as_ffi_pointer_
+            
         return result
 
     from_address = cdata_from_address
diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py
--- a/lib_pypy/_ctypes/structure.py
+++ b/lib_pypy/_ctypes/structure.py
@@ -240,6 +240,9 @@
     def _get_buffer_value(self):
         return self._buffer.buffer
 
+    def _to_ffi_param(self):
+        return self._buffer
+
 
 class StructureMeta(StructOrUnionMeta):
     _is_union = False
diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py
--- a/lib_pypy/_sqlite3.py
+++ b/lib_pypy/_sqlite3.py
@@ -275,7 +275,8 @@
     return unicode(x, 'utf-8')
 
 class Connection(object):
-    def __init__(self, database, isolation_level="", detect_types=0, timeout=None, cached_statements=None, factory=None):
+    def __init__(self, database, timeout=5.0, detect_types=0, isolation_level="",
+                 check_same_thread=True, factory=None, cached_statements=100):
         self.db = c_void_p()
         if sqlite.sqlite3_open(database, byref(self.db)) != SQLITE_OK:
             raise OperationalError("Could not open database")
@@ -308,7 +309,8 @@
         self._aggregates = {}
         self.aggregate_instances = {}
         self._collations = {}
-        self.thread_ident = thread_get_ident()
+        if check_same_thread:
+            self.thread_ident = thread_get_ident()
 
     def _get_exception(self, error_code = None):
         if error_code is None:
diff --git a/lib_pypy/ctypes_support.py b/lib_pypy/ctypes_support.py
--- a/lib_pypy/ctypes_support.py
+++ b/lib_pypy/ctypes_support.py
@@ -10,8 +10,8 @@
 # __________ the standard C library __________
 
 if sys.platform == 'win32':
-    import _rawffi
-    standard_c_lib = ctypes.CDLL('msvcrt', handle=_rawffi.get_libc())
+    import _ffi
+    standard_c_lib = ctypes.CDLL('msvcrt', handle=_ffi.get_libc())
 else:
     standard_c_lib = ctypes.CDLL(ctypes.util.find_library('c'))
 
diff --git a/lib_pypy/datetime.py b/lib_pypy/datetime.py
--- a/lib_pypy/datetime.py
+++ b/lib_pypy/datetime.py
@@ -1422,12 +1422,17 @@
             converter = _time.localtime
         else:
             converter = _time.gmtime
-        if 1 - (t % 1.0) < 0.000001:
-            t = float(int(t)) + 1
-        if t < 0:
-            t -= 1
+        if t < 0.0:
+            us = int(round(((-t) % 1.0) * 1000000))
+            if us > 0:
+                us = 1000000 - us
+                t -= 1.0
+        else:
+            us = int(round((t % 1.0) * 1000000))
+            if us == 1000000:
+                us = 0
+                t += 1.0
         y, m, d, hh, mm, ss, weekday, jday, dst = converter(t)
-        us = int((t % 1.0) * 1000000)
         ss = min(ss, 59)    # clamp out leap seconds if the platform has them
         result = cls(y, m, d, hh, mm, ss, us, tz)
         if tz is not None:
diff --git a/lib_pypy/msvcrt.py b/lib_pypy/msvcrt.py
--- a/lib_pypy/msvcrt.py
+++ b/lib_pypy/msvcrt.py
@@ -46,4 +46,42 @@
         e = get_errno()
         raise IOError(e, errno.errorcode[e])
 
+# Console I/O routines
+
+kbhit = _c._kbhit
+kbhit.argtypes = []
+kbhit.restype = ctypes.c_int
+
+getch = _c._getch
+getch.argtypes = []
+getch.restype = ctypes.c_char
+
+getwch = _c._getwch
+getwch.argtypes = []
+getwch.restype = ctypes.c_wchar
+
+getche = _c._getche
+getche.argtypes = []
+getche.restype = ctypes.c_char
+
+getwche = _c._getwche
+getwche.argtypes = []
+getwche.restype = ctypes.c_wchar
+
+putch = _c._putch
+putch.argtypes = [ctypes.c_char]
+putch.restype = None
+
+putwch = _c._putwch
+putwch.argtypes = [ctypes.c_wchar]
+putwch.restype = None
+
+ungetch = _c._ungetch
+ungetch.argtypes = [ctypes.c_char]
+ungetch.restype = None
+
+ungetwch = _c._ungetwch
+ungetwch.argtypes = [ctypes.c_wchar]
+ungetwch.restype = None
+
 del ctypes
diff --git a/lib_pypy/pwd.py b/lib_pypy/pwd.py
--- a/lib_pypy/pwd.py
+++ b/lib_pypy/pwd.py
@@ -16,6 +16,7 @@
 
 from ctypes_support import standard_c_lib as libc
 from ctypes import Structure, POINTER, c_int, c_char_p, c_long
+from _structseq import structseqtype, structseqfield
 
 try: from __pypy__ import builtinify
 except ImportError: builtinify = lambda f: f
@@ -68,7 +69,7 @@
             yield self.pw_dir
             yield self.pw_shell
 
-class struct_passwd(tuple):
+class struct_passwd:
     """
     pwd.struct_passwd: Results from getpw*() routines.
 
@@ -76,15 +77,15 @@
       (pw_name,pw_passwd,pw_uid,pw_gid,pw_gecos,pw_dir,pw_shell)
     or via the object attributes as named in the above tuple.
     """
-    def __init__(self, passwd):
-        self.pw_name = passwd.pw_name
-        self.pw_passwd = passwd.pw_passwd
-        self.pw_uid = passwd.pw_uid
-        self.pw_gid = passwd.pw_gid
-        self.pw_gecos = passwd.pw_gecos
-        self.pw_dir = passwd.pw_dir
-        self.pw_shell = passwd.pw_shell
-        tuple.__init__(self, passwd)
+    __metaclass__ = structseqtype
+    name = "pwd.struct_passwd"
+    pw_name = structseqfield(0)
+    pw_passwd = structseqfield(1)
+    pw_uid = structseqfield(2)
+    pw_gid = structseqfield(3)
+    pw_gecos = structseqfield(4)
+    pw_dir = structseqfield(5)
+    pw_shell = structseqfield(6)
 
 passwd_p = POINTER(passwd)
 
diff --git a/lib_pypy/pypy_test/test_datetime.py b/lib_pypy/pypy_test/test_datetime.py
--- a/lib_pypy/pypy_test/test_datetime.py
+++ b/lib_pypy/pypy_test/test_datetime.py
@@ -32,4 +32,28 @@
     assert datetime.datetime.utcfromtimestamp(a).microsecond == 0
     assert datetime.datetime.utcfromtimestamp(a).second == 1
 
-    
+def test_more_datetime_rounding():
+    # this test verified on top of CPython 2.7 (using a plain
+    # "import datetime" above)
+    expected_results = {
+        -1000.0: 'datetime.datetime(1970, 1, 1, 0, 43, 20)',
+        -999.9999996: 'datetime.datetime(1970, 1, 1, 0, 43, 20)',
+        -999.4: 'datetime.datetime(1970, 1, 1, 0, 43, 20, 600000)',
+        -999.0000004: 'datetime.datetime(1970, 1, 1, 0, 43, 21)',
+        -1.0: 'datetime.datetime(1970, 1, 1, 0, 59, 59)',
+        -0.9999996: 'datetime.datetime(1970, 1, 1, 0, 59, 59)',
+        -0.4: 'datetime.datetime(1970, 1, 1, 0, 59, 59, 600000)',
+        -0.0000004: 'datetime.datetime(1970, 1, 1, 1, 0)',
+        0.0: 'datetime.datetime(1970, 1, 1, 1, 0)',
+        0.0000004: 'datetime.datetime(1970, 1, 1, 1, 0)',
+        0.4: 'datetime.datetime(1970, 1, 1, 1, 0, 0, 400000)',
+        0.9999996: 'datetime.datetime(1970, 1, 1, 1, 0, 1)',
+        1000.0: 'datetime.datetime(1970, 1, 1, 1, 16, 40)',
+        1000.0000004: 'datetime.datetime(1970, 1, 1, 1, 16, 40)',
+        1000.4: 'datetime.datetime(1970, 1, 1, 1, 16, 40, 400000)',
+        1000.9999996: 'datetime.datetime(1970, 1, 1, 1, 16, 41)',
+        1293843661.191: 'datetime.datetime(2011, 1, 1, 2, 1, 1, 191000)',
+        }
+    for t in sorted(expected_results):
+        dt = datetime.datetime.fromtimestamp(t)
+        assert repr(dt) == expected_results[t]
diff --git a/lib_pypy/resource.py b/lib_pypy/resource.py
--- a/lib_pypy/resource.py
+++ b/lib_pypy/resource.py
@@ -7,7 +7,7 @@
 
 from ctypes_support import standard_c_lib as libc
 from ctypes_support import get_errno
-from ctypes import Structure, c_int, c_long, byref, sizeof
+from ctypes import Structure, c_int, c_long, byref, sizeof, POINTER
 from errno import EINVAL, EPERM
 import _structseq
 
@@ -25,6 +25,8 @@
 _setrlimit = libc.setrlimit
 try:
     _getpagesize = libc.getpagesize
+    _getpagesize.argtypes = ()
+    _getpagesize.restype = c_int
 except AttributeError:
     from os import sysconf
     _getpagesize = None
@@ -61,6 +63,10 @@
         ("ru_nivcsw", c_long),
     )
 
+_getrusage.argtypes = (c_int, POINTER(_struct_rusage))
+_getrusage.restype = c_int
+
+
 class struct_rusage:
     __metaclass__ = _structseq.structseqtype
 
@@ -94,6 +100,12 @@
         ("rlim_max", rlim_t),
     )
 
+_getrlimit.argtypes = (c_int, POINTER(rlimit))
+_getrlimit.restype = c_int
+_setrlimit.argtypes = (c_int, POINTER(rlimit))
+_setrlimit.restype = c_int
+
+
 @builtinify
 def getrusage(who):
     ru = _struct_rusage()
diff --git a/lib_pypy/stackless.py b/lib_pypy/stackless.py
--- a/lib_pypy/stackless.py
+++ b/lib_pypy/stackless.py
@@ -200,14 +200,15 @@
     # I can't think of a better solution without a real transform.
 
 def rewrite_stackless_primitive(coro_state, alive, tempval):
-    flags, state, thunk, parent = coro_state
-    for i, frame in enumerate(state):
+    flags, frame, thunk, parent = coro_state
+    while frame is not None:
         retval_expr = _stackless_primitive_registry.get(frame.f_code)
         if retval_expr:
             # this tasklet needs to stop pickling here and return its value.
             tempval = eval(retval_expr, globals(), frame.f_locals)
-            state = state[:i]
-            coro_state = flags, state, thunk, parent
+            coro_state = flags, frame, thunk, parent
+            break
+        frame = frame.f_back
     return coro_state, alive, tempval
 
 #
@@ -492,23 +493,22 @@
         assert two == ()
         # we want to get rid of the parent thing.
         # for now, we just drop it
-        a, b, c, d = coro_state
-        
+        a, frame, c, d = coro_state
+
         # Removing all frames related to stackless.py.
         # They point to stuff we don't want to be pickled.
-        frame_list = list(b)
-        new_frame_list = []
-        for frame in frame_list:
+
+        pickleframe = frame
+        while frame is not None:
             if frame.f_code == schedule.func_code:
                 # Removing everything including and after the
                 # call to stackless.schedule()
+                pickleframe = frame.f_back
                 break
-            new_frame_list.append(frame)
-        b = tuple(new_frame_list)
-        
+            frame = frame.f_back
         if d:
             assert isinstance(d, coroutine)
-        coro_state = a, b, c, None
+        coro_state = a, pickleframe, c, None
         coro_state, alive, tempval = rewrite_stackless_primitive(coro_state, self.alive, self.tempval)
         inst_dict = self.__dict__.copy()
         inst_dict.pop('tempval', None)
diff --git a/py/__init__.py b/py/__init__.py
--- a/py/__init__.py
+++ b/py/__init__.py
@@ -8,7 +8,7 @@
 
 (c) Holger Krekel and others, 2004-2010
 """
-__version__ = '1.4.3'
+__version__ = '1.4.4.dev1'
 
 from py import _apipkg
 
@@ -70,10 +70,6 @@
         'getrawcode'        : '._code.code:getrawcode',
         'patch_builtins'    : '._code.code:patch_builtins',
         'unpatch_builtins'  : '._code.code:unpatch_builtins',
-        '_AssertionError'   : '._code.assertion:AssertionError',
-        '_reinterpret_old'  : '._code.assertion:reinterpret_old',
-        '_reinterpret'      : '._code.assertion:reinterpret',
-        '_reprcompare'      : '._code.assertion:_reprcompare',
     },
 
     # backports and additions of builtins
diff --git a/py/_code/_assertionnew.py b/py/_code/_assertionnew.py
deleted file mode 100644
--- a/py/_code/_assertionnew.py
+++ /dev/null
@@ -1,339 +0,0 @@
-"""
-Find intermediate evalutation results in assert statements through builtin AST.
-This should replace _assertionold.py eventually.
-"""
-
-import sys
-import ast
-
-import py
-from py._code.assertion import _format_explanation, BuiltinAssertionError
-
-
-if sys.platform.startswith("java") and sys.version_info < (2, 5, 2):
-    # See http://bugs.jython.org/issue1497
-    _exprs = ("BoolOp", "BinOp", "UnaryOp", "Lambda", "IfExp", "Dict",
-              "ListComp", "GeneratorExp", "Yield", "Compare", "Call",
-              "Repr", "Num", "Str", "Attribute", "Subscript", "Name",
-              "List", "Tuple")
-    _stmts = ("FunctionDef", "ClassDef", "Return", "Delete", "Assign",
-              "AugAssign", "Print", "For", "While", "If", "With", "Raise",
-              "TryExcept", "TryFinally", "Assert", "Import", "ImportFrom",
-              "Exec", "Global", "Expr", "Pass", "Break", "Continue")
-    _expr_nodes = set(getattr(ast, name) for name in _exprs)
-    _stmt_nodes = set(getattr(ast, name) for name in _stmts)
-    def _is_ast_expr(node):
-        return node.__class__ in _expr_nodes
-    def _is_ast_stmt(node):
-        return node.__class__ in _stmt_nodes
-else:
-    def _is_ast_expr(node):
-        return isinstance(node, ast.expr)
-    def _is_ast_stmt(node):
-        return isinstance(node, ast.stmt)
-
-
-class Failure(Exception):
-    """Error found while interpreting AST."""
-
-    def __init__(self, explanation=""):
-        self.cause = sys.exc_info()
-        self.explanation = explanation
-
-
-def interpret(source, frame, should_fail=False):
-    mod = ast.parse(source)
-    visitor = DebugInterpreter(frame)
-    try:
-        visitor.visit(mod)
-    except Failure:
-        failure = sys.exc_info()[1]
-        return getfailure(failure)
-    if should_fail:
-        return ("(assertion failed, but when it was re-run for "
-                "printing intermediate values, it did not fail.  Suggestions: "
-                "compute assert expression before the assert or use --no-assert)")
-
-def run(offending_line, frame=None):
-    if frame is None:
-        frame = py.code.Frame(sys._getframe(1))
-    return interpret(offending_line, frame)
-
-def getfailure(failure):
-    explanation = _format_explanation(failure.explanation)
-    value = failure.cause[1]
-    if str(value):
-        lines = explanation.splitlines()
-        if not lines:
-            lines.append("")
-        lines[0] += " << %s" % (value,)
-        explanation = "\n".join(lines)
-    text = "%s: %s" % (failure.cause[0].__name__, explanation)
-    if text.startswith("AssertionError: assert "):
-        text = text[16:]
-    return text
-
-
-operator_map = {
-    ast.BitOr : "|",
-    ast.BitXor : "^",
-    ast.BitAnd : "&",
-    ast.LShift : "<<",
-    ast.RShift : ">>",
-    ast.Add : "+",
-    ast.Sub : "-",
-    ast.Mult : "*",
-    ast.Div : "/",
-    ast.FloorDiv : "//",
-    ast.Mod : "%",
-    ast.Eq : "==",
-    ast.NotEq : "!=",
-    ast.Lt : "<",
-    ast.LtE : "<=",
-    ast.Gt : ">",
-    ast.GtE : ">=",
-    ast.Pow : "**",
-    ast.Is : "is",
-    ast.IsNot : "is not",
-    ast.In : "in",
-    ast.NotIn : "not in"
-}
-
-unary_map = {
-    ast.Not : "not %s",
-    ast.Invert : "~%s",
-    ast.USub : "-%s",
-    ast.UAdd : "+%s"
-}
-
-
-class DebugInterpreter(ast.NodeVisitor):
-    """Interpret AST nodes to gleam useful debugging information. """
-
-    def __init__(self, frame):
-        self.frame = frame
-
-    def generic_visit(self, node):
-        # Fallback when we don't have a special implementation.
-        if _is_ast_expr(node):
-            mod = ast.Expression(node)
-            co = self._compile(mod)
-            try:
-                result = self.frame.eval(co)
-            except Exception:
-                raise Failure()
-            explanation = self.frame.repr(result)
-            return explanation, result
-        elif _is_ast_stmt(node):
-            mod = ast.Module([node])
-            co = self._compile(mod, "exec")
-            try:
-                self.frame.exec_(co)
-            except Exception:
-                raise Failure()
-            return None, None
-        else:
-            raise AssertionError("can't handle %s" %(node,))
-
-    def _compile(self, source, mode="eval"):
-        return compile(source, "<assertion interpretation>", mode)
-
-    def visit_Expr(self, expr):
-        return self.visit(expr.value)
-
-    def visit_Module(self, mod):
-        for stmt in mod.body:
-            self.visit(stmt)
-
-    def visit_Name(self, name):
-        explanation, result = self.generic_visit(name)
-        # See if the name is local.
-        source = "%r in locals() is not globals()" % (name.id,)
-        co = self._compile(source)
-        try:
-            local = self.frame.eval(co)
-        except Exception:
-            # have to assume it isn't
-            local = False
-        if not local:
-            return name.id, result
-        return explanation, result
-
-    def visit_Compare(self, comp):
-        left = comp.left
-        left_explanation, left_result = self.visit(left)
-        for op, next_op in zip(comp.ops, comp.comparators):
-            next_explanation, next_result = self.visit(next_op)
-            op_symbol = operator_map[op.__class__]
-            explanation = "%s %s %s" % (left_explanation, op_symbol,
-                                        next_explanation)
-            source = "__exprinfo_left %s __exprinfo_right" % (op_symbol,)
-            co = self._compile(source)
-            try:
-                result = self.frame.eval(co, __exprinfo_left=left_result,
-                                         __exprinfo_right=next_result)
-            except Exception:
-                raise Failure(explanation)
-            try:
-                if not result:
-                    break
-            except KeyboardInterrupt:
-                raise
-            except:
-                break
-            left_explanation, left_result = next_explanation, next_result
-
-        rcomp = py.code._reprcompare
-        if rcomp:
-            res = rcomp(op_symbol, left_result, next_result)
-            if res:
-                explanation = res
-        return explanation, result
-
-    def visit_BoolOp(self, boolop):
-        is_or = isinstance(boolop.op, ast.Or)
-        explanations = []
-        for operand in boolop.values:
-            explanation, result = self.visit(operand)
-            explanations.append(explanation)
-            if result == is_or:
-                break
-        name = is_or and " or " or " and "
-        explanation = "(" + name.join(explanations) + ")"
-        return explanation, result
-
-    def visit_UnaryOp(self, unary):
-        pattern = unary_map[unary.op.__class__]
-        operand_explanation, operand_result = self.visit(unary.operand)
-        explanation = pattern % (operand_explanation,)
-        co = self._compile(pattern % ("__exprinfo_expr",))
-        try:
-            result = self.frame.eval(co, __exprinfo_expr=operand_result)
-        except Exception:
-            raise Failure(explanation)
-        return explanation, result
-
-    def visit_BinOp(self, binop):
-        left_explanation, left_result = self.visit(binop.left)
-        right_explanation, right_result = self.visit(binop.right)
-        symbol = operator_map[binop.op.__class__]
-        explanation = "(%s %s %s)" % (left_explanation, symbol,
-                                      right_explanation)
-        source = "__exprinfo_left %s __exprinfo_right" % (symbol,)
-        co = self._compile(source)
-        try:
-            result = self.frame.eval(co, __exprinfo_left=left_result,
-                                     __exprinfo_right=right_result)
-        except Exception:
-            raise Failure(explanation)
-        return explanation, result
-
-    def visit_Call(self, call):
-        func_explanation, func = self.visit(call.func)
-        arg_explanations = []
-        ns = {"__exprinfo_func" : func}
-        arguments = []
-        for arg in call.args:
-            arg_explanation, arg_result = self.visit(arg)
-            arg_name = "__exprinfo_%s" % (len(ns),)
-            ns[arg_name] = arg_result
-            arguments.append(arg_name)
-            arg_explanations.append(arg_explanation)
-        for keyword in call.keywords:
-            arg_explanation, arg_result = self.visit(keyword.value)
-            arg_name = "__exprinfo_%s" % (len(ns),)
-            ns[arg_name] = arg_result
-            keyword_source = "%s=%%s" % (keyword.arg)
-            arguments.append(keyword_source % (arg_name,))
-            arg_explanations.append(keyword_source % (arg_explanation,))
-        if call.starargs:
-            arg_explanation, arg_result = self.visit(call.starargs)
-            arg_name = "__exprinfo_star"
-            ns[arg_name] = arg_result
-            arguments.append("*%s" % (arg_name,))
-            arg_explanations.append("*%s" % (arg_explanation,))
-        if call.kwargs:
-            arg_explanation, arg_result = self.visit(call.kwargs)
-            arg_name = "__exprinfo_kwds"
-            ns[arg_name] = arg_result
-            arguments.append("**%s" % (arg_name,))
-            arg_explanations.append("**%s" % (arg_explanation,))
-        args_explained = ", ".join(arg_explanations)
-        explanation = "%s(%s)" % (func_explanation, args_explained)
-        args = ", ".join(arguments)
-        source = "__exprinfo_func(%s)" % (args,)
-        co = self._compile(source)
-        try:
-            result = self.frame.eval(co, **ns)
-        except Exception:
-            raise Failure(explanation)
-        pattern = "%s\n{%s = %s\n}"
-        rep = self.frame.repr(result)
-        explanation = pattern % (rep, rep, explanation)
-        return explanation, result
-
-    def _is_builtin_name(self, name):
-        pattern = "%r not in globals() and %r not in locals()"
-        source = pattern % (name.id, name.id)
-        co = self._compile(source)
-        try:
-            return self.frame.eval(co)
-        except Exception:
-            return False
-
-    def visit_Attribute(self, attr):
-        if not isinstance(attr.ctx, ast.Load):
-            return self.generic_visit(attr)
-        source_explanation, source_result = self.visit(attr.value)
-        explanation = "%s.%s" % (source_explanation, attr.attr)
-        source = "__exprinfo_expr.%s" % (attr.attr,)
-        co = self._compile(source)
-        try:
-            result = self.frame.eval(co, __exprinfo_expr=source_result)
-        except Exception:
-            raise Failure(explanation)
-        explanation = "%s\n{%s = %s.%s\n}" % (self.frame.repr(result),
-                                              self.frame.repr(result),
-                                              source_explanation, attr.attr)
-        # Check if the attr is from an instance.
-        source = "%r in getattr(__exprinfo_expr, '__dict__', {})"
-        source = source % (attr.attr,)
-        co = self._compile(source)
-        try:
-            from_instance = self.frame.eval(co, __exprinfo_expr=source_result)
-        except Exception:
-            from_instance = True
-        if from_instance:
-            rep = self.frame.repr(result)
-            pattern = "%s\n{%s = %s\n}"
-            explanation = pattern % (rep, rep, explanation)
-        return explanation, result
-
-    def visit_Assert(self, assrt):
-        test_explanation, test_result = self.visit(assrt.test)
-        if test_explanation.startswith("False\n{False =") and \
-                test_explanation.endswith("\n"):
-            test_explanation = test_explanation[15:-2]
-        explanation = "assert %s" % (test_explanation,)
-        if not test_result:
-            try:
-                raise BuiltinAssertionError
-            except Exception:
-                raise Failure(explanation)
-        return explanation, test_result
-
-    def visit_Assign(self, assign):
-        value_explanation, value_result = self.visit(assign.value)
-        explanation = "... = %s" % (value_explanation,)
-        name = ast.Name("__exprinfo_expr", ast.Load(),
-                        lineno=assign.value.lineno,
-                        col_offset=assign.value.col_offset)
-        new_assign = ast.Assign(assign.targets, name, lineno=assign.lineno,
-                                col_offset=assign.col_offset)
-        mod = ast.Module([new_assign])
-        co = self._compile(mod, "exec")
-        try:
-            self.frame.exec_(co, __exprinfo_expr=value_result)
-        except Exception:
-            raise Failure(explanation)
-        return explanation, value_result
diff --git a/py/_code/_assertionold.py b/py/_code/_assertionold.py
deleted file mode 100644
--- a/py/_code/_assertionold.py
+++ /dev/null
@@ -1,555 +0,0 @@
-import py
-import sys, inspect
-from compiler import parse, ast, pycodegen
-from py._code.assertion import BuiltinAssertionError, _format_explanation
-
-passthroughex = py.builtin._sysex
-
-class Failure:
-    def __init__(self, node):
-        self.exc, self.value, self.tb = sys.exc_info()
-        self.node = node
-
-class View(object):
-    """View base class.
-
-    If C is a subclass of View, then C(x) creates a proxy object around
-    the object x.  The actual class of the proxy is not C in general,
-    but a *subclass* of C determined by the rules below.  To avoid confusion
-    we call view class the class of the proxy (a subclass of C, so of View)
-    and object class the class of x.
-
-    Attributes and methods not found in the proxy are automatically read on x.
-    Other operations like setting attributes are performed on the proxy, as
-    determined by its view class.  The object x is available from the proxy
-    as its __obj__ attribute.
-
-    The view class selection is determined by the __view__ tuples and the
-    optional __viewkey__ method.  By default, the selected view class is the
-    most specific subclass of C whose __view__ mentions the class of x.
-    If no such subclass is found, the search proceeds with the parent
-    object classes.  For example, C(True) will first look for a subclass
-    of C with __view__ = (..., bool, ...) and only if it doesn't find any
-    look for one with __view__ = (..., int, ...), and then ..., object,...
-    If everything fails the class C itself is considered to be the default.
-
-    Alternatively, the view class selection can be driven by another aspect
-    of the object x, instead of the class of x, by overriding __viewkey__.
-    See last example at the end of this module.
-    """
-
-    _viewcache = {}
-    __view__ = ()
-
-    def __new__(rootclass, obj, *args, **kwds):
-        self = object.__new__(rootclass)
-        self.__obj__ = obj
-        self.__rootclass__ = rootclass
-        key = self.__viewkey__()
-        try:
-            self.__class__ = self._viewcache[key]
-        except KeyError:
-            self.__class__ = self._selectsubclass(key)
-        return self
-
-    def __getattr__(self, attr):
-        # attributes not found in the normal hierarchy rooted on View
-        # are looked up in the object's real class
-        return getattr(self.__obj__, attr)
-
-    def __viewkey__(self):
-        return self.__obj__.__class__
-
-    def __matchkey__(self, key, subclasses):
-        if inspect.isclass(key):
-            keys = inspect.getmro(key)
-        else:
-            keys = [key]
-        for key in keys:
-            result = [C for C in subclasses if key in C.__view__]
-            if result:
-                return result
-        return []
-
-    def _selectsubclass(self, key):
-        subclasses = list(enumsubclasses(self.__rootclass__))
-        for C in subclasses:
-            if not isinstance(C.__view__, tuple):
-                C.__view__ = (C.__view__,)
-        choices = self.__matchkey__(key, subclasses)
-        if not choices:
-            return self.__rootclass__
-        elif len(choices) == 1:
-            return choices[0]
-        else:
-            # combine the multiple choices
-            return type('?', tuple(choices), {})
-
-    def __repr__(self):
-        return '%s(%r)' % (self.__rootclass__.__name__, self.__obj__)
-
-
-def enumsubclasses(cls):
-    for subcls in cls.__subclasses__():
-        for subsubclass in enumsubclasses(subcls):
-            yield subsubclass
-    yield cls
-
-
-class Interpretable(View):
-    """A parse tree node with a few extra methods."""
-    explanation = None
-
-    def is_builtin(self, frame):
-        return False
-
-    def eval(self, frame):
-        # fall-back for unknown expression nodes
-        try:
-            expr = ast.Expression(self.__obj__)
-            expr.filename = '<eval>'
-            self.__obj__.filename = '<eval>'
-            co = pycodegen.ExpressionCodeGenerator(expr).getCode()
-            result = frame.eval(co)
-        except passthroughex:
-            raise
-        except:
-            raise Failure(self)
-        self.result = result
-        self.explanation = self.explanation or frame.repr(self.result)
-
-    def run(self, frame):
-        # fall-back for unknown statement nodes
-        try:
-            expr = ast.Module(None, ast.Stmt([self.__obj__]))
-            expr.filename = '<run>'
-            co = pycodegen.ModuleCodeGenerator(expr).getCode()
-            frame.exec_(co)
-        except passthroughex:
-            raise
-        except:
-            raise Failure(self)
-
-    def nice_explanation(self):
-        return _format_explanation(self.explanation)
-
-
-class Name(Interpretable):
-    __view__ = ast.Name
-
-    def is_local(self, frame):
-        source = '%r in locals() is not globals()' % self.name
-        try:
-            return frame.is_true(frame.eval(source))
-        except passthroughex:
-            raise
-        except:
-            return False
-
-    def is_global(self, frame):
-        source = '%r in globals()' % self.name
-        try:
-            return frame.is_true(frame.eval(source))
-        except passthroughex:
-            raise
-        except:
-            return False
-
-    def is_builtin(self, frame):
-        source = '%r not in locals() and %r not in globals()' % (
-            self.name, self.name)
-        try:
-            return frame.is_true(frame.eval(source))
-        except passthroughex:
-            raise
-        except:
-            return False
-
-    def eval(self, frame):
-        super(Name, self).eval(frame)
-        if not self.is_local(frame):
-            self.explanation = self.name
-
-class Compare(Interpretable):
-    __view__ = ast.Compare
-
-    def eval(self, frame):
-        expr = Interpretable(self.expr)
-        expr.eval(frame)
-        for operation, expr2 in self.ops:
-            if hasattr(self, 'result'):
-                # shortcutting in chained expressions
-                if not frame.is_true(self.result):
-                    break
-            expr2 = Interpretable(expr2)
-            expr2.eval(frame)
-            self.explanation = "%s %s %s" % (
-                expr.explanation, operation, expr2.explanation)
-            source = "__exprinfo_left %s __exprinfo_right" % operation
-            try:
-                self.result = frame.eval(source,
-                                         __exprinfo_left=expr.result,
-                                         __exprinfo_right=expr2.result)
-            except passthroughex:
-                raise
-            except:
-                raise Failure(self)
-            expr = expr2
-
-class And(Interpretable):
-    __view__ = ast.And
-
-    def eval(self, frame):
-        explanations = []
-        for expr in self.nodes:
-            expr = Interpretable(expr)
-            expr.eval(frame)
-            explanations.append(expr.explanation)
-            self.result = expr.result
-            if not frame.is_true(expr.result):
-                break
-        self.explanation = '(' + ' and '.join(explanations) + ')'
-
-class Or(Interpretable):
-    __view__ = ast.Or
-
-    def eval(self, frame):
-        explanations = []
-        for expr in self.nodes:
-            expr = Interpretable(expr)
-            expr.eval(frame)
-            explanations.append(expr.explanation)
-            self.result = expr.result
-            if frame.is_true(expr.result):
-                break
-        self.explanation = '(' + ' or '.join(explanations) + ')'
-
-
-# == Unary operations ==
-keepalive = []
-for astclass, astpattern in {
-    ast.Not    : 'not __exprinfo_expr',
-    ast.Invert : '(~__exprinfo_expr)',
-    }.items():
-
-    class UnaryArith(Interpretable):
-        __view__ = astclass
-
-        def eval(self, frame, astpattern=astpattern):
-            expr = Interpretable(self.expr)
-            expr.eval(frame)
-            self.explanation = astpattern.replace('__exprinfo_expr',
-                                                  expr.explanation)
-            try:
-                self.result = frame.eval(astpattern,
-                                         __exprinfo_expr=expr.result)
-            except passthroughex:
-                raise
-            except:
-                raise Failure(self)
-
-    keepalive.append(UnaryArith)
-
-# == Binary operations ==
-for astclass, astpattern in {
-    ast.Add    : '(__exprinfo_left + __exprinfo_right)',
-    ast.Sub    : '(__exprinfo_left - __exprinfo_right)',
-    ast.Mul    : '(__exprinfo_left * __exprinfo_right)',
-    ast.Div    : '(__exprinfo_left / __exprinfo_right)',
-    ast.Mod    : '(__exprinfo_left % __exprinfo_right)',
-    ast.Power  : '(__exprinfo_left ** __exprinfo_right)',
-    }.items():
-
-    class BinaryArith(Interpretable):
-        __view__ = astclass
-
-        def eval(self, frame, astpattern=astpattern):
-            left = Interpretable(self.left)
-            left.eval(frame)
-            right = Interpretable(self.right)
-            right.eval(frame)
-            self.explanation = (astpattern
-                                .replace('__exprinfo_left',  left .explanation)
-                                .replace('__exprinfo_right', right.explanation))
-            try:
-                self.result = frame.eval(astpattern,
-                                         __exprinfo_left=left.result,
-                                         __exprinfo_right=right.result)
-            except passthroughex:
-                raise
-            except:
-                raise Failure(self)
-
-    keepalive.append(BinaryArith)
-
-
-class CallFunc(Interpretable):
-    __view__ = ast.CallFunc
-
-    def is_bool(self, frame):
-        source = 'isinstance(__exprinfo_value, bool)'
-        try:
-            return frame.is_true(frame.eval(source,
-                                            __exprinfo_value=self.result))
-        except passthroughex:
-            raise
-        except:
-            return False
-
-    def eval(self, frame):
-        node = Interpretable(self.node)
-        node.eval(frame)
-        explanations = []
-        vars = {'__exprinfo_fn': node.result}
-        source = '__exprinfo_fn('
-        for a in self.args:
-            if isinstance(a, ast.Keyword):
-                keyword = a.name
-                a = a.expr
-            else:
-                keyword = None
-            a = Interpretable(a)
-            a.eval(frame)
-            argname = '__exprinfo_%d' % len(vars)
-            vars[argname] = a.result
-            if keyword is None:
-                source += argname + ','
-                explanations.append(a.explanation)
-            else:
-                source += '%s=%s,' % (keyword, argname)
-                explanations.append('%s=%s' % (keyword, a.explanation))
-        if self.star_args:
-            star_args = Interpretable(self.star_args)
-            star_args.eval(frame)
-            argname = '__exprinfo_star'
-            vars[argname] = star_args.result
-            source += '*' + argname + ','
-            explanations.append('*' + star_args.explanation)
-        if self.dstar_args:
-            dstar_args = Interpretable(self.dstar_args)
-            dstar_args.eval(frame)
-            argname = '__exprinfo_kwds'
-            vars[argname] = dstar_args.result
-            source += '**' + argname + ','
-            explanations.append('**' + dstar_args.explanation)
-        self.explanation = "%s(%s)" % (
-            node.explanation, ', '.join(explanations))
-        if source.endswith(','):
-            source = source[:-1]
-        source += ')'
-        try:
-            self.result = frame.eval(source, **vars)
-        except passthroughex:
-            raise
-        except:
-            raise Failure(self)
-        if not node.is_builtin(frame) or not self.is_bool(frame):
-            r = frame.repr(self.result)
-            self.explanation = '%s\n{%s = %s\n}' % (r, r, self.explanation)
-
-class Getattr(Interpretable):
-    __view__ = ast.Getattr
-
-    def eval(self, frame):
-        expr = Interpretable(self.expr)
-        expr.eval(frame)
-        source = '__exprinfo_expr.%s' % self.attrname
-        try:
-            self.result = frame.eval(source, __exprinfo_expr=expr.result)
-        except passthroughex:
-            raise
-        except:
-            raise Failure(self)
-        self.explanation = '%s.%s' % (expr.explanation, self.attrname)
-        # if the attribute comes from the instance, its value is interesting
-        source = ('hasattr(__exprinfo_expr, "__dict__") and '
-                  '%r in __exprinfo_expr.__dict__' % self.attrname)
-        try:
-            from_instance = frame.is_true(
-                frame.eval(source, __exprinfo_expr=expr.result))
-        except passthroughex:
-            raise
-        except:
-            from_instance = True
-        if from_instance:
-            r = frame.repr(self.result)
-            self.explanation = '%s\n{%s = %s\n}' % (r, r, self.explanation)
-
-# == Re-interpretation of full statements ==
-
-class Assert(Interpretable):
-    __view__ = ast.Assert
-
-    def run(self, frame):
-        test = Interpretable(self.test)
-        test.eval(frame)
-        # simplify 'assert False where False = ...'
-        if (test.explanation.startswith('False\n{False = ') and
-            test.explanation.endswith('\n}')):
-            test.explanation = test.explanation[15:-2]
-        # print the result as  'assert <explanation>'
-        self.result = test.result
-        self.explanation = 'assert ' + test.explanation
-        if not frame.is_true(test.result):
-            try:
-                raise BuiltinAssertionError
-            except passthroughex:
-                raise
-            except:
-                raise Failure(self)
-
-class Assign(Interpretable):
-    __view__ = ast.Assign
-
-    def run(self, frame):
-        expr = Interpretable(self.expr)
-        expr.eval(frame)
-        self.result = expr.result
-        self.explanation = '... = ' + expr.explanation
-        # fall-back-run the rest of the assignment
-        ass = ast.Assign(self.nodes, ast.Name('__exprinfo_expr'))
-        mod = ast.Module(None, ast.Stmt([ass]))
-        mod.filename = '<run>'
-        co = pycodegen.ModuleCodeGenerator(mod).getCode()
-        try:
-            frame.exec_(co, __exprinfo_expr=expr.result)
-        except passthroughex:
-            raise
-        except:
-            raise Failure(self)
-
-class Discard(Interpretable):
-    __view__ = ast.Discard
-
-    def run(self, frame):
-        expr = Interpretable(self.expr)
-        expr.eval(frame)
-        self.result = expr.result
-        self.explanation = expr.explanation
-
-class Stmt(Interpretable):
-    __view__ = ast.Stmt
-
-    def run(self, frame):
-        for stmt in self.nodes:
-            stmt = Interpretable(stmt)
-            stmt.run(frame)
-
-
-def report_failure(e):
-    explanation = e.node.nice_explanation()
-    if explanation:
-        explanation = ", in: " + explanation
-    else:
-        explanation = ""
-    sys.stdout.write("%s: %s%s\n" % (e.exc.__name__, e.value, explanation))
-
-def check(s, frame=None):
-    if frame is None:
-        frame = sys._getframe(1)
-        frame = py.code.Frame(frame)
-    expr = parse(s, 'eval')
-    assert isinstance(expr, ast.Expression)
-    node = Interpretable(expr.node)
-    try:
-        node.eval(frame)
-    except passthroughex:
-        raise
-    except Failure:
-        e = sys.exc_info()[1]
-        report_failure(e)
-    else:
-        if not frame.is_true(node.result):
-            sys.stderr.write("assertion failed: %s\n" % node.nice_explanation())
-
-
-###########################################################
-# API / Entry points
-# #########################################################
-
-def interpret(source, frame, should_fail=False):
-    module = Interpretable(parse(source, 'exec').node)
-    #print "got module", module
-    if isinstance(frame, py.std.types.FrameType):
-        frame = py.code.Frame(frame)
-    try:
-        module.run(frame)
-    except Failure:
-        e = sys.exc_info()[1]
-        return getfailure(e)
-    except passthroughex:
-        raise
-    except:
-        import traceback
-        traceback.print_exc()
-    if should_fail:
-        return ("(assertion failed, but when it was re-run for "
-                "printing intermediate values, it did not fail.  Suggestions: "
-                "compute assert expression before the assert or use --nomagic)")
-    else:
-        return None
-
-def getmsg(excinfo):
-    if isinstance(excinfo, tuple):
-        excinfo = py.code.ExceptionInfo(excinfo)
-    #frame, line = gettbline(tb)
-    #frame = py.code.Frame(frame)
-    #return interpret(line, frame)
-
-    tb = excinfo.traceback[-1]
-    source = str(tb.statement).strip()
-    x = interpret(source, tb.frame, should_fail=True)
-    if not isinstance(x, str):
-        raise TypeError("interpret returned non-string %r" % (x,))
-    return x
-
-def getfailure(e):
-    explanation = e.node.nice_explanation()
-    if str(e.value):
-        lines = explanation.split('\n')
-        lines[0] += "  << %s" % (e.value,)
-        explanation = '\n'.join(lines)
-    text = "%s: %s" % (e.exc.__name__, explanation)
-    if text.startswith('AssertionError: assert '):
-        text = text[16:]
-    return text
-
-def run(s, frame=None):
-    if frame is None:
-        frame = sys._getframe(1)
-        frame = py.code.Frame(frame)
-    module = Interpretable(parse(s, 'exec').node)
-    try:
-        module.run(frame)
-    except Failure:
-        e = sys.exc_info()[1]
-        report_failure(e)
-
-
-if __name__ == '__main__':
-    # example:
-    def f():
-        return 5
-    def g():
-        return 3
-    def h(x):
-        return 'never'
-    check("f() * g() == 5")
-    check("not f()")
-    check("not (f() and g() or 0)")
-    check("f() == g()")
-    i = 4
-    check("i == f()")
-    check("len(f()) == 0")
-    check("isinstance(2+3+4, float)")
-
-    run("x = i")
-    check("x == 5")
-
-    run("assert not f(), 'oops'")
-    run("a, b, c = 1, 2")
-    run("a, b, c = f()")
-
-    check("max([f(),g()]) == 4")
-    check("'hello'[g()] == 'h'")
-    run("'guk%d' % h(f())")
diff --git a/py/_code/assertion.py b/py/_code/assertion.py
deleted file mode 100644
--- a/py/_code/assertion.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import sys
-import py
-
-BuiltinAssertionError = py.builtin.builtins.AssertionError
-
-_reprcompare = None # if set, will be called by assert reinterp for comparison ops
-
-def _format_explanation(explanation):
-    """This formats an explanation
-
-    Normally all embedded newlines are escaped, however there are
-    three exceptions: \n{, \n} and \n~.  The first two are intended
-    cover nested explanations, see function and attribute explanations
-    for examples (.visit_Call(), visit_Attribute()).  The last one is
-    for when one explanation needs to span multiple lines, e.g. when
-    displaying diffs.
-    """
-    raw_lines = (explanation or '').split('\n')
-    # escape newlines not followed by {, } and ~
-    lines = [raw_lines[0]]
-    for l in raw_lines[1:]:
-        if l.startswith('{') or l.startswith('}') or l.startswith('~'):
-            lines.append(l)
-        else:
-            lines[-1] += '\\n' + l
-
-    result = lines[:1]
-    stack = [0]
-    stackcnt = [0]
-    for line in lines[1:]:
-        if line.startswith('{'):
-            if stackcnt[-1]:
-                s = 'and   '
-            else:
-                s = 'where '
-            stack.append(len(result))
-            stackcnt[-1] += 1
-            stackcnt.append(0)
-            result.append(' +' + '  '*(len(stack)-1) + s + line[1:])
-        elif line.startswith('}'):
-            assert line.startswith('}')
-            stack.pop()
-            stackcnt.pop()
-            result[stack[-1]] += line[1:]
-        else:
-            assert line.startswith('~')
-            result.append('  '*len(stack) + line[1:])
-    assert len(stack) == 1
-    return '\n'.join(result)
-
-
-class AssertionError(BuiltinAssertionError):
-    def __init__(self, *args):
-        BuiltinAssertionError.__init__(self, *args)
-        if args:
-            try:
-                self.msg = str(args[0])
-            except py.builtin._sysex:
-                raise
-            except:
-                self.msg = "<[broken __repr__] %s at %0xd>" %(
-                    args[0].__class__, id(args[0]))
-        else:
-            f = py.code.Frame(sys._getframe(1))
-            try:
-                source = f.code.fullsource
-                if source is not None:
-                    try:
-                        source = source.getstatement(f.lineno, assertion=True)
-                    except IndexError:
-                        source = None
-                    else:
-                        source = str(source.deindent()).strip()
-            except py.error.ENOENT:
-                source = None
-                # this can also occur during reinterpretation, when the
-                # co_filename is set to "<run>".
-            if source:
-                self.msg = reinterpret(source, f, should_fail=True)
-            else:
-                self.msg = "<could not determine information>"
-            if not self.args:
-                self.args = (self.msg,)
-
-if sys.version_info > (3, 0):
-    AssertionError.__module__ = "builtins"
-    reinterpret_old = "old reinterpretation not available for py3"
-else:
-    from py._code._assertionold import interpret as reinterpret_old
-if sys.version_info >= (2, 6) or (sys.platform.startswith("java")):
-    from py._code._assertionnew import interpret as reinterpret
-else:
-    reinterpret = reinterpret_old
-
diff --git a/py/_code/code.py b/py/_code/code.py
--- a/py/_code/code.py
+++ b/py/_code/code.py
@@ -145,17 +145,6 @@
         return self.frame.f_locals
     locals = property(getlocals, None, None, "locals of underlaying frame")
 
-    def reinterpret(self):
-        """Reinterpret the failing statement and returns a detailed information
-           about what operations are performed."""
-        if self.exprinfo is None:
-            source = str(self.statement).strip()
-            x = py.code._reinterpret(source, self.frame, should_fail=True)
-            if not isinstance(x, str):
-                raise TypeError("interpret returned non-string %r" % (x,))
-            self.exprinfo = x
-        return self.exprinfo
-
     def getfirstlinesource(self):
         # on Jython this firstlineno can be -1 apparently
         return max(self.frame.code.firstlineno, 0)
@@ -310,7 +299,7 @@
         #     ExceptionInfo-like classes may have different attributes.
         if tup is None:
             tup = sys.exc_info()
-            if exprinfo is None and isinstance(tup[1], py.code._AssertionError):
+            if exprinfo is None and isinstance(tup[1], AssertionError):
                 exprinfo = getattr(tup[1], 'msg', None)
                 if exprinfo is None:
                     exprinfo = str(tup[1])
@@ -690,22 +679,15 @@
 
 oldbuiltins = {}
 
-def patch_builtins(assertion=True, compile=True):
-    """ put compile and AssertionError builtins to Python's builtins. """
-    if assertion:
-        from py._code import assertion
-        l = oldbuiltins.setdefault('AssertionError', [])
-        l.append(py.builtin.builtins.AssertionError)
-        py.builtin.builtins.AssertionError = assertion.AssertionError
+def patch_builtins(compile=True):
+    """ put compile builtins to Python's builtins. """
     if compile:
         l = oldbuiltins.setdefault('compile', [])
         l.append(py.builtin.builtins.compile)
         py.builtin.builtins.compile = py.code.compile
 
-def unpatch_builtins(assertion=True, compile=True):
+def unpatch_builtins(compile=True):
     """ remove compile and AssertionError builtins from Python builtins. """
-    if assertion:
-        py.builtin.builtins.AssertionError = oldbuiltins['AssertionError'].pop()
     if compile:
         py.builtin.builtins.compile = oldbuiltins['compile'].pop()
 
diff --git a/pypy/annotation/annrpython.py b/pypy/annotation/annrpython.py
--- a/pypy/annotation/annrpython.py
+++ b/pypy/annotation/annrpython.py
@@ -228,7 +228,7 @@
             # graph -- it's already low-level operations!
             for a, s_newarg in zip(graph.getargs(), cells):
                 s_oldarg = self.binding(a)
-                assert s_oldarg.contains(s_newarg)
+                assert annmodel.unionof(s_oldarg, s_newarg) == s_oldarg
         else:
             assert not self.frozen
             for a in cells:
diff --git a/pypy/annotation/bookkeeper.py b/pypy/annotation/bookkeeper.py
--- a/pypy/annotation/bookkeeper.py
+++ b/pypy/annotation/bookkeeper.py
@@ -279,13 +279,13 @@
         desc = self.getdesc(cls)
         return desc.getuniqueclassdef()
 
-    def getlistdef(self, **flags):
+    def getlistdef(self, **flags_if_new):
         """Get the ListDef associated with the current position."""
         try:
             listdef = self.listdefs[self.position_key]
         except KeyError:
             listdef = self.listdefs[self.position_key] = ListDef(self)
-            listdef.listitem.__dict__.update(flags)
+            listdef.listitem.__dict__.update(flags_if_new)
         return listdef
 
     def newlist(self, *s_values, **flags):
@@ -294,14 +294,18 @@
         listdef = self.getlistdef(**flags)
         for s_value in s_values:
             listdef.generalize(s_value)
+        if flags:
+            assert flags.keys() == ['range_step']
+            listdef.generalize_range_step(flags['range_step'])
         return SomeList(listdef)
 
-    def getdictdef(self, is_r_dict=False):
+    def getdictdef(self, is_r_dict=False, force_non_null=False):
         """Get the DictDef associated with the current position."""
         try:
             dictdef = self.dictdefs[self.position_key]
         except KeyError:
-            dictdef = DictDef(self, is_r_dict=is_r_dict)
+            dictdef = DictDef(self, is_r_dict=is_r_dict,
+                              force_non_null=force_non_null)
             self.dictdefs[self.position_key] = dictdef
         return dictdef
 
diff --git a/pypy/annotation/builtin.py b/pypy/annotation/builtin.py
--- a/pypy/annotation/builtin.py
+++ b/pypy/annotation/builtin.py
@@ -311,8 +311,14 @@
 def robjmodel_we_are_translated():
     return immutablevalue(True)
 
-def robjmodel_r_dict(s_eqfn, s_hashfn):
-    dictdef = getbookkeeper().getdictdef(is_r_dict=True)
+def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None):
+    if s_force_non_null is None:
+        force_non_null = False
+    else:
+        assert s_force_non_null.is_constant()
+        force_non_null = s_force_non_null.const
+    dictdef = getbookkeeper().getdictdef(is_r_dict=True,
+                                         force_non_null=force_non_null)
     dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn)
     return SomeDict(dictdef)
 
@@ -351,17 +357,6 @@
 def llmemory_cast_int_to_adr(s):
     return SomeAddress()
 
-
-##def rarith_ovfcheck(s_obj):
-##    if isinstance(s_obj, SomeInteger) and s_obj.unsigned:
-##        getbookkeeper().warning("ovfcheck on unsigned")
-##    return s_obj
-
-##def rarith_ovfcheck_lshift(s_obj1, s_obj2):
-##    if isinstance(s_obj1, SomeInteger) and s_obj1.unsigned:
-##        getbookkeeper().warning("ovfcheck_lshift with unsigned")
-##    return SomeInteger()
-
 def unicodedata_decimal(s_uchr):
     raise TypeError, "unicodedate.decimal() calls should not happen at interp-level"    
 
@@ -379,8 +374,6 @@
         original = getattr(__builtin__, name[8:])
         BUILTIN_ANALYZERS[original] = value
 
-##BUILTIN_ANALYZERS[pypy.rlib.rarithmetic.ovfcheck] = rarith_ovfcheck
-##BUILTIN_ANALYZERS[pypy.rlib.rarithmetic.ovfcheck_lshift] = rarith_ovfcheck_lshift
 BUILTIN_ANALYZERS[pypy.rlib.rarithmetic.intmask] = rarith_intmask
 BUILTIN_ANALYZERS[pypy.rlib.objectmodel.instantiate] = robjmodel_instantiate
 BUILTIN_ANALYZERS[pypy.rlib.objectmodel.we_are_translated] = (
diff --git a/pypy/annotation/description.py b/pypy/annotation/description.py
--- a/pypy/annotation/description.py
+++ b/pypy/annotation/description.py
@@ -565,7 +565,7 @@
         if self.is_exception_class():
             if self.pyobj.__module__ == 'exceptions':
                 return True
-            if self.pyobj is py.code._AssertionError:
+            if issubclass(self.pyobj, AssertionError):
                 return True
         return False
 
diff --git a/pypy/annotation/dictdef.py b/pypy/annotation/dictdef.py
--- a/pypy/annotation/dictdef.py
+++ b/pypy/annotation/dictdef.py
@@ -85,12 +85,14 @@
 
     def __init__(self, bookkeeper, s_key = s_ImpossibleValue,
                                  s_value = s_ImpossibleValue,
-                               is_r_dict = False):
+                               is_r_dict = False,
+                           force_non_null = False):
         self.dictkey = DictKey(bookkeeper, s_key, is_r_dict)
         self.dictkey.itemof[self] = True
         self.dictvalue = DictValue(bookkeeper, s_value)
         self.dictvalue.itemof[self] = True
         self.bookkeeper = bookkeeper
+        self.force_non_null = force_non_null
 
     def read_key(self, position_key=None):
         if position_key is None:
diff --git a/pypy/annotation/listdef.py b/pypy/annotation/listdef.py
--- a/pypy/annotation/listdef.py
+++ b/pypy/annotation/listdef.py
@@ -184,6 +184,11 @@
     def generalize(self, s_value):
         self.listitem.generalize(s_value)
 
+    def generalize_range_step(self, range_step):
+        newlistitem = ListItem(self.listitem.bookkeeper, s_ImpossibleValue)
+        newlistitem.range_step = range_step
+        self.listitem.merge(newlistitem)
+
     def __repr__(self):
         return '<[%r]%s%s%s%s>' % (self.listitem.s_value,
                                self.listitem.mutated and 'm' or '',
diff --git a/pypy/annotation/model.py b/pypy/annotation/model.py
--- a/pypy/annotation/model.py
+++ b/pypy/annotation/model.py
@@ -32,13 +32,15 @@
 import pypy
 from pypy.tool import descriptor
 from pypy.tool.pairtype import pair, extendabletype
-from pypy.tool.tls import tlsobject
 from pypy.rlib.rarithmetic import r_uint, r_ulonglong, base_int
 from pypy.rlib.rarithmetic import r_singlefloat, r_longfloat
 import inspect, weakref
 
 DEBUG = False    # set to False to disable recording of debugging information
-TLS = tlsobject()
+
+class State(object):
+    pass
+TLS = State()
 
 class SomeObject(object):
     """The set of all objects.  Each instance stands
diff --git a/pypy/annotation/test/test_annrpython.py b/pypy/annotation/test/test_annrpython.py
--- a/pypy/annotation/test/test_annrpython.py
+++ b/pypy/annotation/test/test_annrpython.py
@@ -3483,6 +3483,17 @@
         a = self.RPythonAnnotator()
         raises(Exception, a.build_types, f, [int])
 
+    def test_range_variable_step(self):
+        def g(n):
+            return range(0, 10, n)
+        def f(n):
+            r = g(1)    # constant step, at first
+            s = g(n)    # but it becomes a variable step
+            return r
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [int])
+        assert s.listdef.listitem.range_step == 0
+
 
 def g(n):
     return [0,1,2,n]
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -33,13 +33,17 @@
      "struct", "_hashlib", "_md5", "_sha", "_minimal_curses", "cStringIO",
      "thread", "itertools", "pyexpat", "_ssl", "cpyext", "array",
      "_bisect", "binascii", "_multiprocessing", '_warnings',
-     "_collections", "_multibytecodec", "micronumpy"]
+     "_collections", "_multibytecodec", "micronumpy", "_ffi"]
 ))
 
 translation_modules = default_modules.copy()
 translation_modules.update(dict.fromkeys(
     ["fcntl", "rctime", "select", "signal", "_rawffi", "zlib",
-     "struct", "_md5", "cStringIO", "array"]))
+     "struct", "_md5", "cStringIO", "array", "_ffi",
+     # the following are needed for pyrepl (and hence for the
+     # interactive prompt/pdb)
+     "termios", "_minimal_curses",
+     ]))
 
 working_oo_modules = default_modules.copy()
 working_oo_modules.update(dict.fromkeys(
@@ -80,6 +84,7 @@
     "_rawffi": [("objspace.usemodules.struct", True)],
     "cpyext": [("translation.secondaryentrypoints", "cpyext"),
                ("translation.shared", sys.platform == "win32")],
+    "_ffi":    [("translation.jit_ffi", True)],
 }
 
 module_import_dependencies = {
@@ -124,9 +129,6 @@
                  cmdline='--objspace -o'),
 
     OptionDescription("opcodes", "opcodes to enable in the interpreter", [
-        BoolOption("CALL_LIKELY_BUILTIN", "emit a special bytecode for likely calls to builtin functions",
-                   default=False,
-                   requires=[("translation.stackless", False)]),
         BoolOption("CALL_METHOD", "emit a special bytecode for expr.name()",
                    default=False),
         ]),
@@ -261,13 +263,7 @@
         BoolOption("withcelldict",
                    "use dictionaries that are optimized for being used as module dicts",
                    default=False,
-                   requires=[("objspace.opcodes.CALL_LIKELY_BUILTIN", False),
-                             ("objspace.honor__builtins__", False)]),
-
-        BoolOption("withdictmeasurement",
-                   "create huge files with masses of information "
-                   "about dictionaries",
-                   default=False),
+                   requires=[("objspace.honor__builtins__", False)]),
 
         BoolOption("withmapdict",
                    "make instances really small but slow without the JIT",
@@ -350,8 +346,6 @@
     backend = config.translation.backend
 
     # all the good optimizations for PyPy should be listed here
-    if level in ['2', '3']:
-        config.objspace.opcodes.suggest(CALL_LIKELY_BUILTIN=True)
     if level in ['2', '3', 'jit']:
         config.objspace.opcodes.suggest(CALL_METHOD=True)
         config.objspace.std.suggest(withrangelist=True)
diff --git a/pypy/config/test/test_pypyoption.py b/pypy/config/test/test_pypyoption.py
--- a/pypy/config/test/test_pypyoption.py
+++ b/pypy/config/test/test_pypyoption.py
@@ -73,3 +73,7 @@
             fn = prefix + "." + path + ".txt"
             yield check_file_exists, fn
 
+def test__ffi_opt():
+    config = get_pypy_config(translating=True)
+    config.objspace.usemodules._ffi = True
+    assert config.translation.jit_ffi
diff --git a/pypy/config/translationoption.py b/pypy/config/translationoption.py
--- a/pypy/config/translationoption.py
+++ b/pypy/config/translationoption.py
@@ -117,6 +117,8 @@
     ChoiceOption("jit_profiler", "integrate profiler support into the JIT",
                  ["off", "oprofile"],
                  default="off"),
+    # jit_ffi is automatically turned on by withmod-_ffi (which is enabled by default)
+    BoolOption("jit_ffi", "optimize libffi calls", default=False, cmdline=None),
 
     # misc
     BoolOption("verbose", "Print extra information", default=False),
diff --git a/pypy/doc/config/objspace.opcodes.CALL_LIKELY_BUILTIN.txt b/pypy/doc/config/objspace.opcodes.CALL_LIKELY_BUILTIN.txt
deleted file mode 100644
--- a/pypy/doc/config/objspace.opcodes.CALL_LIKELY_BUILTIN.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-Introduce a new opcode called ``CALL_LIKELY_BUILTIN``. It is used when something
-is called, that looks like a builtin function (but could in reality be shadowed
-by a name in the module globals). For all module globals dictionaries it is
-then tracked which builtin name is shadowed in this module. If the
-``CALL_LIKELY_BUILTIN`` opcode is executed, it is checked whether the builtin is
-shadowed. If not, the corresponding builtin is called. Otherwise the object that
-is shadowing it is called instead. If no shadowing is happening, this saves two
-dictionary lookups on calls to builtins.
-
-For more information, see the section in `Standard Interpreter Optimizations`_.
-
-.. _`Standard Interpreter Optimizations`: ../interpreter-optimizations.html#call-likely-builtin
diff --git a/pypy/doc/config/objspace.std.withdictmeasurement.txt b/pypy/doc/config/objspace.std.withdictmeasurement.txt
deleted file mode 100644
--- a/pypy/doc/config/objspace.std.withdictmeasurement.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-Internal option.
-
-.. internal
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -136,6 +136,11 @@
 next access.  Any code that uses weak proxies must carefully catch such
 ``ReferenceError`` at any place that uses them.
 
+As a side effect, the ``finally`` clause inside a generator will be executed
+only when the generator object is garbage collected (see `issue 736`__).
+
+.. __: http://bugs.pypy.org/issue736
+
 There are a few extra implications for the difference in the GC.  Most
 notably, if an object has a ``__del__``, the ``__del__`` is never called more
 than once in PyPy; but CPython will call the same ``__del__`` several times
@@ -168,6 +173,11 @@
     >>>> A.__del__ = lambda self: None
     __main__:1: RuntimeWarning: a __del__ method added to an existing type will not be called
 
+Even more obscure: the same is true, for old-style classes, if you attach
+the ``__del__`` to an instance (even in CPython this does not work with
+new-style classes).  You get a RuntimeWarning in PyPy.  To fix these cases
+just make sure there is a ``__del__`` method in the class to start with.
+
 
 Subclasses of built-in types
 ----------------------------
@@ -238,5 +248,7 @@
   never a dictionary as it sometimes is in CPython. Assigning to
   ``__builtins__`` has no effect.
 
+* object identity of immutable keys in dictionaries is not necessarily preserved.
+  Never compare immutable objects with ``is``.
+
 .. include:: _ref.txt
-
diff --git a/pypy/doc/garbage_collection.rst b/pypy/doc/garbage_collection.rst
--- a/pypy/doc/garbage_collection.rst
+++ b/pypy/doc/garbage_collection.rst
@@ -212,90 +212,4 @@
   becomes free garbage, to be collected at the next major collection.
 
 
-Minimark GC
------------
-
-This is a simplification and rewrite of the ideas from the Hybrid GC.
-It uses a nursery for the young objects, and mark-and-sweep for the old
-objects.  This is a moving GC, but objects may only move once (from
-the nursery to the old stage).
-
-The main difference with the Hybrid GC is that the mark-and-sweep
-objects (the "old stage") are directly handled by the GC's custom
-allocator, instead of being handled by malloc() calls.  The gain is that
-it is then possible, during a major collection, to walk through all old
-generation objects without needing to store a list of pointers to them.
-So as a first approximation, when compared to the Hybrid GC, the
-Minimark GC saves one word of memory per old object.
-
-There are a number of environment variables that can be tweaked to
-influence the GC.  (Their default value should be ok for most usages.)
-You can read more about them at the start of
-`pypy/rpython/memory/gc/minimark.py`_.
-
-In more details:
-
-- The small newly malloced objects are allocated in the nursery (case 1).
-  All objects living in the nursery are "young".
-
-- The big objects are always handled directly by the system malloc().
-  But the big newly malloced objects are still "young" when they are
-  allocated (case 2), even though they don't live in the nursery.
-
-- When the nursery is full, we do a minor collection, i.e. we find
-  which "young" objects are still alive (from cases 1 and 2).  The
-  "young" flag is then removed.  The surviving case 1 objects are moved
-  to the old stage. The dying case 2 objects are immediately freed.
-
-- The old stage is an area of memory containing old (small) objects.  It
-  is handled by `pypy/rpython/memory/gc/minimarkpage.py`_.  It is organized
-  as "arenas" of 256KB or 512KB, subdivided into "pages" of 4KB or 8KB.
-  Each page can either be free, or contain small objects of all the same
-  size.  Furthermore at any point in time each object location can be
-  either allocated or freed.  The basic design comes from ``obmalloc.c``
-  from CPython (which itself comes from the same source as the Linux
-  system malloc()).
-
-- New objects are added to the old stage at every minor collection.
-  Immediately after a minor collection, when we reach some threshold, we
-  trigger a major collection.  This is the mark-and-sweep step.  It walks
-  over *all* objects (mark), and then frees some fraction of them (sweep).
-  This means that the only time when we want to free objects is while
-  walking over all of them; we never ask to free an object given just its
-  address.  This allows some simplifications and memory savings when
-  compared to ``obmalloc.c``.
-
-- As with all generational collectors, this GC needs a write barrier to
-  record which old objects have a reference to young objects.
-
-- Additionally, we found out that it is useful to handle the case of
-  big arrays specially: when we allocate a big array (with the system
-  malloc()), we reserve a small number of bytes before.  When the array
-  grows old, we use the extra bytes as a set of bits.  Each bit
-  represents 128 entries in the array.  Whenever the write barrier is
-  called to record a reference from the Nth entry of the array to some
-  young object, we set the bit number ``(N/128)`` to 1.  This can
-  considerably speed up minor collections, because we then only have to
-  scan 128 entries of the array instead of all of them.
-
-- As usual, we need special care about weak references, and objects with
-  finalizers.  Weak references are allocated in the nursery, and if they
-  survive they move to the old stage, as usual for all objects; the
-  difference is that the reference they contain must either follow the
-  object, or be set to NULL if the object dies.  And the objects with
-  finalizers, considered rare enough, are immediately allocated old to
-  simplify the design.  In particular their ``__del__`` method can only
-  be called just after a major collection.
-
-- The objects move once only, so we can use a trick to implement id()
-  and hash().  If the object is not in the nursery, it won't move any
-  more, so its id() and hash() are the object's address, cast to an
-  integer.  If the object is in the nursery, and we ask for its id()
-  or its hash(), then we pre-reserve a location in the old stage, and
-  return the address of that location.  If the object survives the
-  next minor collection, we move it there, and so its id() and hash()
-  are preserved.  If the object dies then the pre-reserved location
-  becomes free garbage, to be collected at the next major collection.
-
-
 .. include:: _ref.txt
diff --git a/pypy/doc/getting-started.rst b/pypy/doc/getting-started.rst
--- a/pypy/doc/getting-started.rst
+++ b/pypy/doc/getting-started.rst
@@ -51,7 +51,7 @@
 ---------------
 
 PyPy is ready to be executed as soon as you unpack the tarball or the zip
-file, with no need install it in any specific location::
+file, with no need to install it in any specific location::
 
     $ tar xf pypy-1.5-linux.tar.bz2
 
diff --git a/pypy/doc/image/jitviewer.png b/pypy/doc/image/jitviewer.png
new file mode 100644
index 0000000000000000000000000000000000000000..ad2abca5c88125061fa519dcf3f9fada577573ee
GIT binary patch

[cut]

diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst
--- a/pypy/doc/index.rst
+++ b/pypy/doc/index.rst
@@ -11,6 +11,10 @@
 Getting into PyPy ... 
 =============================================
 
+* `Getting started`_: how to install and run the PyPy Python interpreter
+
+* `FAQ`_: some frequently asked questions.
+
 * `Release 1.5`_: the latest official release
 
 * `PyPy Blog`_: news and status info about PyPy 
@@ -21,16 +25,11 @@
 
 * `speed.pypy.org`_: Daily benchmarks of how fast PyPy is
 
+* `potential project ideas`_: In case you want to get your feet wet...
+
 Documentation for the PyPy Python Interpreter
 ===============================================
 
-`getting started`_ provides hands-on instructions 
-including a two-liner to run the PyPy Python interpreter 
-on your system, examples on advanced features and 
-entry points for using the `RPython toolchain`_.
-
-`FAQ`_ contains some frequently asked questions.
-
 New features of PyPy's Python Interpreter and 
 Translation Framework: 
 
@@ -59,8 +58,6 @@
   (if they are not already developed in the FAQ_).
   You can find logs of the channel here_.
 
-.. XXX play1? 
-
 Meeting PyPy developers
 =======================
 
@@ -83,7 +80,7 @@
 .. _`Release 1.5`: http://pypy.org/download.html
 .. _`speed.pypy.org`: http://speed.pypy.org
 .. _`RPython toolchain`: translation.html
-
+.. _`potential project ideas`: project-ideas.html
 
 Project Documentation
 =====================================
diff --git a/pypy/doc/interpreter-optimizations.rst b/pypy/doc/interpreter-optimizations.rst
--- a/pypy/doc/interpreter-optimizations.rst
+++ b/pypy/doc/interpreter-optimizations.rst
@@ -157,32 +157,6 @@
 A more advanced version of sharing dicts, called *map dicts,* is available
 with the :config:`objspace.std.withmapdict` option.
 
-Builtin-Shadowing
-+++++++++++++++++
-
-Usually the calling of builtins in Python requires two dictionary lookups: first
-to see whether the current global dictionary contains an object with the same
-name, then a lookup in the ``__builtin__`` dictionary. This is somehow
-circumvented by storing an often used builtin into a local variable to get
-the fast local lookup (which is a rather strange and ugly hack).
-
-The same problem is solved in a different way by "wary" dictionaries. They are
-another dictionary representation used together with multidicts. This
-representation is used only for module dictionaries. The representation checks on
-every setitem whether the key that is used is the name of a builtin. If this is
-the case, the dictionary is marked as shadowing that particular builtin.
-
-To identify calls to builtins easily, a new bytecode (``CALL_LIKELY_BUILTIN``)
-is introduced. Whenever it is executed, the globals dictionary is checked
-to see whether it masks the builtin (which is possible without a dictionary
-lookup).  Then the ``__builtin__`` dict is checked in the same way,
-to see whether somebody replaced the real builtin with something else. In the
-common case, the program didn't do any of these; the proper builtin can then
-be called without using any dictionary lookup at all.
-
-You can enable this feature with the
-:config:`objspace.opcodes.CALL_LIKELY_BUILTIN` option.
-
 
 List Optimizations
 ------------------
diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/project-ideas.rst
@@ -0,0 +1,149 @@
+
+Potential project list
+======================
+
+This is a list of projects that are interesting for potential contributors
+who are seriously interested in the PyPy project. They mostly share common
+patterns - they're mid-to-large in size, they're usually well defined as
+a standalone projects and they're not being actively worked on. For small
+projects that you might want to work on, it's much better to either look
+at the `issue tracker`_, pop up on #pypy on irc.freenode.net or write to the
+`mailing list`_. This is simply for the reason that small possible projects
+tend to change very rapidly.
+
+This list is mostly for having on overview on potential projects. This list is
+by definition not exhaustive and we're pleased if people come up with their
+own improvement ideas. In any case, if you feel like working on some of those
+projects, or anything else in PyPy, pop up on IRC or write to us on the
+`mailing list`_.
+
+Numpy improvements
+------------------
+
+This is more of a project-container than a single project. Possible ideas:
+
+* experiment with auto-vectorization using SSE or implement vectorization
+  without automatically detecting it for array operations.
+
+* improve numpy, for example implement memory views.
+
+* interface with fortran/C libraries.
+
+Improving the jitviewer
+------------------------
+
+Analyzing performance of applications is always tricky. We have various
+tools, for example a `jitviewer`_ that help us analyze performance.
+
+The jitviewer shows the code generated by the PyPy JIT in a hierarchical way,
+as shown by the screenshot below:
+
+  - at the bottom level, it shows the Python source code of the compiled loops
+
+  - for each source code line, it shows the corresponding Python bytecode
+
+  - for each opcode, it shows the corresponding jit operations, which are the
+    ones actually sent to the backend for compiling (such as ``i15 = i10 <
+    2000`` in the example)
+
+.. image:: image/jitviewer.png
+
+We would like to add one level to this hierarchy, by showing the generated
+machine code for each jit operation.  The necessary information is already in
+the log file produced by the JIT, so it is "only" a matter of teaching the
+jitviewer to display it.  Ideally, the machine code should be hidden by
+default and viewable on request.
+
+The jitviewer is a web application based on flask and jinja2 (and jQuery on
+the client): if you have great web developing skills and want to help PyPy,
+this is an ideal task to get started, because it does not require any deep
+knowledge of the internals.
+
+Translation Toolchain
+---------------------
+
+* Incremental or distributed translation.
+
+* Allow separate compilation of extension modules.
+
+Work on some of other languages
+-------------------------------
+
+There are various languages implemented using the RPython translation toolchain.
+One of the most interesting is the `JavaScript implementation`_, but there
+are others like scheme or prolog. An interesting project would be to improve
+the jittability of those or to experiment with various optimizations.
+
+Various GCs
+-----------
+
+PyPy has pluggable garbage collection policy. This means that various garbage
+collectors can be written for specialized purposes, or even various
+experiments can be done for the general purpose. Examples
+
+* An incremental garbage collector that has specified maximal pause times,
+  crucial for games
+
+* A garbage collector that compact memory better for mobile devices
+
+* A concurrent garbage collector (a lot of work)
+
+Remove the GIL
+--------------
+
+This is a major task that requires lots of thinking. However, few subprojects
+can be potentially specified, unless a better plan can be thought out:
+
+* A thread-aware garbage collector
+
+* Better RPython primitives for dealing with concurrency
+
+* JIT passes to remove locks on objects
+
+* (maybe) implement locking in Python interpreter
+
+* alternatively, look at Software Transactional Memory
+
+Introduce new benchmarks
+------------------------
+
+We're usually happy to introduce new benchmarks. Please consult us
+before, but in general something that's real-world python code
+and is not already represented is welcome. We need at least a standalone
+script that can run without parameters. Example ideas (benchmarks need
+to be got from them!):
+
+* `hg`
+
+* `sympy`
+
+Experiment (again) with LLVM backend for RPython compilation
+------------------------------------------------------------
+
+We already tried working with LLVM and at the time, LLVM was not mature enough
+for our needs. It's possible that this has changed, reviving the LLVM backend
+(or writing new from scratch) for static compilation would be a good project.
+
+(On the other hand, just generating C code and using clang might be enough.
+The issue with that is the so-called "asmgcc GC root finder", which has tons
+of issues of this own.  In my opinion (arigo), it would be definitely a
+better project to try to optimize the alternative, the "shadowstack" GC root
+finder, which is nicely portable.  So far it gives a pypy that is around
+7% slower.)
+
+Embedding PyPy
+----------------------------------------
+
+Being able to embed PyPy, say with its own limited C API, would be
+useful.  But here is the most interesting variant, straight from
+EuroPython live discussion :-)  We can have a generic "libpypy.so" that
+can be used as a placeholder dynamic library, and when it gets loaded,
+it runs a .py module that installs (via ctypes) the interface it wants
+exported.  This would give us a one-size-fits-all generic .so file to be
+imported by any application that wants to load .so files :-)
+
+
+.. _`issue tracker`: http://bugs.pypy.org
+.. _`mailing list`: http://mail.python.org/mailman/listinfo/pypy-dev
+.. _`jitviewer`: http://bitbucket.org/pypy/jitviewer
+.. _`JavaScript implementation`: https://bitbucket.org/pypy/lang-js/overview
diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py
--- a/pypy/interpreter/argument.py
+++ b/pypy/interpreter/argument.py
@@ -17,7 +17,7 @@
         self.varargname = varargname
         self.kwargname = kwargname
 
-    @jit.purefunction
+    @jit.elidable
     def find_argname(self, name):
         try:
             return self.argnames.index(name)
@@ -90,15 +90,18 @@
     ###  Construction  ###
 
     def __init__(self, space, args_w, keywords=None, keywords_w=None,
-                 w_stararg=None, w_starstararg=None):
+                 w_stararg=None, w_starstararg=None, keyword_names_w=None):
         self.space = space
         assert isinstance(args_w, list)
         self.arguments_w = args_w
         self.keywords = keywords
         self.keywords_w = keywords_w
+        self.keyword_names_w = keyword_names_w  # matches the tail of .keywords
         if keywords is not None:
             assert keywords_w is not None
             assert len(keywords_w) == len(keywords)
+            assert (keyword_names_w is None or
+                    len(keyword_names_w) <= len(keywords))
             make_sure_not_resized(self.keywords)
             make_sure_not_resized(self.keywords_w)
 
@@ -132,7 +135,8 @@
 
     def replace_arguments(self, args_w):
         "Return a new Arguments with a args_w as positional arguments."
-        return Arguments(self.space, args_w, self.keywords, self.keywords_w)
+        return Arguments(self.space, args_w, self.keywords, self.keywords_w,
+                         keyword_names_w = self.keyword_names_w)
 
     def prepend(self, w_firstarg):
         "Return a new Arguments with a new argument inserted first."
@@ -201,15 +205,16 @@
                         space.w_TypeError,
                         space.wrap("keywords must be strings"))
                 if e.match(space, space.w_UnicodeEncodeError):
-                    raise OperationError(
-                        space.w_TypeError,
-                        space.wrap("keyword cannot be encoded to ascii"))
-                raise
-            if self.keywords and key in self.keywords:
-                raise operationerrfmt(self.space.w_TypeError,
-                                      "got multiple values "
-                                      "for keyword argument "
-                                      "'%s'", key)
+                    # Allow this to pass through
+                    key = None
+                else:
+                    raise
+            else:
+                if self.keywords and key in self.keywords:
+                    raise operationerrfmt(self.space.w_TypeError,
+                                          "got multiple values "
+                                          "for keyword argument "
+                                          "'%s'", key)
             keywords[i] = key
             keywords_w[i] = space.getitem(w_starstararg, w_key)
             i += 1
@@ -219,6 +224,7 @@
         else:
             self.keywords = self.keywords + keywords
             self.keywords_w = self.keywords_w + keywords_w
+        self.keyword_names_w = keys_w
 
     def fixedunpack(self, argcount):
         """The simplest argument parsing: get the 'argcount' arguments,
@@ -339,6 +345,10 @@
             used_keywords = [False] * num_kwds
             for i in range(num_kwds):
                 name = keywords[i]
+                # If name was not encoded as a string, it could be None. In that
+                # case, it's definitely not going to be in the signature.
+                if name is None:
+                    continue
                 j = signature.find_argname(name)
                 if j < 0:
                     continue
@@ -374,17 +384,26 @@
         if has_kwarg:
             w_kwds = self.space.newdict()
             if num_remainingkwds:
+                #
+                limit = len(keywords)
+                if self.keyword_names_w is not None:
+                    limit -= len(self.keyword_names_w)
                 for i in range(len(keywords)):
                     if not used_keywords[i]:
-                        key = keywords[i]
-                        self.space.setitem(w_kwds, self.space.wrap(key), keywords_w[i])
+                        if i < limit:
+                            w_key = self.space.wrap(keywords[i])
+                        else:
+                            w_key = self.keyword_names_w[i - limit]
+                        self.space.setitem(w_kwds, w_key, keywords_w[i])
+                #
             scope_w[co_argcount + has_vararg] = w_kwds
         elif num_remainingkwds:
             if co_argcount == 0:
                 raise ArgErrCount(avail, num_kwds,
                               co_argcount, has_vararg, has_kwarg,
                               defaults_w, missing)
-            raise ArgErrUnknownKwds(num_remainingkwds, keywords, used_keywords)
+            raise ArgErrUnknownKwds(self.space, num_remainingkwds, keywords,
+                                    used_keywords, self.keyword_names_w)
 
         if missing:
             raise ArgErrCount(avail, num_kwds,
@@ -443,9 +462,15 @@
         w_args = space.newtuple(self.arguments_w)
         w_kwds = space.newdict()
         if self.keywords is not None:
+            limit = len(self.keywords)
+            if self.keyword_names_w is not None:
+                limit -= len(self.keyword_names_w)
             for i in range(len(self.keywords)):
-                space.setitem(w_kwds, space.wrap(self.keywords[i]),
-                                      self.keywords_w[i])
+                if i < limit:
+                    w_key = space.wrap(self.keywords[i])
+                else:
+                    w_key = self.keyword_names_w[i - limit]
+                space.setitem(w_kwds, w_key, self.keywords_w[i])
         return w_args, w_kwds
 
 class ArgumentsForTranslation(Arguments):
@@ -666,14 +691,33 @@
 
 class ArgErrUnknownKwds(ArgErr):
 
-    def __init__(self, num_remainingkwds, keywords, used_keywords):
-        self.kwd_name = ''
+    def __init__(self, space, num_remainingkwds, keywords, used_keywords,
+                 keyword_names_w):
+        name = ''
         self.num_kwds = num_remainingkwds
         if num_remainingkwds == 1:
             for i in range(len(keywords)):
                 if not used_keywords[i]:
-                    self.kwd_name = keywords[i]
+                    name = keywords[i]
+                    if name is None:
+                        # We'll assume it's unicode. Encode it.
+                        # Careful, I *think* it should not be possible to
+                        # get an IndexError here but you never know.
+                        try:
+                            if keyword_names_w is None:
+                                raise IndexError
+                            # note: negative-based indexing from the end
+                            w_name = keyword_names_w[i - len(keywords)]
+                        except IndexError:
+                            name = '?'
+                        else:
+                            w_enc = space.wrap(space.sys.defaultencoding)
+                            w_err = space.wrap("replace")
+                            w_name = space.call_method(w_name, "encode", w_enc,
+                                                       w_err)
+                            name = space.str_w(w_name)
                     break
+        self.kwd_name = name
 
     def getmsg(self, fnname):
         if self.num_kwds == 1:
diff --git a/pypy/interpreter/astcompiler/assemble.py b/pypy/interpreter/astcompiler/assemble.py
--- a/pypy/interpreter/astcompiler/assemble.py
+++ b/pypy/interpreter/astcompiler/assemble.py
@@ -655,9 +655,6 @@
 def _compute_CALL_FUNCTION_VAR_KW(arg):
     return -_num_args(arg) - 2
 
-def _compute_CALL_LIKELY_BUILTIN(arg):
-    return -(arg & 0xFF) + 1
-
 def _compute_CALL_METHOD(arg):
     return -_num_args(arg) - 1
 
diff --git a/pypy/interpreter/astcompiler/codegen.py b/pypy/interpreter/astcompiler/codegen.py
--- a/pypy/interpreter/astcompiler/codegen.py
+++ b/pypy/interpreter/astcompiler/codegen.py
@@ -12,7 +12,6 @@
 from pypy.interpreter.pyparser.error import SyntaxError
 from pypy.tool import stdlib_opcode as ops
 from pypy.interpreter.error import OperationError
-from pypy.module.__builtin__.__init__ import BUILTIN_TO_INDEX
 
 
 def compile_ast(space, module, info):
@@ -134,7 +133,7 @@
 
     def accept_comp_iteration(self, codegen, index):
         self.elt.walkabout(codegen)
-        codegen.emit_op_arg(ops.SET_ADD, index)
+        codegen.emit_op_arg(ops.SET_ADD, index + 1)
 
 
 class __extend__(ast.DictComp):
@@ -148,7 +147,7 @@
     def accept_comp_iteration(self, codegen, index):
         self.value.walkabout(codegen)
         self.key.walkabout(codegen)
-        codegen.emit_op_arg(ops.MAP_ADD, index)
+        codegen.emit_op_arg(ops.MAP_ADD, index + 1)
 
 
 # These are frame blocks.
@@ -942,8 +941,7 @@
 
     def visit_Call(self, call):
         self.update_position(call.lineno)
-        if self._optimize_builtin_call(call) or \
-                self._optimize_method_call(call):
+        if self._optimize_method_call(call):
             return
         call.func.walkabout(self)
         arg = 0
@@ -977,28 +975,6 @@
     def _call_has_simple_args(self, call):
         return self._call_has_no_star_args(call) and not call.keywords
 
-    def _optimize_builtin_call(self, call):
-        if not self.space.config.objspace.opcodes.CALL_LIKELY_BUILTIN or \
-                not self._call_has_simple_args(call) or \
-                not isinstance(call.func, ast.Name):
-            return False
-        func_name = call.func
-        assert isinstance(func_name, ast.Name)
-        name_scope = self.scope.lookup(func_name.id)
-        if name_scope == symtable.SCOPE_GLOBAL_IMPLICIT or \
-                name_scope == symtable.SCOPE_UNKNOWN:
-            builtin_index = BUILTIN_TO_INDEX.get(func_name.id, -1)
-            if builtin_index != -1:
-                if call.args:
-                    args_count = len(call.args)
-                    self.visit_sequence(call.args)
-                else:
-                    args_count = 0
-                arg = builtin_index << 8 | args_count
-                self.emit_op_arg(ops.CALL_LIKELY_BUILTIN, arg)
-                return True
-        return False
-
     def _optimize_method_call(self, call):
         if not self.space.config.objspace.opcodes.CALL_METHOD or \
                 not self._call_has_no_star_args(call) or \
diff --git a/pypy/interpreter/astcompiler/misc.py b/pypy/interpreter/astcompiler/misc.py
--- a/pypy/interpreter/astcompiler/misc.py
+++ b/pypy/interpreter/astcompiler/misc.py
@@ -31,11 +31,12 @@
     future_lineno = 0
     future_column = 0
     have_docstring = False
+    body = None
     if isinstance(tree, ast.Module):
         body = tree.body
     elif isinstance(tree, ast.Interactive):
         body = tree.body
-    else:
+    if body is None:
         return 0, 0
     for stmt in body:
         if isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Str):
@@ -91,7 +92,10 @@
         return name
     if len(name) + 2 >= MANGLE_LEN:
         return name
-    if name.endswith('__'):
+    # Don't mangle __id__ or names with dots. The only time a name with a dot
+    # can occur is when we are compiling an import statement that has a package
+    # name.
+    if name.endswith('__') or '.' in name:
         return name
     try:
         i = 0
diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py
--- a/pypy/interpreter/astcompiler/test/test_compiler.py
+++ b/pypy/interpreter/astcompiler/test/test_compiler.py
@@ -55,7 +55,7 @@
         co_expr = compile(evalexpr, '<evalexpr>', 'eval')
         space = self.space
         pyco_expr = PyCode._from_code(space, co_expr)
-        w_res = pyco_expr.exec_host_bytecode(space, w_dict, w_dict)
+        w_res = pyco_expr.exec_host_bytecode(w_dict, w_dict)
         res = space.str_w(space.repr(w_res))
         if not isinstance(expected, float):
             assert res == repr(expected)
@@ -308,6 +308,15 @@
                "p.__name__", os.path.__name__)
         yield (self.st, 'from os import *',
                "path.__name__, sep", (os.path.__name__, os.sep))
+        yield (self.st, '''
+            class A(object):
+                def m(self):
+                    from __foo__.bar import x
+            try:
+                A().m()
+            except ImportError, e:
+                msg = str(e)
+            ''', "msg", "No module named __foo__")
 
     def test_if_stmts(self):
         yield self.st, "a = 42\nif a > 10: a += 2", "a", 44
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -237,7 +237,7 @@
 
 class ObjSpace(object):
     """Base class for the interpreter-level implementations of object spaces.
-    http://codespeak.net/pypy/dist/pypy/doc/objspace.html"""
+    http://pypy.readthedocs.org/en/latest/objspace.html"""
 
     full_exceptions = True  # full support for exceptions (normalization & more)
 
@@ -311,9 +311,6 @@
             mod = self.interpclass_w(w_mod)
             if isinstance(mod, Module) and mod.startup_called:
                 mod.shutdown(self)
-        if self.config.objspace.std.withdictmeasurement:
-            from pypy.objspace.std.dictmultiobject import report
-            report()
         if self.config.objspace.logbytecodes:
             self.reportbytecodecounts()
         if self.config.objspace.std.logspaceoptypes:
@@ -989,10 +986,7 @@
             compiler = self.createcompiler()
             expression = compiler.compile(expression, '?', 'eval', 0,
                                          hidden_applevel=hidden_applevel)
-        if isinstance(expression, types.CodeType):
-            # XXX only used by appsupport
-            expression = PyCode._from_code(self, expression)
-        if not isinstance(expression, PyCode):
+        else:
             raise TypeError, 'space.eval(): expected a string, code or PyCode object'
         return expression.exec_code(self, w_globals, w_locals)
 
@@ -1007,9 +1001,6 @@
             compiler = self.createcompiler()
             statement = compiler.compile(statement, filename, 'exec', 0,
                                          hidden_applevel=hidden_applevel)
-        if isinstance(statement, types.CodeType):
-            # XXX only used by appsupport
-            statement = PyCode._from_code(self, statement)
         if not isinstance(statement, PyCode):
             raise TypeError, 'space.exec_(): expected a string, code or PyCode object'
         w_key = self.wrap('__builtins__')
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -11,14 +11,14 @@
     """Interpreter-level exception that signals an exception that should be
     sent to the application level.
 
-    OperationError instances have three public attributes (and no .args),
-    w_type, w_value and application_traceback, which contain the wrapped
+    OperationError instances have three attributes (and no .args),
+    w_type, _w_value and _application_traceback, which contain the wrapped
     type and value describing the exception, and a chained list of
     PyTraceback objects making the application-level traceback.
     """
 
     _w_value = None
-    application_traceback = None
+    _application_traceback = None
 
     def __init__(self, w_type, w_value, tb=None):
         if not we_are_translated() and w_type is None:
@@ -26,7 +26,7 @@
             raise FlowingError(w_value)
         self.setup(w_type)
         self._w_value = w_value
-        self.application_traceback = tb
+        self._application_traceback = tb
 
     def setup(self, w_type):
         self.w_type = w_type
@@ -37,7 +37,7 @@
         # for sys.exc_clear()
         self.w_type = space.w_None
         self._w_value = space.w_None
-        self.application_traceback = None
+        self._application_traceback = None
         if not we_are_translated():
             del self.debug_excs[:]
 
@@ -103,7 +103,7 @@
 
     def print_app_tb_only(self, file):
         "NOT_RPYTHON"
-        tb = self.application_traceback
+        tb = self._application_traceback
         if tb:
             import linecache
             print >> file, "Traceback (application-level):"
@@ -251,6 +251,30 @@
     def _compute_value(self):
         raise NotImplementedError
 
+    def get_traceback(self):
+        """Calling this marks the PyTraceback as escaped, i.e. it becomes
+        accessible and inspectable by app-level Python code.  For the JIT.
+        Note that this has no effect if there are already several traceback
+        frames recorded, because in this case they are already marked as
+        escaping by executioncontext.leave() being called with
+        got_exception=True.
+        """
+        from pypy.interpreter.pytraceback import PyTraceback
+        tb = self._application_traceback
+        if tb is not None and isinstance(tb, PyTraceback):
+            tb.frame.mark_as_escaped()
+        return tb
+
+    def set_traceback(self, traceback):
+        """Set the current traceback.  It should either be a traceback
+        pointing to some already-escaped frame, or a traceback for the
+        current frame.  To support the latter case we do not mark the
+        frame as escaped.  The idea is that it will be marked as escaping
+        only if the exception really propagates out of this frame, by
+        executioncontext.leave() being called with got_exception=True.
+        """
+        self._application_traceback = traceback
+
 # ____________________________________________________________
 # optimization only: avoid the slowest operation -- the string
 # formatting with '%' -- in the common case were we don't
diff --git a/pypy/interpreter/eval.py b/pypy/interpreter/eval.py
--- a/pypy/interpreter/eval.py
+++ b/pypy/interpreter/eval.py
@@ -2,6 +2,7 @@
 This module defines the abstract base classes that support execution:
 Code and Frame.
 """
+from pypy.rlib import jit
 from pypy.interpreter.error import OperationError
 from pypy.interpreter.baseobjspace import Wrappable
 
@@ -97,21 +98,23 @@
         "Abstract. Get the expected number of locals."
         raise TypeError, "abstract"
 
+    @jit.dont_look_inside
     def fast2locals(self):
-        # Copy values from self.fastlocals_w to self.w_locals
+        # Copy values from the fastlocals to self.w_locals
         if self.w_locals is None:
             self.w_locals = self.space.newdict()
         varnames = self.getcode().getvarnames()
         fastscope_w = self.getfastscope()
-        for i in range(min(len(varnames), len(fastscope_w))):
+        for i in range(min(len(varnames), self.getfastscopelength())):
             name = varnames[i]
             w_value = fastscope_w[i]
             if w_value is not None:
                 w_name = self.space.wrap(name)
                 self.space.setitem(self.w_locals, w_name, w_value)
 
+    @jit.dont_look_inside
     def locals2fast(self):
-        # Copy values from self.w_locals to self.fastlocals_w
+        # Copy values from self.w_locals to the fastlocals
         assert self.w_locals is not None
         varnames = self.getcode().getvarnames()
         numlocals = self.getfastscopelength()
diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py
--- a/pypy/interpreter/executioncontext.py
+++ b/pypy/interpreter/executioncontext.py
@@ -58,13 +58,23 @@
         frame.f_backref = self.topframeref
         self.topframeref = jit.virtual_ref(frame)
 
-    def leave(self, frame, w_exitvalue):
+    def leave(self, frame, w_exitvalue, got_exception):
         try:
             if self.profilefunc:
                 self._trace(frame, 'leaveframe', w_exitvalue)
         finally:
+            frame_vref = self.topframeref
             self.topframeref = frame.f_backref
-            jit.virtual_ref_finish(frame)
+            if frame.escaped or got_exception:
+                # if this frame escaped to applevel, we must ensure that also
+                # f_back does
+                f_back = frame.f_backref()
+                if f_back:
+                    f_back.mark_as_escaped()
+                # force the frame (from the JIT point of view), so that it can
+                # be accessed also later
+                frame_vref()
+            jit.virtual_ref_finish(frame_vref, frame)
 
         if self.w_tracefunc is not None and not frame.hide():
             self.space.frame_trace_action.fire()
@@ -102,18 +112,16 @@
 
         # the following interface is for pickling and unpickling
         def getstate(self, space):
-            # XXX we could just save the top frame, which brings
-            # the whole frame stack, but right now we get the whole stack
-            items = [space.wrap(f) for f in self.getframestack()]
-            return space.newtuple(items)
+            if self.topframe is None:
+                return space.w_None
+            return self.topframe
 
         def setstate(self, space, w_state):
             from pypy.interpreter.pyframe import PyFrame
-            frames_w = space.unpackiterable(w_state)
-            if len(frames_w) > 0:
-                self.topframe = space.interp_w(PyFrame, frames_w[-1])
+            if space.is_w(w_state, space.w_None):
+                self.topframe = None
             else:
-                self.topframe = None
+                self.topframe = space.interp_w(PyFrame, w_state)
 
         def getframestack(self):
             lst = []
@@ -278,7 +286,7 @@
             if operr is not None:
                 w_value = operr.get_w_value(space)
                 w_arg = space.newtuple([operr.w_type, w_value,
-                                     space.wrap(operr.application_traceback)])
+                                     space.wrap(operr.get_traceback())])
 
             frame.fast2locals()
             self.is_tracing += 1
diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py
--- a/pypy/interpreter/function.py
+++ b/pypy/interpreter/function.py
@@ -16,7 +16,7 @@
 
 funccallunrolling = unrolling_iterable(range(4))
 
- at jit.purefunction_promote()
+ at jit.elidable_promote()
 def _get_immutable_code(func):
     assert not func.can_change_code
     return func.code
@@ -63,7 +63,7 @@
         if jit.we_are_jitted():
             if not self.can_change_code:
                 return _get_immutable_code(self)
-            return jit.hint(self.code, promote=True)
+            return jit.promote(self.code)
         return self.code
 
     def funccall(self, *args_w): # speed hack
@@ -98,7 +98,7 @@
                                                    self.closure)
                 for i in funccallunrolling:
                     if i < nargs:
-                        new_frame.fastlocals_w[i] = args_w[i]
+                        new_frame.locals_stack_w[i] = args_w[i]
                 return new_frame.run()
         elif nargs >= 1 and fast_natural_arity == Code.PASSTHROUGHARGS1:
             assert isinstance(code, gateway.BuiltinCodePassThroughArguments1)
@@ -158,7 +158,7 @@
                                                    self.closure)
         for i in xrange(nargs):
             w_arg = frame.peekvalue(nargs-1-i)
-            new_frame.fastlocals_w[i] = w_arg
+            new_frame.locals_stack_w[i] = w_arg
 
         return new_frame.run()
 
@@ -169,13 +169,13 @@
                                                    self.closure)
         for i in xrange(nargs):
             w_arg = frame.peekvalue(nargs-1-i)
-            new_frame.fastlocals_w[i] = w_arg
+            new_frame.locals_stack_w[i] = w_arg
 
         ndefs = len(self.defs_w)
         start = ndefs - defs_to_load
         i = nargs
         for j in xrange(start, ndefs):
-            new_frame.fastlocals_w[i] = self.defs_w[j]
+            new_frame.locals_stack_w[i] = self.defs_w[j]
             i += 1
         return new_frame.run()
 
@@ -465,19 +465,23 @@
                 space.abstract_isinstance_w(w_firstarg, self.w_class)):
             pass  # ok
         else:
-            myname = self.getname(space,"")
-            clsdescr = self.w_class.getname(space,"")
+            myname = self.getname(space, "")
+            clsdescr = self.w_class.getname(space, "")
             if clsdescr:
-                clsdescr+=" "
+                clsdescr += " instance"
+            else:
+                clsdescr = "instance"
             if w_firstarg is None:
                 instdescr = "nothing"
             else:
-                instname = space.abstract_getclass(w_firstarg).getname(space,"")
+                instname = space.abstract_getclass(w_firstarg).getname(space,
+                                                                       "")
                 if instname:
-                    instname += " "
-                instdescr = "%sinstance" %instname
-            msg = ("unbound method %s() must be called with %s"
-                   "instance as first argument (got %s instead)")
+                    instdescr = instname + " instance"
+                else:
+                    instdescr = "instance"
+            msg = ("unbound method %s() must be called with %s "
+                   "as first argument (got %s instead)")
             raise operationerrfmt(space.w_TypeError, msg,
                                   myname, clsdescr, instdescr)
         return space.call_args(self.w_function, args)
diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py
--- a/pypy/interpreter/generator.py
+++ b/pypy/interpreter/generator.py
@@ -62,7 +62,7 @@
             raise operr
         # XXX it's not clear that last_instr should be promoted at all
         # but as long as it is necessary for call_assembler, let's do it early
-        last_instr = jit.hint(frame.last_instr, promote=True)
+        last_instr = jit.promote(frame.last_instr)
         if last_instr == -1:
             if w_arg and not space.is_w(w_arg, space.w_None):
                 msg = "can't send non-None value to a just-started generator"
diff --git a/pypy/interpreter/main.py b/pypy/interpreter/main.py
--- a/pypy/interpreter/main.py
+++ b/pypy/interpreter/main.py
@@ -118,7 +118,7 @@
         operationerr.normalize_exception(space)
         w_type = operationerr.w_type
         w_value = operationerr.get_w_value(space)
-        w_traceback = space.wrap(operationerr.application_traceback)
+        w_traceback = space.wrap(operationerr.get_traceback())
 
         # for debugging convenience we also insert the exception into
         # the interpreter-level sys.last_xxx
diff --git a/pypy/interpreter/nestedscope.py b/pypy/interpreter/nestedscope.py
--- a/pypy/interpreter/nestedscope.py
+++ b/pypy/interpreter/nestedscope.py
@@ -127,6 +127,7 @@
         if self.cells is not None:
             self.cells[:ncellvars] = cellvars
 
+    @jit.dont_look_inside
     def fast2locals(self):
         super_fast2locals(self)
         # cellvars are values exported to inner scopes
@@ -145,6 +146,7 @@
                 w_name = self.space.wrap(name)
                 self.space.setitem(self.w_locals, w_name, w_value)
 
+    @jit.dont_look_inside
     def locals2fast(self):
         super_locals2fast(self)
         freevarnames = self.pycode.co_cellvars + self.pycode.co_freevars
@@ -168,7 +170,7 @@
         for i in range(len(args_to_copy)):
             argnum = args_to_copy[i]
             if argnum >= 0:
-                self.cells[i].set(self.fastlocals_w[argnum])
+                self.cells[i].set(self.locals_stack_w[argnum])
 
     def getfreevarname(self, index):
         freevarnames = self.pycode.co_cellvars + self.pycode.co_freevars
diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py
--- a/pypy/interpreter/pycode.py
+++ b/pypy/interpreter/pycode.py
@@ -63,6 +63,7 @@
         the pypy compiler"""
         self.space = space
         eval.Code.__init__(self, name)
+        assert nlocals >= 0
         self.co_argcount = argcount
         self.co_nlocals = nlocals
         self.co_stacksize = stacksize
@@ -95,7 +96,7 @@
             if self.co_flags & CO_VARKEYWORDS:
                 argcount += 1
             # Cell vars could shadow already-set arguments.
-            # astcompiler.pyassem used to be clever about the order of
+            # The compiler used to be clever about the order of
             # the variables in both co_varnames and co_cellvars, but
             # it no longer is for the sake of simplicity.  Moreover
             # code objects loaded from CPython don't necessarily follow
@@ -202,7 +203,7 @@
         # speed hack
         fresh_frame = jit.hint(frame, access_directly=True,
                                       fresh_virtualizable=True)
-        args_matched = args.parse_into_scope(None, fresh_frame.fastlocals_w,
+        args_matched = args.parse_into_scope(None, fresh_frame.locals_stack_w,
                                              func.name,
                                              sig, func.defs_w)
         fresh_frame.init_cells()
@@ -215,7 +216,7 @@
         # speed hack
         fresh_frame = jit.hint(frame, access_directly=True,
                                       fresh_virtualizable=True)
-        args_matched = args.parse_into_scope(w_obj, fresh_frame.fastlocals_w,
+        args_matched = args.parse_into_scope(w_obj, fresh_frame.locals_stack_w,
                                              func.name,
                                              sig, func.defs_w)
         fresh_frame.init_cells()
@@ -256,7 +257,7 @@
                          tuple(self.co_freevars),
                          tuple(self.co_cellvars) )
 
-    def exec_host_bytecode(self, w_dict, w_globals, w_locals):
+    def exec_host_bytecode(self, w_globals, w_locals):
         from pypy.interpreter.pyframe import CPythonFrame
         frame = CPythonFrame(self.space, self, w_globals, None)
         frame.setdictscope(w_locals)
diff --git a/pypy/interpreter/pycompiler.py b/pypy/interpreter/pycompiler.py
--- a/pypy/interpreter/pycompiler.py
+++ b/pypy/interpreter/pycompiler.py
@@ -101,9 +101,9 @@
     """
     def __init__(self, space, override_version=None):
         PyCodeCompiler.__init__(self, space)
-        self.parser = pyparse.PythonParser(space)
+        self.future_flags = future.futureFlags_2_7
+        self.parser = pyparse.PythonParser(space, self.future_flags)
         self.additional_rules = {}
-        self.future_flags = future.futureFlags_2_7
         self.compiler_flags = self.future_flags.allowed_flags
 
     def compile_ast(self, node, filename, mode, flags):
@@ -140,9 +140,6 @@
     def _compile_to_ast(self, source, info):
         space = self.space
         try:
-            f_flags, future_info = future.get_futures(self.future_flags, source)
-            info.last_future_import = future_info
-            info.flags |= f_flags
             parse_tree = self.parser.parse_source(source, info)
             mod = astbuilder.ast_from_node(space, parse_tree, info)
         except parseerror.IndentationError, e:
diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py
--- a/pypy/interpreter/pyframe.py
+++ b/pypy/interpreter/pyframe.py
@@ -9,9 +9,9 @@
 from pypy.interpreter import pytraceback
 from pypy.rlib.objectmodel import we_are_translated, instantiate
 from pypy.rlib.jit import hint
-from pypy.rlib.debug import make_sure_not_resized
+from pypy.rlib.debug import make_sure_not_resized, check_nonneg
 from pypy.rlib.rarithmetic import intmask
-from pypy.rlib import jit, rstack
+from pypy.rlib import jit
 from pypy.tool import stdlib_opcode
 from pypy.tool.stdlib_opcode import host_bytecode_spec
 
@@ -49,24 +49,36 @@
     instr_ub                 = 0
     instr_prev_plus_one      = 0
     is_being_profiled        = False
+    escaped                  = False  # see mark_as_escaped()
 
     def __init__(self, space, code, w_globals, closure):
         self = hint(self, access_directly=True, fresh_virtualizable=True)
         assert isinstance(code, pycode.PyCode)
         self.pycode = code
         eval.Frame.__init__(self, space, w_globals)
-        self.valuestack_w = [None] * code.co_stacksize
-        self.valuestackdepth = 0
+        self.locals_stack_w = [None] * (code.co_nlocals + code.co_stacksize)
+        self.nlocals = code.co_nlocals
+        self.valuestackdepth = code.co_nlocals
         self.lastblock = None
+        make_sure_not_resized(self.locals_stack_w)
+        check_nonneg(self.nlocals)
+        #
         if space.config.objspace.honor__builtins__:
             self.builtin = space.builtin.pick_builtin(w_globals)
         # regular functions always have CO_OPTIMIZED and CO_NEWLOCALS.
         # class bodies only have CO_NEWLOCALS.
         self.initialize_frame_scopes(closure, code)
-        self.fastlocals_w = [None] * code.co_nlocals
-        make_sure_not_resized(self.fastlocals_w)
         self.f_lineno = code.co_firstlineno
 
+    def mark_as_escaped(self):
+        """
+        Must be called on frames that are exposed to applevel, e.g. by
+        sys._getframe().  This ensures that the virtualref holding the frame
+        is properly forced by ec.leave(), and thus the frame will be still
+        accessible even after the corresponding C stack died.
+        """
+        self.escaped = True
+
     def append_block(self, block):
         block.previous = self.lastblock
         self.lastblock = block
@@ -138,6 +150,7 @@
                 not self.space.config.translating)
         executioncontext = self.space.getexecutioncontext()
         executioncontext.enter(self)
+        got_exception = True
         w_exitvalue = self.space.w_None
         try:
             executioncontext.call_trace(self)
@@ -157,8 +170,6 @@
             try:
                 w_exitvalue = self.dispatch(self.pycode, next_instr,
                                             executioncontext)
-                rstack.resume_point("execute_frame", self, executioncontext,
-                                    returns=w_exitvalue)
             except Exception:
                 executioncontext.return_trace(self, self.space.w_None)
                 raise
@@ -166,22 +177,23 @@
             # clean up the exception, might be useful for not
             # allocating exception objects in some cases
             self.last_exception = None
+            got_exception = False
         finally:
-            executioncontext.leave(self, w_exitvalue)
+            executioncontext.leave(self, w_exitvalue, got_exception)
         return w_exitvalue
     execute_frame.insert_stack_check_here = True
 
     # stack manipulation helpers
     def pushvalue(self, w_object):
         depth = self.valuestackdepth
-        self.valuestack_w[depth] = w_object
+        self.locals_stack_w[depth] = w_object
         self.valuestackdepth = depth + 1
 
     def popvalue(self):
         depth = self.valuestackdepth - 1
-        assert depth >= 0, "pop from empty value stack"
-        w_object = self.valuestack_w[depth]
-        self.valuestack_w[depth] = None
+        assert depth >= self.nlocals, "pop from empty value stack"
+        w_object = self.locals_stack_w[depth]
+        self.locals_stack_w[depth] = None
         self.valuestackdepth = depth
         return w_object
 
@@ -207,24 +219,24 @@
     def peekvalues(self, n):
         values_w = [None] * n
         base = self.valuestackdepth - n
-        assert base >= 0
+        assert base >= self.nlocals
         while True:
             n -= 1
             if n < 0:
                 break
-            values_w[n] = self.valuestack_w[base+n]
+            values_w[n] = self.locals_stack_w[base+n]
         return values_w
 
     @jit.unroll_safe
     def dropvalues(self, n):
         n = hint(n, promote=True)
         finaldepth = self.valuestackdepth - n
-        assert finaldepth >= 0, "stack underflow in dropvalues()"        
+        assert finaldepth >= self.nlocals, "stack underflow in dropvalues()"
         while True:
             n -= 1
             if n < 0:
                 break
-            self.valuestack_w[finaldepth+n] = None
+            self.locals_stack_w[finaldepth+n] = None
         self.valuestackdepth = finaldepth
 
     @jit.unroll_safe
@@ -251,30 +263,30 @@
         # Contrast this with CPython where it's PEEK(-1).
         index_from_top = hint(index_from_top, promote=True)
         index = self.valuestackdepth + ~index_from_top
-        assert index >= 0, "peek past the bottom of the stack"
-        return self.valuestack_w[index]
+        assert index >= self.nlocals, "peek past the bottom of the stack"
+        return self.locals_stack_w[index]
 
     def settopvalue(self, w_object, index_from_top=0):
         index_from_top = hint(index_from_top, promote=True)
         index = self.valuestackdepth + ~index_from_top
-        assert index >= 0, "settop past the bottom of the stack"
-        self.valuestack_w[index] = w_object
+        assert index >= self.nlocals, "settop past the bottom of the stack"
+        self.locals_stack_w[index] = w_object
 
     @jit.unroll_safe
     def dropvaluesuntil(self, finaldepth):
         depth = self.valuestackdepth - 1
         finaldepth = hint(finaldepth, promote=True)
         while depth >= finaldepth:
-            self.valuestack_w[depth] = None
+            self.locals_stack_w[depth] = None
             depth -= 1
         self.valuestackdepth = finaldepth
 
-    def savevaluestack(self):
-        return self.valuestack_w[:self.valuestackdepth]
+    def save_locals_stack(self):
+        return self.locals_stack_w[:self.valuestackdepth]
 
-    def restorevaluestack(self, items_w):
-        assert None not in items_w
-        self.valuestack_w[:len(items_w)] = items_w
+    def restore_locals_stack(self, items_w):
+        self.locals_stack_w[:len(items_w)] = items_w
+        self.init_cells()
         self.dropvaluesuntil(len(items_w))
 
     def make_arguments(self, nargs):
@@ -304,17 +316,18 @@
         else:
             f_lineno = self.f_lineno
 
-        values_w = self.valuestack_w[0:self.valuestackdepth]
+        values_w = self.locals_stack_w[self.nlocals:self.valuestackdepth]
         w_valuestack = maker.slp_into_tuple_with_nulls(space, values_w)
         
         w_blockstack = nt([block._get_state_(space) for block in self.get_blocklist()])
-        w_fastlocals = maker.slp_into_tuple_with_nulls(space, self.fastlocals_w)
+        w_fastlocals = maker.slp_into_tuple_with_nulls(
+            space, self.locals_stack_w[:self.nlocals])
         if self.last_exception is None:
             w_exc_value = space.w_None
             w_tb = space.w_None
         else:
             w_exc_value = self.last_exception.get_w_value(space)
-            w_tb = w(self.last_exception.application_traceback)
+            w_tb = w(self.last_exception.get_traceback())
         
         tup_state = [
             w(self.f_backref()),
@@ -389,7 +402,8 @@
         new_frame.last_instr = space.int_w(w_last_instr)
         new_frame.frame_finished_execution = space.is_true(w_finished)
         new_frame.f_lineno = space.int_w(w_f_lineno)
-        new_frame.fastlocals_w = maker.slp_from_tuple_with_nulls(space, w_fastlocals)
+        fastlocals_w = maker.slp_from_tuple_with_nulls(space, w_fastlocals)
+        new_frame.locals_stack_w[:len(fastlocals_w)] = fastlocals_w
 
         if space.is_w(w_f_trace, space.w_None):
             new_frame.w_f_trace = None
@@ -413,27 +427,28 @@
     @jit.dont_look_inside
     def getfastscope(self):
         "Get the fast locals as a list."
-        return self.fastlocals_w
+        return self.locals_stack_w
 
+    @jit.dont_look_inside
     def setfastscope(self, scope_w):
         """Initialize the fast locals from a list of values,
         where the order is according to self.pycode.signature()."""
         scope_len = len(scope_w)
-        if scope_len > len(self.fastlocals_w):
+        if scope_len > self.nlocals:
             raise ValueError, "new fastscope is longer than the allocated area"
-        # don't assign directly to 'fastlocals_w[:scope_len]' to be
+        # don't assign directly to 'locals_stack_w[:scope_len]' to be
         # virtualizable-friendly
         for i in range(scope_len):
-            self.fastlocals_w[i] = scope_w[i]
+            self.locals_stack_w[i] = scope_w[i]
         self.init_cells()
 
     def init_cells(self):
-        """Initialize cellvars from self.fastlocals_w
+        """Initialize cellvars from self.locals_stack_w.
         This is overridden in nestedscope.py"""
         pass
 
     def getfastscopelength(self):
-        return self.pycode.co_nlocals
+        return self.nlocals
 
     def getclosure(self):
         return None
@@ -634,7 +649,7 @@
             while f is not None and f.last_exception is None:
                 f = f.f_backref()
             if f is not None:
-                return space.wrap(f.last_exception.application_traceback)
+                return space.wrap(f.last_exception.get_traceback())
         return space.w_None
          
     def fget_f_restricted(self, space):
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -11,7 +11,7 @@
 from pypy.interpreter.pycode import PyCode
 from pypy.tool.sourcetools import func_with_new_name
 from pypy.rlib.objectmodel import we_are_translated
-from pypy.rlib import jit, rstackovf, rstack
+from pypy.rlib import jit, rstackovf
 from pypy.rlib.rarithmetic import r_uint, intmask
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib.debug import check_nonneg
@@ -83,16 +83,12 @@
         try:
             while True:
                 next_instr = self.handle_bytecode(co_code, next_instr, ec)
-                rstack.resume_point("dispatch", self, co_code, ec,
-                                    returns=next_instr)
         except ExitFrame:
             return self.popvalue()
 
     def handle_bytecode(self, co_code, next_instr, ec):
         try:
             next_instr = self.dispatch_bytecode(co_code, next_instr, ec)
-            rstack.resume_point("handle_bytecode", self, co_code, ec,
-                                returns=next_instr)
         except OperationError, operr:
             next_instr = self.handle_operation_error(ec, operr)
         except Reraise:
@@ -248,9 +244,6 @@
                         # dispatch to the opcode method
                         meth = getattr(self, opdesc.methodname)
                         res = meth(oparg, next_instr)
-                        if opdesc.index == self.opcodedesc.CALL_FUNCTION.index:
-                            rstack.resume_point("dispatch_call", self, co_code,
-                                                next_instr, ec)
                         # !! warning, for the annotator the next line is not
                         # comparing an int and None - you can't do that.
                         # Instead, it's constant-folded to either True or False
@@ -331,7 +324,7 @@
 
     def LOAD_FAST(self, varindex, next_instr):
         # access a local variable directly
-        w_value = self.fastlocals_w[varindex]
+        w_value = self.locals_stack_w[varindex]
         if w_value is None:
             self._load_fast_failed(varindex)
         self.pushvalue(w_value)
@@ -350,7 +343,7 @@
     def STORE_FAST(self, varindex, next_instr):
         w_newvalue = self.popvalue()
         assert w_newvalue is not None
-        self.fastlocals_w[varindex] = w_newvalue
+        self.locals_stack_w[varindex] = w_newvalue
 
     def POP_TOP(self, oparg, next_instr):
         self.popvalue()
@@ -573,7 +566,7 @@
         else:
             msg = "raise: arg 3 must be a traceback or None"
             tb = pytraceback.check_traceback(space, w_traceback, msg)
-            operror.application_traceback = tb
+            operror.set_traceback(tb)
             # special 3-arguments raise, no new traceback obj will be attached
             raise RaiseWithExplicitTraceback(operror)
 
@@ -703,12 +696,12 @@
     LOAD_GLOBAL._always_inline_ = True
 
     def DELETE_FAST(self, varindex, next_instr):
-        if self.fastlocals_w[varindex] is None:
+        if self.locals_stack_w[varindex] is None:
             varname = self.getlocalvarname(varindex)
             message = "local variable '%s' referenced before assignment"
             raise operationerrfmt(self.space.w_UnboundLocalError, message,
                                   varname)
-        self.fastlocals_w[varindex] = None
+        self.locals_stack_w[varindex] = None
 
     def BUILD_TUPLE(self, itemcount, next_instr):
         items = self.popvalues(itemcount)
@@ -953,7 +946,7 @@
                       isinstance(unroller, SApplicationException))
         if is_app_exc:
             operr = unroller.operr
-            w_traceback = self.space.wrap(operr.application_traceback)
+            w_traceback = self.space.wrap(operr.get_traceback())
             w_suppress = self.call_contextmanager_exit_function(
                 w_exitfunc,
                 operr.w_type,
@@ -997,7 +990,6 @@
                                                           args)
         else:
             w_result = self.space.call_args(w_function, args)
-        rstack.resume_point("call_function", self, returns=w_result)
         self.pushvalue(w_result)
 
     def CALL_FUNCTION(self, oparg, next_instr):
@@ -1008,8 +1000,6 @@
             w_function = self.peekvalue(nargs)
             try:
                 w_result = self.space.call_valuestack(w_function, nargs, self)
-                rstack.resume_point("CALL_FUNCTION", self, nargs,
-                                    returns=w_result)
             finally:
                 self.dropvalues(nargs + 1)
             self.pushvalue(w_result)
@@ -1058,30 +1048,18 @@
 
     def SET_ADD(self, oparg, next_instr):
         w_value = self.popvalue()
-        w_set = self.peekvalue(oparg)
+        w_set = self.peekvalue(oparg - 1)
         self.space.call_method(w_set, 'add', w_value)
 
     def MAP_ADD(self, oparg, next_instr):
         w_key = self.popvalue()
         w_value = self.popvalue()
-        w_dict = self.peekvalue(oparg)
+        w_dict = self.peekvalue(oparg - 1)
         self.space.setitem(w_dict, w_key, w_value)
 
     def SET_LINENO(self, lineno, next_instr):
         pass
 
-    def CALL_LIKELY_BUILTIN(self, oparg, next_instr):
-        # overridden by faster version in the standard object space.
-        from pypy.module.__builtin__ import OPTIMIZED_BUILTINS
-        varname = OPTIMIZED_BUILTINS[oparg >> 8]
-        w_function = self._load_global(varname)
-        nargs = oparg&0xFF
-        try:
-            w_result = self.space.call_valuestack(w_function, nargs, self)
-        finally:
-            self.dropvalues(nargs)
-        self.pushvalue(w_result)
-
     # overridden by faster version in the standard object space.
     LOOKUP_METHOD = LOAD_ATTR
     CALL_METHOD = CALL_FUNCTION
@@ -1099,13 +1077,12 @@
         w_dict = self.space.newdict()
         self.pushvalue(w_dict)
 
+    @jit.unroll_safe
     def BUILD_SET(self, itemcount, next_instr):
-        w_set = self.space.call_function(self.space.w_set)
-        if itemcount:
-            w_add = self.space.getattr(w_set, self.space.wrap("add"))
-            for i in range(itemcount):
-                w_item = self.popvalue()
-                self.space.call_function(w_add, w_item)
+        w_set = self.space.newset()
+        for i in range(itemcount):
+            w_item = self.popvalue()
+            self.space.call_method(w_set, 'add', w_item)
         self.pushvalue(w_set)
 
     def STORE_MAP(self, oparg, next_instr):
diff --git a/pypy/interpreter/pyparser/pyparse.py b/pypy/interpreter/pyparser/pyparse.py
--- a/pypy/interpreter/pyparser/pyparse.py
+++ b/pypy/interpreter/pyparser/pyparse.py
@@ -1,6 +1,6 @@
 from pypy.interpreter import gateway
 from pypy.interpreter.error import OperationError
-from pypy.interpreter.pyparser import parser, pytokenizer, pygram, error
+from pypy.interpreter.pyparser import future, parser, pytokenizer, pygram, error
 from pypy.interpreter.astcompiler import consts
 
 
@@ -88,9 +88,11 @@
 
 class PythonParser(parser.Parser):
 
-    def __init__(self, space, grammar=pygram.python_grammar):
+    def __init__(self, space, future_flags=future.futureFlags_2_7,
+                 grammar=pygram.python_grammar):
         parser.Parser.__init__(self, grammar)
         self.space = space
+        self.future_flags = future_flags
 
     def parse_source(self, textsrc, compile_info):
         """Main entry point for parsing Python source.
@@ -133,6 +135,10 @@
                         raise error.SyntaxError(space.str_w(w_message))
                     raise
 
+        f_flags, future_info = future.get_futures(self.future_flags, textsrc)
+        compile_info.last_future_import = future_info
+        compile_info.flags |= f_flags
+
         flags = compile_info.flags
 
         if flags & consts.CO_FUTURE_PRINT_FUNCTION:
diff --git a/pypy/interpreter/pytraceback.py b/pypy/interpreter/pytraceback.py
--- a/pypy/interpreter/pytraceback.py
+++ b/pypy/interpreter/pytraceback.py
@@ -51,9 +51,9 @@
 def record_application_traceback(space, operror, frame, last_instruction):
     if frame.pycode.hidden_applevel:
         return
-    tb = operror.application_traceback
+    tb = operror.get_traceback()
     tb = PyTraceback(space, frame, last_instruction, tb)
-    operror.application_traceback = tb
+    operror.set_traceback(tb)
 
 def offset2lineno(c, stopat):
     tab = c.co_lnotab
diff --git a/pypy/interpreter/test/test_argument.py b/pypy/interpreter/test/test_argument.py
--- a/pypy/interpreter/test/test_argument.py
+++ b/pypy/interpreter/test/test_argument.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 import py
 from pypy.interpreter.argument import (Arguments, ArgumentsForTranslation,
     ArgErr, ArgErrUnknownKwds, ArgErrMultipleValues, ArgErrCount, rawshape,
@@ -126,6 +127,7 @@
     w_AttributeError = AttributeError
     w_UnicodeEncodeError = UnicodeEncodeError
     w_dict = dict
+    w_str = str
 
 class TestArgumentsNormal(object):
 
@@ -485,26 +487,6 @@
         args._match_signature(None, l, Signature(['abc']))
         assert len(l) == 1
         assert l[0] == space.wrap(5)
-        #
-        def str_w(w):
-            try:
-                return str(w)
-            except UnicodeEncodeError:
-                raise OperationError(space.w_UnicodeEncodeError,
-                                     space.wrap("oups"))
-        space.str_w = str_w
-        w_starstar = space.wrap({u'\u1234': 5})
-        err = py.test.raises(OperationError, Arguments,
-                             space, [], w_starstararg=w_starstar)
-        # Check that we get a TypeError.  On CPython it is because of
-        # "no argument called '?'".  On PyPy we get a TypeError too, but
-        # earlier: "keyword cannot be encoded to ascii".  The
-        # difference, besides the error message, is only apparent if the
-        # receiver also takes a **arg.  Then CPython passes the
-        # non-ascii unicode unmodified, whereas PyPy complains.  We will
-        # not care until someone has a use case for that.
-        assert not err.value.match(space, space.w_UnicodeEncodeError)
-        assert     err.value.match(space, space.w_TypeError)
 
 class TestErrorHandling(object):
     def test_missing_args(self):
@@ -559,13 +541,26 @@
             assert 0, "did not raise"
 
     def test_unknown_keywords(self):
-        err = ArgErrUnknownKwds(1, ['a', 'b'], [True, False])
+        space = DummySpace()
+        err = ArgErrUnknownKwds(space, 1, ['a', 'b'], [True, False], None)
         s = err.getmsg('foo')
         assert s == "foo() got an unexpected keyword argument 'b'"
-        err = ArgErrUnknownKwds(2, ['a', 'b', 'c'], [True, False, False])
+        err = ArgErrUnknownKwds(space, 2, ['a', 'b', 'c'],
+                                [True, False, False], None)
         s = err.getmsg('foo')
         assert s == "foo() got 2 unexpected keyword arguments"
 
+    def test_unknown_unicode_keyword(self):
+        class DummySpaceUnicode(DummySpace):
+            class sys:
+                defaultencoding = 'utf-8'
+        space = DummySpaceUnicode()
+        err = ArgErrUnknownKwds(space, 1, ['a', None, 'b', 'c'],
+                                [True, False, True, True],
+                                [unichr(0x1234), u'b', u'c'])
+        s = err.getmsg('foo')
+        assert s == "foo() got an unexpected keyword argument '\xe1\x88\xb4'"
+
     def test_multiple_values(self):
         err = ArgErrMultipleValues('bla')
         s = err.getmsg('foo')
@@ -592,6 +587,14 @@
         exc = raises(TypeError, (lambda a, b, **kw: 0), a=1)
         assert exc.value.message == "<lambda>() takes exactly 2 non-keyword arguments (0 given)"
 
+    def test_unicode_keywords(self):
+        def f(**kwargs):
+            assert kwargs[u"&#32654;"] == 42
+        f(**{u"&#32654;" : 42})
+        def f(x): pass
+        e = raises(TypeError, "f(**{u'&#252;' : 19})")
+        assert "?" in str(e.value)
+
 def make_arguments_for_translation(space, args_w, keywords_w={},
                                    w_stararg=None, w_starstararg=None):
     return ArgumentsForTranslation(space, args_w, keywords_w.keys(),
diff --git a/pypy/interpreter/test/test_compiler.py b/pypy/interpreter/test/test_compiler.py
--- a/pypy/interpreter/test/test_compiler.py
+++ b/pypy/interpreter/test/test_compiler.py
@@ -714,6 +714,12 @@
 
 class AppTestCompiler:
 
+    def test_bom_with_future(self):
+        s = '\xef\xbb\xbffrom __future__ import division\nx = 1/2'
+        ns = {}
+        exec s in ns
+        assert ns["x"] == .5
+
     def test_values_of_different_types(self):
         exec "a = 0; b = 0L; c = 0.0; d = 0j"
         assert type(a) is int
diff --git a/pypy/interpreter/test/test_eval.py b/pypy/interpreter/test/test_eval.py
--- a/pypy/interpreter/test/test_eval.py
+++ b/pypy/interpreter/test/test_eval.py
@@ -15,16 +15,16 @@
                 self.code = code
                 Frame.__init__(self, space)
                 self.numlocals = numlocals
-                self.fastlocals_w = [None] * self.numlocals
+                self._fastlocals_w = [None] * self.numlocals
 
             def getcode(self):
                 return self.code
 
             def setfastscope(self, scope_w):
-                self.fastlocals_w = scope_w
+                self._fastlocals_w = scope_w
 
             def getfastscope(self):
-                return self.fastlocals_w
+                return self._fastlocals_w
 
             def getfastscopelength(self):
                 return self.numlocals
@@ -38,11 +38,11 @@
         self.f.fast2locals()
         assert space.eq_w(self.f.w_locals, self.space.wrap({}))
         
-        self.f.fastlocals_w[0] = w(5)
+        self.f._fastlocals_w[0] = w(5)
         self.f.fast2locals()
         assert space.eq_w(self.f.w_locals, self.space.wrap({'x': 5}))
 
-        self.f.fastlocals_w[2] = w(7)
+        self.f._fastlocals_w[2] = w(7)
         self.f.fast2locals()
         assert space.eq_w(self.f.w_locals, self.space.wrap({'x': 5, 'args': 7}))
 
@@ -57,13 +57,13 @@
         w = self.space.wrap
         self.f.w_locals = self.space.wrap({})
         self.f.locals2fast()
-        self.sameList(self.f.fastlocals_w, [None]*5)
+        self.sameList(self.f._fastlocals_w, [None]*5)
 
         self.f.w_locals = self.space.wrap({'x': 5})
         self.f.locals2fast()
-        self.sameList(self.f.fastlocals_w, [w(5)] + [None]*4)
+        self.sameList(self.f._fastlocals_w, [w(5)] + [None]*4)
 
         self.f.w_locals = self.space.wrap({'x':5, 'args':7})
         self.f.locals2fast()
-        self.sameList(self.f.fastlocals_w, [w(5), None, w(7),
-                                            None, None])
+        self.sameList(self.f._fastlocals_w, [w(5), None, w(7),
+                                             None, None])
diff --git a/pypy/interpreter/test/test_executioncontext.py b/pypy/interpreter/test/test_executioncontext.py
--- a/pypy/interpreter/test/test_executioncontext.py
+++ b/pypy/interpreter/test/test_executioncontext.py
@@ -106,7 +106,7 @@
             if isinstance(seen[0], Method):
                 found = 'method %s of %s' % (
                     seen[0].w_function.name,
-                    seen[0].w_class.getname(space, '?'))
+                    seen[0].w_class.getname(space))
             else:
                 assert isinstance(seen[0], Function)
                 found = 'builtin %s' % seen[0].name
@@ -232,31 +232,6 @@
         assert [i[0] for i in events] == ['c_call', 'c_return', 'return', 'c_call']
         assert events[0][1] == events[1][1]
 
-    def test_tracing_range_builtinshortcut(self):
-        opts = {"objspace.opcodes.CALL_LIKELY_BUILTIN": True}
-        space = gettestobjspace(**opts)
-        source = """def f(profile):
-        import sys
-        sys.setprofile(profile)
-        range(10)
-        sys.setprofile(None)
-        """
-        w_events = space.appexec([space.wrap(source)], """(source):
-        import sys
-        l = []
-        def profile(frame, event, arg):
-            l.append((event, arg))
-        d = {}
-        exec source in d
-        f = d['f']
-        f(profile)
-        import dis
-        print dis.dis(f)
-        return l
-        """)
-        events = space.unwrap(w_events)
-        assert [i[0] for i in events] == ['c_call', 'c_return', 'c_call']
-
     def test_profile_and_exception(self):
         space = self.space
         w_res = space.appexec([], """():
@@ -280,9 +255,6 @@
         """)
 
 
-class TestExecutionContextWithCallLikelyBuiltin(TestExecutionContext):
-    keywords = {'objspace.opcodes.CALL_LIKELY_BUILTIN': True}
-
 class TestExecutionContextWithCallMethod(TestExecutionContext):
     keywords = {'objspace.opcodes.CALL_METHOD': True}
 
diff --git a/pypy/interpreter/test/test_pyframe.py b/pypy/interpreter/test/test_pyframe.py
--- a/pypy/interpreter/test/test_pyframe.py
+++ b/pypy/interpreter/test/test_pyframe.py
@@ -98,6 +98,15 @@
             return sys._getframe().f_back.f_code.co_name 
         f()
 
+    def test_f_back_virtualref(self):
+        import sys
+        def f():
+            return g()
+        def g():
+            return sys._getframe()
+        frame = f()
+        assert frame.f_back.f_code.co_name == 'f'
+
     def test_f_exc_xxx(self):
         import sys
 
@@ -122,6 +131,21 @@
         except:
             g(sys.exc_info())
 
+    def test_virtualref_through_traceback(self):
+        import sys
+        def g():
+            try:
+                raise ValueError
+            except:
+                _, _, tb = sys.exc_info()
+            return tb
+        def f():
+            return g()
+        #
+        tb = f()
+        assert tb.tb_frame.f_code.co_name == 'g'
+        assert tb.tb_frame.f_back.f_code.co_name == 'f'
+
     def test_trace_basic(self):
         import sys
         l = []
diff --git a/pypy/interpreter/test/test_typedef.py b/pypy/interpreter/test/test_typedef.py
--- a/pypy/interpreter/test/test_typedef.py
+++ b/pypy/interpreter/test/test_typedef.py
@@ -16,7 +16,7 @@
 
         def g():
             f()
-        
+
         try:
             g()
         except:
@@ -203,3 +203,27 @@
         lst = seen[:]
         assert lst == [5, 10, 2]
         raises(OSError, os.lseek, fd, 7, 0)
+
+    def test_method_attrs(self):
+        import sys
+        class A(object):
+            def m(self):
+                "aaa"
+            m.x = 3
+        class B(A):
+            pass
+
+        bm = B().m
+        assert bm.__func__ is bm.im_func
+        assert bm.__self__ is bm.im_self
+        assert bm.im_class is B
+        assert bm.__doc__ == "aaa"
+        assert bm.x == 3
+        raises(AttributeError, setattr, bm, 'x', 15)
+        l = []
+        assert l.append.__self__ is l
+        assert l.__add__.__self__ is l
+        # note: 'l.__add__.__objclass__' is not defined in pypy
+        # because it's a regular method, and .__objclass__
+        # differs from .im_class in case the method is
+        # defined in some parent class of l's actual class
diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py
--- a/pypy/interpreter/typedef.py
+++ b/pypy/interpreter/typedef.py
@@ -9,7 +9,7 @@
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.tool.sourcetools import compile2, func_with_new_name
 from pypy.rlib.objectmodel import instantiate, compute_identity_hash, specialize
-from pypy.rlib.jit import hint
+from pypy.rlib.jit import promote
 
 class TypeDef:
     def __init__(self, __name, __base=None, **rawdict):
@@ -206,7 +206,7 @@
             user_overridden_class = True
 
             def getclass(self, space):
-                return hint(self.w__class__, promote=True)
+                return promote(self.w__class__)
 
             def setclass(self, space, w_subtype):
                 # only used by descr_set___class__
@@ -761,12 +761,15 @@
     )
 Function.typedef.acceptable_as_base_class = False
 
-Method.typedef = TypeDef("method",
+Method.typedef = TypeDef(
+    "method",
     __new__ = interp2app(Method.descr_method__new__.im_func),
     __call__ = interp2app(Method.descr_method_call),
     __get__ = interp2app(Method.descr_method_get),
     im_func  = interp_attrproperty_w('w_function', cls=Method),
+    __func__ = interp_attrproperty_w('w_function', cls=Method),
     im_self  = interp_attrproperty_w('w_instance', cls=Method),
+    __self__ = interp_attrproperty_w('w_instance', cls=Method),
     im_class = interp_attrproperty_w('w_class', cls=Method),
     __getattribute__ = interp2app(Method.descr_method_getattribute),
     __eq__ = interp2app(Method.descr_method_eq),
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -136,6 +136,7 @@
     'call'            : (('ref', 'varargs'), 'intorptr'),
     'call_assembler'  : (('varargs',), 'intorptr'),
     'cond_call_gc_wb' : (('ptr', 'ptr'), None),
+    'cond_call_gc_wb_array': (('ptr', 'int', 'ptr'), None),
     'oosend'          : (('varargs',), 'intorptr'),
     'oosend_pure'     : (('varargs',), 'intorptr'),
     'guard_true'      : (('bool',), None),
@@ -600,15 +601,15 @@
         #
         return _op_default_implementation
 
-    def op_debug_merge_point(self, _, value, recdepth):
+    def op_debug_merge_point(self, _, *args):
         from pypy.jit.metainterp.warmspot import get_stats
-        loc = ConstPtr(value)._get_str()
         try:
             stats = get_stats()
         except AttributeError:
             pass
         else:
-            stats.add_merge_point_location(loc)
+            stats.add_merge_point_location(args[1:])
+        pass
 
     def op_guard_true(self, _, value):
         if not value:
@@ -820,6 +821,12 @@
             raise NotImplementedError
 
     def op_call(self, calldescr, func, *args):
+        return self._do_call(calldescr, func, args, call_with_llptr=False)
+
+    def op_call_release_gil(self, calldescr, func, *args):
+        return self._do_call(calldescr, func, args, call_with_llptr=True)
+
+    def _do_call(self, calldescr, func, args, call_with_llptr):
         global _last_exception
         assert _last_exception is None, "exception left behind"
         assert _call_args_i == _call_args_r == _call_args_f == []
@@ -838,7 +845,8 @@
             else:
                 raise TypeError(x)
         try:
-            return _do_call_common(func, args_in_order, calldescr)
+            return _do_call_common(func, args_in_order, calldescr,
+                                   call_with_llptr)
         except LLException, lle:
             _last_exception = lle
             d = {'v': None,
@@ -850,6 +858,9 @@
     def op_cond_call_gc_wb(self, descr, a, b):
         py.test.skip("cond_call_gc_wb not supported")
 
+    def op_cond_call_gc_wb_array(self, descr, a, b, c):
+        py.test.skip("cond_call_gc_wb_array not supported")
+
     def op_oosend(self, descr, obj, *args):
         raise NotImplementedError("oosend for lltype backend??")
 
@@ -1480,17 +1491,20 @@
     'v': lltype.Void,
     }
 
-def _do_call_common(f, args_in_order=None, calldescr=None):
+def _do_call_common(f, args_in_order=None, calldescr=None,
+                    call_with_llptr=False):
     ptr = llmemory.cast_int_to_adr(f).ptr
     PTR = lltype.typeOf(ptr)
     if PTR == rffi.VOIDP:
         # it's a pointer to a C function, so we don't have a precise
         # signature: create one from the descr
+        assert call_with_llptr is True
         ARGS = map(kind2TYPE.get, calldescr.arg_types)
         RESULT = kind2TYPE[calldescr.typeinfo]
         FUNC = lltype.FuncType(ARGS, RESULT)
         func_to_call = rffi.cast(lltype.Ptr(FUNC), ptr)
     else:
+        assert call_with_llptr is False
         FUNC = PTR.TO
         ARGS = FUNC.ARGS
         func_to_call = ptr._obj._callable
diff --git a/pypy/jit/backend/llgraph/runner.py b/pypy/jit/backend/llgraph/runner.py
--- a/pypy/jit/backend/llgraph/runner.py
+++ b/pypy/jit/backend/llgraph/runner.py
@@ -134,7 +134,7 @@
         old, oldindex = faildescr._compiled_fail
         llimpl.compile_redirect_fail(old, oldindex, c)
 
-    def compile_loop(self, inputargs, operations, looptoken, log=True):
+    def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
         """In a real assembler backend, this should assemble the given
         list of operations.  Here we just generate a similar CompiledLoop
         instance.  The code here is RPython, whereas the code in llimpl
diff --git a/pypy/jit/backend/llsupport/descr.py b/pypy/jit/backend/llsupport/descr.py
--- a/pypy/jit/backend/llsupport/descr.py
+++ b/pypy/jit/backend/llsupport/descr.py
@@ -1,5 +1,6 @@
 import py
 from pypy.rpython.lltypesystem import lltype, rffi, llmemory, rclass
+from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.jit.backend.llsupport import symbolic, support
 from pypy.jit.metainterp.history import AbstractDescr, getkind, BoxInt, BoxPtr
 from pypy.jit.metainterp.history import BasicFailDescr, LoopToken, BoxFloat
@@ -45,6 +46,8 @@
     size = 0      # help translation
     is_immutable = False
 
+    tid = llop.combine_ushort(lltype.Signed, 0, 0)
+
     def __init__(self, size, count_fields_if_immut=-1):
         self.size = size
         self.count_fields_if_immut = count_fields_if_immut
@@ -149,6 +152,7 @@
 
 class BaseArrayDescr(AbstractDescr):
     _clsname = ''
+    tid = llop.combine_ushort(lltype.Signed, 0, 0)
 
     def get_base_size(self, translate_support_code):
         basesize, _, _ = symbolic.get_array_token(_A, translate_support_code)
@@ -263,6 +267,9 @@
 
     def __repr__(self):
         res = '%s(%s)' % (self.__class__.__name__, self.arg_classes)
+        extraeffect = getattr(self.extrainfo, 'extraeffect', None)
+        if extraeffect is not None:
+            res += ' EF=%r' % extraeffect
         oopspecindex = getattr(self.extrainfo, 'oopspecindex', 0)
         if oopspecindex:
             from pypy.jit.codewriter.effectinfo import EffectInfo
diff --git a/pypy/jit/backend/llsupport/ffisupport.py b/pypy/jit/backend/llsupport/ffisupport.py
--- a/pypy/jit/backend/llsupport/ffisupport.py
+++ b/pypy/jit/backend/llsupport/ffisupport.py
@@ -3,13 +3,16 @@
 from pypy.jit.backend.llsupport.descr import DynamicIntCallDescr, NonGcPtrCallDescr,\
     FloatCallDescr, VoidCallDescr
 
+class UnsupportedKind(Exception):
+    pass
+
 def get_call_descr_dynamic(ffi_args, ffi_result, extrainfo=None):
     """Get a call descr: the types of result and args are represented by
     rlib.libffi.types.*"""
     try:
         reskind = get_ffi_type_kind(ffi_result)
         argkinds = [get_ffi_type_kind(arg) for arg in ffi_args]
-    except KeyError:
+    except UnsupportedKind:
         return None # ??
     arg_classes = ''.join(argkinds)
     if reskind == history.INT:
@@ -33,7 +36,7 @@
         return history.FLOAT
     elif kind == 'v':
         return history.VOID
-    assert False, "Unsupported kind '%s'" % kind
+    raise UnsupportedKind("Unsupported kind '%s'" % kind)
 
 def is_ffi_type_signed(ffi_type):
     from pypy.rlib.libffi import types
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -34,7 +34,7 @@
         pass
     def do_write_barrier(self, gcref_struct, gcref_newptr):
         pass
-    def rewrite_assembler(self, cpu, operations):
+    def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
         return operations
     def can_inline_malloc(self, descr):
         return False
@@ -146,78 +146,6 @@
 # All code below is for the hybrid or minimark GC
 
 
-class GcRefList:
-    """Handles all references from the generated assembler to GC objects.
-    This is implemented as a nonmovable, but GC, list; the assembler contains
-    code that will (for now) always read from this list."""
-
-    GCREF_LIST = lltype.GcArray(llmemory.GCREF)     # followed by the GC
-
-    HASHTABLE = rffi.CArray(llmemory.Address)      # ignored by the GC
-    HASHTABLE_BITS = 10
-    HASHTABLE_SIZE = 1 << HASHTABLE_BITS
-
-    def initialize(self):
-        if we_are_translated(): n = 2000
-        else:                   n = 10    # tests only
-        self.list = self.alloc_gcref_list(n)
-        self.nextindex = 0
-        self.oldlists = []
-        # A pseudo dictionary: it is fixed size, and it may contain
-        # random nonsense after a collection moved the objects.  It is only
-        # used to avoid too many duplications in the GCREF_LISTs.
-        self.hashtable = lltype.malloc(self.HASHTABLE,
-                                       self.HASHTABLE_SIZE+1,
-                                       flavor='raw', track_allocation=False)
-        dummy = lltype.direct_ptradd(lltype.direct_arrayitems(self.hashtable),
-                                     self.HASHTABLE_SIZE)
-        dummy = llmemory.cast_ptr_to_adr(dummy)
-        for i in range(self.HASHTABLE_SIZE+1):
-            self.hashtable[i] = dummy
-
-    def alloc_gcref_list(self, n):
-        # Important: the GRREF_LISTs allocated are *non-movable*.  This
-        # requires support in the gc (hybrid GC or minimark GC so far).
-        if we_are_translated():
-            list = rgc.malloc_nonmovable(self.GCREF_LIST, n)
-            assert list, "malloc_nonmovable failed!"
-        else:
-            list = lltype.malloc(self.GCREF_LIST, n)     # for tests only
-        return list
-
-    def get_address_of_gcref(self, gcref):
-        assert lltype.typeOf(gcref) == llmemory.GCREF
-        # first look in the hashtable, using an inexact hash (fails after
-        # the object moves)
-        addr = llmemory.cast_ptr_to_adr(gcref)
-        hash = llmemory.cast_adr_to_int(addr, "forced")
-        hash -= hash >> self.HASHTABLE_BITS
-        hash &= self.HASHTABLE_SIZE - 1
-        addr_ref = self.hashtable[hash]
-        # the following test is safe anyway, because the addresses found
-        # in the hashtable are always the addresses of nonmovable stuff
-        # ('addr_ref' is an address inside self.list, not directly the
-        # address of a real moving GC object -- that's 'addr_ref.address[0]'.)
-        if addr_ref.address[0] == addr:
-            return addr_ref
-        # if it fails, add an entry to the list
-        if self.nextindex == len(self.list):
-            # reallocate first, increasing a bit the size every time
-            self.oldlists.append(self.list)
-            self.list = self.alloc_gcref_list(len(self.list) // 4 * 5)
-            self.nextindex = 0
-        # add it
-        index = self.nextindex
-        self.list[index] = gcref
-        addr_ref = lltype.direct_ptradd(lltype.direct_arrayitems(self.list),
-                                        index)
-        addr_ref = llmemory.cast_ptr_to_adr(addr_ref)
-        self.nextindex = index + 1
-        # record it in the hashtable
-        self.hashtable[hash] = addr_ref
-        return addr_ref
-
-
 class GcRootMap_asmgcc(object):
     """Handles locating the stack roots in the assembler.
     This is the class supporting --gcrootfinder=asmgcc.
@@ -527,6 +455,7 @@
     def __init__(self, gc_ll_descr):
         self.llop1 = gc_ll_descr.llop1
         self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR
+        self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR
         self.fielddescr_tid = get_field_descr(gc_ll_descr,
                                               gc_ll_descr.GCClass.HDR, 'tid')
         self.jit_wb_if_flag = gc_ll_descr.GCClass.JIT_WB_IF_FLAG
@@ -546,6 +475,14 @@
         funcaddr = llmemory.cast_ptr_to_adr(funcptr)
         return cpu.cast_adr_to_int(funcaddr)
 
+    def get_write_barrier_from_array_fn(self, cpu):
+        # returns a function with arguments [array, index, newvalue]
+        llop1 = self.llop1
+        funcptr = llop1.get_write_barrier_from_array_failing_case(
+            self.WB_ARRAY_FUNCPTR)
+        funcaddr = llmemory.cast_ptr_to_adr(funcptr)
+        return cpu.cast_adr_to_int(funcaddr)    # this may return 0
+
 
 class GcLLDescr_framework(GcLLDescription):
     DEBUG = False    # forced to True by x86/test/test_zrpy_gc.py
@@ -559,7 +496,7 @@
         self.translator = translator
         self.llop1 = llop1
 
-        # we need the hybrid or minimark GC for GcRefList.alloc_gcref_list()
+        # we need the hybrid or minimark GC for rgc._make_sure_does_not_move()
         # to work
         if gcdescr.config.translation.gc not in ('hybrid', 'minimark'):
             raise NotImplementedError("--gc=%s not implemented with the JIT" %
@@ -574,8 +511,6 @@
                                       " with the JIT" % (name,))
         gcrootmap = cls(gcdescr)
         self.gcrootmap = gcrootmap
-        self.gcrefs = GcRefList()
-        self.single_gcref_descr = GcPtrFieldDescr('', 0)
 
         # make a TransformerLayoutBuilder and save it on the translator
         # where it can be fished and reused by the FrameworkGCTransformer
@@ -617,6 +552,8 @@
             [lltype.Signed, lltype.Signed], llmemory.GCREF))
         self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType(
             [llmemory.Address, llmemory.Address], lltype.Void))
+        self.WB_ARRAY_FUNCPTR = lltype.Ptr(lltype.FuncType(
+            [llmemory.Address, lltype.Signed, llmemory.Address], lltype.Void))
         self.write_barrier_descr = WriteBarrierDescr(self)
         #
         def malloc_array(itemsize, tid, num_elem):
@@ -706,7 +643,6 @@
         return rffi.cast(lltype.Signed, fptr)
 
     def initialize(self):
-        self.gcrefs.initialize()
         self.gcrootmap.initialize()
 
     def init_size_descr(self, S, descr):
@@ -768,54 +704,32 @@
             funcptr(llmemory.cast_ptr_to_adr(gcref_struct),
                     llmemory.cast_ptr_to_adr(gcref_newptr))
 
-    def replace_constptrs_with_getfield_raw(self, cpu, newops, op):
-        # xxx some performance issue here
-        newargs = [None] * op.numargs()
-        needs_copy = False
+    def record_constptrs(self, op, gcrefs_output_list):
         for i in range(op.numargs()):
             v = op.getarg(i)
-            newargs[i] = v
             if isinstance(v, ConstPtr) and bool(v.value):
-                addr = self.gcrefs.get_address_of_gcref(v.value)
-                # ^^^even for non-movable objects, to record their presence
-                if rgc.can_move(v.value):
-                    box = BoxPtr(v.value)
-                    addr = cpu.cast_adr_to_int(addr)
-                    newops.append(ResOperation(rop.GETFIELD_RAW,
-                                               [ConstInt(addr)], box,
-                                               self.single_gcref_descr))
-                    newargs[i] = box
-                    needs_copy = True
-        #
-        if needs_copy:
-            return op.copy_and_change(op.getopnum(), args=newargs)
-        else:
-            return op
+                p = v.value
+                rgc._make_sure_does_not_move(p)
+                gcrefs_output_list.append(p)
 
-
-    def rewrite_assembler(self, cpu, operations):
+    def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
         # Perform two kinds of rewrites in parallel:
         #
         # - Add COND_CALLs to the write barrier before SETFIELD_GC and
         #   SETARRAYITEM_GC operations.
         #
-        # - Remove all uses of ConstPtrs away from the assembler.
-        #   Idea: when running on a moving GC, we can't (easily) encode
-        #   the ConstPtrs in the assembler, because they can move at any
-        #   point in time.  Instead, we store them in 'gcrefs.list', a GC
-        #   but nonmovable list; and here, we modify 'operations' to
-        #   replace direct usage of ConstPtr with a BoxPtr loaded by a
-        #   GETFIELD_RAW from the array 'gcrefs.list'.
+        # - Record the ConstPtrs from the assembler.
         #
         newops = []
+        known_lengths = {}
         # we can only remember one malloc since the next malloc can possibly
         # collect
         last_malloc = None
         for op in operations:
             if op.getopnum() == rop.DEBUG_MERGE_POINT:
                 continue
-            # ---------- replace ConstPtrs with GETFIELD_RAW ----------
-            op = self.replace_constptrs_with_getfield_raw(cpu, newops, op)
+            # ---------- record the ConstPtrs ----------
+            self.record_constptrs(op, gcrefs_output_list)
             if op.is_malloc():
                 last_malloc = op.result
             elif op.can_malloc():
@@ -838,10 +752,14 @@
                     v = op.getarg(2)
                     if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
                                             bool(v.value)): # store a non-NULL
-                        # XXX detect when we should produce a
-                        # write_barrier_from_array
-                        self._gen_write_barrier(newops, op.getarg(0), v)
+                        self._gen_write_barrier_array(newops, op.getarg(0),
+                                                      op.getarg(1), v,
+                                                      cpu, known_lengths)
                         op = op.copy_and_change(rop.SETARRAYITEM_RAW)
+            elif op.getopnum() == rop.NEW_ARRAY:
+                v_length = op.getarg(0)
+                if isinstance(v_length, ConstInt):
+                    known_lengths[op.result] = v_length.getint()
             # ----------
             newops.append(op)
         return newops
@@ -851,6 +769,24 @@
         newops.append(ResOperation(rop.COND_CALL_GC_WB, args, None,
                                    descr=self.write_barrier_descr))
 
+    def _gen_write_barrier_array(self, newops, v_base, v_index, v_value,
+                                 cpu, known_lengths):
+        if self.write_barrier_descr.get_write_barrier_from_array_fn(cpu) != 0:
+            # If we know statically the length of 'v', and it is not too
+            # big, then produce a regular write_barrier.  If it's unknown or
+            # too big, produce instead a write_barrier_from_array.
+            LARGE = 130
+            length = known_lengths.get(v_base, LARGE)
+            if length >= LARGE:
+                # unknown or too big: produce a write_barrier_from_array
+                args = [v_base, v_index, v_value]
+                newops.append(ResOperation(rop.COND_CALL_GC_WB_ARRAY, args,
+                                           None,
+                                           descr=self.write_barrier_descr))
+                return
+        # fall-back case: produce a write_barrier
+        self._gen_write_barrier(newops, v_base, v_value)
+
     def can_inline_malloc(self, descr):
         assert isinstance(descr, BaseSizeDescr)
         if descr.size < self.max_size_of_young_obj:
diff --git a/pypy/jit/backend/llsupport/llmodel.py b/pypy/jit/backend/llsupport/llmodel.py
--- a/pypy/jit/backend/llsupport/llmodel.py
+++ b/pypy/jit/backend/llsupport/llmodel.py
@@ -143,11 +143,11 @@
         STACK_CHECK_SLOWPATH = lltype.Ptr(lltype.FuncType([lltype.Signed],
                                                           lltype.Void))
         def insert_stack_check():
-            startaddr = rstack._stack_get_start_adr()
-            length = rstack._stack_get_length()
+            endaddr = rstack._stack_get_end_adr()
+            lengthaddr = rstack._stack_get_length_adr()
             f = llhelper(STACK_CHECK_SLOWPATH, rstack.stack_check_slowpath)
             slowpathaddr = rffi.cast(lltype.Signed, f)
-            return startaddr, length, slowpathaddr
+            return endaddr, lengthaddr, slowpathaddr
 
         self.pos_exception = pos_exception
         self.pos_exc_value = pos_exc_value
diff --git a/pypy/jit/backend/llsupport/regalloc.py b/pypy/jit/backend/llsupport/regalloc.py
--- a/pypy/jit/backend/llsupport/regalloc.py
+++ b/pypy/jit/backend/llsupport/regalloc.py
@@ -37,6 +37,11 @@
         self.frame_depth += size
         return newloc
 
+    def reserve_location_in_frame(self, size):
+        frame_depth = self.frame_depth
+        self.frame_depth += size
+        return frame_depth
+
     # abstract methods that need to be overwritten for specific assemblers
     @staticmethod
     def frame_pos(loc, type):
@@ -213,6 +218,15 @@
         self.reg_bindings[v] = loc
         return loc
 
+    def force_spill_var(self, var):
+        self._sync_var(var)
+        try:
+            loc = self.reg_bindings[var]
+            del self.reg_bindings[var]
+            self.free_regs.append(loc)
+        except KeyError:
+            pass   # 'var' is already not in a register
+
     def loc(self, box):
         """ Return the location of 'box'.
         """
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -9,7 +9,7 @@
 from pypy.jit.metainterp.resoperation import get_deep_immutable_oplist
 from pypy.jit.tool.oparser import parse
 from pypy.rpython.lltypesystem.rclass import OBJECT, OBJECT_VTABLE
-from pypy.jit.metainterp.test.test_optimizeopt import equaloplists
+from pypy.jit.metainterp.optimizeopt.util import equaloplists
 
 def test_boehm():
     gc_ll_descr = GcLLDescr_boehm(None, None, None)
@@ -49,19 +49,6 @@
 
 # ____________________________________________________________
 
-def test_GcRefList():
-    S = lltype.GcStruct('S')
-    order = range(50) * 4
-    random.shuffle(order)
-    allocs = [lltype.cast_opaque_ptr(llmemory.GCREF, lltype.malloc(S))
-              for i in range(50)]
-    allocs = [allocs[i] for i in order]
-    #
-    gcrefs = GcRefList()
-    gcrefs.initialize()
-    addrs = [gcrefs.get_address_of_gcref(ptr) for ptr in allocs]
-    for i in range(len(allocs)):
-        assert addrs[i].address[0] == llmemory.cast_ptr_to_adr(allocs[i])
 
 class TestGcRootMapAsmGcc:
 
@@ -288,6 +275,18 @@
     def get_write_barrier_failing_case(self, FPTRTYPE):
         return llhelper(FPTRTYPE, self._write_barrier_failing_case)
 
+    _have_wb_from_array = False
+
+    def _write_barrier_from_array_failing_case(self, adr_struct, v_index):
+        self.record.append(('barrier_from_array', adr_struct, v_index))
+
+    def get_write_barrier_from_array_failing_case(self, FPTRTYPE):
+        if self._have_wb_from_array:
+            return llhelper(FPTRTYPE,
+                            self._write_barrier_from_array_failing_case)
+        else:
+            return lltype.nullptr(FPTRTYPE.TO)
+
 
 class TestFramework(object):
     gc = 'hybrid'
@@ -303,9 +302,20 @@
             config = config_
         class FakeCPU(object):
             def cast_adr_to_int(self, adr):
-                ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
-                assert ptr._obj._callable == llop1._write_barrier_failing_case
-                return 42
+                if not adr:
+                    return 0
+                try:
+                    ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
+                    assert ptr._obj._callable == \
+                           llop1._write_barrier_failing_case
+                    return 42
+                except lltype.InvalidCast:
+                    ptr = llmemory.cast_adr_to_ptr(
+                        adr, gc_ll_descr.WB_ARRAY_FUNCPTR)
+                    assert ptr._obj._callable == \
+                           llop1._write_barrier_from_array_failing_case
+                    return 43
+
         gcdescr = get_description(config_)
         translator = FakeTranslator()
         llop1 = FakeLLOp()
@@ -414,11 +424,11 @@
             ResOperation(rop.DEBUG_MERGE_POINT, ['dummy', 2], None),
             ]
         gc_ll_descr = self.gc_ll_descr
-        operations = gc_ll_descr.rewrite_assembler(None, operations)
+        operations = gc_ll_descr.rewrite_assembler(None, operations, [])
         assert len(operations) == 0
 
     def test_rewrite_assembler_1(self):
-        # check rewriting of ConstPtrs
+        # check recording of ConstPtrs
         class MyFakeCPU(object):
             def cast_adr_to_int(self, adr):
                 assert adr == "some fake address"
@@ -438,56 +448,12 @@
             ]
         gc_ll_descr = self.gc_ll_descr
         gc_ll_descr.gcrefs = MyFakeGCRefList()
+        gcrefs = []
         operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
-        assert len(operations) == 2
-        assert operations[0].getopnum() == rop.GETFIELD_RAW
-        assert operations[0].getarg(0) == ConstInt(43)
-        assert operations[0].getdescr() == gc_ll_descr.single_gcref_descr
-        v_box = operations[0].result
-        assert isinstance(v_box, BoxPtr)
-        assert operations[1].getopnum() == rop.PTR_EQ
-        assert operations[1].getarg(0) == v_random_box
-        assert operations[1].getarg(1) == v_box
-        assert operations[1].result == v_result
-
-    def test_rewrite_assembler_1_cannot_move(self):
-        # check rewriting of ConstPtrs
-        class MyFakeCPU(object):
-            def cast_adr_to_int(self, adr):
-                xxx    # should not be called
-        class MyFakeGCRefList(object):
-            def get_address_of_gcref(self, s_gcref1):
-                seen.append(s_gcref1)
-                assert s_gcref1 == s_gcref
-                return "some fake address"
-        seen = []
-        S = lltype.GcStruct('S')
-        s = lltype.malloc(S)
-        s_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
-        v_random_box = BoxPtr()
-        v_result = BoxInt()
-        operations = [
-            ResOperation(rop.PTR_EQ, [v_random_box, ConstPtr(s_gcref)],
-                         v_result),
-            ]
-        gc_ll_descr = self.gc_ll_descr
-        gc_ll_descr.gcrefs = MyFakeGCRefList()
-        old_can_move = rgc.can_move
-        operations = get_deep_immutable_oplist(operations)
-        try:
-            rgc.can_move = lambda s: False
-            operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
-        finally:
-            rgc.can_move = old_can_move
-        assert len(operations) == 1
-        assert operations[0].getopnum() == rop.PTR_EQ
-        assert operations[0].getarg(0) == v_random_box
-        assert operations[0].getarg(1) == ConstPtr(s_gcref)
-        assert operations[0].result == v_result
-        # check that s_gcref gets added to the list anyway, to make sure
-        # that the GC sees it
-        assert seen == [s_gcref]
+        operations2 = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations,
+                                                   gcrefs)
+        assert operations2 == operations
+        assert gcrefs == [s_gcref]
 
     def test_rewrite_assembler_2(self):
         # check write barriers before SETFIELD_GC
@@ -500,7 +466,8 @@
             ]
         gc_ll_descr = self.gc_ll_descr
         operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations,
+                                                   [])
         assert len(operations) == 2
         #
         assert operations[0].getopnum() == rop.COND_CALL_GC_WB
@@ -515,29 +482,93 @@
 
     def test_rewrite_assembler_3(self):
         # check write barriers before SETARRAYITEM_GC
-        v_base = BoxPtr()
-        v_index = BoxInt()
-        v_value = BoxPtr()
-        array_descr = AbstractDescr()
-        operations = [
-            ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value], None,
-                         descr=array_descr),
-            ]
-        gc_ll_descr = self.gc_ll_descr
-        operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
-        assert len(operations) == 2
-        #
-        assert operations[0].getopnum() == rop.COND_CALL_GC_WB
-        assert operations[0].getarg(0) == v_base
-        assert operations[0].getarg(1) == v_value
-        assert operations[0].result is None
-        #
-        assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
-        assert operations[1].getarg(0) == v_base
-        assert operations[1].getarg(1) == v_index
-        assert operations[1].getarg(2) == v_value
-        assert operations[1].getdescr() == array_descr
+        for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+            v_base = BoxPtr()
+            v_index = BoxInt()
+            v_value = BoxPtr()
+            array_descr = AbstractDescr()
+            operations = [
+                ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+                             None, descr=array_descr),
+                ]
+            if v_new_length is not None:
+                operations.insert(0, ResOperation(rop.NEW_ARRAY,
+                                                  [v_new_length], v_base,
+                                                  descr=array_descr))
+                # we need to insert another, unrelated NEW_ARRAY here
+                # to prevent the initialization_store optimization
+                operations.insert(1, ResOperation(rop.NEW_ARRAY,
+                                                  [ConstInt(12)], BoxPtr(),
+                                                  descr=array_descr))
+            gc_ll_descr = self.gc_ll_descr
+            operations = get_deep_immutable_oplist(operations)
+            operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                       operations, [])
+            if v_new_length is not None:
+                assert operations[0].getopnum() == rop.NEW_ARRAY
+                assert operations[1].getopnum() == rop.NEW_ARRAY
+                del operations[:2]
+            assert len(operations) == 2
+            #
+            assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+            assert operations[0].getarg(0) == v_base
+            assert operations[0].getarg(1) == v_value
+            assert operations[0].result is None
+            #
+            assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+            assert operations[1].getarg(0) == v_base
+            assert operations[1].getarg(1) == v_index
+            assert operations[1].getarg(2) == v_value
+            assert operations[1].getdescr() == array_descr
+
+    def test_rewrite_assembler_4(self):
+        # check write barriers before SETARRAYITEM_GC,
+        # if we have actually a write_barrier_from_array.
+        self.llop1._have_wb_from_array = True
+        for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+            v_base = BoxPtr()
+            v_index = BoxInt()
+            v_value = BoxPtr()
+            array_descr = AbstractDescr()
+            operations = [
+                ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+                             None, descr=array_descr),
+                ]
+            if v_new_length is not None:
+                operations.insert(0, ResOperation(rop.NEW_ARRAY,
+                                                  [v_new_length], v_base,
+                                                  descr=array_descr))
+                # we need to insert another, unrelated NEW_ARRAY here
+                # to prevent the initialization_store optimization
+                operations.insert(1, ResOperation(rop.NEW_ARRAY,
+                                                  [ConstInt(12)], BoxPtr(),
+                                                  descr=array_descr))
+            gc_ll_descr = self.gc_ll_descr
+            operations = get_deep_immutable_oplist(operations)
+            operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                       operations, [])
+            if v_new_length is not None:
+                assert operations[0].getopnum() == rop.NEW_ARRAY
+                assert operations[1].getopnum() == rop.NEW_ARRAY
+                del operations[:2]
+            assert len(operations) == 2
+            #
+            if isinstance(v_new_length, ConstInt) and v_new_length.value < 130:
+                assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+                assert operations[0].getarg(0) == v_base
+                assert operations[0].getarg(1) == v_value
+            else:
+                assert operations[0].getopnum() == rop.COND_CALL_GC_WB_ARRAY
+                assert operations[0].getarg(0) == v_base
+                assert operations[0].getarg(1) == v_index
+                assert operations[0].getarg(2) == v_value
+            assert operations[0].result is None
+            #
+            assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+            assert operations[1].getarg(0) == v_base
+            assert operations[1].getarg(1) == v_index
+            assert operations[1].getarg(2) == v_value
+            assert operations[1].getdescr() == array_descr
 
     def test_rewrite_assembler_initialization_store(self):
         S = lltype.GcStruct('S', ('parent', OBJECT),
@@ -558,7 +589,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
     def test_rewrite_assembler_initialization_store_2(self):
@@ -583,7 +615,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
     def test_rewrite_assembler_initialization_store_3(self):
@@ -602,7 +635,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
 class TestFrameworkMiniMark(TestFramework):
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -53,7 +53,7 @@
         """Called once by the front-end when the program stops."""
         pass
 
-    def compile_loop(self, inputargs, operations, looptoken, log=True):
+    def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
         """Assemble the given loop.
         Should create and attach a fresh CompiledLoopToken to
         looptoken.compiled_loop_token and stick extra attributes
diff --git a/pypy/jit/backend/test/calling_convention_test.py b/pypy/jit/backend/test/calling_convention_test.py
--- a/pypy/jit/backend/test/calling_convention_test.py
+++ b/pypy/jit/backend/test/calling_convention_test.py
@@ -23,6 +23,7 @@
 
 def constfloat(x):
     return ConstFloat(longlong.getfloatstorage(x))
+
 class FakeStats(object):
     pass
 class TestCallingConv(Runner):
@@ -30,56 +31,172 @@
     Ptr = lltype.Ptr
     FuncType = lltype.FuncType
 
-    def __init__(self):
-        self.cpu = getcpuclass()(rtyper=None, stats=FakeStats())
-        self.cpu.setup_once()
+    def setup_class(cls):
+        cls.cpu = getcpuclass()(rtyper=None, stats=FakeStats())
+        cls.cpu.setup_once()
+
+    def _prepare_args(self, args, floats, ints):
+        local_floats = list(floats)
+        local_ints = list(ints)
+        expected_result = 0.0
+        for i in range(len(args)):
+            x = args[i]
+            if x[0] == 'f':
+                x = local_floats.pop()
+                t = longlong.getfloatstorage(x)
+                self.cpu.set_future_value_float(i, t)
+            else:
+                x = local_ints.pop()
+                self.cpu.set_future_value_int(i, x)
+            expected_result += x
+        return expected_result
 
     @classmethod
     def get_funcbox(cls, cpu, func_ptr):
         addr = llmemory.cast_ptr_to_adr(func_ptr)
         return ConstInt(heaptracker.adr2int(addr))
 
+    def test_call_aligned_with_spilled_values(self):
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
+
+
+        def func(*args):
+            return float(sum(args))
+
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            local_floats = list(floats)
+            local_ints = list(ints)
+            args = []
+            spills = []
+            funcargs = []
+            float_count = 0
+            int_count = 0
+            for i in range(8):
+                if case & (1<<i):
+                    args.append('f%d' % float_count)
+                    spills.append('force_spill(f%d)' % float_count)
+                    float_count += 1
+                    funcargs.append(F)
+                else:
+                    args.append('i%d' % int_count)
+                    spills.append('force_spill(i%d)' % int_count)
+                    int_count += 1
+                    funcargs.append(I)
+
+            arguments = ', '.join(args)
+            spill_ops = '\n'.join(spills)
+
+            FUNC = self.FuncType(funcargs, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
+
+            ops = '[%s]\n' % arguments
+            ops += '%s\n' % spill_ops
+            ops += 'f99 = call(ConstClass(func_ptr), %s, descr=calldescr)\n' % arguments
+            ops += 'finish(f99, %s)\n' % arguments
+
+            loop = parse(ops, namespace=locals())
+            looptoken = LoopToken()
+            done_number = self.cpu.get_fail_descr_number(loop.operations[-1].getdescr())
+            self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+            expected_result = self._prepare_args(args, floats, ints)
+
+            res = self.cpu.execute_token(looptoken)
+            x = longlong.getrealfloat(cpu.get_latest_value_float(0))
+            assert abs(x - expected_result) < 0.0001
+
+    def test_call_aligned_with_imm_values(self):
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
+
+
+        def func(*args):
+            return float(sum(args))
+
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            result = 0.0
+            args = []
+            argslist = []
+            local_floats = list(floats)
+            local_ints = list(ints)
+            for i in range(8):
+                if case & (1<<i):
+                    args.append(F)
+                    arg = local_floats.pop()
+                    result += arg
+                    argslist.append(constfloat(arg))
+                else:
+                    args.append(I)
+                    arg = local_ints.pop()
+                    result += arg
+                    argslist.append(ConstInt(arg))
+            FUNC = self.FuncType(args, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
+
+            res = self.execute_operation(rop.CALL,
+                                         [funcbox] + argslist,
+                                         'float', descr=calldescr)
+            assert abs(res.getfloat() - result) < 0.0001
+
     def test_call_aligned_with_args_on_the_stack(self):
-            from pypy.rlib.libffi import types
-            cpu = self.cpu
-            if not cpu.supports_floats:
-                py.test.skip('requires floats')
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
 
 
-            def func(*args):
-                return float(sum(args))
+        def func(*args):
+            return float(sum(args))
 
-            F = lltype.Float
-            I = lltype.Signed
-            floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
-            ints = [7, 11, 23, 13, -42, 1111, 95, 1]
-            for case in range(256):
-                result = 0.0
-                args = []
-                argslist = []
-                local_floats = list(floats)
-                local_ints = list(ints)
-                for i in range(8):
-                    if case & (1<<i):
-                        args.append(F)
-                        arg = local_floats.pop()
-                        result += arg
-                        argslist.append(boxfloat(arg))
-                    else:
-                        args.append(I)
-                        arg = local_ints.pop()
-                        result += arg
-                        argslist.append(BoxInt(arg))
-                FUNC = self.FuncType(args, F)
-                FPTR = self.Ptr(FUNC)
-                func_ptr = llhelper(FPTR, func)
-                calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
-                funcbox = self.get_funcbox(cpu, func_ptr)
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            result = 0.0
+            args = []
+            argslist = []
+            local_floats = list(floats)
+            local_ints = list(ints)
+            for i in range(8):
+                if case & (1<<i):
+                    args.append(F)
+                    arg = local_floats.pop()
+                    result += arg
+                    argslist.append(boxfloat(arg))
+                else:
+                    args.append(I)
+                    arg = local_ints.pop()
+                    result += arg
+                    argslist.append(BoxInt(arg))
+            FUNC = self.FuncType(args, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
 
-                res = self.execute_operation(rop.CALL,
-                                             [funcbox] + argslist,
-                                             'float', descr=calldescr)
-                assert abs(res.getfloat() - result) < 0.0001
+            res = self.execute_operation(rop.CALL,
+                                         [funcbox] + argslist,
+                                         'float', descr=calldescr)
+            assert abs(res.getfloat() - result) < 0.0001
 
     def test_call_alignment_call_assembler(self):
         from pypy.rlib.libffi import types
@@ -104,21 +221,6 @@
 
         floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
         ints = [7, 11, 23, 42, -42, 1111, 95, 1]
-        def _prepare_args(args):
-            local_floats = list(floats)
-            local_ints = list(ints)
-            expected_result = 0.0
-            for i in range(len(args)):
-                x = args[i]
-                if x[0] == 'f':
-                    x = local_floats.pop()
-                    t = longlong.getfloatstorage(x)
-                    cpu.set_future_value_float(i, t)
-                else:
-                    x = local_ints.pop()
-                    cpu.set_future_value_int(i, x)
-                expected_result += x
-            return expected_result
 
         for case in range(256):
             float_count = 0
@@ -152,7 +254,7 @@
             done_number = self.cpu.get_fail_descr_number(called_loop.operations[-1].getdescr())
             self.cpu.compile_loop(called_loop.inputargs, called_loop.operations, called_looptoken)
 
-            expected_result = _prepare_args(args)
+            expected_result = self._prepare_args(args, floats, ints)
             res = cpu.execute_token(called_looptoken)
             assert res.identifier == 3
             t = longlong.getrealfloat(cpu.get_latest_value_float(0))
@@ -181,7 +283,7 @@
                 self.cpu.compile_loop(loop.inputargs, loop.operations, othertoken)
 
                 # prepare call to called_loop
-                _prepare_args(args)
+                self._prepare_args(args, floats, ints)
                 res = cpu.execute_token(othertoken)
                 x = longlong.getrealfloat(cpu.get_latest_value_float(0))
                 assert res.identifier == 4
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -560,23 +560,6 @@
                                          'int', descr=calldescr)
             assert res.value == func_ints(*args)
 
-    def test_call_to_c_function(self):
-        from pypy.rlib.libffi import CDLL, types, ArgChain
-        from pypy.rpython.lltypesystem.ll2ctypes import libc_name
-        libc = CDLL(libc_name)
-        c_tolower = libc.getpointer('tolower', [types.uchar], types.sint)
-        argchain = ArgChain().arg(ord('A'))
-        assert c_tolower.call(argchain, rffi.INT) == ord('a')
-
-        func_adr = llmemory.cast_ptr_to_adr(c_tolower.funcsym)
-        funcbox = ConstInt(heaptracker.adr2int(func_adr))
-        calldescr = self.cpu.calldescrof_dynamic([types.uchar], types.sint)
-        res = self.execute_operation(rop.CALL,
-                                     [funcbox, BoxInt(ord('A'))],
-                                     'int',
-                                     descr=calldescr)
-        assert res.value == ord('a')
-
     def test_call_with_const_floats(self):
         def func(f1, f2):
             return f1 + f2
@@ -1680,7 +1663,7 @@
         record = []
         #
         S = lltype.GcStruct('S', ('tid', lltype.Signed))
-        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed], lltype.Void)
+        FUNC = self.FuncType([lltype.Ptr(S), lltype.Ptr(S)], lltype.Void)
         func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
         funcbox = self.get_funcbox(self.cpu, func_ptr)
         class WriteBarrierDescr(AbstractDescr):
@@ -1699,12 +1682,49 @@
             s = lltype.malloc(S)
             s.tid = value
             sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+            t = lltype.malloc(S)
+            tgcref = lltype.cast_opaque_ptr(llmemory.GCREF, t)
             del record[:]
             self.execute_operation(rop.COND_CALL_GC_WB,
-                                   [BoxPtr(sgcref), ConstInt(-2121)],
+                                   [BoxPtr(sgcref), ConstPtr(tgcref)],
                                    'void', descr=WriteBarrierDescr())
             if cond:
-                assert record == [(s, -2121)]
+                assert record == [(s, t)]
+            else:
+                assert record == []
+
+    def test_cond_call_gc_wb_array(self):
+        def func_void(a, b, c):
+            record.append((a, b, c))
+        record = []
+        #
+        S = lltype.GcStruct('S', ('tid', lltype.Signed))
+        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed, lltype.Ptr(S)],
+                             lltype.Void)
+        func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
+        funcbox = self.get_funcbox(self.cpu, func_ptr)
+        class WriteBarrierDescr(AbstractDescr):
+            jit_wb_if_flag = 4096
+            jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
+            jit_wb_if_flag_singlebyte = 0x10
+            def get_write_barrier_from_array_fn(self, cpu):
+                return funcbox.getint()
+        #
+        for cond in [False, True]:
+            value = random.randrange(-sys.maxint, sys.maxint)
+            if cond:
+                value |= 4096
+            else:
+                value &= ~4096
+            s = lltype.malloc(S)
+            s.tid = value
+            sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+            del record[:]
+            self.execute_operation(rop.COND_CALL_GC_WB_ARRAY,
+                       [BoxPtr(sgcref), ConstInt(123), BoxPtr(sgcref)],
+                       'void', descr=WriteBarrierDescr())
+            if cond:
+                assert record == [(s, 123, s)]
             else:
                 assert record == []
 
@@ -1843,6 +1863,99 @@
         assert self.cpu.get_latest_value_int(2) == 10
         assert values == [1, 10]
 
+    def test_call_to_c_function(self):
+        from pypy.rlib.libffi import CDLL, types, ArgChain
+        from pypy.rpython.lltypesystem.ll2ctypes import libc_name
+        libc = CDLL(libc_name)
+        c_tolower = libc.getpointer('tolower', [types.uchar], types.sint)
+        argchain = ArgChain().arg(ord('A'))
+        assert c_tolower.call(argchain, rffi.INT) == ord('a')
+
+        cpu = self.cpu
+        func_adr = llmemory.cast_ptr_to_adr(c_tolower.funcsym)
+        funcbox = ConstInt(heaptracker.adr2int(func_adr))
+        calldescr = cpu.calldescrof_dynamic([types.uchar], types.sint)
+        i1 = BoxInt()
+        i2 = BoxInt()
+        tok = BoxInt()
+        faildescr = BasicFailDescr(1)
+        ops = [
+        ResOperation(rop.CALL_RELEASE_GIL, [funcbox, i1], i2,
+                     descr=calldescr),
+        ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+        ResOperation(rop.FINISH, [i2], None, descr=BasicFailDescr(0))
+        ]
+        ops[1].setfailargs([i1, i2])
+        looptoken = LoopToken()
+        self.cpu.compile_loop([i1], ops, looptoken)
+        self.cpu.set_future_value_int(0, ord('G'))
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 0
+        assert self.cpu.get_latest_value_int(0) == ord('g')
+
+    def test_call_to_c_function_with_callback(self):
+        from pypy.rlib.libffi import CDLL, types, ArgChain, clibffi
+        from pypy.rpython.lltypesystem.ll2ctypes import libc_name
+        libc = CDLL(libc_name)
+        types_size_t = clibffi.cast_type_to_ffitype(rffi.SIZE_T)
+        c_qsort = libc.getpointer('qsort', [types.pointer, types_size_t,
+                                            types_size_t, types.pointer],
+                                  types.void)
+        class Glob(object):
+            pass
+        glob = Glob()
+        class X(object):
+            pass
+        #
+        def callback(p1, p2):
+            glob.lst.append(X())
+            return rffi.cast(rffi.INT, 1)
+        CALLBACK = lltype.Ptr(lltype.FuncType([lltype.Signed,
+                                               lltype.Signed], rffi.INT))
+        fn = llhelper(CALLBACK, callback)
+        S = lltype.Struct('S', ('x', rffi.INT), ('y', rffi.INT))
+        raw = lltype.malloc(S, flavor='raw')
+        argchain = ArgChain()
+        argchain = argchain.arg(rffi.cast(lltype.Signed, raw))
+        argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 2))
+        argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 4))
+        argchain = argchain.arg(rffi.cast(lltype.Signed, fn))
+        glob.lst = []
+        c_qsort.call(argchain, lltype.Void)
+        assert len(glob.lst) > 0
+        del glob.lst[:]
+
+        cpu = self.cpu
+        func_adr = llmemory.cast_ptr_to_adr(c_qsort.funcsym)
+        funcbox = ConstInt(heaptracker.adr2int(func_adr))
+        calldescr = cpu.calldescrof_dynamic([types.pointer, types_size_t,
+                                             types_size_t, types.pointer],
+                                            types.void)
+        i0 = BoxInt()
+        i1 = BoxInt()
+        i2 = BoxInt()
+        i3 = BoxInt()
+        tok = BoxInt()
+        faildescr = BasicFailDescr(1)
+        ops = [
+        ResOperation(rop.CALL_RELEASE_GIL, [funcbox, i0, i1, i2, i3], None,
+                     descr=calldescr),
+        ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+        ResOperation(rop.FINISH, [], None, descr=BasicFailDescr(0))
+        ]
+        ops[1].setfailargs([])
+        looptoken = LoopToken()
+        self.cpu.compile_loop([i0, i1, i2, i3], ops, looptoken)
+        self.cpu.set_future_value_int(0, rffi.cast(lltype.Signed, raw))
+        self.cpu.set_future_value_int(1, 2)
+        self.cpu.set_future_value_int(2, 4)
+        self.cpu.set_future_value_int(3, rffi.cast(lltype.Signed, fn))
+        assert glob.lst == []
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 0
+        assert len(glob.lst) > 0
+        lltype.free(raw, flavor='raw')
+
     def test_guard_not_invalidated(self):
         cpu = self.cpu
         i0 = BoxInt()
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -128,6 +128,8 @@
         if gc_ll_descr.get_malloc_slowpath_addr is not None:
             self._build_malloc_slowpath()
         self._build_stack_check_slowpath()
+        if gc_ll_descr.gcrootmap:
+            self._build_release_gil(gc_ll_descr.gcrootmap)
         debug_start('jit-backend-counts')
         self.set_debug(have_debug_prints())
         debug_stop('jit-backend-counts')
@@ -137,10 +139,11 @@
         self.current_clt = looptoken.compiled_loop_token
         self.pending_guard_tokens = []
         self.mc = codebuf.MachineCodeBlockWrapper()
-        if self.datablockwrapper is None:
-            allblocks = self.get_asmmemmgr_blocks(looptoken)
-            self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
-                                                            allblocks)
+        #assert self.datablockwrapper is None --- but obscure case
+        # possible, e.g. getting MemoryError and continuing
+        allblocks = self.get_asmmemmgr_blocks(looptoken)
+        self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
+                                                        allblocks)
 
     def teardown(self):
         self.pending_guard_tokens = None
@@ -305,7 +308,66 @@
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.stack_check_slowpath = rawstart
 
-    def assemble_loop(self, inputargs, operations, looptoken, log):
+    @staticmethod
+    def _release_gil_asmgcc(css):
+        # similar to trackgcroot.py:pypy_asm_stackwalk, first part
+        from pypy.rpython.memory.gctransform import asmgcroot
+        new = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
+        next = asmgcroot.gcrootanchor.next
+        new.next = next
+        new.prev = asmgcroot.gcrootanchor
+        asmgcroot.gcrootanchor.next = new
+        next.prev = new
+        # and now release the GIL
+        before = rffi.aroundstate.before
+        if before:
+            before()
+
+    @staticmethod
+    def _reacquire_gil_asmgcc(css):
+        # first reacquire the GIL
+        after = rffi.aroundstate.after
+        if after:
+            after()
+        # similar to trackgcroot.py:pypy_asm_stackwalk, second part
+        from pypy.rpython.memory.gctransform import asmgcroot
+        old = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
+        prev = old.prev
+        next = old.next
+        prev.next = next
+        next.prev = prev
+
+    @staticmethod
+    def _release_gil_shadowstack():
+        before = rffi.aroundstate.before
+        if before:
+            before()
+
+    @staticmethod
+    def _reacquire_gil_shadowstack():
+        after = rffi.aroundstate.after
+        if after:
+            after()
+
+    _NOARG_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
+    _CLOSESTACK_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP],
+                                                  lltype.Void))
+
+    def _build_release_gil(self, gcrootmap):
+        if gcrootmap.is_shadow_stack:
+            releasegil_func = llhelper(self._NOARG_FUNC,
+                                       self._release_gil_shadowstack)
+            reacqgil_func = llhelper(self._NOARG_FUNC,
+                                     self._reacquire_gil_shadowstack)
+        else:
+            releasegil_func = llhelper(self._CLOSESTACK_FUNC,
+                                       self._release_gil_asmgcc)
+            reacqgil_func = llhelper(self._CLOSESTACK_FUNC,
+                                     self._reacquire_gil_asmgcc)
+        self.releasegil_addr  = self.cpu.cast_ptr_to_int(releasegil_func)
+        self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)
+
+    def assemble_loop(self, loopname, inputargs, operations, looptoken, log):
         '''adds the following attributes to looptoken:
                _x86_loop_code       (an integer giving an address)
                _x86_bootstrap_code  (an integer giving an address)
@@ -321,6 +383,7 @@
         # for the duration of compiling one loop or a one bridge.
 
         clt = CompiledLoopToken(self.cpu, looptoken.number)
+        clt.allgcrefs = []
         looptoken.compiled_loop_token = clt
         if not we_are_translated():
             # Arguments should be unique
@@ -328,13 +391,13 @@
 
         self.setup(looptoken)
         self.currently_compiling_loop = looptoken
-        funcname = self._find_debug_merge_point(operations)
         if log:
             self._register_counter()
             operations = self._inject_debugging_code(looptoken, operations)
 
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
-        arglocs, operations = regalloc.prepare_loop(inputargs, operations, looptoken)
+        arglocs, operations = regalloc.prepare_loop(inputargs, operations,
+                                                    looptoken, clt.allgcrefs)
         looptoken._x86_arglocs = arglocs
 
         bootstrappos = self.mc.get_relative_pos()
@@ -354,7 +417,7 @@
         #
         rawstart = self.materialize_loop(looptoken)
         debug_print("Loop #%d (%s) has address %x to %x" % (
-            looptoken.number, funcname,
+            looptoken.number, loopname,
             rawstart + self.looppos,
             rawstart + directbootstrappos))
         self._patch_stackadjust(rawstart + stackadjustpos,
@@ -374,7 +437,7 @@
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
-            name = "Loop # %s: %s" % (looptoken.number, funcname)
+            name = "Loop # %s: %s" % (looptoken.number, loopname)
             self.cpu.profile_agent.native_code_written(name,
                                                        rawstart, fullsize)
         return ops_offset
@@ -394,7 +457,6 @@
             return
 
         self.setup(original_loop_token)
-        funcname = self._find_debug_merge_point(operations)
         if log:
             self._register_counter()
             operations = self._inject_debugging_code(faildescr, operations)
@@ -406,7 +468,8 @@
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
         fail_depths = faildescr._x86_current_depths
         operations = regalloc.prepare_bridge(fail_depths, inputargs, arglocs,
-                                             operations)
+                                             operations,
+                                             self.current_clt.allgcrefs)
 
         stackadjustpos = self._patchable_stackadjust()
         frame_depth, param_depth = self._assemble(regalloc, operations)
@@ -416,8 +479,8 @@
         #
         rawstart = self.materialize_loop(original_loop_token)
 
-        debug_print("Bridge out of guard %d (%s) has address %x to %x" %
-                    (descr_number, funcname, rawstart, rawstart + codeendpos))
+        debug_print("Bridge out of guard %d has address %x to %x" %
+                    (descr_number, rawstart, rawstart + codeendpos))
         self._patch_stackadjust(rawstart + stackadjustpos,
                                 frame_depth + param_depth)
         self.patch_pending_failure_recoveries(rawstart)
@@ -431,7 +494,7 @@
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
-            name = "Bridge # %s: %s" % (descr_number, funcname)
+            name = "Bridge # %s" % (descr_number,)
             self.cpu.profile_agent.native_code_written(name,
                                                        rawstart, fullsize)
         return ops_offset
@@ -491,17 +554,6 @@
         return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                    self.cpu.gc_ll_descr.gcrootmap)
 
-    def _find_debug_merge_point(self, operations):
-
-        for op in operations:
-            if op.getopnum() == rop.DEBUG_MERGE_POINT:
-                funcname = op.getarg(0)._get_str()
-                break
-        else:
-            funcname = "<loop %d>" % len(self.loop_run_counters)
-        # invent the counter, so we don't get too confused
-        return funcname
-
     def _register_counter(self):
         if self._debug:
             # YYY very minor leak -- we need the counters to stay alive
@@ -620,11 +672,11 @@
         if self.stack_check_slowpath == 0:
             pass                # no stack check (e.g. not translated)
         else:
-            startaddr, length, _ = self.cpu.insert_stack_check()
-            self.mc.MOV(eax, esp)                       # MOV eax, current
-            self.mc.SUB(eax, heap(startaddr))           # SUB eax, [startaddr]
-            self.mc.CMP(eax, imm(length))               # CMP eax, length
-            self.mc.J_il8(rx86.Conditions['B'], 0)      # JB .skip
+            endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
+            self.mc.MOV(eax, heap(endaddr))             # MOV eax, [start]
+            self.mc.SUB(eax, esp)                       # SUB eax, current
+            self.mc.CMP(eax, heap(lengthaddr))          # CMP eax, [length]
+            self.mc.J_il8(rx86.Conditions['BE'], 0)     # JBE .skip
             jb_location = self.mc.get_relative_pos()
             self.mc.CALL(imm(self.stack_check_slowpath))# CALL slowpath
             # patch the JB above                        # .skip:
@@ -651,22 +703,28 @@
         # we need to put two words into the shadowstack: the MARKER
         # and the address of the frame (ebp, actually)
         rst = gcrootmap.get_root_stack_top_addr()
-        assert rx86.fits_in_32bits(rst)
-        if IS_X86_64:
-            # cannot use rdx here, it's used to pass arguments!
-            tmp = X86_64_SCRATCH_REG
+        if rx86.fits_in_32bits(rst):
+            self.mc.MOV_rj(eax.value, rst)            # MOV eax, [rootstacktop]
         else:
-            tmp = edx
-        self.mc.MOV_rj(eax.value, rst)                # MOV eax, [rootstacktop]
-        self.mc.LEA_rm(tmp.value, (eax.value, 2*WORD))  # LEA edx, [eax+2*WORD]
+            self.mc.MOV_ri(r13.value, rst)            # MOV r13, rootstacktop
+            self.mc.MOV_rm(eax.value, (r13.value, 0)) # MOV eax, [r13]
+        #
+        self.mc.LEA_rm(ebx.value, (eax.value, 2*WORD))  # LEA ebx, [eax+2*WORD]
         self.mc.MOV_mi((eax.value, 0), gcrootmap.MARKER)    # MOV [eax], MARKER
         self.mc.MOV_mr((eax.value, WORD), ebp.value)      # MOV [eax+WORD], ebp
-        self.mc.MOV_jr(rst, tmp.value)                # MOV [rootstacktop], edx
+        #
+        if rx86.fits_in_32bits(rst):
+            self.mc.MOV_jr(rst, ebx.value)            # MOV [rootstacktop], ebx
+        else:
+            self.mc.MOV_mr((r13.value, 0), ebx.value) # MOV [r13], ebx
 
     def _call_footer_shadowstack(self, gcrootmap):
         rst = gcrootmap.get_root_stack_top_addr()
-        assert rx86.fits_in_32bits(rst)
-        self.mc.SUB_ji8(rst, 2*WORD)       # SUB [rootstacktop], 2*WORD
+        if rx86.fits_in_32bits(rst):
+            self.mc.SUB_ji8(rst, 2*WORD)       # SUB [rootstacktop], 2*WORD
+        else:
+            self.mc.MOV_ri(ebx.value, rst)           # MOV ebx, rootstacktop
+            self.mc.SUB_mi8((ebx.value, 0), 2*WORD)  # SUB [ebx], 2*WORD
 
     def _assemble_bootstrap_direct_call(self, arglocs, jmppos, stackdepth):
         if IS_X86_64:
@@ -837,7 +895,7 @@
 
     def regalloc_push(self, loc):
         if isinstance(loc, RegLoc) and loc.is_xmm:
-            self.mc.SUB_ri(esp.value, 2*WORD)
+            self.mc.SUB_ri(esp.value, 8)   # = size of doubles
             self.mc.MOVSD_sx(0, loc.value)
         elif WORD == 4 and isinstance(loc, StackLoc) and loc.width == 8:
             # XXX evil trick
@@ -849,7 +907,7 @@
     def regalloc_pop(self, loc):
         if isinstance(loc, RegLoc) and loc.is_xmm:
             self.mc.MOVSD_xs(loc.value, 0)
-            self.mc.ADD_ri(esp.value, 2*WORD)
+            self.mc.ADD_ri(esp.value, 8)   # = size of doubles
         elif WORD == 4 and isinstance(loc, StackLoc) and loc.width == 8:
             # XXX evil trick
             self.mc.POP_b(get_ebp_ofs(loc.position + 1))
@@ -1101,6 +1159,8 @@
             self.mc.MOV_bi(FORCE_INDEX_OFS, force_index)
             return force_index
         else:
+            # the return value is ignored, apart from the fact that it
+            # is not negative.
             return 0
 
     genop_int_neg = _unaryop("NEG")
@@ -1984,6 +2044,102 @@
         self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
         self.implement_guard(guard_token, 'L')
 
+    def genop_guard_call_release_gil(self, op, guard_op, guard_token,
+                                     arglocs, result_loc):
+        # first, close the stack in the sense of the asmgcc GC root tracker
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        if gcrootmap:
+            self.call_release_gil(gcrootmap, arglocs)
+        # do the call
+        faildescr = guard_op.getdescr()
+        fail_index = self.cpu.get_fail_descr_number(faildescr)
+        self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
+        self._genop_call(op, arglocs, result_loc, fail_index)
+        # then reopen the stack
+        if gcrootmap:
+            self.call_reacquire_gil(gcrootmap, result_loc)
+        # finally, the guard_not_forced
+        self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
+        self.implement_guard(guard_token, 'L')
+
+    def call_release_gil(self, gcrootmap, save_registers):
+        # First, we need to save away the registers listed in
+        # 'save_registers' that are not callee-save.  XXX We assume that
+        # the XMM registers won't be modified.  We store them in
+        # [ESP+4], [ESP+8], etc., leaving enough room in [ESP] for the
+        # single argument to closestack_addr below.
+        p = WORD
+        for reg in self._regalloc.rm.save_around_call_regs:
+            if reg in save_registers:
+                self.mc.MOV_sr(p, reg.value)
+                p += WORD
+        self._regalloc.reserve_param(p//WORD)
+        #
+        if gcrootmap.is_shadow_stack:
+            args = []
+        else:
+            # note that regalloc.py used save_all_regs=True to save all
+            # registers, so we don't have to care about saving them (other
+            # than ebp) in the close_stack_struct.  But if they are registers
+            # like %eax that would be destroyed by this call, *and* they are
+            # used by arglocs for the *next* call, then trouble; for now we
+            # will just push/pop them.
+            from pypy.rpython.memory.gctransform import asmgcroot
+            css = self._regalloc.close_stack_struct
+            if css == 0:
+                use_words = (2 + max(asmgcroot.INDEX_OF_EBP,
+                                     asmgcroot.FRAME_PTR) + 1)
+                pos = self._regalloc.fm.reserve_location_in_frame(use_words)
+                css = get_ebp_ofs(pos + use_words - 1)
+                self._regalloc.close_stack_struct = css
+            # The location where the future CALL will put its return address
+            # will be [ESP-WORD], so save that as the next frame's top address
+            self.mc.LEA_rs(eax.value, -WORD)        # LEA EAX, [ESP-4]
+            frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
+            self.mc.MOV_br(frame_ptr, eax.value)    # MOV [css.frame], EAX
+            # Save ebp
+            index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
+            self.mc.MOV_br(index_of_ebp, ebp.value) # MOV [css.ebp], EBP
+            # Call the closestack() function (also releasing the GIL)
+            if IS_X86_32:
+                reg = eax
+            elif IS_X86_64:
+                reg = edi
+            self.mc.LEA_rb(reg.value, css)
+            args = [reg]
+        #
+        self._emit_call(-1, imm(self.releasegil_addr), args)
+        # Finally, restore the registers saved above.
+        p = WORD
+        for reg in self._regalloc.rm.save_around_call_regs:
+            if reg in save_registers:
+                self.mc.MOV_rs(reg.value, p)
+                p += WORD
+
+    def call_reacquire_gil(self, gcrootmap, save_loc):
+        # save the previous result (eax/xmm0) into the stack temporarily.
+        # XXX like with call_release_gil(), we assume that we don't need
+        # to save xmm0 in this case.
+        if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
+            self.mc.MOV_sr(WORD, save_loc.value)
+            self._regalloc.reserve_param(2)
+        # call the reopenstack() function (also reacquiring the GIL)
+        if gcrootmap.is_shadow_stack:
+            args = []
+        else:
+            css = self._regalloc.close_stack_struct
+            assert css != 0
+            if IS_X86_32:
+                reg = eax
+            elif IS_X86_64:
+                reg = edi
+            self.mc.LEA_rb(reg.value, css)
+            args = [reg]
+        self._emit_call(-1, imm(self.reacqgil_addr), args)
+        # restore the result from the stack
+        if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
+            self.mc.MOV_rs(save_loc.value, WORD)
+
     def genop_guard_call_assembler(self, op, guard_op, guard_token,
                                    arglocs, result_loc):
         faildescr = guard_op.getdescr()
@@ -2073,13 +2229,26 @@
     def genop_discard_cond_call_gc_wb(self, op, arglocs):
         # Write code equivalent to write_barrier() in the GC: it checks
         # a flag in the object at arglocs[0], and if set, it calls the
-        # function remember_young_pointer() from the GC.  The two arguments
-        # to the call are in arglocs[:2].  The rest, arglocs[2:], contains
+        # function remember_young_pointer() from the GC.  The arguments
+        # to the call are in arglocs[:N].  The rest, arglocs[N:], contains
         # registers that need to be saved and restored across the call.
+        # N is either 2 (regular write barrier) or 3 (array write barrier).
         descr = op.getdescr()
         if we_are_translated():
             cls = self.cpu.gc_ll_descr.has_write_barrier_class()
             assert cls is not None and isinstance(descr, cls)
+        #
+        opnum = op.getopnum()
+        if opnum == rop.COND_CALL_GC_WB:
+            N = 2
+            func = descr.get_write_barrier_fn(self.cpu)
+        elif opnum == rop.COND_CALL_GC_WB_ARRAY:
+            N = 3
+            func = descr.get_write_barrier_from_array_fn(self.cpu)
+            assert func != 0
+        else:
+            raise AssertionError(opnum)
+        #
         loc_base = arglocs[0]
         self.mc.TEST8(addr_add_const(loc_base, descr.jit_wb_if_flag_byteofs),
                       imm(descr.jit_wb_if_flag_singlebyte))
@@ -2090,33 +2259,37 @@
         if IS_X86_32:
             limit = -1      # push all arglocs on the stack
         elif IS_X86_64:
-            limit = 1       # push only arglocs[2:] on the stack
+            limit = N - 1   # push only arglocs[N:] on the stack
         for i in range(len(arglocs)-1, limit, -1):
             loc = arglocs[i]
             if isinstance(loc, RegLoc):
                 self.mc.PUSH_r(loc.value)
             else:
-                assert not IS_X86_64 # there should only be regs in arglocs[2:]
+                assert not IS_X86_64 # there should only be regs in arglocs[N:]
                 self.mc.PUSH_i32(loc.getint())
         if IS_X86_64:
             # We clobber these registers to pass the arguments, but that's
             # okay, because consider_cond_call_gc_wb makes sure that any
             # caller-save registers with values in them are present in
-            # arglocs[2:] too, so they are saved on the stack above and
+            # arglocs[N:] too, so they are saved on the stack above and
             # restored below.
-            remap_frame_layout(self, arglocs[:2], [edi, esi],
+            if N == 2:
+                callargs = [edi, esi]
+            else:
+                callargs = [edi, esi, edx]
+            remap_frame_layout(self, arglocs[:N], callargs,
                                X86_64_SCRATCH_REG)
-
+        #
         # misaligned stack in the call, but it's ok because the write barrier
         # is not going to call anything more.  Also, this assumes that the
         # write barrier does not touch the xmm registers.  (Slightly delicate
         # assumption, given that the write barrier can end up calling the
         # platform's malloc() from AddressStack.append().  XXX may need to
         # be done properly)
-        self.mc.CALL(imm(descr.get_write_barrier_fn(self.cpu)))
+        self.mc.CALL(imm(func))
         if IS_X86_32:
-            self.mc.ADD_ri(esp.value, 2*WORD)
-        for i in range(2, len(arglocs)):
+            self.mc.ADD_ri(esp.value, N*WORD)
+        for i in range(N, len(arglocs)):
             loc = arglocs[i]
             assert isinstance(loc, RegLoc)
             self.mc.POP_r(loc.value)
@@ -2125,6 +2298,8 @@
         assert 0 < offset <= 127
         self.mc.overwrite(jz_location-1, chr(offset))
 
+    genop_discard_cond_call_gc_wb_array = genop_discard_cond_call_gc_wb
+
     def genop_force_token(self, op, arglocs, resloc):
         # RegAlloc.consider_force_token ensures this:
         assert isinstance(resloc, RegLoc)
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -156,12 +156,14 @@
         self.translate_support_code = translate_support_code
         # to be read/used by the assembler too
         self.jump_target_descr = None
+        self.close_stack_struct = 0
 
-    def _prepare(self, inputargs, operations):
+    def _prepare(self, inputargs, operations, allgcrefs):
         self.fm = X86FrameManager()
         self.param_depth = 0
         cpu = self.assembler.cpu
-        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations)
+        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
+                                                       allgcrefs)
         # compute longevity of variables
         longevity = self._compute_vars_longevity(inputargs, operations)
         self.longevity = longevity
@@ -172,15 +174,16 @@
                                    assembler = self.assembler)
         return operations
 
-    def prepare_loop(self, inputargs, operations, looptoken):
-        operations = self._prepare(inputargs, operations)
+    def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
+        operations = self._prepare(inputargs, operations, allgcrefs)
         jump = operations[-1]
         loop_consts = self._compute_loop_consts(inputargs, jump, looptoken)
         self.loop_consts = loop_consts
         return self._process_inputargs(inputargs), operations
 
-    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations):
-        operations = self._prepare(inputargs, operations)
+    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
+                       allgcrefs):
+        operations = self._prepare(inputargs, operations, allgcrefs)
         self.loop_consts = {}
         self._update_bindings(arglocs, inputargs)
         self.fm.frame_depth = prev_depths[0]
@@ -268,6 +271,12 @@
             return self.rm.force_allocate_reg(var, forbidden_vars,
                                               selected_reg, need_lower_byte)
 
+    def force_spill_var(self, var):
+        if var.type == FLOAT:
+            return self.xrm.force_spill_var(var)
+        else:
+            return self.rm.force_spill_var(var)
+
     def load_xmm_aligned_16_bytes(self, var, forbidden_vars=[]):
         # Load 'var' in a register; but if it is a constant, we can return
         # a 16-bytes-aligned ConstFloatLoc.
@@ -382,7 +391,9 @@
         self.assembler.regalloc_perform_discard(op, arglocs)
 
     def can_merge_with_next_guard(self, op, i, operations):
-        if op.getopnum() == rop.CALL_MAY_FORCE or op.getopnum() == rop.CALL_ASSEMBLER:
+        if (op.getopnum() == rop.CALL_MAY_FORCE or
+            op.getopnum() == rop.CALL_ASSEMBLER or
+            op.getopnum() == rop.CALL_RELEASE_GIL):
             assert operations[i + 1].getopnum() == rop.GUARD_NOT_FORCED
             return True
         if not op.is_comparison():
@@ -418,6 +429,8 @@
             if self.can_merge_with_next_guard(op, i, operations):
                 oplist_with_guard[op.getopnum()](self, op, operations[i + 1])
                 i += 1
+            elif not we_are_translated() and op.getopnum() == -124: 
+                self._consider_force_spill(op)
             else:
                 oplist[op.getopnum()](self, op)
             if op.result is not None:
@@ -771,6 +784,19 @@
         self.xrm.possibly_free_var(op.getarg(1))
 
     def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None):
+        # we need to save registers on the stack:
+        #
+        #  - at least the non-callee-saved registers
+        #
+        #  - for shadowstack, we assume that any call can collect, and we
+        #    save also the callee-saved registers that contain GC pointers,
+        #    so that they can be found by follow_stack_frame_of_assembler()
+        #
+        #  - for CALL_MAY_FORCE or CALL_ASSEMBLER, we have to save all regs
+        #    anyway, in case we need to do cpu.force().  The issue is that
+        #    grab_frame_values() would not be able to locate values in
+        #    callee-saved registers.
+        #
         save_all_regs = guard_not_forced_op is not None
         self.xrm.before_call(force_store, save_all_regs=save_all_regs)
         if not save_all_regs:
@@ -837,6 +863,8 @@
         assert guard_op is not None
         self._consider_call(op, guard_op)
 
+    consider_call_release_gil = consider_call_may_force
+
     def consider_call_assembler(self, op, guard_op):
         descr = op.getdescr()
         assert isinstance(descr, LoopToken)
@@ -856,12 +884,12 @@
     def consider_cond_call_gc_wb(self, op):
         assert op.result is None
         args = op.getarglist()
-        loc_newvalue = self.rm.make_sure_var_in_reg(op.getarg(1), args)
-        # ^^^ we force loc_newvalue in a reg (unless it's a Const),
-        # because it will be needed anyway by the following setfield_gc.
-        # It avoids loading it twice from the memory.
-        loc_base = self.rm.make_sure_var_in_reg(op.getarg(0), args)
-        arglocs = [loc_base, loc_newvalue]
+        N = len(args)
+        # we force all arguments in a reg (unless they are Consts),
+        # because it will be needed anyway by the following setfield_gc
+        # or setarrayitem_gc. It avoids loading it twice from the memory.
+        arglocs = [self.rm.make_sure_var_in_reg(op.getarg(i), args)
+                   for i in range(N)]
         # add eax, ecx and edx as extra "arguments" to ensure they are
         # saved and restored.  Fish in self.rm to know which of these
         # registers really need to be saved (a bit of a hack).  Moreover,
@@ -875,6 +903,8 @@
         self.PerformDiscard(op, arglocs)
         self.rm.possibly_free_vars_for_op(op)
 
+    consider_cond_call_gc_wb_array = consider_cond_call_gc_wb
+
     def fastpath_malloc_fixedsize(self, op, descr):
         assert isinstance(descr, BaseSizeDescr)
         self._do_fastpath_malloc(op, descr.size, descr.tid)
@@ -1293,6 +1323,10 @@
     def consider_jit_debug(self, op):
         pass
 
+    def _consider_force_spill(self, op):
+        # This operation is used only for testing
+        self.force_spill_var(op.getarg(0))
+
     def get_mark_gc_roots(self, gcrootmap, use_copy_area=False):
         shape = gcrootmap.get_basic_shape(IS_X86_64)
         for v, val in self.fm.frame_bindings.items():
@@ -1346,7 +1380,9 @@
         name = name[len('consider_'):]
         num = getattr(rop, name.upper())
         if (is_comparison_or_ovf_op(num)
-            or num == rop.CALL_MAY_FORCE or num == rop.CALL_ASSEMBLER):
+            or num == rop.CALL_MAY_FORCE
+            or num == rop.CALL_ASSEMBLER
+            or num == rop.CALL_RELEASE_GIL):
             oplist_with_guard[num] = value
             oplist[num] = add_none_argument(value)
         else:
diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py
--- a/pypy/jit/backend/x86/regloc.py
+++ b/pypy/jit/backend/x86/regloc.py
@@ -318,7 +318,9 @@
             # must be careful not to combine it with location types that
             # might need to use the scratch register themselves.
             if loc2 is X86_64_SCRATCH_REG:
-                assert code1 != 'j'
+                if code1 == 'j':
+                    assert (name.startswith("MOV") and
+                            rx86.fits_in_32bits(loc1.value_j()))
             if loc1 is X86_64_SCRATCH_REG and not name.startswith("MOV"):
                 assert code2 not in ('j', 'i')
 
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -22,6 +22,7 @@
 
     BOOTSTRAP_TP = lltype.FuncType([], lltype.Signed)
     dont_keepalive_stuff = False # for tests
+    with_threads = False
 
     def __init__(self, rtyper, stats, opts=None, translate_support_code=False,
                  gcdescr=None):
@@ -38,6 +39,7 @@
                 if not oprofile.OPROFILE_AVAILABLE:
                     log.WARNING('oprofile support was explicitly enabled, but oprofile headers seem not to be available')
                 profile_agent = oprofile.OProfileAgent()
+            self.with_threads = config.translation.thread
 
         self.profile_agent = profile_agent
 
@@ -77,9 +79,9 @@
         lines = machine_code_dump(data, addr, self.backend_name, label_list)
         print ''.join(lines)
 
-    def compile_loop(self, inputargs, operations, looptoken, log=True):
-        return self.assembler.assemble_loop(inputargs, operations, looptoken,
-                                            log=log)
+    def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
+        return self.assembler.assemble_loop(name, inputargs, operations,
+                                            looptoken, log=log)
 
     def compile_bridge(self, faildescr, inputargs, operations,
                        original_loop_token, log=True):
@@ -122,8 +124,8 @@
         addr = executable_token._x86_bootstrap_code
         #llop.debug_print(lltype.Void, ">>>> Entering", addr)
         func = rffi.cast(lltype.Ptr(self.BOOTSTRAP_TP), addr)
+        fail_index = self._execute_call(func)
         #llop.debug_print(lltype.Void, "<<<< Back")
-        fail_index = self._execute_call(func)
         return self.get_fail_descr_from_number(fail_index)
 
     def _execute_call(self, func):
@@ -140,10 +142,11 @@
                 LLInterpreter.current_interpreter = prev_interpreter
         return res
 
-    @staticmethod
     def cast_ptr_to_int(x):
         adr = llmemory.cast_ptr_to_adr(x)
         return CPU386.cast_adr_to_int(adr)
+    cast_ptr_to_int._annspecialcase_ = 'specialize:arglltype(0)'
+    cast_ptr_to_int = staticmethod(cast_ptr_to_int)
 
     all_null_registers = lltype.malloc(rffi.LONGP.TO, 24,
                                        flavor='raw', zero=True,
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -283,7 +283,7 @@
 # with immediate(argnum)).
 
 def encode_abs(mc, _1, _2, orbyte):
-    # expands to either '\x05' on 32-bit, or '\x04\x25' or 64-bit
+    # expands to either '\x05' on 32-bit, or '\x04\x25' on 64-bit
     if mc.WORD == 8:
         mc.writechar(chr(0x04 | orbyte))
         mc.writechar(chr(0x25))
@@ -370,6 +370,8 @@
     INSN_rj = insn(rex_w, chr(base+3), register(1,8), abs_, immediate(2))
     INSN_ji8 = insn(rex_w, '\x83', orbyte(base), abs_, immediate(1),
                     immediate(2,'b'))
+    INSN_mi8 = insn(rex_w, '\x83', orbyte(base), mem_reg_plus_const(1),
+                    immediate(2,'b'))
     INSN_bi8 = insn(rex_w, '\x83', orbyte(base), stack_bp(1), immediate(2,'b'))
     INSN_bi32= insn(rex_w, '\x81', orbyte(base), stack_bp(1), immediate(2))
 
@@ -388,7 +390,7 @@
     INSN_bi._always_inline_ = True      # try to constant-fold single_byte()
 
     return (INSN_ri, INSN_rr, INSN_rb, INSN_bi, INSN_br, INSN_rm, INSN_rj,
-            INSN_ji8)
+            INSN_ji8, INSN_mi8)
 
 def select_8_or_32_bit_immed(insn_8, insn_32):
     def INSN(*args):
@@ -467,13 +469,13 @@
 
     # ------------------------------ Arithmetic ------------------------------
 
-    ADD_ri, ADD_rr, ADD_rb, _, _, ADD_rm, ADD_rj, _ = common_modes(0)
-    OR_ri,  OR_rr,  OR_rb,  _, _, OR_rm,  OR_rj,  _ = common_modes(1)
-    AND_ri, AND_rr, AND_rb, _, _, AND_rm, AND_rj, _ = common_modes(4)
-    SUB_ri, SUB_rr, SUB_rb, _, _, SUB_rm, SUB_rj, SUB_ji8 = common_modes(5)
-    SBB_ri, SBB_rr, SBB_rb, _, _, SBB_rm, SBB_rj, _ = common_modes(3)
-    XOR_ri, XOR_rr, XOR_rb, _, _, XOR_rm, XOR_rj, _ = common_modes(6)
-    CMP_ri, CMP_rr, CMP_rb, CMP_bi, CMP_br, CMP_rm, CMP_rj, _ = common_modes(7)
+    ADD_ri,ADD_rr,ADD_rb,_,_,ADD_rm,ADD_rj,_,_ = common_modes(0)
+    OR_ri, OR_rr, OR_rb, _,_,OR_rm, OR_rj, _,_ = common_modes(1)
+    AND_ri,AND_rr,AND_rb,_,_,AND_rm,AND_rj,_,_ = common_modes(4)
+    SUB_ri,SUB_rr,SUB_rb,_,_,SUB_rm,SUB_rj,SUB_ji8,SUB_mi8 = common_modes(5)
+    SBB_ri,SBB_rr,SBB_rb,_,_,SBB_rm,SBB_rj,_,_ = common_modes(3)
+    XOR_ri,XOR_rr,XOR_rb,_,_,XOR_rm,XOR_rj,_,_ = common_modes(6)
+    CMP_ri,CMP_rr,CMP_rb,CMP_bi,CMP_br,CMP_rm,CMP_rj,_,_ = common_modes(7)
 
     CMP_mi8 = insn(rex_w, '\x83', orbyte(7<<3), mem_reg_plus_const(1), immediate(2, 'b'))
     CMP_mi32 = insn(rex_w, '\x81', orbyte(7<<3), mem_reg_plus_const(1), immediate(2))
@@ -530,6 +532,7 @@
     POP_b = insn(rex_nw, '\x8F', orbyte(0<<3), stack_bp(1))
 
     LEA_rb = insn(rex_w, '\x8D', register(1,8), stack_bp(2))
+    LEA_rs = insn(rex_w, '\x8D', register(1,8), stack_sp(2))
     LEA32_rb = insn(rex_w, '\x8D', register(1,8),stack_bp(2,force_32bits=True))
     LEA_ra = insn(rex_w, '\x8D', register(1, 8), mem_reg_plus_scaled_reg_plus_const(2))
     LEA_rm = insn(rex_w, '\x8D', register(1, 8), mem_reg_plus_const(2))
diff --git a/pypy/jit/backend/x86/test/test_assembler.py b/pypy/jit/backend/x86/test/test_assembler.py
--- a/pypy/jit/backend/x86/test/test_assembler.py
+++ b/pypy/jit/backend/x86/test/test_assembler.py
@@ -1,13 +1,15 @@
 from pypy.jit.backend.x86.regloc import *
 from pypy.jit.backend.x86.assembler import Assembler386
 from pypy.jit.backend.x86.regalloc import X86FrameManager, get_ebp_ofs
-from pypy.jit.metainterp.history import BoxInt, BoxPtr, BoxFloat, INT, REF, FLOAT
+from pypy.jit.metainterp.history import BoxInt, BoxPtr, BoxFloat, ConstFloat
+from pypy.jit.metainterp.history import INT, REF, FLOAT
 from pypy.rlib.rarithmetic import intmask
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.jit.backend.x86.arch import WORD, IS_X86_32, IS_X86_64
 from pypy.jit.backend.detect_cpu import getcpuclass 
 from pypy.jit.backend.x86.regalloc import X86RegisterManager, X86_64_RegisterManager, X86XMMRegisterManager, X86_64_XMMRegisterManager
 from pypy.jit.codewriter import longlong
+import ctypes
 
 ACTUAL_CPU = getcpuclass()
 
@@ -238,3 +240,103 @@
         assert assembler.fail_boxes_int.getitem(i) == expected_ints[i]
         assert assembler.fail_boxes_ptr.getitem(i) == expected_ptrs[i]
         assert assembler.fail_boxes_float.getitem(i) == expected_floats[i]
+
+# ____________________________________________________________
+
+class TestRegallocPushPop(object):
+
+    def do_test(self, callback):
+        from pypy.jit.backend.x86.regalloc import X86FrameManager
+        from pypy.jit.backend.x86.regalloc import X86XMMRegisterManager
+        class FakeToken:
+            class compiled_loop_token:
+                asmmemmgr_blocks = None
+        cpu = ACTUAL_CPU(None, None)
+        cpu.setup()
+        looptoken = FakeToken()
+        asm = cpu.assembler
+        asm.setup_once()
+        asm.setup(looptoken)
+        self.fm = X86FrameManager()
+        self.xrm = X86XMMRegisterManager(None, frame_manager=self.fm,
+                                         assembler=asm)
+        callback(asm)
+        asm.mc.RET()
+        rawstart = asm.materialize_loop(looptoken)
+        #
+        F = ctypes.CFUNCTYPE(ctypes.c_long)
+        fn = ctypes.cast(rawstart, F)
+        res = fn()
+        return res
+
+    def test_simple(self):
+        def callback(asm):
+            asm.mov(imm(42), edx)
+            asm.regalloc_push(edx)
+            asm.regalloc_pop(eax)
+        res = self.do_test(callback)
+        assert res == 42
+
+    def test_push_stack(self):
+        def callback(asm):
+            loc = self.fm.frame_pos(5, INT)
+            asm.mc.SUB_ri(esp.value, 64)
+            asm.mov(imm(42), loc)
+            asm.regalloc_push(loc)
+            asm.regalloc_pop(eax)
+            asm.mc.ADD_ri(esp.value, 64)
+        res = self.do_test(callback)
+        assert res == 42
+
+    def test_pop_stack(self):
+        def callback(asm):
+            loc = self.fm.frame_pos(5, INT)
+            asm.mc.SUB_ri(esp.value, 64)
+            asm.mov(imm(42), edx)
+            asm.regalloc_push(edx)
+            asm.regalloc_pop(loc)
+            asm.mov(loc, eax)
+            asm.mc.ADD_ri(esp.value, 64)
+        res = self.do_test(callback)
+        assert res == 42
+
+    def test_simple_xmm(self):
+        def callback(asm):
+            c = ConstFloat(longlong.getfloatstorage(-42.5))
+            loc = self.xrm.convert_to_imm(c)
+            asm.mov(loc, xmm5)
+            asm.regalloc_push(xmm5)
+            asm.regalloc_pop(xmm0)
+            asm.mc.CVTTSD2SI(eax, xmm0)
+        res = self.do_test(callback)
+        assert res == -42
+
+    def test_push_stack_xmm(self):
+        def callback(asm):
+            c = ConstFloat(longlong.getfloatstorage(-42.5))
+            loc = self.xrm.convert_to_imm(c)
+            loc2 = self.fm.frame_pos(4, FLOAT)
+            asm.mc.SUB_ri(esp.value, 64)
+            asm.mov(loc, xmm5)
+            asm.mov(xmm5, loc2)
+            asm.regalloc_push(loc2)
+            asm.regalloc_pop(xmm0)
+            asm.mc.ADD_ri(esp.value, 64)
+            asm.mc.CVTTSD2SI(eax, xmm0)
+        res = self.do_test(callback)
+        assert res == -42
+
+    def test_pop_stack_xmm(self):
+        def callback(asm):
+            c = ConstFloat(longlong.getfloatstorage(-42.5))
+            loc = self.xrm.convert_to_imm(c)
+            loc2 = self.fm.frame_pos(4, FLOAT)
+            asm.mc.SUB_ri(esp.value, 64)
+            asm.mov(loc, xmm5)
+            asm.regalloc_push(xmm5)
+            asm.regalloc_pop(loc2)
+            asm.mov(loc2, xmm0)
+            asm.mc.ADD_ri(esp.value, 64)
+            asm.mc.CVTTSD2SI(eax, xmm0)
+        res = self.do_test(callback)
+        assert res == -42
diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py b/pypy/jit/backend/x86/test/test_gc_integration.py
--- a/pypy/jit/backend/x86/test/test_gc_integration.py
+++ b/pypy/jit/backend/x86/test/test_gc_integration.py
@@ -16,7 +16,7 @@
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rpython.annlowlevel import llhelper
 from pypy.rpython.lltypesystem import rclass, rstr
-from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcRefList, GcPtrFieldDescr
+from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcPtrFieldDescr
 
 from pypy.jit.backend.x86.test.test_regalloc import MockAssembler
 from pypy.jit.backend.x86.test.test_regalloc import BaseTestRegalloc
@@ -51,11 +51,9 @@
     gcrootmap = MockGcRootMap()
 
     def initialize(self):
-        self.gcrefs = GcRefList()
-        self.gcrefs.initialize()
-        self.single_gcref_descr = GcPtrFieldDescr('', 0)
+        pass
 
-    replace_constptrs_with_getfield_raw = GcLLDescr_framework.replace_constptrs_with_getfield_raw.im_func
+    record_constptrs = GcLLDescr_framework.record_constptrs.im_func
     rewrite_assembler = GcLLDescr_framework.rewrite_assembler.im_func
 
 class TestRegallocDirectGcIntegration(object):
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -6,6 +6,7 @@
                                          ConstPtr, Box, BoxFloat, BasicFailDescr)
 from pypy.jit.backend.detect_cpu import getcpuclass
 from pypy.jit.backend.x86.arch import WORD
+from pypy.jit.backend.x86.rx86 import fits_in_32bits
 from pypy.jit.backend.llsupport import symbolic
 from pypy.jit.metainterp.resoperation import rop
 from pypy.jit.metainterp.executor import execute
@@ -241,6 +242,23 @@
         c = self.execute_operation(rop.GETFIELD_GC, [res], 'int', ofsc3)
         assert c.value == 3
 
+    def test_bug_setfield_64bit(self):
+        if WORD == 4:
+            py.test.skip("only for 64 bits")
+        TP = lltype.GcStruct('S', ('i', lltype.Signed))
+        ofsi = self.cpu.fielddescrof(TP, 'i')
+        for i in range(500):
+            p = lltype.malloc(TP)
+            addr = rffi.cast(lltype.Signed, p)
+            if fits_in_32bits(addr):
+                break    # fitting in 32 bits, good
+        else:
+            py.test.skip("cannot get a 32-bit pointer")
+        res = ConstPtr(rffi.cast(llmemory.GCREF, addr))
+        self.execute_operation(rop.SETFIELD_RAW, [res, ConstInt(3**33)],
+                               'void', ofsi)
+        assert p.i == 3**33
+
     def test_nullity_with_guard(self):
         allops = [rop.INT_IS_TRUE]
         guards = [rop.GUARD_TRUE, rop.GUARD_FALSE]
@@ -330,6 +348,7 @@
                         assert result != expected
 
     def test_compile_bridge_check_profile_info(self):
+        py.test.skip("does not work, reinvestigate")
         class FakeProfileAgent(object):
             def __init__(self):
                 self.functions = []
@@ -362,7 +381,7 @@
         operations[3].setfailargs([i1])
         self.cpu.compile_loop(inputargs, operations, looptoken)
         name, loopaddress, loopsize = agent.functions[0]
-        assert name == "Loop # 17: hello"
+        assert name == "Loop # 17: hello (loop counter 0)"
         assert loopaddress <= looptoken._x86_loop_code
         assert loopsize >= 40 # randomish number
 
@@ -378,7 +397,7 @@
 
         self.cpu.compile_bridge(faildescr1, [i1b], bridge, looptoken)
         name, address, size = agent.functions[1]
-        assert name == "Bridge # 0: bye"
+        assert name == "Bridge # 0: bye (loop counter 1)"
         # Would be exactly ==, but there are some guard failure recovery
         # stubs in-between
         assert address >= loopaddress + loopsize
diff --git a/pypy/jit/backend/x86/test/test_rx86.py b/pypy/jit/backend/x86/test/test_rx86.py
--- a/pypy/jit/backend/x86/test/test_rx86.py
+++ b/pypy/jit/backend/x86/test/test_rx86.py
@@ -185,6 +185,13 @@
     cb = CodeBuilder32
     assert_encodes_as(cb, 'PUSH_i32', (9,), '\x68\x09\x00\x00\x00')
 
+def test_sub_ji8():
+    cb = CodeBuilder32
+    assert_encodes_as(cb, 'SUB_ji8', (11223344, 55),
+                      '\x83\x2D\x30\x41\xAB\x00\x37')
+    assert_encodes_as(cb, 'SUB_mi8', ((edx, 16), 55),
+                      '\x83\x6A\x10\x37')
+
 class CodeBuilder64(CodeBuilderMixin, X86_64_CodeBuilder):
     pass
 
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -1,8 +1,7 @@
 """
-This is a test that translates a complete JIT to C and runs it.  It is
-not testing much, expect that it basically works.  What it *is* testing,
-however, is the correct handling of GC, i.e. if objects are freed as
-soon as possible (at least in a simple case).
+This is a test that translates a complete JIT together with a GC and runs it.
+It is testing that the GC-dependent aspects basically work, mostly the mallocs
+and the various cases of write barrier.
 """
 
 import weakref
@@ -10,16 +9,11 @@
 from pypy.annotation import policy as annpolicy
 from pypy.rlib import rgc
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
-from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rlib.jit import JitDriver, dont_look_inside
-from pypy.rlib.jit import purefunction, unroll_safe
-from pypy.jit.backend.x86.runner import CPU386
-from pypy.jit.backend.llsupport.gc import GcRefList, GcRootMap_asmgcc
+from pypy.rlib.jit import elidable, unroll_safe
 from pypy.jit.backend.llsupport.gc import GcLLDescr_framework
 from pypy.tool.udir import udir
-from pypy.jit.backend.x86.arch import IS_X86_64
 from pypy.config.translationoption import DEFL_GC
-import py.test
 
 class X(object):
     def __init__(self, x=0):
@@ -86,7 +80,7 @@
     #
     return {(gc.GcLLDescr_framework, 'can_inline_malloc'): can_inline_malloc2}
 
-def compile(f, gc, **kwds):
+def compile(f, gc, enable_opts='', **kwds):
     from pypy.annotation.listdef import s_list_of_strings
     from pypy.translator.translator import TranslationContext
     from pypy.jit.metainterp.warmspot import apply_jit
@@ -110,14 +104,14 @@
                 old_value[obj, attr] = getattr(obj, attr)
                 setattr(obj, attr, value)
             #
-            apply_jit(t, enable_opts='')
+            apply_jit(t, enable_opts=enable_opts)
             #
         finally:
             for (obj, attr), oldvalue in old_value.items():
                 setattr(obj, attr, oldvalue)
 
     cbuilder = genc.CStandaloneBuilder(t, f, t.config)
-    cbuilder.generate_source()
+    cbuilder.generate_source(defines=cbuilder.DEBUG_DEFINES)
     cbuilder.compile()
     return cbuilder
 
@@ -154,8 +148,10 @@
 
 # ______________________________________________________________________
 
-class CompileFrameworkTests(object):
-    # Test suite using (so far) the minimark GC.
+
+class BaseFrameworkTests(object):
+    compile_kwds = {}
+
     def setup_class(cls):
         funcs = []
         name_to_func = {}
@@ -205,7 +201,8 @@
         try:
             GcLLDescr_framework.DEBUG = True
             cls.cbuilder = compile(get_entry(allfuncs), DEFL_GC,
-                                   gcrootfinder=cls.gcrootfinder, jit=True)
+                                   gcrootfinder=cls.gcrootfinder, jit=True,
+                                   **cls.compile_kwds)
         finally:
             GcLLDescr_framework.DEBUG = OLD_DEBUG
 
@@ -224,32 +221,36 @@
     def run_orig(self, name, n, x):
         self.main_allfuncs(name, n, x)
 
-    def define_libffi_workaround(cls):
-        # XXX: this is a workaround for a bug in database.py.  It seems that
-        # the problem is triggered by optimizeopt/fficall.py, and in
-        # particular by the ``cast_base_ptr_to_instance(Func, llfunc)``: in
-        # these tests, that line is the only place where libffi.Func is
-        # referenced.
-        #
-        # The problem occurs because the gctransformer tries to annotate a
-        # low-level helper to call the __del__ of libffi.Func when it's too
-        # late.
-        #
-        # This workaround works by forcing the annotator (and all the rest of
-        # the toolchain) to see libffi.Func in a "proper" context, not just as
-        # the target of cast_base_ptr_to_instance.  Note that the function
-        # below is *never* called by any actual test, it's just annotated.
-        #
-        from pypy.rlib.libffi import get_libc_name, CDLL, types, ArgChain
-        libc_name = get_libc_name()
-        def f(n, x, *args):
-            libc = CDLL(libc_name)
-            ptr = libc.getpointer('labs', [types.slong], types.slong)
-            chain = ArgChain()
-            chain.arg(n)
-            n = ptr.call(chain, lltype.Signed)
-            return (n, x) + args
-        return None, f, None
+
+class CompileFrameworkTests(BaseFrameworkTests):
+    # Test suite using (so far) the minimark GC.
+
+##    def define_libffi_workaround(cls):
+##        # XXX: this is a workaround for a bug in database.py.  It seems that
+##        # the problem is triggered by optimizeopt/fficall.py, and in
+##        # particular by the ``cast_base_ptr_to_instance(Func, llfunc)``: in
+##        # these tests, that line is the only place where libffi.Func is
+##        # referenced.
+##        #
+##        # The problem occurs because the gctransformer tries to annotate a
+##        # low-level helper to call the __del__ of libffi.Func when it's too
+##        # late.
+##        #
+##        # This workaround works by forcing the annotator (and all the rest of
+##        # the toolchain) to see libffi.Func in a "proper" context, not just as
+##        # the target of cast_base_ptr_to_instance.  Note that the function
+##        # below is *never* called by any actual test, it's just annotated.
+##        #
+##        from pypy.rlib.libffi import get_libc_name, CDLL, types, ArgChain
+##        libc_name = get_libc_name()
+##        def f(n, x, *args):
+##            libc = CDLL(libc_name)
+##            ptr = libc.getpointer('labs', [types.slong], types.slong)
+##            chain = ArgChain()
+##            chain.arg(n)
+##            n = ptr.call(chain, lltype.Signed)
+##            return (n, x) + args
+##        return None, f, None
 
     def define_compile_framework_1(cls):
         # a moving GC.  Supports malloc_varsize_nonmovable.  Simple test, works
@@ -456,6 +457,73 @@
     def test_compile_framework_7(self):
         self.run('compile_framework_7')
 
+    def define_compile_framework_8(cls):
+        # Array of pointers, of unknown length (test write_barrier_from_array)
+        def before(n, x):
+            return n, x, None, None, None, None, None, None, None, None, [X(123)], None
+        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            if n < 1900:
+                check(l[0].x == 123)
+                l = [None] * (16 + (n & 7))
+                l[0] = X(123)
+                l[1] = X(n)
+                l[2] = X(n+10)
+                l[3] = X(n+20)
+                l[4] = X(n+30)
+                l[5] = X(n+40)
+                l[6] = X(n+50)
+                l[7] = X(n+60)
+                l[8] = X(n+70)
+                l[9] = X(n+80)
+                l[10] = X(n+90)
+                l[11] = X(n+100)
+                l[12] = X(n+110)
+                l[13] = X(n+120)
+                l[14] = X(n+130)
+                l[15] = X(n+140)
+            if n < 1800:
+                check(len(l) == 16 + (n & 7))
+                check(l[0].x == 123)
+                check(l[1].x == n)
+                check(l[2].x == n+10)
+                check(l[3].x == n+20)
+                check(l[4].x == n+30)
+                check(l[5].x == n+40)
+                check(l[6].x == n+50)
+                check(l[7].x == n+60)
+                check(l[8].x == n+70)
+                check(l[9].x == n+80)
+                check(l[10].x == n+90)
+                check(l[11].x == n+100)
+                check(l[12].x == n+110)
+                check(l[13].x == n+120)
+                check(l[14].x == n+130)
+                check(l[15].x == n+140)
+            n -= x.foo
+            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
+        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            check(len(l) >= 16)
+            check(l[0].x == 123)
+            check(l[1].x == 2)
+            check(l[2].x == 12)
+            check(l[3].x == 22)
+            check(l[4].x == 32)
+            check(l[5].x == 42)
+            check(l[6].x == 52)
+            check(l[7].x == 62)
+            check(l[8].x == 72)
+            check(l[9].x == 82)
+            check(l[10].x == 92)
+            check(l[11].x == 102)
+            check(l[12].x == 112)
+            check(l[13].x == 122)
+            check(l[14].x == 132)
+            check(l[15].x == 142)
+        return before, f, after
+
+    def test_compile_framework_8(self):
+        self.run('compile_framework_8')
+
     def define_compile_framework_external_exception_handling(cls):
         def before(n, x):
             x = X(0)
@@ -493,7 +561,7 @@
         self.run('compile_framework_external_exception_handling')
 
     def define_compile_framework_bug1(self):
-        @purefunction
+        @elidable
         def nonmoving():
             x = X(1)
             for i in range(7):
@@ -525,8 +593,8 @@
         glob = A()
         def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
             a = A()
-            glob.v = virtual_ref(a)
-            virtual_ref_finish(a)
+            glob.v = vref = virtual_ref(a)
+            virtual_ref_finish(vref, a)
             n -= 1
             return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
         return None, f, None
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_releasegil.py
copy from pypy/jit/backend/x86/test/test_zrpy_gc.py
copy to pypy/jit/backend/x86/test/test_zrpy_releasegil.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_releasegil.py
@@ -1,618 +1,110 @@
-"""
-This is a test that translates a complete JIT to C and runs it.  It is
-not testing much, expect that it basically works.  What it *is* testing,
-however, is the correct handling of GC, i.e. if objects are freed as
-soon as possible (at least in a simple case).
-"""
+from pypy.rpython.lltypesystem import lltype, llmemory, rffi
+from pypy.rlib.jit import dont_look_inside
+from pypy.jit.metainterp.optimizeopt import ALL_OPTS_NAMES
 
-import weakref
-import py, os
-from pypy.annotation import policy as annpolicy
-from pypy.rlib import rgc
-from pypy.rpython.lltypesystem import lltype, llmemory, rffi
-from pypy.rpython.lltypesystem.lloperation import llop
-from pypy.rlib.jit import JitDriver, dont_look_inside
-from pypy.rlib.jit import purefunction, unroll_safe
-from pypy.jit.backend.x86.runner import CPU386
-from pypy.jit.backend.llsupport.gc import GcRefList, GcRootMap_asmgcc
-from pypy.jit.backend.llsupport.gc import GcLLDescr_framework
-from pypy.tool.udir import udir
-from pypy.jit.backend.x86.arch import IS_X86_64
-from pypy.config.translationoption import DEFL_GC
-import py.test
+from pypy.rlib.libffi import CDLL, types, ArgChain, clibffi
+from pypy.rpython.lltypesystem.ll2ctypes import libc_name
+from pypy.rpython.annlowlevel import llhelper
 
-class X(object):
-    def __init__(self, x=0):
-        self.x = x
+from pypy.jit.backend.x86.test.test_zrpy_gc import BaseFrameworkTests
+from pypy.jit.backend.x86.test.test_zrpy_gc import check
 
-    next = None
 
-class CheckError(Exception):
-    pass
+class ReleaseGILTests(BaseFrameworkTests):
+    compile_kwds = dict(enable_opts=ALL_OPTS_NAMES, thread=True)
 
-def check(flag):
-    if not flag:
-        raise CheckError
-
-def get_g(main):
-    main._dont_inline_ = True
-    def g(name, n):
-        x = X()
-        x.foo = 2
-        main(n, x)
-        x.foo = 5
-        return weakref.ref(x)
-    g._dont_inline_ = True
-    return g
-
-
-def get_entry(g):
-
-    def entrypoint(args):
-        name = ''
-        n = 2000
-        argc = len(args)
-        if argc > 1:
-            name = args[1]
-        if argc > 2:
-            n = int(args[2])
-        r_list = []
-        for i in range(20):
-            r = g(name, n)
-            r_list.append(r)
-            rgc.collect()
-        rgc.collect(); rgc.collect()
-        freed = 0
-        for r in r_list:
-            if r() is None:
-                freed += 1
-        print freed
-        return 0
-
-    return entrypoint
-
-
-def get_functions_to_patch():
-    from pypy.jit.backend.llsupport import gc
-    #
-    can_inline_malloc1 = gc.GcLLDescr_framework.can_inline_malloc
-    def can_inline_malloc2(*args):
-        try:
-            if os.environ['PYPY_NO_INLINE_MALLOC']:
-                return False
-        except KeyError:
+    def define_simple(self):
+        class Glob:
             pass
-        return can_inline_malloc1(*args)
-    #
-    return {(gc.GcLLDescr_framework, 'can_inline_malloc'): can_inline_malloc2}
-
-def compile(f, gc, **kwds):
-    from pypy.annotation.listdef import s_list_of_strings
-    from pypy.translator.translator import TranslationContext
-    from pypy.jit.metainterp.warmspot import apply_jit
-    from pypy.translator.c import genc
-    #
-    t = TranslationContext()
-    t.config.translation.gc = gc
-    if gc != 'boehm':
-        t.config.translation.gcremovetypeptr = True
-    for name, value in kwds.items():
-        setattr(t.config.translation, name, value)
-    ann = t.buildannotator(policy=annpolicy.StrictAnnotatorPolicy())
-    ann.build_types(f, [s_list_of_strings], main_entry_point=True)
-    t.buildrtyper().specialize()
-
-    if kwds['jit']:
-        patch = get_functions_to_patch()
-        old_value = {}
-        try:
-            for (obj, attr), value in patch.items():
-                old_value[obj, attr] = getattr(obj, attr)
-                setattr(obj, attr, value)
-            #
-            apply_jit(t, enable_opts='')
-            #
-        finally:
-            for (obj, attr), oldvalue in old_value.items():
-                setattr(obj, attr, oldvalue)
-
-    cbuilder = genc.CStandaloneBuilder(t, f, t.config)
-    cbuilder.generate_source()
-    cbuilder.compile()
-    return cbuilder
-
-def run(cbuilder, args=''):
-    #
-    pypylog = udir.join('test_zrpy_gc.log')
-    data = cbuilder.cmdexec(args, env={'PYPYLOG': ':%s' % pypylog})
-    return data.strip()
-
-def compile_and_run(f, gc, **kwds):
-    cbuilder = compile(f, gc, **kwds)
-    return run(cbuilder)
-
-
-
-def test_compile_boehm():
-    myjitdriver = JitDriver(greens = [], reds = ['n', 'x'])
-    @dont_look_inside
-    def see(lst, n):
-        assert len(lst) == 3
-        assert lst[0] == n+10
-        assert lst[1] == n+20
-        assert lst[2] == n+30
-    def main(n, x):
-        while n > 0:
-            myjitdriver.can_enter_jit(n=n, x=x)
-            myjitdriver.jit_merge_point(n=n, x=x)
-            y = X()
-            y.foo = x.foo
-            n -= y.foo
-            see([n+10, n+20, n+30], n)
-    res = compile_and_run(get_entry(get_g(main)), "boehm", jit=True)
-    assert int(res) >= 16
-
-# ______________________________________________________________________
-
-class CompileFrameworkTests(object):
-    # Test suite using (so far) the minimark GC.
-    def setup_class(cls):
-        funcs = []
-        name_to_func = {}
-        for fullname in dir(cls):
-            if not fullname.startswith('define'):
-                continue
-            definefunc = getattr(cls, fullname)
-            _, name = fullname.split('_', 1)
-            beforefunc, loopfunc, afterfunc = definefunc.im_func(cls)
-            if beforefunc is None:
-                def beforefunc(n, x):
-                    return n, x, None, None, None, None, None, None, None, None, None, ''
-            if afterfunc is None:
-                def afterfunc(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-                    pass
-            beforefunc.func_name = 'before_'+name
-            loopfunc.func_name = 'loop_'+name
-            afterfunc.func_name = 'after_'+name
-            funcs.append((beforefunc, loopfunc, afterfunc))
-            assert name not in name_to_func
-            name_to_func[name] = len(name_to_func)
-        print name_to_func
-        def allfuncs(name, n):
-            x = X()
-            x.foo = 2
-            main_allfuncs(name, n, x)
-            x.foo = 5
-            return weakref.ref(x)
-        def main_allfuncs(name, n, x):
-            num = name_to_func[name]
-            n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s = funcs[num][0](n, x)
-            while n > 0:
-                myjitdriver.can_enter_jit(num=num, n=n, x=x, x0=x0, x1=x1,
-                        x2=x2, x3=x3, x4=x4, x5=x5, x6=x6, x7=x7, l=l, s=s)
-                myjitdriver.jit_merge_point(num=num, n=n, x=x, x0=x0, x1=x1,
-                        x2=x2, x3=x3, x4=x4, x5=x5, x6=x6, x7=x7, l=l, s=s)
-
-                n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s = funcs[num][1](
-                        n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s)
-            funcs[num][2](n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s)
-        myjitdriver = JitDriver(greens = ['num'],
-                                reds = ['n', 'x', 'x0', 'x1', 'x2', 'x3', 'x4',
-                                        'x5', 'x6', 'x7', 'l', 's'])
-        cls.main_allfuncs = staticmethod(main_allfuncs)
-        cls.name_to_func = name_to_func
-        OLD_DEBUG = GcLLDescr_framework.DEBUG
-        try:
-            GcLLDescr_framework.DEBUG = True
-            cls.cbuilder = compile(get_entry(allfuncs), DEFL_GC,
-                                   gcrootfinder=cls.gcrootfinder, jit=True)
-        finally:
-            GcLLDescr_framework.DEBUG = OLD_DEBUG
-
-    def _run(self, name, n, env):
-        res = self.cbuilder.cmdexec("%s %d" %(name, n), env=env)
-        assert int(res) == 20
-
-    def run(self, name, n=2000):
-        pypylog = udir.join('TestCompileFramework.log')
-        env = {'PYPYLOG': ':%s' % pypylog,
-               'PYPY_NO_INLINE_MALLOC': '1'}
-        self._run(name, n, env)
-        env['PYPY_NO_INLINE_MALLOC'] = ''
-        self._run(name, n, env)
-
-    def run_orig(self, name, n, x):
-        self.main_allfuncs(name, n, x)
-
-    def define_libffi_workaround(cls):
-        # XXX: this is a workaround for a bug in database.py.  It seems that
-        # the problem is triggered by optimizeopt/fficall.py, and in
-        # particular by the ``cast_base_ptr_to_instance(Func, llfunc)``: in
-        # these tests, that line is the only place where libffi.Func is
-        # referenced.
+        glob = Glob()
         #
-        # The problem occurs because the gctransformer tries to annotate a
-        # low-level helper to call the __del__ of libffi.Func when it's too
-        # late.
-        #
-        # This workaround works by forcing the annotator (and all the rest of
-        # the toolchain) to see libffi.Func in a "proper" context, not just as
-        # the target of cast_base_ptr_to_instance.  Note that the function
-        # below is *never* called by any actual test, it's just annotated.
-        #
-        from pypy.rlib.libffi import get_libc_name, CDLL, types, ArgChain
-        libc_name = get_libc_name()
-        def f(n, x, *args):
-            libc = CDLL(libc_name)
-            ptr = libc.getpointer('labs', [types.slong], types.slong)
-            chain = ArgChain()
-            chain.arg(n)
-            n = ptr.call(chain, lltype.Signed)
-            return (n, x) + args
-        return None, f, None
-
-    def define_compile_framework_1(cls):
-        # a moving GC.  Supports malloc_varsize_nonmovable.  Simple test, works
-        # without write_barriers and root stack enumeration.
-        def f(n, x, *args):
-            y = X()
-            y.foo = x.foo
-            n -= y.foo
-            return (n, x) + args
-        return None, f, None
-
-    def test_compile_framework_1(self):
-        self.run('compile_framework_1')
-
-    def define_compile_framework_2(cls):
-        # More complex test, requires root stack enumeration but
-        # not write_barriers.
-        def f(n, x, *args):
-            prev = x
-            for j in range(101):    # f() runs 20'000 times, thus allocates
-                y = X()             # a total of 2'020'000 objects
-                y.foo = prev.foo
-                prev = y
-            n -= prev.foo
-            return (n, x) + args
-        return None, f, None
-
-    def test_compile_framework_2(self):
-        self.run('compile_framework_2')
-
-    def define_compile_framework_3(cls):
-        # Third version of the test.  Really requires write_barriers.
-        def f(n, x, *args):
-            x.next = None
-            for j in range(101):    # f() runs 20'000 times, thus allocates
-                y = X()             # a total of 2'020'000 objects
-                y.foo = j+1
-                y.next = x.next
-                x.next = y
-            check(x.next.foo == 101)
-            total = 0
-            y = x
-            for j in range(101):
-                y = y.next
-                total += y.foo
-            check(not y.next)
-            check(total == 101*102/2)
-            n -= x.foo
-            return (n, x) + args
-        return None, f, None
-
-
-
-    def test_compile_framework_3(self):
-        x_test = X()
-        x_test.foo = 5
-        self.run_orig('compile_framework_3', 6, x_test)     # check that it does not raise CheckError
-        self.run('compile_framework_3')
-
-    def define_compile_framework_3_extra(cls):
-        # Extra version of the test, with tons of live vars around the residual
-        # call that all contain a GC pointer.
-        @dont_look_inside
-        def residual(n=26):
-            x = X()
-            x.next = X()
-            x.next.foo = n
-            return x
+        def f42(n):
+            c_strchr = glob.c_strchr
+            raw = rffi.str2charp("foobar" + chr((n & 63) + 32))
+            argchain = ArgChain()
+            argchain = argchain.arg(rffi.cast(lltype.Signed, raw))
+            argchain = argchain.arg(rffi.cast(rffi.INT, ord('b')))
+            res = c_strchr.call(argchain, rffi.CCHARP)
+            check(rffi.charp2str(res) == "bar" + chr((n & 63) + 32))
+            rffi.free_charp(raw)
         #
         def before(n, x):
-            residual(5)
-            x0 = residual()
-            x1 = residual()
-            x2 = residual()
-            x3 = residual()
-            x4 = residual()
-            x5 = residual()
-            x6 = residual()
-            x7 = residual()
-            n *= 19
-            return n, None, x0, x1, x2, x3, x4, x5, x6, x7, None, None
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            x8 = residual()
-            x9 = residual()
-            check(x0.next.foo == 26)
-            check(x1.next.foo == 26)
-            check(x2.next.foo == 26)
-            check(x3.next.foo == 26)
-            check(x4.next.foo == 26)
-            check(x5.next.foo == 26)
-            check(x6.next.foo == 26)
-            check(x7.next.foo == 26)
-            check(x8.next.foo == 26)
-            check(x9.next.foo == 26)
-            x0, x1, x2, x3, x4, x5, x6, x7 = x7, x4, x6, x5, x3, x2, x9, x8
+            libc = CDLL(libc_name)
+            c_strchr = libc.getpointer('strchr', [types.pointer, types.sint],
+                                       types.pointer)
+            glob.c_strchr = c_strchr
+            return (n, None, None, None, None, None,
+                    None, None, None, None, None, None)
+        #
+        def f(n, x, *args):
+            f42(n)
             n -= 1
-            return n, None, x0, x1, x2, x3, x4, x5, x6, x7, None, None
-        return before, f, None
-
-    def test_compile_framework_3_extra(self):
-        self.run_orig('compile_framework_3_extra', 6, None)     # check that it does not raise CheckError
-        self.run('compile_framework_3_extra')
-
-    def define_compile_framework_4(cls):
-        # Fourth version of the test, with __del__.
-        from pypy.rlib.debug import debug_print
-        class Counter:
-            cnt = 0
-        counter = Counter()