[pypy-commit] pypy stacklet: hg merge default

Mon Aug 8 18:20:34 CEST 2011

Author: Armin Rigo <arigo at tunes.org>
Branch: stacklet
Changeset: r46382:a051e10a5591
Date: 2011-08-08 18:22 +0200
http://bitbucket.org/pypy/pypy/changeset/a051e10a5591/

Log:	hg merge default

diff --git a/LICENSE b/LICENSE
--- a/LICENSE
+++ b/LICENSE
@@ -37,22 +37,22 @@
     Armin Rigo
     Maciej Fijalkowski
     Carl Friedrich Bolz
+    Antonio Cuni
     Amaury Forgeot d'Arc
-    Antonio Cuni
     Samuele Pedroni
     Michael Hudson
     Holger Krekel
+    Benjamin Peterson
     Christian Tismer
-    Benjamin Peterson
+    Hakan Ardo
+    Alex Gaynor
     Eric van Riet Paap
-    Anders Chrigström
-    Håkan Ardö
+    Anders Chrigstrom
+    David Schneider
     Richard Emslie
     Dan Villiom Podlaski Christiansen
     Alexander Schremmer
-    Alex Gaynor
-    David Schneider
-    Aurelién Campeas
+    Aurelien Campeas
     Anders Lehmann
     Camillo Bruni
     Niklaus Haldimann
@@ -63,16 +63,17 @@
     Bartosz Skowron
     Jakub Gustak
     Guido Wesdorp
+    Daniel Roberts
     Adrien Di Mascio
     Laura Creighton
     Ludovic Aubry
     Niko Matsakis
-    Daniel Roberts
     Jason Creighton
-    Jacob Hallén
+    Jacob Hallen
     Alex Martelli
     Anders Hammarquist
     Jan de Mooij
+    Wim Lavrijsen
     Stephan Diehl
     Michael Foord
     Stefan Schwarzer
@@ -83,9 +84,13 @@
     Alexandre Fayolle
     Marius Gedminas
     Simon Burton
+    Justin Peel
     Jean-Paul Calderone
     John Witulski
+    Lukas Diekmann
+    holger krekel
     Wim Lavrijsen
+    Dario Bertini
     Andreas Stührk
     Jean-Philippe St. Pierre
     Guido van Rossum
@@ -97,15 +102,16 @@
     Georg Brandl
     Gerald Klix
     Wanja Saatkamp
+    Ronny Pfannschmidt
     Boris Feigin
     Oscar Nierstrasz
-    Dario Bertini
     David Malcolm
     Eugene Oden
     Henry Mason
+    Sven Hager
     Lukas Renggli
+    Ilya Osadchiy
     Guenter Jantzen
-    Ronny Pfannschmidt
     Bert Freudenberg
     Amit Regmi
     Ben Young
@@ -122,8 +128,8 @@
     Jared Grubb
     Karl Bartel
     Gabriel Lavoie
+    Victor Stinner
     Brian Dorsey
-    Victor Stinner
     Stuart Williams
     Toby Watson
     Antoine Pitrou
@@ -134,19 +140,23 @@
     Jonathan David Riehl
     Elmo Mäntynen
     Anders Qvist
-    Beatrice Düring
+    Beatrice During
     Alexander Sedov
+    Timo Paulssen
+    Corbin Simpson
     Vincent Legoll
+    Romain Guillebert
     Alan McIntyre
-    Romain Guillebert
     Alex Perry
     Jens-Uwe Mager
+    Simon Cross
     Dan Stromberg
-    Lukas Diekmann
+    Guillebert Romain
     Carl Meyer
     Pieter Zieschang
     Alejandro J. Cura
     Sylvain Thenault
+    Christoph Gerum
     Travis Francis Athougies
     Henrik Vendelbo
     Lutz Paelike
@@ -157,6 +167,7 @@
     Miguel de Val Borro
     Ignas Mikalajunas
     Artur Lisiecki
+    Philip Jenvey
     Joshua Gilbert
     Godefroid Chappelle
     Yusei Tahara
@@ -165,27 +176,31 @@
     Gustavo Niemeyer
     William Leslie
     Akira Li
-    Kristján Valur Jónsson
+    Kristjan Valur Jonsson
     Bobby Impollonia
+    Michael Hudson-Doyle
     Andrew Thompson
     Anders Sigfridsson
+    Floris Bruynooghe
     Jacek Generowicz
     Dan Colish
-    Sven Hager
     Zooko Wilcox-O Hearn
+    Dan Villiom Podlaski Christiansen
     Anders Hammarquist
+    Chris Lambacher
     Dinu Gherman
     Dan Colish
+    Brett Cannon
     Daniel Neuhäuser
     Michael Chermside
     Konrad Delong
     Anna Ravencroft
     Greg Price
     Armin Ronacher
+    Christian Muirhead
     Jim Baker
-    Philip Jenvey
     Rodrigo Araújo
-    Brett Cannon
+    Romain Guillebert
 
     Heinrich-Heine University, Germany 
     Open End AB (formerly AB Strakt), Sweden
diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -154,18 +154,18 @@
     RegrTest('test_cmd.py'),
     RegrTest('test_cmd_line_script.py'),
     RegrTest('test_codeccallbacks.py', core=True),
-    RegrTest('test_codecencodings_cn.py'),
-    RegrTest('test_codecencodings_hk.py'),
-    RegrTest('test_codecencodings_jp.py'),
-    RegrTest('test_codecencodings_kr.py'),
-    RegrTest('test_codecencodings_tw.py'),
+    RegrTest('test_codecencodings_cn.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_hk.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_jp.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_kr.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_tw.py', usemodules='_multibytecodec'),
 
-    RegrTest('test_codecmaps_cn.py'),
-    RegrTest('test_codecmaps_hk.py'),
-    RegrTest('test_codecmaps_jp.py'),
-    RegrTest('test_codecmaps_kr.py'),
-    RegrTest('test_codecmaps_tw.py'),
-    RegrTest('test_codecs.py', core=True),
+    RegrTest('test_codecmaps_cn.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_hk.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_jp.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_kr.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_tw.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecs.py', core=True, usemodules='_multibytecodec'),
     RegrTest('test_codeop.py', core=True),
     RegrTest('test_coercion.py', core=True),
     RegrTest('test_collections.py'),
@@ -314,7 +314,7 @@
     RegrTest('test_mmap.py'),
     RegrTest('test_module.py', core=True),
     RegrTest('test_modulefinder.py'),
-    RegrTest('test_multibytecodec.py'),
+    RegrTest('test_multibytecodec.py', usemodules='_multibytecodec'),
     RegrTest('test_multibytecodec_support.py', skip="not a test"),
     RegrTest('test_multifile.py'),
     RegrTest('test_multiprocessing.py', skip='FIXME leaves subprocesses'),
diff --git a/lib-python/modified-2.7/test/test_multibytecodec.py b/lib-python/modified-2.7/test/test_multibytecodec.py
--- a/lib-python/modified-2.7/test/test_multibytecodec.py
+++ b/lib-python/modified-2.7/test/test_multibytecodec.py
@@ -148,7 +148,8 @@
 class Test_StreamReader(unittest.TestCase):
     def test_bug1728403(self):
         try:
-            open(TESTFN, 'w').write('\xa1')
+            with open(TESTFN, 'w') as f:
+                f.write('\xa1')
             f = codecs.open(TESTFN, encoding='cp949')
             self.assertRaises(UnicodeDecodeError, f.read, 2)
         finally:
diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py
--- a/lib_pypy/_ctypes/function.py
+++ b/lib_pypy/_ctypes/function.py
@@ -91,13 +91,15 @@
                     raise TypeError(
                         "item %d in _argtypes_ has no from_param method" % (
                             i + 1,))
-            #
-            if all([hasattr(argtype, '_ffiargshape') for argtype in argtypes]):
-                fastpath_cls = make_fastpath_subclass(self.__class__)
-                fastpath_cls.enable_fastpath_maybe(self)
             self._argtypes_ = list(argtypes)
+            self._check_argtypes_for_fastpath()
     argtypes = property(_getargtypes, _setargtypes)
 
+    def _check_argtypes_for_fastpath(self):
+        if all([hasattr(argtype, '_ffiargshape') for argtype in self._argtypes_]):
+            fastpath_cls = make_fastpath_subclass(self.__class__)
+            fastpath_cls.enable_fastpath_maybe(self)
+
     def _getparamflags(self):
         return self._paramflags
 
@@ -216,6 +218,7 @@
                 import ctypes
                 restype = ctypes.c_int
             self._ptr = self._getfuncptr_fromaddress(self._argtypes_, restype)
+            self._check_argtypes_for_fastpath()
             return
 
         
diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py
--- a/lib_pypy/_ctypes/structure.py
+++ b/lib_pypy/_ctypes/structure.py
@@ -34,16 +34,18 @@
     for i, field in enumerate(all_fields):
         name = field[0]
         value = field[1]
+        is_bitfield = (len(field) == 3)
         fields[name] = Field(name,
                              self._ffistruct.fieldoffset(name),
                              self._ffistruct.fieldsize(name),
-                             value, i)
+                             value, i, is_bitfield)
 
     if anonymous_fields:
         resnames = []
         for i, field in enumerate(all_fields):
             name = field[0]
             value = field[1]
+            is_bitfield = (len(field) == 3)
             startpos = self._ffistruct.fieldoffset(name)
             if name in anonymous_fields:
                 for subname in value._names:
@@ -52,7 +54,7 @@
                     subvalue = value._fieldtypes[subname].ctype
                     fields[subname] = Field(subname,
                                             relpos, subvalue._sizeofinstances(),
-                                            subvalue, i)
+                                            subvalue, i, is_bitfield)
             else:
                 resnames.append(name)
         names = resnames
@@ -60,8 +62,8 @@
     self._fieldtypes = fields
 
 class Field(object):
-    def __init__(self, name, offset, size, ctype, num):
-        for k in ('name', 'offset', 'size', 'ctype', 'num'):
+    def __init__(self, name, offset, size, ctype, num, is_bitfield):
+        for k in ('name', 'offset', 'size', 'ctype', 'num', 'is_bitfield'):
             self.__dict__[k] = locals()[k]
 
     def __setattr__(self, name, value):
@@ -225,7 +227,7 @@
             field = self._fieldtypes[name]
         except KeyError:
             return _CData.__getattribute__(self, name)
-        if field.size >> 16:
+        if field.is_bitfield:
             # bitfield member, use direct access
             return self._buffer.__getattr__(name)
         else:
diff --git a/pypy/annotation/builtin.py b/pypy/annotation/builtin.py
--- a/pypy/annotation/builtin.py
+++ b/pypy/annotation/builtin.py
@@ -416,7 +416,8 @@
 from pypy.annotation.model import SomePtr
 from pypy.rpython.lltypesystem import lltype
 
-def malloc(s_T, s_n=None, s_flavor=None, s_zero=None, s_track_allocation=None):
+def malloc(s_T, s_n=None, s_flavor=None, s_zero=None, s_track_allocation=None,
+           s_add_memory_pressure=None):
     assert (s_n is None or s_n.knowntype == int
             or issubclass(s_n.knowntype, pypy.rlib.rarithmetic.base_int))
     assert s_T.is_constant()
@@ -432,6 +433,8 @@
     else:
         assert s_flavor.is_constant()
         assert s_track_allocation is None or s_track_allocation.is_constant()
+        assert (s_add_memory_pressure is None or
+                s_add_memory_pressure.is_constant())
         # not sure how to call malloc() for the example 'p' in the
         # presence of s_extraargs
         r = SomePtr(lltype.Ptr(s_T.const))
diff --git a/pypy/config/translationoption.py b/pypy/config/translationoption.py
--- a/pypy/config/translationoption.py
+++ b/pypy/config/translationoption.py
@@ -13,6 +13,10 @@
 DEFL_LOW_INLINE_THRESHOLD = DEFL_INLINE_THRESHOLD / 2.0
 
 DEFL_GC = "minimark"
+if sys.platform.startswith("linux"):
+    DEFL_ROOTFINDER_WITHJIT = "asmgcc"
+else:
+    DEFL_ROOTFINDER_WITHJIT = "shadowstack"
 
 IS_64_BITS = sys.maxint > 2147483647
 
@@ -108,7 +112,7 @@
     BoolOption("jit", "generate a JIT",
                default=False,
                suggests=[("translation.gc", DEFL_GC),
-                         ("translation.gcrootfinder", "asmgcc"),
+                         ("translation.gcrootfinder", DEFL_ROOTFINDER_WITHJIT),
                          ("translation.list_comprehension_operations", True)]),
     ChoiceOption("jit_backend", "choose the backend for the JIT",
                  ["auto", "x86", "x86-without-sse2", "llvm"],
diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst
--- a/pypy/doc/contributor.rst
+++ b/pypy/doc/contributor.rst
@@ -9,22 +9,22 @@
     Armin Rigo
     Maciej Fijalkowski
     Carl Friedrich Bolz
+    Antonio Cuni
     Amaury Forgeot d'Arc
-    Antonio Cuni
     Samuele Pedroni
     Michael Hudson
     Holger Krekel
+    Benjamin Peterson
     Christian Tismer
-    Benjamin Peterson
+    Hakan Ardo
+    Alex Gaynor
     Eric van Riet Paap
-    Anders Chrigström
-    Håkan Ardö
+    Anders Chrigstrom
+    David Schneider
     Richard Emslie
     Dan Villiom Podlaski Christiansen
     Alexander Schremmer
-    Alex Gaynor
-    David Schneider
-    Aurelién Campeas
+    Aurelien Campeas
     Anders Lehmann
     Camillo Bruni
     Niklaus Haldimann
@@ -35,16 +35,17 @@
     Bartosz Skowron
     Jakub Gustak
     Guido Wesdorp
+    Daniel Roberts
     Adrien Di Mascio
     Laura Creighton
     Ludovic Aubry
     Niko Matsakis
-    Daniel Roberts
     Jason Creighton
-    Jacob Hallén
+    Jacob Hallen
     Alex Martelli
     Anders Hammarquist
     Jan de Mooij
+    Wim Lavrijsen
     Stephan Diehl
     Michael Foord
     Stefan Schwarzer
@@ -55,9 +56,13 @@
     Alexandre Fayolle
     Marius Gedminas
     Simon Burton
+    Justin Peel
     Jean-Paul Calderone
     John Witulski
+    Lukas Diekmann
+    holger krekel
     Wim Lavrijsen
+    Dario Bertini
     Andreas Stührk
     Jean-Philippe St. Pierre
     Guido van Rossum
@@ -69,15 +74,16 @@
     Georg Brandl
     Gerald Klix
     Wanja Saatkamp
+    Ronny Pfannschmidt
     Boris Feigin
     Oscar Nierstrasz
-    Dario Bertini
     David Malcolm
     Eugene Oden
     Henry Mason
+    Sven Hager
     Lukas Renggli
+    Ilya Osadchiy
     Guenter Jantzen
-    Ronny Pfannschmidt
     Bert Freudenberg
     Amit Regmi
     Ben Young
@@ -94,8 +100,8 @@
     Jared Grubb
     Karl Bartel
     Gabriel Lavoie
+    Victor Stinner
     Brian Dorsey
-    Victor Stinner
     Stuart Williams
     Toby Watson
     Antoine Pitrou
@@ -106,19 +112,23 @@
     Jonathan David Riehl
     Elmo Mäntynen
     Anders Qvist
-    Beatrice Düring
+    Beatrice During
     Alexander Sedov
+    Timo Paulssen
+    Corbin Simpson
     Vincent Legoll
+    Romain Guillebert
     Alan McIntyre
-    Romain Guillebert
     Alex Perry
     Jens-Uwe Mager
+    Simon Cross
     Dan Stromberg
-    Lukas Diekmann
+    Guillebert Romain
     Carl Meyer
     Pieter Zieschang
     Alejandro J. Cura
     Sylvain Thenault
+    Christoph Gerum
     Travis Francis Athougies
     Henrik Vendelbo
     Lutz Paelike
@@ -129,6 +139,7 @@
     Miguel de Val Borro
     Ignas Mikalajunas
     Artur Lisiecki
+    Philip Jenvey
     Joshua Gilbert
     Godefroid Chappelle
     Yusei Tahara
@@ -137,24 +148,29 @@
     Gustavo Niemeyer
     William Leslie
     Akira Li
-    Kristján Valur Jónsson
+    Kristjan Valur Jonsson
     Bobby Impollonia
+    Michael Hudson-Doyle
     Andrew Thompson
     Anders Sigfridsson
+    Floris Bruynooghe
     Jacek Generowicz
     Dan Colish
-    Sven Hager
     Zooko Wilcox-O Hearn
+    Dan Villiom Podlaski Christiansen
     Anders Hammarquist
+    Chris Lambacher
     Dinu Gherman
     Dan Colish
+    Brett Cannon
     Daniel Neuhäuser
     Michael Chermside
     Konrad Delong
     Anna Ravencroft
     Greg Price
     Armin Ronacher
+    Christian Muirhead
     Jim Baker
-    Philip Jenvey
     Rodrigo Araújo
+    Romain Guillebert
 
diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst
--- a/pypy/doc/how-to-release.rst
+++ b/pypy/doc/how-to-release.rst
@@ -21,8 +21,8 @@
 Release Steps
 ----------------
 
-* at code freeze make a release branch under
-  http://codepeak.net/svn/pypy/release/x.y(.z). IMPORTANT: bump the
+* at code freeze make a release branch using release-x.x.x in mercurial.
+  IMPORTANT: bump the
   pypy version number in module/sys/version.py and in
   module/cpyext/include/patchlevel.h, notice that the branch
   will capture the revision number of this change for the release;
@@ -48,12 +48,6 @@
   the release announcement should contain a direct link to the download page
 * update pypy.org (under extradoc/pypy.org), rebuild and commit
 
-* update http://codespeak.net/pypy/trunk:
-   code0> + chmod -R yourname:users /www/codespeak.net/htdocs/pypy/trunk
-   local> cd ..../pypy/doc && py.test
-   local> cd ..../pypy
-   local> rsync -az doc codespeak.net:/www/codespeak.net/htdocs/pypy/trunk/pypy/
-
 * post announcement on morepypy.blogspot.com
 * send announcements to pypy-dev, python-list,
   python-announce, python-dev ...
diff --git a/pypy/doc/release-1.6.0.rst b/pypy/doc/release-1.6.0.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/release-1.6.0.rst
@@ -0,0 +1,85 @@
+===========================
+PyPy 1.6 - faster than ever
+===========================
+
+We're pleased to announce the 1.6 release of PyPy. This release brings a lot
+of bugfixes and performance improvements over 1.5, and improves support for
+Windows 32bit and OS X 64bit. This version fully implements Python 2.7.1 and
+has beta level support for loading CPython C extensions.  You can download it
+here:
+
+    http://pypy.org/download.html
+
+What is PyPy?
+=============
+
+PyPy is a very compliant Python interpreter, almost a drop-in replacement for
+CPython 2.7.1. It's fast (`pypy 1.5 and cpython 2.6.2`_ performance comparison)
+due to its integrated tracing JIT compiler. XXX: compare to 2.7.1
+
+This release supports x86 machines running Linux 32/64 or Mac OS X.  Windows 32
+is beta (it roughly works but a lot of small issues have not been fixed so
+far).  Windows 64 is not yet supported.
+
+The main topics of this release are speed and stability: on average, PyPy 1.6
+is between 20% and 30% faster than PyPy 1.5, and overall it's 4.3 times faster
+than CPython when running our set of benchmarks.
+
+The speed improvements have been made possible by optimizing many of the
+layers which compose PyPy.  In particular, we improved: the Garbage Collector,
+the JIT warmup time, the optimizations performed by the JIT, the quality of
+the generated machine code and the implementation of our Python interpreter.
+
+
+Highlights
+==========
+
+* Numerous performance improvements, overall giving considerable speedups:
+
+  - better GC behavior when dealing with very large objects and arrays
+
+  - `fast ctypes`_: now calls to ctypes functions are seen and optimized
+    by the JIT, and they are up to 60 times faster than PyPy 1.5 and 10 times
+    faster than CPython
+
+  - improved generators(1): simple generators now are inlined into the caller
+    loop, making performance up to 3.5 times faster than PyPy 1.5.
+
+  - improved generators(2): thanks to other optimizations, even generators
+    that are not inlined are between 10% and 20% faster than PyPy 1.5.
+
+  - faster warmup time for the JIT
+
+  - JIT support for single floats (e.g., for ``array('f')``)
+
+  - optimized dictionaries: the internal representation of dictionaries is now
+    dynamically selected depending on the type of stored objects, resulting in
+    faster code and smaller memory footprint, for example for dictionaries
+    whose keys are all strings, or all integers.
+
+* JitViewer: this is the first official release which includes the JitViewer,
+  a web-based tool which helps you to see which parts of your Python code have
+  been compiled by the JIT, down to the assembler. XXX: publish a public
+  demo?
+
+* The CPython extension module API has been improved and now supports many
+  more extensions. For information on which ones are supported, please refer to
+  our `compatibility wiki`_.
+
+* Multibyte encoding support: this was one of the last areas in which we were
+  still behind CPython, but now we fully support them. (XXX: is that true?)
+
+* Preliminary support for NumPy: this release includes a preview of a very
+  fast NumPy module integrated with the PyPy JIT.  Unfortunately, this does
+  not mean that you can expect to take an existing NumPy program and run it on
+  PyPy, because the module is still unfinished and supports only some of the
+  numpy API.  However, what works is blazingly fast :-)
+
+* Bugfixes: since the 1.5 release we fixed 53 bugs in our `bug tracker`_, not
+  counting the numerous bugs that were found and reported through other
+  channels than the bug tracker.
+
+Cheers,
+
+Carl Friedrich Bolz, Laura Creighton, Antonio Cuni, Maciej Fijalkowski,
+Amaury Forgeot d'Arc, Alex Gaynor, Armin Rigo and the PyPy team
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -111,6 +111,9 @@
     def setslotvalue(self, index, w_val):
         raise NotImplementedError
 
+    def delslotvalue(self, index):
+        raise NotImplementedError
+
     def descr_call_mismatch(self, space, opname, RequiredClass, args):
         if RequiredClass is None:
             classname = '?'
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -64,7 +64,7 @@
                 self.visit_self(el[1], *args)
             else:
                 self.visit_function(el, *args)
-        else:
+        elif isinstance(el, type):
             for typ in self.bases_order:
                 if issubclass(el, typ):
                     visit = getattr(self, "visit__%s" % (typ.__name__,))
@@ -73,6 +73,8 @@
             else:
                 raise Exception("%s: no match for unwrap_spec element %s" % (
                     self.__class__.__name__, el))
+        else:
+            raise Exception("unable to dispatch, %s, perhaps your parameter should have started with w_?" % el)
 
     def apply_over(self, unwrap_spec, *extra):
         dispatch = self.dispatch
diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py
--- a/pypy/interpreter/typedef.py
+++ b/pypy/interpreter/typedef.py
@@ -258,6 +258,11 @@
                     self.slots_w = [None] * nslots
             def setslotvalue(self, index, w_value):
                 self.slots_w[index] = w_value
+            def delslotvalue(self, index):
+                if self.slots_w[index] is None:
+                    return False
+                self.slots_w[index] = None
+                return True
             def getslotvalue(self, index):
                 return self.slots_w[index]
         add(Proto)
@@ -530,11 +535,10 @@
         """member.__delete__(obj)
         Delete the value of the slot 'member' from the given 'obj'."""
         self.typecheck(space, w_obj)
-        w_oldresult = w_obj.getslotvalue(self.index)
-        if w_oldresult is None:
+        success = w_obj.delslotvalue(self.index)
+        if not success:
             raise OperationError(space.w_AttributeError,
                                  space.wrap(self.name)) # XXX better message
-        w_obj.setslotvalue(self.index, None)
 
 Member.typedef = TypeDef(
     "member_descriptor",
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -1071,6 +1071,8 @@
         return heaptracker.adr2int(llmemory.cast_ptr_to_adr(x))
     if TP == llmemory.Address:
         return heaptracker.adr2int(x)
+    if TP is lltype.SingleFloat:
+        return longlong.singlefloat2int(x)
     return lltype.cast_primitive(lltype.Signed, x)
 
 def cast_from_int(TYPE, x):
@@ -1086,6 +1088,9 @@
             x = llmemory.cast_int_to_adr(x)
         assert lltype.typeOf(x) == llmemory.Address
         return x
+    elif TYPE is lltype.SingleFloat:
+        assert lltype.typeOf(x) is lltype.Signed
+        return longlong.int2singlefloat(x)
     else:
         if lltype.typeOf(x) == llmemory.Address:
             x = heaptracker.adr2int(x)
@@ -1140,6 +1145,7 @@
     del _future_values[:]
 
 def set_future_value_int(index, value):
+    assert lltype.typeOf(value) is lltype.Signed
     set_future_value_ref(index, value)
 
 def set_future_value_float(index, value):
@@ -1488,6 +1494,7 @@
     'i': lltype.Signed,
     'f': lltype.Float,
     'L': lltype.SignedLongLong,
+    'S': lltype.SingleFloat,
     'v': lltype.Void,
     }
 
diff --git a/pypy/jit/backend/llgraph/runner.py b/pypy/jit/backend/llgraph/runner.py
--- a/pypy/jit/backend/llgraph/runner.py
+++ b/pypy/jit/backend/llgraph/runner.py
@@ -91,6 +91,7 @@
 class BaseCPU(model.AbstractCPU):
     supports_floats = True
     supports_longlong = llimpl.IS_32_BIT
+    supports_singlefloats = True
 
     def __init__(self, rtyper, stats=None, opts=None,
                  translate_support_code=False,
@@ -327,12 +328,16 @@
 
     def calldescrof_dynamic(self, ffi_args, ffi_result, extrainfo=None):
         from pypy.jit.backend.llsupport.ffisupport import get_ffi_type_kind
+        from pypy.jit.backend.llsupport.ffisupport import UnsupportedKind
         arg_types = []
-        for arg in ffi_args:
-            kind = get_ffi_type_kind(arg)
-            if kind != history.VOID:
-                arg_types.append(kind)
-        reskind = get_ffi_type_kind(ffi_result)
+        try:
+            for arg in ffi_args:
+                kind = get_ffi_type_kind(self, arg)
+                if kind != history.VOID:
+                    arg_types.append(kind)
+            reskind = get_ffi_type_kind(self, ffi_result)
+        except UnsupportedKind:
+            return None
         return self.getdescr(0, reskind, extrainfo=extrainfo,
                              arg_types=''.join(arg_types))
 
diff --git a/pypy/jit/backend/llgraph/test/test_llgraph.py b/pypy/jit/backend/llgraph/test/test_llgraph.py
--- a/pypy/jit/backend/llgraph/test/test_llgraph.py
+++ b/pypy/jit/backend/llgraph/test/test_llgraph.py
@@ -19,6 +19,9 @@
     def setup_method(self, _):
         self.cpu = self.cpu_type(None)
 
+    def test_memoryerror(self):
+        py.test.skip("does not make much sense on the llgraph backend")
+
 
 def test_cast_adr_to_int_and_back():
     X = lltype.Struct('X', ('foo', lltype.Signed))
diff --git a/pypy/jit/backend/llsupport/descr.py b/pypy/jit/backend/llsupport/descr.py
--- a/pypy/jit/backend/llsupport/descr.py
+++ b/pypy/jit/backend/llsupport/descr.py
@@ -303,6 +303,8 @@
                 c = 'f'
             elif c == 'f' and longlong.supports_longlong:
                 return 'longlong.getrealfloat(%s)' % (process('L'),)
+            elif c == 'S':
+                return 'longlong.int2singlefloat(%s)' % (process('i'),)
             arg = 'args_%s[%d]' % (c, seen[c])
             seen[c] += 1
             return arg
@@ -318,6 +320,8 @@
                 return lltype.Void
             elif arg == 'L':
                 return lltype.SignedLongLong
+            elif arg == 'S':
+                return lltype.SingleFloat
             else:
                 raise AssertionError(arg)
 
@@ -334,6 +338,8 @@
             result = 'rffi.cast(lltype.SignedLongLong, res)'
         elif self.get_return_type() == history.VOID:
             result = 'None'
+        elif self.get_return_type() == 'S':
+            result = 'longlong.singlefloat2int(res)'
         else:
             assert 0
         source = py.code.Source("""
@@ -344,14 +350,15 @@
         """ % locals())
         ARGS = [TYPE(arg) for arg in self.arg_classes]
         FUNC = lltype.FuncType(ARGS, RESULT)
-        d = locals().copy()
-        d.update(globals())
+        d = globals().copy()
+        d.update(locals())
         exec source.compile() in d
         self.call_stub = d['call_stub']
 
     def verify_types(self, args_i, args_r, args_f, return_type):
         assert self._return_type in return_type
-        assert self.arg_classes.count('i') == len(args_i or ())
+        assert (self.arg_classes.count('i') +
+                self.arg_classes.count('S')) == len(args_i or ())
         assert self.arg_classes.count('r') == len(args_r or ())
         assert (self.arg_classes.count('f') +
                 self.arg_classes.count('L')) == len(args_f or ())
@@ -428,23 +435,39 @@
     def get_result_size(self, translate_support_code):
         return 0
 
+_SingleFloatCallDescr = None   # built lazily
+
 def getCallDescrClass(RESULT):
     if RESULT is lltype.Void:
         return VoidCallDescr
     if RESULT is lltype.Float:
         return FloatCallDescr
+    if RESULT is lltype.SingleFloat:
+        global _SingleFloatCallDescr
+        if _SingleFloatCallDescr is None:
+            assert rffi.sizeof(rffi.UINT) == rffi.sizeof(RESULT)
+            class SingleFloatCallDescr(getCallDescrClass(rffi.UINT)):
+                _clsname = 'SingleFloatCallDescr'
+                _return_type = 'S'
+            _SingleFloatCallDescr = SingleFloatCallDescr
+        return _SingleFloatCallDescr
     if is_longlong(RESULT):
         return LongLongCallDescr
     return getDescrClass(RESULT, BaseIntCallDescr, GcPtrCallDescr,
                          NonGcPtrCallDescr, 'Call', 'get_result_size',
                          Ellipsis,  # <= floatattrname should not be used here
                          '_is_result_signed')
+getCallDescrClass._annspecialcase_ = 'specialize:memo'
 
 def get_call_descr(gccache, ARGS, RESULT, extrainfo=None):
     arg_classes = []
     for ARG in ARGS:
         kind = getkind(ARG)
-        if   kind == 'int': arg_classes.append('i')
+        if   kind == 'int':
+            if ARG is lltype.SingleFloat:
+                arg_classes.append('S')
+            else:
+                arg_classes.append('i')
         elif kind == 'ref': arg_classes.append('r')
         elif kind == 'float':
             if is_longlong(ARG):
@@ -476,6 +499,9 @@
             return GcPtrDescr
         else:
             return NonGcPtrDescr
+    if TYPE is lltype.SingleFloat:
+        assert rffi.sizeof(rffi.UINT) == rffi.sizeof(TYPE)
+        TYPE = rffi.UINT
     try:
         return _cache[nameprefix, TYPE]
     except KeyError:
diff --git a/pypy/jit/backend/llsupport/ffisupport.py b/pypy/jit/backend/llsupport/ffisupport.py
--- a/pypy/jit/backend/llsupport/ffisupport.py
+++ b/pypy/jit/backend/llsupport/ffisupport.py
@@ -1,19 +1,21 @@
 from pypy.rlib.rarithmetic import intmask
 from pypy.jit.metainterp import history
-from pypy.jit.backend.llsupport.descr import DynamicIntCallDescr, NonGcPtrCallDescr,\
-    FloatCallDescr, VoidCallDescr
+from pypy.rpython.lltypesystem import rffi
+from pypy.jit.backend.llsupport.descr import (
+    DynamicIntCallDescr, NonGcPtrCallDescr, FloatCallDescr, VoidCallDescr,
+    LongLongCallDescr, getCallDescrClass)
 
 class UnsupportedKind(Exception):
     pass
 
-def get_call_descr_dynamic(ffi_args, ffi_result, extrainfo=None):
+def get_call_descr_dynamic(cpu, ffi_args, ffi_result, extrainfo=None):
     """Get a call descr: the types of result and args are represented by
     rlib.libffi.types.*"""
     try:
-        reskind = get_ffi_type_kind(ffi_result)
-        argkinds = [get_ffi_type_kind(arg) for arg in ffi_args]
+        reskind = get_ffi_type_kind(cpu, ffi_result)
+        argkinds = [get_ffi_type_kind(cpu, arg) for arg in ffi_args]
     except UnsupportedKind:
-        return None # ??
+        return None
     arg_classes = ''.join(argkinds)
     if reskind == history.INT:
         size = intmask(ffi_result.c_size)
@@ -25,17 +27,26 @@
         return FloatCallDescr(arg_classes, extrainfo)
     elif reskind == history.VOID:
         return VoidCallDescr(arg_classes, extrainfo)
+    elif reskind == 'L':
+        return LongLongCallDescr(arg_classes, extrainfo)
+    elif reskind == 'S':
+        SingleFloatCallDescr = getCallDescrClass(rffi.FLOAT)
+        return SingleFloatCallDescr(arg_classes, extrainfo)
     assert False
 
-def get_ffi_type_kind(ffi_type):
+def get_ffi_type_kind(cpu, ffi_type):
     from pypy.rlib.libffi import types
     kind = types.getkind(ffi_type)
     if kind == 'i' or kind == 'u':
         return history.INT
-    elif kind == 'f':
+    elif cpu.supports_floats and kind == 'f':
         return history.FLOAT
     elif kind == 'v':
         return history.VOID
+    elif cpu.supports_longlong and (kind == 'I' or kind == 'U'):     # longlong
+        return 'L'
+    elif cpu.supports_singlefloats and kind == 's':    # singlefloat
+        return 'S'
     raise UnsupportedKind("Unsupported kind '%s'" % kind)
 
 def is_ffi_type_signed(ffi_type):
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -544,18 +544,19 @@
         assert self.GCClass.inline_simple_malloc
         assert self.GCClass.inline_simple_malloc_varsize
 
-        # make a malloc function, with three arguments
+        # make a malloc function, with two arguments
         def malloc_basic(size, tid):
             type_id = llop.extract_ushort(llgroup.HALFWORD, tid)
             has_finalizer = bool(tid & (1<<llgroup.HALFSHIFT))
             check_typeid(type_id)
-            try:
-                res = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
-                                                      type_id, size,
-                                                      has_finalizer, False)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                res = lltype.nullptr(llmemory.GCREF.TO)
+            res = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
+                                                  type_id, size,
+                                                  has_finalizer, False)
+            # In case the operation above failed, we are returning NULL
+            # from this function to assembler.  There is also an RPython
+            # exception set, typically MemoryError; but it's easier and
+            # faster to check for the NULL return value, as done by
+            # translator/exceptiontransform.py.
             #llop.debug_print(lltype.Void, "\tmalloc_basic", size, type_id,
             #                 "-->", res)
             return res
@@ -571,14 +572,10 @@
         def malloc_array(itemsize, tid, num_elem):
             type_id = llop.extract_ushort(llgroup.HALFWORD, tid)
             check_typeid(type_id)
-            try:
-                return llop1.do_malloc_varsize_clear(
-                    llmemory.GCREF,
-                    type_id, num_elem, self.array_basesize, itemsize,
-                    self.array_length_ofs)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                return lltype.nullptr(llmemory.GCREF.TO)
+            return llop1.do_malloc_varsize_clear(
+                llmemory.GCREF,
+                type_id, num_elem, self.array_basesize, itemsize,
+                self.array_length_ofs)
         self.malloc_array = malloc_array
         self.GC_MALLOC_ARRAY = lltype.Ptr(lltype.FuncType(
             [lltype.Signed] * 3, llmemory.GCREF))
@@ -591,23 +588,15 @@
         unicode_type_id = self.layoutbuilder.get_type_id(rstr.UNICODE)
         #
         def malloc_str(length):
-            try:
-                return llop1.do_malloc_varsize_clear(
-                    llmemory.GCREF,
-                    str_type_id, length, str_basesize, str_itemsize,
-                    str_ofs_length)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                return lltype.nullptr(llmemory.GCREF.TO)
+            return llop1.do_malloc_varsize_clear(
+                llmemory.GCREF,
+                str_type_id, length, str_basesize, str_itemsize,
+                str_ofs_length)
         def malloc_unicode(length):
-            try:
-                return llop1.do_malloc_varsize_clear(
-                    llmemory.GCREF,
-                    unicode_type_id, length, unicode_basesize,unicode_itemsize,
-                    unicode_ofs_length)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                return lltype.nullptr(llmemory.GCREF.TO)
+            return llop1.do_malloc_varsize_clear(
+                llmemory.GCREF,
+                unicode_type_id, length, unicode_basesize,unicode_itemsize,
+                unicode_ofs_length)
         self.malloc_str = malloc_str
         self.malloc_unicode = malloc_unicode
         self.GC_MALLOC_STR_UNICODE = lltype.Ptr(lltype.FuncType(
@@ -628,16 +617,12 @@
             if self.DEBUG:
                 random_usage_of_xmm_registers()
             assert size >= self.minimal_size_in_nursery
-            try:
-                # NB. although we call do_malloc_fixedsize_clear() here,
-                # it's a bit of a hack because we set tid to 0 and may
-                # also use it to allocate varsized objects.  The tid
-                # and possibly the length are both set afterward.
-                gcref = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
-                                            0, size, False, False)
-            except MemoryError:
-                fatalerror("out of memory (from JITted code)")
-                return 0
+            # NB. although we call do_malloc_fixedsize_clear() here,
+            # it's a bit of a hack because we set tid to 0 and may
+            # also use it to allocate varsized objects.  The tid
+            # and possibly the length are both set afterward.
+            gcref = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
+                                        0, size, False, False)
             return rffi.cast(lltype.Signed, gcref)
         self.malloc_slowpath = malloc_slowpath
         self.MALLOC_SLOWPATH = lltype.FuncType([lltype.Signed], lltype.Signed)
diff --git a/pypy/jit/backend/llsupport/llmodel.py b/pypy/jit/backend/llsupport/llmodel.py
--- a/pypy/jit/backend/llsupport/llmodel.py
+++ b/pypy/jit/backend/llsupport/llmodel.py
@@ -259,7 +259,7 @@
 
     def calldescrof_dynamic(self, ffi_args, ffi_result, extrainfo=None):
         from pypy.jit.backend.llsupport import ffisupport
-        return ffisupport.get_call_descr_dynamic(ffi_args, ffi_result,
+        return ffisupport.get_call_descr_dynamic(self, ffi_args, ffi_result,
                                                  extrainfo)
 
     def get_overflow_error(self):
@@ -499,7 +499,7 @@
     def bh_call_i(self, func, calldescr, args_i, args_r, args_f):
         assert isinstance(calldescr, BaseIntCallDescr)
         if not we_are_translated():
-            calldescr.verify_types(args_i, args_r, args_f, history.INT)
+            calldescr.verify_types(args_i, args_r, args_f, history.INT + 'S')
         return calldescr.call_stub(func, args_i, args_r, args_f)
 
     def bh_call_r(self, func, calldescr, args_i, args_r, args_f):
diff --git a/pypy/jit/backend/llsupport/test/test_descr.py b/pypy/jit/backend/llsupport/test/test_descr.py
--- a/pypy/jit/backend/llsupport/test/test_descr.py
+++ b/pypy/jit/backend/llsupport/test/test_descr.py
@@ -52,7 +52,8 @@
     S = lltype.GcStruct('S', ('x', lltype.Char),
                              ('y', lltype.Ptr(T)),
                              ('z', lltype.Ptr(U)),
-                             ('f', lltype.Float))
+                             ('f', lltype.Float),
+                             ('s', lltype.SingleFloat))
     assert getFieldDescrClass(lltype.Ptr(T)) is GcPtrFieldDescr
     assert getFieldDescrClass(lltype.Ptr(U)) is NonGcPtrFieldDescr
     cls = getFieldDescrClass(lltype.Char)
@@ -61,6 +62,10 @@
     clsf = getFieldDescrClass(lltype.Float)
     assert clsf != cls
     assert clsf == getFieldDescrClass(lltype.Float)
+    clss = getFieldDescrClass(lltype.SingleFloat)
+    assert clss not in (cls, clsf)
+    assert clss == getFieldDescrClass(lltype.SingleFloat)
+    assert clss == getFieldDescrClass(rffi.UINT)    # for now
     #
     c0 = GcCache(False)
     c1 = GcCache(True)
@@ -72,14 +77,17 @@
         descr_y = get_field_descr(c2, S, 'y')
         descr_z = get_field_descr(c2, S, 'z')
         descr_f = get_field_descr(c2, S, 'f')
+        descr_s = get_field_descr(c2, S, 's')
         assert descr_x.__class__ is cls
         assert descr_y.__class__ is GcPtrFieldDescr
         assert descr_z.__class__ is NonGcPtrFieldDescr
         assert descr_f.__class__ is clsf
+        assert descr_s.__class__ is clss
         assert descr_x.name == 'S.x'
         assert descr_y.name == 'S.y'
         assert descr_z.name == 'S.z'
         assert descr_f.name == 'S.f'
+        assert descr_s.name == 'S.s'
         if not tsc:
             assert descr_x.offset < descr_y.offset < descr_z.offset
             assert descr_x.sort_key() < descr_y.sort_key() < descr_z.sort_key()
@@ -87,23 +95,29 @@
             assert descr_y.get_field_size(False) == rffi.sizeof(lltype.Ptr(T))
             assert descr_z.get_field_size(False) == rffi.sizeof(lltype.Ptr(U))
             assert descr_f.get_field_size(False) == rffi.sizeof(lltype.Float)
+            assert descr_s.get_field_size(False) == rffi.sizeof(
+                                                            lltype.SingleFloat)
         else:
             assert isinstance(descr_x.offset, Symbolic)
             assert isinstance(descr_y.offset, Symbolic)
             assert isinstance(descr_z.offset, Symbolic)
             assert isinstance(descr_f.offset, Symbolic)
+            assert isinstance(descr_s.offset, Symbolic)
             assert isinstance(descr_x.get_field_size(True), Symbolic)
             assert isinstance(descr_y.get_field_size(True), Symbolic)
             assert isinstance(descr_z.get_field_size(True), Symbolic)
             assert isinstance(descr_f.get_field_size(True), Symbolic)
+            assert isinstance(descr_s.get_field_size(True), Symbolic)
         assert not descr_x.is_pointer_field()
         assert     descr_y.is_pointer_field()
         assert not descr_z.is_pointer_field()
         assert not descr_f.is_pointer_field()
+        assert not descr_s.is_pointer_field()
         assert not descr_x.is_float_field()
         assert not descr_y.is_float_field()
         assert not descr_z.is_float_field()
         assert     descr_f.is_float_field()
+        assert not descr_s.is_float_field()
 
 
 def test_get_field_descr_sign():
@@ -135,6 +149,7 @@
     A2 = lltype.GcArray(lltype.Ptr(T))
     A3 = lltype.GcArray(lltype.Ptr(U))
     A4 = lltype.GcArray(lltype.Float)
+    A5 = lltype.GcArray(lltype.SingleFloat)
     assert getArrayDescrClass(A2) is GcPtrArrayDescr
     assert getArrayDescrClass(A3) is NonGcPtrArrayDescr
     cls = getArrayDescrClass(A1)
@@ -143,25 +158,32 @@
     clsf = getArrayDescrClass(A4)
     assert clsf != cls
     assert clsf == getArrayDescrClass(lltype.GcArray(lltype.Float))
+    clss = getArrayDescrClass(A5)
+    assert clss not in (clsf, cls)
+    assert clss == getArrayDescrClass(lltype.GcArray(rffi.UINT))
     #
     c0 = GcCache(False)
     descr1 = get_array_descr(c0, A1)
     descr2 = get_array_descr(c0, A2)
     descr3 = get_array_descr(c0, A3)
     descr4 = get_array_descr(c0, A4)
+    descr5 = get_array_descr(c0, A5)
     assert descr1.__class__ is cls
     assert descr2.__class__ is GcPtrArrayDescr
     assert descr3.__class__ is NonGcPtrArrayDescr
     assert descr4.__class__ is clsf
+    assert descr5.__class__ is clss
     assert descr1 == get_array_descr(c0, lltype.GcArray(lltype.Char))
     assert not descr1.is_array_of_pointers()
     assert     descr2.is_array_of_pointers()
     assert not descr3.is_array_of_pointers()
     assert not descr4.is_array_of_pointers()
+    assert not descr5.is_array_of_pointers()
     assert not descr1.is_array_of_floats()
     assert not descr2.is_array_of_floats()
     assert not descr3.is_array_of_floats()
     assert     descr4.is_array_of_floats()
+    assert not descr5.is_array_of_floats()
     #
     def get_alignment(code):
         # Retrieve default alignment for the compiler/platform
@@ -170,27 +192,33 @@
     assert descr2.get_base_size(False) == get_alignment('p')
     assert descr3.get_base_size(False) == get_alignment('p')
     assert descr4.get_base_size(False) == get_alignment('d')
+    assert descr5.get_base_size(False) == get_alignment('f')
     assert descr1.get_ofs_length(False) == 0
     assert descr2.get_ofs_length(False) == 0
     assert descr3.get_ofs_length(False) == 0
     assert descr4.get_ofs_length(False) == 0
+    assert descr5.get_ofs_length(False) == 0
     assert descr1.get_item_size(False) == rffi.sizeof(lltype.Char)
     assert descr2.get_item_size(False) == rffi.sizeof(lltype.Ptr(T))
     assert descr3.get_item_size(False) == rffi.sizeof(lltype.Ptr(U))
     assert descr4.get_item_size(False) == rffi.sizeof(lltype.Float)
+    assert descr5.get_item_size(False) == rffi.sizeof(lltype.SingleFloat)
     #
     assert isinstance(descr1.get_base_size(True), Symbolic)
     assert isinstance(descr2.get_base_size(True), Symbolic)
     assert isinstance(descr3.get_base_size(True), Symbolic)
     assert isinstance(descr4.get_base_size(True), Symbolic)
+    assert isinstance(descr5.get_base_size(True), Symbolic)
     assert isinstance(descr1.get_ofs_length(True), Symbolic)
     assert isinstance(descr2.get_ofs_length(True), Symbolic)
     assert isinstance(descr3.get_ofs_length(True), Symbolic)
     assert isinstance(descr4.get_ofs_length(True), Symbolic)
+    assert isinstance(descr5.get_ofs_length(True), Symbolic)
     assert isinstance(descr1.get_item_size(True), Symbolic)
     assert isinstance(descr2.get_item_size(True), Symbolic)
     assert isinstance(descr3.get_item_size(True), Symbolic)
     assert isinstance(descr4.get_item_size(True), Symbolic)
+    assert isinstance(descr5.get_item_size(True), Symbolic)
     CA = rffi.CArray(lltype.Signed)
     descr = get_array_descr(c0, CA)
     assert not descr.is_array_of_floats()
@@ -210,6 +238,11 @@
     assert descr.is_array_of_floats()
     assert descr.get_base_size(False) == 0
     assert descr.get_ofs_length(False) == -1
+    CA = rffi.CArray(rffi.FLOAT)
+    descr = get_array_descr(c0, CA)
+    assert not descr.is_array_of_floats()
+    assert descr.get_base_size(False) == 0
+    assert descr.get_ofs_length(False) == -1
 
 
 def test_get_array_descr_sign():
@@ -257,6 +290,11 @@
     assert descr4.get_result_size(False) == rffi.sizeof(lltype.Float)
     assert descr4.get_return_type() == history.FLOAT
     assert descr4.arg_classes == "ff"
+    #
+    descr5 = get_call_descr(c0, [lltype.SingleFloat], lltype.SingleFloat)
+    assert descr5.get_result_size(False) == rffi.sizeof(lltype.SingleFloat)
+    assert descr5.get_return_type() == "S"
+    assert descr5.arg_classes == "S"
 
 def test_get_call_descr_not_translated_longlong():
     if sys.maxint > 2147483647:
@@ -286,6 +324,11 @@
     assert isinstance(descr4.get_result_size(True), Symbolic)
     assert descr4.get_return_type() == history.FLOAT
     assert descr4.arg_classes == "ff"
+    #
+    descr5 = get_call_descr(c1, [lltype.SingleFloat], lltype.SingleFloat)
+    assert isinstance(descr5.get_result_size(True), Symbolic)
+    assert descr5.get_return_type() == "S"
+    assert descr5.arg_classes == "S"
 
 def test_call_descr_extra_info():
     c1 = GcCache(True)
@@ -345,8 +388,11 @@
     #
     descr4f = get_call_descr(c0, [lltype.Char, lltype.Ptr(S)], lltype.Float)
     assert 'FloatCallDescr' in descr4f.repr_of_descr()
+    #
+    descr5f = get_call_descr(c0, [lltype.Char], lltype.SingleFloat)
+    assert 'SingleFloatCallDescr' in descr5f.repr_of_descr()
 
-def test_call_stubs():
+def test_call_stubs_1():
     c0 = GcCache(False)
     ARGS = [lltype.Char, lltype.Signed]
     RES = lltype.Char
@@ -360,6 +406,8 @@
     res = call_stub(rffi.cast(lltype.Signed, fnptr), [1, 2], None, None)
     assert res == ord('c')
 
+def test_call_stubs_2():
+    c0 = GcCache(False)
     ARRAY = lltype.GcArray(lltype.Signed)
     ARGS = [lltype.Float, lltype.Ptr(ARRAY)]
     RES = lltype.Float
@@ -375,3 +423,27 @@
     res = descr2.call_stub(rffi.cast(lltype.Signed, fnptr),
                            [], [opaquea], [longlong.getfloatstorage(3.5)])
     assert longlong.getrealfloat(res) == 4.5
+
+def test_call_stubs_single_float():
+    from pypy.rlib.longlong2float import uint2singlefloat, singlefloat2uint
+    from pypy.rlib.rarithmetic import r_singlefloat, intmask
+    #
+    c0 = GcCache(False)
+    ARGS = [lltype.SingleFloat, lltype.SingleFloat, lltype.SingleFloat]
+    RES = lltype.SingleFloat
+
+    def f(a, b, c):
+        a = float(a)
+        b = float(b)
+        c = float(c)
+        x = a - (b / c)
+        return r_singlefloat(x)
+
+    fnptr = llhelper(lltype.Ptr(lltype.FuncType(ARGS, RES)), f)
+    descr2 = get_call_descr(c0, ARGS, RES)
+    a = intmask(singlefloat2uint(r_singlefloat(-10.0)))
+    b = intmask(singlefloat2uint(r_singlefloat(3.0)))
+    c = intmask(singlefloat2uint(r_singlefloat(2.0)))
+    res = descr2.call_stub(rffi.cast(lltype.Signed, fnptr),
+                           [a, b, c], [], [])
+    assert float(uint2singlefloat(rffi.r_uint(res))) == -11.5
diff --git a/pypy/jit/backend/llsupport/test/test_ffisupport.py b/pypy/jit/backend/llsupport/test/test_ffisupport.py
--- a/pypy/jit/backend/llsupport/test/test_ffisupport.py
+++ b/pypy/jit/backend/llsupport/test/test_ffisupport.py
@@ -1,24 +1,52 @@
 from pypy.rlib.libffi import types
-from pypy.jit.backend.llsupport.ffisupport import get_call_descr_dynamic, \
-    VoidCallDescr, DynamicIntCallDescr
-    
+from pypy.jit.codewriter.longlong import is_64_bit
+from pypy.jit.backend.llsupport.ffisupport import *
+
+
+class FakeCPU:
+    def __init__(self, supports_floats=False, supports_longlong=False,
+                 supports_singlefloats=False):
+        self.supports_floats = supports_floats
+        self.supports_longlong = supports_longlong
+        self.supports_singlefloats = supports_singlefloats
+
+
 def test_call_descr_dynamic():
+    args = [types.sint, types.pointer]
+    descr = get_call_descr_dynamic(FakeCPU(), args, types.sint)
+    assert isinstance(descr, DynamicIntCallDescr)
+    assert descr.arg_classes == 'ii'
 
     args = [types.sint, types.double, types.pointer]
-    descr = get_call_descr_dynamic(args, types.void)
+    descr = get_call_descr_dynamic(FakeCPU(), args, types.void)
+    assert descr is None    # missing floats
+    descr = get_call_descr_dynamic(FakeCPU(supports_floats=True),
+                                   args, types.void)
     assert isinstance(descr, VoidCallDescr)
     assert descr.arg_classes == 'ifi'
 
-    descr = get_call_descr_dynamic([], types.sint8)
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.sint8)
     assert isinstance(descr, DynamicIntCallDescr)
     assert descr.get_result_size(False) == 1
     assert descr.is_result_signed() == True
 
-    descr = get_call_descr_dynamic([], types.uint8)
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.uint8)
     assert isinstance(descr, DynamicIntCallDescr)
     assert descr.get_result_size(False) == 1
     assert descr.is_result_signed() == False
 
-    descr = get_call_descr_dynamic([], types.float)
-    assert descr is None # single floats are not supported so far
-    
+    if not is_64_bit:
+        descr = get_call_descr_dynamic(FakeCPU(), [], types.slonglong)
+        assert descr is None   # missing longlongs
+        descr = get_call_descr_dynamic(FakeCPU(supports_longlong=True),
+                                       [], types.slonglong)
+        assert isinstance(descr, LongLongCallDescr)
+    else:
+        assert types.slonglong is types.slong
+
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.float)
+    assert descr is None   # missing singlefloats
+    descr = get_call_descr_dynamic(FakeCPU(supports_singlefloats=True),
+                                   [], types.float)
+    SingleFloatCallDescr = getCallDescrClass(rffi.FLOAT)
+    assert isinstance(descr, SingleFloatCallDescr)
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -8,12 +8,13 @@
     # ^^^ This is only useful on 32-bit platforms.  If True,
     # longlongs are supported by the JIT, but stored as doubles.
     # Boxes and Consts are BoxFloats and ConstFloats.
+    supports_singlefloats = False
 
     done_with_this_frame_void_v = -1
     done_with_this_frame_int_v = -1
     done_with_this_frame_ref_v = -1
     done_with_this_frame_float_v = -1
-    exit_frame_with_exception_v = -1
+    propagate_exception_v = -1
     total_compiled_loops = 0
     total_compiled_bridges = 0
     total_freed_loops = 0
diff --git a/pypy/jit/backend/test/calling_convention_test.py b/pypy/jit/backend/test/calling_convention_test.py
--- a/pypy/jit/backend/test/calling_convention_test.py
+++ b/pypy/jit/backend/test/calling_convention_test.py
@@ -290,3 +290,58 @@
                 assert abs(x - expected_result) < 0.0001
             finally:
                 del self.cpu.done_with_this_frame_float_v
+
+    def test_call_with_singlefloats(self):
+        cpu = self.cpu
+        if not cpu.supports_floats or not cpu.supports_singlefloats:
+            py.test.skip('requires floats and singlefloats')
+
+        import random
+        from pypy.rlib.libffi import types
+        from pypy.rlib.rarithmetic import r_singlefloat
+
+        def func(*args):
+            res = 0.0
+            for i, x in enumerate(args):
+                res += (i + 1.1) * float(x)
+            return res
+
+        F = lltype.Float
+        S = lltype.SingleFloat
+        I = lltype.Signed
+        floats = [random.random() - 0.5 for i in range(8)]
+        singlefloats = [r_singlefloat(random.random() - 0.5) for i in range(8)]
+        ints = [random.randrange(-99, 99) for i in range(8)]
+        for repeat in range(100):
+            args = []
+            argvalues = []
+            argslist = []
+            local_floats = list(floats)
+            local_singlefloats = list(singlefloats)
+            local_ints = list(ints)
+            for i in range(8):
+                case = random.randrange(0, 3)
+                if case == 0:
+                    args.append(F)
+                    arg = local_floats.pop()
+                    argslist.append(boxfloat(arg))
+                elif case == 1:
+                    args.append(S)
+                    arg = local_singlefloats.pop()
+                    argslist.append(BoxInt(longlong.singlefloat2int(arg)))
+                else:
+                    args.append(I)
+                    arg = local_ints.pop()
+                    argslist.append(BoxInt(arg))
+                argvalues.append(arg)
+            FUNC = self.FuncType(args, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
+
+            res = self.execute_operation(rop.CALL,
+                                         [funcbox] + argslist,
+                                         'float', descr=calldescr)
+            expected = func(*argvalues)
+            assert abs(res.getfloat() - expected) < 0.0001
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -2734,6 +2734,65 @@
                                      'float', descr=calldescr)
         assert res.getfloatstorage() == expected
 
+    def test_singlefloat_result_of_call_direct(self):
+        if not self.cpu.supports_singlefloats:
+            py.test.skip("singlefloat test")
+        from pypy.translator.tool.cbuild import ExternalCompilationInfo
+        from pypy.rlib.rarithmetic import r_singlefloat
+        eci = ExternalCompilationInfo(
+            separate_module_sources=["""
+            float fn_test_result_of_call(float x)
+            {
+                return x / 2.0f;
+            }
+            """],
+            export_symbols=['fn_test_result_of_call'])
+        f = rffi.llexternal('fn_test_result_of_call', [lltype.SingleFloat],
+                            lltype.SingleFloat,
+                            compilation_info=eci, _nowrapper=True)
+        value = r_singlefloat(-42.5)
+        expected = r_singlefloat(-21.25)
+        assert f(value) == expected
+        #
+        FUNC = self.FuncType([lltype.SingleFloat], lltype.SingleFloat)
+        FPTR = self.Ptr(FUNC)
+        calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+        ivalue = longlong.singlefloat2int(value)
+        iexpected = longlong.singlefloat2int(expected)
+        x = self.cpu.bh_call_i(self.get_funcbox(self.cpu, f).value,
+                               calldescr, [ivalue], None, None)
+        assert x == iexpected
+
+    def test_singlefloat_result_of_call_compiled(self):
+        if not self.cpu.supports_singlefloats:
+            py.test.skip("test of singlefloat result")
+        from pypy.translator.tool.cbuild import ExternalCompilationInfo
+        from pypy.rlib.rarithmetic import r_singlefloat
+        eci = ExternalCompilationInfo(
+            separate_module_sources=["""
+            float fn_test_result_of_call(float x)
+            {
+                return x / 2.0f;
+            }
+            """],
+            export_symbols=['fn_test_result_of_call'])
+        f = rffi.llexternal('fn_test_result_of_call', [lltype.SingleFloat],
+                            lltype.SingleFloat,
+                            compilation_info=eci, _nowrapper=True)
+        value = r_singlefloat(-42.5)
+        expected = r_singlefloat(-21.25)
+        assert f(value) == expected
+        #
+        FUNC = self.FuncType([lltype.SingleFloat], lltype.SingleFloat)
+        FPTR = self.Ptr(FUNC)
+        calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+        funcbox = self.get_funcbox(self.cpu, f)
+        ivalue = longlong.singlefloat2int(value)
+        iexpected = longlong.singlefloat2int(expected)
+        res = self.execute_operation(rop.CALL, [funcbox, BoxInt(ivalue)],
+                                     'int', descr=calldescr)
+        assert res.value == iexpected
+
     def test_free_loop_and_bridges(self):
         from pypy.jit.backend.llsupport.llmodel import AbstractLLCPU
         if not isinstance(self.cpu, AbstractLLCPU):
@@ -2748,6 +2807,26 @@
         assert mem2 < mem1
         assert mem2 == mem0
 
+    def test_memoryerror(self):
+        excdescr = BasicFailDescr(666)
+        self.cpu.propagate_exception_v = self.cpu.get_fail_descr_number(
+            excdescr)
+        self.cpu.setup_once()    # xxx run the setup again, because we
+                                 # just set propagate_exception_v
+        i0 = BoxInt()
+        p0 = BoxPtr()
+        operations = [
+            ResOperation(rop.NEWUNICODE, [i0], p0),
+            ResOperation(rop.FINISH, [p0], None, descr=BasicFailDescr(1))
+            ]
+        inputargs = [i0]
+        looptoken = LoopToken()
+        self.cpu.compile_loop(inputargs, operations, looptoken)
+        # overflowing value:
+        self.cpu.set_future_value_int(0, sys.maxint // 4 + 1)
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == excdescr.identifier
+
 
 class OOtypeBackendTest(BaseBackendTest):
 
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -56,7 +56,9 @@
         self.exc = exc
         self.is_guard_not_invalidated = is_guard_not_invalidated
 
-DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed))
+DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
+                              ('bridge', lltype.Signed), # 0 or 1
+                              ('number', lltype.Signed))
 
 class Assembler386(object):
     _regalloc = None
@@ -89,6 +91,7 @@
         self._current_depths_cache = (0, 0)
         self.datablockwrapper = None
         self.stack_check_slowpath = 0
+        self.propagate_exception_path = 0
         self.teardown()
 
     def leave_jitted_hook(self):
@@ -125,6 +128,7 @@
             self._build_failure_recovery(True, withfloats=True)
             support.ensure_sse2_floats()
             self._build_float_constants()
+        self._build_propagate_exception_path()
         if gc_ll_descr.get_malloc_slowpath_addr is not None:
             self._build_malloc_slowpath()
         self._build_stack_check_slowpath()
@@ -138,6 +142,9 @@
         assert self.memcpy_addr != 0, "setup_once() not called?"
         self.current_clt = looptoken.compiled_loop_token
         self.pending_guard_tokens = []
+        if WORD == 8:
+            self.pending_memoryerror_trampoline_from = []
+            self.error_trampoline_64 = 0
         self.mc = codebuf.MachineCodeBlockWrapper()
         #assert self.datablockwrapper is None --- but obscure case
         # possible, e.g. getting MemoryError and continuing
@@ -147,6 +154,8 @@
 
     def teardown(self):
         self.pending_guard_tokens = None
+        if WORD == 8:
+            self.pending_memoryerror_trampoline_from = None
         self.mc = None
         self.looppos = -1
         self.currently_compiling_loop = None
@@ -155,9 +164,12 @@
     def finish_once(self):
         if self._debug:
             debug_start('jit-backend-counts')
-            for i in range(len(self.loop_run_counters)):
-                struct = self.loop_run_counters[i]
-                debug_print(str(i) + ':' + str(struct.i))
+            for struct in self.loop_run_counters:
+                if struct.bridge:
+                    prefix = 'bridge '
+                else:
+                    prefix = 'loop '
+                debug_print(prefix + str(struct.number) + ':' + str(struct.i))
             debug_stop('jit-backend-counts')
 
     def _build_float_constants(self):
@@ -232,15 +244,47 @@
         if self.cpu.supports_floats:          # restore the XMM registers
             for i in range(self.cpu.NUM_REGS):# from where they were saved
                 mc.MOVSD_xs(i, (WORD*2)+8*i)
+        #
+        # Note: we check this only after the code above, because that code is
+        # more than 127 bytes on 64-bit: too far for the short jump below.
+        mc.TEST_rr(eax.value, eax.value)
+        mc.J_il8(rx86.Conditions['Z'], 0) # patched later
+        jz_location = mc.get_relative_pos()
+        #
         nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
         mc.MOV(edx, heap(nursery_free_adr))   # load this in EDX
         mc.RET()
+        #
+        # If the slowpath malloc failed, we raise a MemoryError that
+        # always interrupts the current loop, as a "good enough"
+        # approximation.  Also note that we didn't RET from this helper;
+        # but the code we jump to will actually restore the stack
+        # position based on EBP, which will get us out of here for free.
+        offset = mc.get_relative_pos() - jz_location
+        assert 0 < offset <= 127
+        mc.overwrite(jz_location-1, chr(offset))
+        mc.JMP(imm(self.propagate_exception_path))
+        #
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.malloc_slowpath2 = rawstart
 
+    def _build_propagate_exception_path(self):
+        if self.cpu.propagate_exception_v < 0:
+            return      # not supported (for tests, or non-translated)
+        #
+        self.mc = codebuf.MachineCodeBlockWrapper()
+        # call on_leave_jitted_save_exc()
+        addr = self.cpu.get_on_leave_jitted_int(save_exception=True)
+        self.mc.CALL(imm(addr))
+        self.mc.MOV_ri(eax.value, self.cpu.propagate_exception_v)
+        self._call_footer()
+        rawstart = self.mc.materialize(self.cpu.asmmemmgr, [])
+        self.propagate_exception_path = rawstart
+        self.mc = None
+
     def _build_stack_check_slowpath(self):
         _, _, slowpathaddr = self.cpu.insert_stack_check()
-        if slowpathaddr == 0 or self.cpu.exit_frame_with_exception_v < 0:
+        if slowpathaddr == 0 or self.cpu.propagate_exception_v < 0:
             return      # no stack check (for tests, or non-translated)
         #
         # make a "function" that is called immediately at the start of
@@ -296,19 +340,11 @@
         offset = mc.get_relative_pos() - jnz_location
         assert 0 < offset <= 127
         mc.overwrite(jnz_location-1, chr(offset))
-        # clear the exception from the global position
-        mc.MOV(eax, heap(self.cpu.pos_exc_value()))
-        mc.MOV(heap(self.cpu.pos_exception()), imm0)
-        mc.MOV(heap(self.cpu.pos_exc_value()), imm0)
-        # save the current exception instance into fail_boxes_ptr[0]
-        adr = self.fail_boxes_ptr.get_addr_for_num(0)
-        mc.MOV(heap(adr), eax)
-        # call the helper function to set the GC flag on the fail_boxes_ptr
-        # array (note that there is no exception any more here)
-        addr = self.cpu.get_on_leave_jitted_int(save_exception=False)
+        # call on_leave_jitted_save_exc()
+        addr = self.cpu.get_on_leave_jitted_int(save_exception=True)
         mc.CALL(imm(addr))
         #
-        mc.MOV_ri(eax.value, self.cpu.exit_frame_with_exception_v)
+        mc.MOV_ri(eax.value, self.cpu.propagate_exception_v)
         #
         # footer -- note the ADD, which skips the return address of this
         # function, and will instead return to the caller's caller.  Note
@@ -404,7 +440,7 @@
         self.setup(looptoken)
         self.currently_compiling_loop = looptoken
         if log:
-            self._register_counter()
+            self._register_counter(False, looptoken.number)
             operations = self._inject_debugging_code(looptoken, operations)
 
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
@@ -473,7 +509,7 @@
 
         self.setup(original_loop_token)
         if log:
-            self._register_counter()
+            self._register_counter(True, descr_number)
             operations = self._inject_debugging_code(faildescr, operations)
 
         arglocs = self.rebuild_faillocs_from_descr(failure_recovery)
@@ -520,6 +556,8 @@
         # at the end of self.mc.
         for tok in self.pending_guard_tokens:
             tok.pos_recovery_stub = self.generate_quick_failure(tok)
+        if WORD == 8 and len(self.pending_memoryerror_trampoline_from) > 0:
+            self.error_trampoline_64 = self.generate_propagate_error_64()
 
     def patch_pending_failure_recoveries(self, rawstart):
         # after we wrote the assembler to raw memory, set up
@@ -556,6 +594,12 @@
                 # less, we would run into the issue that overwriting the
                 # 5 bytes here might get a few nonsense bytes at the
                 # return address of the following CALL.
+        if WORD == 8:
+            for pos_after_jz in self.pending_memoryerror_trampoline_from:
+                assert self.error_trampoline_64 != 0     # only if non-empty
+                mc = codebuf.MachineCodeBlockWrapper()
+                mc.writeimm32(self.error_trampoline_64 - pos_after_jz)
+                mc.copy_to_raw_memory(rawstart + pos_after_jz - 4)
 
     def get_asmmemmgr_blocks(self, looptoken):
         clt = looptoken.compiled_loop_token
@@ -570,7 +614,7 @@
         return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                    self.cpu.gc_ll_descr.gcrootmap)
 
-    def _register_counter(self):
+    def _register_counter(self, bridge, number):
         if self._debug:
             # YYY very minor leak -- we need the counters to stay alive
             # forever, just because we want to report them at the end
@@ -578,6 +622,8 @@
             struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
                                    track_allocation=False)
             struct.i = 0
+            struct.bridge = int(bridge)
+            struct.number = number
             self.loop_run_counters.append(struct)
 
     def _find_failure_recovery_bytecode(self, faildescr):
@@ -1068,9 +1114,10 @@
                     self.implement_guard(guard_token, checkfalsecond)
         return genop_cmp_guard_float
 
-    def _emit_call(self, force_index, x, arglocs, start=0, tmp=eax):
+    def _emit_call(self, force_index, x, arglocs, start=0, tmp=eax,
+                   argtypes=None):
         if IS_X86_64:
-            return self._emit_call_64(force_index, x, arglocs, start)
+            return self._emit_call_64(force_index, x, arglocs, start, argtypes)
 
         p = 0
         n = len(arglocs)
@@ -1098,12 +1145,13 @@
         self.mc.CALL(x)
         self.mark_gc_roots(force_index)
 
-    def _emit_call_64(self, force_index, x, arglocs, start):
+    def _emit_call_64(self, force_index, x, arglocs, start, argtypes):
         src_locs = []
         dst_locs = []
         xmm_src_locs = []
         xmm_dst_locs = []
         pass_on_stack = []
+        singlefloats = None
 
         # In reverse order for use with pop()
         unused_gpr = [r9, r8, ecx, edx, esi, edi]
@@ -1123,6 +1171,11 @@
                     xmm_dst_locs.append(unused_xmm.pop())
                 else:
                     pass_on_stack.append(loc)
+            elif (argtypes is not None and argtypes[i-start] == 'S' and
+                  len(unused_xmm) > 0):
+                # Singlefloat argument
+                if singlefloats is None: singlefloats = []
+                singlefloats.append((loc, unused_xmm.pop()))
             else:
                 if len(unused_gpr) > 0:
                     src_locs.append(loc)
@@ -1150,9 +1203,15 @@
                 else:
                     self.mc.MOV_sr(i*WORD, loc.value)
 
-        # Handle register arguments
+        # Handle register arguments: first remap the xmm arguments
+        remap_frame_layout(self, xmm_src_locs, xmm_dst_locs,
+                           X86_64_XMM_SCRATCH_REG)
+        # Load the singlefloat arguments from main regs or stack to xmm regs
+        if singlefloats is not None:
+            for src, dst in singlefloats:
+                self.mc.MOVD(dst, src)
+        # Finally remap the arguments in the main regs
         remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)
-        remap_frame_layout(self, xmm_src_locs, xmm_dst_locs, X86_64_XMM_SCRATCH_REG)
 
         self._regalloc.reserve_param(len(pass_on_stack))
         self.mc.CALL(x)
@@ -1267,6 +1326,20 @@
     def genop_cast_int_to_float(self, op, arglocs, resloc):
         self.mc.CVTSI2SD(resloc, arglocs[0])
 
+    def genop_cast_float_to_singlefloat(self, op, arglocs, resloc):
+        loc0, loctmp = arglocs
+        self.mc.CVTSD2SS(loctmp, loc0)
+        assert isinstance(resloc, RegLoc)
+        assert isinstance(loctmp, RegLoc)
+        self.mc.MOVD_rx(resloc.value, loctmp.value)
+
+    def genop_cast_singlefloat_to_float(self, op, arglocs, resloc):
+        loc0, = arglocs
+        assert isinstance(resloc, RegLoc)
+        assert isinstance(loc0, RegLoc)
+        self.mc.MOVD_xr(resloc.value, loc0.value)
+        self.mc.CVTSS2SD_xx(resloc.value, resloc.value)
+
     def genop_guard_int_is_true(self, op, guard_op, guard_token, arglocs, resloc):
         guard_opnum = guard_op.getopnum()
         self.mc.CMP(arglocs[0], imm0)
@@ -1388,7 +1461,7 @@
         assert isinstance(loc_vtable, ImmedLoc)
         arglocs = arglocs[:-1]
         self.call(self.malloc_func_addr, arglocs, eax)
-        # xxx ignore NULL returns for now
+        self.propagate_memoryerror_if_eax_is_null()
         self.set_vtable(eax, loc_vtable)
 
     def set_vtable(self, loc, loc_vtable):
@@ -1407,18 +1480,35 @@
     def genop_new(self, op, arglocs, result_loc):
         assert result_loc is eax
         self.call(self.malloc_func_addr, arglocs, eax)
+        self.propagate_memoryerror_if_eax_is_null()
 
     def genop_new_array(self, op, arglocs, result_loc):
         assert result_loc is eax
         self.call(self.malloc_array_func_addr, arglocs, eax)
+        self.propagate_memoryerror_if_eax_is_null()
 
     def genop_newstr(self, op, arglocs, result_loc):
         assert result_loc is eax
         self.call(self.malloc_str_func_addr, arglocs, eax)
+        self.propagate_memoryerror_if_eax_is_null()
 
     def genop_newunicode(self, op, arglocs, result_loc):
         assert result_loc is eax
         self.call(self.malloc_unicode_func_addr, arglocs, eax)
+        self.propagate_memoryerror_if_eax_is_null()
+
+    def propagate_memoryerror_if_eax_is_null(self):
+        # If self.propagate_exception_path == 0 (tests), this may jump to 0
+        # and segfault.  Too bad.  The alternative is to continue anyway
+        # with eax==0, but that would segfault too.
+        self.mc.TEST_rr(eax.value, eax.value)
+        if WORD == 4:
+            self.mc.J_il(rx86.Conditions['Z'], self.propagate_exception_path)
+            self.mc.add_pending_relocation()
+        elif WORD == 8:
+            self.mc.J_il(rx86.Conditions['Z'], 0)
+            pos = self.mc.get_relative_pos()
+            self.pending_memoryerror_trampoline_from.append(pos)
 
     # ----------
 
@@ -1690,6 +1780,12 @@
         return GuardToken(faildescr, failargs, fail_locs, exc,
                           is_guard_not_invalidated)
 
+    def generate_propagate_error_64(self):
+        assert WORD == 8
+        startpos = self.mc.get_relative_pos()
+        self.mc.JMP(imm(self.propagate_exception_path))
+        return startpos
+
     def generate_quick_failure(self, guardtok):
         """Generate the initial code for handling a failure.  We try to
         keep it as compact as possible.
@@ -2025,7 +2121,8 @@
         else:
             tmp = eax
 
-        self._emit_call(force_index, x, arglocs, 3, tmp=tmp)
+        self._emit_call(force_index, x, arglocs, 3, tmp=tmp,
+                        argtypes=op.getdescr().get_arg_types())
 
         if IS_X86_32 and isinstance(resloc, StackLoc) and resloc.width == 8:
             # a float or a long long return
@@ -2037,7 +2134,19 @@
                 #     and this way is simpler also because the result loc
                 #     can just be always a stack location
             else:
-                self.mc.FSTP_b(resloc.value)   # float return
+                self.mc.FSTPL_b(resloc.value)   # float return
+        elif op.getdescr().get_return_type() == 'S':
+            # singlefloat return
+            assert resloc is eax
+            if IS_X86_32:
+                # must convert ST(0) to a 32-bit singlefloat and load it into EAX
+                # mess mess mess
+                self.mc.SUB_ri(esp.value, 4)
+                self.mc.FSTPS_s(0)
+                self.mc.POP_r(eax.value)
+            elif IS_X86_64:
+                # must copy from the lower 32 bits of XMM0 into eax
+                self.mc.MOVD_rx(eax.value, xmm0.value)
         elif size == WORD:
             assert resloc is eax or resloc is xmm0    # a full word
         elif size == 0:
@@ -2195,7 +2304,7 @@
         self._emit_call(fail_index, imm(asm_helper_adr), [eax, arglocs[1]], 0,
                         tmp=ecx)
         if IS_X86_32 and isinstance(result_loc, StackLoc) and result_loc.type == FLOAT:
-            self.mc.FSTP_b(result_loc.value)
+            self.mc.FSTPL_b(result_loc.value)
         #else: result_loc is already either eax or None, checked below
         self.mc.JMP_l8(0) # jump to done, patched later
         jmp_location = self.mc.get_relative_pos()
diff --git a/pypy/jit/backend/x86/codebuf.py b/pypy/jit/backend/x86/codebuf.py
--- a/pypy/jit/backend/x86/codebuf.py
+++ b/pypy/jit/backend/x86/codebuf.py
@@ -25,8 +25,11 @@
         self.init_block_builder()
         # a list of relative positions; for each position p, the bytes
         # at [p-4:p] encode an absolute address that will need to be
-        # made relative.
-        self.relocations = []
+        # made relative.  Only works on 32-bit!
+        if WORD == 4:
+            self.relocations = []
+        else:
+            self.relocations = None
         #
         # ResOperation --> offset in the assembly.
         # ops_offset[None] represents the beginning of the code after the last op
@@ -42,9 +45,10 @@
 
     def copy_to_raw_memory(self, addr):
         self._copy_to_raw_memory(addr)
-        for reloc in self.relocations:
-            p = addr + reloc
-            adr = rffi.cast(rffi.LONGP, p - WORD)
-            adr[0] = intmask(adr[0] - p)
+        if self.relocations is not None:
+            for reloc in self.relocations:
+                p = addr + reloc
+                adr = rffi.cast(rffi.LONGP, p - WORD)
+                adr[0] = intmask(adr[0] - p)
         valgrind.discard_translations(addr, self.get_relative_pos())
         self._dump(addr, "jit-backend-dump", backend_name)
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -705,6 +705,17 @@
         self.Perform(op, [loc0], loc1)
         self.rm.possibly_free_var(op.getarg(0))
 
+    def consider_cast_float_to_singlefloat(self, op):
+        loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0))
+        loc1 = self.rm.force_allocate_reg(op.result)
+        self.xrm.possibly_free_var(op.getarg(0))
+        tmpxvar = TempBox()
+        loctmp = self.xrm.force_allocate_reg(tmpxvar)   # may be equal to loc0
+        self.xrm.possibly_free_var(tmpxvar)
+        self.Perform(op, [loc0, loctmp], loc1)
+
+    consider_cast_singlefloat_to_float = consider_cast_int_to_float
+
     def _consider_llong_binop_xx(self, op):
         # must force both arguments into xmm registers, because we don't
         # know if they will be suitably aligned.  Exception: if the second
diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py
--- a/pypy/jit/backend/x86/regloc.py
+++ b/pypy/jit/backend/x86/regloc.py
@@ -521,6 +521,8 @@
     UCOMISD = _binaryop('UCOMISD')
     CVTSI2SD = _binaryop('CVTSI2SD')
     CVTTSD2SI = _binaryop('CVTTSD2SI')
+    CVTSD2SS = _binaryop('CVTSD2SS')
+    CVTSS2SD = _binaryop('CVTSS2SD')
     
     SQRTSD = _binaryop('SQRTSD')
 
@@ -534,6 +536,8 @@
     PXOR  = _binaryop('PXOR')
     PCMPEQD = _binaryop('PCMPEQD')
 
+    MOVD = _binaryop('MOVD')
+
     CALL = _relative_unaryop('CALL')
     JMP = _relative_unaryop('JMP')
 
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -19,6 +19,7 @@
 class AbstractX86CPU(AbstractLLCPU):
     debug = True
     supports_floats = True
+    supports_singlefloats = True
 
     BOOTSTRAP_TP = lltype.FuncType([], lltype.Signed)
     dont_keepalive_stuff = False # for tests
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -573,7 +573,8 @@
     BTS_jr = insn(rex_w, '\x0F\xAB', register(2,8), abs_, immediate(1))
 
     # x87 instructions
-    FSTP_b = insn('\xDD', orbyte(3<<3), stack_bp(1))
+    FSTPL_b = insn('\xDD', orbyte(3<<3), stack_bp(1)) # rffi.DOUBLE ('as' wants L??)
+    FSTPS_s = insn('\xD9', orbyte(3<<3), stack_sp(1)) # lltype.SingleFloat
 
     # ------------------------------ Random mess -----------------------
     RDTSC = insn('\x0F\x31')
@@ -590,8 +591,18 @@
     CVTTSD2SI_rx = xmminsn('\xF2', rex_w, '\x0F\x2C', register(1, 8), register(2), '\xC0')
     CVTTSD2SI_rb = xmminsn('\xF2', rex_w, '\x0F\x2C', register(1, 8), stack_bp(2))
 
-    MOVD_rx = xmminsn('\x66', rex_w, '\x0F\x7E', register(2, 8), register(1), '\xC0')
-    MOVD_xr = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), register(2), '\xC0')
+    CVTSD2SS_xx = xmminsn('\xF2', rex_nw, '\x0F\x5A',
+                          register(1, 8), register(2), '\xC0')
+    CVTSD2SS_xb = xmminsn('\xF2', rex_nw, '\x0F\x5A',
+                          register(1, 8), stack_bp(2))
+    CVTSS2SD_xx = xmminsn('\xF3', rex_nw, '\x0F\x5A',
+                          register(1, 8), register(2), '\xC0')
+    CVTSS2SD_xb = xmminsn('\xF3', rex_nw, '\x0F\x5A',
+                          register(1, 8), stack_bp(2))
+
+    MOVD_rx = xmminsn('\x66', rex_nw, '\x0F\x7E', register(2, 8), register(1), '\xC0')
+    MOVD_xr = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), register(2), '\xC0')
+    MOVD_xb = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_bp(2))
 
     PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b'))
 
diff --git a/pypy/jit/backend/x86/test/test_regloc.py b/pypy/jit/backend/x86/test/test_regloc.py
--- a/pypy/jit/backend/x86/test/test_regloc.py
+++ b/pypy/jit/backend/x86/test/test_regloc.py
@@ -62,7 +62,7 @@
             assert mc.relocations == [5]
             expected = "\xE8" + struct.pack('<i', target - (rawstart + 5))
         elif IS_X86_64:
-            assert mc.relocations == []
+            assert mc.relocations is None
             if 0 <= target <= 0xffffffff:
                 assert length == 9
                 expected = (
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -463,7 +463,7 @@
             self.cpu.finish_once()
         finally:
             debug._log = None
-        assert ('jit-backend-counts', [('debug_print', '0:10')]) in dlog
+        assert ('jit-backend-counts', [('debug_print', 'loop -1:10')]) in dlog
 
     def test_debugger_checksum(self):
         loop = """
diff --git a/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
--- a/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
+++ b/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
@@ -36,6 +36,14 @@
 def hexdump(s):
     return ' '.join(["%02X" % ord(c) for c in s])
 
+def reduce_to_32bit(s):
+    if s[:2] != '%r':
+        return s
+    if s[2:].isdigit():
+        return s + 'd'
+    else:
+        return '%e' + s[2:]
+
 # ____________________________________________________________
 
 COUNT1 = 15
@@ -180,12 +188,14 @@
     ##        for m, extra in args:
     ##            if m in (i386.MODRM, i386.MODRM8) or all:
     ##                suffix = suffixes[sizes[m]] + suffix
-            if argmodes and not self.is_xmm_insn:
+            if (argmodes and not self.is_xmm_insn
+                         and not instrname.startswith('FSTP')):
                 suffix = suffixes[self.WORD]
             # Special case: On 64-bit CPUs, rx86 assumes 64-bit integer
             # operands when converting to/from floating point, so we need to
             # indicate that with a suffix
-            if (self.WORD == 8) and instrname.startswith('CVT'):
+            if (self.WORD == 8) and (instrname.startswith('CVT') and
+                                     'SI' in instrname):
                 suffix = suffixes[self.WORD]
 
             if instr_suffix is not None:
@@ -218,10 +228,10 @@
                 and ops[1].startswith('%r')):
                 # movq $xxx, %rax => movl $xxx, %eax
                 suffix = 'l'
-                if ops[1][2:].isdigit():
-                    ops[1] += 'd'
-                else:
-                    ops[1] = '%e' + ops[1][2:]
+                ops[1] = reduce_to_32bit(ops[1])
+            if instrname.lower() == 'movd':
+                ops[0] = reduce_to_32bit(ops[0])
+                ops[1] = reduce_to_32bit(ops[1])
             #
             op = '\t%s%s %s%s' % (instrname.lower(), suffix,
                                   ', '.join(ops), following)
diff --git a/pypy/jit/codewriter/assembler.py b/pypy/jit/codewriter/assembler.py
--- a/pypy/jit/codewriter/assembler.py
+++ b/pypy/jit/codewriter/assembler.py
@@ -76,6 +76,8 @@
                 TYPE = llmemory.Address
             if TYPE == llmemory.Address:
                 value = heaptracker.adr2int(value)
+            if TYPE is lltype.SingleFloat:
+                value = longlong.singlefloat2int(value)
             if not isinstance(value, (llmemory.AddressAsInt,
                                       ComputedIntSymbolic)):
                 value = lltype.cast_primitive(lltype.Signed, value)
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -9,7 +9,7 @@
 from pypy.objspace.flow.model import SpaceOperation, Variable, Constant, c_last_exception
 from pypy.rlib import objectmodel
 from pypy.rlib.jit import _we_are_jitted
-from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rclass
+from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rclass, rffi
 from pypy.rpython.rclass import IR_QUASIIMMUTABLE, IR_QUASIIMMUTABLE_ARRAY
 from pypy.translator.simplify import get_funcobj
 from pypy.translator.unsimplify import varoftype
@@ -198,7 +198,6 @@
             self.vable_array_vars[op.result]= self.vable_array_vars[op.args[0]]
 
     rewrite_op_cast_pointer = rewrite_op_same_as
-    rewrite_op_cast_opaque_ptr = rewrite_op_same_as   # rlib.rerased
     def rewrite_op_cast_bool_to_int(self, op): pass
     def rewrite_op_cast_bool_to_uint(self, op): pass
     def rewrite_op_cast_char_to_int(self, op): pass
@@ -785,7 +784,6 @@
             op2.result = op.result
             return op2
         elif toll:
-            from pypy.rpython.lltypesystem import rffi
             size, unsigned = rffi.size_and_sign(op.args[0].concretetype)
             if unsigned:
                 INTERMEDIATE = lltype.Unsigned
@@ -807,20 +805,27 @@
             return self.force_cast_without_longlong(op.args[0], op.result)
 
     def force_cast_without_longlong(self, v_arg, v_result):
-        from pypy.rpython.lltypesystem.rffi import size_and_sign, sizeof, FLOAT
-        #
-        if (v_result.concretetype in (FLOAT, lltype.Float) or
-            v_arg.concretetype in (FLOAT, lltype.Float)):
-            assert (v_result.concretetype == lltype.Float and
-                    v_arg.concretetype == lltype.Float), "xxx unsupported cast"
+        if v_result.concretetype == v_arg.concretetype:
             return
-        #
-        size2, unsigned2 = size_and_sign(v_result.concretetype)
-        assert size2 <= sizeof(lltype.Signed)
-        if size2 == sizeof(lltype.Signed):
+        if v_arg.concretetype == rffi.FLOAT:
+            assert v_result.concretetype == lltype.Float, "cast %s -> %s" % (
+                v_arg.concretetype, v_result.concretetype)
+            return SpaceOperation('cast_singlefloat_to_float', [v_arg],
+                                  v_result)
+        if v_result.concretetype == rffi.FLOAT:
+            assert v_arg.concretetype == lltype.Float, "cast %s -> %s" % (
+                v_arg.concretetype, v_result.concretetype)
+            return SpaceOperation('cast_float_to_singlefloat', [v_arg],
+                                  v_result)
+        return self.force_cast_without_singlefloat(v_arg, v_result)
+
+    def force_cast_without_singlefloat(self, v_arg, v_result):
+        size2, unsigned2 = rffi.size_and_sign(v_result.concretetype)
+        assert size2 <= rffi.sizeof(lltype.Signed)
+        if size2 == rffi.sizeof(lltype.Signed):
             return     # the target type is LONG or ULONG
-        size1, unsigned1 = size_and_sign(v_arg.concretetype)
-        assert size1 <= sizeof(lltype.Signed)
+        size1, unsigned1 = rffi.size_and_sign(v_arg.concretetype)
+        assert size1 <= rffi.sizeof(lltype.Signed)
         #
         def bounds(size, unsigned):
             if unsigned:
@@ -849,7 +854,6 @@
         return result
 
     def rewrite_op_direct_ptradd(self, op):
-        from pypy.rpython.lltypesystem import rffi
         # xxx otherwise, not implemented:
         assert op.args[0].concretetype == rffi.CCHARP
         #
diff --git a/pypy/jit/codewriter/longlong.py b/pypy/jit/codewriter/longlong.py
--- a/pypy/jit/codewriter/longlong.py
+++ b/pypy/jit/codewriter/longlong.py
@@ -7,7 +7,8 @@
 """
 
 import sys
-from pypy.rpython.lltypesystem import lltype
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rlib import rarithmetic, longlong2float
 
 
 if sys.maxint > 2147483647:
@@ -31,8 +32,6 @@
     # ---------- 32-bit platform ----------
     # the type FloatStorage is r_longlong, and conversion is needed
 
-    from pypy.rlib import rarithmetic, longlong2float
-
     is_64_bit = False
     supports_longlong = True
     r_float_storage = rarithmetic.r_longlong
@@ -41,9 +40,19 @@
     getfloatstorage = longlong2float.float2longlong
     getrealfloat    = longlong2float.longlong2float
     gethash         = lambda xll: rarithmetic.intmask(xll - (xll >> 32))
-    is_longlong     = lambda TYPE: (TYPE == lltype.SignedLongLong or
-                                    TYPE == lltype.UnsignedLongLong)
+    is_longlong     = lambda TYPE: (TYPE is lltype.SignedLongLong or
+                                    TYPE is lltype.UnsignedLongLong)
 
     # -------------------------------------
 
 ZEROF = getfloatstorage(0.0)
+
+# ____________________________________________________________
+
+def int2singlefloat(x):
+    x = rffi.r_uint(x)
+    return longlong2float.uint2singlefloat(x)
+
+def singlefloat2int(x):
+    x = longlong2float.singlefloat2uint(x)
+    return rffi.cast(lltype.Signed, x)
diff --git a/pypy/jit/codewriter/policy.py b/pypy/jit/codewriter/policy.py
--- a/pypy/jit/codewriter/policy.py
+++ b/pypy/jit/codewriter/policy.py
@@ -12,6 +12,7 @@
         self.unsafe_loopy_graphs = set()
         self.supports_floats = False
         self.supports_longlong = False
+        self.supports_singlefloats = False
 
     def set_supports_floats(self, flag):
         self.supports_floats = flag
@@ -19,6 +20,9 @@
     def set_supports_longlong(self, flag):
         self.supports_longlong = flag
 
+    def set_supports_singlefloats(self, flag):
+        self.supports_singlefloats = flag
+
     def dump_unsafe_loops(self):
         f = udir.join("unsafe-loops.txt").open('w')
         strs = [str(graph) for graph in self.unsafe_loopy_graphs]
@@ -58,8 +62,9 @@
                     func, '_jit_unroll_safe_', False)
 
         unsupported = contains_unsupported_variable_type(graph,
-                                                         self.supports_floats,
-                                                         self.supports_longlong)
+                            self.supports_floats,
+                            self.supports_longlong,
+                            self.supports_singlefloats)
         res = see_function and not unsupported
         if res and contains_loop:
             self.unsafe_loopy_graphs.add(graph)
@@ -80,17 +85,24 @@
         return res
 
 def contains_unsupported_variable_type(graph, supports_floats,
-                                       supports_longlong):
+                                              supports_longlong,
+                                              supports_singlefloats):
     getkind = history.getkind
     try:
         for block in graph.iterblocks():
             for v in block.inputargs:
-                getkind(v.concretetype, supports_floats, supports_longlong)
+                getkind(v.concretetype, supports_floats,
+                                        supports_longlong,
+                                        supports_singlefloats)
             for op in block.operations:
                 for v in op.args:
-                    getkind(v.concretetype, supports_floats, supports_longlong)
+                    getkind(v.concretetype, supports_floats,
+                                            supports_longlong,
+                                            supports_singlefloats)
                 v = op.result
-                getkind(v.concretetype, supports_floats, supports_longlong)
+                getkind(v.concretetype, supports_floats,
+                                        supports_longlong,
+                                        supports_singlefloats)
     except NotImplementedError, e:
         log.WARNING('%s, ignoring graph' % (e,))
         log.WARNING('  %s' % (graph,))
diff --git a/pypy/jit/codewriter/test/test_longlong.py b/pypy/jit/codewriter/test/test_longlong.py
--- a/pypy/jit/codewriter/test/test_longlong.py
+++ b/pypy/jit/codewriter/test/test_longlong.py
@@ -230,3 +230,18 @@
             assert list(op1.args[3]) == []
             assert list(op1.args[4]) == vlist
             assert op1.result == v_result
+
+
+##def test_singlefloat_constants():
+##    v_x = varoftype(TYPE)
+##    vlist = [v_x, const(rffi.cast(TYPE, 7))]
+##    v_result = varoftype(TYPE)
+##    op = SpaceOperation('llong_add', vlist, v_result)
+##    tr = Transformer(FakeCPU(), FakeBuiltinCallControl())
+##    op1 = tr.rewrite_operation(op)
+##    #
+##    assert op1.opname == 'residual_call_irf_f'
+##    assert list(op1.args[2]) == []
+##    assert list(op1.args[3]) == []
+##    assert list(op1.args[4]) == vlist
+##    assert op1.result == v_result
diff --git a/pypy/jit/codewriter/test/test_policy.py b/pypy/jit/codewriter/test/test_policy.py
--- a/pypy/jit/codewriter/test/test_policy.py
+++ b/pypy/jit/codewriter/test/test_policy.py
@@ -12,24 +12,30 @@
     graph = support.getgraph(f, [5])
     for sf in [False, True]:
         for sll in [False, True]:
-            assert not contains_unsupported_variable_type(graph, sf, sll)
+            for ssf in [False, True]:
+                assert not contains_unsupported_variable_type(graph, sf,
+                                                              sll, ssf)
     #
     graph = support.getgraph(f, [5.5])
     for sf in [False, True]:
         for sll in [False, True]:
-            res = contains_unsupported_variable_type(graph, sf, sll)
-            assert res is not sf
+            for ssf in [False, True]:
+                res = contains_unsupported_variable_type(graph, sf, sll, ssf)
+                assert res is not sf
     #
     graph = support.getgraph(f, [r_singlefloat(5.5)])
     for sf in [False, True]:
         for sll in [False, True]:
-            assert contains_unsupported_variable_type(graph, sf, sll)
+            for ssf in [False, True]:
+                res = contains_unsupported_variable_type(graph, sf, sll, ssf)
+                assert res == (not ssf)
     #
     graph = support.getgraph(f, [r_longlong(5)])
     for sf in [False, True]:
         for sll in [False, True]:
-            res = contains_unsupported_variable_type(graph, sf, sll)
-            assert res == (sys.maxint == 2147483647 and not sll)
+            for ssf in [False, True]:
+                res = contains_unsupported_variable_type(graph, sf, sll, ssf)
+                assert res == (sys.maxint == 2147483647 and not sll)
 
 
 def test_regular_function():
diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -500,6 +500,9 @@
     @arguments("r", returns="i")
     def bhimpl_ptr_nonzero(a):
         return bool(a)
+    @arguments("r", returns="r")
+    def bhimpl_cast_opaque_ptr(a):
+        return a
 
     @arguments("i", returns="i")
     def bhimpl_int_copy(a):
@@ -623,6 +626,19 @@
         x = float(a)
         return longlong.getfloatstorage(x)
 
+    @arguments("f", returns="i")
+    def bhimpl_cast_float_to_singlefloat(a):
+        from pypy.rlib.rarithmetic import r_singlefloat
+        a = longlong.getrealfloat(a)
+        a = r_singlefloat(a)
+        return longlong.singlefloat2int(a)
+
+    @arguments("i", returns="f")
+    def bhimpl_cast_singlefloat_to_float(a):
+        a = longlong.int2singlefloat(a)
+        a = float(a)
+        return longlong.getfloatstorage(a)
+
     # ----------
     # control flow operations
 
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -668,10 +668,9 @@
     def handle_fail(self, metainterp_sd, jitdriver_sd):
         cpu = metainterp_sd.cpu
         exception = cpu.grab_exc_value()
+        assert exception, "PropagateExceptionDescr: no exception??"
         raise metainterp_sd.ExitFrameWithExceptionRef(cpu, exception)
 
-propagate_exception_descr = PropagateExceptionDescr()
-
 def compile_tmp_callback(cpu, jitdriver_sd, greenboxes, redboxes,
                          memory_manager=None):
     """Make a LoopToken that corresponds to assembler code that just
@@ -705,7 +704,7 @@
         finishargs = []
     #
     jd = jitdriver_sd
-    faildescr = propagate_exception_descr
+    faildescr = PropagateExceptionDescr()
     operations = [
         ResOperation(rop.CALL, callargs, result, descr=jd.portal_calldescr),
         ResOperation(rop.GUARD_NO_EXCEPTION, [], None, descr=faildescr),
diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -50,7 +50,7 @@
     func = argboxes[0].getint()
     # do the call using the correct function from the cpu
     rettype = descr.get_return_type()
-    if rettype == INT:
+    if rettype == INT or rettype == 'S':       # *S*ingle float
         try:
             result = cpu.bh_call_i(func, descr, args_i, args_r, args_f)
         except Exception, e:
@@ -64,7 +64,7 @@
             metainterp.execute_raised(e)
             result = NULL
         return BoxPtr(result)
-    if rettype == FLOAT or rettype == 'L':
+    if rettype == FLOAT or rettype == 'L':     # *L*ong long
         try:
             result = cpu.bh_call_f(func, descr, args_i, args_r, args_f)
         except Exception, e:
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -20,12 +20,16 @@
 
 FAILARGS_LIMIT = 1000
 
-def getkind(TYPE, supports_floats=True, supports_longlong=True):
+def getkind(TYPE, supports_floats=True,
+                  supports_longlong=True,
+                  supports_singlefloats=True):
     if TYPE is lltype.Void:
         return "void"
     elif isinstance(TYPE, lltype.Primitive):
         if TYPE is lltype.Float and supports_floats:
             return 'float'
+        if TYPE is lltype.SingleFloat and supports_singlefloats:
+            return 'int'     # singlefloats are stored in an int
         if TYPE in (lltype.Float, lltype.SingleFloat):
             raise NotImplementedError("type %s not supported" % TYPE)
         # XXX fix this for oo...
@@ -145,6 +149,7 @@
         """ Implement in call descr.
         Must return INT, REF, FLOAT, or 'v' for void.
         On 32-bit (hack) it can also be 'L' for longlongs.
+        Additionally it can be 'S' for singlefloats.
         """
         raise NotImplementedError
 
diff --git a/pypy/jit/metainterp/optimizeopt/fficall.py b/pypy/jit/metainterp/optimizeopt/fficall.py
--- a/pypy/jit/metainterp/optimizeopt/fficall.py
+++ b/pypy/jit/metainterp/optimizeopt/fficall.py
@@ -1,12 +1,11 @@
 from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.libffi import Func
-from pypy.rlib.debug import debug_start, debug_stop, debug_print
+from pypy.rlib.debug import debug_print
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
-from pypy.jit.backend.llsupport.ffisupport import UnsupportedKind
 
 
 class FuncInfo(object):
@@ -20,11 +19,8 @@
         self.funcval = funcval
         self.opargs = []
         argtypes, restype = self._get_signature(funcval)
-        try:
-            self.descr = cpu.calldescrof_dynamic(argtypes, restype)
-        except UnsupportedKind:
-            # e.g., I or U for long longs
-            self.descr = None
+        self.descr = cpu.calldescrof_dynamic(argtypes, restype)
+        # ^^^ may be None if unsupported
         self.prepare_op = prepare_op
         self.delayed_ops = []
 
@@ -78,14 +74,6 @@
         else:
             self.logops = None
 
-    def propagate_begin_forward(self):
-        debug_start('jit-log-ffiopt')
-        Optimization.propagate_begin_forward(self)
-
-    def propagate_end_forward(self):
-        debug_stop('jit-log-ffiopt')
-        Optimization.propagate_end_forward(self)
-
     def reconstruct_for_next_iteration(self, optimizer, valuemap):
         return OptFfiCall()
         # FIXME: Should any status be saved for next iteration?
@@ -184,7 +172,8 @@
     def do_call(self, op):
         funcval = self._get_funcval(op)
         funcinfo = self.funcinfo
-        if not funcinfo or funcinfo.funcval is not funcval:
+        if (not funcinfo or funcinfo.funcval is not funcval or
+            funcinfo.descr is None):
             return [op] # cannot optimize
         funcsymval = self.getvalue(op.getarg(2))
         arglist = [funcsymval.force_box()]
diff --git a/pypy/jit/metainterp/optimizeopt/heap.py b/pypy/jit/metainterp/optimizeopt/heap.py
--- a/pypy/jit/metainterp/optimizeopt/heap.py
+++ b/pypy/jit/metainterp/optimizeopt/heap.py
@@ -239,13 +239,14 @@
             return
         cf.force_lazy_setfield(self, can_cache)
 
-    def force_lazy_setarrayitem(self, arraydescr, can_cache=True):
+    def force_lazy_setarrayitem(self, arraydescr, indexvalue=None, can_cache=True):
         try:
             submap = self.cached_arrayitems[arraydescr]
         except KeyError:
             return
-        for cf in submap.values():
-            cf.force_lazy_setfield(self, can_cache)
+        for idx, cf in submap.iteritems():
+            if indexvalue is None or indexvalue.intbound.contains(idx):
+                cf.force_lazy_setfield(self, can_cache)
 
     def fixup_guard_situation(self):
         # hackish: reverse the order of the last two operations if it makes
@@ -357,7 +358,7 @@
                 return
         else:
             # variable index, so make sure the lazy setarrayitems are done
-            self.force_lazy_setarrayitem(op.getdescr())
+            self.force_lazy_setarrayitem(op.getdescr(), indexvalue=indexvalue)
         # default case: produce the operation
         arrayvalue.ensure_nonnull()
         self.emit_operation(op)
@@ -381,7 +382,7 @@
             cf.do_setfield(self, op)
         else:
             # variable index, so make sure the lazy setarrayitems are done
-            self.force_lazy_setarrayitem(op.getdescr(), can_cache=False)
+            self.force_lazy_setarrayitem(op.getdescr(), indexvalue=indexvalue, can_cache=False)
             # and then emit the operation
             self.emit_operation(op)
 
diff --git a/pypy/jit/metainterp/optimizeopt/intbounds.py b/pypy/jit/metainterp/optimizeopt/intbounds.py
--- a/pypy/jit/metainterp/optimizeopt/intbounds.py
+++ b/pypy/jit/metainterp/optimizeopt/intbounds.py
@@ -125,6 +125,17 @@
         r = self.getvalue(op.result)
         r.intbound.intersect(v1.intbound.div_bound(v2.intbound))
 
+    def optimize_INT_MOD(self, op):
+        self.emit_operation(op)
+        v2 = self.getvalue(op.getarg(1))
+        if v2.is_constant():
+            val = v2.box.getint()
+            r = self.getvalue(op.result)
+            if val < 0:
+                val = -val
+            r.intbound.make_gt(IntBound(-val, -val))
+            r.intbound.make_lt(IntBound(val, val))
+
     def optimize_INT_LSHIFT(self, op):
         v1 = self.getvalue(op.getarg(0))
         v2 = self.getvalue(op.getarg(1))
diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -264,6 +264,7 @@
         self.posponedop = None
         self.exception_might_have_happened = False
         self.quasi_immutable_deps = None
+        self.opaque_pointers = {}
         self.newoperations = []
         if loop is not None:
             self.call_pure_results = loop.call_pure_results
@@ -555,6 +556,11 @@
     def optimize_DEBUG_MERGE_POINT(self, op):
         self.emit_operation(op)
 
+    def optimize_CAST_OPAQUE_PTR(self, op):
+        value = self.getvalue(op.getarg(0))
+        self.opaque_pointers[value] = True
+        self.make_equal_to(op.result, value)
+
 dispatch_opt = make_dispatcher_method(Optimizer, 'optimize_',
         default=Optimizer.optimize_default)
 
diff --git a/pypy/jit/metainterp/optimizeopt/simplify.py b/pypy/jit/metainterp/optimizeopt/simplify.py
--- a/pypy/jit/metainterp/optimizeopt/simplify.py
+++ b/pypy/jit/metainterp/optimizeopt/simplify.py
@@ -25,6 +25,8 @@
         #     but it's a bit hard to implement robustly if heap.py is also run
         pass
 
+    optimize_CAST_OPAQUE_PTR = optimize_VIRTUAL_REF
+
 
 dispatch_opt = make_dispatcher_method(OptSimplify, 'optimize_',
         default=OptSimplify.emit_operation)
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -4621,6 +4621,96 @@
         """
         self.optimize_strunicode_loop(ops, expected)
 
+    def test_intmod_bounds(self):
+        ops = """
+        [i0, i1]
+        i2 = int_mod(i0, 12)
+        i3 = int_gt(i2, 12)
+        guard_false(i3) []
+        i4 = int_lt(i2, -12)
+        guard_false(i4) []
+        i5 = int_mod(i1, -12)
+        i6 = int_lt(i5, -12)
+        guard_false(i6) []
+        i7 = int_gt(i5, 12)
+        guard_false(i7) []
+        jump(i2, i5)
+        """
+        expected = """
+        [i0, i1]
+        i2 = int_mod(i0, 12)
+        i5 = int_mod(i1, -12)
+        jump(i2, i5)
+        """
+        self.optimize_loop(ops, expected)
+
+        # This is the sequence of resoperations that is generated for a Python
+        # app-level int % int.  When the modulus is constant and when i0
+        # is known non-negative it should be optimized to a single int_mod.
+        ops = """
+        [i0]
+        i5 = int_ge(i0, 0)
+        guard_true(i5) []
+        i1 = int_mod(i0, 42)
+        i2 = int_rshift(i1, 63)
+        i3 = int_and(42, i2)
+        i4 = int_add(i1, i3)
+        finish(i4)
+        """
+        expected = """
+        [i0]
+        i5 = int_ge(i0, 0)
+        guard_true(i5) []
+        i1 = int_mod(i0, 42)
+        finish(i1)
+        """
+        py.test.skip("in-progress")
+        self.optimize_loop(ops, expected)
+
+        # Also, 'n % power-of-two' can be turned into int_and(),
+        # but that's a bit harder to detect here because it turns into
+        # several operations, and of course it is wrong to just turn
+        # int_mod(i0, 16) into int_and(i0, 15).
+        ops = """
+        [i0]
+        i1 = int_mod(i0, 16)
+        i2 = int_rshift(i1, 63)
+        i3 = int_and(16, i2)
+        i4 = int_add(i1, i3)
+        finish(i4)
+        """
+        expected = """
+        [i0]
+        i4 = int_and(i0, 15)
+        finish(i4)
+        """
+        py.test.skip("harder")
+        self.optimize_loop(ops, expected)
+
+    def test_bounded_lazy_setfield(self):
+        ops = """
+        [p0, i0]
+        i1 = int_gt(i0, 2)
+        guard_true(i1) []
+        setarrayitem_gc(p0, 0, 3)
+        setarrayitem_gc(p0, 2, 4)
+        setarrayitem_gc(p0, i0, 15)
+        i2 = getarrayitem_gc(p0, 2)
+        jump(p0, i2)
+        """
+        # Remove the getarrayitem_gc, because we know that p0[i0] does not alias
+        # p0[2]
+        expected = """
+        [p0, i0]
+        i1 = int_gt(i0, 2)
+        guard_true(i1) []
+        setarrayitem_gc(p0, i0, 15)
+        setarrayitem_gc(p0, 0, 3)
+        setarrayitem_gc(p0, 2, 4)
+        jump(p0, 4)
+        """
+        self.optimize_loop(ops, expected)
+
 
 class TestLLtype(BaseTestOptimizeBasic, LLtypeMixin):
     pass
diff --git a/pypy/jit/metainterp/optimizeopt/unroll.py b/pypy/jit/metainterp/optimizeopt/unroll.py
--- a/pypy/jit/metainterp/optimizeopt/unroll.py
+++ b/pypy/jit/metainterp/optimizeopt/unroll.py
@@ -513,6 +513,9 @@
     def safe_to_move(self, op):
         opnum = op.getopnum()
         descr = op.getdescr()
+        for box in op.getarglist():
+            if self.optimizer.getvalue(box) in self.optimizer.opaque_pointers:
+                return False
         if op.is_always_pure() or op.is_foldable_guard():
             return True
         elif opnum == rop.JUMP:
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -215,6 +215,7 @@
 
     for _opimpl in ['int_is_true', 'int_is_zero', 'int_neg', 'int_invert',
                     'cast_float_to_int', 'cast_int_to_float',
+                    'cast_float_to_singlefloat', 'cast_singlefloat_to_float',
                     'float_neg', 'float_abs',
                     ]:
         exec py.code.Source('''
@@ -232,6 +233,10 @@
         return self.execute(rop.PTR_EQ, box, history.CONST_NULL)
 
     @arguments("box")
+    def opimpl_cast_opaque_ptr(self, box):
+        return self.execute(rop.CAST_OPAQUE_PTR, box)
+
+    @arguments("box")
     def _opimpl_any_return(self, box):
         self.metainterp.finishframe(box)
 
@@ -1227,7 +1232,7 @@
         src_i = src_r = src_f = 0
         i = 1
         for kind in descr.get_arg_types():
-            if kind == history.INT:
+            if kind == history.INT or kind == 'S':        # single float
                 while True:
                     box = argboxes[src_i]
                     src_i += 1
@@ -1378,9 +1383,9 @@
             num = self.cpu.get_fail_descr_number(tokens[0].finishdescr)
             setattr(self.cpu, 'done_with_this_frame_%s_v' % name, num)
         #
-        tokens = self.loop_tokens_exit_frame_with_exception_ref
-        num = self.cpu.get_fail_descr_number(tokens[0].finishdescr)
-        self.cpu.exit_frame_with_exception_v = num
+        exc_descr = compile.PropagateExceptionDescr()
+        num = self.cpu.get_fail_descr_number(exc_descr)
+        self.cpu.propagate_exception_v = num
         #
         self.globaldata = MetaInterpGlobalData(self)
 
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -408,6 +408,8 @@
     'FLOAT_ABS/1',
     'CAST_FLOAT_TO_INT/1',
     'CAST_INT_TO_FLOAT/1',
+    'CAST_FLOAT_TO_SINGLEFLOAT/1',
+    'CAST_SINGLEFLOAT_TO_FLOAT/1',
     #
     'INT_LT/2b',
     'INT_LE/2b',
@@ -435,6 +437,7 @@
     #
     'PTR_EQ/2b',
     'PTR_NE/2b',
+    'CAST_OPAQUE_PTR/1b',
     #
     'ARRAYLEN_GC/1d',
     'STRLEN/1',
diff --git a/pypy/jit/metainterp/test/test_fficall.py b/pypy/jit/metainterp/test/test_fficall.py
--- a/pypy/jit/metainterp/test/test_fficall.py
+++ b/pypy/jit/metainterp/test/test_fficall.py
@@ -3,7 +3,7 @@
 from pypy.rlib.rarithmetic import r_singlefloat, r_longlong, r_ulonglong
 from pypy.rlib.jit import JitDriver, promote, dont_look_inside
 from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.libffi import ArgChain, longlong2float, float2longlong
+from pypy.rlib.libffi import ArgChain
 from pypy.rlib.libffi import IS_32_BIT
 from pypy.rlib.test.test_libffi import TestLibffiCall as _TestLibffiCall
 from pypy.rpython.lltypesystem import lltype, rffi
@@ -12,10 +12,11 @@
 from pypy.jit.metainterp.test.support import LLJitMixin
 
 class TestFfiCall(LLJitMixin, _TestLibffiCall):
+    supports_all = False     # supports_{floats,longlong,singlefloats}
 
     # ===> ../../../rlib/test/test_libffi.py
 
-    def call(self, funcspec, args, RESULT, init_result=0, is_struct=False):
+    def call(self, funcspec, args, RESULT, is_struct=False, jitif=[]):
         """
         Call the function specified by funcspec in a loop, and let the jit to
         see and optimize it.
@@ -24,14 +25,7 @@
         lib, name, argtypes, restype = funcspec
         method_and_args = []
         for argval in args:
-            if type(argval) is r_singlefloat:
-                method_name = 'arg_singlefloat'
-                argval = float(argval)
-            elif IS_32_BIT and type(argval) in [r_longlong, r_ulonglong]:
-                method_name = 'arg_longlong'
-                argval = rffi.cast(rffi.LONGLONG, argval)
-                argval = longlong2float(argval)
-            elif isinstance(argval, tuple):
+            if isinstance(argval, tuple):
                 method_name, argval = argval
             else:
                 method_name = 'arg'
@@ -39,10 +33,20 @@
         method_and_args = unrolling_iterable(method_and_args)
         #
         reds = ['n', 'res', 'func']
-        if (RESULT in [rffi.FLOAT, rffi.DOUBLE] or
+        if (RESULT is rffi.DOUBLE or
             IS_32_BIT and RESULT in [rffi.LONGLONG, rffi.ULONGLONG]):
-            reds = ['n', 'func', 'res'] # floats must be *after* refs
+            reds = ['n', 'func', 'res'] # 'double' floats must be *after* refs
         driver = JitDriver(reds=reds, greens=[])
+        init_result = rffi.cast(RESULT, 0)
+        #
+        def g(func):
+            # a different function, which is marked as "dont_look_inside"
+            # in case it uses an unsupported argument
+            argchain = ArgChain()
+            # this loop is unrolled
+            for method_name, argval in method_and_args:
+                getattr(argchain, method_name)(argval)
+            return func.call(argchain, RESULT, is_struct=is_struct)
         #
         def f(n):
             func = lib.getpointer(name, argtypes, restype)
@@ -50,18 +54,44 @@
             while n < 10:
                 driver.jit_merge_point(n=n, res=res, func=func)
                 promote(func)
-                argchain = ArgChain()
-                # this loop is unrolled
-                for method_name, argval in method_and_args:
-                    getattr(argchain, method_name)(argval)
-                res = func.call(argchain, RESULT, is_struct=is_struct)
+                res = g(func)
                 n += 1
             return res
         #
-        res = self.meta_interp(f, [0], backendopt=True)
+        res = self.meta_interp(f, [0], backendopt=True,
+                               supports_floats       = self.supports_all,
+                               supports_longlong     = self.supports_all,
+                               supports_singlefloats = self.supports_all)
+        d = {'floats': self.supports_all,
+             'longlong': self.supports_all or not IS_32_BIT,
+             'singlefloats': self.supports_all,
+             'byval': False}
+        supported = all(d[check] for check in jitif)
+        if supported:
+            self.check_loops(
+                call_release_gil=1,   # a CALL_RELEASE_GIL, and no other CALLs
+                call=0,
+                call_may_force=0,
+                guard_no_exception=1,
+                guard_not_forced=1,
+                int_add=1,
+                int_lt=1,
+                guard_true=1,
+                jump=1)
+        else:
+            self.check_loops(
+                call_release_gil=0,   # no CALL_RELEASE_GIL
+                int_add=1,
+                int_lt=1,
+                guard_true=1,
+                jump=1)
         return res
 
     def test_byval_result(self):
         _TestLibffiCall.test_byval_result(self)
     test_byval_result.__doc__ = _TestLibffiCall.test_byval_result.__doc__
     test_byval_result.dont_track_allocations = True
+
+
+class TestFfiCallSupportAll(TestFfiCall):
+    supports_all = True     # supports_{floats,longlong,singlefloats}
diff --git a/pypy/jit/metainterp/test/test_float.py b/pypy/jit/metainterp/test/test_float.py
--- a/pypy/jit/metainterp/test/test_float.py
+++ b/pypy/jit/metainterp/test/test_float.py
@@ -36,6 +36,15 @@
         res = self.interp_operations(f, [x])
         assert res == -x
 
+    def test_singlefloat(self):
+        from pypy.rlib.rarithmetic import r_singlefloat
+        def f(a):
+            a = float(r_singlefloat(a))
+            a *= 4.25
+            return float(r_singlefloat(a))
+        res = self.interp_operations(f, [-2.0])
+        assert res == -8.5
+
 
 class TestOOtype(FloatTests, OOJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_loop.py b/pypy/jit/metainterp/test/test_loop.py
--- a/pypy/jit/metainterp/test/test_loop.py
+++ b/pypy/jit/metainterp/test/test_loop.py
@@ -800,6 +800,69 @@
 
         res = self.meta_interp(f, [200])
 
+    def test_regular_pointers_in_short_preamble(self):
+        # XXX do we really care about this case?  If not, we should
+        # at least detect it and complain during codewriter/jtransform
+        from pypy.rpython.lltypesystem import lltype
+        BASE = lltype.GcStruct('BASE')
+        A = lltype.GcStruct('A', ('parent', BASE), ('val', lltype.Signed))
+        B = lltype.GcStruct('B', ('parent', BASE), ('charval', lltype.Char))
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'm', 'i', 'j', 'sa', 'p'])
+        def f(n, m, j):
+            i = sa = 0
+            pa = lltype.malloc(A)
+            pa.val = 7
+            p = pa.parent
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, m=m, i=i, j=j, sa=sa, p=p)
+                if i < m:
+                    pa = lltype.cast_pointer(lltype.Ptr(A), p)
+                    sa += pa.val
+                elif i == m:
+                    pb = lltype.malloc(B)
+                    pb.charval = 'y'
+                    p = pb.parent
+                else:
+                    pb = lltype.cast_pointer(lltype.Ptr(B), p)
+                    sa += ord(pb.charval)
+                sa += 100
+                assert n>0 and m>0
+                i += j
+            return sa
+        expected = f(20, 10, 1)
+        res = self.meta_interp(f, [20, 10, 1])
+        assert res == expected
+
+    def test_unerased_pointers_in_short_preamble(self):
+        from pypy.rlib.rerased import new_erasing_pair
+        from pypy.rpython.lltypesystem import lltype
+        class A(object):
+            def __init__(self, val):
+                self.val = val
+        erase_A, unerase_A = new_erasing_pair('A')
+        erase_TP, unerase_TP = new_erasing_pair('TP')
+        TP = lltype.GcArray(lltype.Signed)
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'm', 'i', 'j', 'sa', 'p'])
+        def f(n, m, j):
+            i = sa = 0
+            p = erase_A(A(7))
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, m=m, i=i, j=j, sa=sa, p=p)
+                if i < m:
+                    sa += unerase_A(p).val
+                elif i == m:
+                    a = lltype.malloc(TP, 5)
+                    a[0] = 42
+                    p = erase_TP(a)
+                else:
+                    sa += unerase_TP(p)[0]
+                sa += A(i).val
+                assert n>0 and m>0
+                i += j
+            return sa
+        res = self.meta_interp(f, [20, 10, 1])
+        assert res == f(20, 10, 1)
+
 class TestOOtype(LoopTest, OOJitMixin):
     pass
 
diff --git a/pypy/jit/metainterp/test/test_warmspot.py b/pypy/jit/metainterp/test/test_warmspot.py
--- a/pypy/jit/metainterp/test/test_warmspot.py
+++ b/pypy/jit/metainterp/test/test_warmspot.py
@@ -303,6 +303,7 @@
         class FakeCPU(object):
             supports_floats = False
             supports_longlong = False
+            supports_singlefloats = False
             ts = llhelper
             translate_support_code = False
             stats = "stats"
diff --git a/pypy/jit/metainterp/test/test_warmstate.py b/pypy/jit/metainterp/test/test_warmstate.py
--- a/pypy/jit/metainterp/test/test_warmstate.py
+++ b/pypy/jit/metainterp/test/test_warmstate.py
@@ -1,13 +1,14 @@
 from pypy.rpython.test.test_llinterp import interpret
-from pypy.rpython.lltypesystem import lltype, llmemory, rstr
+from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rffi
 from pypy.rpython.ootypesystem import ootype
 from pypy.rpython.annlowlevel import llhelper
-from pypy.jit.metainterp.warmstate import wrap, unwrap
+from pypy.jit.metainterp.warmstate import wrap, unwrap, specialize_value
 from pypy.jit.metainterp.warmstate import equal_whatever, hash_whatever
 from pypy.jit.metainterp.warmstate import WarmEnterState, JitCell
 from pypy.jit.metainterp.history import BoxInt, BoxFloat, BoxPtr
 from pypy.jit.metainterp.history import ConstInt, ConstFloat, ConstPtr
 from pypy.jit.codewriter import longlong
+from pypy.rlib.rarithmetic import r_singlefloat
 
 def boxfloat(x):
     return BoxFloat(longlong.getfloatstorage(x))
@@ -40,6 +41,28 @@
     assert _is(wrap(None, 42, in_const_box=True), ConstInt(42))
     assert _is(wrap(None, 42.5, in_const_box=True), constfloat(42.5))
     assert _is(wrap(None, p, in_const_box=True), ConstPtr(po))
+    if longlong.supports_longlong:
+        import sys
+        from pypy.rlib.rarithmetic import r_longlong, r_ulonglong
+        value = r_longlong(-sys.maxint*17)
+        assert _is(wrap(None, value), BoxFloat(value))
+        assert _is(wrap(None, value, in_const_box=True), ConstFloat(value))
+        value_unsigned = r_ulonglong(-sys.maxint*17)
+        assert _is(wrap(None, value_unsigned), BoxFloat(value))
+    sfval = r_singlefloat(42.5)
+    ival = longlong.singlefloat2int(sfval)
+    assert _is(wrap(None, sfval), BoxInt(ival))
+    assert _is(wrap(None, sfval, in_const_box=True), ConstInt(ival))
+
+def test_specialize_value():
+    assert specialize_value(lltype.Char, 0x41) == '\x41'
+    if longlong.supports_longlong:
+        import sys
+        value = longlong.r_float_storage(sys.maxint*17)
+        assert specialize_value(lltype.SignedLongLong, value) == sys.maxint*17
+    sfval = r_singlefloat(42.5)
+    ival = longlong.singlefloat2int(sfval)
+    assert specialize_value(rffi.FLOAT, ival) == sfval
 
 def test_hash_equal_whatever_lltype():
     s1 = rstr.mallocstr(2)
diff --git a/pypy/jit/metainterp/warmspot.py b/pypy/jit/metainterp/warmspot.py
--- a/pypy/jit/metainterp/warmspot.py
+++ b/pypy/jit/metainterp/warmspot.py
@@ -173,6 +173,7 @@
             policy = JitPolicy()
         policy.set_supports_floats(self.cpu.supports_floats)
         policy.set_supports_longlong(self.cpu.supports_longlong)
+        policy.set_supports_singlefloats(self.cpu.supports_singlefloats)
         graphs = self.codewriter.find_all_graphs(policy)
         policy.dump_unsafe_loops()
         self.check_access_directly_sanity(graphs)
@@ -283,7 +284,9 @@
         auto_inline_graphs(self.translator, graphs, 0.01)
 
     def build_cpu(self, CPUClass, translate_support_code=False,
-                  no_stats=False, **kwds):
+                  no_stats=False, supports_floats=True,
+                  supports_longlong=True, supports_singlefloats=True,
+                  **kwds):
         assert CPUClass is not None
         self.opt = history.Options(**kwds)
         if no_stats:
@@ -295,6 +298,9 @@
             self.annhelper = MixLevelHelperAnnotator(self.translator.rtyper)
         cpu = CPUClass(self.translator.rtyper, self.stats, self.opt,
                        translate_support_code, gcdescr=self.gcdescr)
+        if not supports_floats:       cpu.supports_floats       = False
+        if not supports_longlong:     cpu.supports_longlong     = False
+        if not supports_singlefloats: cpu.supports_singlefloats = False
         self.cpu = cpu
 
     def build_meta_interp(self, ProfilerClass):
diff --git a/pypy/jit/metainterp/warmstate.py b/pypy/jit/metainterp/warmstate.py
--- a/pypy/jit/metainterp/warmstate.py
+++ b/pypy/jit/metainterp/warmstate.py
@@ -25,9 +25,13 @@
         if isinstance(TYPE, lltype.Ptr) and TYPE.TO._gckind == 'raw':
             # non-gc pointer
             return rffi.cast(TYPE, x)
+        elif TYPE is lltype.SingleFloat:
+            return longlong.int2singlefloat(x)
         else:
             return lltype.cast_primitive(TYPE, x)
     elif INPUT is longlong.FLOATSTORAGE:
+        if longlong.is_longlong(TYPE):
+            return rffi.cast(TYPE, x)
         assert TYPE is lltype.Float
         return longlong.getrealfloat(x)
     else:
@@ -84,8 +88,12 @@
             return history.ConstObj(value)
         else:
             return history.BoxObj(value)
-    elif isinstance(value, float):
-        value = longlong.getfloatstorage(value)
+    elif (isinstance(value, float) or
+          longlong.is_longlong(lltype.typeOf(value))):
+        if isinstance(value, float):
+            value = longlong.getfloatstorage(value)
+        else:
+            value = rffi.cast(lltype.SignedLongLong, value)
         if in_const_box:
             return history.ConstFloat(value)
         else:
@@ -93,6 +101,8 @@
     elif isinstance(value, str) or isinstance(value, unicode):
         assert len(value) == 1     # must be a character
         value = ord(value)
+    elif lltype.typeOf(value) is lltype.SingleFloat:
+        value = longlong.singlefloat2int(value)
     else:
         value = intmask(value)
     if in_const_box:
diff --git a/pypy/jit/tool/oparser.py b/pypy/jit/tool/oparser.py
--- a/pypy/jit/tool/oparser.py
+++ b/pypy/jit/tool/oparser.py
@@ -53,7 +53,7 @@
 class OpParser(object):
 
     use_mock_model = False
-    
+
     def __init__(self, input, cpu, namespace, type_system, boxkinds,
                  invent_fail_descr=default_fail_descr,
                  nonstrict=False):
@@ -187,7 +187,7 @@
             poss_descr = allargs[-1].strip()
             if poss_descr.startswith('descr='):
                 descr = self.get_descr(poss_descr[len('descr='):])
-                allargs = allargs[:-1]        
+                allargs = allargs[:-1]
             for arg in allargs:
                 arg = arg.strip()
                 try:
@@ -240,7 +240,7 @@
             fail_args = None
             if opnum == rop.FINISH:
                 if descr is None and self.invent_fail_descr:
-                    descr = self.invent_fail_descr(self.model)
+                    descr = self.invent_fail_descr(self.model, fail_args)
             elif opnum == rop.JUMP:
                 if descr is None and self.invent_fail_descr:
                     descr = self.looptoken
diff --git a/pypy/module/_ffi/interp_ffi.py b/pypy/module/_ffi/interp_ffi.py
--- a/pypy/module/_ffi/interp_ffi.py
+++ b/pypy/module/_ffi/interp_ffi.py
@@ -222,9 +222,9 @@
                 w_arg = space.ord(w_arg)
                 argchain.arg(space.int_w(w_arg))
             elif w_argtype.is_double():
-                argchain.arg(space.float_w(w_arg))
+                self.arg_float(space, argchain, w_arg)
             elif w_argtype.is_singlefloat():
-                argchain.arg_singlefloat(space.float_w(w_arg))
+                self.arg_singlefloat(space, argchain, w_arg)
             elif w_argtype.is_struct():
                 # arg_raw directly takes value to put inside ll_args
                 w_arg = space.interp_w(W_StructureInstance, w_arg)
@@ -267,15 +267,26 @@
         else:
             return w_arg
 
-    @jit.dont_look_inside
+    def arg_float(self, space, argchain, w_arg):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether floats are supported
+        argchain.arg(space.float_w(w_arg))
+
     def arg_longlong(self, space, argchain, w_arg):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether longlongs are supported
         bigarg = space.bigint_w(w_arg)
         ullval = bigarg.ulonglongmask()
         llval = rffi.cast(rffi.LONGLONG, ullval)
-        # this is a hack: we store the 64 bits of the long long into the
-        # 64 bits of a float (i.e., a C double)
-        floatval = libffi.longlong2float(llval)
-        argchain.arg_longlong(floatval)
+        argchain.arg(llval)
+
+    def arg_singlefloat(self, space, argchain, w_arg):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether singlefloats are supported
+        from pypy.rlib.rarithmetic import r_singlefloat
+        fval = space.float_w(w_arg)
+        sfval = r_singlefloat(fval)
+        argchain.arg(sfval)
 
     def call(self, space, args_w):
         self = jit.promote(self)
@@ -296,8 +307,7 @@
             # note that we must check for longlong first, because either
             # is_signed or is_unsigned returns true anyway
             assert libffi.IS_32_BIT
-            reskind = libffi.types.getkind(self.func.restype) # XXX: remove the kind
-            return self._call_longlong(space, argchain, reskind)
+            return self._call_longlong(space, argchain)
         elif w_restype.is_signed():
             return self._call_int(space, argchain)
         elif w_restype.is_unsigned() or w_restype.is_pointer():
@@ -309,12 +319,9 @@
             intres = self.func.call(argchain, rffi.WCHAR_T)
             return space.wrap(unichr(intres))
         elif w_restype.is_double():
-            floatres = self.func.call(argchain, rffi.DOUBLE)
-            return space.wrap(floatres)
+            return self._call_float(space, argchain)
         elif w_restype.is_singlefloat():
-            # the result is a float, but widened to be inside a double
-            floatres = self.func.call(argchain, rffi.FLOAT)
-            return space.wrap(floatres)
+            return self._call_singlefloat(space, argchain)
         elif w_restype.is_struct():
             w_datashape = w_restype.w_datashape
             assert isinstance(w_datashape, W_Structure)
@@ -383,19 +390,32 @@
                                  space.wrap('Unsupported restype'))
         return space.wrap(intres)
 
-    @jit.dont_look_inside
-    def _call_longlong(self, space, argchain, reskind):
-        # this is a hack: we store the 64 bits of the long long into the 64
-        # bits of a float (i.e., a C double)
-        floatres = self.func.call(argchain, rffi.LONGLONG)
-        llres = libffi.float2longlong(floatres)
-        if reskind == 'I':
+    def _call_float(self, space, argchain):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether floats are supported
+        floatres = self.func.call(argchain, rffi.DOUBLE)
+        return space.wrap(floatres)
+
+    def _call_longlong(self, space, argchain):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether longlongs are supported
+        restype = self.func.restype
+        call = self.func.call
+        if restype is libffi.types.slonglong:
+            llres = call(argchain, rffi.LONGLONG)
             return space.wrap(llres)
-        elif reskind == 'U':
-            ullres = rffi.cast(rffi.ULONGLONG, llres)
+        elif restype is libffi.types.ulonglong:
+            ullres = call(argchain, rffi.ULONGLONG)
             return space.wrap(ullres)
         else:
-            assert False
+            raise OperationError(space.w_ValueError,
+                                 space.wrap('Unsupported longlong restype'))
+
+    def _call_singlefloat(self, space, argchain):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether singlefloats are supported
+        sfres = self.func.call(argchain, rffi.FLOAT)
+        return space.wrap(float(sfres))
 
     def getaddr(self, space):
         """
diff --git a/pypy/module/_multibytecodec/__init__.py b/pypy/module/_multibytecodec/__init__.py
--- a/pypy/module/_multibytecodec/__init__.py
+++ b/pypy/module/_multibytecodec/__init__.py
@@ -7,13 +7,14 @@
         # for compatibility this name is obscured, and should be called
         # via the _codecs_*.py modules written in lib_pypy.
         '__getcodec': 'interp_multibytecodec.getcodec',
+
+        'MultibyteIncrementalDecoder':
+            'interp_incremental.MultibyteIncrementalDecoder',
+        'MultibyteIncrementalEncoder':
+            'interp_incremental.MultibyteIncrementalEncoder',
     }
 
     appleveldefs = {
-        'MultibyteIncrementalEncoder':
-            'app_multibytecodec.MultibyteIncrementalEncoder',
-        'MultibyteIncrementalDecoder':
-            'app_multibytecodec.MultibyteIncrementalDecoder',
         'MultibyteStreamReader':
             'app_multibytecodec.MultibyteStreamReader',
         'MultibyteStreamWriter':
diff --git a/pypy/module/_multibytecodec/app_multibytecodec.py b/pypy/module/_multibytecodec/app_multibytecodec.py
--- a/pypy/module/_multibytecodec/app_multibytecodec.py
+++ b/pypy/module/_multibytecodec/app_multibytecodec.py
@@ -1,34 +1,47 @@
 # NOT_RPYTHON
 #
-# These classes are not supported so far.
-#
-# My theory is that they are not widely used on CPython either, because
-# I found two bugs just by looking at their .c source: they always call
-# encreset() after a piece of data, even though I think it's wrong ---
-# it should be called only once at the end; and mbiencoder_reset() calls
-# decreset() instead of encreset().
-#
+# The interface here may be a little bit on the lightweight side.
 
-class MultibyteIncrementalEncoder(object):
-    def __init__(self, *args, **kwds):
-        raise LookupError(
-            "MultibyteIncrementalEncoder not implemented; "
-            "see pypy/module/_multibytecodec/app_multibytecodec.py")
+from _multibytecodec import MultibyteIncrementalDecoder
+from _multibytecodec import MultibyteIncrementalEncoder
 
-class MultibyteIncrementalDecoder(object):
-    def __init__(self, *args, **kwds):
-        raise LookupError(
-            "MultibyteIncrementalDecoder not implemented; "
-            "see pypy/module/_multibytecodec/app_multibytecodec.py")
 
-class MultibyteStreamReader(object):
-    def __init__(self, *args, **kwds):
-        raise LookupError(
-            "MultibyteStreamReader not implemented; "
-            "see pypy/module/_multibytecodec/app_multibytecodec.py")
+class MultibyteStreamReader(MultibyteIncrementalDecoder):
+    def __new__(cls, stream, errors=None):
+        self = MultibyteIncrementalDecoder.__new__(cls, errors)
+        self.stream = stream
+        return self
 
-class MultibyteStreamWriter(object):
-    def __init__(self, *args, **kwds):
-        raise LookupError(
-            "MultibyteStreamWriter not implemented; "
-            "see pypy/module/_multibytecodec/app_multibytecodec.py")
+    def __read(self, read, size):
+        if size is None or size < 0:
+            return MultibyteIncrementalDecoder.decode(self, read(), True)
+        while True:
+            data = read(size)
+            final = not data
+            output = MultibyteIncrementalDecoder.decode(self, data, final)
+            if output or final:
+                return output
+            size = 1   # read 1 more byte and retry
+
+    def read(self, size=None):
+        return self.__read(self.stream.read, size)
+
+    def readline(self, size=None):
+        return self.__read(self.stream.readline, size)
+
+    def readlines(self, sizehint=None):
+        return self.__read(self.stream.read, sizehint).splitlines(True)
+
+
+class MultibyteStreamWriter(MultibyteIncrementalEncoder):
+    def __new__(cls, stream, errors=None):
+        self = MultibyteIncrementalEncoder.__new__(cls, errors)
+        self.stream = stream
+        return self
+
+    def write(self, data):
+        self.stream.write(MultibyteIncrementalEncoder.encode(self, data))
+
+    def writelines(self, lines):
+        for data in lines:
+            self.write(data)
diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -52,11 +52,13 @@
     includes = ['src/cjkcodecs/multibytecodec.h'],
     include_dirs = [str(srcdir)],
     export_symbols = [
+        "pypy_cjk_dec_new",
         "pypy_cjk_dec_init", "pypy_cjk_dec_free", "pypy_cjk_dec_chunk",
         "pypy_cjk_dec_outbuf", "pypy_cjk_dec_outlen",
         "pypy_cjk_dec_inbuf_remaining", "pypy_cjk_dec_inbuf_consumed",
         "pypy_cjk_dec_replace_on_error",
 
+        "pypy_cjk_enc_new",
         "pypy_cjk_enc_init", "pypy_cjk_enc_free", "pypy_cjk_enc_chunk",
         "pypy_cjk_enc_reset", "pypy_cjk_enc_outbuf", "pypy_cjk_enc_outlen",
         "pypy_cjk_enc_inbuf_remaining", "pypy_cjk_enc_inbuf_consumed",
@@ -92,9 +94,11 @@
 # Decoding
 
 DECODEBUF_P = rffi.COpaquePtr('struct pypy_cjk_dec_s', compilation_info=eci)
+pypy_cjk_dec_new = llexternal('pypy_cjk_dec_new',
+                              [MULTIBYTECODEC_P], DECODEBUF_P)
 pypy_cjk_dec_init = llexternal('pypy_cjk_dec_init',
-                               [MULTIBYTECODEC_P, rffi.CCHARP, rffi.SSIZE_T],
-                               DECODEBUF_P)
+                               [DECODEBUF_P, rffi.CCHARP, rffi.SSIZE_T],
+                               rffi.SSIZE_T)
 pypy_cjk_dec_free = llexternal('pypy_cjk_dec_free', [DECODEBUF_P],
                                lltype.Void)
 pypy_cjk_dec_chunk = llexternal('pypy_cjk_dec_chunk', [DECODEBUF_P],
@@ -113,25 +117,30 @@
                                            rffi.SSIZE_T)
 
 def decode(codec, stringdata, errors="strict", errorcb=None, namecb=None):
+    decodebuf = pypy_cjk_dec_new(codec)
+    if not decodebuf:
+        raise MemoryError
+    try:
+        return decodeex(decodebuf, stringdata, errors, errorcb, namecb)
+    finally:
+        pypy_cjk_dec_free(decodebuf)
+
+def decodeex(decodebuf, stringdata, errors="strict", errorcb=None, namecb=None,
+             ignore_error=0):
     inleft = len(stringdata)
     inbuf = rffi.get_nonmovingbuffer(stringdata)
     try:
-        decodebuf = pypy_cjk_dec_init(codec, inbuf, inleft)
-        if not decodebuf:
+        if pypy_cjk_dec_init(decodebuf, inbuf, inleft) < 0:
             raise MemoryError
-        try:
-            while True:
-                r = pypy_cjk_dec_chunk(decodebuf)
-                if r == 0:
-                    break
-                multibytecodec_decerror(decodebuf, r, errors,
-                                        errorcb, namecb, stringdata)
-            src = pypy_cjk_dec_outbuf(decodebuf)
-            length = pypy_cjk_dec_outlen(decodebuf)
-            return rffi.wcharpsize2unicode(src, length)
-        #
-        finally:
-            pypy_cjk_dec_free(decodebuf)
+        while True:
+            r = pypy_cjk_dec_chunk(decodebuf)
+            if r == 0 or r == ignore_error:
+                break
+            multibytecodec_decerror(decodebuf, r, errors,
+                                    errorcb, namecb, stringdata)
+        src = pypy_cjk_dec_outbuf(decodebuf)
+        length = pypy_cjk_dec_outlen(decodebuf)
+        return rffi.wcharpsize2unicode(src, length)
     #
     finally:
         rffi.free_nonmovingbuffer(stringdata, inbuf)
@@ -174,13 +183,15 @@
 # ____________________________________________________________
 # Encoding
 ENCODEBUF_P = rffi.COpaquePtr('struct pypy_cjk_enc_s', compilation_info=eci)
+pypy_cjk_enc_new = llexternal('pypy_cjk_enc_new',
+                               [MULTIBYTECODEC_P], ENCODEBUF_P)
 pypy_cjk_enc_init = llexternal('pypy_cjk_enc_init',
-                               [MULTIBYTECODEC_P, rffi.CWCHARP, rffi.SSIZE_T],
-                               ENCODEBUF_P)
+                               [ENCODEBUF_P, rffi.CWCHARP, rffi.SSIZE_T],
+                               rffi.SSIZE_T)
 pypy_cjk_enc_free = llexternal('pypy_cjk_enc_free', [ENCODEBUF_P],
                                lltype.Void)
-pypy_cjk_enc_chunk = llexternal('pypy_cjk_enc_chunk', [ENCODEBUF_P],
-                                rffi.SSIZE_T)
+pypy_cjk_enc_chunk = llexternal('pypy_cjk_enc_chunk',
+                                [ENCODEBUF_P, rffi.SSIZE_T], rffi.SSIZE_T)
 pypy_cjk_enc_reset = llexternal('pypy_cjk_enc_reset', [ENCODEBUF_P],
                                 rffi.SSIZE_T)
 pypy_cjk_enc_outbuf = llexternal('pypy_cjk_enc_outbuf', [ENCODEBUF_P],
@@ -195,39 +206,52 @@
                                            [ENCODEBUF_P, rffi.CCHARP,
                                             rffi.SSIZE_T, rffi.SSIZE_T],
                                            rffi.SSIZE_T)
+pypy_cjk_enc_getcodec = llexternal('pypy_cjk_enc_getcodec',
+                                   [ENCODEBUF_P], MULTIBYTECODEC_P)
+MBENC_FLUSH = 1
+MBENC_RESET = 2
 
 def encode(codec, unicodedata, errors="strict", errorcb=None, namecb=None):
+    encodebuf = pypy_cjk_enc_new(codec)
+    if not encodebuf:
+        raise MemoryError
+    try:
+        return encodeex(encodebuf, unicodedata, errors, errorcb, namecb)
+    finally:
+        pypy_cjk_enc_free(encodebuf)
+
+def encodeex(encodebuf, unicodedata, errors="strict", errorcb=None,
+             namecb=None, ignore_error=0):
     inleft = len(unicodedata)
     inbuf = rffi.get_nonmoving_unicodebuffer(unicodedata)
     try:
-        encodebuf = pypy_cjk_enc_init(codec, inbuf, inleft)
-        if not encodebuf:
+        if pypy_cjk_enc_init(encodebuf, inbuf, inleft) < 0:
             raise MemoryError
-        try:
-            while True:
-                r = pypy_cjk_enc_chunk(encodebuf)
-                if r == 0:
-                    break
-                multibytecodec_encerror(encodebuf, r, errors,
-                                        codec, errorcb, namecb, unicodedata)
-            while True:
-                r = pypy_cjk_enc_reset(encodebuf)
-                if r == 0:
-                    break
-                multibytecodec_encerror(encodebuf, r, errors,
-                                        codec, errorcb, namecb, unicodedata)
-            src = pypy_cjk_enc_outbuf(encodebuf)
-            length = pypy_cjk_enc_outlen(encodebuf)
-            return rffi.charpsize2str(src, length)
-        #
-        finally:
-            pypy_cjk_enc_free(encodebuf)
+        if ignore_error == 0:
+            flags = MBENC_FLUSH | MBENC_RESET
+        else:
+            flags = MBENC_RESET
+        while True:
+            r = pypy_cjk_enc_chunk(encodebuf, flags)
+            if r == 0 or r == ignore_error:
+                break
+            multibytecodec_encerror(encodebuf, r, errors,
+                                    errorcb, namecb, unicodedata)
+        while True:
+            r = pypy_cjk_enc_reset(encodebuf)
+            if r == 0:
+                break
+            multibytecodec_encerror(encodebuf, r, errors,
+                                    errorcb, namecb, unicodedata)
+        src = pypy_cjk_enc_outbuf(encodebuf)
+        length = pypy_cjk_enc_outlen(encodebuf)
+        return rffi.charpsize2str(src, length)
     #
     finally:
         rffi.free_nonmoving_unicodebuffer(unicodedata, inbuf)
 
 def multibytecodec_encerror(encodebuf, e, errors,
-                            codec, errorcb, namecb, unicodedata):
+                            errorcb, namecb, unicodedata):
     if e > 0:
         reason = "illegal multibyte sequence"
         esize = e
@@ -248,6 +272,7 @@
     elif errors == "ignore":
         replace = ""
     elif errors == "replace":
+        codec = pypy_cjk_enc_getcodec(encodebuf)
         try:
             replace = encode(codec, u"?")
         except EncodeDecodeError:
diff --git a/pypy/module/_multibytecodec/interp_incremental.py b/pypy/module/_multibytecodec/interp_incremental.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/interp_incremental.py
@@ -0,0 +1,141 @@
+from pypy.rpython.lltypesystem import lltype
+from pypy.module._multibytecodec import c_codecs
+from pypy.module._multibytecodec.interp_multibytecodec import (
+    MultibyteCodec, wrap_unicodedecodeerror, wrap_runtimeerror,
+    wrap_unicodeencodeerror)
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.interpreter.typedef import TypeDef, GetSetProperty
+from pypy.module._codecs.interp_codecs import CodecState
+
+
+class MultibyteIncrementalBase(Wrappable):
+
+    def __init__(self, space, errors):
+        if errors is None:
+            errors = 'strict'
+        self.space = space
+        self.errors = errors
+        w_codec = space.getattr(space.wrap(self), space.wrap("codec"))
+        codec = space.interp_w(MultibyteCodec, w_codec)
+        self.codec = codec.codec
+        self.name = codec.name
+        self._initialize()
+
+    def __del__(self):
+        self._free()
+
+    def reset_w(self):
+        self._free()
+        self._initialize()
+
+    def fget_errors(self, space):
+        return space.wrap(self.errors)
+
+    def fset_errors(self, space, w_errors):
+        self.errors = space.str_w(w_errors)
+
+
+class MultibyteIncrementalDecoder(MultibyteIncrementalBase):
+
+    def _initialize(self):
+        self.decodebuf = c_codecs.pypy_cjk_dec_new(self.codec)
+        self.pending = ""
+
+    def _free(self):
+        self.pending = None
+        if self.decodebuf:
+            c_codecs.pypy_cjk_dec_free(self.decodebuf)
+            self.decodebuf = lltype.nullptr(c_codecs.DECODEBUF_P.TO)
+
+    @unwrap_spec(object=str, final=bool)
+    def decode_w(self, object, final=False):
+        space = self.space
+        state = space.fromcache(CodecState)
+        if len(self.pending) > 0:
+            object = self.pending + object
+        try:
+            output = c_codecs.decodeex(self.decodebuf, object, self.errors,
+                                       state.decode_error_handler, self.name,
+                                       get_ignore_error(final))
+        except c_codecs.EncodeDecodeError, e:
+            raise wrap_unicodedecodeerror(space, e, object, self.name)
+        except RuntimeError:
+            raise wrap_runtimeerror(space)
+        pos = c_codecs.pypy_cjk_dec_inbuf_consumed(self.decodebuf)
+        assert 0 <= pos <= len(object)
+        self.pending = object[pos:]
+        return space.wrap(output)
+
+
+@unwrap_spec(errors="str_or_None")
+def mbidecoder_new(space, w_subtype, errors=None):
+    r = space.allocate_instance(MultibyteIncrementalDecoder, w_subtype)
+    r.__init__(space, errors)
+    return space.wrap(r)
+
+MultibyteIncrementalDecoder.typedef = TypeDef(
+    'MultibyteIncrementalDecoder',
+    __module__ = '_multibytecodec',
+    __new__ = interp2app(mbidecoder_new),
+    decode  = interp2app(MultibyteIncrementalDecoder.decode_w),
+    reset   = interp2app(MultibyteIncrementalDecoder.reset_w),
+    errors  = GetSetProperty(MultibyteIncrementalDecoder.fget_errors,
+                             MultibyteIncrementalDecoder.fset_errors),
+    )
+
+
+class MultibyteIncrementalEncoder(MultibyteIncrementalBase):
+
+    def _initialize(self):
+        self.encodebuf = c_codecs.pypy_cjk_enc_new(self.codec)
+        self.pending = u""
+
+    def _free(self):
+        self.pending = None
+        if self.encodebuf:
+            c_codecs.pypy_cjk_enc_free(self.encodebuf)
+            self.encodebuf = lltype.nullptr(c_codecs.ENCODEBUF_P.TO)
+
+    @unwrap_spec(object=unicode, final=bool)
+    def encode_w(self, object, final=False):
+        space = self.space
+        state = space.fromcache(CodecState)
+        if len(self.pending) > 0:
+            object = self.pending + object
+        try:
+            output = c_codecs.encodeex(self.encodebuf, object, self.errors,
+                                       state.encode_error_handler, self.name,
+                                       get_ignore_error(final))
+        except c_codecs.EncodeDecodeError, e:
+            raise wrap_unicodeencodeerror(space, e, object, self.name)
+        except RuntimeError:
+            raise wrap_runtimeerror(space)
+        pos = c_codecs.pypy_cjk_enc_inbuf_consumed(self.encodebuf)
+        assert 0 <= pos <= len(object)
+        self.pending = object[pos:]
+        return space.wrap(output)
+
+
+@unwrap_spec(errors="str_or_None")
+def mbiencoder_new(space, w_subtype, errors=None):
+    r = space.allocate_instance(MultibyteIncrementalEncoder, w_subtype)
+    r.__init__(space, errors)
+    return space.wrap(r)
+
+MultibyteIncrementalEncoder.typedef = TypeDef(
+    'MultibyteIncrementalEncoder',
+    __module__ = '_multibytecodec',
+    __new__ = interp2app(mbiencoder_new),
+    encode  = interp2app(MultibyteIncrementalEncoder.encode_w),
+    reset   = interp2app(MultibyteIncrementalEncoder.reset_w),
+    errors  = GetSetProperty(MultibyteIncrementalEncoder.fget_errors,
+                             MultibyteIncrementalEncoder.fset_errors),
+    )
+
+
+def get_ignore_error(final):
+    if final:
+        return 0    # don't ignore any error
+    else:
+        return c_codecs.MBERR_TOOFEW
diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py
--- a/pypy/module/_multibytecodec/interp_multibytecodec.py
+++ b/pypy/module/_multibytecodec/interp_multibytecodec.py
@@ -22,17 +22,9 @@
             output = c_codecs.decode(self.codec, input, errors,
                                      state.decode_error_handler, self.name)
         except c_codecs.EncodeDecodeError, e:
-            raise OperationError(
-                space.w_UnicodeDecodeError,
-                space.newtuple([
-                    space.wrap(self.name),
-                    space.wrap(input),
-                    space.wrap(e.start),
-                    space.wrap(e.end),
-                    space.wrap(e.reason)]))
+            raise wrap_unicodedecodeerror(space, e, input, self.name)
         except RuntimeError:
-            raise OperationError(space.w_RuntimeError,
-                                 space.wrap("internal codec error"))
+            raise wrap_runtimeerror(space)
         return space.newtuple([space.wrap(output),
                                space.wrap(len(input))])
 
@@ -46,17 +38,9 @@
             output = c_codecs.encode(self.codec, input, errors,
                                      state.encode_error_handler, self.name)
         except c_codecs.EncodeDecodeError, e:
-            raise OperationError(
-                space.w_UnicodeEncodeError,
-                space.newtuple([
-                    space.wrap(self.name),
-                    space.wrap(input),
-                    space.wrap(e.start),
-                    space.wrap(e.end),
-                    space.wrap(e.reason)]))
+            raise wrap_unicodeencodeerror(space, e, input, self.name)
         except RuntimeError:
-            raise OperationError(space.w_RuntimeError,
-                                 space.wrap("internal codec error"))
+            raise wrap_runtimeerror(space)
         return space.newtuple([space.wrap(output),
                                space.wrap(len(input))])
 
@@ -78,3 +62,28 @@
         raise OperationError(space.w_LookupError,
                              space.wrap("no such codec is supported."))
     return space.wrap(MultibyteCodec(name, codec))
+
+
+def wrap_unicodedecodeerror(space, e, input, name):
+    return OperationError(
+        space.w_UnicodeDecodeError,
+        space.newtuple([
+            space.wrap(name),
+            space.wrap(input),
+            space.wrap(e.start),
+            space.wrap(e.end),
+            space.wrap(e.reason)]))
+
+def wrap_unicodeencodeerror(space, e, input, name):
+    raise OperationError(
+        space.w_UnicodeEncodeError,
+        space.newtuple([
+            space.wrap(name),
+            space.wrap(input),
+            space.wrap(e.start),
+            space.wrap(e.end),
+            space.wrap(e.reason)]))
+
+def wrap_runtimeerror(space):
+    raise OperationError(space.w_RuntimeError,
+                         space.wrap("internal codec error"))
diff --git a/pypy/module/_multibytecodec/test/test_app_incremental.py b/pypy/module/_multibytecodec/test/test_app_incremental.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/test/test_app_incremental.py
@@ -0,0 +1,163 @@
+from pypy.conftest import gettestobjspace
+
+
+class AppTestClasses:
+    def setup_class(cls):
+        cls.space = gettestobjspace(usemodules=['_multibytecodec'])
+        cls.w_IncrementalHzDecoder = cls.space.appexec([], """():
+            import _codecs_cn
+            from _multibytecodec import MultibyteIncrementalDecoder
+
+            class IncrementalHzDecoder(MultibyteIncrementalDecoder):
+                codec = _codecs_cn.getcodec('hz')
+
+            return IncrementalHzDecoder
+        """)
+        cls.w_IncrementalHzEncoder = cls.space.appexec([], """():
+            import _codecs_cn
+            from _multibytecodec import MultibyteIncrementalEncoder
+
+            class IncrementalHzEncoder(MultibyteIncrementalEncoder):
+                codec = _codecs_cn.getcodec('hz')
+
+            return IncrementalHzEncoder
+        """)
+        cls.w_IncrementalBig5hkscsEncoder = cls.space.appexec([], """():
+            import _codecs_cn
+            from _multibytecodec import MultibyteIncrementalEncoder
+
+            class IncrementalBig5hkscsEncoder(MultibyteIncrementalEncoder):
+                codec = _codecs_cn.getcodec('big5hkscs')
+
+            return IncrementalBig5hkscsEncoder
+        """)
+
+    def test_decode_hz(self):
+        d = self.IncrementalHzDecoder()
+        r = d.decode("~{abcd~}")
+        assert r == u'\u5f95\u6c85'
+        r = d.decode("~{efgh~}")
+        assert r == u'\u5f50\u73b7'
+        for c, output in zip("!~{abcd~}xyz~{efgh",
+              [u'!',  # !
+               u'',   # ~
+               u'',   # {
+               u'',   # a
+               u'\u5f95',   # b
+               u'',   # c
+               u'\u6c85',   # d
+               u'',   # ~
+               u'',   # }
+               u'x',  # x
+               u'y',  # y
+               u'z',  # z
+               u'',   # ~
+               u'',   # {
+               u'',   # e
+               u'\u5f50',   # f
+               u'',   # g
+               u'\u73b7',   # h
+               ]):
+            r = d.decode(c)
+            assert r == output
+
+    def test_decode_hz_final(self):
+        d = self.IncrementalHzDecoder()
+        r = d.decode("~{", True)
+        assert r == u''
+        raises(UnicodeDecodeError, d.decode, "~", True)
+        raises(UnicodeDecodeError, d.decode, "~{a", True)
+
+    def test_decode_hz_reset(self):
+        d = self.IncrementalHzDecoder()
+        r = d.decode("ab")
+        assert r == u'ab'
+        r = d.decode("~{")
+        assert r == u''
+        r = d.decode("ab")
+        assert r == u'\u5f95'
+        r = d.decode("ab")
+        assert r == u'\u5f95'
+        d.reset()
+        r = d.decode("ab")
+        assert r == u'ab'
+
+    def test_decode_hz_error(self):
+        d = self.IncrementalHzDecoder()
+        raises(UnicodeDecodeError, d.decode, "~{abc", True)
+        d = self.IncrementalHzDecoder("ignore")
+        r = d.decode("~{abc", True)
+        assert r == u'\u5f95'
+        d = self.IncrementalHzDecoder()
+        d.errors = "replace"
+        r = d.decode("~{abc", True)
+        assert r == u'\u5f95\ufffd'
+
+    def test_decode_hz_buffer_grow(self):
+        d = self.IncrementalHzDecoder()
+        for i in range(13):
+            r = d.decode("a" * (2**i))
+            assert r == u"a" * (2**i)
+
+    def test_encode_hz(self):
+        e = self.IncrementalHzEncoder()
+        r = e.encode("abcd")
+        assert r == 'abcd'
+        r = e.encode(u"\u5f95\u6c85")
+        assert r == '~{abcd~}'
+        r = e.encode(u"\u5f50")
+        assert r == '~{ef~}'
+        r = e.encode(u"\u73b7")
+        assert r == '~{gh~}'
+
+    def test_encode_hz_final(self):
+        e = self.IncrementalHzEncoder()
+        r = e.encode(u"xyz\u5f95\u6c85", True)
+        assert r == 'xyz~{abcd~}'
+        # This is a bit hard to test, because the only way I can see that
+        # encoders can return MBERR_TOOFEW is with surrogates, which only
+        # occur with 2-byte unicode characters...  We will just have to
+        # trust that the logic works, because it is exactly the same one
+        # as in the decode case :-/
+
+    def test_encode_hz_reset(self):
+        # Same issue as with test_encode_hz_final
+        e = self.IncrementalHzEncoder()
+        r = e.encode(u"xyz\u5f95\u6c85", True)
+        assert r == 'xyz~{abcd~}'
+        e.reset()
+        r = e.encode(u"xyz\u5f95\u6c85")
+        assert r == 'xyz~{abcd~}'
+
+    def test_encode_hz_error(self):
+        e = self.IncrementalHzEncoder()
+        raises(UnicodeEncodeError, e.encode, u"\u4321", True)
+        e = self.IncrementalHzEncoder("ignore")
+        r = e.encode(u"xy\u4321z", True)
+        assert r == 'xyz'
+        e = self.IncrementalHzEncoder()
+        e.errors = "replace"
+        r = e.encode(u"xy\u4321z", True)
+        assert r == 'xy?z'
+
+    def test_encode_hz_buffer_grow(self):
+        e = self.IncrementalHzEncoder()
+        for i in range(13):
+            r = e.encode(u"a" * (2**i))
+            assert r == "a" * (2**i)
+
+    def test_encode_big5hkscs(self):
+        #e = self.IncrementalBig5hkscsEncoder()
+        #r = e.encode(u'\xca', True)
+        #assert r == '\x88f'
+        #r = e.encode(u'\xca', True)
+        #assert r == '\x88f'
+        #raises(UnicodeEncodeError, e.encode, u'\u0304', True)
+        #
+        e = self.IncrementalBig5hkscsEncoder()
+        r = e.encode(u'\xca')
+        assert r == ''
+        r = e.encode(u'\xca')
+        assert r == '\x88f'
+        r = e.encode(u'\u0304')
+        assert r == '\x88b'
diff --git a/pypy/module/_multibytecodec/test/test_app_stream.py b/pypy/module/_multibytecodec/test/test_app_stream.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/test/test_app_stream.py
@@ -0,0 +1,93 @@
+from pypy.conftest import gettestobjspace
+
+
+class AppTestStreams:
+    def setup_class(cls):
+        cls.space = gettestobjspace(usemodules=['_multibytecodec'])
+        cls.w_HzStreamReader = cls.space.appexec([], """():
+            import _codecs_cn
+            from _multibytecodec import MultibyteStreamReader
+
+            class HzStreamReader(MultibyteStreamReader):
+                codec = _codecs_cn.getcodec('hz')
+
+            return HzStreamReader
+        """)
+        cls.w_HzStreamWriter = cls.space.appexec([], """():
+            import _codecs_cn
+            from _multibytecodec import MultibyteStreamWriter
+
+            class HzStreamWriter(MultibyteStreamWriter):
+                codec = _codecs_cn.getcodec('hz')
+
+            return HzStreamWriter
+        """)
+        cls.w_ShiftJisx0213StreamWriter = cls.space.appexec([], """():
+            import _codecs_jp
+            from _multibytecodec import MultibyteStreamWriter
+
+            class ShiftJisx0213StreamWriter(MultibyteStreamWriter):
+                codec = _codecs_jp.getcodec('shift_jisx0213')
+
+            return ShiftJisx0213StreamWriter
+        """)
+
+    def test_reader(self):
+        class FakeFile:
+            def __init__(self, data):
+                self.data = data
+                self.pos = 0
+            def read(self, size):
+                res = self.data[self.pos : self.pos + size]
+                self.pos += size
+                return res
+        #
+        r = self.HzStreamReader(FakeFile("!~{abcd~}xyz~{efgh"))
+        for expected in u'!\u5f95\u6c85xyz\u5f50\u73b7':
+            c = r.read(1)
+            assert c == expected
+        c = r.read(1)
+        assert c == ''
+
+    def test_reader_replace(self):
+        class FakeFile:
+            def __init__(self, data):
+                self.data = data
+            def read(self):
+                return self.data
+        #
+        r = self.HzStreamReader(FakeFile("!~{a"), "replace")
+        c = r.read()
+        assert c == u'!\ufffd'
+        #
+        r = self.HzStreamReader(FakeFile("!~{a"))
+        r.errors = "replace"
+        assert r.errors == "replace"
+        c = r.read()
+        assert c == u'!\ufffd'
+
+    def test_writer(self):
+        class FakeFile:
+            def __init__(self):
+                self.output = []
+            def write(self, data):
+                self.output.append(data)
+        #
+        w = self.HzStreamWriter(FakeFile())
+        for input in u'!\u5f95\u6c85xyz\u5f50\u73b7':
+            w.write(input)
+        assert w.stream.output == ['!', '~{ab~}', '~{cd~}', 'x', 'y', 'z',
+                                   '~{ef~}', '~{gh~}']
+
+    def test_no_flush(self):
+        class FakeFile:
+            def __init__(self):
+                self.output = []
+            def write(self, data):
+                self.output.append(data)
+        #
+        w = self.ShiftJisx0213StreamWriter(FakeFile())
+        w.write(u'\u30ce')
+        w.write(u'\u304b')
+        w.write(u'\u309a')
+        assert w.stream.output == ['\x83m', '', '\x82\xf5']
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -2,6 +2,7 @@
 from pypy.module._multibytecodec.c_codecs import getcodec, codecs
 from pypy.module._multibytecodec.c_codecs import decode, encode
 from pypy.module._multibytecodec.c_codecs import EncodeDecodeError
+from pypy.module._multibytecodec import c_codecs
 
 
 def test_codecs_existence():
@@ -22,6 +23,52 @@
     c = getcodec("hz")
     u = decode(c, "~{abc}")
     assert u == u'\u5f95\u6cef'
+    u = decode(c, "~{")
+    assert u == u''
+
+def test_decodeex_hz():
+    c = getcodec("hz")
+    decodebuf = c_codecs.pypy_cjk_dec_new(c)
+    u = c_codecs.decodeex(decodebuf, "~{abcd~}")
+    assert u == u'\u5f95\u6c85'
+    u = c_codecs.decodeex(decodebuf, "~{efgh~}")
+    assert u == u'\u5f50\u73b7'
+    u = c_codecs.decodeex(decodebuf, "!~{abcd~}xyz~{efgh")
+    assert u == u'!\u5f95\u6c85xyz\u5f50\u73b7'
+    c_codecs.pypy_cjk_dec_free(decodebuf)
+
+def test_decodeex_hz_incomplete():
+    c = getcodec("hz")
+    decodebuf = c_codecs.pypy_cjk_dec_new(c)
+    buf = ''
+    for c, output in zip("!~{abcd~}xyz~{efgh",
+          [u'!',  # !
+           u'',   # ~
+           u'',   # {
+           u'',   # a
+           u'\u5f95',   # b
+           u'',   # c
+           u'\u6c85',   # d
+           u'',   # ~
+           u'',   # }
+           u'x',  # x
+           u'y',  # y
+           u'z',  # z
+           u'',   # ~
+           u'',   # {
+           u'',   # e
+           u'\u5f50',   # f
+           u'',   # g
+           u'\u73b7',   # h
+           ]):
+        buf += c
+        u = c_codecs.decodeex(decodebuf, buf,
+                              ignore_error = c_codecs.MBERR_TOOFEW)
+        assert u == output
+        incompletepos = c_codecs.pypy_cjk_dec_inbuf_consumed(decodebuf)
+        buf = buf[incompletepos:]
+    assert buf == ''
+    c_codecs.pypy_cjk_dec_free(decodebuf)
 
 def test_decode_hz_error():
     # error
diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py
--- a/pypy/module/_rawffi/interp_rawffi.py
+++ b/pypy/module/_rawffi/interp_rawffi.py
@@ -268,7 +268,7 @@
             self.ll_buffer = rffi.cast(rffi.VOIDP, address)
         else:
             self.ll_buffer = lltype.malloc(rffi.VOIDP.TO, size, flavor='raw',
-                                           zero=True)
+                                           zero=True, add_memory_pressure=True)
             if tracker.DO_TRACING:
                 ll_buf = rffi.cast(lltype.Signed, self.ll_buffer)
                 tracker.trace_allocation(ll_buf, self)
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -226,7 +226,8 @@
                     some += size >> 3
                     self.allocated = size + some
                     new_buffer = lltype.malloc(mytype.arraytype,
-                                               self.allocated, flavor='raw')
+                                               self.allocated, flavor='raw',
+                                               add_memory_pressure=True)
                     for i in range(min(size, self.len)):
                         new_buffer[i] = self.buffer[i]
                 else:
diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h
--- a/pypy/module/cpyext/include/patchlevel.h
+++ b/pypy/module/cpyext/include/patchlevel.h
@@ -31,8 +31,9 @@
 /* PyPy version as a string */
 #define PYPY_VERSION "1.6.0"
 
-/* Subversion Revision number of this file (not of the repository) */
-#define PY_PATCHLEVEL_REVISION  "$Revision: 77872 $"
+/* Subversion Revision number of this file (not of the repository).
+ * Empty since Mercurial migration. */
+#define PY_PATCHLEVEL_REVISION  ""
 
 /* Version as a single 4-byte hex number, e.g. 0x010502B2 == 1.5.2b2.
    Use this for numeric comparisons, e.g. #if PY_VERSION_HEX >= ... */
diff --git a/pypy/module/itertools/interp_itertools.py b/pypy/module/itertools/interp_itertools.py
--- a/pypy/module/itertools/interp_itertools.py
+++ b/pypy/module/itertools/interp_itertools.py
@@ -339,16 +339,21 @@
                 start = 0
             else:
                 start = space.int_w(w_startstop)
+                if start < 0:
+                    raise OperationError(space.w_ValueError, space.wrap(
+                       "Indicies for islice() must be non-negative integers."))
             w_stop = args_w[0]
         else:
             raise OperationError(space.w_TypeError, space.wrap("islice() takes at most 4 arguments (" + str(num_args) + " given)"))
 
         if space.is_w(w_stop, space.w_None):
             stop = -1
-            stoppable = False
         else:
             stop = space.int_w(w_stop)
-            stoppable = True
+            if stop < 0:
+                raise OperationError(space.w_ValueError, space.wrap(
+                    "Stop argument must be a non-negative integer or None."))
+            stop = max(start, stop)    # for obscure CPython compatibility
 
         if num_args == 2:
             w_step = args_w[1]
@@ -356,38 +361,37 @@
                 step = 1
             else:
                 step = space.int_w(w_step)
+                if step < 1:
+                    raise OperationError(space.w_ValueError, space.wrap(
+                        "Step must be one or lager for islice()."))
         else:
             step = 1
 
-        if start < 0:
-            raise OperationError(space.w_ValueError, space.wrap("Indicies for islice() must be non-negative integers."))
-        if stoppable and stop < 0:
-            raise OperationError(space.w_ValueError, space.wrap("Stop argument must be a non-negative integer or None."))
-        if step < 1:
-            raise OperationError(space.w_ValueError, space.wrap("Step must be one or lager for islice()."))
-
+        self.ignore = step - 1
         self.start = start
         self.stop = stop
-        self.step = step
 
     def iter_w(self):
         return self.space.wrap(self)
 
     def next_w(self):
         if self.start >= 0:               # first call only
-            consume = self.start + 1
+            ignore = self.start
             self.start = -1
         else:                             # all following calls
-            consume = self.step
-        if consume > 1:
-            self._ignore_items(consume-1)
-        if self.stop >= 0:
-            if self.stop < consume:
+            ignore = self.ignore
+        stop = self.stop
+        if stop >= 0:
+            if stop <= ignore:
                 self.stop = 0   # reset the state so that a following next_w()
-                self.step = 1   # has no effect any more
+                                # has no effect any more
+                if stop > 0:
+                    self._ignore_items(stop)
                 raise OperationError(self.space.w_StopIteration,
                                      self.space.w_None)
-            self.stop -= consume
+            self.stop = stop - (ignore + 1)
+        if ignore > 0:
+            self._ignore_items(ignore)
         return self.space.next(self.iterable)
 
     def _ignore_items(self, num):
diff --git a/pypy/module/itertools/test/test_itertools.py b/pypy/module/itertools/test/test_itertools.py
--- a/pypy/module/itertools/test/test_itertools.py
+++ b/pypy/module/itertools/test/test_itertools.py
@@ -266,6 +266,13 @@
         raises(StopIteration, islc.next)  # drops the 6th and raise
         assert it.next() == "j"
 
+        it = iter("abcdefghij")
+        islc = itertools.islice(it, 3, 4, 3)
+        assert islc.next() == "d"    # drops 0, 1, 2, returns item #3
+        assert it.next() == "e"
+        raises(StopIteration, islc.next)  # item #4 is 'stop', so just raise
+        assert it.next() == "f"
+
     def test_islice_overflow(self):
         import itertools
         import sys
diff --git a/pypy/module/micronumpy/__init__.py b/pypy/module/micronumpy/__init__.py
--- a/pypy/module/micronumpy/__init__.py
+++ b/pypy/module/micronumpy/__init__.py
@@ -31,6 +31,9 @@
         'sin': 'interp_ufuncs.sin',
         'cos': 'interp_ufuncs.cos',
         'tan': 'interp_ufuncs.tan',
+        'arcsin': 'interp_ufuncs.arcsin',
+        'arccos': 'interp_ufuncs.arccos',
+        'arctan': 'interp_ufuncs.arctan',
     }
 
     appleveldefs = {
diff --git a/pypy/module/micronumpy/interp_numarray.py b/pypy/module/micronumpy/interp_numarray.py
--- a/pypy/module/micronumpy/interp_numarray.py
+++ b/pypy/module/micronumpy/interp_numarray.py
@@ -187,17 +187,17 @@
     def _getnums(self, comma):
         if self.find_size() > 1000:
             nums = [
-                float2string(self.getitem(index))
+                float2string(self.eval(index))
                 for index in range(3)
             ]
             nums.append("..." + "," * comma)
             nums.extend([
-                float2string(self.getitem(index))
+                float2string(self.eval(index))
                 for index in range(self.find_size() - 3, self.find_size())
             ])
         else:
             nums = [
-                float2string(self.getitem(index))
+                float2string(self.eval(index))
                 for index in range(self.find_size())
             ]
         return nums
@@ -229,7 +229,7 @@
         start, stop, step, slice_length = space.decode_index4(w_idx, self.find_size())
         if step == 0:
             # Single index
-            return space.wrap(self.get_concrete().getitem(start))
+            return space.wrap(self.get_concrete().eval(start))
         else:
             # Slice
             res = SingleDimSlice(start, stop, step, slice_length, self, self.signature.transition(SingleDimSlice.static_signature))
@@ -416,14 +416,12 @@
         # in fact, ViewArray never gets "concrete" as it never stores data.
         # This implementation is needed for BaseArray getitem/setitem to work,
         # can be refactored.
+        self.parent.get_concrete()
         return self
 
     def eval(self, i):
         return self.parent.eval(self.calc_index(i))
 
-    def getitem(self, item):
-        return self.parent.getitem(self.calc_index(item))
-
     @unwrap_spec(item=int, value=float)
     def setitem(self, item, value):
         return self.parent.setitem(self.calc_index(item), value)
@@ -479,7 +477,8 @@
         BaseArray.__init__(self)
         self.size = size
         self.storage = lltype.malloc(TP, size, zero=True,
-                                     flavor='raw', track_allocation=False)
+                                     flavor='raw', track_allocation=False,
+                                     add_memory_pressure=True)
         # XXX find out why test_zjit explodes with trackign of allocations
 
     def get_concrete(self):
@@ -497,9 +496,6 @@
     def descr_len(self, space):
         return space.wrap(self.size)
 
-    def getitem(self, item):
-        return self.storage[item]
-
     def setitem(self, item, value):
         self.invalidated()
         self.storage[item] = value
@@ -511,7 +507,7 @@
             self._sliceloop2(start, stop, step, arr, self)
 
     def __del__(self):
-        lltype.free(self.storage, flavor='raw')
+        lltype.free(self.storage, flavor='raw', track_allocation=False)
 
 def new_numarray(space, w_size_or_iterable):
     l = space.listview(w_size_or_iterable)
diff --git a/pypy/module/micronumpy/interp_ufuncs.py b/pypy/module/micronumpy/interp_ufuncs.py
--- a/pypy/module/micronumpy/interp_ufuncs.py
+++ b/pypy/module/micronumpy/interp_ufuncs.py
@@ -120,3 +120,20 @@
 @ufunc2
 def mod(lvalue, rvalue):
     return math.fmod(lvalue, rvalue)
+
+
+@ufunc
+def arcsin(value):
+    if value < -1.0 or  value > 1.0:
+        return rfloat.NAN
+    return math.asin(value)
+
+@ufunc
+def arccos(value):
+    if value < -1.0 or  value > 1.0:
+        return rfloat.NAN
+    return math.acos(value)
+
+@ufunc
+def arctan(value):
+    return math.atan(value)
diff --git a/pypy/module/micronumpy/test/test_numarray.py b/pypy/module/micronumpy/test/test_numarray.py
--- a/pypy/module/micronumpy/test/test_numarray.py
+++ b/pypy/module/micronumpy/test/test_numarray.py
@@ -70,6 +70,7 @@
         from numpy import array, zeros
         a = array(range(5))
         assert str(a) == "[0.0 1.0 2.0 3.0 4.0]"
+        assert str((2*a)[:]) == "[0.0 2.0 4.0 6.0 8.0]"
         a = zeros(1001)
         assert str(a) == "[0.0 0.0 0.0 ..., 0.0 0.0 0.0]"
 
diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py
--- a/pypy/module/micronumpy/test/test_ufuncs.py
+++ b/pypy/module/micronumpy/test/test_ufuncs.py
@@ -205,3 +205,47 @@
         b = tan(a)
         for i in range(len(a)):
             assert b[i] == math.tan(a[i])
+
+
+    def test_arcsin(self):
+        import math
+        from numpy import array, arcsin
+
+        a = array([-1, -0.5, -0.33, 0, 0.33, 0.5, 1])        
+        b = arcsin(a)
+        for i in range(len(a)):
+            assert b[i] == math.asin(a[i])
+
+        a = array([-10, -1.5, -1.01, 1.01, 1.5, 10, float('nan'), float('inf'), float('-inf')])
+        b = arcsin(a)
+        for f in b:
+            assert math.isnan(f)
+
+    def test_arccos(self):
+        import math
+        from numpy import array, arccos
+
+        a = array([-1, -0.5, -0.33, 0, 0.33, 0.5, 1])
+        b = arccos(a)
+        for i in range(len(a)):
+            assert b[i] == math.acos(a[i])
+
+        
+        a = array([-10, -1.5, -1.01, 1.01, 1.5, 10, float('nan'), float('inf'), float('-inf')])
+        b = arccos(a)
+        for f in b:
+            assert math.isnan(f)
+
+    def test_arctan(self):
+        import math
+        from numpy import array, arctan
+
+        a = array([-3, -2, -1, 0, 1, 2, 3, float('inf'), float('-inf')])
+        b = arctan(a)
+        for i in range(len(a)):
+            assert b[i] == math.atan(a[i])
+
+        a  = array([float('nan')])
+        b = arctan(a)
+        assert math.isnan(b[0])
+
diff --git a/pypy/module/pypyjit/test_pypy_c/test_array.py b/pypy/module/pypyjit/test_pypy_c/test_array.py
--- a/pypy/module/pypyjit/test_pypy_c/test_array.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_array.py
@@ -1,4 +1,4 @@
-import py
+import py, sys
 from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
 
 class TestArray(BaseTestPyPyC):
@@ -88,6 +88,73 @@
             jump(p0, p1, p2, p3, p4, p5, p6, i28, i15, p9, i10, i11, descr=<Loop0>)
         """)
 
+    def test_array_of_doubles(self):
+        def main():
+            from array import array
+            img = array('d', [21.5]*1000)
+            i = 0
+            while i < 1000:
+                img[i] += 20.5
+                assert img[i] == 42.0
+                i += 1
+            return 123
+        #
+        log = self.run(main, [])
+        assert log.result == 123
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i6, 1000)
+            guard_true(i10, descr=...)
+            i11 = int_lt(i6, i7)
+            guard_true(i11, descr=...)
+            f13 = getarrayitem_raw(i8, i6, descr=<FloatArrayNoLengthDescr>)
+            f15 = float_add(f13, 20.500000)
+            setarrayitem_raw(i8, i6, f15, descr=<FloatArrayNoLengthDescr>)
+            f16 = getarrayitem_raw(i8, i6, descr=<FloatArrayNoLengthDescr>)
+            i18 = float_eq(f16, 42.000000)
+            guard_true(i18, descr=...)
+            i20 = int_add(i6, 1)
+            --TICK--
+            jump(..., descr=<Loop0>)
+        """)
+
+    def test_array_of_floats(self):
+        def main():
+            from array import array
+            img = array('f', [21.5]*1000)
+            i = 0
+            while i < 1000:
+                img[i] += 20.5
+                assert img[i] == 42.0
+                i += 1
+            return 321
+        #
+        log = self.run(main, [])
+        assert log.result == 321
+        loop, = log.loops_by_filename(self.filepath)
+        if sys.maxint == 2147483647:
+            arraydescr = 'UnsignedArrayNoLengthDescr'
+        else:
+            arraydescr = 'UINTArrayNoLengthDescr'
+        assert loop.match("""
+            i10 = int_lt(i6, 1000)
+            guard_true(i10, descr=...)
+            i11 = int_lt(i6, i7)
+            guard_true(i11, descr=...)
+            i13 = getarrayitem_raw(i8, i6, descr=<%s>)
+            f14 = cast_singlefloat_to_float(i13)
+            f16 = float_add(f14, 20.500000)
+            i17 = cast_float_to_singlefloat(f16)
+            setarrayitem_raw(i8, i6,i17, descr=<%s>)
+            i18 = getarrayitem_raw(i8, i6, descr=<%s>)
+            f19 = cast_singlefloat_to_float(i18)
+            i21 = float_eq(f19, 42.000000)
+            guard_true(i21, descr=...)
+            i23 = int_add(i6, 1)
+            --TICK--
+            jump(..., descr=<Loop0>)
+        """ % (arraydescr, arraydescr, arraydescr))
+
 
     def test_zeropadded(self):
         def main():
diff --git a/pypy/module/pypyjit/test_pypy_c/test_misc.py b/pypy/module/pypyjit/test_pypy_c/test_misc.py
--- a/pypy/module/pypyjit/test_pypy_c/test_misc.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_misc.py
@@ -63,6 +63,7 @@
             i7 = int_gt(i4, 1)
             guard_true(i7, descr=...)
             p9 = call(ConstClass(fromint), i4, descr=...)
+            guard_no_exception(descr=...)
             p11 = call(ConstClass(rbigint.mul), p5, p9, descr=...)
             guard_no_exception(descr=...)
             i13 = int_sub(i4, 1)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py
--- a/pypy/module/pypyjit/test_pypy_c/test_string.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_string.py
@@ -90,12 +90,12 @@
             i46 = call(ConstClass(ll_startswith__rpy_stringPtr_rpy_stringPtr), p28, ConstPtr(ptr45), descr=<BoolCallDescr>)
             guard_false(i46, descr=...)
             p51 = new_with_vtable(21136408)
-            setfield_gc(p51, p28, descr=<GcPtrFieldDescr .*NumberStringParser.inst_literal .*>)
-            setfield_gc(p51, ConstPtr(ptr51), descr=<GcPtrFieldDescr pypy.objspace.std.strutil.NumberStringParser.inst_fname .*>)
-            setfield_gc(p51, 1, descr=<SignedFieldDescr .*NumberStringParser.inst_sign .*>)
-            setfield_gc(p51, 16, descr=<SignedFieldDescr .*NumberStringParser.inst_base .*>)
-            setfield_gc(p51, p28, descr=<GcPtrFieldDescr .*NumberStringParser.inst_s .*>)
-            setfield_gc(p51, i29, descr=<SignedFieldDescr .*NumberStringParser.inst_n .*>)
+            setfield_gc(p51, _, descr=...)    # 6 setfields, but the order is dict-order-dependent
+            setfield_gc(p51, _, descr=...)
+            setfield_gc(p51, _, descr=...)
+            setfield_gc(p51, _, descr=...)
+            setfield_gc(p51, _, descr=...)
+            setfield_gc(p51, _, descr=...)
             p55 = call(ConstClass(parse_digit_string), p51, descr=<GcPtrCallDescr>)
             guard_no_exception(descr=...)
             i57 = call(ConstClass(rbigint.toint), p55, descr=<SignedCallDescr>)
diff --git a/pypy/module/rctime/interp_time.py b/pypy/module/rctime/interp_time.py
--- a/pypy/module/rctime/interp_time.py
+++ b/pypy/module/rctime/interp_time.py
@@ -207,13 +207,13 @@
         t = (((c_time(lltype.nullptr(rffi.TIME_TP.TO))) / YEAR) * YEAR)
         # we cannot have reference to stack variable, put it on the heap
         t_ref = lltype.malloc(rffi.TIME_TP.TO, 1, flavor='raw')
-        t_ref[0] = t
+        t_ref[0] = rffi.cast(rffi.TIME_T, t)
         p = c_localtime(t_ref)
         janzone = -p.c_tm_gmtoff
         tm_zone = rffi.charp2str(p.c_tm_zone)
         janname = ["   ", tm_zone][bool(tm_zone)]
         tt = t + YEAR / 2
-        t_ref[0] = tt
+        t_ref[0] = rffi.cast(rffi.TIME_T, tt)
         p = c_localtime(t_ref)
         lltype.free(t_ref, flavor='raw')
         tm_zone = rffi.charp2str(p.c_tm_zone)
@@ -292,11 +292,14 @@
     else:
         seconds = space.float_w(w_seconds)
     try:
-        ovfcheck_float_to_int(seconds)
+        seconds = ovfcheck_float_to_int(seconds)
+        t = rffi.r_time_t(seconds)
+        if rffi.cast(lltype.Signed, t) != seconds:
+            raise OverflowError
     except OverflowError:
         raise OperationError(space.w_ValueError,
                              space.wrap("time argument too large"))
-    return rffi.r_time_t(seconds)
+    return t
 
 def _tm_to_tuple(space, t):
     time_tuple = [
@@ -317,7 +320,7 @@
 def _gettmarg(space, w_tup, allowNone=True):
     if allowNone and space.is_w(w_tup, space.w_None):
         # default to the current local time
-        tt = rffi.r_time_t(pytime.time())
+        tt = rffi.r_time_t(int(pytime.time()))
         t_ref = lltype.malloc(rffi.TIME_TP.TO, 1, flavor='raw')
         t_ref[0] = tt
         pbuf = c_localtime(t_ref)
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py b/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py
@@ -1,4 +1,4 @@
-from ctypes import CDLL, POINTER, pointer, c_byte, c_int, c_char_p
+from ctypes import CDLL, POINTER, pointer, c_byte, c_int, c_char_p, CFUNCTYPE, c_void_p, c_size_t
 import sys
 import py
 from support import BaseCTypesTestChecker
@@ -46,6 +46,12 @@
         tf_b.argtypes = (c_byte,)
         assert tf_b(-126) == -42
 
+    def test_from_cfunctype(self):
+        from _ctypes import _memmove_addr
+        functype = CFUNCTYPE(c_void_p, c_void_p, c_void_p, c_size_t)
+        my_memmove = functype(_memmove_addr)
+        assert my_memmove._is_fastpath
+
     def test_undeclared_restype(self):
         # make sure we get a fresh function
         try:
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py
@@ -424,6 +424,15 @@
             sys.settrace(oldtrace)
             events = None
 
+    def test_large_fields(self):
+        # make sure that large fields are not "confused" with bitfields
+        # (because the bitfields use the higher bits of the "size" attribute)
+        Array = c_long * 8192
+        class X(Structure):
+            _fields_ = [('items', Array)]
+        obj = X()
+        assert isinstance(obj.items, Array)
+
 class TestPointerMember(BaseCTypesTestChecker):
 
     def test_1(self):
diff --git a/pypy/objspace/flow/operation.py b/pypy/objspace/flow/operation.py
--- a/pypy/objspace/flow/operation.py
+++ b/pypy/objspace/flow/operation.py
@@ -359,10 +359,10 @@
                 # All arguments are constants: call the operator now
                 try:
                     result = op(*args)
-                except:
-                    etype, evalue, etb = sys.exc_info()
-                    msg = "generated by a constant operation:  %s%r" % (
-                        name, tuple(args))
+                except Exception, e:
+                    etype = e.__class__
+                    msg = "generated by a constant operation:  %s" % (
+                        name)
                     raise OperationThatShouldNotBePropagatedError(
                         self.wrap(etype), self.wrap(msg))
                 else:
diff --git a/pypy/objspace/std/mapdict.py b/pypy/objspace/std/mapdict.py
--- a/pypy/objspace/std/mapdict.py
+++ b/pypy/objspace/std/mapdict.py
@@ -421,6 +421,14 @@
         key = ("slot", SLOTS_STARTING_FROM + index)
         self._get_mapdict_map().write(self, key, w_value)
 
+    def delslotvalue(self, index):
+        key = ("slot", SLOTS_STARTING_FROM + index)
+        new_obj = self._get_mapdict_map().delete(self, key)
+        if new_obj is None:
+            return False
+        self._become(new_obj)
+        return True
+
     # used by _weakref implemenation
 
     def getweakref(self):
diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py
--- a/pypy/objspace/std/stringobject.py
+++ b/pypy/objspace/std/stringobject.py
@@ -913,7 +913,7 @@
 def repr__String(space, w_str):
     s = w_str._value
 
-    buf = StringBuilder(50)
+    buf = StringBuilder(len(s) + 2)
 
     quote = "'"
     if quote in s and '"' not in s:
diff --git a/pypy/objspace/std/test/test_mapdict.py b/pypy/objspace/std/test/test_mapdict.py
--- a/pypy/objspace/std/test/test_mapdict.py
+++ b/pypy/objspace/std/test/test_mapdict.py
@@ -210,6 +210,12 @@
     assert obj2.storage == [501, 601, 701, 51, 61, 71]
     assert obj.map is obj2.map
 
+    assert obj2.getslotvalue(b) == 601
+    assert obj2.delslotvalue(b)
+    assert obj2.getslotvalue(b) is None
+    assert obj2.storage == [501, 701, 51, 61, 71]
+    assert not obj2.delslotvalue(b)
+
 
 def test_slots_no_dict():
     cls = Class(hasdict=False)
@@ -631,6 +637,14 @@
         a.__dict__ = {}
         a.__dict__ = {}
 
+    def test_delete_slot(self):
+        class A(object):
+            __slots__ = ['x']
+        
+        a = A()
+        a.x = 42
+        del a.x
+        raises(AttributeError, "a.x")
 
 class AppTestWithMapDictAndCounters(object):
     def setup_class(cls):
diff --git a/pypy/objspace/std/tupleobject.py b/pypy/objspace/std/tupleobject.py
--- a/pypy/objspace/std/tupleobject.py
+++ b/pypy/objspace/std/tupleobject.py
@@ -154,7 +154,7 @@
     x = 0x345678
     z = len(wrappeditems)
     for w_item in wrappeditems:
-        y = space.int_w(space.hash(w_item))
+        y = space.hash_w(w_item)
         x = (x ^ y) * mult
         z -= 1
         mult += 82520 + z + z
diff --git a/pypy/rlib/jit.py b/pypy/rlib/jit.py
--- a/pypy/rlib/jit.py
+++ b/pypy/rlib/jit.py
@@ -1,14 +1,10 @@
-import functools
+import py
 import sys
-
-import py
-
+from pypy.rpython.extregistry import ExtRegistryEntry
+from pypy.rlib.objectmodel import CDefinedIntSymbolic
+from pypy.rlib.objectmodel import keepalive_until_here, specialize
+from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib.nonconst import NonConstant
-from pypy.rlib.objectmodel import (CDefinedIntSymbolic, keepalive_until_here,
-    specialize)
-from pypy.rlib.unroll import unrolling_iterable
-from pypy.rpython.extregistry import ExtRegistryEntry
-
 
 def elidable(func):
     """ Decorate a function as "trace-elidable". This means precisely that:
@@ -95,7 +91,7 @@
         d = {"func": func, "hint": hint}
         exec py.code.Source("\n".join(code)).compile() in d
         result = d["f"]
-        functools.wraps(func)(result)
+        result.func_name = func.func_name + "_promote"
         return result
     return decorator
 
@@ -119,7 +115,7 @@
         s_x = annmodel.not_const(s_x)
         access_directly = 's_access_directly' in kwds_s
         fresh_virtualizable = 's_fresh_virtualizable' in kwds_s
-        if  access_directly or fresh_virtualizable:
+        if access_directly or fresh_virtualizable:
             assert access_directly, "lone fresh_virtualizable hint"
             if isinstance(s_x, annmodel.SomeInstance):
                 from pypy.objspace.flow.model import Constant
@@ -575,7 +571,7 @@
                 c_llname = hop.inputconst(lltype.Void, mangled_name)
                 getfield_op = self.get_getfield_op(hop.rtyper)
                 v_green = hop.genop(getfield_op, [v_red, c_llname],
-                                    resulttype = r_field)
+                                    resulttype=r_field)
                 s_green = s_red.classdef.about_attribute(fieldname)
                 assert s_green is not None
                 hop.rtyper.annotator.setbinding(v_green, s_green)
diff --git a/pypy/rlib/libffi.py b/pypy/rlib/libffi.py
--- a/pypy/rlib/libffi.py
+++ b/pypy/rlib/libffi.py
@@ -2,14 +2,13 @@
 
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.rlib.objectmodel import specialize, enforceargs, we_are_translated
-from pypy.rlib.rarithmetic import intmask, r_uint, r_singlefloat
+from pypy.rlib.rarithmetic import intmask, r_uint, r_singlefloat, r_longlong
 from pypy.rlib import jit
 from pypy.rlib import clibffi
 from pypy.rlib.clibffi import get_libc_name, FUNCFLAG_CDECL, AbstractFuncPtr, \
     push_arg_as_ffiptr, c_ffi_call, FFI_TYPE_STRUCT
 from pypy.rlib.rdynload import dlopen, dlclose, dlsym, dlsym_byordinal
 from pypy.rlib.rdynload import DLLHANDLE
-from pypy.rlib.longlong2float import longlong2float, float2longlong
 
 class types(object):
     """
@@ -122,9 +121,10 @@
         elif TYPE is rffi.DOUBLE:
             cls = FloatArg
         elif TYPE is rffi.LONGLONG or TYPE is rffi.ULONGLONG:
-            raise TypeError, 'r_(u)longlong not supported by arg(), use arg_(u)longlong()'
+            cls = LongLongArg
+            val = rffi.cast(rffi.LONGLONG, val)
         elif TYPE is rffi.FLOAT:
-            raise TypeError, 'r_singlefloat not supported by arg(), use arg_singlefloat()'
+            cls = SingleFloatArg
         else:
             raise TypeError, 'Unsupported argument type: %s' % TYPE
         self._append(cls(val))
@@ -133,25 +133,6 @@
     def arg_raw(self, val):
         self._append(RawArg(val))
 
-    def arg_longlong(self, val):
-        """
-        Note: this is a hack. So far, the JIT does not support long longs, so
-        you must pass it as if it were a python Float (rffi.DOUBLE).  You can
-        use the convenience functions longlong2float and float2longlong to do
-        the conversions.  Note that if you use long longs, the call won't
-        be jitted at all.
-        """
-        assert IS_32_BIT      # use a normal integer on 64-bit platforms
-        self._append(LongLongArg(val))
-
-    def arg_singlefloat(self, val):
-        """
-        Note: you must pass a python Float (rffi.DOUBLE), not a r_singlefloat
-        (else the jit complains).  Note that if you use single floats, the
-        call won't be jitted at all.
-        """
-        self._append(SingleFloatArg(val))
-
     def _append(self, arg):
         if self.first is None:
             self.first = self.last = arg
@@ -196,25 +177,25 @@
         func._push_raw(self.ptrval, ll_args, i)
 
 class SingleFloatArg(AbstractArg):
-    """ An argument representing a C float (but holding a C double)
+    """ An argument representing a C float
     """
 
-    def __init__(self, floatval):
-        self.floatval = floatval
+    def __init__(self, singlefloatval):
+        self.singlefloatval = singlefloatval
 
     def push(self, func, ll_args, i):
-        func._push_single_float(self.floatval, ll_args, i)
+        func._push_singlefloat(self.singlefloatval, ll_args, i)
 
 
 class LongLongArg(AbstractArg):
-    """ An argument representing a C long long (but holding a C double)
+    """ An argument representing a C long long
     """
 
-    def __init__(self, floatval):
-        self.floatval = floatval
+    def __init__(self, longlongval):
+        self.longlongval = longlongval
 
     def push(self, func, ll_args, i):
-        func._push_longlong(self.floatval, ll_args, i)
+        func._push_longlong(self.longlongval, ll_args, i)
 
 
 # ======================================================================
@@ -274,15 +255,10 @@
         elif RESULT is rffi.DOUBLE:
             return self._do_call_float(self.funcsym, ll_args)
         elif RESULT is rffi.FLOAT:
-            # XXX: even if RESULT is FLOAT, we still return a DOUBLE, else the
-            # jit complains. Note that the jit is disabled in this case
-            return self._do_call_single_float(self.funcsym, ll_args)
+            return self._do_call_singlefloat(self.funcsym, ll_args)
         elif RESULT is rffi.LONGLONG or RESULT is rffi.ULONGLONG:
-            # XXX: even if RESULT is LONGLONG, we still return a DOUBLE, else the
-            # jit complains. Note that the jit is disabled in this case
-            # (it's not a typo, we really return a DOUBLE)
             assert IS_32_BIT
-            return self._do_call_longlong(self.funcsym, ll_args)
+            res = self._do_call_longlong(self.funcsym, ll_args)
         elif RESULT is lltype.Void:
             return self._do_call_void(self.funcsym, ll_args)
         else:
@@ -320,16 +296,15 @@
     def _push_float(self, value, ll_args, i):
         self._push_arg(value, ll_args, i)
 
-    @jit.dont_look_inside
-    def _push_single_float(self, value, ll_args, i):
-        self._push_arg(r_singlefloat(value), ll_args, i)
+    @jit.oopspec('libffi_push_singlefloat(self, value, ll_args, i)')
+    @enforceargs(None, r_singlefloat, None, int) # fix the annotation for tests
+    def _push_singlefloat(self, value, ll_args, i):
+        self._push_arg(value, ll_args, i)
 
-    @jit.dont_look_inside
-    def _push_longlong(self, floatval, ll_args, i):
-        """
-        Takes a longlong represented as a python Float. It's a hack for the
-        jit, else we could not see the whole libffi module at all"""  
-        self._push_arg(float2longlong(floatval), ll_args, i)
+    @jit.oopspec('libffi_push_longlong(self, value, ll_args, i)')
+    @enforceargs(None, r_longlong, None, int) # fix the annotation for tests
+    def _push_longlong(self, value, ll_args, i):
+        self._push_arg(value, ll_args, i)
 
     @jit.oopspec('libffi_call_int(self, funcsym, ll_args)')
     def _do_call_int(self, funcsym, ll_args):
@@ -339,20 +314,18 @@
     def _do_call_float(self, funcsym, ll_args):
         return self._do_call(funcsym, ll_args, rffi.DOUBLE)
 
-    @jit.dont_look_inside
-    def _do_call_single_float(self, funcsym, ll_args):
-        single_res = self._do_call(funcsym, ll_args, rffi.FLOAT)
-        return float(single_res)
+    @jit.oopspec('libffi_call_singlefloat(self, funcsym, ll_args)')
+    def _do_call_singlefloat(self, funcsym, ll_args):
+        return self._do_call(funcsym, ll_args, rffi.FLOAT)
 
     @jit.dont_look_inside
     def _do_call_raw(self, funcsym, ll_args):
         # same as _do_call_int, but marked as jit.dont_look_inside
         return self._do_call(funcsym, ll_args, rffi.LONG)
 
-    @jit.dont_look_inside
+    @jit.oopspec('libffi_call_longlong(self, funcsym, ll_args)')
     def _do_call_longlong(self, funcsym, ll_args):
-        llres = self._do_call(funcsym, ll_args, rffi.LONGLONG)
-        return longlong2float(llres)
+        return self._do_call(funcsym, ll_args, rffi.LONGLONG)
 
     @jit.oopspec('libffi_call_void(self, funcsym, ll_args)')
     def _do_call_void(self, funcsym, ll_args):
diff --git a/pypy/rlib/longlong2float.py b/pypy/rlib/longlong2float.py
--- a/pypy/rlib/longlong2float.py
+++ b/pypy/rlib/longlong2float.py
@@ -11,6 +11,8 @@
 # -------- implement longlong2float and float2longlong --------
 DOUBLE_ARRAY_PTR = lltype.Ptr(lltype.Array(rffi.DOUBLE))
 LONGLONG_ARRAY_PTR = lltype.Ptr(lltype.Array(rffi.LONGLONG))
+UINT_ARRAY_PTR = lltype.Ptr(lltype.Array(rffi.UINT))
+FLOAT_ARRAY_PTR = lltype.Ptr(lltype.Array(rffi.FLOAT))
 
 # these definitions are used only in tests, when not translated
 def longlong2float_emulator(llval):
@@ -29,6 +31,22 @@
     lltype.free(d_array, flavor='raw')
     return llval
 
+def uint2singlefloat_emulator(ival):
+    f_array = lltype.malloc(FLOAT_ARRAY_PTR.TO, 1, flavor='raw')
+    i_array = rffi.cast(UINT_ARRAY_PTR, f_array)
+    i_array[0] = ival
+    singlefloatval = f_array[0]
+    lltype.free(f_array, flavor='raw')
+    return singlefloatval
+
+def singlefloat2uint_emulator(singlefloatval):
+    f_array = lltype.malloc(FLOAT_ARRAY_PTR.TO, 1, flavor='raw')
+    i_array = rffi.cast(UINT_ARRAY_PTR, f_array)
+    f_array[0] = singlefloatval
+    ival = i_array[0]
+    lltype.free(f_array, flavor='raw')
+    return ival
+
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 eci = ExternalCompilationInfo(includes=['string.h', 'assert.h'],
                               post_include_bits=["""
@@ -44,6 +62,18 @@
     memcpy(&ll, &x, 8);
     return ll;
 }
+static float pypy__uint2singlefloat(unsigned int x) {
+    float ff;
+    assert(sizeof(float) == 4 && sizeof(unsigned int) == 4);
+    memcpy(&ff, &x, 4);
+    return ff;
+}
+static unsigned int pypy__singlefloat2uint(float x) {
+    unsigned int ii;
+    assert(sizeof(float) == 4 && sizeof(unsigned int) == 4);
+    memcpy(&ii, &x, 4);
+    return ii;
+}
 """])
 
 longlong2float = rffi.llexternal(
@@ -55,3 +85,13 @@
     "pypy__float2longlong", [rffi.DOUBLE], rffi.LONGLONG,
     _callable=float2longlong_emulator, compilation_info=eci,
     _nowrapper=True, elidable_function=True)
+
+uint2singlefloat = rffi.llexternal(
+    "pypy__uint2singlefloat", [rffi.UINT], rffi.FLOAT,
+    _callable=uint2singlefloat_emulator, compilation_info=eci,
+    _nowrapper=True, elidable_function=True)
+
+singlefloat2uint = rffi.llexternal(
+    "pypy__singlefloat2uint", [rffi.FLOAT], rffi.UINT,
+    _callable=singlefloat2uint_emulator, compilation_info=eci,
+    _nowrapper=True, elidable_function=True)
diff --git a/pypy/rlib/rarithmetic.py b/pypy/rlib/rarithmetic.py
--- a/pypy/rlib/rarithmetic.py
+++ b/pypy/rlib/rarithmetic.py
@@ -71,9 +71,8 @@
     return int(n)
 
 def longlongmask(n):
-    if isinstance(n, int):
-        n = long(n)
-    assert isinstance(n, long)
+    assert isinstance(n, (int, long))
+    n = long(n)
     n &= LONGLONG_MASK
     if n >= LONGLONG_TEST:
         n -= 2*LONGLONG_TEST
diff --git a/pypy/rlib/test/test_libffi.py b/pypy/rlib/test/test_libffi.py
--- a/pypy/rlib/test/test_libffi.py
+++ b/pypy/rlib/test/test_libffi.py
@@ -5,7 +5,7 @@
 from pypy.rlib.rarithmetic import r_singlefloat, r_longlong, r_ulonglong
 from pypy.rlib.test.test_clibffi import BaseFfiTest, get_libm_name, make_struct_ffitype_e
 from pypy.rlib.libffi import CDLL, Func, get_libc_name, ArgChain, types
-from pypy.rlib.libffi import longlong2float, float2longlong, IS_32_BIT
+from pypy.rlib.libffi import IS_32_BIT
 
 class TestLibffiMisc(BaseFfiTest):
 
@@ -52,19 +52,6 @@
         del lib
         assert not ALLOCATED
 
-    def test_longlong_as_float(self):
-        from pypy.translator.c.test.test_genc import compile
-        maxint64 = r_longlong(9223372036854775807)
-        def fn(x):
-            d = longlong2float(x)
-            ll = float2longlong(d)
-            return ll
-        assert fn(maxint64) == maxint64
-        #
-        fn2 = compile(fn, [r_longlong])
-        res = fn2(maxint64)
-        assert res == maxint64
-
 class TestLibffiCall(BaseFfiTest):
     """
     Test various kind of calls through libffi.
@@ -111,7 +98,7 @@
     def get_libfoo(self):
         return self.CDLL(self.libfoo_name)
 
-    def call(self, funcspec, args, RESULT, init_result=0, is_struct=False):
+    def call(self, funcspec, args, RESULT, is_struct=False, jitif=[]):
         """
         Call the specified function after constructing and ArgChain with the
         arguments in ``args``.
@@ -128,14 +115,7 @@
         func = lib.getpointer(name, argtypes, restype)
         chain = ArgChain()
         for arg in args:
-            if isinstance(arg, r_singlefloat):
-                chain.arg_singlefloat(float(arg))
-            elif IS_32_BIT and isinstance(arg, r_longlong):
-                chain.arg_longlong(longlong2float(arg))
-            elif IS_32_BIT and isinstance(arg, r_ulonglong):
-                arg = rffi.cast(rffi.LONGLONG, arg)
-                chain.arg_longlong(longlong2float(arg))
-            elif isinstance(arg, tuple):
+            if isinstance(arg, tuple):
                 methname, arg = arg
                 meth = getattr(chain, methname)
                 meth(arg)
@@ -143,13 +123,19 @@
                 chain.arg(arg)
         return func.call(chain, RESULT, is_struct=is_struct)
 
-    def check_loops(self, *args, **kwds):
+    # ------------------------------------------------------------------------
+
+    def test_very_simple(self):
         """
-        Ignored here, but does something in the JIT tests
+            int diff_xy(int x, long y)
+            {
+                return x - y;
+            }
         """
-        pass
-
-    # ------------------------------------------------------------------------
+        libfoo = self.get_libfoo() 
+        func = (libfoo, 'diff_xy', [types.sint, types.slong], types.sint)
+        res = self.call(func, [50, 8], lltype.Signed)
+        assert res == 42
 
     def test_simple(self):
         """
@@ -160,23 +146,14 @@
         """
         libfoo = self.get_libfoo() 
         func = (libfoo, 'sum_xy', [types.sint, types.double], types.sint)
-        res = self.call(func, [38, 4.2], rffi.LONG)
+        res = self.call(func, [38, 4.2], lltype.Signed, jitif=["floats"])
         assert res == 42
-        self.check_loops({
-                'call_release_gil': 1,
-                'guard_no_exception': 1,
-                'guard_not_forced': 1,
-                'int_add': 1,
-                'int_lt': 1,
-                'guard_true': 1,
-                'jump': 1})
 
     def test_float_result(self):
         libm = self.get_libm()
         func = (libm, 'pow', [types.double, types.double], types.double)
-        res = self.call(func, [2.0, 3.0], rffi.DOUBLE, init_result=0.0)
+        res = self.call(func, [2.0, 3.0], rffi.DOUBLE, jitif=["floats"])
         assert res == 8.0
-        self.check_loops(call_release_gil=1, guard_no_exception=1, guard_not_forced=1)
 
     def test_cast_result(self):
         """
@@ -189,7 +166,6 @@
         func = (libfoo, 'cast_to_uchar_and_ovf', [types.sint], types.uchar)
         res = self.call(func, [0], rffi.UCHAR)
         assert res == 200
-        self.check_loops(call_release_gil=1, guard_no_exception=1, guard_not_forced=1)
 
     def test_cast_argument(self):
         """
@@ -271,8 +247,7 @@
         libfoo = self.get_libfoo()
         func = (libfoo, 'get_pointer_to_b', [], types.pointer)
         LONGP = lltype.Ptr(rffi.CArray(rffi.LONG))
-        null = lltype.nullptr(LONGP.TO)
-        res = self.call(func, [], LONGP, init_result=null)
+        res = self.call(func, [], LONGP)
         assert res[0] == 20
 
     def test_void_result(self):
@@ -287,7 +262,7 @@
         #
         initval = self.call(get_dummy, [], rffi.LONG)
         #
-        res = self.call(set_dummy, [initval+1], lltype.Void, init_result=None)
+        res = self.call(set_dummy, [initval+1], lltype.Void)
         assert res is None
         #
         res = self.call(get_dummy, [], rffi.LONG)
@@ -305,9 +280,9 @@
         func = (libfoo, 'sum_xy_float', [types.float, types.float], types.float)
         x = r_singlefloat(12.34)
         y = r_singlefloat(56.78)
-        res = self.call(func, [x, y], rffi.FLOAT, init_result=0.0)
+        res = self.call(func, [x, y], rffi.FLOAT, jitif=["singlefloats"])
         expected = c_float(c_float(12.34).value + c_float(56.78).value).value
-        assert res == expected
+        assert float(res) == expected
 
     def test_slonglong_args(self):
         """
@@ -325,16 +300,10 @@
         if IS_32_BIT:
             x = r_longlong(maxint32+1)
             y = r_longlong(maxint32+2)
-            zero = longlong2float(r_longlong(0))
         else:
             x = maxint32+1
             y = maxint32+2
-            zero = 0
-        res = self.call(func, [x, y], rffi.LONGLONG, init_result=zero)
-        if IS_32_BIT:
-            # obscure, on 32bit it's really a long long, so it returns a
-            # DOUBLE because of the JIT hack
-            res = float2longlong(res)
+        res = self.call(func, [x, y], rffi.LONGLONG, jitif=["longlong"])
         expected = maxint32*2 + 3
         assert res == expected
 
@@ -354,12 +323,7 @@
                 types.ulonglong)
         x = r_ulonglong(maxint64+1)
         y = r_ulonglong(2)
-        res = self.call(func, [x, y], rffi.ULONGLONG, init_result=0)
-        if IS_32_BIT:
-            # obscure, on 32bit it's really a long long, so it returns a
-            # DOUBLE because of the JIT hack
-            res = float2longlong(res)
-            res = rffi.cast(rffi.ULONGLONG, res)
+        res = self.call(func, [x, y], rffi.ULONGLONG, jitif=["longlong"])
         expected = maxint64 + 3
         assert res == expected
 
@@ -406,7 +370,8 @@
         buf[0] = 30
         buf[1] = 12
         adr = rffi.cast(rffi.VOIDP, buf)
-        res = self.call(sum_point, [('arg_raw', adr)], rffi.LONG, init_result=0)
+        res = self.call(sum_point, [('arg_raw', adr)], rffi.LONG,
+                        jitif=["byval"])
         assert res == 42
         # check that we still have the ownership on the buffer
         assert buf[0] == 30
@@ -431,8 +396,8 @@
         make_point = (libfoo, 'make_point', [types.slong, types.slong], ffi_point)
         #
         PTR = lltype.Ptr(rffi.CArray(rffi.LONG))
-        p = self.call(make_point, [12, 34], PTR, init_result=lltype.nullptr(PTR.TO),
-                      is_struct=True)
+        p = self.call(make_point, [12, 34], PTR, is_struct=True,
+                      jitif=["byval"])
         assert p[0] == 12
         assert p[1] == 34
         lltype.free(p, flavor='raw')
diff --git a/pypy/rlib/test/test_longlong2float.py b/pypy/rlib/test/test_longlong2float.py
--- a/pypy/rlib/test/test_longlong2float.py
+++ b/pypy/rlib/test/test_longlong2float.py
@@ -1,5 +1,7 @@
 from pypy.translator.c.test.test_genc import compile
 from pypy.rlib.longlong2float import longlong2float, float2longlong
+from pypy.rlib.longlong2float import uint2singlefloat, singlefloat2uint
+from pypy.rlib.rarithmetic import r_singlefloat
 
 
 def fn(f1):
@@ -28,3 +30,23 @@
     for x in enum_floats():
         res = fn2(x)
         assert repr(res) == repr(x)
+
+# ____________________________________________________________
+
+def fnsingle(f1):
+    sf1 = r_singlefloat(f1)
+    ii = singlefloat2uint(sf1)
+    sf2 = uint2singlefloat(ii)
+    f2 = float(sf2)
+    return f2
+
+def test_int_as_singlefloat():
+    for x in enum_floats():
+        res = fnsingle(x)
+        assert repr(res) == repr(float(r_singlefloat(x)))
+
+def test_compiled_single():
+    fn2 = compile(fnsingle, [float])
+    for x in enum_floats():
+        res = fn2(x)
+        assert repr(res) == repr(float(r_singlefloat(x)))
diff --git a/pypy/rpython/lltypesystem/ll2ctypes.py b/pypy/rpython/lltypesystem/ll2ctypes.py
--- a/pypy/rpython/lltypesystem/ll2ctypes.py
+++ b/pypy/rpython/lltypesystem/ll2ctypes.py
@@ -27,7 +27,11 @@
 from pypy.rpython import raddress
 from pypy.translator.platform import platform
 from array import array
-from thread import _local as tlsobject
+try:
+    from thread import _local as tlsobject
+except ImportError:
+    class tlsobject(object):
+        pass
 
 # ____________________________________________________________
 
@@ -688,6 +692,8 @@
                     res = ctypes.cast(res, ctypes.c_void_p).value
                     if res is None:
                         return 0
+                if T.TO.RESULT == lltype.SingleFloat:
+                    res = res.value     # baaaah, cannot return a c_float()
                 return res
 
             def callback(*cargs):
diff --git a/pypy/rpython/lltypesystem/lltype.py b/pypy/rpython/lltypesystem/lltype.py
--- a/pypy/rpython/lltypesystem/lltype.py
+++ b/pypy/rpython/lltypesystem/lltype.py
@@ -1,7 +1,7 @@
 import py
 from pypy.rlib.rarithmetic import (r_int, r_uint, intmask, r_singlefloat,
                                    r_ulonglong, r_longlong, r_longfloat,
-                                   base_int, normalizedinttype)
+                                   base_int, normalizedinttype, longlongmask)
 from pypy.rlib.objectmodel import Symbolic
 from pypy.tool.uid import Hashable
 from pypy.tool.identity_dict import identity_dict
@@ -667,6 +667,9 @@
 
 _numbertypes = {int: Number("Signed", int, intmask)}
 _numbertypes[r_int] = _numbertypes[int]
+if r_longlong is not r_int:
+    _numbertypes[r_longlong] = Number("SignedLongLong", r_longlong,
+                                      longlongmask)
 
 def build_number(name, type):
     try:
@@ -1159,7 +1162,7 @@
         try:
             return self._lookup_adtmeth(field_name)
         except AttributeError:
-            raise AttributeError("%r instance has no field %r" % (self._T,
+            raise AttributeError("%r instance has no field %r" % (self._T._name,
                                                                   field_name))
 
     def __setattr__(self, field_name, val):
@@ -1949,7 +1952,7 @@
 
 
 def malloc(T, n=None, flavor='gc', immortal=False, zero=False,
-           track_allocation=True):
+           track_allocation=True, add_memory_pressure=False):
     assert flavor in ('gc', 'raw')
     if zero or immortal:
         initialization = 'example'
diff --git a/pypy/rpython/lltypesystem/rlist.py b/pypy/rpython/lltypesystem/rlist.py
--- a/pypy/rpython/lltypesystem/rlist.py
+++ b/pypy/rpython/lltypesystem/rlist.py
@@ -14,7 +14,6 @@
 from pypy.rpython.lltypesystem import rstr
 from pypy.rpython import robject
 from pypy.rlib.debug import ll_assert
-from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rpython.lltypesystem import rffi
 from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rlib import rgc
@@ -200,12 +199,11 @@
         else:
             some = 6
         some += newsize >> 3
-        try:
-            new_allocated = ovfcheck(newsize + some)
-        except OverflowError:
-            raise MemoryError
+        new_allocated = newsize + some
     # new_allocated is a bit more than newsize, enough to ensure an amortized
-    # linear complexity for e.g. repeated usage of l.append().
+    # linear complexity for e.g. repeated usage of l.append().  In case
+    # it overflows sys.maxint, it is guaranteed negative, and the following
+    # malloc() will fail.
     items = l.items
     newitems = malloc(typeOf(l).TO.items.TO, new_allocated)
     before_len = l.length
diff --git a/pypy/rpython/lltypesystem/rstr.py b/pypy/rpython/lltypesystem/rstr.py
--- a/pypy/rpython/lltypesystem/rstr.py
+++ b/pypy/rpython/lltypesystem/rstr.py
@@ -345,6 +345,8 @@
     def ll_strconcat(s1, s2):
         len1 = len(s1.chars)
         len2 = len(s2.chars)
+        # a single '+' like this is allowed to overflow: it gets
+        # a negative result, and the gc will complain
         newstr = s1.malloc(len1 + len2)
         s1.copy_contents(s1, newstr, 0, 0, len1)
         s1.copy_contents(s2, newstr, 0, len1, len2)
@@ -412,9 +414,18 @@
         itemslen = 0
         i = 0
         while i < num_items:
-            itemslen += len(items[i].chars)
+            try:
+                itemslen = ovfcheck(itemslen + len(items[i].chars))
+            except OverflowError:
+                raise MemoryError
             i += 1
-        result = s.malloc(itemslen + s_len * (num_items - 1))
+        try:
+            seplen = ovfcheck(s_len * (num_items - 1))
+        except OverflowError:
+            raise MemoryError
+        # a single '+' at the end is allowed to overflow: it gets
+        # a negative result, and the gc will complain
+        result = s.malloc(itemslen + seplen)
         res_index = len(items[0].chars)
         s.copy_contents(items[0], result, 0, 0, res_index)
         i = 1
@@ -688,7 +699,10 @@
         itemslen = 0
         i = 0
         while i < num_items:
-            itemslen += len(items[i].chars)
+            try:
+                itemslen = ovfcheck(itemslen + len(items[i].chars))
+            except OverflowError:
+                raise MemoryError
             i += 1
         if typeOf(items).TO.OF.TO == STR:
             malloc = mallocstr
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -390,6 +390,11 @@
         # initialize the threshold
         self.min_heap_size = max(self.min_heap_size, self.nursery_size *
                                               self.major_collection_threshold)
+        # the following two values are usually equal, but during raw mallocs
+        # of arrays, next_major_collection_threshold is decremented to make
+        # the next major collection arrive earlier.
+        # See translator/c/test/test_newgc, test_nongc_attached_to_gc
+        self.next_major_collection_initial = self.min_heap_size
         self.next_major_collection_threshold = self.min_heap_size
         self.set_major_threshold_from(0.0)
         debug_stop("gc-set-nursery-size")
@@ -397,7 +402,7 @@
 
     def set_major_threshold_from(self, threshold, reserving_size=0):
         # Set the next_major_collection_threshold.
-        threshold_max = (self.next_major_collection_threshold *
+        threshold_max = (self.next_major_collection_initial *
                          self.growth_rate_max)
         if threshold > threshold_max:
             threshold = threshold_max
@@ -412,6 +417,7 @@
         else:
             bounded = False
         #
+        self.next_major_collection_initial = threshold
         self.next_major_collection_threshold = threshold
         return bounded
 
@@ -509,17 +515,19 @@
         # constant-folded because self.nonlarge_max, size and itemsize
         # are all constants (the arguments are constant due to
         # inlining).
-        if not raw_malloc_usage(itemsize):
-            too_many_items = raw_malloc_usage(nonvarsize) > self.nonlarge_max
+        maxsize = self.nonlarge_max - raw_malloc_usage(nonvarsize)
+        if maxsize < 0:
+            toobig = r_uint(0)    # the nonvarsize alone is too big
+        elif raw_malloc_usage(itemsize):
+            toobig = r_uint(maxsize // raw_malloc_usage(itemsize)) + 1
         else:
-            maxlength = self.nonlarge_max - raw_malloc_usage(nonvarsize)
-            maxlength = maxlength // raw_malloc_usage(itemsize)
-            too_many_items = length > maxlength
+            toobig = r_uint(sys.maxint) + 1
 
-        if too_many_items:
+        if r_uint(length) >= r_uint(toobig):
             #
             # If the total size of the object would be larger than
-            # 'nonlarge_max', then allocate it externally.
+            # 'nonlarge_max', then allocate it externally.  We also
+            # go there if 'length' is actually negative.
             obj = self.external_malloc(typeid, length)
             #
         else:
@@ -602,13 +610,18 @@
             # this includes the case of fixed-size objects, for which we
             # should not even ask for the varsize_item_sizes().
             totalsize = nonvarsize
-        else:
+        elif length > 0:
+            # var-sized allocation with at least one item
             itemsize = self.varsize_item_sizes(typeid)
             try:
                 varsize = ovfcheck(itemsize * length)
                 totalsize = ovfcheck(nonvarsize + varsize)
             except OverflowError:
                 raise MemoryError
+        else:
+            # negative length!  This likely comes from an overflow
+            # earlier.  We will just raise MemoryError here.
+            raise MemoryError
         #
         # If somebody calls this function a lot, we must eventually
         # force a full collection.
@@ -716,9 +729,18 @@
     def set_max_heap_size(self, size):
         self.max_heap_size = float(size)
         if self.max_heap_size > 0.0:
+            if self.max_heap_size < self.next_major_collection_initial:
+                self.next_major_collection_initial = self.max_heap_size
             if self.max_heap_size < self.next_major_collection_threshold:
                 self.next_major_collection_threshold = self.max_heap_size
 
+    def raw_malloc_memory_pressure(self, sizehint):
+        self.next_major_collection_threshold -= sizehint
+        if self.next_major_collection_threshold < 0:
+            # cannot trigger a full collection now, but we can ensure
+            # that one will occur very soon
+            self.nursery_free = self.nursery_top
+
     def can_malloc_nonmovable(self):
         return True
 
@@ -1598,7 +1620,7 @@
         # Max heap size: gives an upper bound on the threshold.  If we
         # already have at least this much allocated, raise MemoryError.
         if bounded and (float(self.get_total_memory_used()) + reserving_size >=
-                        self.next_major_collection_threshold):
+                        self.next_major_collection_initial):
             #
             # First raise MemoryError, giving the program a chance to
             # quit cleanly.  It might still allocate in the nursery,
diff --git a/pypy/rpython/memory/gc/test/test_minimark.py b/pypy/rpython/memory/gc/test/test_minimark.py
--- a/pypy/rpython/memory/gc/test/test_minimark.py
+++ b/pypy/rpython/memory/gc/test/test_minimark.py
@@ -34,6 +34,7 @@
                     growth_rate_max=1.5)
     gc.min_heap_size = 100.0
     gc.max_heap_size = 300.0
+    gc.next_major_collection_initial = 0.0
     gc.next_major_collection_threshold = 0.0
     # first, we don't grow past min_heap_size
     for i in range(5):
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -378,6 +378,18 @@
         else:
             self.malloc_varsize_nonmovable_ptr = None
 
+        if getattr(GCClass, 'raw_malloc_memory_pressure', False):
+            def raw_malloc_memory_pressure(length, itemsize):
+                totalmem = length * itemsize
+                if totalmem > 0:
+                    gcdata.gc.raw_malloc_memory_pressure(totalmem)
+                #else: probably an overflow -- the following rawmalloc
+                #      will fail then
+            self.raw_malloc_memory_pressure_ptr = getfn(
+                raw_malloc_memory_pressure,
+                [annmodel.SomeInteger(), annmodel.SomeInteger()],
+                annmodel.s_None, minimal_transform = False)
+
         self.identityhash_ptr = getfn(GCClass.identityhash.im_func,
                                       [s_gc, s_gcref],
                                       annmodel.SomeInteger(),
diff --git a/pypy/rpython/memory/gctransform/transform.py b/pypy/rpython/memory/gctransform/transform.py
--- a/pypy/rpython/memory/gctransform/transform.py
+++ b/pypy/rpython/memory/gctransform/transform.py
@@ -589,6 +589,11 @@
 
     def gct_fv_raw_malloc_varsize(self, hop, flags, TYPE, v_length, c_const_size, c_item_size,
                                                                     c_offset_to_length):
+        if flags.get('add_memory_pressure', False):
+            if hasattr(self, 'raw_malloc_memory_pressure_ptr'):
+                hop.genop("direct_call",
+                          [self.raw_malloc_memory_pressure_ptr,
+                           v_length, c_item_size])
         if c_offset_to_length is None:
             if flags.get('zero'):
                 fnptr = self.raw_malloc_varsize_no_length_zero_ptr
diff --git a/pypy/rpython/rbuiltin.py b/pypy/rpython/rbuiltin.py
--- a/pypy/rpython/rbuiltin.py
+++ b/pypy/rpython/rbuiltin.py
@@ -345,14 +345,17 @@
 BUILTIN_TYPER[object.__init__] = rtype_object__init__
 # annotation of low-level types
 
-def rtype_malloc(hop, i_flavor=None, i_zero=None, i_track_allocation=None):
+def rtype_malloc(hop, i_flavor=None, i_zero=None, i_track_allocation=None,
+                 i_add_memory_pressure=None):
     assert hop.args_s[0].is_constant()
     vlist = [hop.inputarg(lltype.Void, arg=0)]
     opname = 'malloc'
-    v_flavor, v_zero, v_track_allocation = parse_kwds(hop,
+    v_flavor, v_zero, v_track_allocation, v_add_memory_pressure = parse_kwds(
+        hop,
         (i_flavor, lltype.Void),
         (i_zero, None),
-        (i_track_allocation, None))
+        (i_track_allocation, None),
+        (i_add_memory_pressure, None))
 
     flags = {'flavor': 'gc'}
     if v_flavor is not None:
@@ -361,8 +364,11 @@
         flags['zero'] = v_zero.value
     if i_track_allocation is not None:
         flags['track_allocation'] = v_track_allocation.value
+    if i_add_memory_pressure is not None:
+        flags['add_memory_pressure'] = v_add_memory_pressure.value
     vlist.append(hop.inputconst(lltype.Void, flags))
-        
+
+    assert 1 <= hop.nb_args <= 2
     if hop.nb_args == 2:
         vlist.append(hop.inputarg(lltype.Signed, arg=1))
         opname += '_varsize'
diff --git a/pypy/tool/gdb_pypy.py b/pypy/tool/gdb_pypy.py
new file mode 100644
--- /dev/null
+++ b/pypy/tool/gdb_pypy.py
@@ -0,0 +1,200 @@
+"""
+Some convenience macros for gdb.  If you have pypy in your path, you can simply do:
+
+(gdb) python import pypy.tool.gdb_pypy
+
+Or, alternatively:
+
+(gdb) python execfile('/path/to/gdb_pypy.py')
+"""
+
+from __future__ import with_statement
+
+import re
+import sys
+import os.path
+
+try:
+    # when running inside gdb
+    from gdb import Command
+except ImportError:
+    # when running outside gdb: mock class for testing
+    class Command(object):
+        def __init__(self, name, command_class):
+            pass
+
+
+def find_field_with_suffix(val, suffix):
+    """
+    Return ``val[field]``, where ``field`` is the only one whose name ends
+    with ``suffix``.  If there is no such field, or more than one, raise KeyError.
+    """
+    names = []
+    for field in val.type.fields():
+        if field.name.endswith(suffix):
+            names.append(field.name)
+    #
+    if len(names) == 1:
+        return val[names[0]]
+    elif len(names) == 0:
+        raise KeyError, "cannot find field *%s" % suffix
+    else:
+        raise KeyError, "too many matching fields: %s" % ', '.join(names)
+
+def lookup(val, suffix):
+    """
+    Lookup a field which ends with ``suffix`` following the rpython struct
+    inheritance hierarchy (i.e., looking both at ``val`` and
+    ``val['*_super']``, recursively).
+    """
+    try:
+        return find_field_with_suffix(val, suffix)
+    except KeyError:
+        baseobj = find_field_with_suffix(val, '_super')
+        return lookup(baseobj, suffix)
+
+
+class RPyType(Command):
+    """
+    Prints the RPython type of the expression (remember to dereference it!)
+    It expects ``typeids.txt`` next to the executable or in its parent directory.
+    E.g.:
+
+    (gdb) rpy_type *l_v123
+    GcStruct pypy.foo.Bar { super, inst_xxx, inst_yyy }
+    """
+
+    prog2typeids = {}
+ 
+    def __init__(self, gdb=None):
+        # dependency injection, for tests
+        if gdb is None:
+            import gdb
+        self.gdb = gdb
+        Command.__init__(self, "rpy_type", self.gdb.COMMAND_NONE)
+
+    def invoke(self, arg, from_tty):
+        # some magic code to automatically reload the python file while developing
+        ## from pypy.tool import gdb_pypy
+        ## reload(gdb_pypy)
+        ## gdb_pypy.RPyType.prog2typeids = self.prog2typeids # persist the cache
+        ## self.__class__ = gdb_pypy.RPyType
+        print self.do_invoke(arg, from_tty)
+
+    def do_invoke(self, arg, from_tty):
+        obj = self.gdb.parse_and_eval(arg)
+        hdr = lookup(obj, '_gcheader')
+        tid = hdr['h_tid']
+        offset = tid & 0xFFFFFFFF # 64bit only
+        offset = int(offset) # convert from gdb.Value to python int
+        typeids = self.get_typeids()
+        if offset in typeids:
+            return typeids[offset]
+        else:
+            return 'Cannot find the type with offset %d' % offset
+
+    def get_typeids(self):
+        progspace = self.gdb.current_progspace()
+        try:
+            return self.prog2typeids[progspace]
+        except KeyError:
+            typeids = self.load_typeids(progspace)
+            self.prog2typeids[progspace] = typeids
+            return typeids
+
+    def load_typeids(self, progspace):
+        """
+        Returns a mapping offset --> description
+        """
+        exename = progspace.filename
+        root = os.path.dirname(exename)
+        typeids_txt = os.path.join(root, 'typeids.txt')
+        if not os.path.exists(typeids_txt):
+            newroot = os.path.dirname(root)
+            typeids_txt = os.path.join(newroot, 'typeids.txt')
+        print 'loading', typeids_txt
+        typeids = {}
+        with open(typeids_txt) as f:
+            for line in f:
+                member, descr = map(str.strip, line.split(None, 1))
+                expr = "((char*)(&pypy_g_typeinfo.%s)) - (char*)&pypy_g_typeinfo" % member
+                offset = int(self.gdb.parse_and_eval(expr))
+                typeids[offset] = descr
+        return typeids
+
+
+def is_ptr(type, gdb):
+    if gdb is None:
+        import gdb # so we can pass a fake one from the tests
+    return type.code == gdb.TYPE_CODE_PTR
+
+
+class RPyStringPrinter(object):
+    """
+    Pretty printer for rpython strings.
+
+    Note that this pretty prints *pointers* to strings: this way you can do "p
+    val" and see the nice string, and "p *val" to see the underlying struct
+    fields
+    """
+    
+    def __init__(self, val):
+        self.val = val
+
+    @classmethod
+    def lookup(cls, val, gdb=None):
+        t = val.type
+        if is_ptr(t, gdb) and t.target().tag == 'pypy_rpy_string0':
+            return cls(val)
+        return None
+
+    def to_string(self):
+        chars = self.val['rs_chars']
+        length = int(chars['length'])
+        items = chars['items']
+        res = [chr(items[i]) for i in range(length)]
+        string = ''.join(res)
+        return 'r' + repr(string)
+
+
+class RPyListPrinter(object):
+    """
+    Pretty printer for rpython lists
+
+    Note that this pretty prints *pointers* to lists: this way you can do "p
+    val" and see the nice repr, and "p *val" to see the underlying struct
+    fields
+    """
+
+    def __init__(self, val):
+        self.val = val
+
+    @classmethod
+    def lookup(cls, val, gdb=None):
+        t = val.type
+        if is_ptr(t, gdb) and re.match(r'pypy_list\d*', t.target().tag):
+            return cls(val)
+        return None
+
+    def to_string(self):
+        length = int(self.val['l_length'])
+        array = self.val['l_items']
+        allocated = int(array['length'])
+        items = array['items']
+        itemlist = []
+        for i in range(length):
+            item = items[i]
+            itemlist.append(str(item))
+        str_items = ', '.join(itemlist)
+        return 'r[%s] (len=%d, alloc=%d)' % (str_items, length, allocated)
+
+
+try:
+    import gdb
+    RPyType() # side effects
+    gdb.pretty_printers += [
+        RPyStringPrinter.lookup,
+        RPyListPrinter.lookup
+        ]
+except ImportError:
+    pass
diff --git a/pypy/tool/jitlogparser/parser.py b/pypy/tool/jitlogparser/parser.py
--- a/pypy/tool/jitlogparser/parser.py
+++ b/pypy/tool/jitlogparser/parser.py
@@ -328,6 +328,8 @@
         if op.is_guard() and bridges.get('loop-' + str(op.guard_no), None):
             res.append(op)
             i = 0
+            if hasattr(op.bridge, 'force_asm'):
+                op.bridge.force_asm()
             ops = op.bridge.operations
         else:
             res.append(op)
diff --git a/pypy/tool/logparser.py b/pypy/tool/logparser.py
--- a/pypy/tool/logparser.py
+++ b/pypy/tool/logparser.py
@@ -4,7 +4,8 @@
     python logparser.py <action> <logfilename> <output> <options...>
 
 Actions:
-    draw-time   draw a timeline image of the log (format PNG by default)
+    draw-time      draw a timeline image of the log (format PNG by default)
+    print-summary  print a summary of the log
 """
 import autopath
 import sys, re
@@ -383,6 +384,23 @@
     else:
         image.save(output)
 
+def print_summary(log, out):
+    totaltimes = gettotaltimes(log)
+    if out == '-':
+        outfile = sys.stdout
+    else:
+        outfile = open(out, "w")
+    l = totaltimes.items()
+    l.sort(cmp=lambda a, b: cmp(b[1], a[1]))
+    total = sum([b for a, b in l])
+    for a, b in l:
+        if a is None:
+            a = 'interpret'
+        s = " " * (50 - len(a))
+        print >>outfile, a, s, str(b*100/total) + "%"
+    if out != '-':
+        outfile.close()
+
 # ____________________________________________________________
 
 
@@ -391,6 +409,7 @@
                                         'mainwidth=', 'mainheight=',
                                         'summarywidth=', 'summarybarheight=',
                                         ]),
+    'print-summary': (print_summary, []),
     }
 
 if __name__ == '__main__':
diff --git a/pypy/tool/test/test_gdb_pypy.py b/pypy/tool/test/test_gdb_pypy.py
new file mode 100644
--- /dev/null
+++ b/pypy/tool/test/test_gdb_pypy.py
@@ -0,0 +1,180 @@
+import py
+from pypy.tool import gdb_pypy
+
+class FakeGdb(object):
+
+    COMMAND_NONE = -1
+    #
+    TYPE_CODE_PTR = 1
+    TYPE_CODE_ARRAY = 2
+    TYPE_CODE_STRUCT = 3
+
+    def __init__(self, exprs, progspace=None):
+        self.exprs = exprs
+        self.progspace = progspace
+
+    def parse_and_eval(self, expr):
+        return self.exprs[expr]
+
+    def current_progspace(self):
+        return self.progspace
+
+
+class Mock(object):
+    def __init__(self, **attrs):
+        self.__dict__.update(attrs)
+
+class Field(Mock):
+    pass
+
+class Struct(object):
+    code = FakeGdb.TYPE_CODE_STRUCT
+    
+    def __init__(self, fieldnames, tag):
+        self._fields = [Field(name=name) for name in fieldnames]
+        self.tag = tag
+
+    def fields(self):
+        return self._fields[:]
+
+class Pointer(object):
+    code = FakeGdb.TYPE_CODE_PTR
+
+    def __init__(self, target):
+        self._target = target
+
+    def target(self):
+        return self._target
+
+class Value(dict):
+    def __init__(self, *args, **kwds):
+        type_tag = kwds.pop('type_tag', None)
+        dict.__init__(self, *args, **kwds)
+        self.type = Struct(self.keys(), type_tag)
+        for key, val in self.iteritems():
+            if isinstance(val, dict):
+                self[key] = Value(val)
+
+class PtrValue(Value):
+    def __init__(self, *args, **kwds):
+        # in python gdb, we can use [] to access fields either if we have an
+        # actual struct or a pointer to it, so we just reuse Value here
+        Value.__init__(self, *args, **kwds)
+        self.type = Pointer(self.type)
+
+def test_mock_objects():
+    d = {'a': 1,
+         'b': 2,
+         'super': {
+            'c': 3,
+            }
+         }
+    val = Value(d)
+    assert val['a'] == 1
+    assert val['b'] == 2
+    assert isinstance(val['super'], Value)
+    assert val['super']['c'] == 3
+    fields = val.type.fields()
+    names = [f.name for f in fields]
+    assert sorted(names) == ['a', 'b', 'super']
+
+def test_find_field_with_suffix():
+    obj = Value(x_foo = 1,
+                y_bar = 2,
+                z_foobar = 3)
+    assert gdb_pypy.find_field_with_suffix(obj, 'foo') == 1
+    assert gdb_pypy.find_field_with_suffix(obj, 'foobar') == 3
+    py.test.raises(KeyError, "gdb_pypy.find_field_with_suffix(obj, 'bar')")
+    py.test.raises(KeyError, "gdb_pypy.find_field_with_suffix(obj, 'xxx')")
+
+def test_lookup():
+    d = {'r_super': {
+            '_gcheader': {
+                'h_tid': 123,
+                }
+            },
+         'r_foo': 42,
+         }
+    obj = Value(d)
+    assert gdb_pypy.lookup(obj, 'foo') == 42
+    hdr = gdb_pypy.lookup(obj, 'gcheader')
+    assert hdr['h_tid'] == 123
+
+def test_load_typeids(tmpdir):
+    exe = tmpdir.join('testing_1').join('pypy-c')
+    typeids = tmpdir.join('typeids.txt')
+    typeids.write("""
+member0    GcStruct xxx {}
+""".strip())
+    progspace = Mock(filename=str(exe))
+    exprs = {
+        '((char*)(&pypy_g_typeinfo.member0)) - (char*)&pypy_g_typeinfo': 0,
+        }
+    gdb = FakeGdb(exprs, progspace)
+    cmd = gdb_pypy.RPyType(gdb)
+    typeids = cmd.load_typeids(progspace)
+    assert typeids[0] == 'GcStruct xxx {}'
+
+def test_RPyType(tmpdir):
+    exe = tmpdir.join('pypy-c')
+    typeids = tmpdir.join('typeids.txt')
+    typeids.write("""
+member0    GcStruct xxx {}
+member1    GcStruct yyy {}
+member2    GcStruct zzz {}
+""".strip())
+    #
+    progspace = Mock(filename=str(exe))
+    d = {'r_super': {
+            '_gcheader': {
+                'h_tid': 123,
+                }
+            },
+         'r_foo': 42,
+         }
+    myvar = Value(d)
+    exprs = {
+        '*myvar': myvar,
+        '((char*)(&pypy_g_typeinfo.member0)) - (char*)&pypy_g_typeinfo': 0,
+        '((char*)(&pypy_g_typeinfo.member1)) - (char*)&pypy_g_typeinfo': 123,
+        '((char*)(&pypy_g_typeinfo.member2)) - (char*)&pypy_g_typeinfo': 456,
+        }
+    gdb = FakeGdb(exprs, progspace)
+    cmd = gdb_pypy.RPyType(gdb)
+    assert cmd.do_invoke('*myvar', True) == 'GcStruct yyy {}'
+
+def test_pprint_string():
+    d = {'_gcheader': {
+            'h_tid': 123
+            },
+         'rs_hash': 456,
+         'rs_chars': {
+            'length': 6,
+            'items': map(ord, 'foobar'),
+            }
+         }
+    p_string = PtrValue(d, type_tag='pypy_rpy_string0')
+    printer = gdb_pypy.RPyStringPrinter.lookup(p_string, FakeGdb)
+    assert printer.to_string() == "r'foobar'"
+
+def test_pprint_list():
+    d = {'_gcheader': {
+            'h_tid': 123
+            },
+         'l_length': 3, # the length of the rpython list
+         'l_items':
+             # this is the array which contains the items
+             {'_gcheader': {
+                'h_tid': 456
+                },
+              'length': 5, # the length of the underlying array
+              'items': [40, 41, 42, -1, -2],
+              }
+         }
+    mylist = PtrValue(d, type_tag='pypy_list0')
+    printer = gdb_pypy.RPyListPrinter.lookup(mylist, FakeGdb)
+    assert printer.to_string() == 'r[40, 41, 42] (len=3, alloc=5)'
+    #
+    mylist.type.target().tag = 'pypy_list1234'
+    printer = gdb_pypy.RPyListPrinter.lookup(mylist, FakeGdb)
+    assert printer.to_string() == 'r[40, 41, 42] (len=3, alloc=5)'
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.c b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
@@ -3,31 +3,38 @@
 #include "src/cjkcodecs/multibytecodec.h"
 
 
-struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec,
-                                         char *inbuf, Py_ssize_t inlen)
+struct pypy_cjk_dec_s *pypy_cjk_dec_new(const MultibyteCodec *codec)
 {
   struct pypy_cjk_dec_s *d = malloc(sizeof(struct pypy_cjk_dec_s));
   if (!d)
     return NULL;
   if (codec->decinit != NULL && codec->decinit(&d->state, codec->config) != 0)
-    goto errorexit;
+    {
+      free(d);
+      return NULL;
+    }
+  d->codec = codec;
+  d->outbuf_start = NULL;
+  return d;
+}
 
-  d->codec = codec;
+Py_ssize_t pypy_cjk_dec_init(struct pypy_cjk_dec_s *d,
+                             char *inbuf, Py_ssize_t inlen)
+{
   d->inbuf_start = inbuf;
   d->inbuf = inbuf;
   d->inbuf_end = inbuf + inlen;
-  d->outbuf_start = (inlen <= (PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) ?
-                     malloc(inlen * sizeof(Py_UNICODE)) :
-                     NULL);
-  if (!d->outbuf_start)
-    goto errorexit;
+  if (d->outbuf_start == NULL)
+    {
+      d->outbuf_start = (inlen <= (PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) ?
+                         malloc(inlen * sizeof(Py_UNICODE)) :
+                         NULL);
+      if (d->outbuf_start == NULL)
+        return -1;
+      d->outbuf_end = d->outbuf_start + inlen;
+    }
   d->outbuf = d->outbuf_start;
-  d->outbuf_end = d->outbuf_start + inlen;
-  return d;
-
- errorexit:
-  free(d);
-  return NULL;
+  return 0;
 }
 
 void pypy_cjk_dec_free(struct pypy_cjk_dec_s *d)
@@ -112,34 +119,40 @@
 
 /************************************************************/
 
-struct pypy_cjk_enc_s *pypy_cjk_enc_init(const MultibyteCodec *codec,
-                                         Py_UNICODE *inbuf, Py_ssize_t inlen)
+struct pypy_cjk_enc_s *pypy_cjk_enc_new(const MultibyteCodec *codec)
 {
-  Py_ssize_t outlen;
   struct pypy_cjk_enc_s *d = malloc(sizeof(struct pypy_cjk_enc_s));
   if (!d)
     return NULL;
   if (codec->encinit != NULL && codec->encinit(&d->state, codec->config) != 0)
-    goto errorexit;
+    {
+      free(d);
+      return NULL;
+    }
+  d->codec = codec;
+  d->outbuf_start = NULL;
+  return d;
+}
 
-  d->codec = codec;
+Py_ssize_t pypy_cjk_enc_init(struct pypy_cjk_enc_s *d,
+                             Py_UNICODE *inbuf, Py_ssize_t inlen)
+{
+  Py_ssize_t outlen;
   d->inbuf_start = inbuf;
   d->inbuf = inbuf;
   d->inbuf_end = inbuf + inlen;
-
-  if (inlen > (PY_SSIZE_T_MAX - 16) / 2)
-    goto errorexit;
-  outlen = inlen * 2 + 16;
-  d->outbuf_start = malloc(outlen);
-  if (!d->outbuf_start)
-    goto errorexit;
+  if (d->outbuf_start == NULL)
+    {
+      if (inlen > (PY_SSIZE_T_MAX - 16) / 2)
+        return -1;
+      outlen = inlen * 2 + 16;
+      d->outbuf_start = malloc(outlen);
+      if (d->outbuf_start == NULL)
+        return -1;
+      d->outbuf_end = d->outbuf_start + outlen;
+    }
   d->outbuf = d->outbuf_start;
-  d->outbuf_end = d->outbuf_start + outlen;
-  return d;
-
- errorexit:
-  free(d);
-  return NULL;
+  return 0;
 }
 
 void pypy_cjk_enc_free(struct pypy_cjk_enc_s *d)
@@ -167,11 +180,8 @@
   return 0;
 }
 
-#define MBENC_RESET     MBENC_MAX<<1
-
-Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *d)
+Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *d, Py_ssize_t flags)
 {
-  int flags = MBENC_FLUSH | MBENC_RESET;   /* XXX always, for now */
   while (1)
     {
       Py_ssize_t r;
@@ -242,3 +252,8 @@
   d->inbuf = d->inbuf_start + in_offset;
   return 0;
 }
+
+const MultibyteCodec *pypy_cjk_enc_getcodec(struct pypy_cjk_enc_s *d)
+{
+  return d->codec;
+}
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.h b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
@@ -94,8 +94,9 @@
   Py_UNICODE *outbuf_start, *outbuf, *outbuf_end;
 };
 
-struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec,
-                                         char *inbuf, Py_ssize_t inlen);
+struct pypy_cjk_dec_s *pypy_cjk_dec_new(const MultibyteCodec *codec);
+Py_ssize_t pypy_cjk_dec_init(struct pypy_cjk_dec_s *d,
+                             char *inbuf, Py_ssize_t inlen);
 void pypy_cjk_dec_free(struct pypy_cjk_dec_s *);
 Py_ssize_t pypy_cjk_dec_chunk(struct pypy_cjk_dec_s *);
 Py_UNICODE *pypy_cjk_dec_outbuf(struct pypy_cjk_dec_s *);
@@ -112,10 +113,11 @@
   unsigned char *outbuf_start, *outbuf, *outbuf_end;
 };
 
-struct pypy_cjk_enc_s *pypy_cjk_enc_init(const MultibyteCodec *codec,
-                                         Py_UNICODE *inbuf, Py_ssize_t inlen);
+struct pypy_cjk_enc_s *pypy_cjk_enc_new(const MultibyteCodec *codec);
+Py_ssize_t pypy_cjk_enc_init(struct pypy_cjk_enc_s *d,
+                             Py_UNICODE *inbuf, Py_ssize_t inlen);
 void pypy_cjk_enc_free(struct pypy_cjk_enc_s *);
-Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *);
+Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *, Py_ssize_t);
 Py_ssize_t pypy_cjk_enc_reset(struct pypy_cjk_enc_s *);
 char *pypy_cjk_enc_outbuf(struct pypy_cjk_enc_s *);
 Py_ssize_t pypy_cjk_enc_outlen(struct pypy_cjk_enc_s *);
@@ -123,6 +125,7 @@
 Py_ssize_t pypy_cjk_enc_inbuf_consumed(struct pypy_cjk_enc_s* d);
 Py_ssize_t pypy_cjk_enc_replace_on_error(struct pypy_cjk_enc_s* d,
                                          char *, Py_ssize_t, Py_ssize_t);
+const MultibyteCodec *pypy_cjk_enc_getcodec(struct pypy_cjk_enc_s *);
 
 /* list of codecs defined in the .c files */
 
diff --git a/pypy/translator/c/test/test_newgc.py b/pypy/translator/c/test/test_newgc.py
--- a/pypy/translator/c/test/test_newgc.py
+++ b/pypy/translator/c/test/test_newgc.py
@@ -1429,6 +1429,35 @@
     def test_gc_heap_stats(self):
         py.test.skip("not implemented")
 
+    def define_nongc_attached_to_gc(cls):
+        from pypy.rpython.lltypesystem import rffi
+        ARRAY = rffi.CArray(rffi.INT)
+        class A:
+            def __init__(self, n):
+                self.buf = lltype.malloc(ARRAY, n, flavor='raw',
+                                         add_memory_pressure=True)
+            def __del__(self):
+                lltype.free(self.buf, flavor='raw')
+        A(6)
+        def f():
+            # allocate a total of ~77GB, but if the automatic gc'ing works,
+            # it should never need more than a few MBs at once
+            am1 = am2 = am3 = None
+            res = 0
+            for i in range(1, 100001):
+                if am3 is not None:
+                    res += rffi.cast(lltype.Signed, am3.buf[0])
+                am3 = am2
+                am2 = am1
+                am1 = A(i * 4)
+                am1.buf[0] = rffi.cast(rffi.INT, i-50000)
+            return res
+        return f
+
+    def test_nongc_attached_to_gc(self):
+        res = self.run("nongc_attached_to_gc")
+        assert res == -99997
+
 # ____________________________________________________________________
 
 class TaggedPointersTest(object):
diff --git a/pypy/translator/jvm/src/pypy/PyPy.java b/pypy/translator/jvm/src/pypy/PyPy.java
--- a/pypy/translator/jvm/src/pypy/PyPy.java
+++ b/pypy/translator/jvm/src/pypy/PyPy.java
@@ -1100,9 +1100,9 @@
         if (Double.isNaN(x))
             return interlink.recordFloatSigned(x, 0);
 
-        // Infinity: Python throws exception
+        // Infinity: Python returns (inf, 0)
         if (Double.isInfinite(x))
-            interlink.throwOverflowError();
+            return interlink.recordFloatSigned(x, 0);
 
         // Extract the various parts of the format:
         final long e=11, f=52; // number of bits in IEEE format
diff --git a/pypy/translator/translator.py b/pypy/translator/translator.py
--- a/pypy/translator/translator.py
+++ b/pypy/translator/translator.py
@@ -16,7 +16,7 @@
 import py
 log = py.log.Producer("flowgraph")
 py.log.setconsumer("flowgraph", ansi_log)
- 
+
 class TranslationContext(object):
     FLOWING_FLAGS = {
         'verbose': False,
@@ -105,7 +105,7 @@
             raise ValueError("we already have an rtyper")
         from pypy.rpython.rtyper import RPythonTyper
         self.rtyper = RPythonTyper(self.annotator,
-                                   type_system = type_system)
+                                   type_system=type_system)
         return self.rtyper
 
     def getexceptiontransformer(self):