[pypy-commit] pypy default: hg merge stringbuilder2-perf

Mon Jun 16 09:36:25 CEST 2014

Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r72072:cba6e9bc3afb
Date: 2014-06-16 09:35 +0200
http://bitbucket.org/pypy/pypy/changeset/cba6e9bc3afb/

Log:	hg merge stringbuilder2-perf

	Give the StringBuilder a more flexible internal structure, with a
	chained list of strings instead of just one string. This make it
	more efficient when building large strings, e.g. with cStringIO().

	Also, use systematically jit.conditional_call() instead of regular
	branches. This lets the JIT make more linear code, at the cost of
	forcing a bit more data (to be passed as arguments to
	conditional_calls). I would expect the net result to be a slight
	slow-down on some simple benchmarks and a speed-up on bigger
	programs.

diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py
--- a/pypy/module/pypyjit/test_pypy_c/test_string.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_string.py
@@ -101,39 +101,64 @@
         log = self.run(main, [1000])
         assert log.result == main(1000)
         loop, = log.loops_by_filename(self.filepath)
+        # NB: since the stringbuilder2-perf branch we get more operations than
+        # before, but a lot less branches that might fail randomly.
         assert loop.match("""
-            i7 = int_gt(i4, 0)
-            guard_true(i7, descr=...)
+            i100 = int_gt(i95, 0)
+            guard_true(i100, descr=...)
             guard_not_invalidated(descr=...)
-            p9 = call(ConstClass(ll_int2dec__Signed), i4, descr=<Callr . i EF=3>)
+            p101 = call(ConstClass(ll_int2dec__Signed), i95, descr=<Callr . i EF=3>)
             guard_no_exception(descr=...)
-            i10 = strlen(p9)
-            i11 = int_is_true(i10)
-            guard_true(i11, descr=...)
-            i13 = strgetitem(p9, 0)
-            i15 = int_eq(i13, 45)
-            guard_false(i15, descr=...)
-            i17 = int_neg(i10)
-            i19 = int_gt(i10, 23)
-            guard_false(i19, descr=...)
-            p21 = newstr(23)
-            copystrcontent(p9, p21, 0, 0, i10)
-            i25 = int_add(1, i10)
-            i26 = int_gt(i25, 23)
-            guard_false(i26, descr=...)
-            strsetitem(p21, i10, 32)
-            i30 = int_add(i10, i25)
-            i31 = int_gt(i30, 23)
-            guard_false(i31, descr=...)
-            copystrcontent(p9, p21, 0, i25, i10)
-            i33 = int_lt(i30, 23)
-            guard_true(i33, descr=...)
-            p35 = call(ConstClass(ll_shrink_array__rpy_stringPtr_Signed), p21, i30, descr=<Callr . ri EF=4 OS=3>)
+            i102 = strlen(p101)
+            i103 = int_is_true(i102)
+            guard_true(i103, descr=...)
+            i104 = strgetitem(p101, 0)
+            i105 = int_eq(i104, 45)
+            guard_false(i105, descr=...)
+            i106 = int_neg(i102)
+            i107 = int_gt(i102, 23)
+            p108 = new(descr=<SizeDescr .+>)
+            p110 = newstr(23)
+            setfield_gc(..., descr=<Field. stringbuilder.+>)
+            setfield_gc(..., descr=<Field. stringbuilder.+>)
+            setfield_gc(..., descr=<Field. stringbuilder.+>)
+            cond_call(i107, ConstClass(stringbuilder_append_overflow__stringbuilderPtr_rpy_stringPtr_Signed), p108, p101, i102, descr=<Callv 0 rri EF=4>)
             guard_no_exception(descr=...)
-            i37 = strlen(p35)
-            i38 = int_add_ovf(i5, i37)
+            i111 = getfield_gc(p108, descr=<FieldS stringbuilder.skip .+>)
+            i112 = int_sub(i102, i111)
+            i113 = getfield_gc(p108, descr=<FieldS stringbuilder.current_pos .+>)
+            p114 = getfield_gc(p108, descr=<FieldP stringbuilder.current_buf .+>)
+            copystrcontent(p101, p114, i111, i113, i112)
+            i115 = int_add(i113, i112)
+            i116 = getfield_gc(p108, descr=<FieldS stringbuilder.current_end .+>)
+            setfield_gc(p108, i115, descr=<FieldS stringbuilder.current_pos .+>)
+            i117 = int_eq(i115, i116)
+            cond_call(i117, ConstClass(stringbuilder_grow__stringbuilderPtr_Signed), p108, 1, descr=<Callv 0 ri EF=4>)
+            guard_no_exception(descr=...)
+            i118 = getfield_gc(p108, descr=<FieldS stringbuilder.current_pos .+>)
+            i119 = int_add(i118, 1)
+            p120 = getfield_gc(p108, descr=<FieldP stringbuilder.current_buf .+>)
+            strsetitem(p120, i118, 32)
+            i121 = getfield_gc(p108, descr=<FieldS stringbuilder.current_end .+>)
+            i122 = int_sub(i121, i119)
+            setfield_gc(..., descr=<FieldS stringbuilder.+>)
+            setfield_gc(..., descr=<FieldS stringbuilder.+>)
+            i123 = int_gt(i102, i122)
+            cond_call(i123, ConstClass(stringbuilder_append_overflow__stringbuilderPtr_rpy_stringPtr_Signed), p108, p101, i102, descr=<Callv 0 rri EF=4>)
+            guard_no_exception(descr=...)
+            i124 = getfield_gc(p108, descr=<FieldS stringbuilder.skip .+>)
+            i125 = int_sub(i102, i124)
+            i126 = getfield_gc(p108, descr=<FieldS stringbuilder.current_pos .+>)
+            p127 = getfield_gc(p108, descr=<FieldP stringbuilder.current_buf .+>)
+            copystrcontent(p101, p127, i124, i126, i125)
+            i128 = int_add(i126, i125)
+            setfield_gc(p108, i128, descr=<FieldS stringbuilder.current_pos .+>)
+            p135 = call(..., descr=<Callr . r EF=4)     # ll_build
+            guard_no_exception(descr=...)
+            i136 = strlen(p135)
+            i137 = int_add_ovf(i92, i136)
             guard_no_overflow(descr=...)
-            i40 = int_sub(i4, 1)
+            i138 = int_sub(i95, 1)
             --TICK--
             jump(..., descr=...)
         """)
diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py
--- a/rpython/jit/codewriter/jtransform.py
+++ b/rpython/jit/codewriter/jtransform.py
@@ -253,7 +253,13 @@
                         SpaceOperation("record_known_class", [op.args[0], const_vtable], None)]
 
     def rewrite_op_raw_malloc_usage(self, op):
-        pass
+        if self.cpu.translate_support_code or isinstance(op.args[0], Variable):
+            return   # the operation disappears
+        else:
+            # only for untranslated tests: get a real integer estimate
+            arg = op.args[0].value
+            arg = llmemory.raw_malloc_usage(arg)
+            return [Constant(arg, lltype.Signed)]
 
     def rewrite_op_jit_record_known_class(self, op):
         return SpaceOperation("record_known_class", [op.args[0], op.args[1]], None)
diff --git a/rpython/jit/metainterp/blackhole.py b/rpython/jit/metainterp/blackhole.py
--- a/rpython/jit/metainterp/blackhole.py
+++ b/rpython/jit/metainterp/blackhole.py
@@ -1090,6 +1090,11 @@
         if condition:
             cpu.bh_call_v(func, args_i, None, None, calldescr)
 
+    @arguments("cpu", "i", "i", "R", "d")
+    def bhimpl_conditional_call_r_v(cpu, condition, func, args_r, calldescr):
+        if condition:
+            cpu.bh_call_v(func, None, args_r, None, calldescr)
+
     @arguments("cpu", "i", "i", "I", "R", "d")
     def bhimpl_conditional_call_ir_v(cpu, condition, func, args_i, args_r,
                                      calldescr):
diff --git a/rpython/jit/metainterp/pyjitpl.py b/rpython/jit/metainterp/pyjitpl.py
--- a/rpython/jit/metainterp/pyjitpl.py
+++ b/rpython/jit/metainterp/pyjitpl.py
@@ -888,6 +888,8 @@
                                     pc):
         self.do_conditional_call(condbox, funcbox, argboxes, calldescr, pc)
 
+    opimpl_conditional_call_r_v = opimpl_conditional_call_i_v
+
     @arguments("box", "box", "boxes2", "descr", "orgpc")
     def opimpl_conditional_call_ir_v(self, condbox, funcbox, argboxes,
                                      calldescr, pc):
@@ -1420,6 +1422,8 @@
             return self.execute_varargs(rop.CALL, allboxes, descr, exc, pure)
 
     def do_conditional_call(self, condbox, funcbox, argboxes, descr, pc):
+        if isinstance(condbox, ConstInt) and condbox.value == 0:
+            return   # so that the heapcache can keep argboxes virtual
         allboxes = self._build_allboxes(funcbox, argboxes, descr)
         effectinfo = descr.get_extra_info()
         assert not effectinfo.check_forces_virtual_or_virtualizable()
diff --git a/rpython/jit/metainterp/test/test_string.py b/rpython/jit/metainterp/test/test_string.py
--- a/rpython/jit/metainterp/test/test_string.py
+++ b/rpython/jit/metainterp/test/test_string.py
@@ -552,7 +552,7 @@
                 result += ord(b.build()[0])
                 n -= 1
             return result
-        res = self.meta_interp(main, [9])
+        res = self.meta_interp(main, [9], backendopt=True)
         assert res == main(9)
 
     def test_virtual_copystringcontent2(self):
@@ -568,7 +568,7 @@
                 result += ord((b.build() + _str("xyz"))[0])
                 n -= 1
             return result
-        res = self.meta_interp(main, [9])
+        res = self.meta_interp(main, [9], backendopt=True)
         assert res == main(9)
 
     def test_bytearray(self):
@@ -582,6 +582,247 @@
         res = self.interp_operations(f, [13])
         assert res == 13
 
+    def test_stringbuilder_create(self):
+        jitdriver = JitDriver(reds=['n'], greens=[])
+        def f(n):
+            while n > 0:
+                jitdriver.jit_merge_point(n=n)
+                sb = UnicodeBuilder()
+                if sb.build() != u"":
+                    raise ValueError
+                n -= 1
+            return n
+        res = self.meta_interp(f, [10], backendopt=True)
+        assert res == 0
+        self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2,
+                           'jump': 1})
+
+    def test_stringbuilder_append_char(self):
+        jitdriver = JitDriver(reds=['n'], greens=[])
+        def f(n):
+            while n > 0:
+                jitdriver.jit_merge_point(n=n)
+                sb = UnicodeBuilder()
+                sb.append(u"a")
+                sb.append(unichr(n))
+                s = sb.build()
+                if len(s) != 2: raise ValueError
+                if s[0] != u"a": raise ValueError
+                if s[1] != unichr(n): raise ValueError
+                n -= 1
+            return n
+        res = self.meta_interp(f, [10], backendopt=True)
+        assert res == 0
+        self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2,
+                           'jump': 1})
+
+    def test_stringbuilder_append_1(self):
+        jitdriver = JitDriver(reds=['n'], greens=[])
+        def f(n):
+            while n > 0:
+                jitdriver.jit_merge_point(n=n)
+                sb = UnicodeBuilder()
+                sb.append(u"ab")
+                s = sb.build()
+                if len(s) != 2: raise ValueError
+                if s[0] != u"a": raise ValueError
+                if s[1] != u"b": raise ValueError
+                n -= 1
+            return n
+        res = self.meta_interp(f, [10], backendopt=True)
+        assert res == 0
+        self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2,
+                           'jump': 1})
+
+    def test_stringbuilder_append_2(self):
+        jitdriver = JitDriver(reds=['n'], greens=[])
+        def f(n):
+            while n > 0:
+                jitdriver.jit_merge_point(n=n)
+                sb = UnicodeBuilder()
+                sb.append(u"abc")
+                s = sb.build()
+                if len(s) != 3: raise ValueError
+                if s[0] != u"a": raise ValueError
+                if s[1] != u"b": raise ValueError
+                if s[2] != u"c": raise ValueError
+                n -= 1
+            return n
+        res = self.meta_interp(f, [10], backendopt=True)
+        assert res == 0
+        self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2,
+                           'jump': 1})
+
+    def test_stringbuilder_append_empty(self):
+        jitdriver = JitDriver(reds=['n'], greens=[])
+        def f(n):
+            while n > 0:
+                jitdriver.jit_merge_point(n=n)
+                sb = UnicodeBuilder()
+                sb.append(u"")
+                s = sb.build()
+                if len(s) != 0: raise ValueError
+                n -= 1
+            return n
+        res = self.meta_interp(f, [10], backendopt=True)
+        assert res == 0
+        self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2,
+                           'jump': 1})
+
+    def test_stringbuilder_append_len2_1(self):
+        jitdriver = JitDriver(reds=['n', 'str1'], greens=[])
+        def f(n):
+            str1 = unicode(str(n))
+            while n > 0:
+                jitdriver.jit_merge_point(n=n, str1=str1)
+                sb = UnicodeBuilder()
+                sb.append(str1)
+                sb.append(u"ab")
+                s = sb.build()
+                if len(s) != 4: raise ValueError
+                if s[0] != u"1": raise ValueError
+                if s[1] != u"0": raise ValueError
+                if s[2] != u"a": raise ValueError
+                if s[3] != u"b": raise ValueError
+                n -= 1
+            return n
+        res = self.meta_interp(f, [10], backendopt=True)
+        assert res == 0
+        self.check_resops(call=2)    # (ll_shrink_array) * 2 unroll
+
+    def test_stringbuilder_append_len2_2(self):
+        jitdriver = JitDriver(reds=['n', 'str1'], greens=[])
+        def f(n):
+            str1 = str(n)
+            while n > 0:
+                jitdriver.jit_merge_point(n=n, str1=str1)
+                sb = StringBuilder(4)
+                sb.append("a")
+                sb.append(str1)
+                s = sb.build()
+                if len(s) != 3: raise ValueError
+                if s[0] != "a": raise ValueError
+                if s[1] != "1": raise ValueError
+                if s[2] != "0": raise ValueError
+                n -= 1
+            return n
+        res = self.meta_interp(f, [10], backendopt=True)
+        assert res == 0
+        self.check_resops(call=2)    # (ll_shrink_array) * 2 unroll
+
+    def test_stringbuilder_append_slice_1(self):
+        jitdriver = JitDriver(reds=['n'], greens=[])
+        def f(n):
+            while n > 0:
+                jitdriver.jit_merge_point(n=n)
+                sb = UnicodeBuilder()
+                sb.append_slice(u"abcdefghij", 1, n)
+                sb.append_slice(u"abcdefghij", 0, n)
+                s = sb.build()
+                if len(s) != 2 * n - 1: raise ValueError
+                n -= 1
+            return n
+        res = self.meta_interp(f, [10], backendopt=True)
+        assert res == 0
+        self.check_resops(call=2,     # (ll_shrink_array) * 2 unroll
+                          copyunicodecontent=4)
+
+    def test_stringbuilder_append_slice_2(self):
+        jitdriver = JitDriver(reds=['n'], greens=[])
+        def f(n):
+            while n > 0:
+                jitdriver.jit_merge_point(n=n)
+                sb = UnicodeBuilder()
+                sb.append_slice(u"fOo!", 1, 3)
+                s = sb.build()
+                if len(s) != 2: raise ValueError
+                if s[0] != u"O": raise ValueError
+                if s[1] != u"o": raise ValueError
+                n -= 1
+            return n
+        res = self.meta_interp(f, [10], backendopt=True)
+        assert res == 0
+        self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2,
+                           'jump': 1})
+
+    def test_stringbuilder_append_multiple_char_1(self):
+        jitdriver = JitDriver(reds=['n'], greens=[])
+        def f(n):
+            while n > 0:
+                jitdriver.jit_merge_point(n=n)
+                sb = UnicodeBuilder()
+                sb.append_multiple_char(u"x", 3)
+                s = sb.build()
+                if len(s) != 3: raise ValueError
+                if s[0] != u"x": raise ValueError
+                if s[1] != u"x": raise ValueError
+                if s[2] != u"x": raise ValueError
+                n -= 1
+            return n
+        res = self.meta_interp(f, [10], backendopt=True)
+        assert res == 0
+        self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2,
+                           'jump': 1})
+
+    def test_stringbuilder_append_multiple_char_2(self):
+        jitdriver = JitDriver(reds=['n'], greens=[])
+        def f(n):
+            while n > 0:
+                jitdriver.jit_merge_point(n=n)
+                sb = UnicodeBuilder()
+                sb.append_multiple_char(u"x", 5)
+                s = sb.build()
+                if len(s) != 5: raise ValueError
+                if s[0] != u"x": raise ValueError
+                if s[1] != u"x": raise ValueError
+                if s[2] != u"x": raise ValueError
+                if s[3] != u"x": raise ValueError
+                if s[4] != u"x": raise ValueError
+                n -= 1
+            return n
+        res = self.meta_interp(f, [10], backendopt=True)
+        assert res == 0
+        self.check_resops(call=4)    # (append, build) * 2 unroll
+
+    def test_stringbuilder_bug1(self):
+        jitdriver = JitDriver(reds=['n', 's1'], greens=[])
+        @dont_look_inside
+        def escape(x):
+            pass
+        def f(n):
+            s1 = unicode(str(n) * 16)
+            while n > 0:
+                jitdriver.jit_merge_point(n=n, s1=s1)
+                sb = UnicodeBuilder(32)
+                sb.append(s1)
+                sb.append(u"\n\n")
+                s = sb.build()
+                if len(s) != 34: raise ValueError
+                n -= 1
+            return n
+        f(10)
+        res = self.meta_interp(f, [10], backendopt=True)
+        assert res == 0
+
+    def test_stringbuilder_bug3(self):
+        jitdriver = JitDriver(reds=['n'], greens=[])
+        IN = ['a' * 37, 'b' * 38, '22', '1', '333']
+        JOINED = ''.join(IN)
+        def f(n):
+            while n > 0:
+                jitdriver.jit_merge_point(n=n)
+                sb = StringBuilder(36)
+                for s in IN:
+                    sb.append(s)
+                s = sb.build()
+                if s != JOINED:
+                    raise ValueError
+                n -= 1
+            return n
+        f(10)
+        res = self.meta_interp(f, [10], backendopt=True)
+        assert res == 0
+
     def test_shrink_array(self):
         jitdriver = JitDriver(reds=['result', 'n'], greens=[])
         _str, _StringBuilder = self._str, self._StringBuilder
@@ -596,7 +837,7 @@
                 n -= 1
             return result
 
-        res = self.meta_interp(f, [9])
+        res = self.meta_interp(f, [9], backendopt=True)
         assert res == f(9)
         self.check_resops({
             'jump': 1, 'guard_true': 2, 'int_ge': 2, 'int_add': 2, 'int_sub': 2
diff --git a/rpython/rlib/rdynload.py b/rpython/rlib/rdynload.py
--- a/rpython/rlib/rdynload.py
+++ b/rpython/rlib/rdynload.py
@@ -3,6 +3,7 @@
 
 from rpython.rtyper.tool import rffi_platform
 from rpython.rtyper.lltypesystem import rffi
+from rpython.rlib.objectmodel import we_are_translated
 from rpython.rlib.rarithmetic import r_uint
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
 from rpython.translator.platform import platform
@@ -83,6 +84,8 @@
         # XXX this would never work on top of ll2ctypes, because
         # ctypes are calling dlerror itself, unsure if I can do much in this
         # area (nor I would like to)
+        if not we_are_translated():
+            return "error info not available, not translated"
         res = c_dlerror()
         if not res:
             return ""
diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py
--- a/rpython/rlib/rgc.py
+++ b/rpython/rlib/rgc.py
@@ -231,6 +231,7 @@
 
 
 @jit.oopspec('rgc.ll_shrink_array(p, smallerlength)')
+ at enforceargs(None, int)
 @specialize.ll()
 def ll_shrink_array(p, smallerlength):
     from rpython.rtyper.lltypesystem.lloperation import llop
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -356,31 +356,30 @@
 
 
 class AbstractStringBuilder(object):
+    # This is not the real implementation!
+
     def __init__(self, init_size=INIT_SIZE):
-        self.l = []
-        self.size = 0
+        self._l = []
+        self._size = 0
 
     def _grow(self, size):
-        try:
-            self.size = ovfcheck(self.size + size)
-        except OverflowError:
-            raise MemoryError
+        self._size += size
 
     def append(self, s):
-        assert isinstance(s, self.tp)
-        self.l.append(s)
+        assert isinstance(s, self._tp)
+        self._l.append(s)
         self._grow(len(s))
 
     def append_slice(self, s, start, end):
-        assert isinstance(s, self.tp)
+        assert isinstance(s, self._tp)
         assert 0 <= start <= end <= len(s)
         s = s[start:end]
-        self.l.append(s)
+        self._l.append(s)
         self._grow(len(s))
 
     def append_multiple_char(self, c, times):
-        assert isinstance(c, self.tp)
-        self.l.append(c * times)
+        assert isinstance(c, self._tp)
+        self._l.append(c * times)
         self._grow(times)
 
     def append_charpsize(self, s, size):
@@ -388,22 +387,25 @@
         l = []
         for i in xrange(size):
             l.append(s[i])
-        self.l.append(self.tp("").join(l))
+        self._l.append(self._tp("").join(l))
         self._grow(size)
 
     def build(self):
-        return self.tp("").join(self.l)
+        result = self._tp("").join(self._l)
+        assert len(result) == self._size
+        self._l = [result]
+        return result
 
     def getlength(self):
-        return len(self.build())
+        return self._size
 
 
 class StringBuilder(AbstractStringBuilder):
-    tp = str
+    _tp = str
 
 
 class UnicodeBuilder(AbstractStringBuilder):
-    tp = unicode
+    _tp = unicode
 
 
 # ------------------------------------------------------------
diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py
--- a/rpython/rlib/test/test_rstring.py
+++ b/rpython/rlib/test/test_rstring.py
@@ -160,7 +160,11 @@
     s.append("a")
     s.append_slice("abc", 1, 2)
     s.append_multiple_char('d', 4)
-    assert s.build() == "aabcabdddd"
+    result = s.build()
+    assert result == "aabcabdddd"
+    assert result == s.build()
+    s.append("x")
+    assert s.build() == result + "x"
 
 def test_unicode_builder():
     s = UnicodeBuilder()
@@ -169,8 +173,9 @@
     s.append_slice(u'abcdef', 1, 2)
     assert s.getlength() == len('aabcb')
     s.append_multiple_char(u'd', 4)
-    assert s.build() == 'aabcbdddd'
-    assert isinstance(s.build(), unicode)
+    result = s.build()
+    assert result == 'aabcbdddd'
+    assert isinstance(result, unicode)
 
 
 class TestTranslates(BaseRtypingTest):
diff --git a/rpython/rtyper/annlowlevel.py b/rpython/rtyper/annlowlevel.py
--- a/rpython/rtyper/annlowlevel.py
+++ b/rpython/rtyper/annlowlevel.py
@@ -79,13 +79,6 @@
         return LowLevelAnnotatorPolicy.lowlevelspecialize(funcdesc, args_s, {})
     default_specialize = staticmethod(default_specialize)
 
-    def specialize__semierased(funcdesc, args_s):
-        a2l = annotation_to_lltype
-        l2a = lltype_to_annotation
-        args_s[:] = [l2a(a2l(s)) for s in args_s]
-        return LowLevelAnnotatorPolicy.default_specialize(funcdesc, args_s)
-    specialize__semierased = staticmethod(specialize__semierased)
-
     specialize__ll = default_specialize
 
     def specialize__ll_and_arg(funcdesc, args_s, *argindices):
diff --git a/rpython/rtyper/lltypesystem/opimpl.py b/rpython/rtyper/lltypesystem/opimpl.py
--- a/rpython/rtyper/lltypesystem/opimpl.py
+++ b/rpython/rtyper/lltypesystem/opimpl.py
@@ -179,6 +179,12 @@
 def op_direct_ptradd(obj, index):
     checkptr(obj)
     assert is_valid_int(index)
+    if not obj:
+        raise AssertionError("direct_ptradd on null pointer")
+        ## assert isinstance(index, int)
+        ## assert not (0 <= index < 4096)
+        ## from rpython.rtyper.lltypesystem import rffi
+        ## return rffi.cast(lltype.typeOf(obj), index)
     return lltype.direct_ptradd(obj, index)
 
 
diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py
--- a/rpython/rtyper/lltypesystem/rbuilder.py
+++ b/rpython/rtyper/lltypesystem/rbuilder.py
@@ -1,64 +1,139 @@
 from rpython.rlib import rgc, jit
 from rpython.rlib.objectmodel import enforceargs
-from rpython.rlib.rarithmetic import ovfcheck
+from rpython.rlib.rarithmetic import ovfcheck, r_uint, intmask
+from rpython.rlib.debug import ll_assert
 from rpython.rtyper.rptr import PtrRepr
-from rpython.rtyper.lltypesystem import lltype, rstr
+from rpython.rtyper.lltypesystem import lltype, rffi, rstr
 from rpython.rtyper.lltypesystem.lltype import staticAdtMethod, nullptr
 from rpython.rtyper.lltypesystem.rstr import (STR, UNICODE, char_repr,
     string_repr, unichar_repr, unicode_repr)
 from rpython.rtyper.rbuilder import AbstractStringBuilderRepr
 from rpython.tool.sourcetools import func_with_new_name
 
-# Think about heuristics below, maybe we can come up with something
-# better or at least compare it with list heuristics
 
-GROW_FAST_UNTIL = 100 * 1024 * 1024      # 100 MB
+# ------------------------------------------------------------
+# Basic idea:
+#
+# - A StringBuilder has a rstr.STR of the specified initial size
+#   (100 by default), which is filled gradually.
+#
+# - When it is full, we allocate extra buffers as an extra rstr.STR,
+#   and the already-filled one is added to a chained list of STRINGPIECE
+#   objects.
+#
+# - At build() time, we consolidate all these pieces into a single
+#   rstr.STR, which is both returned and re-attached to the StringBuilder,
+#   replacing the STRINGPIECEs.
+#
+# - The data is copied at most twice, and only once in case it fits
+#   into the initial size (and the GC supports shrinking the STR).
+#
+# XXX in build(), we could try keeping around a global weakref to the
+# chain of STRINGPIECEs and reuse them the next time.
+#
+# ------------------------------------------------------------
 
-def new_grow_func(name, mallocfn, copycontentsfn):
+
+def always_inline(func):
+    func._always_inline_ = True
+    return func
+
+
+def new_grow_funcs(name, mallocfn):
+
     @enforceargs(None, int)
     def stringbuilder_grow(ll_builder, needed):
-        allocated = ll_builder.allocated
-        #if allocated < GROW_FAST_UNTIL:
-        #    new_allocated = allocated << 1
-        #else:
-        extra_size = allocated >> 2
         try:
-            new_allocated = ovfcheck(allocated + extra_size)
-            new_allocated = ovfcheck(new_allocated + needed)
+            needed = ovfcheck(needed + ll_builder.total_size)
+            needed = ovfcheck(needed + 63) & ~63
+            total_size = ll_builder.total_size + needed
         except OverflowError:
             raise MemoryError
-        newbuf = mallocfn(new_allocated)
-        copycontentsfn(ll_builder.buf, newbuf, 0, 0, ll_builder.used)
-        ll_builder.buf = newbuf
-        ll_builder.allocated = new_allocated
-    return func_with_new_name(stringbuilder_grow, name)
+        #
+        new_string = mallocfn(needed)
+        #
+        PIECE = lltype.typeOf(ll_builder.extra_pieces).TO
+        old_piece = lltype.malloc(PIECE)
+        old_piece.buf = ll_builder.current_buf
+        old_piece.prev_piece = ll_builder.extra_pieces
+        ll_assert(bool(old_piece.buf), "no buf??")
+        ll_builder.current_buf = new_string
+        ll_builder.current_pos = 0
+        ll_builder.current_end = needed
+        ll_builder.total_size = total_size
+        ll_builder.extra_pieces = old_piece
 
-stringbuilder_grow = new_grow_func('stringbuilder_grow', rstr.mallocstr,
-                                   rstr.copy_string_contents)
-unicodebuilder_grow = new_grow_func('unicodebuilder_grow', rstr.mallocunicode,
-                                    rstr.copy_unicode_contents)
+    def stringbuilder_append_overflow(ll_builder, ll_str, size):
+        # First, the part that still fits in the current piece
+        part1 = ll_builder.current_end - ll_builder.current_pos
+        start = ll_builder.skip
+        ll_builder.copy_string_contents(ll_str, ll_builder.current_buf,
+                                        start, ll_builder.current_pos,
+                                        part1)
+        ll_builder.skip += part1
+        stringbuilder_grow(ll_builder, size - part1)
+
+    def stringbuilder_append_overflow_2(ll_builder, char0):
+        # Overflow when writing two chars.  There are two cases depending
+        # on whether one char still fits or not.
+        if ll_builder.current_pos < ll_builder.current_end:
+            ll_builder.current_buf.chars[ll_builder.current_pos] = char0
+            ll_builder.skip = 1
+        stringbuilder_grow(ll_builder, 2)
+
+    return (func_with_new_name(stringbuilder_grow, '%s_grow' % name),
+            func_with_new_name(stringbuilder_append_overflow,
+                               '%s_append_overflow' % name),
+            func_with_new_name(stringbuilder_append_overflow_2,
+                               '%s_append_overflow_2' % name))
+
+stringbuilder_grows = new_grow_funcs('stringbuilder', rstr.mallocstr)
+unicodebuilder_grows = new_grow_funcs('unicodebuilder', rstr.mallocunicode)
+
+STRINGPIECE = lltype.GcStruct('stringpiece',
+    ('buf', lltype.Ptr(STR)),
+    ('prev_piece', lltype.Ptr(lltype.GcForwardReference())))
+STRINGPIECE.prev_piece.TO.become(STRINGPIECE)
 
 STRINGBUILDER = lltype.GcStruct('stringbuilder',
-    ('allocated', lltype.Signed),
-    ('used', lltype.Signed),
-    ('buf', lltype.Ptr(STR)),
+    ('current_buf', lltype.Ptr(STR)),
+    ('current_pos', lltype.Signed),
+    ('current_end', lltype.Signed),
+    ('total_size', lltype.Signed),
+    ('skip', lltype.Signed),
+    ('extra_pieces', lltype.Ptr(STRINGPIECE)),
     adtmeths={
-        'grow': staticAdtMethod(stringbuilder_grow),
+        'grow': staticAdtMethod(stringbuilder_grows[0]),
+        'append_overflow': staticAdtMethod(stringbuilder_grows[1]),
+        'append_overflow_2': staticAdtMethod(stringbuilder_grows[2]),
+        'copy_string_contents': staticAdtMethod(rstr.copy_string_contents),
         'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_string),
+        'mallocfn': staticAdtMethod(rstr.mallocstr),
     }
 )
 
+UNICODEPIECE = lltype.GcStruct('unicodepiece',
+    ('buf', lltype.Ptr(UNICODE)),
+    ('prev_piece', lltype.Ptr(lltype.GcForwardReference())))
+UNICODEPIECE.prev_piece.TO.become(UNICODEPIECE)
+
 UNICODEBUILDER = lltype.GcStruct('unicodebuilder',
-    ('allocated', lltype.Signed),
-    ('used', lltype.Signed),
-    ('buf', lltype.Ptr(UNICODE)),
+    ('current_buf', lltype.Ptr(UNICODE)),
+    ('current_pos', lltype.Signed),
+    ('current_end', lltype.Signed),
+    ('total_size', lltype.Signed),
+    ('skip', lltype.Signed),
+    ('extra_pieces', lltype.Ptr(UNICODEPIECE)),
     adtmeths={
-        'grow': staticAdtMethod(unicodebuilder_grow),
+        'grow': staticAdtMethod(unicodebuilder_grows[0]),
+        'append_overflow': staticAdtMethod(unicodebuilder_grows[1]),
+        'append_overflow_2': staticAdtMethod(unicodebuilder_grows[2]),
+        'copy_string_contents': staticAdtMethod(rstr.copy_unicode_contents),
         'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_unicode),
+        'mallocfn': staticAdtMethod(rstr.mallocunicode),
     }
 )
 
-MAX = 16*1024*1024
 
 class BaseStringBuilderRepr(AbstractStringBuilderRepr):
     def empty(self):
@@ -66,72 +141,206 @@
 
     @classmethod
     def ll_new(cls, init_size):
-        if init_size < 0:
-            init_size = MAX
+        # Clamp 'init_size' to be a value between 0 and 1280.
+        # Negative values are mapped to 1280.
+        init_size = intmask(min(r_uint(init_size), r_uint(1280)))
         ll_builder = lltype.malloc(cls.lowleveltype.TO)
-        ll_builder.allocated = init_size
-        ll_builder.used = 0
-        ll_builder.buf = cls.mallocfn(init_size)
+        ll_builder.current_buf = cls.mallocfn(init_size)
+        ll_builder.current_pos = 0
+        ll_builder.current_end = init_size
+        ll_builder.total_size = init_size
         return ll_builder
 
     @staticmethod
+    @always_inline
     def ll_append(ll_builder, ll_str):
-        used = ll_builder.used
-        lgt = len(ll_str.chars)
-        needed = lgt + used
-        if needed > ll_builder.allocated:
-            ll_builder.grow(ll_builder, lgt)
-        ll_str.copy_contents(ll_str, ll_builder.buf, 0, used, lgt)
-        ll_builder.used = needed
+        BaseStringBuilderRepr.ll_append_slice(ll_builder, ll_str,
+                                              0, len(ll_str.chars))
 
     @staticmethod
+    @always_inline
     def ll_append_char(ll_builder, char):
-        if ll_builder.used == ll_builder.allocated:
-            ll_builder.grow(ll_builder, 1)
-        ll_builder.buf.chars[ll_builder.used] = char
-        ll_builder.used += 1
+        jit.conditional_call(ll_builder.current_pos == ll_builder.current_end,
+                             ll_builder.grow, ll_builder, 1)
+        pos = ll_builder.current_pos
+        ll_builder.current_pos = pos + 1
+        ll_builder.current_buf.chars[pos] = char
 
     @staticmethod
-    def ll_append_slice(ll_builder, ll_str, start, end):
-        needed = end - start
-        used = ll_builder.used
-        if needed + used > ll_builder.allocated:
-            ll_builder.grow(ll_builder, needed)
-        assert needed >= 0
-        ll_str.copy_contents(ll_str, ll_builder.buf, start, used, needed)
-        ll_builder.used = needed + used
+    def ll_append_char_2(ll_builder, char0, char1):
+        # this is only used by the JIT, when appending a small, known-length
+        # string.  Unlike two consecutive ll_append_char(), it can do that
+        # with only one conditional_call.
+        ll_builder.skip = 2
+        jit.conditional_call(
+            ll_builder.current_end - ll_builder.current_pos < 2,
+            ll_builder.append_overflow_2, ll_builder, char0)
+        pos = ll_builder.current_pos
+        buf = ll_builder.current_buf
+        buf.chars[pos] = char0
+        pos += ll_builder.skip
+        ll_builder.current_pos = pos
+        buf.chars[pos - 1] = char1
+        # NB. this usually writes into buf.chars[current_pos] and
+        # buf.chars[current_pos+1], except if we had an overflow right
+        # in the middle of the two chars.  In that case, 'skip' is set to
+        # 1 and only one char is written: the 'char1' overrides the 'char0'.
 
     @staticmethod
-    @jit.look_inside_iff(lambda ll_builder, char, times: jit.isconstant(times) and times <= 4)
-    def ll_append_multiple_char(ll_builder, char, times):
-        used = ll_builder.used
-        if times + used > ll_builder.allocated:
-            ll_builder.grow(ll_builder, times)
-        for i in range(times):
-            ll_builder.buf.chars[used] = char
-            used += 1
-        ll_builder.used = used
+    @always_inline
+    def ll_append_slice(ll_builder, ll_str, start, end):
+        size = end - start
+        if jit.we_are_jitted():
+            if BaseStringBuilderRepr._ll_jit_try_append_slice(
+                    ll_builder, ll_str, start, size):
+                return
+        ll_builder.skip = start
+        jit.conditional_call(
+            size > ll_builder.current_end - ll_builder.current_pos,
+            ll_builder.append_overflow, ll_builder, ll_str, size)
+        start = ll_builder.skip
+        size = end - start
+        pos = ll_builder.current_pos
+        ll_builder.copy_string_contents(ll_str, ll_builder.current_buf,
+                                        start, pos, size)
+        ll_builder.current_pos = pos + size
 
     @staticmethod
-    def ll_append_charpsize(ll_builder, charp, size):
-        used = ll_builder.used
-        if used + size > ll_builder.allocated:
-            ll_builder.grow(ll_builder, size)
-        ll_builder.copy_raw_to_string(charp, ll_builder.buf, used, size)
-        ll_builder.used += size
+    def _ll_jit_try_append_slice(ll_builder, ll_str, start, size):
+        if jit.isconstant(size):
+            if size == 0:
+                return True
+            if size == 1:
+                BaseStringBuilderRepr.ll_append_char(ll_builder,
+                                                     ll_str.chars[start])
+                return True
+            if size == 2:
+                BaseStringBuilderRepr.ll_append_char_2(ll_builder,
+                                                       ll_str.chars[start],
+                                                       ll_str.chars[start + 1])
+                return True
+        return False     # use the fall-back path
 
     @staticmethod
-    def ll_getlength(ll_builder):
-        return ll_builder.used
+    @always_inline
+    def ll_append_multiple_char(ll_builder, char, times):
+        if jit.we_are_jitted():
+            if BaseStringBuilderRepr._ll_jit_try_append_multiple_char(
+                    ll_builder, char, times):
+                return
+        BaseStringBuilderRepr._ll_append_multiple_char(ll_builder, char, times)
 
     @staticmethod
+    @jit.dont_look_inside
+    def _ll_append_multiple_char(ll_builder, char, times):
+        part1 = ll_builder.current_end - ll_builder.current_pos
+        if times > part1:
+            times -= part1
+            buf = ll_builder.current_buf
+            for i in xrange(ll_builder.current_pos, ll_builder.current_end):
+                buf.chars[i] = char
+            ll_builder.grow(ll_builder, times)
+        #
+        buf = ll_builder.current_buf
+        pos = ll_builder.current_pos
+        end = pos + times
+        ll_builder.current_pos = end
+        for i in xrange(pos, end):
+            buf.chars[i] = char
+
+    @staticmethod
+    def _ll_jit_try_append_multiple_char(ll_builder, char, size):
+        if jit.isconstant(size):
+            if size == 0:
+                return True
+            if size == 1:
+                BaseStringBuilderRepr.ll_append_char(ll_builder, char)
+                return True
+            if size == 2:
+                BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char)
+                return True
+            if size == 3:
+                BaseStringBuilderRepr.ll_append_char(ll_builder, char)
+                BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char)
+                return True
+            if size == 4:
+                BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char)
+                BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char)
+                return True
+        return False     # use the fall-back path
+
+    @staticmethod
+    @jit.dont_look_inside
+    def ll_append_charpsize(ll_builder, charp, size):
+        part1 = ll_builder.current_end - ll_builder.current_pos
+        if size > part1:
+            # First, the part that still fits
+            ll_builder.copy_raw_to_string(charp, ll_builder.current_buf,
+                                          ll_builder.current_pos, part1)
+            charp = rffi.ptradd(charp, part1)
+            size -= part1
+            ll_builder.grow(ll_builder, size)
+        #
+        pos = ll_builder.current_pos
+        ll_builder.current_pos = pos + size
+        ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, pos, size)
+
+    @staticmethod
+    @always_inline
+    def ll_getlength(ll_builder):
+        num_chars_missing_from_last_piece = (
+            ll_builder.current_end - ll_builder.current_pos)
+        return ll_builder.total_size - num_chars_missing_from_last_piece
+
+    @staticmethod
+    @jit.look_inside_iff(lambda ll_builder: jit.isvirtual(ll_builder))
     def ll_build(ll_builder):
-        final_size = ll_builder.used
-        assert final_size >= 0
-        if final_size < ll_builder.allocated:
-            ll_builder.allocated = final_size
-            ll_builder.buf = rgc.ll_shrink_array(ll_builder.buf, final_size)
-        return ll_builder.buf
+        # NB. usually the JIT doesn't look inside this function; it does
+        # so only in the simplest example where it could virtualize everything
+        if ll_builder.extra_pieces:
+            BaseStringBuilderRepr._ll_fold_pieces(ll_builder)
+        elif ll_builder.current_pos != ll_builder.total_size:
+            BaseStringBuilderRepr._ll_shrink_final(ll_builder)
+        return ll_builder.current_buf
+
+    @staticmethod
+    def _ll_shrink_final(ll_builder):
+        final_size = ll_builder.current_pos
+        ll_assert(final_size <= ll_builder.total_size,
+                  "final_size > ll_builder.total_size?")
+        buf = rgc.ll_shrink_array(ll_builder.current_buf, final_size)
+        ll_builder.current_buf = buf
+        ll_builder.current_end = final_size
+        ll_builder.total_size = final_size
+
+    @staticmethod
+    def _ll_fold_pieces(ll_builder):
+        final_size = BaseStringBuilderRepr.ll_getlength(ll_builder)
+        ll_assert(final_size >= 0, "negative final_size")
+        extra = ll_builder.extra_pieces
+        ll_builder.extra_pieces = lltype.nullptr(lltype.typeOf(extra).TO)
+        #
+        result = ll_builder.mallocfn(final_size)
+        piece = ll_builder.current_buf
+        piece_lgt = ll_builder.current_pos
+        ll_assert(ll_builder.current_end == len(piece.chars),
+                  "bogus last piece_lgt")
+        ll_builder.total_size = final_size
+        ll_builder.current_buf = result
+        ll_builder.current_pos = final_size
+        ll_builder.current_end = final_size
+
+        dst = final_size
+        while True:
+            dst -= piece_lgt
+            ll_assert(dst >= 0, "rbuilder build: overflow")
+            ll_builder.copy_string_contents(piece, result, 0, dst, piece_lgt)
+            if not extra:
+                break
+            piece = extra.buf
+            piece_lgt = len(piece.chars)
+            extra = extra.prev_piece
+        ll_assert(dst == 0, "rbuilder build: underflow")
 
     @classmethod
     def ll_bool(cls, ll_builder):
diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py
--- a/rpython/rtyper/lltypesystem/rffi.py
+++ b/rpython/rtyper/lltypesystem/rffi.py
@@ -681,25 +681,25 @@
         from rpython.rtyper.lltypesystem.rstr import (STR as STRTYPE,
                                                       copy_string_to_raw,
                                                       copy_raw_to_string,
-                                                      copy_string_contents)
+                                                      copy_string_contents,
+                                                      mallocstr as mallocfn)
         from rpython.rtyper.annlowlevel import llstr as llstrtype
         from rpython.rtyper.annlowlevel import hlstr as hlstrtype
         TYPEP = CCHARP
         ll_char_type = lltype.Char
         lastchar = '\x00'
-        builder_class = StringBuilder
     else:
         from rpython.rtyper.lltypesystem.rstr import (
             UNICODE as STRTYPE,
             copy_unicode_to_raw as copy_string_to_raw,
             copy_raw_to_unicode as copy_raw_to_string,
-            copy_unicode_contents as copy_string_contents)
+            copy_unicode_contents as copy_string_contents,
+            mallocunicode as mallocfn)
         from rpython.rtyper.annlowlevel import llunicode as llstrtype
         from rpython.rtyper.annlowlevel import hlunicode as hlstrtype
         TYPEP = CWCHARP
         ll_char_type = lltype.UniChar
         lastchar = u'\x00'
-        builder_class = UnicodeBuilder
 
     # str -> char*
     def str2charp(s, track_allocation=True):
@@ -728,12 +728,7 @@
         size = 0
         while cp[size] != lastchar:
             size += 1
-        b = builder_class(size)
-        i = 0
-        while cp[i] != lastchar:
-            b.append(cp[i])
-            i += 1
-        return assert_str0(b.build())
+        return assert_str0(charpsize2str(cp, size))
 
     # str -> char*
     # Can't inline this because of the raw address manipulation.
@@ -829,18 +824,18 @@
     # char* -> str, with an upper bound on the length in case there is no \x00
     @enforceargs(None, int)
     def charp2strn(cp, maxlen):
-        b = builder_class(maxlen)
-        i = 0
-        while i < maxlen and cp[i] != lastchar:
-            b.append(cp[i])
-            i += 1
-        return assert_str0(b.build())
+        size = 0
+        while size < maxlen and cp[size] != lastchar:
+            size += 1
+        return assert_str0(charpsize2str(cp, size))
 
     # char* and size -> str (which can contain null bytes)
     def charpsize2str(cp, size):
-        b = builder_class(size)
-        b.append_charpsize(cp, size)
-        return b.build()
+        ll_str = mallocfn(size)
+        copy_raw_to_string(cp, ll_str, 0, size)
+        result = hlstrtype(ll_str)
+        assert result is not None
+        return result
     charpsize2str._annenforceargs_ = [None, int]
 
     return (str2charp, free_charp, charp2str,
diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py
--- a/rpython/rtyper/lltypesystem/rstr.py
+++ b/rpython/rtyper/lltypesystem/rstr.py
@@ -4,7 +4,7 @@
 from rpython.rlib import jit, types
 from rpython.rlib.debug import ll_assert
 from rpython.rlib.objectmodel import (malloc_zero_filled, we_are_translated,
-    _hash_string, keepalive_until_here, specialize)
+    _hash_string, keepalive_until_here, specialize, enforceargs)
 from rpython.rlib.signature import signature
 from rpython.rlib.rarithmetic import ovfcheck
 from rpython.rtyper.error import TyperError
@@ -32,13 +32,13 @@
 UNICODE = GcForwardReference()
 
 def new_malloc(TP, name):
+    @enforceargs(int)
     def mallocstr(length):
         ll_assert(length >= 0, "negative string length")
         r = malloc(TP, length)
         if not we_are_translated() or not malloc_zero_filled:
             r.hash = 0
         return r
-    mallocstr._annspecialcase_ = 'specialize:semierased'
     return func_with_new_name(mallocstr, name)
 
 mallocstr = new_malloc(STR, 'mallocstr')
@@ -77,6 +77,10 @@
         # are obscurely essential to make sure that the strings stay alive
         # longer than the raw_memcopy().
         assert length >= 0
+        ll_assert(srcstart >= 0, "copystrc: negative srcstart")
+        ll_assert(srcstart + length <= len(src.chars), "copystrc: src ovf")
+        ll_assert(dststart >= 0, "copystrc: negative dststart")
+        ll_assert(dststart + length <= len(dst.chars), "copystrc: dst ovf")
         # from here, no GC operations can happen
         src = _get_raw_buf(SRC_TP, src, srcstart)
         dst = _get_raw_buf(DST_TP, dst, dststart)
diff --git a/rpython/rtyper/lltypesystem/test/test_rffi.py b/rpython/rtyper/lltypesystem/test/test_rffi.py
--- a/rpython/rtyper/lltypesystem/test/test_rffi.py
+++ b/rpython/rtyper/lltypesystem/test/test_rffi.py
@@ -81,6 +81,21 @@
         xf = self.compile(f, [], backendopt=False)
         assert xf() == 4
 
+    def test_charp2str_exact_result(self):
+        from rpython.annotator.annrpython import RPythonAnnotator
+        from rpython.rtyper.llannotation import SomePtr
+        a = RPythonAnnotator()
+        s = a.build_types(charpsize2str, [SomePtr(CCHARP), int])
+        assert s.knowntype == str
+        assert s.can_be_None is False
+        assert s.no_nul is False
+        #
+        a = RPythonAnnotator()
+        s = a.build_types(charp2str, [SomePtr(CCHARP)])
+        assert s.knowntype == str
+        assert s.can_be_None is False
+        assert s.no_nul is True
+
     def test_string_reverse(self):
         c_source = py.code.Source("""
         #include <string.h>
diff --git a/rpython/rtyper/test/test_rbuilder.py b/rpython/rtyper/test/test_rbuilder.py
--- a/rpython/rtyper/test/test_rbuilder.py
+++ b/rpython/rtyper/test/test_rbuilder.py
@@ -3,26 +3,96 @@
 import py
 
 from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
-from rpython.rtyper.annlowlevel import llstr, hlstr
+from rpython.rtyper.annlowlevel import llstr, hlstr, llunicode, hlunicode
 from rpython.rtyper.lltypesystem import rffi
-from rpython.rtyper.lltypesystem.rbuilder import StringBuilderRepr
+from rpython.rtyper.lltypesystem.rbuilder import StringBuilderRepr, UnicodeBuilderRepr
 from rpython.rtyper.test.tool import BaseRtypingTest
 
 
 class TestStringBuilderDirect(object):
+    def test_nooveralloc(self):
+        sb = StringBuilderRepr.ll_new(33)
+        StringBuilderRepr.ll_append(sb, llstr("abc" * 11))
+        assert StringBuilderRepr.ll_getlength(sb) == 33
+        s = StringBuilderRepr.ll_build(sb)
+        assert hlstr(s) == "abc" * 11
+        assert StringBuilderRepr.ll_getlength(sb) == 33
+
+    def test_shrinking(self):
+        sb = StringBuilderRepr.ll_new(100)
+        StringBuilderRepr.ll_append(sb, llstr("abc" * 11))
+        assert StringBuilderRepr.ll_getlength(sb) == 33
+        s = StringBuilderRepr.ll_build(sb)
+        assert hlstr(s) == "abc" * 11
+        assert StringBuilderRepr.ll_getlength(sb) == 33
+
     def test_simple(self):
         sb = StringBuilderRepr.ll_new(3)
         StringBuilderRepr.ll_append_char(sb, 'x')
         StringBuilderRepr.ll_append(sb, llstr("abc"))
         StringBuilderRepr.ll_append_slice(sb, llstr("foobar"), 2, 5)
         StringBuilderRepr.ll_append_multiple_char(sb, 'y', 3)
+        assert StringBuilderRepr.ll_getlength(sb) == 10
         s = StringBuilderRepr.ll_build(sb)
         assert hlstr(s) == "xabcobayyy"
+        assert StringBuilderRepr.ll_getlength(sb) == 10
 
-    def test_nooveralloc(self):
-        sb = StringBuilderRepr.ll_new(3)
-        StringBuilderRepr.ll_append(sb, llstr("abc"))
-        assert StringBuilderRepr.ll_build(sb) == sb.buf
+    def test_grow_when_append_char(self):
+        sb = StringBuilderRepr.ll_new(33)
+        StringBuilderRepr.ll_append(sb, llstr("abc" * 11))
+        StringBuilderRepr.ll_append_char(sb, "d")
+        s = StringBuilderRepr.ll_build(sb)
+        assert hlstr(s) == "abc" * 11 + "d"
+
+    def test_grow_two_halves(self):
+        sb = StringBuilderRepr.ll_new(32)
+        StringBuilderRepr.ll_append(sb, llstr("abc" * 11))
+        s = StringBuilderRepr.ll_build(sb)
+        assert hlstr(s) == "abc" * 11
+
+    def test_grow_when_exactly_full(self):
+        sb = StringBuilderRepr.ll_new(33)
+        StringBuilderRepr.ll_append(sb, llstr("abc" * 11))
+        StringBuilderRepr.ll_append(sb, llstr("def"))
+        s = StringBuilderRepr.ll_build(sb)
+        assert hlstr(s) == "abc" * 11 + "def"
+
+    def test_charp(self):
+        sb = StringBuilderRepr.ll_new(32)
+        with rffi.scoped_str2charp("hello world") as p:
+            StringBuilderRepr.ll_append_charpsize(sb, p, 12)
+        with rffi.scoped_str2charp("0123456789abcdefghijklmn") as p:
+            StringBuilderRepr.ll_append_charpsize(sb, p, 24)
+        s = StringBuilderRepr.ll_build(sb)
+        assert hlstr(s) == "hello world\x000123456789abcdefghijklmn"
+
+    def test_unicode(self):
+        sb = UnicodeBuilderRepr.ll_new(32)
+        UnicodeBuilderRepr.ll_append_char(sb, u'x')
+        UnicodeBuilderRepr.ll_append(sb, llunicode(u"abc"))
+        UnicodeBuilderRepr.ll_append_slice(sb, llunicode(u"foobar"), 2, 5)
+        UnicodeBuilderRepr.ll_append_multiple_char(sb, u'y', 30)
+        u = UnicodeBuilderRepr.ll_build(sb)
+        assert hlunicode(u) == u"xabcoba" + u"y" * 30
+
+    def test_several_builds(self):
+        sb = StringBuilderRepr.ll_new(32)
+        s = StringBuilderRepr.ll_build(sb)
+        assert hlstr(s) == ""
+        assert s == StringBuilderRepr.ll_build(sb)
+        assert s == StringBuilderRepr.ll_build(sb)
+        #
+        sb = StringBuilderRepr.ll_new(32)
+        StringBuilderRepr.ll_append(sb, llstr("abcdefgh" * 3))   # not full
+        s = StringBuilderRepr.ll_build(sb)
+        assert hlstr(s) == "abcdefgh" * 3
+        assert s == StringBuilderRepr.ll_build(sb)
+        assert s == StringBuilderRepr.ll_build(sb)
+        StringBuilderRepr.ll_append(sb, llstr("extra"))    # overflow
+        s = StringBuilderRepr.ll_build(sb)
+        assert hlstr(s) == "abcdefgh" * 3 + "extra"
+        assert s == StringBuilderRepr.ll_build(sb)
+        assert s == StringBuilderRepr.ll_build(sb)
 
 
 class TestStringBuilder(BaseRtypingTest):
@@ -39,25 +109,25 @@
 
     def test_overallocation(self):
         def func():
-            s = StringBuilder(4)
-            s.append("abcd")
-            s.append("defg")
+            s = StringBuilder(34)
+            s.append("abcd" * 5)
+            s.append("defg" * 5)
             s.append("rty")
             return s.build()
         res = self.ll_to_string(self.interpret(func, []))
-        assert res == "abcddefgrty"
+        assert res == "abcd" * 5 + "defg" * 5 + "rty"
 
     def test_unicode(self):
         def func():
-            s = UnicodeBuilder()
+            s = UnicodeBuilder(32)
             s.append(u'a')
             s.append(u'abc')
             s.append(u'abcdef')
             s.append_slice(u'abc', 1, 2)
-            s.append_multiple_char(u'u', 4)
+            s.append_multiple_char(u'u', 40)
             return s.build()
         res = self.ll_to_unicode(self.interpret(func, []))
-        assert res == 'aabcabcdefbuuuu'
+        assert res == u'aabcabcdefb' + u'u' * 40
         assert isinstance(res, unicode)
 
     def test_string_getlength(self):
diff --git a/rpython/translator/c/src/mem.h b/rpython/translator/c/src/mem.h
--- a/rpython/translator/c/src/mem.h
+++ b/rpython/translator/c/src/mem.h
@@ -117,6 +117,7 @@
 #define OP_BOEHM_DISAPPEARING_LINK(link, obj, r)  /* nothing */
 #define OP_GC__DISABLE_FINALIZERS(r)  /* nothing */
 #define OP_GC__ENABLE_FINALIZERS(r)  /* nothing */
+#define GC_REGISTER_FINALIZER(a,b,c,d,e)  /* nothing */
 #endif
 
 /************************************************************/
diff --git a/rpython/translator/c/test/test_newgc.py b/rpython/translator/c/test/test_newgc.py
--- a/rpython/translator/c/test/test_newgc.py
+++ b/rpython/translator/c/test/test_newgc.py
@@ -1363,6 +1363,23 @@
         assert res == ' '.join([''.join(map(chr, range(33, 33+length)))
                                 for length in range(1, 51)])
 
+    def definestr_string_builder_multiple_builds_2(cls):
+        def fn(_):
+            got = []
+            for j in range(3, 76, 5):
+                s = StringBuilder()
+                for i in range(j):
+                    s.append(chr(33+i))
+                    gc.collect()
+                got.append(s.build())
+            return ' '.join(got)
+        return fn
+
+    def test_string_builder_multiple_builds_2(self):
+        res = self.run('string_builder_multiple_builds_2')
+        assert res == ' '.join([''.join(map(chr, range(33, 33+length)))
+                                for length in range(3, 76, 5)])
+
     def define_nursery_hash_base(cls):
         class A:
             pass
diff --git a/rpython/translator/c/test/test_standalone.py b/rpython/translator/c/test/test_standalone.py
--- a/rpython/translator/c/test/test_standalone.py
+++ b/rpython/translator/c/test/test_standalone.py
@@ -960,6 +960,50 @@
         self.compile(entry_point)
         # assert did not explode
 
+    def test_unicode_builder(self):
+        import random
+        from rpython.rlib.rstring import UnicodeBuilder
+
+        to_do = []
+        for i in range(15000):
+            to_do.append(random.randrange(0, 100000))
+        to_do.append(0)
+
+        expected = []
+        s = ''
+        for x in to_do:
+            if x < 1500:
+                expected.append("``%s''" % (s,))
+                if x < 1000:
+                    s = ''
+            elif x < 20000:
+                s += chr(32 + (x & 63))
+            elif x < 30000:
+                s += chr(32 + (x & 63)) * (x % 93)
+            else:
+                s += str(x)
+        expected = '\n'.join(expected)
+
+        def entry_point(argv):
+            b = UnicodeBuilder(32)
+            for x in to_do:
+                if x < 1500:
+                    print "``%s''" % str(b.build())
+                    if x < 1000:
+                        b = UnicodeBuilder(32)
+                elif x < 20000:
+                    b.append(unichr(32 + (x & 63)))
+                elif x < 30000:
+                    b.append_multiple_char(unichr(32 + (x & 63)), x % 93)
+                else:
+                    b.append(unicode(str(x)))
+            return 0
+
+        t, cbuilder = self.compile(entry_point)
+        out = cbuilder.cmdexec('')
+        assert out.strip() == expected
+
+
 class TestMaemo(TestStandalone):
     def setup_class(cls):
         py.test.skip("TestMaemo: tests skipped for now")