[pypy-commit] pypy default: hg merge stringbuilder2-perf
arigo
noreply at buildbot.pypy.org
Mon Jun 16 09:36:25 CEST 2014
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r72072:cba6e9bc3afb
Date: 2014-06-16 09:35 +0200
http://bitbucket.org/pypy/pypy/changeset/cba6e9bc3afb/
Log: hg merge stringbuilder2-perf
Give the StringBuilder a more flexible internal structure, with a
chained list of strings instead of just one string. This makes it
more efficient when building large strings, e.g. with cStringIO().
Also, systematically use jit.conditional_call() instead of regular
branches. This lets the JIT make more linear code, at the cost of
forcing a bit more data (to be passed as arguments to
conditional_calls). I would expect the net result to be a slight
slow-down on some simple benchmarks and a speed-up on bigger
programs.
diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py
--- a/pypy/module/pypyjit/test_pypy_c/test_string.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_string.py
@@ -101,39 +101,64 @@
log = self.run(main, [1000])
assert log.result == main(1000)
loop, = log.loops_by_filename(self.filepath)
+ # NB: since the stringbuilder2-perf branch we get more operations than
+ # before, but a lot less branches that might fail randomly.
assert loop.match("""
- i7 = int_gt(i4, 0)
- guard_true(i7, descr=...)
+ i100 = int_gt(i95, 0)
+ guard_true(i100, descr=...)
guard_not_invalidated(descr=...)
- p9 = call(ConstClass(ll_int2dec__Signed), i4, descr=<Callr . i EF=3>)
+ p101 = call(ConstClass(ll_int2dec__Signed), i95, descr=<Callr . i EF=3>)
guard_no_exception(descr=...)
- i10 = strlen(p9)
- i11 = int_is_true(i10)
- guard_true(i11, descr=...)
- i13 = strgetitem(p9, 0)
- i15 = int_eq(i13, 45)
- guard_false(i15, descr=...)
- i17 = int_neg(i10)
- i19 = int_gt(i10, 23)
- guard_false(i19, descr=...)
- p21 = newstr(23)
- copystrcontent(p9, p21, 0, 0, i10)
- i25 = int_add(1, i10)
- i26 = int_gt(i25, 23)
- guard_false(i26, descr=...)
- strsetitem(p21, i10, 32)
- i30 = int_add(i10, i25)
- i31 = int_gt(i30, 23)
- guard_false(i31, descr=...)
- copystrcontent(p9, p21, 0, i25, i10)
- i33 = int_lt(i30, 23)
- guard_true(i33, descr=...)
- p35 = call(ConstClass(ll_shrink_array__rpy_stringPtr_Signed), p21, i30, descr=<Callr . ri EF=4 OS=3>)
+ i102 = strlen(p101)
+ i103 = int_is_true(i102)
+ guard_true(i103, descr=...)
+ i104 = strgetitem(p101, 0)
+ i105 = int_eq(i104, 45)
+ guard_false(i105, descr=...)
+ i106 = int_neg(i102)
+ i107 = int_gt(i102, 23)
+ p108 = new(descr=<SizeDescr .+>)
+ p110 = newstr(23)
+ setfield_gc(..., descr=<Field. stringbuilder.+>)
+ setfield_gc(..., descr=<Field. stringbuilder.+>)
+ setfield_gc(..., descr=<Field. stringbuilder.+>)
+ cond_call(i107, ConstClass(stringbuilder_append_overflow__stringbuilderPtr_rpy_stringPtr_Signed), p108, p101, i102, descr=<Callv 0 rri EF=4>)
guard_no_exception(descr=...)
- i37 = strlen(p35)
- i38 = int_add_ovf(i5, i37)
+ i111 = getfield_gc(p108, descr=<FieldS stringbuilder.skip .+>)
+ i112 = int_sub(i102, i111)
+ i113 = getfield_gc(p108, descr=<FieldS stringbuilder.current_pos .+>)
+ p114 = getfield_gc(p108, descr=<FieldP stringbuilder.current_buf .+>)
+ copystrcontent(p101, p114, i111, i113, i112)
+ i115 = int_add(i113, i112)
+ i116 = getfield_gc(p108, descr=<FieldS stringbuilder.current_end .+>)
+ setfield_gc(p108, i115, descr=<FieldS stringbuilder.current_pos .+>)
+ i117 = int_eq(i115, i116)
+ cond_call(i117, ConstClass(stringbuilder_grow__stringbuilderPtr_Signed), p108, 1, descr=<Callv 0 ri EF=4>)
+ guard_no_exception(descr=...)
+ i118 = getfield_gc(p108, descr=<FieldS stringbuilder.current_pos .+>)
+ i119 = int_add(i118, 1)
+ p120 = getfield_gc(p108, descr=<FieldP stringbuilder.current_buf .+>)
+ strsetitem(p120, i118, 32)
+ i121 = getfield_gc(p108, descr=<FieldS stringbuilder.current_end .+>)
+ i122 = int_sub(i121, i119)
+ setfield_gc(..., descr=<FieldS stringbuilder.+>)
+ setfield_gc(..., descr=<FieldS stringbuilder.+>)
+ i123 = int_gt(i102, i122)
+ cond_call(i123, ConstClass(stringbuilder_append_overflow__stringbuilderPtr_rpy_stringPtr_Signed), p108, p101, i102, descr=<Callv 0 rri EF=4>)
+ guard_no_exception(descr=...)
+ i124 = getfield_gc(p108, descr=<FieldS stringbuilder.skip .+>)
+ i125 = int_sub(i102, i124)
+ i126 = getfield_gc(p108, descr=<FieldS stringbuilder.current_pos .+>)
+ p127 = getfield_gc(p108, descr=<FieldP stringbuilder.current_buf .+>)
+ copystrcontent(p101, p127, i124, i126, i125)
+ i128 = int_add(i126, i125)
+ setfield_gc(p108, i128, descr=<FieldS stringbuilder.current_pos .+>)
+ p135 = call(..., descr=<Callr . r EF=4) # ll_build
+ guard_no_exception(descr=...)
+ i136 = strlen(p135)
+ i137 = int_add_ovf(i92, i136)
guard_no_overflow(descr=...)
- i40 = int_sub(i4, 1)
+ i138 = int_sub(i95, 1)
--TICK--
jump(..., descr=...)
""")
diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py
--- a/rpython/jit/codewriter/jtransform.py
+++ b/rpython/jit/codewriter/jtransform.py
@@ -253,7 +253,13 @@
SpaceOperation("record_known_class", [op.args[0], const_vtable], None)]
def rewrite_op_raw_malloc_usage(self, op):
- pass
+ if self.cpu.translate_support_code or isinstance(op.args[0], Variable):
+ return # the operation disappears
+ else:
+ # only for untranslated tests: get a real integer estimate
+ arg = op.args[0].value
+ arg = llmemory.raw_malloc_usage(arg)
+ return [Constant(arg, lltype.Signed)]
def rewrite_op_jit_record_known_class(self, op):
return SpaceOperation("record_known_class", [op.args[0], op.args[1]], None)
diff --git a/rpython/jit/metainterp/blackhole.py b/rpython/jit/metainterp/blackhole.py
--- a/rpython/jit/metainterp/blackhole.py
+++ b/rpython/jit/metainterp/blackhole.py
@@ -1090,6 +1090,11 @@
if condition:
cpu.bh_call_v(func, args_i, None, None, calldescr)
+ @arguments("cpu", "i", "i", "R", "d")
+ def bhimpl_conditional_call_r_v(cpu, condition, func, args_r, calldescr):
+ if condition:
+ cpu.bh_call_v(func, None, args_r, None, calldescr)
+
@arguments("cpu", "i", "i", "I", "R", "d")
def bhimpl_conditional_call_ir_v(cpu, condition, func, args_i, args_r,
calldescr):
diff --git a/rpython/jit/metainterp/pyjitpl.py b/rpython/jit/metainterp/pyjitpl.py
--- a/rpython/jit/metainterp/pyjitpl.py
+++ b/rpython/jit/metainterp/pyjitpl.py
@@ -888,6 +888,8 @@
pc):
self.do_conditional_call(condbox, funcbox, argboxes, calldescr, pc)
+ opimpl_conditional_call_r_v = opimpl_conditional_call_i_v
+
@arguments("box", "box", "boxes2", "descr", "orgpc")
def opimpl_conditional_call_ir_v(self, condbox, funcbox, argboxes,
calldescr, pc):
@@ -1420,6 +1422,8 @@
return self.execute_varargs(rop.CALL, allboxes, descr, exc, pure)
def do_conditional_call(self, condbox, funcbox, argboxes, descr, pc):
+ if isinstance(condbox, ConstInt) and condbox.value == 0:
+ return # so that the heapcache can keep argboxes virtual
allboxes = self._build_allboxes(funcbox, argboxes, descr)
effectinfo = descr.get_extra_info()
assert not effectinfo.check_forces_virtual_or_virtualizable()
diff --git a/rpython/jit/metainterp/test/test_string.py b/rpython/jit/metainterp/test/test_string.py
--- a/rpython/jit/metainterp/test/test_string.py
+++ b/rpython/jit/metainterp/test/test_string.py
@@ -552,7 +552,7 @@
result += ord(b.build()[0])
n -= 1
return result
- res = self.meta_interp(main, [9])
+ res = self.meta_interp(main, [9], backendopt=True)
assert res == main(9)
def test_virtual_copystringcontent2(self):
@@ -568,7 +568,7 @@
result += ord((b.build() + _str("xyz"))[0])
n -= 1
return result
- res = self.meta_interp(main, [9])
+ res = self.meta_interp(main, [9], backendopt=True)
assert res == main(9)
def test_bytearray(self):
@@ -582,6 +582,247 @@
res = self.interp_operations(f, [13])
assert res == 13
+ def test_stringbuilder_create(self):
+ jitdriver = JitDriver(reds=['n'], greens=[])
+ def f(n):
+ while n > 0:
+ jitdriver.jit_merge_point(n=n)
+ sb = UnicodeBuilder()
+ if sb.build() != u"":
+ raise ValueError
+ n -= 1
+ return n
+ res = self.meta_interp(f, [10], backendopt=True)
+ assert res == 0
+ self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2,
+ 'jump': 1})
+
+ def test_stringbuilder_append_char(self):
+ jitdriver = JitDriver(reds=['n'], greens=[])
+ def f(n):
+ while n > 0:
+ jitdriver.jit_merge_point(n=n)
+ sb = UnicodeBuilder()
+ sb.append(u"a")
+ sb.append(unichr(n))
+ s = sb.build()
+ if len(s) != 2: raise ValueError
+ if s[0] != u"a": raise ValueError
+ if s[1] != unichr(n): raise ValueError
+ n -= 1
+ return n
+ res = self.meta_interp(f, [10], backendopt=True)
+ assert res == 0
+ self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2,
+ 'jump': 1})
+
+ def test_stringbuilder_append_1(self):
+ jitdriver = JitDriver(reds=['n'], greens=[])
+ def f(n):
+ while n > 0:
+ jitdriver.jit_merge_point(n=n)
+ sb = UnicodeBuilder()
+ sb.append(u"ab")
+ s = sb.build()
+ if len(s) != 2: raise ValueError
+ if s[0] != u"a": raise ValueError
+ if s[1] != u"b": raise ValueError
+ n -= 1
+ return n
+ res = self.meta_interp(f, [10], backendopt=True)
+ assert res == 0
+ self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2,
+ 'jump': 1})
+
+ def test_stringbuilder_append_2(self):
+ jitdriver = JitDriver(reds=['n'], greens=[])
+ def f(n):
+ while n > 0:
+ jitdriver.jit_merge_point(n=n)
+ sb = UnicodeBuilder()
+ sb.append(u"abc")
+ s = sb.build()
+ if len(s) != 3: raise ValueError
+ if s[0] != u"a": raise ValueError
+ if s[1] != u"b": raise ValueError
+ if s[2] != u"c": raise ValueError
+ n -= 1
+ return n
+ res = self.meta_interp(f, [10], backendopt=True)
+ assert res == 0
+ self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2,
+ 'jump': 1})
+
+ def test_stringbuilder_append_empty(self):
+ jitdriver = JitDriver(reds=['n'], greens=[])
+ def f(n):
+ while n > 0:
+ jitdriver.jit_merge_point(n=n)
+ sb = UnicodeBuilder()
+ sb.append(u"")
+ s = sb.build()
+ if len(s) != 0: raise ValueError
+ n -= 1
+ return n
+ res = self.meta_interp(f, [10], backendopt=True)
+ assert res == 0
+ self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2,
+ 'jump': 1})
+
+ def test_stringbuilder_append_len2_1(self):
+ jitdriver = JitDriver(reds=['n', 'str1'], greens=[])
+ def f(n):
+ str1 = unicode(str(n))
+ while n > 0:
+ jitdriver.jit_merge_point(n=n, str1=str1)
+ sb = UnicodeBuilder()
+ sb.append(str1)
+ sb.append(u"ab")
+ s = sb.build()
+ if len(s) != 4: raise ValueError
+ if s[0] != u"1": raise ValueError
+ if s[1] != u"0": raise ValueError
+ if s[2] != u"a": raise ValueError
+ if s[3] != u"b": raise ValueError
+ n -= 1
+ return n
+ res = self.meta_interp(f, [10], backendopt=True)
+ assert res == 0
+ self.check_resops(call=2) # (ll_shrink_array) * 2 unroll
+
+ def test_stringbuilder_append_len2_2(self):
+ jitdriver = JitDriver(reds=['n', 'str1'], greens=[])
+ def f(n):
+ str1 = str(n)
+ while n > 0:
+ jitdriver.jit_merge_point(n=n, str1=str1)
+ sb = StringBuilder(4)
+ sb.append("a")
+ sb.append(str1)
+ s = sb.build()
+ if len(s) != 3: raise ValueError
+ if s[0] != "a": raise ValueError
+ if s[1] != "1": raise ValueError
+ if s[2] != "0": raise ValueError
+ n -= 1
+ return n
+ res = self.meta_interp(f, [10], backendopt=True)
+ assert res == 0
+ self.check_resops(call=2) # (ll_shrink_array) * 2 unroll
+
+ def test_stringbuilder_append_slice_1(self):
+ jitdriver = JitDriver(reds=['n'], greens=[])
+ def f(n):
+ while n > 0:
+ jitdriver.jit_merge_point(n=n)
+ sb = UnicodeBuilder()
+ sb.append_slice(u"abcdefghij", 1, n)
+ sb.append_slice(u"abcdefghij", 0, n)
+ s = sb.build()
+ if len(s) != 2 * n - 1: raise ValueError
+ n -= 1
+ return n
+ res = self.meta_interp(f, [10], backendopt=True)
+ assert res == 0
+ self.check_resops(call=2, # (ll_shrink_array) * 2 unroll
+ copyunicodecontent=4)
+
+ def test_stringbuilder_append_slice_2(self):
+ jitdriver = JitDriver(reds=['n'], greens=[])
+ def f(n):
+ while n > 0:
+ jitdriver.jit_merge_point(n=n)
+ sb = UnicodeBuilder()
+ sb.append_slice(u"fOo!", 1, 3)
+ s = sb.build()
+ if len(s) != 2: raise ValueError
+ if s[0] != u"O": raise ValueError
+ if s[1] != u"o": raise ValueError
+ n -= 1
+ return n
+ res = self.meta_interp(f, [10], backendopt=True)
+ assert res == 0
+ self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2,
+ 'jump': 1})
+
+ def test_stringbuilder_append_multiple_char_1(self):
+ jitdriver = JitDriver(reds=['n'], greens=[])
+ def f(n):
+ while n > 0:
+ jitdriver.jit_merge_point(n=n)
+ sb = UnicodeBuilder()
+ sb.append_multiple_char(u"x", 3)
+ s = sb.build()
+ if len(s) != 3: raise ValueError
+ if s[0] != u"x": raise ValueError
+ if s[1] != u"x": raise ValueError
+ if s[2] != u"x": raise ValueError
+ n -= 1
+ return n
+ res = self.meta_interp(f, [10], backendopt=True)
+ assert res == 0
+ self.check_resops({'int_sub': 2, 'int_gt': 2, 'guard_true': 2,
+ 'jump': 1})
+
+ def test_stringbuilder_append_multiple_char_2(self):
+ jitdriver = JitDriver(reds=['n'], greens=[])
+ def f(n):
+ while n > 0:
+ jitdriver.jit_merge_point(n=n)
+ sb = UnicodeBuilder()
+ sb.append_multiple_char(u"x", 5)
+ s = sb.build()
+ if len(s) != 5: raise ValueError
+ if s[0] != u"x": raise ValueError
+ if s[1] != u"x": raise ValueError
+ if s[2] != u"x": raise ValueError
+ if s[3] != u"x": raise ValueError
+ if s[4] != u"x": raise ValueError
+ n -= 1
+ return n
+ res = self.meta_interp(f, [10], backendopt=True)
+ assert res == 0
+ self.check_resops(call=4) # (append, build) * 2 unroll
+
+ def test_stringbuilder_bug1(self):
+ jitdriver = JitDriver(reds=['n', 's1'], greens=[])
+ @dont_look_inside
+ def escape(x):
+ pass
+ def f(n):
+ s1 = unicode(str(n) * 16)
+ while n > 0:
+ jitdriver.jit_merge_point(n=n, s1=s1)
+ sb = UnicodeBuilder(32)
+ sb.append(s1)
+ sb.append(u"\n\n")
+ s = sb.build()
+ if len(s) != 34: raise ValueError
+ n -= 1
+ return n
+ f(10)
+ res = self.meta_interp(f, [10], backendopt=True)
+ assert res == 0
+
+ def test_stringbuilder_bug3(self):
+ jitdriver = JitDriver(reds=['n'], greens=[])
+ IN = ['a' * 37, 'b' * 38, '22', '1', '333']
+ JOINED = ''.join(IN)
+ def f(n):
+ while n > 0:
+ jitdriver.jit_merge_point(n=n)
+ sb = StringBuilder(36)
+ for s in IN:
+ sb.append(s)
+ s = sb.build()
+ if s != JOINED:
+ raise ValueError
+ n -= 1
+ return n
+ f(10)
+ res = self.meta_interp(f, [10], backendopt=True)
+ assert res == 0
+
def test_shrink_array(self):
jitdriver = JitDriver(reds=['result', 'n'], greens=[])
_str, _StringBuilder = self._str, self._StringBuilder
@@ -596,7 +837,7 @@
n -= 1
return result
- res = self.meta_interp(f, [9])
+ res = self.meta_interp(f, [9], backendopt=True)
assert res == f(9)
self.check_resops({
'jump': 1, 'guard_true': 2, 'int_ge': 2, 'int_add': 2, 'int_sub': 2
diff --git a/rpython/rlib/rdynload.py b/rpython/rlib/rdynload.py
--- a/rpython/rlib/rdynload.py
+++ b/rpython/rlib/rdynload.py
@@ -3,6 +3,7 @@
from rpython.rtyper.tool import rffi_platform
from rpython.rtyper.lltypesystem import rffi
+from rpython.rlib.objectmodel import we_are_translated
from rpython.rlib.rarithmetic import r_uint
from rpython.translator.tool.cbuild import ExternalCompilationInfo
from rpython.translator.platform import platform
@@ -83,6 +84,8 @@
# XXX this would never work on top of ll2ctypes, because
# ctypes are calling dlerror itself, unsure if I can do much in this
# area (nor I would like to)
+ if not we_are_translated():
+ return "error info not available, not translated"
res = c_dlerror()
if not res:
return ""
diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py
--- a/rpython/rlib/rgc.py
+++ b/rpython/rlib/rgc.py
@@ -231,6 +231,7 @@
@jit.oopspec('rgc.ll_shrink_array(p, smallerlength)')
+@enforceargs(None, int)
@specialize.ll()
def ll_shrink_array(p, smallerlength):
from rpython.rtyper.lltypesystem.lloperation import llop
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -356,31 +356,30 @@
class AbstractStringBuilder(object):
+ # This is not the real implementation!
+
def __init__(self, init_size=INIT_SIZE):
- self.l = []
- self.size = 0
+ self._l = []
+ self._size = 0
def _grow(self, size):
- try:
- self.size = ovfcheck(self.size + size)
- except OverflowError:
- raise MemoryError
+ self._size += size
def append(self, s):
- assert isinstance(s, self.tp)
- self.l.append(s)
+ assert isinstance(s, self._tp)
+ self._l.append(s)
self._grow(len(s))
def append_slice(self, s, start, end):
- assert isinstance(s, self.tp)
+ assert isinstance(s, self._tp)
assert 0 <= start <= end <= len(s)
s = s[start:end]
- self.l.append(s)
+ self._l.append(s)
self._grow(len(s))
def append_multiple_char(self, c, times):
- assert isinstance(c, self.tp)
- self.l.append(c * times)
+ assert isinstance(c, self._tp)
+ self._l.append(c * times)
self._grow(times)
def append_charpsize(self, s, size):
@@ -388,22 +387,25 @@
l = []
for i in xrange(size):
l.append(s[i])
- self.l.append(self.tp("").join(l))
+ self._l.append(self._tp("").join(l))
self._grow(size)
def build(self):
- return self.tp("").join(self.l)
+ result = self._tp("").join(self._l)
+ assert len(result) == self._size
+ self._l = [result]
+ return result
def getlength(self):
- return len(self.build())
+ return self._size
class StringBuilder(AbstractStringBuilder):
- tp = str
+ _tp = str
class UnicodeBuilder(AbstractStringBuilder):
- tp = unicode
+ _tp = unicode
# ------------------------------------------------------------
diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py
--- a/rpython/rlib/test/test_rstring.py
+++ b/rpython/rlib/test/test_rstring.py
@@ -160,7 +160,11 @@
s.append("a")
s.append_slice("abc", 1, 2)
s.append_multiple_char('d', 4)
- assert s.build() == "aabcabdddd"
+ result = s.build()
+ assert result == "aabcabdddd"
+ assert result == s.build()
+ s.append("x")
+ assert s.build() == result + "x"
def test_unicode_builder():
s = UnicodeBuilder()
@@ -169,8 +173,9 @@
s.append_slice(u'abcdef', 1, 2)
assert s.getlength() == len('aabcb')
s.append_multiple_char(u'd', 4)
- assert s.build() == 'aabcbdddd'
- assert isinstance(s.build(), unicode)
+ result = s.build()
+ assert result == 'aabcbdddd'
+ assert isinstance(result, unicode)
class TestTranslates(BaseRtypingTest):
diff --git a/rpython/rtyper/annlowlevel.py b/rpython/rtyper/annlowlevel.py
--- a/rpython/rtyper/annlowlevel.py
+++ b/rpython/rtyper/annlowlevel.py
@@ -79,13 +79,6 @@
return LowLevelAnnotatorPolicy.lowlevelspecialize(funcdesc, args_s, {})
default_specialize = staticmethod(default_specialize)
- def specialize__semierased(funcdesc, args_s):
- a2l = annotation_to_lltype
- l2a = lltype_to_annotation
- args_s[:] = [l2a(a2l(s)) for s in args_s]
- return LowLevelAnnotatorPolicy.default_specialize(funcdesc, args_s)
- specialize__semierased = staticmethod(specialize__semierased)
-
specialize__ll = default_specialize
def specialize__ll_and_arg(funcdesc, args_s, *argindices):
diff --git a/rpython/rtyper/lltypesystem/opimpl.py b/rpython/rtyper/lltypesystem/opimpl.py
--- a/rpython/rtyper/lltypesystem/opimpl.py
+++ b/rpython/rtyper/lltypesystem/opimpl.py
@@ -179,6 +179,12 @@
def op_direct_ptradd(obj, index):
checkptr(obj)
assert is_valid_int(index)
+ if not obj:
+ raise AssertionError("direct_ptradd on null pointer")
+ ## assert isinstance(index, int)
+ ## assert not (0 <= index < 4096)
+ ## from rpython.rtyper.lltypesystem import rffi
+ ## return rffi.cast(lltype.typeOf(obj), index)
return lltype.direct_ptradd(obj, index)
diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py
--- a/rpython/rtyper/lltypesystem/rbuilder.py
+++ b/rpython/rtyper/lltypesystem/rbuilder.py
@@ -1,64 +1,139 @@
from rpython.rlib import rgc, jit
from rpython.rlib.objectmodel import enforceargs
-from rpython.rlib.rarithmetic import ovfcheck
+from rpython.rlib.rarithmetic import ovfcheck, r_uint, intmask
+from rpython.rlib.debug import ll_assert
from rpython.rtyper.rptr import PtrRepr
-from rpython.rtyper.lltypesystem import lltype, rstr
+from rpython.rtyper.lltypesystem import lltype, rffi, rstr
from rpython.rtyper.lltypesystem.lltype import staticAdtMethod, nullptr
from rpython.rtyper.lltypesystem.rstr import (STR, UNICODE, char_repr,
string_repr, unichar_repr, unicode_repr)
from rpython.rtyper.rbuilder import AbstractStringBuilderRepr
from rpython.tool.sourcetools import func_with_new_name
-# Think about heuristics below, maybe we can come up with something
-# better or at least compare it with list heuristics
-GROW_FAST_UNTIL = 100 * 1024 * 1024 # 100 MB
+# ------------------------------------------------------------
+# Basic idea:
+#
+# - A StringBuilder has a rstr.STR of the specified initial size
+# (100 by default), which is filled gradually.
+#
+# - When it is full, we allocate extra buffers as an extra rstr.STR,
+# and the already-filled one is added to a chained list of STRINGPIECE
+# objects.
+#
+# - At build() time, we consolidate all these pieces into a single
+# rstr.STR, which is both returned and re-attached to the StringBuilder,
+# replacing the STRINGPIECEs.
+#
+# - The data is copied at most twice, and only once in case it fits
+# into the initial size (and the GC supports shrinking the STR).
+#
+# XXX in build(), we could try keeping around a global weakref to the
+# chain of STRINGPIECEs and reuse them the next time.
+#
+# ------------------------------------------------------------
-def new_grow_func(name, mallocfn, copycontentsfn):
+
+def always_inline(func):
+ func._always_inline_ = True
+ return func
+
+
+def new_grow_funcs(name, mallocfn):
+
@enforceargs(None, int)
def stringbuilder_grow(ll_builder, needed):
- allocated = ll_builder.allocated
- #if allocated < GROW_FAST_UNTIL:
- # new_allocated = allocated << 1
- #else:
- extra_size = allocated >> 2
try:
- new_allocated = ovfcheck(allocated + extra_size)
- new_allocated = ovfcheck(new_allocated + needed)
+ needed = ovfcheck(needed + ll_builder.total_size)
+ needed = ovfcheck(needed + 63) & ~63
+ total_size = ll_builder.total_size + needed
except OverflowError:
raise MemoryError
- newbuf = mallocfn(new_allocated)
- copycontentsfn(ll_builder.buf, newbuf, 0, 0, ll_builder.used)
- ll_builder.buf = newbuf
- ll_builder.allocated = new_allocated
- return func_with_new_name(stringbuilder_grow, name)
+ #
+ new_string = mallocfn(needed)
+ #
+ PIECE = lltype.typeOf(ll_builder.extra_pieces).TO
+ old_piece = lltype.malloc(PIECE)
+ old_piece.buf = ll_builder.current_buf
+ old_piece.prev_piece = ll_builder.extra_pieces
+ ll_assert(bool(old_piece.buf), "no buf??")
+ ll_builder.current_buf = new_string
+ ll_builder.current_pos = 0
+ ll_builder.current_end = needed
+ ll_builder.total_size = total_size
+ ll_builder.extra_pieces = old_piece
-stringbuilder_grow = new_grow_func('stringbuilder_grow', rstr.mallocstr,
- rstr.copy_string_contents)
-unicodebuilder_grow = new_grow_func('unicodebuilder_grow', rstr.mallocunicode,
- rstr.copy_unicode_contents)
+ def stringbuilder_append_overflow(ll_builder, ll_str, size):
+ # First, the part that still fits in the current piece
+ part1 = ll_builder.current_end - ll_builder.current_pos
+ start = ll_builder.skip
+ ll_builder.copy_string_contents(ll_str, ll_builder.current_buf,
+ start, ll_builder.current_pos,
+ part1)
+ ll_builder.skip += part1
+ stringbuilder_grow(ll_builder, size - part1)
+
+ def stringbuilder_append_overflow_2(ll_builder, char0):
+ # Overflow when writing two chars. There are two cases depending
+ # on whether one char still fits or not.
+ if ll_builder.current_pos < ll_builder.current_end:
+ ll_builder.current_buf.chars[ll_builder.current_pos] = char0
+ ll_builder.skip = 1
+ stringbuilder_grow(ll_builder, 2)
+
+ return (func_with_new_name(stringbuilder_grow, '%s_grow' % name),
+ func_with_new_name(stringbuilder_append_overflow,
+ '%s_append_overflow' % name),
+ func_with_new_name(stringbuilder_append_overflow_2,
+ '%s_append_overflow_2' % name))
+
+stringbuilder_grows = new_grow_funcs('stringbuilder', rstr.mallocstr)
+unicodebuilder_grows = new_grow_funcs('unicodebuilder', rstr.mallocunicode)
+
+STRINGPIECE = lltype.GcStruct('stringpiece',
+ ('buf', lltype.Ptr(STR)),
+ ('prev_piece', lltype.Ptr(lltype.GcForwardReference())))
+STRINGPIECE.prev_piece.TO.become(STRINGPIECE)
STRINGBUILDER = lltype.GcStruct('stringbuilder',
- ('allocated', lltype.Signed),
- ('used', lltype.Signed),
- ('buf', lltype.Ptr(STR)),
+ ('current_buf', lltype.Ptr(STR)),
+ ('current_pos', lltype.Signed),
+ ('current_end', lltype.Signed),
+ ('total_size', lltype.Signed),
+ ('skip', lltype.Signed),
+ ('extra_pieces', lltype.Ptr(STRINGPIECE)),
adtmeths={
- 'grow': staticAdtMethod(stringbuilder_grow),
+ 'grow': staticAdtMethod(stringbuilder_grows[0]),
+ 'append_overflow': staticAdtMethod(stringbuilder_grows[1]),
+ 'append_overflow_2': staticAdtMethod(stringbuilder_grows[2]),
+ 'copy_string_contents': staticAdtMethod(rstr.copy_string_contents),
'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_string),
+ 'mallocfn': staticAdtMethod(rstr.mallocstr),
}
)
+UNICODEPIECE = lltype.GcStruct('unicodepiece',
+ ('buf', lltype.Ptr(UNICODE)),
+ ('prev_piece', lltype.Ptr(lltype.GcForwardReference())))
+UNICODEPIECE.prev_piece.TO.become(UNICODEPIECE)
+
UNICODEBUILDER = lltype.GcStruct('unicodebuilder',
- ('allocated', lltype.Signed),
- ('used', lltype.Signed),
- ('buf', lltype.Ptr(UNICODE)),
+ ('current_buf', lltype.Ptr(UNICODE)),
+ ('current_pos', lltype.Signed),
+ ('current_end', lltype.Signed),
+ ('total_size', lltype.Signed),
+ ('skip', lltype.Signed),
+ ('extra_pieces', lltype.Ptr(UNICODEPIECE)),
adtmeths={
- 'grow': staticAdtMethod(unicodebuilder_grow),
+ 'grow': staticAdtMethod(unicodebuilder_grows[0]),
+ 'append_overflow': staticAdtMethod(unicodebuilder_grows[1]),
+ 'append_overflow_2': staticAdtMethod(unicodebuilder_grows[2]),
+ 'copy_string_contents': staticAdtMethod(rstr.copy_unicode_contents),
'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_unicode),
+ 'mallocfn': staticAdtMethod(rstr.mallocunicode),
}
)
-MAX = 16*1024*1024
class BaseStringBuilderRepr(AbstractStringBuilderRepr):
def empty(self):
@@ -66,72 +141,206 @@
@classmethod
def ll_new(cls, init_size):
- if init_size < 0:
- init_size = MAX
+ # Clamp 'init_size' to be a value between 0 and 1280.
+ # Negative values are mapped to 1280.
+ init_size = intmask(min(r_uint(init_size), r_uint(1280)))
ll_builder = lltype.malloc(cls.lowleveltype.TO)
- ll_builder.allocated = init_size
- ll_builder.used = 0
- ll_builder.buf = cls.mallocfn(init_size)
+ ll_builder.current_buf = cls.mallocfn(init_size)
+ ll_builder.current_pos = 0
+ ll_builder.current_end = init_size
+ ll_builder.total_size = init_size
return ll_builder
@staticmethod
+ @always_inline
def ll_append(ll_builder, ll_str):
- used = ll_builder.used
- lgt = len(ll_str.chars)
- needed = lgt + used
- if needed > ll_builder.allocated:
- ll_builder.grow(ll_builder, lgt)
- ll_str.copy_contents(ll_str, ll_builder.buf, 0, used, lgt)
- ll_builder.used = needed
+ BaseStringBuilderRepr.ll_append_slice(ll_builder, ll_str,
+ 0, len(ll_str.chars))
@staticmethod
+ @always_inline
def ll_append_char(ll_builder, char):
- if ll_builder.used == ll_builder.allocated:
- ll_builder.grow(ll_builder, 1)
- ll_builder.buf.chars[ll_builder.used] = char
- ll_builder.used += 1
+ jit.conditional_call(ll_builder.current_pos == ll_builder.current_end,
+ ll_builder.grow, ll_builder, 1)
+ pos = ll_builder.current_pos
+ ll_builder.current_pos = pos + 1
+ ll_builder.current_buf.chars[pos] = char
@staticmethod
- def ll_append_slice(ll_builder, ll_str, start, end):
- needed = end - start
- used = ll_builder.used
- if needed + used > ll_builder.allocated:
- ll_builder.grow(ll_builder, needed)
- assert needed >= 0
- ll_str.copy_contents(ll_str, ll_builder.buf, start, used, needed)
- ll_builder.used = needed + used
+ def ll_append_char_2(ll_builder, char0, char1):
+ # this is only used by the JIT, when appending a small, known-length
+ # string. Unlike two consecutive ll_append_char(), it can do that
+ # with only one conditional_call.
+ ll_builder.skip = 2
+ jit.conditional_call(
+ ll_builder.current_end - ll_builder.current_pos < 2,
+ ll_builder.append_overflow_2, ll_builder, char0)
+ pos = ll_builder.current_pos
+ buf = ll_builder.current_buf
+ buf.chars[pos] = char0
+ pos += ll_builder.skip
+ ll_builder.current_pos = pos
+ buf.chars[pos - 1] = char1
+ # NB. this usually writes into buf.chars[current_pos] and
+ # buf.chars[current_pos+1], except if we had an overflow right
+ # in the middle of the two chars. In that case, 'skip' is set to
+ # 1 and only one char is written: the 'char1' overrides the 'char0'.
@staticmethod
- @jit.look_inside_iff(lambda ll_builder, char, times: jit.isconstant(times) and times <= 4)
- def ll_append_multiple_char(ll_builder, char, times):
- used = ll_builder.used
- if times + used > ll_builder.allocated:
- ll_builder.grow(ll_builder, times)
- for i in range(times):
- ll_builder.buf.chars[used] = char
- used += 1
- ll_builder.used = used
+ @always_inline
+ def ll_append_slice(ll_builder, ll_str, start, end):
+ size = end - start
+ if jit.we_are_jitted():
+ if BaseStringBuilderRepr._ll_jit_try_append_slice(
+ ll_builder, ll_str, start, size):
+ return
+ ll_builder.skip = start
+ jit.conditional_call(
+ size > ll_builder.current_end - ll_builder.current_pos,
+ ll_builder.append_overflow, ll_builder, ll_str, size)
+ start = ll_builder.skip
+ size = end - start
+ pos = ll_builder.current_pos
+ ll_builder.copy_string_contents(ll_str, ll_builder.current_buf,
+ start, pos, size)
+ ll_builder.current_pos = pos + size
@staticmethod
- def ll_append_charpsize(ll_builder, charp, size):
- used = ll_builder.used
- if used + size > ll_builder.allocated:
- ll_builder.grow(ll_builder, size)
- ll_builder.copy_raw_to_string(charp, ll_builder.buf, used, size)
- ll_builder.used += size
+ def _ll_jit_try_append_slice(ll_builder, ll_str, start, size):
+ if jit.isconstant(size):
+ if size == 0:
+ return True
+ if size == 1:
+ BaseStringBuilderRepr.ll_append_char(ll_builder,
+ ll_str.chars[start])
+ return True
+ if size == 2:
+ BaseStringBuilderRepr.ll_append_char_2(ll_builder,
+ ll_str.chars[start],
+ ll_str.chars[start + 1])
+ return True
+ return False # use the fall-back path
@staticmethod
- def ll_getlength(ll_builder):
- return ll_builder.used
+ @always_inline
+ def ll_append_multiple_char(ll_builder, char, times):
+ if jit.we_are_jitted():
+ if BaseStringBuilderRepr._ll_jit_try_append_multiple_char(
+ ll_builder, char, times):
+ return
+ BaseStringBuilderRepr._ll_append_multiple_char(ll_builder, char, times)
@staticmethod
+ @jit.dont_look_inside
+ def _ll_append_multiple_char(ll_builder, char, times):
+ part1 = ll_builder.current_end - ll_builder.current_pos
+ if times > part1:
+ times -= part1
+ buf = ll_builder.current_buf
+ for i in xrange(ll_builder.current_pos, ll_builder.current_end):
+ buf.chars[i] = char
+ ll_builder.grow(ll_builder, times)
+ #
+ buf = ll_builder.current_buf
+ pos = ll_builder.current_pos
+ end = pos + times
+ ll_builder.current_pos = end
+ for i in xrange(pos, end):
+ buf.chars[i] = char
+
+ @staticmethod
+ def _ll_jit_try_append_multiple_char(ll_builder, char, size):
+ if jit.isconstant(size):
+ if size == 0:
+ return True
+ if size == 1:
+ BaseStringBuilderRepr.ll_append_char(ll_builder, char)
+ return True
+ if size == 2:
+ BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char)
+ return True
+ if size == 3:
+ BaseStringBuilderRepr.ll_append_char(ll_builder, char)
+ BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char)
+ return True
+ if size == 4:
+ BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char)
+ BaseStringBuilderRepr.ll_append_char_2(ll_builder, char, char)
+ return True
+ return False # use the fall-back path
+
+ @staticmethod
+ @jit.dont_look_inside
+ def ll_append_charpsize(ll_builder, charp, size):
+ part1 = ll_builder.current_end - ll_builder.current_pos
+ if size > part1:
+ # First, the part that still fits
+ ll_builder.copy_raw_to_string(charp, ll_builder.current_buf,
+ ll_builder.current_pos, part1)
+ charp = rffi.ptradd(charp, part1)
+ size -= part1
+ ll_builder.grow(ll_builder, size)
+ #
+ pos = ll_builder.current_pos
+ ll_builder.current_pos = pos + size
+ ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, pos, size)
+
+ @staticmethod
+ @always_inline
+ def ll_getlength(ll_builder):
+ num_chars_missing_from_last_piece = (
+ ll_builder.current_end - ll_builder.current_pos)
+ return ll_builder.total_size - num_chars_missing_from_last_piece
+
+ @staticmethod
+ @jit.look_inside_iff(lambda ll_builder: jit.isvirtual(ll_builder))
def ll_build(ll_builder):
- final_size = ll_builder.used
- assert final_size >= 0
- if final_size < ll_builder.allocated:
- ll_builder.allocated = final_size
- ll_builder.buf = rgc.ll_shrink_array(ll_builder.buf, final_size)
- return ll_builder.buf
+ # NB. usually the JIT doesn't look inside this function; it does
+ # so only in the simplest example where it could virtualize everything
+ if ll_builder.extra_pieces:
+ BaseStringBuilderRepr._ll_fold_pieces(ll_builder)
+ elif ll_builder.current_pos != ll_builder.total_size:
+ BaseStringBuilderRepr._ll_shrink_final(ll_builder)
+ return ll_builder.current_buf
+
+ @staticmethod
+ def _ll_shrink_final(ll_builder):
+ final_size = ll_builder.current_pos
+ ll_assert(final_size <= ll_builder.total_size,
+ "final_size > ll_builder.total_size?")
+ buf = rgc.ll_shrink_array(ll_builder.current_buf, final_size)
+ ll_builder.current_buf = buf
+ ll_builder.current_end = final_size
+ ll_builder.total_size = final_size
+
+ @staticmethod
+ def _ll_fold_pieces(ll_builder):
+ final_size = BaseStringBuilderRepr.ll_getlength(ll_builder)
+ ll_assert(final_size >= 0, "negative final_size")
+ extra = ll_builder.extra_pieces
+ ll_builder.extra_pieces = lltype.nullptr(lltype.typeOf(extra).TO)
+ #
+ result = ll_builder.mallocfn(final_size)
+ piece = ll_builder.current_buf
+ piece_lgt = ll_builder.current_pos
+ ll_assert(ll_builder.current_end == len(piece.chars),
+ "bogus last piece_lgt")
+ ll_builder.total_size = final_size
+ ll_builder.current_buf = result
+ ll_builder.current_pos = final_size
+ ll_builder.current_end = final_size
+
+ dst = final_size
+ while True:
+ dst -= piece_lgt
+ ll_assert(dst >= 0, "rbuilder build: overflow")
+ ll_builder.copy_string_contents(piece, result, 0, dst, piece_lgt)
+ if not extra:
+ break
+ piece = extra.buf
+ piece_lgt = len(piece.chars)
+ extra = extra.prev_piece
+ ll_assert(dst == 0, "rbuilder build: underflow")
@classmethod
def ll_bool(cls, ll_builder):
diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py
--- a/rpython/rtyper/lltypesystem/rffi.py
+++ b/rpython/rtyper/lltypesystem/rffi.py
@@ -681,25 +681,25 @@
from rpython.rtyper.lltypesystem.rstr import (STR as STRTYPE,
copy_string_to_raw,
copy_raw_to_string,
- copy_string_contents)
+ copy_string_contents,
+ mallocstr as mallocfn)
from rpython.rtyper.annlowlevel import llstr as llstrtype
from rpython.rtyper.annlowlevel import hlstr as hlstrtype
TYPEP = CCHARP
ll_char_type = lltype.Char
lastchar = '\x00'
- builder_class = StringBuilder
else:
from rpython.rtyper.lltypesystem.rstr import (
UNICODE as STRTYPE,
copy_unicode_to_raw as copy_string_to_raw,
copy_raw_to_unicode as copy_raw_to_string,
- copy_unicode_contents as copy_string_contents)
+ copy_unicode_contents as copy_string_contents,
+ mallocunicode as mallocfn)
from rpython.rtyper.annlowlevel import llunicode as llstrtype
from rpython.rtyper.annlowlevel import hlunicode as hlstrtype
TYPEP = CWCHARP
ll_char_type = lltype.UniChar
lastchar = u'\x00'
- builder_class = UnicodeBuilder
# str -> char*
def str2charp(s, track_allocation=True):
@@ -728,12 +728,7 @@
size = 0
while cp[size] != lastchar:
size += 1
- b = builder_class(size)
- i = 0
- while cp[i] != lastchar:
- b.append(cp[i])
- i += 1
- return assert_str0(b.build())
+ return assert_str0(charpsize2str(cp, size))
# str -> char*
# Can't inline this because of the raw address manipulation.
@@ -829,18 +824,18 @@
# char* -> str, with an upper bound on the length in case there is no \x00
@enforceargs(None, int)
def charp2strn(cp, maxlen):
- b = builder_class(maxlen)
- i = 0
- while i < maxlen and cp[i] != lastchar:
- b.append(cp[i])
- i += 1
- return assert_str0(b.build())
+ size = 0
+ while size < maxlen and cp[size] != lastchar:
+ size += 1
+ return assert_str0(charpsize2str(cp, size))
# char* and size -> str (which can contain null bytes)
def charpsize2str(cp, size):
- b = builder_class(size)
- b.append_charpsize(cp, size)
- return b.build()
+ ll_str = mallocfn(size)
+ copy_raw_to_string(cp, ll_str, 0, size)
+ result = hlstrtype(ll_str)
+ assert result is not None
+ return result
charpsize2str._annenforceargs_ = [None, int]
return (str2charp, free_charp, charp2str,
diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py
--- a/rpython/rtyper/lltypesystem/rstr.py
+++ b/rpython/rtyper/lltypesystem/rstr.py
@@ -4,7 +4,7 @@
from rpython.rlib import jit, types
from rpython.rlib.debug import ll_assert
from rpython.rlib.objectmodel import (malloc_zero_filled, we_are_translated,
- _hash_string, keepalive_until_here, specialize)
+ _hash_string, keepalive_until_here, specialize, enforceargs)
from rpython.rlib.signature import signature
from rpython.rlib.rarithmetic import ovfcheck
from rpython.rtyper.error import TyperError
@@ -32,13 +32,13 @@
UNICODE = GcForwardReference()
def new_malloc(TP, name):
+ @enforceargs(int)
def mallocstr(length):
ll_assert(length >= 0, "negative string length")
r = malloc(TP, length)
if not we_are_translated() or not malloc_zero_filled:
r.hash = 0
return r
- mallocstr._annspecialcase_ = 'specialize:semierased'
return func_with_new_name(mallocstr, name)
mallocstr = new_malloc(STR, 'mallocstr')
@@ -77,6 +77,10 @@
# are obscurely essential to make sure that the strings stay alive
# longer than the raw_memcopy().
assert length >= 0
+ ll_assert(srcstart >= 0, "copystrc: negative srcstart")
+ ll_assert(srcstart + length <= len(src.chars), "copystrc: src ovf")
+ ll_assert(dststart >= 0, "copystrc: negative dststart")
+ ll_assert(dststart + length <= len(dst.chars), "copystrc: dst ovf")
# from here, no GC operations can happen
src = _get_raw_buf(SRC_TP, src, srcstart)
dst = _get_raw_buf(DST_TP, dst, dststart)
diff --git a/rpython/rtyper/lltypesystem/test/test_rffi.py b/rpython/rtyper/lltypesystem/test/test_rffi.py
--- a/rpython/rtyper/lltypesystem/test/test_rffi.py
+++ b/rpython/rtyper/lltypesystem/test/test_rffi.py
@@ -81,6 +81,21 @@
xf = self.compile(f, [], backendopt=False)
assert xf() == 4
+ def test_charp2str_exact_result(self):
+ from rpython.annotator.annrpython import RPythonAnnotator
+ from rpython.rtyper.llannotation import SomePtr
+ a = RPythonAnnotator()
+ s = a.build_types(charpsize2str, [SomePtr(CCHARP), int])
+ assert s.knowntype == str
+ assert s.can_be_None is False
+ assert s.no_nul is False
+ #
+ a = RPythonAnnotator()
+ s = a.build_types(charp2str, [SomePtr(CCHARP)])
+ assert s.knowntype == str
+ assert s.can_be_None is False
+ assert s.no_nul is True
+
def test_string_reverse(self):
c_source = py.code.Source("""
#include <string.h>
diff --git a/rpython/rtyper/test/test_rbuilder.py b/rpython/rtyper/test/test_rbuilder.py
--- a/rpython/rtyper/test/test_rbuilder.py
+++ b/rpython/rtyper/test/test_rbuilder.py
@@ -3,26 +3,96 @@
import py
from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
-from rpython.rtyper.annlowlevel import llstr, hlstr
+from rpython.rtyper.annlowlevel import llstr, hlstr, llunicode, hlunicode
from rpython.rtyper.lltypesystem import rffi
-from rpython.rtyper.lltypesystem.rbuilder import StringBuilderRepr
+from rpython.rtyper.lltypesystem.rbuilder import StringBuilderRepr, UnicodeBuilderRepr
from rpython.rtyper.test.tool import BaseRtypingTest
class TestStringBuilderDirect(object):
+ def test_nooveralloc(self):
+ sb = StringBuilderRepr.ll_new(33)
+ StringBuilderRepr.ll_append(sb, llstr("abc" * 11))
+ assert StringBuilderRepr.ll_getlength(sb) == 33
+ s = StringBuilderRepr.ll_build(sb)
+ assert hlstr(s) == "abc" * 11
+ assert StringBuilderRepr.ll_getlength(sb) == 33
+
+ def test_shrinking(self):
+ sb = StringBuilderRepr.ll_new(100)
+ StringBuilderRepr.ll_append(sb, llstr("abc" * 11))
+ assert StringBuilderRepr.ll_getlength(sb) == 33
+ s = StringBuilderRepr.ll_build(sb)
+ assert hlstr(s) == "abc" * 11
+ assert StringBuilderRepr.ll_getlength(sb) == 33
+
def test_simple(self):
sb = StringBuilderRepr.ll_new(3)
StringBuilderRepr.ll_append_char(sb, 'x')
StringBuilderRepr.ll_append(sb, llstr("abc"))
StringBuilderRepr.ll_append_slice(sb, llstr("foobar"), 2, 5)
StringBuilderRepr.ll_append_multiple_char(sb, 'y', 3)
+ assert StringBuilderRepr.ll_getlength(sb) == 10
s = StringBuilderRepr.ll_build(sb)
assert hlstr(s) == "xabcobayyy"
+ assert StringBuilderRepr.ll_getlength(sb) == 10
- def test_nooveralloc(self):
- sb = StringBuilderRepr.ll_new(3)
- StringBuilderRepr.ll_append(sb, llstr("abc"))
- assert StringBuilderRepr.ll_build(sb) == sb.buf
+ def test_grow_when_append_char(self):
+ sb = StringBuilderRepr.ll_new(33)
+ StringBuilderRepr.ll_append(sb, llstr("abc" * 11))
+ StringBuilderRepr.ll_append_char(sb, "d")
+ s = StringBuilderRepr.ll_build(sb)
+ assert hlstr(s) == "abc" * 11 + "d"
+
+ def test_grow_two_halves(self):
+ sb = StringBuilderRepr.ll_new(32)
+ StringBuilderRepr.ll_append(sb, llstr("abc" * 11))
+ s = StringBuilderRepr.ll_build(sb)
+ assert hlstr(s) == "abc" * 11
+
+ def test_grow_when_exactly_full(self):
+ sb = StringBuilderRepr.ll_new(33)
+ StringBuilderRepr.ll_append(sb, llstr("abc" * 11))
+ StringBuilderRepr.ll_append(sb, llstr("def"))
+ s = StringBuilderRepr.ll_build(sb)
+ assert hlstr(s) == "abc" * 11 + "def"
+
+ def test_charp(self):
+ sb = StringBuilderRepr.ll_new(32)
+ with rffi.scoped_str2charp("hello world") as p:
+ StringBuilderRepr.ll_append_charpsize(sb, p, 12)
+ with rffi.scoped_str2charp("0123456789abcdefghijklmn") as p:
+ StringBuilderRepr.ll_append_charpsize(sb, p, 24)
+ s = StringBuilderRepr.ll_build(sb)
+ assert hlstr(s) == "hello world\x000123456789abcdefghijklmn"
+
+ def test_unicode(self):
+ sb = UnicodeBuilderRepr.ll_new(32)
+ UnicodeBuilderRepr.ll_append_char(sb, u'x')
+ UnicodeBuilderRepr.ll_append(sb, llunicode(u"abc"))
+ UnicodeBuilderRepr.ll_append_slice(sb, llunicode(u"foobar"), 2, 5)
+ UnicodeBuilderRepr.ll_append_multiple_char(sb, u'y', 30)
+ u = UnicodeBuilderRepr.ll_build(sb)
+ assert hlunicode(u) == u"xabcoba" + u"y" * 30
+
+ def test_several_builds(self):
+ sb = StringBuilderRepr.ll_new(32)
+ s = StringBuilderRepr.ll_build(sb)
+ assert hlstr(s) == ""
+ assert s == StringBuilderRepr.ll_build(sb)
+ assert s == StringBuilderRepr.ll_build(sb)
+ #
+ sb = StringBuilderRepr.ll_new(32)
+ StringBuilderRepr.ll_append(sb, llstr("abcdefgh" * 3)) # not full
+ s = StringBuilderRepr.ll_build(sb)
+ assert hlstr(s) == "abcdefgh" * 3
+ assert s == StringBuilderRepr.ll_build(sb)
+ assert s == StringBuilderRepr.ll_build(sb)
+ StringBuilderRepr.ll_append(sb, llstr("extra")) # overflow
+ s = StringBuilderRepr.ll_build(sb)
+ assert hlstr(s) == "abcdefgh" * 3 + "extra"
+ assert s == StringBuilderRepr.ll_build(sb)
+ assert s == StringBuilderRepr.ll_build(sb)
class TestStringBuilder(BaseRtypingTest):
@@ -39,25 +109,25 @@
def test_overallocation(self):
def func():
- s = StringBuilder(4)
- s.append("abcd")
- s.append("defg")
+ s = StringBuilder(34)
+ s.append("abcd" * 5)
+ s.append("defg" * 5)
s.append("rty")
return s.build()
res = self.ll_to_string(self.interpret(func, []))
- assert res == "abcddefgrty"
+ assert res == "abcd" * 5 + "defg" * 5 + "rty"
def test_unicode(self):
def func():
- s = UnicodeBuilder()
+ s = UnicodeBuilder(32)
s.append(u'a')
s.append(u'abc')
s.append(u'abcdef')
s.append_slice(u'abc', 1, 2)
- s.append_multiple_char(u'u', 4)
+ s.append_multiple_char(u'u', 40)
return s.build()
res = self.ll_to_unicode(self.interpret(func, []))
- assert res == 'aabcabcdefbuuuu'
+ assert res == u'aabcabcdefb' + u'u' * 40
assert isinstance(res, unicode)
def test_string_getlength(self):
diff --git a/rpython/translator/c/src/mem.h b/rpython/translator/c/src/mem.h
--- a/rpython/translator/c/src/mem.h
+++ b/rpython/translator/c/src/mem.h
@@ -117,6 +117,7 @@
#define OP_BOEHM_DISAPPEARING_LINK(link, obj, r) /* nothing */
#define OP_GC__DISABLE_FINALIZERS(r) /* nothing */
#define OP_GC__ENABLE_FINALIZERS(r) /* nothing */
+#define GC_REGISTER_FINALIZER(a,b,c,d,e) /* nothing */
#endif
/************************************************************/
diff --git a/rpython/translator/c/test/test_newgc.py b/rpython/translator/c/test/test_newgc.py
--- a/rpython/translator/c/test/test_newgc.py
+++ b/rpython/translator/c/test/test_newgc.py
@@ -1363,6 +1363,23 @@
assert res == ' '.join([''.join(map(chr, range(33, 33+length)))
for length in range(1, 51)])
+ def definestr_string_builder_multiple_builds_2(cls):
+ def fn(_):
+ got = []
+ for j in range(3, 76, 5):
+ s = StringBuilder()
+ for i in range(j):
+ s.append(chr(33+i))
+ gc.collect()
+ got.append(s.build())
+ return ' '.join(got)
+ return fn
+
+ def test_string_builder_multiple_builds_2(self):
+ res = self.run('string_builder_multiple_builds_2')
+ assert res == ' '.join([''.join(map(chr, range(33, 33+length)))
+ for length in range(3, 76, 5)])
+
def define_nursery_hash_base(cls):
class A:
pass
diff --git a/rpython/translator/c/test/test_standalone.py b/rpython/translator/c/test/test_standalone.py
--- a/rpython/translator/c/test/test_standalone.py
+++ b/rpython/translator/c/test/test_standalone.py
@@ -960,6 +960,50 @@
self.compile(entry_point)
# assert did not explode
+ def test_unicode_builder(self):
+ import random
+ from rpython.rlib.rstring import UnicodeBuilder
+
+ to_do = []
+ for i in range(15000):
+ to_do.append(random.randrange(0, 100000))
+ to_do.append(0)
+
+ expected = []
+ s = ''
+ for x in to_do:
+ if x < 1500:
+ expected.append("``%s''" % (s,))
+ if x < 1000:
+ s = ''
+ elif x < 20000:
+ s += chr(32 + (x & 63))
+ elif x < 30000:
+ s += chr(32 + (x & 63)) * (x % 93)
+ else:
+ s += str(x)
+ expected = '\n'.join(expected)
+
+ def entry_point(argv):
+ b = UnicodeBuilder(32)
+ for x in to_do:
+ if x < 1500:
+ print "``%s''" % str(b.build())
+ if x < 1000:
+ b = UnicodeBuilder(32)
+ elif x < 20000:
+ b.append(unichr(32 + (x & 63)))
+ elif x < 30000:
+ b.append_multiple_char(unichr(32 + (x & 63)), x % 93)
+ else:
+ b.append(unicode(str(x)))
+ return 0
+
+ t, cbuilder = self.compile(entry_point)
+ out = cbuilder.cmdexec('')
+ assert out.strip() == expected
+
+
class TestMaemo(TestStandalone):
def setup_class(cls):
py.test.skip("TestMaemo: tests skipped for now")
More information about the pypy-commit
mailing list