[pypy-commit] pypy null_byte_after_str: Allocate all STRs (including prebuilt ones) with enough space for one extra uninitialized character
arigo
pypy.commits at gmail.com
Fri Jul 29 12:12:04 EDT 2016
Author: Armin Rigo <arigo at tunes.org>
Branch: null_byte_after_str
Changeset: r85911:1abc2152f631
Date: 2016-07-29 18:06 +0200
http://bitbucket.org/pypy/pypy/changeset/1abc2152f631/
Log: Allocate all STRs (including prebuilt ones) with enough space for
one extra uninitialized character. Add
rgc.ll_write_final_null_char() to write a NULL character there.
diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py
--- a/rpython/rlib/rgc.py
+++ b/rpython/rlib/rgc.py
@@ -1268,3 +1268,23 @@
ptr = lltype.direct_arrayitems(array)
# ptr is a Ptr(FixedSizeArray(Char, 1)). Cast it to a rffi.CCHARP
return rffi.cast(rffi.CCHARP, ptr)
+
+@jit.dont_look_inside
+@no_collect
+@specialize.ll()
+def ll_write_final_null_char(s):
+ """'s' is a low-level STR; writes a NULL character after all the
+ other characters in 's'. Warning, this only works because of
+ the 'extra_item_after_alloc' hack inside the definition of STR.
+ """
+ PSTR = lltype.typeOf(s)
+ _check_final_null_char(PSTR)
+ # no GC operation here!
+ adr_s = llmemory.cast_ptr_to_adr(s)
+ adr_a = adr_s + llmemory.offsetof(PSTR.TO, 'chars')
+ adr_a += llmemory.itemoffsetof(PSTR.TO.chars, 0)
+ adr_a.char[len(s.chars)] = '\x00'
+
+@specialize.memo()
+def _check_final_null_char(PSTR):
+ assert PSTR.TO.chars._hints.get('extra_item_after_alloc', 0) == 1
diff --git a/rpython/rtyper/lltypesystem/llmemory.py b/rpython/rtyper/lltypesystem/llmemory.py
--- a/rpython/rtyper/lltypesystem/llmemory.py
+++ b/rpython/rtyper/lltypesystem/llmemory.py
@@ -390,11 +390,23 @@
else:
raise Exception("don't know how to take the size of a %r"%TYPE)
+@specialize.memo()
+def extra_item_after_alloc(ARRAY):
+ assert isinstance(ARRAY, lltype.Array)
+ return ARRAY._hints.get('extra_item_after_alloc', 0)
+
@specialize.arg(0)
def sizeof(TYPE, n=None):
+ """Return the symbolic size of TYPE.
+ For a Struct with no varsized part, it must be called with n=None.
+ For an Array or a Struct with a varsized part, it is the number of items.
+ There is a special case to return 1 more than requested if the array
+ has the hint 'extra_item_after_alloc' set to 1.
+ """
if n is None:
return _sizeof_none(TYPE)
elif isinstance(TYPE, lltype.Array):
+ n += extra_item_after_alloc(TYPE)
return itemoffsetof(TYPE) + _sizeof_none(TYPE.OF) * n
else:
return _sizeof_int(TYPE, n)
diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py
--- a/rpython/rtyper/lltypesystem/rstr.py
+++ b/rpython/rtyper/lltypesystem/rstr.py
@@ -1238,7 +1238,8 @@
# ____________________________________________________________
STR.become(GcStruct('rpy_string', ('hash', Signed),
- ('chars', Array(Char, hints={'immutable': True})),
+ ('chars', Array(Char, hints={'immutable': True,
+ 'extra_item_after_alloc': 1})),
adtmeths={'malloc' : staticAdtMethod(mallocstr),
'empty' : staticAdtMethod(emptystrfun),
'copy_contents' : staticAdtMethod(copy_string_contents),
diff --git a/rpython/translator/c/node.py b/rpython/translator/c/node.py
--- a/rpython/translator/c/node.py
+++ b/rpython/translator/c/node.py
@@ -253,8 +253,11 @@
yield '\t' + cdecl(typename, fname) + ';'
if not self.ARRAY._hints.get('nolength', False):
yield '\tlong length;'
+ varlength = self.varlength
+ if varlength is not None:
+ varlength += self.ARRAY._hints.get('extra_item_after_alloc', 0)
line = '%s;' % cdecl(self.itemtypename,
- 'items[%s]' % deflength(self.varlength))
+ 'items[%s]' % deflength(varlength))
if self.ARRAY.OF is Void: # strange
line = '/* array of void */'
if self.ARRAY._hints.get('nolength', False):
diff --git a/rpython/translator/c/test/test_lltyped.py b/rpython/translator/c/test/test_lltyped.py
--- a/rpython/translator/c/test/test_lltyped.py
+++ b/rpython/translator/c/test/test_lltyped.py
@@ -1,4 +1,4 @@
-import py
+import py, random
from rpython.rtyper.lltypesystem.lltype import *
from rpython.rtyper.lltypesystem import rffi
from rpython.translator.c.test.test_genc import compile
@@ -255,28 +255,6 @@
res2 = fn(0)
assert res1 == res2
- def test_null_padding(self):
- py.test.skip("we no longer pad our RPython strings with a final NUL")
- from rpython.rtyper.lltypesystem import llmemory
- from rpython.rtyper.lltypesystem import rstr
- chars_offset = llmemory.FieldOffset(rstr.STR, 'chars') + \
- llmemory.ArrayItemsOffset(rstr.STR.chars)
- # sadly, there's no way of forcing this to fail if the strings
- # are allocated in a region of memory such that they just
- # happen to get a NUL byte anyway :/ (a debug build will
- # always fail though)
- def trailing_byte(s):
- adr_s = llmemory.cast_ptr_to_adr(s)
- return (adr_s + chars_offset).char[len(s)]
- def f(x):
- r = 0
- for i in range(x):
- r += ord(trailing_byte(' '*(100-x*x)))
- return r
- fn = self.getcompiled(f, [int])
- res = fn(10)
- assert res == 0
-
def test_cast_primitive(self):
def f(x):
x = cast_primitive(UnsignedLongLong, x)
@@ -1023,3 +1001,50 @@
assert fn(r_longlong(1)) == True
assert fn(r_longlong(256)) == True
assert fn(r_longlong(2**32)) == True
+
+ def test_extra_item_after_alloc(self):
+ from rpython.rlib import rgc
+ from rpython.rlib.objectmodel import compute_hash
+ from rpython.rtyper.lltypesystem import lltype
+ from rpython.rtyper.lltypesystem import rstr
+ # all STR objects should be allocated with enough space for one
+ # extra char. Check this for prebuilt strings, and for dynamically
+ # allocated ones with the default GC for tests. Use strings of 8,
+ # 16 and 24 chars because if the extra char is missing, writing to it
+ # is likely to cause corruption in nearby structures.
+ sizes = [random.choice([8, 16, 24]) for i in range(100)]
+ A = lltype.Struct('A', ('x', lltype.Signed))
+ prebuilt = [(rstr.mallocstr(sz),
+ lltype.malloc(A, flavor='raw', immortal=True))
+ for sz in sizes]
+ k = 0
+ for i, (s, a) in enumerate(prebuilt):
+ a.x = i
+ for i in range(len(s.chars)):
+ k += 1
+ if k == 256:
+ k = 1
+ s.chars[i] = chr(k)
+
+ def check(lst):
+ hashes = []
+ for i, (s, a) in enumerate(lst):
+ assert a.x == i
+ rgc.ll_write_final_null_char(s)
+ for i, (s, a) in enumerate(lst):
+ assert a.x == i # check it was not overwritten
+ def f():
+ check(prebuilt)
+ lst1 = []
+ for i, sz in enumerate(sizes):
+ s = rstr.mallocstr(sz)
+ a = lltype.malloc(A, flavor='raw')
+ a.x = i
+ lst1.append((s, a))
+ check(lst1)
+ for _, a in lst1:
+ lltype.free(a, flavor='raw')
+ return 42
+
+ fn = self.getcompiled(f, [])
+ assert fn() == 42
More information about the pypy-commit
mailing list