[pypy-commit] pypy null_byte_after_str: Allocate all STRs (including prebuilt ones) with enough space for one

arigo pypy.commits at gmail.com
Fri Jul 29 12:12:04 EDT 2016


Author: Armin Rigo <arigo at tunes.org>
Branch: null_byte_after_str
Changeset: r85911:1abc2152f631
Date: 2016-07-29 18:06 +0200
http://bitbucket.org/pypy/pypy/changeset/1abc2152f631/

Log:	Allocate all STRs (including prebuilt ones) with enough space for
	one extra uninitialized character. Add
	rgc.ll_write_final_null_char() to write a NULL character there.

diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py
--- a/rpython/rlib/rgc.py
+++ b/rpython/rlib/rgc.py
@@ -1268,3 +1268,23 @@
     ptr = lltype.direct_arrayitems(array)
     # ptr is a Ptr(FixedSizeArray(Char, 1)).  Cast it to a rffi.CCHARP
     return rffi.cast(rffi.CCHARP, ptr)
+
+@jit.dont_look_inside
+@no_collect
+@specialize.ll()
+def ll_write_final_null_char(s):
+    """'s' is a low-level STR; writes a NULL character after all the
+    other characters in 's'.  Warning, this only works because of
+    the 'extra_item_after_alloc' hack inside the definition of STR.
+    """
+    PSTR = lltype.typeOf(s)
+    _check_final_null_char(PSTR)
+    # no GC operation here!
+    adr_s = llmemory.cast_ptr_to_adr(s)
+    adr_a = adr_s + llmemory.offsetof(PSTR.TO, 'chars')
+    adr_a += llmemory.itemoffsetof(PSTR.TO.chars, 0)
+    adr_a.char[len(s.chars)] = '\x00'
+
+@specialize.memo()
+def _check_final_null_char(PSTR):
+    assert PSTR.TO.chars._hints.get('extra_item_after_alloc', 0) == 1
diff --git a/rpython/rtyper/lltypesystem/llmemory.py b/rpython/rtyper/lltypesystem/llmemory.py
--- a/rpython/rtyper/lltypesystem/llmemory.py
+++ b/rpython/rtyper/lltypesystem/llmemory.py
@@ -390,11 +390,23 @@
     else:
         raise Exception("don't know how to take the size of a %r"%TYPE)
 
+@specialize.memo()
+def extra_item_after_alloc(ARRAY):
+    assert isinstance(ARRAY, lltype.Array)
+    return ARRAY._hints.get('extra_item_after_alloc', 0)
+
 @specialize.arg(0)
 def sizeof(TYPE, n=None):
+    """Return the symbolic size of TYPE.
+    For a Struct with no varsized part, it must be called with n=None.
+    For an Array or a Struct with a varsized part, it is the number of items.
+    There is a special case to return 1 more than requested if the array
+    has the hint 'extra_item_after_alloc' set to 1.
+    """
     if n is None:
         return _sizeof_none(TYPE)
     elif isinstance(TYPE, lltype.Array):
+        n += extra_item_after_alloc(TYPE)
         return itemoffsetof(TYPE) + _sizeof_none(TYPE.OF) * n
     else:
         return _sizeof_int(TYPE, n)
diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py
--- a/rpython/rtyper/lltypesystem/rstr.py
+++ b/rpython/rtyper/lltypesystem/rstr.py
@@ -1238,7 +1238,8 @@
 # ____________________________________________________________
 
 STR.become(GcStruct('rpy_string', ('hash',  Signed),
-                    ('chars', Array(Char, hints={'immutable': True})),
+                    ('chars', Array(Char, hints={'immutable': True,
+                                    'extra_item_after_alloc': 1})),
                     adtmeths={'malloc' : staticAdtMethod(mallocstr),
                               'empty'  : staticAdtMethod(emptystrfun),
                               'copy_contents' : staticAdtMethod(copy_string_contents),
diff --git a/rpython/translator/c/node.py b/rpython/translator/c/node.py
--- a/rpython/translator/c/node.py
+++ b/rpython/translator/c/node.py
@@ -253,8 +253,11 @@
             yield '\t' + cdecl(typename, fname) + ';'
         if not self.ARRAY._hints.get('nolength', False):
             yield '\tlong length;'
+        varlength = self.varlength
+        if varlength is not None:
+            varlength += self.ARRAY._hints.get('extra_item_after_alloc', 0)
         line = '%s;' % cdecl(self.itemtypename,
-                             'items[%s]' % deflength(self.varlength))
+                             'items[%s]' % deflength(varlength))
         if self.ARRAY.OF is Void:    # strange
             line = '/* array of void */'
             if self.ARRAY._hints.get('nolength', False):
diff --git a/rpython/translator/c/test/test_lltyped.py b/rpython/translator/c/test/test_lltyped.py
--- a/rpython/translator/c/test/test_lltyped.py
+++ b/rpython/translator/c/test/test_lltyped.py
@@ -1,4 +1,4 @@
-import py
+import py, random
 from rpython.rtyper.lltypesystem.lltype import *
 from rpython.rtyper.lltypesystem import rffi
 from rpython.translator.c.test.test_genc import compile
@@ -255,28 +255,6 @@
         res2 = fn(0)
         assert res1 == res2
 
-    def test_null_padding(self):
-        py.test.skip("we no longer pad our RPython strings with a final NUL")
-        from rpython.rtyper.lltypesystem import llmemory
-        from rpython.rtyper.lltypesystem import rstr
-        chars_offset = llmemory.FieldOffset(rstr.STR, 'chars') + \
-                       llmemory.ArrayItemsOffset(rstr.STR.chars)
-        # sadly, there's no way of forcing this to fail if the strings
-        # are allocated in a region of memory such that they just
-        # happen to get a NUL byte anyway :/ (a debug build will
-        # always fail though)
-        def trailing_byte(s):
-            adr_s = llmemory.cast_ptr_to_adr(s)
-            return (adr_s + chars_offset).char[len(s)]
-        def f(x):
-            r = 0
-            for i in range(x):
-                r += ord(trailing_byte(' '*(100-x*x)))
-            return r
-        fn = self.getcompiled(f, [int])
-        res = fn(10)
-        assert res == 0
-
     def test_cast_primitive(self):
         def f(x):
             x = cast_primitive(UnsignedLongLong, x)
@@ -1023,3 +1001,50 @@
         assert fn(r_longlong(1)) == True
         assert fn(r_longlong(256)) == True
         assert fn(r_longlong(2**32)) == True
+
+    def test_extra_item_after_alloc(self):
+        from rpython.rlib import rgc
+        from rpython.rlib.objectmodel import compute_hash
+        from rpython.rtyper.lltypesystem import lltype
+        from rpython.rtyper.lltypesystem import rstr
+        # all STR objects should be allocated with enough space for one
+        # extra char.  Check this for prebuilt strings, and for dynamically
+        # allocated ones with the default GC for tests.  Use strings of 8,
+        # 16 and 24 chars because if the extra char is missing, writing to it
+        # is likely to cause corruption in nearby structures.
+        sizes = [random.choice([8, 16, 24]) for i in range(100)]
+        A = lltype.Struct('A', ('x', lltype.Signed))
+        prebuilt = [(rstr.mallocstr(sz),
+                     lltype.malloc(A, flavor='raw', immortal=True))
+                        for sz in sizes]
+        k = 0
+        for i, (s, a) in enumerate(prebuilt):
+            a.x = i
+            for i in range(len(s.chars)):
+                k += 1
+                if k == 256:
+                    k = 1
+                s.chars[i] = chr(k)
+
+        def check(lst):
+            hashes = []
+            for i, (s, a) in enumerate(lst):
+                assert a.x == i
+                rgc.ll_write_final_null_char(s)
+            for i, (s, a) in enumerate(lst):
+                assert a.x == i     # check it was not overwritten
+        def f():
+            check(prebuilt)
+            lst1 = []
+            for i, sz in enumerate(sizes):
+                s = rstr.mallocstr(sz)
+                a = lltype.malloc(A, flavor='raw')
+                a.x = i
+                lst1.append((s, a))
+            check(lst1)
+            for _, a in lst1:
+                lltype.free(a, flavor='raw')
+            return 42
+
+        fn = self.getcompiled(f, [])
+        assert fn() == 42


More information about the pypy-commit mailing list