[pypy-commit] pypy shadowstack-perf: Tentative: remove the clears at the start of every function.

Wed Jul 6 20:28:34 CEST 2011

Author: Armin Rigo <arigo at tunes.org>
Branch: shadowstack-perf
Changeset: r45374:31d4b032dd61
Date: 2011-07-04 11:15 +0200
http://bitbucket.org/pypy/pypy/changeset/31d4b032dd61/

Log:	Tentative: remove the clears at the start of every function. Needs
	careful tweaks to ensure that old, invalid pointers are not left
	behind after a collection.

diff --git a/pypy/rpython/lltypesystem/llmemory.py b/pypy/rpython/lltypesystem/llmemory.py
--- a/pypy/rpython/lltypesystem/llmemory.py
+++ b/pypy/rpython/lltypesystem/llmemory.py
@@ -435,8 +435,17 @@
         if isinstance(other, fakeaddress):
             if self == other:
                 return 0
-            else:
-                raise TypeError("cannot subtract fakeaddresses in general")
+            # <*_subarray at n> - <*_subarray at m> == ItemOffset(n-m)
+            obj1 = self.ptr._obj
+            obj2 = other.ptr._obj
+            if (isinstance(obj1, lltype._subarray) and
+                isinstance(obj2, lltype._subarray) and
+                obj1._TYPE == obj2._TYPE and
+                obj1._parentstructure() == obj2._parentstructure()):
+                n = obj1._parent_index
+                m = obj2._parent_index
+                return ItemOffset(obj1._TYPE.OF, n - m)
+            raise TypeError("cannot subtract fakeaddresses in general")
         if other == 0:
             return self
         return NotImplemented
diff --git a/pypy/rpython/memory/gctransform/shadowstack.py b/pypy/rpython/memory/gctransform/shadowstack.py
--- a/pypy/rpython/memory/gctransform/shadowstack.py
+++ b/pypy/rpython/memory/gctransform/shadowstack.py
@@ -55,11 +55,25 @@
         return top.address[0]
 
     def allocate_stack(self):
-        return llmemory.raw_malloc(self.rootstacksize)
+        stackbase = llmemory.raw_malloc(self.rootstacksize)
+        if not stackbase:
+            raise MemoryError
+        self.clear_stack(stackbase, stackbase)
+        return stackbase
+
+    def clear_stack(self, stackbase, stacktop):
+        """Zero the unused part of the root stack: everything from
+        'stacktop' up to 'stackbase + rootstacksize'.  Needed because a
+        function call only increments the stack top and leaves the old
+        content in place.  That is a speed optimization which may lead
+        to occasional leaks (the stack may contain dead pointers), and
+        it requires clearing the stack manually after every minor
+        collection so that leftover pointers never point to garbage."""
+        size = stackbase + self.rootstacksize - stacktop
+        llmemory.raw_memclear(stacktop, size)
 
     def setup_root_walker(self):
         stackbase = self.allocate_stack()
-        ll_assert(bool(stackbase), "could not allocate root stack")
         self.gcdata.root_stack_top  = stackbase
         self.gcdata.root_stack_base = stackbase
         BaseRootWalker.setup_root_walker(self)
@@ -67,9 +81,10 @@
     def walk_stack_roots(self, collect_stack_root):
         gcdata = self.gcdata
         gc = self.gc
-        rootstackhook = self.rootstackhook
         addr = gcdata.root_stack_base
         end = gcdata.root_stack_top
+        self.clear_stack(addr, end)
+        rootstackhook = self.rootstackhook
         while addr != end:
             addr += rootstackhook(collect_stack_root, gc, addr)
         if self.collect_stacks_from_other_threads is not None:
@@ -107,8 +122,6 @@
             """
             if not gcdata._fresh_rootstack:
                 gcdata._fresh_rootstack = self.allocate_stack()
-                if not gcdata._fresh_rootstack:
-                    raise MemoryError
 
         def thread_run():
             """Called whenever the current thread (re-)acquired the GIL.
@@ -132,6 +145,7 @@
             gcdata.thread_stacks.setitem(aid, llmemory.NULL)
             old = gcdata.root_stack_base
             if gcdata._fresh_rootstack == llmemory.NULL:
+                self.clear_stack(old, old)
                 gcdata._fresh_rootstack = old
             else:
                 llmemory.raw_free(old)
@@ -178,9 +192,10 @@
                 # collect all valid stacks from the dict (the entry
                 # corresponding to the current thread is not valid)
                 gc = self.gc
-                rootstackhook = self.rootstackhook
                 end = stacktop - sizeofaddr
                 addr = end.address[0]
+                self.clear_stack(addr, stacktop)
+                rootstackhook = self.rootstackhook
                 while addr != end:
                     addr += rootstackhook(callback, gc, addr)
 
@@ -294,13 +309,6 @@
         c_numcolors = rmodel.inputconst(lltype.Signed, numcolors)
         llops.genop("direct_call", [gct.incr_stack_ptr, c_numcolors],
                     resulttype=llmemory.Address)
-        top_addr = llops.genop("direct_call",
-                               [gct.get_stack_top_ptr],
-                               resulttype=llmemory.Address)
-        c_null = rmodel.inputconst(llmemory.Address, llmemory.NULL)
-        for k in range(numcolors):
-            c_k = rmodel.inputconst(lltype.Signed, ~k)
-            llops.genop("raw_store", [top_addr, c_type, c_k, c_null])
         graph.startblock.operations[:0] = llops
         #
         # Put at the end of the graph: "decr_stack()"