[pypy-svn] r51371 - in pypy/dist/pypy: rpython/lltypesystem rpython/memory/gctransform translator/c/gcc translator/c/gcc/test translator/c/src

arigo at codespeak.net arigo at codespeak.net
Sun Feb 10 16:43:20 CET 2008


Author: arigo
Date: Sun Feb 10 16:43:17 2008
New Revision: 51371

Modified:
   pypy/dist/pypy/rpython/lltypesystem/lloperation.py
   pypy/dist/pypy/rpython/memory/gctransform/asmgcroot.py
   pypy/dist/pypy/translator/c/gcc/test/test_trackgcroot.py
   pypy/dist/pypy/translator/c/gcc/trackgcroot.py
   pypy/dist/pypy/translator/c/src/mem.h
Log:
Tweak the tables produced for the asmgcc root finder.
This should reduce the size of the executable by quite
a bit (still checking, might be about 0.5 MB).


Modified: pypy/dist/pypy/rpython/lltypesystem/lloperation.py
==============================================================================
--- pypy/dist/pypy/rpython/lltypesystem/lloperation.py	(original)
+++ pypy/dist/pypy/rpython/lltypesystem/lloperation.py	Sun Feb 10 16:43:17 2008
@@ -412,6 +412,7 @@
     'llvm_frameaddress':    LLOp(sideeffects=False),
     'llvm_gcmapstart':      LLOp(sideeffects=False),
     'llvm_gcmapend':        LLOp(sideeffects=False),
+    'llvm_gccallshapes':    LLOp(sideeffects=False),
     'llvm_store_gcroot':    LLOp(),
     'llvm_load_gcroot':     LLOp(),
 

Modified: pypy/dist/pypy/rpython/memory/gctransform/asmgcroot.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/gctransform/asmgcroot.py	(original)
+++ pypy/dist/pypy/rpython/memory/gctransform/asmgcroot.py	Sun Feb 10 16:43:17 2008
@@ -50,11 +50,12 @@
         def _asm_callback(initialframedata):
             self.walk_stack_from(initialframedata)
         self._asm_callback = _asm_callback
+        self._shape_decompressor = ShapeDecompressor()
 
     def setup_root_walker(self):
-        # The gcmap table is a list of pairs of pointers:
+        # The gcmap table is a list of entries, two machine words each:
         #     void *SafePointAddress;
-        #     void *Shape;
+        #     int Shape;
         # Here, i.e. when the program starts, we sort it
         # in-place on the SafePointAddress to allow for more
         # efficient searches.
@@ -98,9 +99,9 @@
         callback from the GC code for each GC root found in 'caller'.
         """
         #
-        # The gcmap table is a list of pairs of pointers:
+        # The gcmap table is a list of entries, two machine words each:
         #     void *SafePointAddress;
-        #     void *Shape;
+        #     int Shape;
         #
         # A "safe point" is the return address of a call.
         # The "shape" of a safe point is a list of integers
@@ -148,20 +149,26 @@
         gcmapend   = llop.llvm_gcmapend(llmemory.Address)
         item = binary_search(gcmapstart, gcmapend, retaddr)
         if item.address[0] != retaddr:
-            # retaddr not found!
-            llop.debug_fatalerror(lltype.Void, "cannot find gc roots!")
-            return False
+            # 'retaddr' not exactly found.  Check that 'item' is the start of a
+            # compressed range containing 'retaddr'.
+            if retaddr > item.address[0] and item.signed[1] < 0:
+                pass   # ok
+            else:
+                llop.debug_fatalerror(lltype.Void, "cannot find gc roots!")
+                return False
         #
         # found!  Enumerate the GC roots in the caller frame
         #
-        shape = item.address[1]
+        shape = item.signed[1]
+        if shape < 0:
+            shape = ~ shape     # can ignore this "range" marker here
+        self._shape_decompressor.setpos(shape)
         collect_stack_root = self.gcdata._gc_collect_stack_root
         gc = self.gc
-        LIVELOCS = 1 + CALLEE_SAVED_REGS + 1  # index of the first gc root loc
-        livecount = shape.signed[LIVELOCS-1]
-        while livecount > 0:
-            livecount -= 1
-            location = shape.signed[LIVELOCS + livecount]
+        while True:
+            location = self._shape_decompressor.next()
+            if location == 0:
+                break
             addr = self.getlocation(callee, location)
             if addr.address[0] != llmemory.NULL:
                 collect_stack_root(gc, addr)
@@ -169,17 +176,18 @@
         # track where the caller_frame saved the registers from its own
         # caller
         #
-        location = shape.signed[0]
-        caller.frame_address = self.getlocation(callee, location)
-        if not caller.frame_address:   # marker that means "I'm the frame
-            return False               # of the entry point, stop walking"
-        reg = 0
-        while reg < CALLEE_SAVED_REGS:
-            location = shape.signed[1+reg]
+        reg = CALLEE_SAVED_REGS - 1
+        while reg >= 0:
+            location = self._shape_decompressor.next()
             addr = self.getlocation(callee, location)
             caller.regs_stored_at[reg] = addr
-            reg += 1
-        return True
+            reg -= 1
+
+        location = self._shape_decompressor.next()
+        caller.frame_address = self.getlocation(callee, location)
+        # we get a NULL marker to mean "I'm the frame
+        # of the entry point, stop walking"
+        return caller.frame_address != llmemory.NULL
 
     def getlocation(self, callee, location):
         """Get the location in the 'caller' frame of a variable, based
@@ -241,6 +249,9 @@
     This is an insertion sort, so it's slowish unless the array is mostly
     sorted already (which is what I expect, but XXX check this).
     """
+    # XXX this should check that it's not changing the relative order
+    # of an entry and the entry following it when the former is a
+    # compressed "range" entry, i.e. "entry.signed[1] < 0".
     next = start
     while next < end:
         # assuming the interval from start (included) to next (excluded)
@@ -259,6 +270,31 @@
 
 # ____________________________________________________________
 
+class ShapeDecompressor:
+    _alloc_flavor_ = "raw"
+
+    def setpos(self, pos):
+        gccallshapes = llop.llvm_gccallshapes(llmemory.Address)
+        self.addr = gccallshapes + pos
+
+    def next(self):
+        value = 0
+        addr = self.addr
+        while True:
+            b = ord(addr.char[0])
+            addr += 1
+            value += b
+            if b < 0x80:
+                break
+            value = (value - 0x80) << 7
+        self.addr = addr
+        if value & 1:
+            value = ~ value
+        value = value >> 1
+        return value
+
+# ____________________________________________________________
+
 #
 # The special pypy_asm_stackwalk(), implemented directly in
 # assembler, fills information about the current stack top in an

Modified: pypy/dist/pypy/translator/c/gcc/test/test_trackgcroot.py
==============================================================================
--- pypy/dist/pypy/translator/c/gcc/test/test_trackgcroot.py	(original)
+++ pypy/dist/pypy/translator/c/gcc/test/test_trackgcroot.py	Sun Feb 10 16:43:17 2008
@@ -6,6 +6,8 @@
 from pypy.translator.c.gcc.trackgcroot import LOC_EBP_BASED, LOC_ESP_BASED
 from pypy.translator.c.gcc.trackgcroot import GcRootTracker
 from pypy.translator.c.gcc.trackgcroot import FunctionGcRootTracker
+from pypy.translator.c.gcc.trackgcroot import compress_callshape
+from pypy.translator.c.gcc.trackgcroot import decompress_callshape
 from StringIO import StringIO
 
 this_dir = py.path.local(__file__).dirpath()
@@ -36,6 +38,14 @@
                              LOC_EBP_BASED+24,
                              LOC_EBP_BASED+28)) == expected
 
+def test_compress_callshape():
+    shape = (1, -3, 0x1234, -0x5678, 0x234567,
+             -0x765432, 0x61626364, -0x41424344)
+    bytes = list(compress_callshape(shape))
+    print bytes
+    assert len(bytes) == 1+1+2+3+4+4+5+5+1
+    assert decompress_callshape(bytes) == list(shape)
+
 def test_find_functions():
     source = """\
 \t.p2align 4,,15

Modified: pypy/dist/pypy/translator/c/gcc/trackgcroot.py
==============================================================================
--- pypy/dist/pypy/translator/c/gcc/trackgcroot.py	(original)
+++ pypy/dist/pypy/translator/c/gcc/trackgcroot.py	Sun Feb 10 16:43:17 2008
@@ -35,6 +35,8 @@
     def dump(self, output):
         assert self.seen_main
         shapes = {}
+        shapelines = []
+        shapeofs = 0
         print >> output, """\t.text
         .globl pypy_asm_stackwalk
             .type pypy_asm_stackwalk, @function
@@ -68,30 +70,26 @@
         print >> output, '\t.align\t4'
         print >> output, '\t.globl\t__gcmapstart'
         print >> output, '__gcmapstart:'
-        for label, state in self.gcmaptable:
-            if state not in shapes:
-                lst = ['__gcmap_shape']
-                for n in state:
-                    if n < 0:
-                        n = 'm%d' % (-n,)
-                    lst.append(str(n))
-                shapes[state] = '_'.join(lst)
+        for label, state, is_range in self.gcmaptable:
+            try:
+                n = shapes[state]
+            except KeyError:
+                n = shapes[state] = shapeofs
+                bytes = [str(b) for b in compress_callshape(state)]
+                shapelines.append('\t/*%d*/\t.byte\t%s\n' % (
+                    shapeofs,
+                    ', '.join(bytes)))
+                shapeofs += len(bytes)
+            if is_range:
+                n = ~ n
             print >> output, '\t.long\t%s' % (label,)
-            print >> output, '\t.long\t%s' % (shapes[state],)
+            print >> output, '\t.long\t%d' % (n,)
         print >> output, '\t.globl\t__gcmapend'
         print >> output, '__gcmapend:'
         print >> output, '\t.section\t.rodata'
-        print >> output, '\t.align\t4'
-        keys = shapes.keys()
-        keys.sort()
-        FIXED = 1 + len(CALLEE_SAVE_REGISTERS)
-        for state in keys:
-            print >> output, '%s:' % (shapes[state],)
-            for i in range(FIXED):
-                print >> output, '\t.long\t%d' % (state[i],)
-            print >> output, '\t.long\t%d' % (len(state)-FIXED,)
-            for p in state[FIXED:]:
-                print >> output, '\t.long\t%d' % (p,)         # gcroots
+        print >> output, '\t.globl\t__gccallshapes'
+        print >> output, '__gccallshapes:'
+        output.writelines(shapelines)
 
     def find_functions(self, iterlines):
         functionlines = []
@@ -115,10 +113,13 @@
         yield False, functionlines
 
     def process(self, iterlines, newfile, entrypoint='main', filename='?'):
+        self.localgcmaptable = []
         for in_function, lines in self.find_functions(iterlines):
             if in_function:
                 lines = self.process_function(lines, entrypoint, filename)
             newfile.writelines(lines)
+        self.gcmaptable.extend(compress_gcmaptable(self.localgcmaptable))
+        del self.localgcmaptable
         self.files_seen += 1
 
     def process_function(self, lines, entrypoint, filename):
@@ -131,7 +132,7 @@
         if self.verbose > 1:
             for label, state in table:
                 print >> sys.stderr, label, '\t', format_callshape(state)
-        self.gcmaptable.extend(table)
+        self.localgcmaptable.extend(table)
         self.seen_main |= tracker.is_main
         return tracker.lines
 
@@ -897,6 +898,82 @@
                                ', '.join(result[1:5]),
                                ', '.join(result[5:]))
 
+# __________ table compression __________
+
+def compress_gcmaptable(table):
+    # Compress ranges table[i:j] of entries with the same state
+    # into a single entry whose label is the start of the range.
+    # The last element in the table is never compressed in this
+    # way for debugging reasons, so that a random address in
+    # memory does not get mapped to the last element in the table
+    # just because it's the closest address.
+    # Also, compress_gcmaptable() should be called after each
+    # .s file processed -- otherwise the result depends on the
+    # linker not rearranging the .s files in memory, which looks
+    # fragile.
+    i = 0
+    limit = len(table) - 1     # only process entries table[:limit]
+    while i < len(table):
+        label1, state = table[i]
+        is_range = False
+        j = i + 1
+        while j < limit and table[j][1] == state:
+            is_range = True
+            j += 1
+        # now all entries in table[i:j] have the same state
+        yield (label1, state, is_range)
+        i = j
+
+def compress_callshape(shape):
+    # For a single shape, this turns the list of integers into a list of
+    # bytes and reverses the order of the entries.  The length is
+    # encoded by inserting a 0 marker after the gc roots coming from
+    # shape[5:] and before the 5 values coming from shape[4] to
+    # shape[0].  In practice it seems that shapes contain many integers
+    # whose value is up to a few thousands, which the algorithm below
+    # compresses down to 2 bytes.  Very small values compress down to a
+    # single byte.
+    assert len(shape) >= 5
+    shape = list(shape)
+    assert 0 not in shape[5:]
+    shape.insert(5, 0)
+    result = []
+    for loc in shape:
+        if loc < 0:
+            loc = (-loc) * 2 - 1
+        else:
+            loc = loc * 2
+        flag = 0
+        while loc >= 0x80:
+            result.append(int(loc & 0x7F) | flag)
+            flag = 0x80
+            loc >>= 7
+        result.append(int(loc) | flag)
+    result.reverse()
+    return result
+
+def decompress_callshape(bytes):
+    # For tests.  This logic is copied in asmgcroot.py.
+    result = []
+    n = 0
+    while n < len(bytes):
+        value = 0
+        while True:
+            b = bytes[n]
+            n += 1
+            value += b
+            if b < 0x80:
+                break
+            value = (value - 0x80) << 7
+        if value & 1:
+            value = ~ value
+        value = value >> 1
+        result.append(value)
+    result.reverse()
+    assert result[5] == 0
+    del result[5]
+    return result
+
 
 if __name__ == '__main__':
     if sys.argv and sys.argv[1] == '-v':

Modified: pypy/dist/pypy/translator/c/src/mem.h
==============================================================================
--- pypy/dist/pypy/translator/c/src/mem.h	(original)
+++ pypy/dist/pypy/translator/c/src/mem.h	Sun Feb 10 16:43:17 2008
@@ -10,7 +10,7 @@
 
 extern char __gcmapstart;
 extern char __gcmapend;
-extern char* __gcmap_frame_address(void);
+extern char __gccallshapes;
 
 #define PYPY_GCROOT(p)  asm ("/* GCROOT %0 */" : "=g" (p) : "0" (p) : "memory")
 #define pypy_asm_gcroot(p) ({void*_r; \
@@ -19,10 +19,7 @@
 
 #define OP_LLVM_GCMAPSTART(r)	r = &__gcmapstart
 #define OP_LLVM_GCMAPEND(r)	r = &__gcmapend
-#define OP_LLVM_FRAMEADDRESS(r)	asm ("pypygetframeaddress %0" : "=r" (r))
-/* NB. we cannot use __builtin_frame_address(0) - apparently, gcc thinks
-   it can return %ebp even if -fomit-frame-pointer is specified, which
-   doesn't work.  So we need a bit of help from trackgcroot.py... */
+#define OP_LLVM_GCCALLSHAPES(r)	r = &__gccallshapes
 
 
 #define RAW_MALLOC_ZERO_FILLED 0



More information about the Pypy-commit mailing list