[pypy-commit] pypy asmmemmgr-for-code-only: in-progress

arigo pypy.commits at gmail.com
Wed Aug 17 11:22:07 EDT 2016


Author: Armin Rigo <arigo at tunes.org>
Branch: asmmemmgr-for-code-only
Changeset: r86249:9fb620098ee2
Date: 2016-08-17 17:21 +0200
http://bitbucket.org/pypy/pypy/changeset/9fb620098ee2/

Log:	in-progress

diff --git a/rpython/jit/backend/llsupport/asmmemmgr.py b/rpython/jit/backend/llsupport/asmmemmgr.py
--- a/rpython/jit/backend/llsupport/asmmemmgr.py
+++ b/rpython/jit/backend/llsupport/asmmemmgr.py
@@ -29,14 +29,17 @@
         """Returns stats for rlib.jit.jit_hooks.stats_asmmemmgr_*()."""
         return (self.total_memory_allocated, self.total_mallocs)
 
-    def malloc(self, minsize, maxsize):
+    def malloc_code(self, size):
         """Allocate executable memory, between minsize and maxsize bytes,
         and return a pair (start, stop).  Does not perform any rounding
-        of minsize and maxsize.
+        of 'size'; the interesting property is that if all calls to
+        malloc_code() are done with a size that is a multiple of 2**N,
+        then they also return (start, stop) pointers that are aligned
+        to 2**N.
         """
-        result = self._allocate_block(minsize)
+        result = self._allocate_block(size)
         (start, stop) = result
-        smaller_stop = start + maxsize
+        smaller_stop = start + size
         if smaller_stop + self.min_fragment <= stop:
             self._add_free_block(smaller_stop, stop)
             stop = smaller_stop
@@ -44,28 +47,12 @@
         self.total_mallocs += r_uint(stop - start)
         return result   # pair (start, stop)
 
-    def free(self, start, stop):
+    def free_code(self, start, stop):
         """Free a block (start, stop) returned by a previous malloc()."""
         if r_uint is not None:
             self.total_mallocs -= r_uint(stop - start)
         self._add_free_block(start, stop)
 
-    def open_malloc(self, minsize):
-        """Allocate at least minsize bytes.  Returns (start, stop)."""
-        result = self._allocate_block(minsize)
-        (start, stop) = result
-        self.total_mallocs += r_uint(stop - start)
-        return result
-
-    def open_free(self, middle, stop):
-        """Used for freeing the end of an open-allocated block of memory."""
-        if stop - middle >= self.min_fragment:
-            self.total_mallocs -= r_uint(stop - middle)
-            self._add_free_block(middle, stop)
-            return True
-        else:
-            return False    # too small to record
-
     def _allocate_large_block(self, minsize):
         # Compute 'size' from 'minsize': it must be rounded up to
         # 'large_alloc_size'.  Additionally, we use the following line
@@ -163,40 +150,6 @@
         self._allocated = None
 
 
-class MachineDataBlockWrapper(object):
-    def __init__(self, asmmemmgr, allblocks):
-        self.asmmemmgr = asmmemmgr
-        self.allblocks = allblocks
-        self.rawstart    = 0
-        self.rawposition = 0
-        self.rawstop     = 0
-
-    def done(self):
-        if self.rawstart != 0:
-            if self.asmmemmgr.open_free(self.rawposition, self.rawstop):
-                self.rawstop = self.rawposition
-            self.allblocks.append((self.rawstart, self.rawstop))
-            self.rawstart    = 0
-            self.rawposition = 0
-            self.rawstop     = 0
-
-    def _allocate_next_block(self, minsize):
-        self.done()
-        self.rawstart, self.rawstop = self.asmmemmgr.open_malloc(minsize)
-        self.rawposition = self.rawstart
-
-    def malloc_aligned(self, size, alignment):
-        p = self.rawposition
-        p = (p + alignment - 1) & (-alignment)
-        if p + size > self.rawstop:
-            self._allocate_next_block(size + alignment - 1)
-            p = self.rawposition
-            p = (p + alignment - 1) & (-alignment)
-            assert p + size <= self.rawstop
-        self.rawposition = p + size
-        return p
-
-
 class BlockBuilderMixin(object):
     _mixin_ = True
     # A base class to generate assembler.  It is equivalent to just a list
@@ -321,11 +274,16 @@
     def materialize(self, cpu, allblocks, gcrootmap=None):
         size = self.get_relative_pos()
         align = self.ALIGN_MATERIALIZE
-        size += align - 1
-        malloced = cpu.asmmemmgr.malloc(size, size)
-        allblocks.append(malloced)
+        size = (size + align - 1) & ~(align - 1)   # round up
+        malloced = cpu.asmmemmgr.malloc_code(size)
         rawstart = malloced[0]
-        rawstart = (rawstart + align - 1) & (-align)
+        rawstop = malloced[1]
+        assert (rawstart & (align - 1)) == 0, (
+            "malloc_code() not aligned to a multiple of ALIGN_MATERIALIZE")
+        assert (rawstart & 1) == 0
+        assert (rawstop & 1) == 0
+        allblocks.append(rawstart)
+        allblocks.append(rawstop - 1)
         self.rawstart = rawstart
         self.copy_to_raw_memory(rawstart)
         if self.gcroot_markers is not None:
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -177,6 +177,21 @@
             clt.asmmemmgr_gcreftracers = []
         return clt.asmmemmgr_gcreftracers
 
+    def malloc_aligned(self, size, alignment=WORD):
+        p1 = lltype.malloc(rffi.CCHARP.TO, size, flavor='raw',
+                           track_allocation=False)
+        s1 = s2 = rffi.cast(lltype.Signed, p1)
+        if (s1 & (alignment - 1)) != 0:   # bah, try again
+            lltype.free(p1, flavor='raw')
+            p1 = lltype.malloc(rffi.CCHARP.TO, size + (alignment - 1),
+                               flavor='raw')
+            s1 = s2 = rffi.cast(lltype.Signed, p1)
+            s2 = (s2 + alignment - 1) & ~(alignment - 1)
+        assert self.allblocks is not None
+        assert (s1 & 1) == 0   # must be even
+        self.allblocks.append(s1)
+        return s2
+
     def set_debug(self, v):
         r = self._debug
         self._debug = v
@@ -491,7 +506,6 @@
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         return bool(gcrootmap) and not gcrootmap.is_shadow_stack
 
-
 def debug_bridge(descr_number, rawstart, codeendpos):
     debug_start("jit-backend-addr")
     debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
diff --git a/rpython/jit/backend/llsupport/gcmap.py b/rpython/jit/backend/llsupport/gcmap.py
--- a/rpython/jit/backend/llsupport/gcmap.py
+++ b/rpython/jit/backend/llsupport/gcmap.py
@@ -7,8 +7,7 @@
 def allocate_gcmap(assembler, frame_depth, fixed_size):
     size = frame_depth + fixed_size
     malloc_size = (size // WORD // 8 + 1) + 1
-    rawgcmap = assembler.datablockwrapper.malloc_aligned(WORD * malloc_size,
-                                                    WORD)
+    rawgcmap = assembler.malloc_aligned(WORD * malloc_size)
     # set the length field
     rffi.cast(rffi.CArrayPtr(lltype.Signed), rawgcmap)[0] = malloc_size - 1
     gcmap = rffi.cast(lltype.Ptr(jitframe.GCMAP), rawgcmap)
diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -256,11 +256,23 @@
         blocks = compiled_loop_token.asmmemmgr_blocks
         if blocks is not None:
             compiled_loop_token.asmmemmgr_blocks = None
-            for rawstart, rawstop in blocks:
-                self.gc_ll_descr.freeing_block(rawstart, rawstop)
-                self.asmmemmgr.free(rawstart, rawstop)
-                if self.HAS_CODEMAP:
-                    self.codemap.free_asm_block(rawstart, rawstop)
+            # see the description in ../model.py about asmmemmgr_blocks
+            i = len(blocks)
+            while i > 0:
+                i -= 1
+                pp = blocks[i]
+                if pp & 1:   # odd number, that's the stop after a start
+                    rawstop = pp + 1
+                    i -= 1
+                    assert i >= 0
+                    rawstart = blocks[i]
+                    self.gc_ll_descr.freeing_block(rawstart, rawstop)
+                    self.asmmemmgr.free_code(rawstart, rawstop)
+                    if self.HAS_CODEMAP:
+                        self.codemap.free_asm_block(rawstart, rawstop)
+                else:
+                    lltype.free(rffi.cast(rffi.CCHARP, pp), flavor='raw',
+                                track_allocation=False)
 
     def force(self, addr_of_force_token):
         frame = rffi.cast(jitframe.JITFRAMEPTR, addr_of_force_token)
diff --git a/rpython/jit/backend/llsupport/test/test_asmmemmgr.py b/rpython/jit/backend/llsupport/test/test_asmmemmgr.py
--- a/rpython/jit/backend/llsupport/test/test_asmmemmgr.py
+++ b/rpython/jit/backend/llsupport/test/test_asmmemmgr.py
@@ -1,6 +1,5 @@
 import random
 from rpython.jit.backend.llsupport.asmmemmgr import AsmMemoryManager
-from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
 from rpython.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin
 from rpython.jit.backend.llsupport.codemap import CodemapStorage
 from rpython.rtyper.lltypesystem import lltype, rffi
@@ -63,16 +62,15 @@
                               num_indices=5)
     memmgr._add_free_block(10, 18)
     memmgr._add_free_block(20, 30)
-    for minsize in range(1, 11):
-        for maxsize in range(minsize, 14):
-            (start, stop) = memmgr.malloc(minsize, maxsize)
-            if minsize <= 8:
-                assert (start, stop) == (10, 18)
-            else:
-                assert (start, stop) == (20, 30)
-            memmgr._add_free_block(start, stop)
+    for size in range(1, 11):
+        (start, stop) = memmgr.malloc_code(size)
+        if size <= 8:
+            assert (start, stop) == (10, 18)
+        else:
+            assert (start, stop) == (20, 30)
+        memmgr._add_free_block(start, stop)
     memmgr._add_free_block(40, 49)
-    (start, stop) = memmgr.malloc(10, 10)
+    (start, stop) = memmgr.malloc_code(10)
     assert (start, stop) == (20, 30)
 
 def test_malloc_with_fragment():
@@ -80,7 +78,7 @@
         memmgr = AsmMemoryManager(min_fragment=8,
                                   num_indices=5)
         memmgr._add_free_block(12, 44)
-        (start, stop) = memmgr.malloc(reqsize, reqsize)
+        (start, stop) = memmgr.malloc_code(reqsize)
         if reqsize + 8 <= 32:
             assert (start, stop) == (12, 12 + reqsize)
             assert memmgr.free_blocks == {stop: 44}
@@ -108,7 +106,7 @@
         for i in range(100):
             while self.asmmemmgr.total_memory_allocated < 16384:
                 reqsize = random.randrange(1, 200)
-                (start, stop) = self.asmmemmgr.malloc(reqsize, reqsize)
+                (start, stop) = self.asmmemmgr.malloc_code(reqsize)
                 assert reqsize <= stop - start < reqsize + 8
                 assert self.asmmemmgr.total_memory_allocated in [8192, 16384]
             self.teardown_method(None)
@@ -124,19 +122,15 @@
             if got and (random.random() < 0.4 or len(got) == 1000):
                 # free
                 start, stop = got.pop(random.randrange(0, len(got)))
-                self.asmmemmgr.free(start, stop)
+                self.asmmemmgr.free_code(start, stop)
                 real_use -= (stop - start)
                 assert real_use >= 0
             #
             else:
                 # allocate
                 reqsize = random.randrange(1, 200)
-                if random.random() < 0.5:
-                    reqmaxsize = reqsize
-                else:
-                    reqmaxsize = reqsize + random.randrange(0, 200)
-                (start, stop) = self.asmmemmgr.malloc(reqsize, reqmaxsize)
-                assert reqsize <= stop - start < reqmaxsize + 8
+                (start, stop) = self.asmmemmgr.malloc_code(reqsize)
+                assert reqsize <= stop - start < reqsize + 8
                 for otherstart, otherstop in got:           # no overlap
                     assert otherstop <= start or stop <= otherstart
                 got.append((start, stop))
@@ -182,11 +176,11 @@
         assert p[3] == 'y'
         assert p[4] == 'Z'
         assert p[5] == 'z'
-        # 'allblocks' should be one block of length 6 + 15
-        # (15 = alignment - 1) containing the range(rawstart, rawstart + 6)
-        [(blockstart, blockend)] = allblocks
-        assert blockend == blockstart + 6 + (mc.ALIGN_MATERIALIZE - 1)
-        assert blockstart <= rawstart < rawstart + 6 <= blockend
+        # 'allblocks' should be one block of length 16 (= 6 rounded up)
+        # starting at 'rawstart'
+        [blockstart, blockend] = allblocks
+        assert blockend == blockstart + 15  # is odd, one less than real stop
+        assert blockstart == rawstart
         assert puts == [(rawstart + 2, ['a', 'b', 'c', 'd']),
                         (rawstart + 4, ['e', 'f', 'g'])]
 
@@ -232,41 +226,3 @@
 
 def test_blockbuildermixin2():
     test_blockbuildermixin(translated=False)
-
-def test_machinedatablock():
-    ops = []
-    class FakeMemMgr:
-        _addr = 1597
-        def open_malloc(self, minsize):
-            result = (self._addr, self._addr + 100)
-            ops.append(('malloc', minsize) + result)
-            self._addr += 200
-            return result
-        def open_free(self, frm, to):
-            ops.append(('free', frm, to))
-            return to - frm >= 8
-    #
-    allblocks = []
-    md = MachineDataBlockWrapper(FakeMemMgr(), allblocks)
-    p = md.malloc_aligned(26, 16)
-    assert p == 1600
-    assert ops == [('malloc', 26 + 15, 1597, 1697)]
-    del ops[:]
-    #
-    p = md.malloc_aligned(26, 16)
-    assert p == 1632
-    p = md.malloc_aligned(26, 16)
-    assert p == 1664
-    assert allblocks == []
-    assert ops == []
-    #
-    p = md.malloc_aligned(27, 16)
-    assert p == 1808
-    assert allblocks == [(1597, 1697)]
-    assert ops == [('free', 1690, 1697),
-                   ('malloc', 27 + 15, 1797, 1897)]
-    del ops[:]
-    #
-    md.done()
-    assert allblocks == [(1597, 1697), (1797, 1835)]
-    assert ops == [('free', 1835, 1897)]
diff --git a/rpython/jit/backend/model.py b/rpython/jit/backend/model.py
--- a/rpython/jit/backend/model.py
+++ b/rpython/jit/backend/model.py
@@ -287,6 +287,13 @@
     asmmemmgr_blocks = None
     asmmemmgr_gcreftracers = None
 
+    # asmmemmgr_blocks is a list of integers with the following structure:
+    #  - a pointer, which is an even number
+    #  - optionally an end-pointer minus 1, which is an odd number
+    # If a pointer is followed by such an end-pointer, together they
+    # describe the start and stop of a piece of code.  Otherwise, it is
+    # simply a piece of data that must be free()d.
+
     def __init__(self, cpu, number):
         cpu.tracker.total_compiled_loops += 1
         self.cpu = cpu
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -4,7 +4,6 @@
 
 from rpython.jit.backend.llsupport import symbolic, jitframe, rewrite
 from rpython.jit.backend.llsupport.assembler import (GuardToken, BaseAssembler, debug_bridge)
-from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
 from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
 from rpython.jit.metainterp.history import (Const, VOID, ConstInt)
 from rpython.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
@@ -62,12 +61,14 @@
         self.malloc_slowpath_varsize = 0
         self.wb_slowpath = [0, 0, 0, 0, 0]
         self.setup_failure_recovery()
-        self.datablockwrapper = None
         self.stack_check_slowpath = 0
         self.propagate_exception_path = 0
         self.teardown()
 
     def setup_once(self):
+        # make a list that will be forgotten at the first setup(), for
+        # allocating the few immortal pieces of data now
+        self.allblocks = []
         BaseAssembler.setup_once(self)
         if self.cpu.supports_floats:
             support.ensure_sse2_floats()
@@ -82,13 +83,9 @@
             self.pending_memoryerror_trampoline_from = []
             self.error_trampoline_64 = 0
         self.mc = codebuf.MachineCodeBlockWrapper()
-        #assert self.datablockwrapper is None --- but obscure case
-        # possible, e.g. getting MemoryError and continuing
-        allblocks = self.get_asmmemmgr_blocks(looptoken)
-        self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
-                                                        allblocks)
         self.target_tokens_currently_compiling = {}
         self.frame_depth_to_patch = []
+        self.allblocks = self.get_asmmemmgr_blocks(looptoken)
 
     def teardown(self):
         self.pending_guard_tokens = None
@@ -96,6 +93,7 @@
             self.pending_memoryerror_trampoline_from = None
         self.mc = None
         self.current_clt = None
+        self.allblocks = None
 
     def _build_float_constants(self):
         # 0x80000000000000008000000000000000
@@ -111,9 +109,7 @@
         data = neg_const + abs_const + \
                single_neg_const + single_abs_const + \
                zero_const
-        datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr, [])
-        float_constants = datablockwrapper.malloc_aligned(len(data), alignment=16)
-        datablockwrapper.done()
+        float_constants = self.malloc_aligned(len(data), alignment=16)
         addr = rffi.cast(rffi.CArrayPtr(lltype.Char), float_constants)
         for i in range(len(data)):
             addr[i] = data[i]
@@ -484,8 +480,7 @@
         if self.cpu.HAS_CODEMAP:
             self.codemap_builder.enter_portal_frame(jd_id, unique_id,
                                                     self.mc.get_relative_pos())
-        frame_info = self.datablockwrapper.malloc_aligned(
-            jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
+        frame_info = self.malloc_aligned(jitframe.JITFRAMEINFO_SIZE)
         clt.frame_info = rffi.cast(jitframe.JITFRAMEINFOPTR, frame_info)
         clt.frame_info.clear() # for now
 
@@ -647,11 +642,7 @@
         assert isinstance(faildescr, ResumeGuardDescr)
         assert asminfo.rawstart != 0
         self.mc = codebuf.MachineCodeBlockWrapper()
-        allblocks = self.get_asmmemmgr_blocks(looptoken)
-        self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
-                                                   allblocks)
-        frame_info = self.datablockwrapper.malloc_aligned(
-            jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
+        frame_info = self.malloc_aligned(jitframe.JITFRAMEINFO_SIZE)
 
         self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
         # if accumulation is saved at the guard, we need to update it here!
@@ -717,8 +708,7 @@
         elif IS_X86_32:
             # allocate the gc table right now.  This lets us write
             # machine code with absolute 32-bit addressing.
-            self.gc_table_addr = self.datablockwrapper.malloc_aligned(
-                gcref_table_size, alignment=WORD)
+            self.gc_table_addr = self.malloc_aligned(gcref_table_size)
         #
         self.setup_gcrefs_list(allgcrefs)
 
@@ -854,11 +844,8 @@
         mc.copy_to_raw_memory(adr)
 
     def materialize_loop(self, looptoken):
-        self.datablockwrapper.done()      # finish using cpu.asmmemmgr
-        self.datablockwrapper = None
-        allblocks = self.get_asmmemmgr_blocks(looptoken)
         size = self.mc.get_relative_pos()
-        res = self.mc.materialize(self.cpu, allblocks,
+        res = self.mc.materialize(self.cpu, self.allblocks,
                                   self.cpu.gc_ll_descr.gcrootmap)
         if self.cpu.HAS_CODEMAP:
             self.cpu.codemap.register_codemap(
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -68,13 +68,13 @@
     save_around_call_regs = all_regs
 
     def convert_to_imm(self, c):
-        adr = self.assembler.datablockwrapper.malloc_aligned(8, 8)
+        adr = self.assembler.malloc_aligned(8, 8)
         x = c.getfloatstorage()
         rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = x
         return ConstFloatLoc(adr)
 
     def convert_to_imm_16bytes_align(self, c):
-        adr = self.assembler.datablockwrapper.malloc_aligned(16, 16)
+        adr = self.assembler.malloc_aligned(16, 16)
         x = c.getfloatstorage()
         y = longlong.ZEROF
         rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = x
@@ -89,14 +89,14 @@
         return loc
 
     def expand_double_float(self, f):
-        adr = self.assembler.datablockwrapper.malloc_aligned(16, 16)
+        adr = self.assembler.malloc_aligned(16, 16)
         fs = f.getfloatstorage()
         rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = fs
         rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[1] = fs
         return ConstFloatLoc(adr)
 
     def expand_single_float(self, f):
-        adr = self.assembler.datablockwrapper.malloc_aligned(16, 16)
+        adr = self.assembler.malloc_aligned(16, 16)
         fs = rffi.cast(lltype.SingleFloat, f.getfloatstorage())
         rffi.cast(rffi.CArrayPtr(lltype.SingleFloat), adr)[0] = fs
         rffi.cast(rffi.CArrayPtr(lltype.SingleFloat), adr)[1] = fs


More information about the pypy-commit mailing list