[pypy-commit] pypy asmmemmgr-for-code-only: in-progress
arigo
pypy.commits at gmail.com
Wed Aug 17 11:22:07 EDT 2016
Author: Armin Rigo <arigo at tunes.org>
Branch: asmmemmgr-for-code-only
Changeset: r86249:9fb620098ee2
Date: 2016-08-17 17:21 +0200
http://bitbucket.org/pypy/pypy/changeset/9fb620098ee2/
Log: in-progress
diff --git a/rpython/jit/backend/llsupport/asmmemmgr.py b/rpython/jit/backend/llsupport/asmmemmgr.py
--- a/rpython/jit/backend/llsupport/asmmemmgr.py
+++ b/rpython/jit/backend/llsupport/asmmemmgr.py
@@ -29,14 +29,17 @@
"""Returns stats for rlib.jit.jit_hooks.stats_asmmemmgr_*()."""
return (self.total_memory_allocated, self.total_mallocs)
- def malloc(self, minsize, maxsize):
+ def malloc_code(self, size):
"""Allocate executable memory of the given 'size' in bytes,
and return a pair (start, stop). Does not perform any rounding
- of minsize and maxsize.
+ of 'size'; the interesting property is that if all calls to
+ malloc_code() are done with a size that is a multiple of 2**N,
+ then they also return (start, stop) pointers that are aligned
+ to 2**N.
"""
- result = self._allocate_block(minsize)
+ result = self._allocate_block(size)
(start, stop) = result
- smaller_stop = start + maxsize
+ smaller_stop = start + size
if smaller_stop + self.min_fragment <= stop:
self._add_free_block(smaller_stop, stop)
stop = smaller_stop
@@ -44,28 +47,12 @@
self.total_mallocs += r_uint(stop - start)
return result # pair (start, stop)
- def free(self, start, stop):
+ def free_code(self, start, stop):
"""Free a block (start, stop) returned by a previous malloc_code()."""
if r_uint is not None:
self.total_mallocs -= r_uint(stop - start)
self._add_free_block(start, stop)
- def open_malloc(self, minsize):
- """Allocate at least minsize bytes. Returns (start, stop)."""
- result = self._allocate_block(minsize)
- (start, stop) = result
- self.total_mallocs += r_uint(stop - start)
- return result
-
- def open_free(self, middle, stop):
- """Used for freeing the end of an open-allocated block of memory."""
- if stop - middle >= self.min_fragment:
- self.total_mallocs -= r_uint(stop - middle)
- self._add_free_block(middle, stop)
- return True
- else:
- return False # too small to record
-
def _allocate_large_block(self, minsize):
# Compute 'size' from 'minsize': it must be rounded up to
# 'large_alloc_size'. Additionally, we use the following line
@@ -163,40 +150,6 @@
self._allocated = None
-class MachineDataBlockWrapper(object):
- def __init__(self, asmmemmgr, allblocks):
- self.asmmemmgr = asmmemmgr
- self.allblocks = allblocks
- self.rawstart = 0
- self.rawposition = 0
- self.rawstop = 0
-
- def done(self):
- if self.rawstart != 0:
- if self.asmmemmgr.open_free(self.rawposition, self.rawstop):
- self.rawstop = self.rawposition
- self.allblocks.append((self.rawstart, self.rawstop))
- self.rawstart = 0
- self.rawposition = 0
- self.rawstop = 0
-
- def _allocate_next_block(self, minsize):
- self.done()
- self.rawstart, self.rawstop = self.asmmemmgr.open_malloc(minsize)
- self.rawposition = self.rawstart
-
- def malloc_aligned(self, size, alignment):
- p = self.rawposition
- p = (p + alignment - 1) & (-alignment)
- if p + size > self.rawstop:
- self._allocate_next_block(size + alignment - 1)
- p = self.rawposition
- p = (p + alignment - 1) & (-alignment)
- assert p + size <= self.rawstop
- self.rawposition = p + size
- return p
-
-
class BlockBuilderMixin(object):
_mixin_ = True
# A base class to generate assembler. It is equivalent to just a list
@@ -321,11 +274,16 @@
def materialize(self, cpu, allblocks, gcrootmap=None):
size = self.get_relative_pos()
align = self.ALIGN_MATERIALIZE
- size += align - 1
- malloced = cpu.asmmemmgr.malloc(size, size)
- allblocks.append(malloced)
+ size = (size + align - 1) & ~(align - 1) # round up
+ malloced = cpu.asmmemmgr.malloc_code(size)
rawstart = malloced[0]
- rawstart = (rawstart + align - 1) & (-align)
+ rawstop = malloced[1]
+ assert (rawstart & (align - 1)) == 0, (
+ "malloc_code() not aligned to a multiple of ALIGN_MATERIALIZE")
+ assert (rawstart & 1) == 0
+ assert (rawstop & 1) == 0
+ allblocks.append(rawstart)
+ allblocks.append(rawstop - 1)
self.rawstart = rawstart
self.copy_to_raw_memory(rawstart)
if self.gcroot_markers is not None:
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -177,6 +177,21 @@
clt.asmmemmgr_gcreftracers = []
return clt.asmmemmgr_gcreftracers
+ def malloc_aligned(self, size, alignment=WORD):
+ p1 = lltype.malloc(rffi.CCHARP.TO, size, flavor='raw',
+ track_allocation=False)
+ s1 = s2 = rffi.cast(lltype.Signed, p1)
+ if (s1 & (alignment - 1)) != 0: # bah, try again
+ lltype.free(p1, flavor='raw')
+ p1 = lltype.malloc(rffi.CCHARP.TO, size + (alignment - 1),
+ flavor='raw')
+ s1 = s2 = rffi.cast(lltype.Signed, p1)
+ s2 = (s2 + alignment - 1) & ~(alignment - 1)
+ assert self.allblocks is not None
+ assert (s1 & 1) == 0 # must be even
+ self.allblocks.append(s1)
+ return s2
+
def set_debug(self, v):
r = self._debug
self._debug = v
@@ -491,7 +506,6 @@
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
return bool(gcrootmap) and not gcrootmap.is_shadow_stack
-
def debug_bridge(descr_number, rawstart, codeendpos):
debug_start("jit-backend-addr")
debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
diff --git a/rpython/jit/backend/llsupport/gcmap.py b/rpython/jit/backend/llsupport/gcmap.py
--- a/rpython/jit/backend/llsupport/gcmap.py
+++ b/rpython/jit/backend/llsupport/gcmap.py
@@ -7,8 +7,7 @@
def allocate_gcmap(assembler, frame_depth, fixed_size):
size = frame_depth + fixed_size
malloc_size = (size // WORD // 8 + 1) + 1
- rawgcmap = assembler.datablockwrapper.malloc_aligned(WORD * malloc_size,
- WORD)
+ rawgcmap = assembler.malloc_aligned(WORD * malloc_size)
# set the length field
rffi.cast(rffi.CArrayPtr(lltype.Signed), rawgcmap)[0] = malloc_size - 1
gcmap = rffi.cast(lltype.Ptr(jitframe.GCMAP), rawgcmap)
diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -256,11 +256,23 @@
blocks = compiled_loop_token.asmmemmgr_blocks
if blocks is not None:
compiled_loop_token.asmmemmgr_blocks = None
- for rawstart, rawstop in blocks:
- self.gc_ll_descr.freeing_block(rawstart, rawstop)
- self.asmmemmgr.free(rawstart, rawstop)
- if self.HAS_CODEMAP:
- self.codemap.free_asm_block(rawstart, rawstop)
+ # see the description in ../model.py about asmmemmgr_blocks
+ i = len(blocks)
+ while i > 0:
+ i -= 1
+ pp = blocks[i]
+ if pp & 1: # odd number, that's the stop after a start
+ rawstop = pp + 1
+ i -= 1
+ assert i >= 0
+ rawstart = blocks[i]
+ self.gc_ll_descr.freeing_block(rawstart, rawstop)
+ self.asmmemmgr.free_code(rawstart, rawstop)
+ if self.HAS_CODEMAP:
+ self.codemap.free_asm_block(rawstart, rawstop)
+ else:
+ lltype.free(rffi.cast(rffi.CCHARP, pp), flavor='raw',
+ track_allocation=False)
def force(self, addr_of_force_token):
frame = rffi.cast(jitframe.JITFRAMEPTR, addr_of_force_token)
diff --git a/rpython/jit/backend/llsupport/test/test_asmmemmgr.py b/rpython/jit/backend/llsupport/test/test_asmmemmgr.py
--- a/rpython/jit/backend/llsupport/test/test_asmmemmgr.py
+++ b/rpython/jit/backend/llsupport/test/test_asmmemmgr.py
@@ -1,6 +1,5 @@
import random
from rpython.jit.backend.llsupport.asmmemmgr import AsmMemoryManager
-from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
from rpython.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin
from rpython.jit.backend.llsupport.codemap import CodemapStorage
from rpython.rtyper.lltypesystem import lltype, rffi
@@ -63,16 +62,15 @@
num_indices=5)
memmgr._add_free_block(10, 18)
memmgr._add_free_block(20, 30)
- for minsize in range(1, 11):
- for maxsize in range(minsize, 14):
- (start, stop) = memmgr.malloc(minsize, maxsize)
- if minsize <= 8:
- assert (start, stop) == (10, 18)
- else:
- assert (start, stop) == (20, 30)
- memmgr._add_free_block(start, stop)
+ for size in range(1, 11):
+ (start, stop) = memmgr.malloc_code(size)
+ if size <= 8:
+ assert (start, stop) == (10, 18)
+ else:
+ assert (start, stop) == (20, 30)
+ memmgr._add_free_block(start, stop)
memmgr._add_free_block(40, 49)
- (start, stop) = memmgr.malloc(10, 10)
+ (start, stop) = memmgr.malloc_code(10)
assert (start, stop) == (20, 30)
def test_malloc_with_fragment():
@@ -80,7 +78,7 @@
memmgr = AsmMemoryManager(min_fragment=8,
num_indices=5)
memmgr._add_free_block(12, 44)
- (start, stop) = memmgr.malloc(reqsize, reqsize)
+ (start, stop) = memmgr.malloc_code(reqsize)
if reqsize + 8 <= 32:
assert (start, stop) == (12, 12 + reqsize)
assert memmgr.free_blocks == {stop: 44}
@@ -108,7 +106,7 @@
for i in range(100):
while self.asmmemmgr.total_memory_allocated < 16384:
reqsize = random.randrange(1, 200)
- (start, stop) = self.asmmemmgr.malloc(reqsize, reqsize)
+ (start, stop) = self.asmmemmgr.malloc_code(reqsize)
assert reqsize <= stop - start < reqsize + 8
assert self.asmmemmgr.total_memory_allocated in [8192, 16384]
self.teardown_method(None)
@@ -124,19 +122,15 @@
if got and (random.random() < 0.4 or len(got) == 1000):
# free
start, stop = got.pop(random.randrange(0, len(got)))
- self.asmmemmgr.free(start, stop)
+ self.asmmemmgr.free_code(start, stop)
real_use -= (stop - start)
assert real_use >= 0
#
else:
# allocate
reqsize = random.randrange(1, 200)
- if random.random() < 0.5:
- reqmaxsize = reqsize
- else:
- reqmaxsize = reqsize + random.randrange(0, 200)
- (start, stop) = self.asmmemmgr.malloc(reqsize, reqmaxsize)
- assert reqsize <= stop - start < reqmaxsize + 8
+ (start, stop) = self.asmmemmgr.malloc_code(reqsize)
+ assert reqsize <= stop - start < reqsize + 8
for otherstart, otherstop in got: # no overlap
assert otherstop <= start or stop <= otherstart
got.append((start, stop))
@@ -182,11 +176,11 @@
assert p[3] == 'y'
assert p[4] == 'Z'
assert p[5] == 'z'
- # 'allblocks' should be one block of length 6 + 15
- # (15 = alignment - 1) containing the range(rawstart, rawstart + 6)
- [(blockstart, blockend)] = allblocks
- assert blockend == blockstart + 6 + (mc.ALIGN_MATERIALIZE - 1)
- assert blockstart <= rawstart < rawstart + 6 <= blockend
+ # 'allblocks' should be one block of length 16 (= 6 rounded up)
+ # starting at 'rawstart'
+ [blockstart, blockend] = allblocks
+ assert blockend == blockstart + 15 # is odd, one less than real stop
+ assert blockstart == rawstart
assert puts == [(rawstart + 2, ['a', 'b', 'c', 'd']),
(rawstart + 4, ['e', 'f', 'g'])]
@@ -232,41 +226,3 @@
def test_blockbuildermixin2():
test_blockbuildermixin(translated=False)
-
-def test_machinedatablock():
- ops = []
- class FakeMemMgr:
- _addr = 1597
- def open_malloc(self, minsize):
- result = (self._addr, self._addr + 100)
- ops.append(('malloc', minsize) + result)
- self._addr += 200
- return result
- def open_free(self, frm, to):
- ops.append(('free', frm, to))
- return to - frm >= 8
- #
- allblocks = []
- md = MachineDataBlockWrapper(FakeMemMgr(), allblocks)
- p = md.malloc_aligned(26, 16)
- assert p == 1600
- assert ops == [('malloc', 26 + 15, 1597, 1697)]
- del ops[:]
- #
- p = md.malloc_aligned(26, 16)
- assert p == 1632
- p = md.malloc_aligned(26, 16)
- assert p == 1664
- assert allblocks == []
- assert ops == []
- #
- p = md.malloc_aligned(27, 16)
- assert p == 1808
- assert allblocks == [(1597, 1697)]
- assert ops == [('free', 1690, 1697),
- ('malloc', 27 + 15, 1797, 1897)]
- del ops[:]
- #
- md.done()
- assert allblocks == [(1597, 1697), (1797, 1835)]
- assert ops == [('free', 1835, 1897)]
diff --git a/rpython/jit/backend/model.py b/rpython/jit/backend/model.py
--- a/rpython/jit/backend/model.py
+++ b/rpython/jit/backend/model.py
@@ -287,6 +287,13 @@
asmmemmgr_blocks = None
asmmemmgr_gcreftracers = None
+ # asmmemmgr_blocks is a list of integers with the following structure:
+ # - a pointer, which is an even number
+ # - optionally an end-pointer minus 1, which is an odd number
# If a pointer is followed by such an end-pointer-minus-1 entry,
# together they describe the start and stop of a piece of code.
# In the other case, it is simply a piece of data that must be free()d.
+
def __init__(self, cpu, number):
cpu.tracker.total_compiled_loops += 1
self.cpu = cpu
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -4,7 +4,6 @@
from rpython.jit.backend.llsupport import symbolic, jitframe, rewrite
from rpython.jit.backend.llsupport.assembler import (GuardToken, BaseAssembler, debug_bridge)
-from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
from rpython.jit.metainterp.history import (Const, VOID, ConstInt)
from rpython.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
@@ -62,12 +61,14 @@
self.malloc_slowpath_varsize = 0
self.wb_slowpath = [0, 0, 0, 0, 0]
self.setup_failure_recovery()
- self.datablockwrapper = None
self.stack_check_slowpath = 0
self.propagate_exception_path = 0
self.teardown()
def setup_once(self):
+ # make a list that will be forgotten at the first setup(), for
+ # allocating the few immortal pieces of data now
+ self.allblocks = []
BaseAssembler.setup_once(self)
if self.cpu.supports_floats:
support.ensure_sse2_floats()
@@ -82,13 +83,9 @@
self.pending_memoryerror_trampoline_from = []
self.error_trampoline_64 = 0
self.mc = codebuf.MachineCodeBlockWrapper()
- #assert self.datablockwrapper is None --- but obscure case
- # possible, e.g. getting MemoryError and continuing
- allblocks = self.get_asmmemmgr_blocks(looptoken)
- self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
- allblocks)
self.target_tokens_currently_compiling = {}
self.frame_depth_to_patch = []
+ self.allblocks = self.get_asmmemmgr_blocks(looptoken)
def teardown(self):
self.pending_guard_tokens = None
@@ -96,6 +93,7 @@
self.pending_memoryerror_trampoline_from = None
self.mc = None
self.current_clt = None
+ self.allblocks = None
def _build_float_constants(self):
# 0x80000000000000008000000000000000
@@ -111,9 +109,7 @@
data = neg_const + abs_const + \
single_neg_const + single_abs_const + \
zero_const
- datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr, [])
- float_constants = datablockwrapper.malloc_aligned(len(data), alignment=16)
- datablockwrapper.done()
+ float_constants = self.malloc_aligned(len(data), alignment=16)
addr = rffi.cast(rffi.CArrayPtr(lltype.Char), float_constants)
for i in range(len(data)):
addr[i] = data[i]
@@ -484,8 +480,7 @@
if self.cpu.HAS_CODEMAP:
self.codemap_builder.enter_portal_frame(jd_id, unique_id,
self.mc.get_relative_pos())
- frame_info = self.datablockwrapper.malloc_aligned(
- jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
+ frame_info = self.malloc_aligned(jitframe.JITFRAMEINFO_SIZE)
clt.frame_info = rffi.cast(jitframe.JITFRAMEINFOPTR, frame_info)
clt.frame_info.clear() # for now
@@ -647,11 +642,7 @@
assert isinstance(faildescr, ResumeGuardDescr)
assert asminfo.rawstart != 0
self.mc = codebuf.MachineCodeBlockWrapper()
- allblocks = self.get_asmmemmgr_blocks(looptoken)
- self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
- allblocks)
- frame_info = self.datablockwrapper.malloc_aligned(
- jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
+ frame_info = self.malloc_aligned(jitframe.JITFRAMEINFO_SIZE)
self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
# if accumulation is saved at the guard, we need to update it here!
@@ -717,8 +708,7 @@
elif IS_X86_32:
# allocate the gc table right now. This lets us write
# machine code with absolute 32-bit addressing.
- self.gc_table_addr = self.datablockwrapper.malloc_aligned(
- gcref_table_size, alignment=WORD)
+ self.gc_table_addr = self.malloc_aligned(gcref_table_size)
#
self.setup_gcrefs_list(allgcrefs)
@@ -854,11 +844,8 @@
mc.copy_to_raw_memory(adr)
def materialize_loop(self, looptoken):
- self.datablockwrapper.done() # finish using cpu.asmmemmgr
- self.datablockwrapper = None
- allblocks = self.get_asmmemmgr_blocks(looptoken)
size = self.mc.get_relative_pos()
- res = self.mc.materialize(self.cpu, allblocks,
+ res = self.mc.materialize(self.cpu, self.allblocks,
self.cpu.gc_ll_descr.gcrootmap)
if self.cpu.HAS_CODEMAP:
self.cpu.codemap.register_codemap(
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -68,13 +68,13 @@
save_around_call_regs = all_regs
def convert_to_imm(self, c):
- adr = self.assembler.datablockwrapper.malloc_aligned(8, 8)
+ adr = self.assembler.malloc_aligned(8, 8)
x = c.getfloatstorage()
rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = x
return ConstFloatLoc(adr)
def convert_to_imm_16bytes_align(self, c):
- adr = self.assembler.datablockwrapper.malloc_aligned(16, 16)
+ adr = self.assembler.malloc_aligned(16, 16)
x = c.getfloatstorage()
y = longlong.ZEROF
rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = x
@@ -89,14 +89,14 @@
return loc
def expand_double_float(self, f):
- adr = self.assembler.datablockwrapper.malloc_aligned(16, 16)
+ adr = self.assembler.malloc_aligned(16, 16)
fs = f.getfloatstorage()
rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = fs
rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[1] = fs
return ConstFloatLoc(adr)
def expand_single_float(self, f):
- adr = self.assembler.datablockwrapper.malloc_aligned(16, 16)
+ adr = self.assembler.malloc_aligned(16, 16)
fs = rffi.cast(lltype.SingleFloat, f.getfloatstorage())
rffi.cast(rffi.CArrayPtr(lltype.SingleFloat), adr)[0] = fs
rffi.cast(rffi.CArrayPtr(lltype.SingleFloat), adr)[1] = fs
More information about the pypy-commit
mailing list