[pypy-svn] r68481 - in pypy/trunk/pypy: jit/backend/llsupport jit/backend/x86 jit/backend/x86/test rpython rpython/lltypesystem rpython/memory/gc rpython/memory/gctransform rpython/memory/test
fijal at codespeak.net
fijal at codespeak.net
Thu Oct 15 11:31:51 CEST 2009
Author: fijal
Date: Thu Oct 15 11:31:51 2009
New Revision: 68481
Modified:
pypy/trunk/pypy/jit/backend/llsupport/gc.py
pypy/trunk/pypy/jit/backend/x86/assembler.py
pypy/trunk/pypy/jit/backend/x86/regalloc.py
pypy/trunk/pypy/jit/backend/x86/test/test_gc_integration.py
pypy/trunk/pypy/rpython/llinterp.py
pypy/trunk/pypy/rpython/lltypesystem/lloperation.py
pypy/trunk/pypy/rpython/memory/gc/generation.py
pypy/trunk/pypy/rpython/memory/gctransform/framework.py
pypy/trunk/pypy/rpython/memory/test/test_transformed_gc.py
Log:
(fijal, arigo)
Merge the inline-fastpath-malloc branch.
This branch inlines the fast path of malloc_fixedsize_clear from the generation GC
directly into assembler, speeding up allocation in the nursery when objects
don't have finalizers.
Modified: pypy/trunk/pypy/jit/backend/llsupport/gc.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/llsupport/gc.py (original)
+++ pypy/trunk/pypy/jit/backend/llsupport/gc.py Thu Oct 15 11:31:51 2009
@@ -12,6 +12,7 @@
from pypy.jit.backend.llsupport.descr import GcCache, get_field_descr
from pypy.jit.backend.llsupport.descr import GcPtrFieldDescr
from pypy.jit.backend.llsupport.descr import get_call_descr
+from pypy.rlib.rarithmetic import r_ulonglong, r_uint
# ____________________________________________________________
@@ -27,6 +28,8 @@
pass
def rewrite_assembler(self, cpu, operations):
pass
+ def can_inline_malloc(self, descr):
+ return False
# ____________________________________________________________
@@ -341,6 +344,8 @@
lltype.Void)
(self.array_basesize, _, self.array_length_ofs) = \
symbolic.get_array_token(lltype.GcArray(lltype.Signed), True)
+ min_ns = self.GCClass.TRANSLATION_PARAMS['min_nursery_size']
+ self.max_size_of_young_obj = self.GCClass.get_young_fixedsize(min_ns)
# make a malloc function, with three arguments
def malloc_basic(size, tid):
@@ -391,6 +396,28 @@
self.malloc_unicode = malloc_unicode
self.GC_MALLOC_STR_UNICODE = lltype.Ptr(lltype.FuncType(
[lltype.Signed], llmemory.GCREF))
+ def malloc_fixedsize_slowpath(size):
+ gcref = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
+ 0, size, True, False, False)
+ res = rffi.cast(lltype.Signed, gcref)
+ nurs_free = llop1.gc_adr_of_nursery_free(llmemory.Address).signed[0]
+ return r_ulonglong(nurs_free) << 32 | r_ulonglong(r_uint(res))
+ self.malloc_fixedsize_slowpath = malloc_fixedsize_slowpath
+ self.MALLOC_FIXEDSIZE_SLOWPATH = lltype.FuncType([lltype.Signed],
+ lltype.UnsignedLongLong)
+
+ def get_nursery_free_addr(self):
+ nurs_addr = llop.gc_adr_of_nursery_free(llmemory.Address)
+ return rffi.cast(lltype.Signed, nurs_addr)
+
+ def get_nursery_top_addr(self):
+ nurs_top_addr = llop.gc_adr_of_nursery_top(llmemory.Address)
+ return rffi.cast(lltype.Signed, nurs_top_addr)
+
+ def get_malloc_fixedsize_slowpath_addr(self):
+ fptr = llhelper(lltype.Ptr(self.MALLOC_FIXEDSIZE_SLOWPATH),
+ self.malloc_fixedsize_slowpath)
+ return rffi.cast(lltype.Signed, fptr)
def initialize(self):
self.gcrefs.initialize()
@@ -519,6 +546,15 @@
newops.append(ResOperation(rop.COND_CALL_GC_WB, args, None,
descr=self.calldescr_jit_wb))
+ def can_inline_malloc(self, descr):
+ assert isinstance(descr, BaseSizeDescr)
+ if descr.size < self.max_size_of_young_obj:
+ has_finalizer = bool(descr.tid & (1<<16))
+ if has_finalizer:
+ return False
+ return True
+ return False
+
# ____________________________________________________________
def get_ll_description(gcdescr, translator=None):
Modified: pypy/trunk/pypy/jit/backend/x86/assembler.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/assembler.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/assembler.py Thu Oct 15 11:31:51 2009
@@ -523,7 +523,10 @@
arglocs = arglocs[:-1]
self.call(self.malloc_func_addr, arglocs, eax)
# xxx ignore NULL returns for now
- self.mc.MOV(mem(eax, self.cpu.vtable_offset), loc_vtable)
+ self.set_vtable(eax, loc_vtable)
+
+ def set_vtable(self, loc, loc_vtable):
+ self.mc.MOV(mem(loc, self.cpu.vtable_offset), loc_vtable)
# XXX genop_new is abused for all varsized mallocs with Boehm, for now
# (instead of genop_new_array, genop_newstr, genop_newunicode)
@@ -902,8 +905,30 @@
def closing_jump(self, loop_token):
self.mc.JMP(rel32(loop_token._x86_loop_code))
-
+ def malloc_cond_fixedsize(self, nursery_free_adr, nursery_top_adr,
+ size, tid, slowpath_addr):
+ # don't use self.mc
+ mc = self.mc._mc
+ mc.MOV(eax, heap(nursery_free_adr))
+ mc.LEA(edx, addr_add(eax, imm(size)))
+ mc.CMP(edx, heap(nursery_top_adr))
+ mc.write('\x76\x00') # JNA after the block
+ jmp_adr = mc.get_relative_pos()
+ mc.PUSH(imm(size))
+ mc.CALL(rel32(slowpath_addr))
+ self.mark_gc_roots()
+ # note that slowpath_addr returns a "long long", or more precisely
+ # two results, which end up in eax and edx.
+ # eax should contain the result of allocation, edx new value
+ # of nursery_free_adr
+ mc.ADD(esp, imm(4))
+ offset = mc.get_relative_pos() - jmp_adr
+ assert 0 < offset <= 127
+ mc.overwrite(jmp_adr-1, chr(offset))
+ mc.MOV(addr_add(eax, imm(0)), imm(tid))
+ mc.MOV(heap(nursery_free_adr), edx)
+
genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST
genop_list = [Assembler386.not_implemented_op] * rop._LAST
genop_guard_list = [Assembler386.not_implemented_op_guard] * rop._LAST
Modified: pypy/trunk/pypy/jit/backend/x86/regalloc.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/regalloc.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/regalloc.py Thu Oct 15 11:31:51 2009
@@ -14,7 +14,7 @@
from pypy.jit.backend.x86.jump import remap_stack_layout
from pypy.jit.metainterp.resoperation import rop
from pypy.jit.backend.llsupport.descr import BaseFieldDescr, BaseArrayDescr
-from pypy.jit.backend.llsupport.descr import BaseCallDescr
+from pypy.jit.backend.llsupport.descr import BaseCallDescr, BaseSizeDescr
from pypy.jit.backend.llsupport.regalloc import StackManager, RegisterManager,\
TempBox
@@ -623,18 +623,52 @@
self.PerformDiscard(op, arglocs)
self.rm.possibly_free_vars(op.args)
+ def _fastpath_malloc(self, op, descr):
+ assert isinstance(descr, BaseSizeDescr)
+ gc_ll_descr = self.assembler.cpu.gc_ll_descr
+ tmp0 = TempBox()
+ self.rm.force_allocate_reg(op.result, selected_reg=eax)
+ self.rm.force_allocate_reg(tmp0, selected_reg=edx)
+ for v, reg in self.rm.reg_bindings.items():
+ if reg is ecx:
+ to_sync = v
+ break
+ else:
+ to_sync = None
+ if to_sync is not None:
+ self.rm._sync_var(to_sync)
+ del self.rm.reg_bindings[to_sync]
+ self.rm.free_regs.append(ecx)
+ # we need to do it here, so edx is not in reg_bindings
+ self.rm.possibly_free_var(tmp0)
+ self.assembler.malloc_cond_fixedsize(
+ gc_ll_descr.get_nursery_free_addr(),
+ gc_ll_descr.get_nursery_top_addr(),
+ descr.size, descr.tid,
+ gc_ll_descr.get_malloc_fixedsize_slowpath_addr(),
+ )
+
def consider_new(self, op, ignored):
- args = self.assembler.cpu.gc_ll_descr.args_for_new(op.descr)
- arglocs = [imm(x) for x in args]
- return self._call(op, arglocs)
+ gc_ll_descr = self.assembler.cpu.gc_ll_descr
+ if gc_ll_descr.can_inline_malloc(op.descr):
+ self._fastpath_malloc(op, op.descr)
+ else:
+ args = gc_ll_descr.args_for_new(op.descr)
+ arglocs = [imm(x) for x in args]
+ return self._call(op, arglocs)
def consider_new_with_vtable(self, op, ignored):
classint = op.args[0].getint()
descrsize = self.assembler.cpu.class_sizes[classint]
- args = self.assembler.cpu.gc_ll_descr.args_for_new(descrsize)
- arglocs = [imm(x) for x in args]
- arglocs.append(self.loc(op.args[0]))
- return self._call(op, arglocs)
+ if self.assembler.cpu.gc_ll_descr.can_inline_malloc(descrsize):
+ self._fastpath_malloc(op, descrsize)
+ self.assembler.set_vtable(eax, imm(classint))
+ # result of fastpath malloc is in eax
+ else:
+ args = self.assembler.cpu.gc_ll_descr.args_for_new(descrsize)
+ arglocs = [imm(x) for x in args]
+ arglocs.append(self.loc(op.args[0]))
+ return self._call(op, arglocs)
def consider_newstr(self, op, ignored):
gc_ll_descr = self.assembler.cpu.gc_ll_descr
Modified: pypy/trunk/pypy/jit/backend/x86/test/test_gc_integration.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/test_gc_integration.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/test/test_gc_integration.py Thu Oct 15 11:31:51 2009
@@ -7,6 +7,7 @@
BoxPtr, ConstPtr, TreeLoop
from pypy.jit.metainterp.resoperation import rop, ResOperation
from pypy.jit.backend.llsupport.descr import GcCache
+from pypy.jit.backend.llsupport.gc import GcLLDescription
from pypy.jit.backend.x86.runner import CPU
from pypy.jit.backend.x86.regalloc import RegAlloc, WORD
from pypy.jit.metainterp.test.oparser import parse
@@ -20,6 +21,7 @@
from pypy.jit.backend.x86.test.test_regalloc import BaseTestRegalloc
from pypy.jit.backend.x86.regalloc import X86RegisterManager, X86StackManager,\
X86XMMRegisterManager
+from pypy.rpython.annlowlevel import llhelper
class MockGcRootMap(object):
def get_basic_shape(self):
@@ -159,3 +161,128 @@
jump(i0, i1, 1, 17, i4, ConstPtr(ptr0), i6, i7, i24)
'''
self.interpret(ops, [0, 0, 0, 0, 0, 0, 0, 0, 0], run=False)
+
+class GCDescrFastpathMalloc(GcLLDescription):
+ gcrootmap = None
+
+ def __init__(self):
+ GcCache.__init__(self, False)
+ # create a nursery
+ NTP = rffi.CArray(lltype.Signed)
+ self.nursery = lltype.malloc(NTP, 16, flavor='raw')
+ self.addrs = lltype.malloc(rffi.CArray(lltype.Signed), 2,
+ flavor='raw')
+ self.addrs[0] = rffi.cast(lltype.Signed, self.nursery)
+ self.addrs[1] = self.addrs[0] + 64
+ # 64 bytes
+ def malloc_slowpath(size):
+ from pypy.rlib.rarithmetic import r_ulonglong
+ assert size == 8
+ nadr = rffi.cast(lltype.Signed, self.nursery)
+ self.addrs[0] = 99999 # should be overridden by the caller
+ return ((r_ulonglong(nadr + size) << 32) | # this part in edx
+ r_ulonglong(nadr)) # this part in eax
+ self.malloc_slowpath = malloc_slowpath
+ self.MALLOC_SLOWPATH = lltype.FuncType([lltype.Signed],
+ lltype.UnsignedLongLong)
+ self._counter = 123
+
+ def can_inline_malloc(self, descr):
+ return True
+
+ def get_funcptr_for_new(self):
+ return 42
+# return llhelper(lltype.Ptr(self.NEW_TP), self.new)
+
+ def init_size_descr(self, S, descr):
+ descr.tid = self._counter
+ self._counter += 1
+
+ def get_nursery_free_addr(self):
+ return rffi.cast(lltype.Signed, self.addrs)
+
+ def get_nursery_top_addr(self):
+ return rffi.cast(lltype.Signed, self.addrs) + 4
+
+ def get_malloc_fixedsize_slowpath_addr(self):
+ fptr = llhelper(lltype.Ptr(self.MALLOC_SLOWPATH), self.malloc_slowpath)
+ return rffi.cast(lltype.Signed, fptr)
+
+ get_funcptr_for_newarray = None
+ get_funcptr_for_newstr = None
+ get_funcptr_for_newunicode = None
+
+class TestMallocFastpath(BaseTestRegalloc):
+
+ def setup_method(self, method):
+ cpu = CPU(None, None)
+ cpu.vtable_offset = 4
+ cpu.gc_ll_descr = GCDescrFastpathMalloc()
+
+ NODE = lltype.Struct('node', ('tid', lltype.Signed),
+ ('value', lltype.Signed))
+ nodedescr = cpu.sizeof(NODE) # xxx hack: NODE is not a GcStruct
+ valuedescr = cpu.fielddescrof(NODE, 'value')
+
+ self.cpu = cpu
+ self.nodedescr = nodedescr
+ vtable = lltype.malloc(rclass.OBJECT_VTABLE, immortal=True)
+ vtable_int = cpu.cast_adr_to_int(llmemory.cast_ptr_to_adr(vtable))
+ NODE2 = lltype.Struct('node2', ('tid', lltype.Signed),
+ ('vtable', lltype.Ptr(rclass.OBJECT_VTABLE)))
+ descrsize = cpu.sizeof(NODE2)
+ cpu.set_class_sizes({vtable_int: descrsize})
+ self.descrsize = descrsize
+ self.vtable_int = vtable_int
+
+ self.namespace = locals().copy()
+
+ def test_malloc_fastpath(self):
+ ops = '''
+ [i0]
+ p0 = new(descr=nodedescr)
+ setfield_gc(p0, i0, descr=valuedescr)
+ finish(p0)
+ '''
+ self.interpret(ops, [42])
+ # check the nursery
+ gc_ll_descr = self.cpu.gc_ll_descr
+ assert gc_ll_descr.nursery[0] == self.nodedescr.tid
+ assert gc_ll_descr.nursery[1] == 42
+ nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
+ assert gc_ll_descr.addrs[0] == nurs_adr + 8
+
+ def test_malloc_slowpath(self):
+ ops = '''
+ []
+ p0 = new(descr=nodedescr)
+ p1 = new(descr=nodedescr)
+ p2 = new(descr=nodedescr)
+ p3 = new(descr=nodedescr)
+ p4 = new(descr=nodedescr)
+ p5 = new(descr=nodedescr)
+ p6 = new(descr=nodedescr)
+ p7 = new(descr=nodedescr)
+ p8 = new(descr=nodedescr)
+ finish(p0, p1, p2, p3, p4, p5, p6, p7, p8)
+ '''
+ self.interpret(ops, [])
+ # this should call slow path once
+ gc_ll_descr = self.cpu.gc_ll_descr
+ nadr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
+ assert gc_ll_descr.addrs[0] == nadr + 8
+
+ def test_new_with_vtable(self):
+ ops = '''
+ [i0, i1]
+ p0 = new_with_vtable(ConstClass(vtable))
+ guard_class(p0, ConstClass(vtable)) [i0]
+ finish(i1)
+ '''
+ self.interpret(ops, [0, 1])
+ assert self.getint(0) == 1
+ gc_ll_descr = self.cpu.gc_ll_descr
+ assert gc_ll_descr.nursery[0] == self.descrsize.tid
+ assert gc_ll_descr.nursery[1] == self.vtable_int
+ nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
+ assert gc_ll_descr.addrs[0] == nurs_adr + 8
Modified: pypy/trunk/pypy/rpython/llinterp.py
==============================================================================
--- pypy/trunk/pypy/rpython/llinterp.py (original)
+++ pypy/trunk/pypy/rpython/llinterp.py Thu Oct 15 11:31:51 2009
@@ -848,6 +848,12 @@
def op_gc_restore_exception(self, exc):
raise NotImplementedError("gc_restore_exception")
+ def op_gc_adr_of_nursery_top(self):
+ raise NotImplementedError
+
+ def op_gc_adr_of_nursery_free(self):
+ raise NotImplementedError
+
def op_gc_call_rtti_destructor(self, rtti, addr):
if hasattr(rtti._obj, 'destructor_funcptr'):
d = rtti._obj.destructor_funcptr
Modified: pypy/trunk/pypy/rpython/lltypesystem/lloperation.py
==============================================================================
--- pypy/trunk/pypy/rpython/lltypesystem/lloperation.py (original)
+++ pypy/trunk/pypy/rpython/lltypesystem/lloperation.py Thu Oct 15 11:31:51 2009
@@ -446,6 +446,14 @@
'gc_thread_run' : LLOp(),
'gc_thread_die' : LLOp(),
'gc_assume_young_pointers': LLOp(),
+
+ # ------- JIT & GC interaction, only for some GCs ----------
+
+ 'gc_adr_of_nursery_free' : LLOp(),
+ # ^^^ returns an address of nursery free pointer, for later modifications
+ 'gc_adr_of_nursery_top' : LLOp(),
+ # ^^^ returns an address of pointer, since it can change at runtime
+
# experimental operations in support of thread cloning, only
# implemented by the Mark&Sweep GC
'gc_x_swap_pool': LLOp(canraise=(MemoryError,), canunwindgc=True),
Modified: pypy/trunk/pypy/rpython/memory/gc/generation.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gc/generation.py (original)
+++ pypy/trunk/pypy/rpython/memory/gc/generation.py Thu Oct 15 11:31:51 2009
@@ -122,10 +122,12 @@
# a new nursery (e.g. if it invokes finalizers).
self.semispace_collect()
- def get_young_fixedsize(self, nursery_size):
+ @staticmethod
+ def get_young_fixedsize(nursery_size):
return nursery_size // 2 - 1
- def get_young_var_basesize(self, nursery_size):
+ @staticmethod
+ def get_young_var_basesize(nursery_size):
return nursery_size // 4 - 1
def is_in_nursery(self, addr):
Modified: pypy/trunk/pypy/rpython/memory/gctransform/framework.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gctransform/framework.py (original)
+++ pypy/trunk/pypy/rpython/memory/gctransform/framework.py Thu Oct 15 11:31:51 2009
@@ -599,6 +599,28 @@
hop.genop("direct_call", [self.assume_young_pointers_ptr,
self.c_const_gc, v_addr])
+ def gct_gc_adr_of_nursery_free(self, hop):
+ if getattr(self.gcdata.gc, 'nursery_free', None) is None:
+ raise NotImplementedError("gc_adr_of_nursery_free only for generational gcs")
+ op = hop.spaceop
+ ofs = llmemory.offsetof(self.c_const_gc.concretetype.TO,
+ 'inst_nursery_free')
+ c_ofs = rmodel.inputconst(lltype.Signed, ofs)
+ v_gc_adr = hop.genop('cast_ptr_to_adr', [self.c_const_gc],
+ resulttype=llmemory.Address)
+ hop.genop('adr_add', [v_gc_adr, c_ofs], resultvar=op.result)
+
+ def gct_gc_adr_of_nursery_top(self, hop):
+ if getattr(self.gcdata.gc, 'nursery_top', None) is None:
+ raise NotImplementedError("gc_adr_of_nursery_top only for generational gcs")
+ op = hop.spaceop
+ ofs = llmemory.offsetof(self.c_const_gc.concretetype.TO,
+ 'inst_nursery_top')
+ c_ofs = rmodel.inputconst(lltype.Signed, ofs)
+ v_gc_adr = hop.genop('cast_ptr_to_adr', [self.c_const_gc],
+ resulttype=llmemory.Address)
+ hop.genop('adr_add', [v_gc_adr, c_ofs], resultvar=op.result)
+
def _can_realloc(self):
return self.gcdata.gc.can_realloc
Modified: pypy/trunk/pypy/rpython/memory/test/test_transformed_gc.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/test/test_transformed_gc.py (original)
+++ pypy/trunk/pypy/rpython/memory/test/test_transformed_gc.py Thu Oct 15 11:31:51 2009
@@ -1043,6 +1043,27 @@
# * the GcArray pointer from gc.wr_to_objects_with_id
# * the GcArray pointer from gc.object_id_dict.
+ def test_adr_of_nursery(self):
+ class A(object):
+ pass
+
+ def f():
+ # we need at least 1 obj to allocate a nursery
+ a = A()
+ nf_a = llop.gc_adr_of_nursery_free(llmemory.Address)
+ nt_a = llop.gc_adr_of_nursery_top(llmemory.Address)
+ nf0 = nf_a.address[0]
+ nt0 = nt_a.address[0]
+ a0 = A()
+ a1 = A()
+ nf1 = nf_a.address[0]
+ nt1 = nt_a.address[0]
+ assert nf1 > nf0
+ assert nt1 > nf1
+ assert nt1 == nt0
+ run = self.runner(f, nbargs=0)
+ res = run([])
+
class TestGenerationalNoFullCollectGC(GCTest):
# test that nursery is doing its job and that no full collection
# is needed when most allocated objects die quickly
More information about the Pypy-commit
mailing list