[pypy-commit] pypy ppc-jit-backend: Applied more code of the ARM backend to PPC backend.
hager
noreply at buildbot.pypy.org
Mon Sep 12 16:04:37 CEST 2011
Author: hager <sven.hager at uni-duesseldorf.de>
Branch: ppc-jit-backend
Changeset: r47228:5fd785b4cb83
Date: 2011-09-12 16:03 +0200
http://bitbucket.org/pypy/pypy/changeset/5fd785b4cb83/
Log: Applied more code of the ARM backend to PPC backend.
diff --git a/pypy/jit/backend/ppc/ppcgen/condition.py b/pypy/jit/backend/ppc/ppcgen/condition.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/ppc/ppcgen/condition.py
@@ -0,0 +1,1 @@
+# Condition identifier for "less than or equal"; gen_emit_cmp_op in
+# helper/assembler.py compares its `condition` argument against this value.
+LE = 0
diff --git a/pypy/jit/backend/ppc/ppcgen/helper/__init__.py b/pypy/jit/backend/ppc/ppcgen/helper/__init__.py
new file mode 100644
diff --git a/pypy/jit/backend/ppc/ppcgen/helper/assembler.py b/pypy/jit/backend/ppc/ppcgen/helper/assembler.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/ppc/ppcgen/helper/assembler.py
@@ -0,0 +1,17 @@
+import pypy.jit.backend.ppc.ppcgen.condition as c
+
+# Factory for integer comparison emitters. Returns a function
+# f(self, op, arglocs, regalloc) intended to be bound as an assembler method
+# (ppc_assembler.py does emit_int_le = gen_emit_cmp_op(c.LE)).
+#
+# condition -- a constant from the condition module; only c.LE gets special
+# treatment below.
+# NOTE(review): for any other condition the code leaves CR bit 0 (LT of the
+# compare) untouched, so the emitter would compute "less than" -- confirm
+# before adding further conditions.
+def gen_emit_cmp_op(condition):
+ def f(self, op, arglocs, regalloc):
+ # arglocs holds [left operand, right operand, result]; op and regalloc
+ # are not used by this emitter
+ l0, l1, res = arglocs
+ # use the immediate form of the compare when the right operand is an
+ # immediate location; the leading 0 is the first operand of the compare
+ # instruction (presumably selecting CR field 0)
+ # NOTE(review): always uses the word compares cmpw/cmpwi -- confirm
+ # this is intended on 64-bit targets
+ if l1.is_imm():
+ self.cmpwi(0, l0.value, l1.value)
+ else:
+ self.cmpw(0, l0.value, l1.value)
+
+ # cror(0, 0, 2): CR bit 0 = CR bit 0 | CR bit 2, i.e. LT | EQ of CR
+ # field 0, which turns bit 0 into "less than or equal"
+ if condition == c.LE:
+ self.cror(0, 0, 2)
+
+ # copy the condition register into the result register, then rotate left
+ # by one and mask down to bit 31 so that the former CR bit 0 ends up as
+ # the least significant bit: the result register holds 0 or 1
+ resval = res.value
+ self.mfcr(resval)
+ self.rlwinm(resval, resval, 1, 31, 31)
+ return f
diff --git a/pypy/jit/backend/ppc/ppcgen/helper/regalloc.py b/pypy/jit/backend/ppc/ppcgen/helper/regalloc.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/ppc/ppcgen/helper/regalloc.py
@@ -0,0 +1,23 @@
+from pypy.jit.metainterp.history import ConstInt
+
+# Return True when arg is a ConstInt, i.e. a candidate for being used as an
+# immediate operand. Only the type is checked, not the value.
+# NOTE(review): no range check is done here; confirm that the instructions
+# receiving the immediate (e.g. cmpwi) can encode every possible value.
+def _check_imm_arg(arg):
+ return isinstance(arg, ConstInt)
+
+# Factory for the register-allocation step of two-operand comparisons.
+# Returns a function f(self, op) intended to be bound as a register allocator
+# method (regalloc.py does prepare_int_le = prepare_cmp_op()).
+#
+# f returns [l0, l1, res]: the locations of both operands and of the result.
+def prepare_cmp_op():
+ def f(self, op):
+ # NOTE(review): boxes is appended to below; if getarglist() hands out the
+ # operation's own argument list this mutates it -- confirm it is a copy
+ boxes = op.getarglist()
+ arg0, arg1 = boxes
+ imm_a0 = _check_imm_arg(arg0)
+ imm_a1 = _check_imm_arg(arg1)
+ # the first operand always goes through _ensure_value_is_boxed; the box
+ # it returns is remembered so that it can be freed afterwards
+ l0, box = self._ensure_value_is_boxed(arg0, forbidden_vars=boxes)
+ boxes.append(box)
+ # constant second operand with a non-constant first operand: ask
+ # make_sure_var_in_reg for arg1 directly (presumably yielding an
+ # immediate location, which the emitter tests with is_imm())
+ if imm_a1 and not imm_a0:
+ l1 = self.make_sure_var_in_reg(arg1, boxes)
+ else:
+ l1, box = self._ensure_value_is_boxed(arg1, forbidden_vars=boxes)
+ boxes.append(box)
+ # release the operands and temporaries before allocating the result
+ self.possibly_free_vars(boxes)
+ res = self.force_allocate_reg(op.result)
+ self.possibly_free_var(op.result)
+ return [l0, l1, res]
+ return f
diff --git a/pypy/jit/backend/ppc/ppcgen/jump.py b/pypy/jit/backend/ppc/ppcgen/jump.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/ppc/ppcgen/jump.py
@@ -0,0 +1,110 @@
+# ../x86/jump.py
+# XXX combine with ../x86/jump.py and move to llsupport
+import sys
+from pypy.tool.pairtype import extendabletype
+
+# Emit, through `assembler`, the moves that copy every src_locations[i] into
+# dst_locations[i] as one parallel assignment: a destination is only written
+# once no pending move still reads it, and the cycles that remain are broken
+# with a push/pop pair.
+#
+# assembler -- provides regalloc_push and regalloc_pop, plus regalloc_mov
+# (used by _move)
+# src_locations -- locations providing is_imm() and, unless immediate, as_key()
+# dst_locations -- locations providing as_key(); their keys must be unique
+# (asserted below)
+# tmpreg -- scratch location handed to _move
+def remap_frame_layout(assembler, src_locations, dst_locations, tmpreg):
+ pending_dests = len(dst_locations)
+ srccount = {} # maps dst_locations to how many times the same
+ # location appears in src_locations
+ for dst in dst_locations:
+ key = dst.as_key()
+ assert key not in srccount, "duplicate value in dst_locations!"
+ srccount[key] = 0
+ # count how many moves read each destination; immediates read no location
+ # and are skipped
+ for i in range(len(dst_locations)):
+ src = src_locations[i]
+ if src.is_imm():
+ continue
+ key = src.as_key()
+ if key in srccount:
+ if key == dst_locations[i].as_key():
+ srccount[key] = -sys.maxint # ignore a move "x = x"
+ pending_dests -= 1
+ else:
+ srccount[key] += 1
+
+ # emit every move whose destination is not read by a pending move; emitting
+ # it decrements the count of the location it read from, which may allow
+ # further moves in the same or in the next pass
+ while pending_dests > 0:
+ progress = False
+ for i in range(len(dst_locations)):
+ dst = dst_locations[i]
+ key = dst.as_key()
+ if srccount[key] == 0:
+ srccount[key] = -1 # means "it's done"
+ pending_dests -= 1
+ src = src_locations[i]
+ if not src.is_imm():
+ key = src.as_key()
+ if key in srccount:
+ srccount[key] -= 1
+ _move(assembler, src, dst, tmpreg)
+ progress = True
+ if not progress:
+ # we are left with only pure disjoint cycles
+ sources = {} # maps dst_locations to src_locations
+ for i in range(len(dst_locations)):
+ src = src_locations[i]
+ dst = dst_locations[i]
+ sources[dst.as_key()] = src
+ #
+ for i in range(len(dst_locations)):
+ dst = dst_locations[i]
+ originalkey = dst.as_key()
+ # a non-negative count means this destination is still pending, so
+ # it belongs to a cycle that has not been handled yet
+ if srccount[originalkey] >= 0:
+ # save the value that the first move of the cycle overwrites
+ assembler.regalloc_push(dst)
+ while True:
+ key = dst.as_key()
+ assert srccount[key] == 1
+ # ^^^ because we are in a simple cycle
+ srccount[key] = -1
+ pending_dests -= 1
+ src = sources[key]
+ if src.as_key() == originalkey:
+ break
+ _move(assembler, src, dst, tmpreg)
+ dst = src
+ # the saved value goes into the last location of the cycle
+ assembler.regalloc_pop(dst)
+ assert pending_dests == 0
+
+# Emit one move from src to dst with assembler.regalloc_mov. When both
+# locations are stack locations the value is first moved into tmpreg and
+# then from tmpreg to dst.
+def _move(assembler, src, dst, tmpreg):
+ if dst.is_stack() and src.is_stack():
+ assembler.regalloc_mov(src, tmpreg)
+ src = tmpreg
+ assembler.regalloc_mov(src, dst)
+
+# Remap two independent sets of locations: set 1 (src_locations1 ->
+# dst_locations1, scratch tmpreg1) and set 2 (src_locations2 ->
+# dst_locations2, scratch tmpreg2), each with remap_frame_layout.
+# Set-2 sources on the stack that remapping set 1 would overwrite are
+# pushed beforehand and popped into their destinations at the end.
+def remap_frame_layout_mixed(assembler,
+ src_locations1, dst_locations1, tmpreg1,
+ src_locations2, dst_locations2, tmpreg2):
+ # find and push the stack locations from src_locations2 (the second,
+ # floating-point set; called "xmm" in the x86 original) that
+ # are going to be overwritten by dst_locations1
+ from pypy.jit.backend.ppc.ppcgen.arch import WORD
+ extrapushes = []
+ dst_keys = {}
+ for loc in dst_locations1:
+ dst_keys[loc.as_key()] = None
+ src_locations2red = []
+ dst_locations2red = []
+ for i in range(len(src_locations2)):
+ loc = src_locations2[i]
+ dstloc = dst_locations2[i]
+ if loc.is_stack():
+ key = loc.as_key()
+ # a location wider than WORD presumably also covers the next key,
+ # hence the additional (key + 1) test
+ if (key in dst_keys or (loc.width > WORD and
+ (key + 1) in dst_keys)):
+ assembler.regalloc_push(loc)
+ extrapushes.append(dstloc)
+ continue
+ src_locations2red.append(loc)
+ dst_locations2red.append(dstloc)
+ src_locations2 = src_locations2red
+ dst_locations2 = dst_locations2red
+ #
+ # remap the integer and pointer registers and stack locations
+ remap_frame_layout(assembler, src_locations1, dst_locations1, tmpreg1)
+ #
+ # remap the floating-point registers and stack locations (set 2; "vfp"
+ # in the ARM code this was taken from)
+ remap_frame_layout(assembler, src_locations2, dst_locations2, tmpreg2)
+ #
+ # finally, pop the extra pushed stack locations into their destinations,
+ # in reverse order of the pushes
+ while len(extrapushes) > 0:
+ loc = extrapushes.pop()
+ assembler.regalloc_pop(loc)
diff --git a/pypy/jit/backend/ppc/ppcgen/ppc_assembler.py b/pypy/jit/backend/ppc/ppcgen/ppc_assembler.py
--- a/pypy/jit/backend/ppc/ppcgen/ppc_assembler.py
+++ b/pypy/jit/backend/ppc/ppcgen/ppc_assembler.py
@@ -8,8 +8,10 @@
from pypy.jit.backend.ppc.ppcgen.symbol_lookup import lookup
from pypy.jit.backend.ppc.ppcgen.arch import (IS_PPC_32, WORD, NONVOLATILES,
GPR_SAVE_AREA)
+from pypy.jit.backend.ppc.ppcgen.helper.assembler import gen_emit_cmp_op
import pypy.jit.backend.ppc.ppcgen.register as r
-from pypy.jit.metainterp.history import Const, ConstPtr
+import pypy.jit.backend.ppc.ppcgen.condition as c
+from pypy.jit.metainterp.history import Const, ConstPtr, LoopToken
from pypy.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin
from pypy.jit.backend.llsupport.asmmemmgr import AsmMemoryManager
from pypy.jit.backend.llsupport.regalloc import (RegisterManager,
@@ -976,7 +978,40 @@
self._save_nonvolatiles()
def _make_epilogue(self):
- self._restore_nonvolatiles()
+ for op_index, fail_index, guard, reglist in self.patch_list:
+ curpos = self.get_relative_pos()
+ offset = curpos - (4 * op_index)
+ assert (1 << 15) > offset
+ self.beq(offset)
+ self.patch_op(op_index)
+
+ # store return parameters in memory
+ used_mem_indices = []
+ for index, reg in enumerate(reglist):
+ # if reg is None, then there is a hole in the failargs
+ if reg is not None:
+ addr = self.fail_boxes_int.get_addr_for_num(index)
+ self.store_reg(reg, addr)
+ used_mem_indices.append(index)
+
+ patch_op = self.get_number_of_ops()
+ patch_pos = self.get_relative_pos()
+ descr = self.cpu.saved_descr[fail_index]
+ descr.patch_op = patch_op
+ descr.patch_pos = patch_pos
+ descr.used_mem_indices = used_mem_indices
+
+ self._restore_nonvolatiles()
+
+ self.lwz(0, 1, self.framesize + 4)
+ if IS_PPC_32:
+ self.lwz(0, 1, self.framesize + WORD) # 36
+ else:
+ self.ld(0, 1, self.framesize + WORD) # 36
+ self.mtlr(0)
+ self.addi(1, 1, self.framesize)
+ self.li(r.r3.value, fail_index)
+ self.blr()
def gen_bootstrap_code(self, nonfloatlocs, inputargs):
for i in range(len(nonfloatlocs)):
@@ -997,6 +1032,9 @@
def assemble_loop(self, inputargs, operations, looptoken, log):
self.framesize = 256 + GPR_SAVE_AREA
+ self.patch_list = []
+ self.startpos = self.get_relative_pos()
+
clt = CompiledLoopToken(self.cpu, looptoken.number)
looptoken.compiled_loop_token = clt
@@ -1007,7 +1045,15 @@
self._make_prologue()
nonfloatlocs = regalloc.prepare_loop(inputargs, operations, looptoken)
self.gen_bootstrap_code(nonfloatlocs, inputargs)
+
+ looptoken._ppc_loop_code = self.get_relative_pos()
+ looptoken._ppc_arglocs = [nonfloatlocs]
+ looptoken._ppc_bootstrap_code = 0
+
self._walk_operations(operations, regalloc)
+ self._make_epilogue()
+
+
looptoken.ppc_code = self.assemble()
def _walk_operations(self, operations, regalloc):
@@ -1068,23 +1114,18 @@
if isinstance(arg0, Box):
reg0 = cpu.reg_map[arg0]
else:
- #reg0 = cpu.get_next_register()
box = TempInt()
reg0 = cpu.rm.force_allocate_reg(box)
self.load_word(reg0, arg0.value)
if isinstance(arg1, Box):
reg1 = cpu.reg_map[arg1]
else:
- #reg1 = cpu.get_next_register()
- #reg1 = cpu.rm.force_allocate_reg(arg1)
box = TempInt()
reg1 = cpu.rm.force_allocate_reg(box)
boxed = cpu.rm.make_sure_var_in_reg(box)
self.load_word(reg1, arg1.value)
import pdb; pdb.set_trace()
- #free_reg = cpu.next_free_register
free_reg = cpu.rm.force_allocate_reg(op.result)
-
return free_reg, reg0, reg1
def _int_op_epilog(self, op, cpu, result_reg):
@@ -1092,34 +1133,18 @@
cpu.reg_map[result] = result_reg
cpu.next_free_register += 1
- def _guard_epilog(self, op, cpu):
- fail_descr = op.getdescr()
- fail_index = self._get_identifier_from_descr(fail_descr, cpu)
- fail_descr.index = fail_index
- cpu.saved_descr[fail_index] = fail_descr
- numops = self.get_number_of_ops()
- self.beq(0)
- failargs = op.getfailargs()
- reglist = []
- for failarg in failargs:
- if failarg is None:
- reglist.append(None)
- else:
- reglist.append(cpu.reg_map[failarg])
- cpu.patch_list.append((numops, fail_index, op, reglist))
-
# Fetches the identifier from a descr object.
# If it has no identifier, then an unused identifier
# is generated
# XXX could be overwritten later on, better approach?
- def _get_identifier_from_descr(self, descr, cpu):
+ def _get_identifier_from_descr(self, descr):
try:
identifier = descr.identifier
except AttributeError:
identifier = None
if identifier is not None:
return identifier
- keys = cpu.saved_descr.keys()
+ keys = self.cpu.saved_descr.keys()
if keys == []:
return 1
return max(keys) + 1
@@ -1208,6 +1233,12 @@
else:
self.divdu(free_reg, reg0, reg1)
+ # ****************************************************
+ # * C O M P A R I S O N S T U F F *
+ # ****************************************************
+
+ emit_int_le = gen_emit_cmp_op(c.LE)
+
def emit_int_eq(self, op, cpu, reg0, reg1, free_reg):
self.xor(free_reg, reg0, reg1)
if IS_PPC_32:
@@ -1217,15 +1248,6 @@
self.cntlzd(free_reg, free_reg)
self.srdi(free_reg, free_reg, 6)
- def emit_int_le(self, op, cpu, reg0, reg1, free_reg):
- if IS_PPC_32:
- self.cmpw(7, reg0, reg1)
- else:
- self.cmpd(7, reg0, reg1)
- self.cror(31, 30, 28)
- self.mfcr(free_reg)
- self.rlwinm(free_reg, free_reg, 0, 31, 31)
-
def emit_int_lt(self, op, cpu, reg0, reg1, free_reg):
if IS_PPC_32:
self.cmpw(7, reg0, reg1)
@@ -1575,10 +1597,26 @@
# GUARD OPERATIONS *
#******************************
- def emit_guard_true(self, op, cpu):
- arg0 = op.getarg(0)
- regnum = cpu.reg_map[arg0]
- self.cmpi(0, 1, regnum, 0)
+ # Common tail of the guard emitters. Stores the guard's descr in
+ # self.cpu.saved_descr under its identifier (also written to descr.index),
+ # emits a "beq 0" placeholder branch and appends
+ # (op count before the branch, fail index, op, failarg list) to
+ # self.patch_list, which _make_epilogue iterates over.
+ def _guard_epilogue(self, op, failargs):
+ fail_descr = op.getdescr()
+ fail_index = self._get_identifier_from_descr(fail_descr)
+ fail_descr.index = fail_index
+ self.cpu.saved_descr[fail_index] = fail_descr
+ # taken before emitting the branch, so this is the branch's own index
+ numops = self.get_number_of_ops()
+ self.beq(0)
+ # element-wise copy of failargs; None entries are kept as None
+ reglist = []
+ for failarg in failargs:
+ if failarg is None:
+ reglist.append(None)
+ else:
+ reglist.append(failarg)
+ self.patch_list.append((numops, fail_index, op, reglist))
+
+ # Emit GUARD_TRUE: compare the value in arglocs[0] with 0 and let
+ # _guard_epilogue emit the "branch if equal" plus the bookkeeping.
+ # arglocs[1:] are passed on as the failargs; regalloc is unused.
+ def emit_guard_true(self, op, arglocs, regalloc):
+ l0 = arglocs[0]
+ failargs = arglocs[1:]
+ self.cmpi(l0.value, 0)
+ self._guard_epilogue(op, failargs)
def emit_guard_false(self, op, cpu):
arg0 = op.getarg(0)
@@ -1677,7 +1715,7 @@
def emit_finish(self, op, arglocs, regalloc):
descr = op.getdescr()
- identifier = self._get_identifier_from_descr(descr, self.cpu)
+ identifier = self._get_identifier_from_descr(descr)
self.cpu.saved_descr[identifier] = descr
args = op.getarglist()
for index, arg in enumerate(arglocs):
@@ -1697,14 +1735,14 @@
self.load_imm(r.r3, identifier)
self.blr()
- def emit_jump(self, op, cpu):
- for index, arg in enumerate(op.getarglist()):
- target = index + 3
- regnum = cpu.reg_map[arg]
- self.mr(target, regnum)
-
- offset = self.get_relative_pos()
- self.b(-offset + cpu.startpos)
+ # Emit the branch for a JUMP operation whose descr is a LoopToken.
+ # Only descr._ppc_bootstrap_code == 0 is implemented: a branch whose
+ # offset is descr._ppc_loop_code minus the current position. Any other
+ # value triggers the "case not implemented yet" assertion.
+ # arglocs and regalloc are not used here.
+ def emit_jump(self, op, arglocs, regalloc):
+ descr = op.getdescr()
+ assert isinstance(descr, LoopToken)
+ if descr._ppc_bootstrap_code == 0:
+ curpos = self.get_relative_pos()
+ self.b(descr._ppc_loop_code - curpos)
+ else:
+ assert 0, "case not implemented yet"
class BranchUpdater(PPCAssembler):
def __init__(self):
diff --git a/pypy/jit/backend/ppc/ppcgen/regalloc.py b/pypy/jit/backend/ppc/ppcgen/regalloc.py
--- a/pypy/jit/backend/ppc/ppcgen/regalloc.py
+++ b/pypy/jit/backend/ppc/ppcgen/regalloc.py
@@ -2,7 +2,11 @@
TempBox, compute_vars_longevity,
compute_loop_consts)
from pypy.jit.backend.ppc.ppcgen.arch import (WORD, MY_COPY_OF_REGS)
-from pypy.jit.metainterp.history import INT, REF, Const, ConstInt, ConstPtr
+from pypy.jit.backend.ppc.ppcgen.jump import remap_frame_layout_mixed
+from pypy.jit.backend.ppc.ppcgen.helper.regalloc import (_check_imm_arg,
+ prepare_cmp_op)
+from pypy.jit.metainterp.history import (INT, REF, FLOAT, Const, ConstInt,
+ ConstPtr, LoopToken)
from pypy.jit.metainterp.resoperation import rop
from pypy.jit.backend.ppc.ppcgen import locations
from pypy.rpython.lltypesystem import rffi, lltype
@@ -133,9 +137,6 @@
def next_instruction(self):
self.rm.next_instruction()
- def _check_imm_arg(self, arg):
- return isinstance(arg, ConstInt)
-
def _ensure_value_is_boxed(self, thing, forbidden_vars=[]):
box = None
loc = None
@@ -165,8 +166,8 @@
def prepare_int_add(self, op):
boxes = op.getarglist()
b0, b1 = boxes
- imm_b0 = self._check_imm_arg(b0)
- imm_b1 = self._check_imm_arg(b1)
+ imm_b0 = _check_imm_arg(b0)
+ imm_b1 = _check_imm_arg(b1)
if not imm_b0 and imm_b1:
l0, box = self._ensure_value_is_boxed(b0)
l1 = self.make_sure_var_in_reg(b1, [b0])
@@ -180,7 +181,6 @@
boxes.append(box)
l1, box = self._ensure_value_is_boxed(b1, [box])
boxes.append(box)
- #return [l0, l1], boxes
locs = [l0, l1]
self.possibly_free_vars(boxes)
res = self.force_allocate_reg(op.result)
@@ -198,6 +198,40 @@
args.append(None)
return args
+ # Build the location list of a guard. Starts from `args` (a new list when
+ # None is passed; a list passed in is extended in place) and appends, for
+ # each entry of op.getfailargs(), either self.loc(entry) or None.
+ # NOTE(review): `if arg:` tests truthiness rather than `is not None` --
+ # confirm that no valid failarg can evaluate as false.
+ def _prepare_guard(self, op, args=None):
+ if args is None:
+ args = []
+ for arg in op.getfailargs():
+ if arg:
+ args.append(self.loc(arg))
+ else:
+ args.append(None)
+ return args
+
+ # Register allocation for GUARD_TRUE. Returns the location of the guarded
+ # value followed by the entries produced by _prepare_guard for the
+ # failargs, then frees the guarded value's box and the failargs.
+ def prepare_guard_true(self, op):
+ l0, box = self._ensure_value_is_boxed(op.getarg(0))
+ args = self._prepare_guard(op, [l0])
+ self.possibly_free_var(box)
+ self.possibly_free_vars(op.getfailargs())
+ return args
+
+ # Register allocation for JUMP: emits, via remap_frame_layout_mixed, the
+ # moves that bring the non-FLOAT jump arguments into the locations stored
+ # in the target LoopToken's _ppc_arglocs[0]. The second (float) set is
+ # passed as empty. Returns an empty location list.
+ def prepare_jump(self, op):
+ descr = op.getdescr()
+ assert isinstance(descr, LoopToken)
+ nonfloatlocs = descr._ppc_arglocs[0]
+
+ # r0 serves as scratch location for the remapping, so it must not be one
+ # of the destinations
+ # NOTE(review): `r` has to be bound in this module; its import is not
+ # visible in this hunk
+ tmploc = r.r0
+ src_locs1 = [self.loc(op.getarg(i)) for i in range(op.numargs())
+ if op.getarg(i).type != FLOAT]
+ assert tmploc not in nonfloatlocs
+ dst_locs1 = [loc for loc in nonfloatlocs if loc is not None]
+ remap_frame_layout_mixed(self.assembler,
+ src_locs1, dst_locs1, tmploc,
+ [], [], None)
+ return []
+
+ prepare_int_le = prepare_cmp_op()
+
def make_operation_list():
def not_implemented(self, op, *args):
raise NotImplementedError, op
More information about the pypy-commit
mailing list