[pypy-svn] r50262 - pypy/branch/asmgcroot/pypy/translator/c
arigo at codespeak.net
arigo at codespeak.net
Wed Jan 2 16:32:05 CET 2008
Author: arigo
Date: Wed Jan 2 16:32:05 2008
New Revision: 50262
Modified:
pypy/branch/asmgcroot/pypy/translator/c/trackgcroot.py
Log:
Initial check-in, work in progress.
Modified: pypy/branch/asmgcroot/pypy/translator/c/trackgcroot.py
==============================================================================
--- pypy/branch/asmgcroot/pypy/translator/c/trackgcroot.py (original)
+++ pypy/branch/asmgcroot/pypy/translator/c/trackgcroot.py Wed Jan 2 16:32:05 2008
@@ -15,8 +15,10 @@
r_jmptable_item = re.compile(r"\t.long\t([.]?\w+)\s*$")
r_jmptable_end = re.compile(r"\t.text\s*$")
r_binaryinsn = re.compile(r"\t[a-z]\w*\s+("+OPERAND+"),\s*("+OPERAND+")\s*$")
-r_gcroot_marker = re.compile(r"\t/[*](STORE|LOAD) GCROOT ")
-r_gcroot_op = re.compile(r"\t/[*](STORE|LOAD) GCROOT (\d*)[(]%esp[)][*]/\s*$")
+LOCALVAR = r"%eax|%edx|%ecx|%ebx|%esi|%edi|%ebp|\d*[(]%esp[)]"
+r_gcroot_marker = re.compile(r"\t/[*] GCROOT ("+LOCALVAR+") [*]/")
+r_localvar = re.compile(LOCALVAR)
+r_localvar_esp = re.compile(r"(\d*)[(]%esp[)]")
# for sanity-checking, %esp should only appear as a way to access locals,
# i.e. inside parenthesis, except if explicitly recognized otherwise
@@ -103,7 +105,6 @@
class FunctionGcRootTracker(object):
- VISIT_OPERATION = {}
def __init__(self, lines):
match = r_functionstart.match(lines[0])
@@ -117,13 +118,13 @@
def computegcmaptable(self):
self.findlabels()
- try:
- self.calls = {} # {label_after_call: state}
- self.ignore_calls = {}
- self.missing_labels_after_call = []
- self.follow_control_flow()
- except ReflowCompletely:
- return self.computegcmaptable()
+ self.parse_instructions()
+ self.findframesize()
+ self.fixlocalvars()
+ self.trackgcroots()
+ self.dump()
+ xxx
+ self.follow_control_flow()
table = self.gettable()
self.extend_calls_with_labels()
return table
@@ -151,6 +152,106 @@
self.lines.insert(linenum, '%s:\n' % (label,))
self.lines.insert(linenum, '\t.globl\t%s\n' % (label,))
+ def parse_instructions(self):
+ self.insns = [InsnFunctionStart()]
+ self.linenum = 0
+ in_APP = False
+ for lin in range(1, len(self.lines)):
+ self.linenum = lin
+ insn = no_op
+ line = self.lines[lin]
+ match = r_insn.match(line)
+ if match:
+ if not in_APP:
+ insn = match.group(1)
+ meth = getattr(self, 'visit_' + insn)
+ insn = meth(line)
+ elif r_gcroot_marker.match(line):
+ insn = self.handle_gcroot_marker(line)
+ elif line == '#APP\n':
+ in_APP = True
+ elif line == '#NO_APP\n':
+ in_APP = False
+ self.insns.append(insn)
+ del self.linenum
+
+ def findframesize(self):
+ self.framesize = {0: 0}
+
+ def walker(lin, insn, size_delta):
+ check = deltas.setdefault(lin, size_delta)
+ assert check == size_delta, (
+ "inconsistent frame size at function line %d" % (lin,))
+ if isinstance(insn, InsnStackAdjust):
+ size_delta -= insn.delta
+ if lin not in self.framesize:
+ yield size_delta # continue walking backwards
+
+ for lin, insn in enumerate(self.insns):
+ if insn.requestgcroots():
+ deltas = {}
+ self.walk_instructions_backwards(walker, lin, 0)
+ size_at_insn = []
+ for n in deltas:
+ if n in self.framesize:
+ size_at_insn.append(self.framesize[n] + deltas[n])
+ assert len(size_at_insn) > 0, (
+ "cannot reach the start of the function??")
+ size_at_insn = size_at_insn[0]
+ for n in deltas:
+ size_at_n = size_at_insn - deltas[n]
+ check = self.framesize.setdefault(n, size_at_n)
+ assert check == size_at_n, (
+ "inconsistent frame size at function line %d" % (n,))
+
+ def fixlocalvars(self):
+ for lin, insn in enumerate(self.insns):
+ if lin in self.framesize:
+ for name in insn._locals_:
+ localvar = getattr(insn, name)
+ match = r_localvar_esp.match(localvar)
+ if match:
+ ofs_from_esp = int(match.group(1) or '0')
+ localvar = ofs_from_esp - self.framesize[lin]
+ assert localvar != 0 # that's the return address
+ setattr(insn, name, localvar)
+
+ def trackgcroots(self):
+
+ def walker(lin, insn, loc):
+ source = insn.source_of(loc, tag)
+ if isinstance(source, Value):
+ pass # done
+ else:
+ yield source
+
+ for lin, insn in enumerate(self.insns):
+ for loc, tag in insn.requestgcroots().items():
+ self.walk_instructions_backwards(walker, lin, loc)
+
+ def dump(self):
+ for n, insn in enumerate(self.insns):
+ try:
+ size = self.framesize[n]
+ except (AttributeError, KeyError):
+ size = '?'
+ print '%4s %s' % (size, insn)
+
+ def walk_instructions_backwards(self, walker, initial_line, initial_state):
+ pending = []
+ seen = {}
+ def schedule(line, state):
+ assert 0 <= line < len(self.insns)
+ key = line, state
+ if key not in seen:
+ seen[key] = True
+ pending.append(key)
+ schedule(initial_line, initial_state)
+ while pending:
+ line, state = pending.pop()
+ for prevstate in walker(line, self.insns[line], state):
+ schedule(line - 1, prevstate)
+
def follow_control_flow(self):
# 'states' is a list [(framesize, gcroot0, gcroot1, gcroot2...)]
self.states = [None] * len(self.lines)
@@ -243,88 +344,44 @@
"unreachable call!" + line)
def handle_gcroot_marker(self, line):
- match = r_gcroot_op.match(line)
- op = match.group(1)
- position = int(match.group(2) or '0')
- assert position % 4 == 0
- if op == 'STORE':
- assert position not in self.gcroots
- self.gcroots[position] = None
- elif op == 'LOAD':
- assert position in self.gcroots
- del self.gcroots[position]
- else:
- raise UnrecognizedOperation(line)
+ match = r_gcroot_marker.match(line)
+ loc = match.group(1)
+ return InsnGCROOT(loc)
- def find_visitor(self, insn):
- opname = insn
- while 1:
- try:
- meth = getattr(self.__class__, 'visit_' + opname)
- break
- except AttributeError:
- assert opname
- opname = opname[:-1]
- self.VISIT_OPERATION[insn] = meth
- return meth
-
- def visit_(self, line):
- # fallback for all operations. By default, ignore the operation,
- # unless it appears to do something with %esp
- if not self.can_use_frame_pointer:
- if r_esp_outside_paren.match(line):
- raise UnrecognizedOperation(line)
-
- def visit_push(self, line):
- raise UnrecognizedOperation(line)
-
- def visit_pushl(self, line):
- self.framesize += 4
-
- def visit_pop(self, line):
- raise UnrecognizedOperation(line)
-
- def visit_popl(self, line):
- self.framesize -= 4
- assert self.framesize >= 0, "stack underflow"
-
- def visit_subl(self, line):
+ def visit_addl(self, line, sign=+1):
match = r_binaryinsn.match(line)
- if match.group(2) == '%esp':
+ target = match.group(2)
+ if target == '%esp':
count = match.group(1)
assert count.startswith('$')
- count = int(count[1:])
- assert count % 4 == 0
- self.framesize += count
+ return InsnStackAdjust(sign * int(count[1:]))
+ elif r_localvar.match(target):
+ return InsnSetLocal(Value(), target)
+ else:
+ raise UnrecognizedOperation(line)
- def visit_addl(self, line):
- match = r_binaryinsn.match(line)
- if match.group(2) == '%esp':
- count = match.group(1)
- assert count.startswith('$')
- count = int(count[1:])
- assert count % 4 == 0
- self.framesize -= count
- assert self.framesize >= 0, "stack underflow"
+ def visit_subl(self, line):
+ return self.visit_addl(line, sign=-1)
def visit_movl(self, line):
match = r_binaryinsn.match(line)
- if match.group(1) == '%esp':
- # only for movl %esp, %ebp
- if match.group(2) != '%ebp':
- raise UnrecognizedOperation(line)
- assert self.can_use_frame_pointer # only if we can have a frame ptr
- assert self.framesize == 4 # only %ebp should have been pushed
- elif match.group(2) == '%esp':
- raise UnrecognizedOperation(line)
+ source = match.group(1)
+ target = match.group(2)
+ if r_localvar.match(target):
+ if r_localvar.match(source):
+ return InsnCopyLocal(source, target)
+ else:
+ return InsnSetLocal(Value(), target)
+ elif target == '%esp':
+ raise UnrecognizedOperation
+ else:
+ return no_op
def visit_ret(self, line):
- raise LeaveBasicBlock
-
- def visit_j(self, line):
- raise UnrecognizedOperation(line)
+ return InsnRet()
def visit_jmp(self, line):
+ xxx
if self.in_APP:
return # ignore jumps inside a #APP/#NO_APP block
match = r_jmp_switch.match(line)
@@ -355,6 +412,7 @@
raise LeaveBasicBlock
def conditional_jump(self, line):
+ xxx
if self.in_APP:
return # ignore jumps inside a #APP/#NO_APP block
match = r_jump.match(line)
@@ -380,43 +438,17 @@
visit_jno = conditional_jump
def visit_call(self, line):
- if self.in_APP:
- self.ignore_calls[self.currentlinenum] = None
- return # ignore calls inside a #APP/#NO_APP block
match = r_unaryinsn.match(line)
if match is None:
assert r_unaryinsn_star.match(line) # indirect call
else:
target = match.group(1)
if target in FUNCTIONS_NOT_RETURNING:
- self.ignore_calls[self.currentlinenum] = None
- raise LeaveBasicBlock
- # we need a globally-declared label just after the call.
- # Reuse one if it is already there (e.g. from a previous run of this
- # script); otherwise invent a name and schedule the line insertion.
- label = None
- # this checks for a ".globl NAME" followed by "NAME:"
- match = r_globl.match(self.lines[self.currentlinenum+1])
- if match:
- label1 = match.group(1)
- match = r_label.match(self.lines[self.currentlinenum+2])
- if match:
- label2 = match.group(1)
- if label1 == label2:
- label = label2
- if label is None:
- k = 0
- while 1:
- label = '__gcmap_IN_%s_%d' % (self.funcname, k)
- if label not in self.labels:
- break
- k += 1
- self.labels[label] = self.currentlinenum+1
- self.missing_labels_after_call.append(
- (self.currentlinenum+1, label))
- self.calls[self.currentlinenum] = label, self.getstate()
+ return InsnStop()
+ return InsnCall()
def visit_pypygetframeaddress(self, line):
+ xxx
# this is a pseudo-instruction that is emitted to find the first
# frame address on the stack. We cannot just use
# __builtin_frame_address(0) - apparently, gcc thinks it can
@@ -429,18 +461,121 @@
self.lines[self.currentlinenum] = newline
-class LeaveBasicBlock(Exception):
- pass
-
class UnrecognizedOperation(Exception):
pass
-class ReflowCompletely(Exception):
+
+class Value(object):
+ Count = 0
+ def __repr__(self):
+ try:
+ n = self.n
+ except AttributeError:
+ n = self.n = Value.Count
+ Value.Count += 1
+ return '<Value %d>' % n
+
+class Insn(object):
+ _args_ = []
+ _locals_ = []
+ def __repr__(self):
+ return '%s(%s)' % (self.__class__.__name__,
+ ', '.join([str(getattr(self, name))
+ for name in self._args_]))
+ def requestgcroots(self):
+ return {}
+
+ def source_of(self, localvar, tag):
+ return localvar
+
+class InsnFunctionStart(Insn):
+ def __init__(self):
+ self.arguments = {}
+ for reg in CALLEE_SAVE_REGISTERS:
+ self.arguments[reg] = Value()
+ def source_of(self, localvar, tag):
+ if localvar not in self.arguments:
+ assert isinstance(localvar, int) and localvar > 0, (
+ "must come from an argument to the function, got %r" %
+ (localvar,))
+ self.arguments[localvar] = Value()
+ return self.arguments[localvar]
+
+class NoOp(Insn):
pass
+no_op = NoOp()
-class BogusObject(object):
+class InsnSetLocal(Insn):
+ _args_ = ['value', 'target']
+ _locals_ = ['target']
+ def __init__(self, value, target):
+ assert value is None or isinstance(value, Value)
+ self.value = value
+ self.target = target
+ def source_of(self, localvar, tag):
+ if localvar == self.target:
+ return self.value
+ return localvar
+
+class InsnCopyLocal(Insn):
+ _args_ = ['source', 'target']
+ _locals_ = ['source', 'target']
+ def __init__(self, source, target):
+ self.source = source
+ self.target = target
+ def source_of(self, localvar, tag):
+ if localvar == self.target:
+ return self.source
+ return localvar
+
+class InsnStackAdjust(Insn):
+ _args_ = ['delta']
+ def __init__(self, delta):
+ assert delta % 4 == 0
+ self.delta = delta
+
+class InsnStop(Insn):
pass
-Bogus = BogusObject()
+
+class InsnRet(InsnStop):
+ def requestgcroots(self):
+ return dict(zip(CALLEE_SAVE_REGISTERS, CALLEE_SAVE_REGISTERS))
+
+class InsnCall(Insn):
+ _args_ = ['gcroots']
+ def __init__(self):
+ # 'gcroots' is a dict built by side-effect during the call to
+ # FunctionGcRootTracker.trackgcroots(). Its meaning is as follows:
+ # the keys are the location that contain gc roots (either register
+ # names like '%esi', or negative integer offsets relative to the end
+ # of the function frame). The value corresponding to a key is the
+ # "tag", which is None for a normal gc root, or else the name of a
+ # callee-saved register. In the latter case it means that this is
+ # only a gc root if the corresponding register in the caller was
+ # really containing a gc pointer. A typical example:
+ #
+ # InsnCall({'%ebp': '%ebp', -8: '%ebx', '%esi': None})
+ #
+ # means that %esi is a gc root across this call; that %ebp is a
+ # gc root if it was in the caller (typically because %ebp is not
+ # modified at all in the current function); and that the word at 8
+ # bytes before the end of the current stack frame is a gc root if
+ # %ebx was a gc root in the caller (typically because the current
+ # function saves and restores %ebx from there in the prologue and
+ # epilogue).
+ #
+ self.gcroots = {}
+ def source_of(self, localvar, tag):
+ self.gcroots[localvar] = tag
+ return localvar
+
+class InsnGCROOT(Insn):
+ _args_ = ['loc']
+ def __init__(self, loc):
+ self.loc = loc
+ def requestgcroots(self):
+ return {self.loc: None}
+
FUNCTIONS_NOT_RETURNING = {
'abort': None,
@@ -448,6 +583,8 @@
'__assert_fail': None,
}
+CALLEE_SAVE_REGISTERS = ['%ebx', '%esi', '%edi', '%ebp']
+
if __name__ == '__main__':
tracker = GcRootTracker(verbose=True)
More information about the Pypy-commit
mailing list