[pypy-svn] r44558 - pypy/dist/pypy/rlib/parsing
cfbolz at codespeak.net
cfbolz at codespeak.net
Wed Jun 27 15:42:27 CEST 2007
Author: cfbolz
Date: Wed Jun 27 15:42:27 2007
New Revision: 44558
Modified:
pypy/dist/pypy/rlib/parsing/codebuilder.py
pypy/dist/pypy/rlib/parsing/deterministic.py
Log:
refactor the first half of the regex building code to use the new codebuilder
Modified: pypy/dist/pypy/rlib/parsing/codebuilder.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/codebuilder.py (original)
+++ pypy/dist/pypy/rlib/parsing/codebuilder.py Wed Jun 27 15:42:27 2007
@@ -16,7 +16,8 @@
def emit(self, line):
for line in line.split("\n"):
- self.code.append((len(self.blocks), line))
+ if line:
+ self.code.append((len(self.blocks), line))
def emit_initcode(self, line):
for line in line.split("\n"):
Modified: pypy/dist/pypy/rlib/parsing/deterministic.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/deterministic.py (original)
+++ pypy/dist/pypy/rlib/parsing/deterministic.py Wed Jun 27 15:42:27 2007
@@ -190,71 +190,75 @@
return True
def make_code(self):
- result = ["""
-def recognize(input):
- i = 0
- state = 0
- while 1:
-"""]
+ from pypy.rlib.parsing.codebuilder import Codebuilder
+ result = Codebuilder()
+ result.start_block("def recognize(input):")
+ result.emit("i = 0")
+ result.emit("state = 0")
+ result.start_block("while 1:")
state_to_chars = {}
for (state, char), nextstate in self.transitions.iteritems():
state_to_chars.setdefault(state, {}).setdefault(nextstate, set()).add(char)
above = set()
for state, nextstates in state_to_chars.iteritems():
above.add(state)
- result.append("""\
- if state == %s:
- if i < len(input):
- char = input[i]
- i += 1
- else:""" % (state, ))
- if state in self.final_states:
- result.append(" return True")
- else:
- result.append(" break")
- elif_prefix = ""
- for nextstate, chars in nextstates.iteritems():
- final = nextstate in self.final_states
- compressed = compress_char_set(chars)
- if nextstate in above:
- continue_prefix = "\n" + " " * 16 + "continue"
- else:
- continue_prefix = ""
- for i, (a, num) in enumerate(compressed):
- if num < 5:
- for charord in range(ord(a), ord(a) + num):
- result.append("""
- %sif char == %r:
- state = %s%s""" % (elif_prefix, chr(charord), nextstate, continue_prefix))
+ for _ in result.start_block("if state == %s:" % (state, )):
+ for _ in result.start_block("if i < len(input):"):
+ result.emit("char = input[i]")
+ result.emit("i += 1")
+ for _ in result.start_block("else:"):
+ if state in self.final_states:
+ result.emit("return True")
+ else:
+ result.emit("break")
+ elif_prefix = ""
+ for nextstate, chars in nextstates.iteritems():
+ final = nextstate in self.final_states
+ compressed = compress_char_set(chars)
+ if nextstate in above:
+ continue_prefix = "continue"
+ else:
+ continue_prefix = ""
+ for i, (a, num) in enumerate(compressed):
+ if num < 5:
+ for charord in range(ord(a), ord(a) + num):
+ for _ in result.start_block(
+ "%sif char == %r:" % (
+ elif_prefix, chr(charord))):
+ result.emit("state = %s" % (nextstate, ))
+ result.emit(continue_prefix)
+ if not elif_prefix:
+ elif_prefix = "el"
+ else:
+ for _ in result.start_block(
+ "%sif %r <= char <= %r:" % (
+ elif_prefix, a, chr(ord(a) + num - 1))):
+ result.emit("state = %s""" % (nextstate, ))
+ result.emit(continue_prefix)
if not elif_prefix:
elif_prefix = "el"
- else:
- result.append("""
- %sif %r <= char <= %r:
- state = %s%s""" % (elif_prefix, a, chr(ord(a) + num - 1), nextstate, continue_prefix))
- if not elif_prefix:
- elif_prefix = "el"
-
- result.append(" " * 12 + "else:")
- result.append(" " * 16 + "break")
+ for _ in result.start_block("else:"):
+ result.emit("break")
#print state_to_chars.keys()
for state in range(self.num_states):
if state in state_to_chars:
continue
- result.append("""\
- if state == %s:
- if i == len(input):
- return True
- else:
- break""" % (state, ))
- result.append(" break")
- result.append(" raise LexerError(input, state, i)")
- result = "\n".join(result)
+ for _ in result.start_block("if state == %s:" % (state, )):
+ for _ in result.start_block("if i == len(input):"):
+ result.emit("return True")
+ for _ in result.start_block("else:"):
+ result.emit("break")
+ result.emit("break")
+ result.end_block("while")
+ result.emit("raise LexerError(input, state, i)")
+ result.end_block("def")
+ result = result.get_code()
while "\n\n" in result:
result = result.replace("\n\n", "\n")
- #print result
- exec py.code.Source(result).compile()
- return recognize
+ print result
+ d = {'LexerError': LexerError}
+ exec py.code.Source(result).compile() in d
+ return d['recognize']
def make_lexing_code(self):
result = ["""
@@ -277,10 +281,10 @@
result.append(" runner.last_matched_index = i - 1")
result.append(" runner.last_matched_state = state")
result.append("""\
- if i < len(input):
+ try:
char = input[i]
i += 1
- else:
+ except IndexError:
runner.state = %s""" % (state, ))
if state in self.final_states:
result.append(" return i")
More information about the Pypy-commit mailing list