[pypy-svn] r44558 - pypy/dist/pypy/rlib/parsing

Wed Jun 27 15:42:27 CEST 2007

Author: cfbolz
Date: Wed Jun 27 15:42:27 2007
New Revision: 44558

Modified:
   pypy/dist/pypy/rlib/parsing/codebuilder.py
   pypy/dist/pypy/rlib/parsing/deterministic.py
Log:
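Refactor the first half of the regex building code (make_code in deterministic.py) to use the new Codebuilder; make_lexing_code still builds its source by hand. Codebuilder.emit now skips empty lines.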


Modified: pypy/dist/pypy/rlib/parsing/codebuilder.py
==============================================================================

--- pypy/dist/pypy/rlib/parsing/codebuilder.py	(original)
+++ pypy/dist/pypy/rlib/parsing/codebuilder.py	Wed Jun 27 15:42:27 2007
@@ -16,7 +16,8 @@
 
     def emit(self, line):
         for line in line.split("\n"):
-            self.code.append((len(self.blocks),  line))
+            if line:
+                self.code.append((len(self.blocks),  line))
 
     def emit_initcode(self, line):
         for line in line.split("\n"):

Modified: pypy/dist/pypy/rlib/parsing/deterministic.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/deterministic.py	(original)
+++ pypy/dist/pypy/rlib/parsing/deterministic.py	Wed Jun 27 15:42:27 2007
@@ -190,71 +190,75 @@
         return True
 
     def make_code(self):
-        result = ["""
-def recognize(input):
-    i = 0
-    state = 0
-    while 1:
-"""]
+        from pypy.rlib.parsing.codebuilder import Codebuilder
+        result = Codebuilder()
+        result.start_block("def recognize(input):")
+        result.emit("i = 0")
+        result.emit("state = 0")
+        result.start_block("while 1:")
         state_to_chars = {}
         for (state, char), nextstate in self.transitions.iteritems():
             state_to_chars.setdefault(state, {}).setdefault(nextstate, set()).add(char)
         above = set()
         for state, nextstates in state_to_chars.iteritems():
             above.add(state)
-            result.append("""\
-        if state == %s:
-            if i < len(input):
-                char = input[i]
-                i += 1
-            else:""" % (state, ))
-            if state in self.final_states:
-                result.append("                return True")
-            else:
-                result.append("                break")
-            elif_prefix = ""
-            for nextstate, chars in nextstates.iteritems():
-                final = nextstate in self.final_states
-                compressed = compress_char_set(chars)
-                if nextstate in above:
-                    continue_prefix = "\n" + " " * 16 + "continue"
-                else:
-                    continue_prefix = ""
-                for i, (a, num) in enumerate(compressed):
-                    if num < 5:
-                        for charord in range(ord(a), ord(a) + num):
-                            result.append("""
-            %sif char == %r:
-                state = %s%s""" % (elif_prefix, chr(charord), nextstate, continue_prefix))
+            for _ in result.start_block("if state == %s:" % (state, )):
+                for _ in result.start_block("if i < len(input):"):
+                    result.emit("char = input[i]")
+                    result.emit("i += 1")
+                for _ in result.start_block("else:"):
+                    if state in self.final_states:
+                        result.emit("return True")
+                    else:
+                        result.emit("break")
+                elif_prefix = ""
+                for nextstate, chars in nextstates.iteritems():
+                    final = nextstate in self.final_states
+                    compressed = compress_char_set(chars)
+                    if nextstate in above:
+                        continue_prefix = "continue"
+                    else:
+                        continue_prefix = ""
+                    for i, (a, num) in enumerate(compressed):
+                        if num < 5:
+                            for charord in range(ord(a), ord(a) + num):
+                                for _ in result.start_block(
+                                    "%sif char == %r:" % (
+                                        elif_prefix, chr(charord))):
+                                    result.emit("state = %s" % (nextstate, ))
+                                    result.emit(continue_prefix)
+                                if not elif_prefix:
+                                    elif_prefix = "el"
+                        else:
+                            for _ in result.start_block(
+                                "%sif %r <= char <= %r:" % (
+                                    elif_prefix, a, chr(ord(a) + num - 1))):
+                                result.emit("state = %s""" % (nextstate, ))
+                                result.emit(continue_prefix)
                             if not elif_prefix:
                                 elif_prefix = "el"
-                    else:
-                        result.append("""
-            %sif %r <= char <= %r:
-                state = %s%s""" % (elif_prefix, a, chr(ord(a) + num - 1), nextstate, continue_prefix))
-                        if not elif_prefix:
-                            elif_prefix = "el"
-            
-            result.append(" " * 12 + "else:")
-            result.append(" " * 16 + "break")
+                for _ in result.start_block("else:"):
+                    result.emit("break") 
         #print state_to_chars.keys()
         for state in range(self.num_states):
             if state in state_to_chars:
                 continue
-            result.append("""\
-        if state == %s:
-            if i == len(input):
-                return True
-            else:
-                break""" % (state, ))
-        result.append("        break")
-        result.append("    raise LexerError(input, state, i)")
-        result = "\n".join(result)
+            for _ in result.start_block("if state == %s:" % (state, )):
+                for _ in result.start_block("if i == len(input):"):
+                    result.emit("return True")
+                for _ in result.start_block("else:"):
+                    result.emit("break")
+        result.emit("break")
+        result.end_block("while")
+        result.emit("raise LexerError(input, state, i)")
+        result.end_block("def")
+        result = result.get_code()
         while "\n\n" in result:
             result = result.replace("\n\n", "\n")
-        #print result
-        exec py.code.Source(result).compile()
-        return recognize
+        print result
+        d = {'LexerError': LexerError}
+        exec py.code.Source(result).compile() in d
+        return d['recognize']
         
     def make_lexing_code(self):
         result = ["""
@@ -277,10 +281,10 @@
                 result.append("            runner.last_matched_index = i - 1")
                 result.append("            runner.last_matched_state = state")
             result.append("""\
-            if i < len(input):
+            try:
                 char = input[i]
                 i += 1
-            else:
+            except IndexError:
                 runner.state = %s""" % (state, ))
             if state in self.final_states:
                 result.append("                return i")