[pypy-svn] r16553 - in pypy/dist/pypy/module/_sre: . test

nik at codespeak.net nik at codespeak.net
Thu Aug 25 21:36:02 CEST 2005


Author: nik
Date: Thu Aug 25 21:36:01 2005
New Revision: 16553

Modified:
   pypy/dist/pypy/module/_sre/__init__.py
   pypy/dist/pypy/module/_sre/app_sre.py
   pypy/dist/pypy/module/_sre/interp_sre.py
   pypy/dist/pypy/module/_sre/test/test_app_sre.py
Log:
came up with an RPython-compatible scheme to avoid recursion. moved the whole
dispatcher loop to interp-level and implemented op_branch. there is a
noticeable increase in speed but a lot of tests fail now because of the
remaining recursive opcodes.


Modified: pypy/dist/pypy/module/_sre/__init__.py
==============================================================================
--- pypy/dist/pypy/module/_sre/__init__.py	(original)
+++ pypy/dist/pypy/module/_sre/__init__.py	Thu Aug 25 21:36:01 2005
@@ -21,6 +21,7 @@
         '_State':         'interp_sre.make_state',
         '_MatchContext':  'interp_sre.make_context',
         '_RepeatContext': 'interp_sre.make_repeat_context',
+        '_match':         'interp_sre.match',
         '_opcode_dispatch': 'interp_sre.opcode_dispatch',
         '_opcode_is_at_interplevel': 'interp_sre.opcode_is_at_interplevel',
     }

Modified: pypy/dist/pypy/module/_sre/app_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/app_sre.py	(original)
+++ pypy/dist/pypy/module/_sre/app_sre.py	Thu Aug 25 21:36:01 2005
@@ -36,7 +36,7 @@
         regular expression, return a corresponding MatchObject instance. Return
         None if the string does not match the pattern."""
         state = _sre._State(string, pos, endpos, self.flags)
-        if match(state, self._code):
+        if _sre._match(state, self._code):
             return SRE_Match(self, state)
         else:
             return None
@@ -327,7 +327,7 @@
     while string_position <= state.end:
         state.reset()
         state.start = state.string_position = string_position
-        if match(state, pattern_codes):
+        if _sre._match(state, pattern_codes):
             return True
         string_position += 1
     return False
@@ -362,7 +362,7 @@
                                                  - prefix_len + prefix_skip
                     if flags & SRE_INFO_LITERAL:
                         return True # matched all of pure literal pattern
-                    if match(state, pattern_codes[2 * prefix_skip:]):
+                    if _sre._match(state, pattern_codes[2 * prefix_skip:]):
                         return True
                     i = overlap[i]
                 break
@@ -454,31 +454,6 @@
             self.executing_contexts[id(context)] = generator
         return has_finished
 
-    def op_branch(self, ctx):
-        # alternation
-        # <BRANCH> <0=skip> code <JUMP> ... <NULL>
-        #self._log(ctx, "BRANCH")
-        ctx.state.marks_push()
-        ctx.skip_code(1)
-        current_branch_length = ctx.peek_code(0)
-        while current_branch_length:
-            # The following tries to shortcut branches starting with a
-            # (unmatched) literal. _sre.c also shortcuts charsets here.
-            if not (ctx.peek_code(1) == OPCODES["literal"] and \
-                    (ctx.at_end() or ctx.peek_code(2) != ord(ctx.peek_char()))):
-                ctx.state.string_position = ctx.string_position
-                child_context = ctx.push_new_context(1)
-                yield False
-                if child_context.has_matched == MATCHED:
-                    ctx.has_matched = MATCHED
-                    yield True
-                ctx.state.marks_pop_keep()
-            ctx.skip_code(current_branch_length)
-            current_branch_length = ctx.peek_code(0)
-        ctx.state.marks_pop_discard()
-        ctx.has_matched = NOT_MATCHED
-        yield True
-
     def op_repeat_one(self, ctx):
         # match repeated sequence (maximizing).
         # this operator only works if the repeated item is exactly one character

Modified: pypy/dist/pypy/module/_sre/interp_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/interp_sre.py	(original)
+++ pypy/dist/pypy/module/_sre/interp_sre.py	Thu Aug 25 21:36:01 2005
@@ -56,7 +56,7 @@
         self.marks = []
         self.lastindex = -1
         self.marks_stack = []
-        self.w_context_stack = self.space.newlist([])
+        self.context_stack = []
         self.w_repeat = self.space.w_None
 
     def set_mark(self, mark_nr, position):
@@ -135,7 +135,6 @@
     string_position = interp_attrproperty_int("string_position", W_State),
     pos = interp_attrproperty("pos", W_State),
     lastindex = interp_attrproperty("lastindex", W_State),
-    context_stack = interp_attrproperty_w("w_context_stack", W_State),
     repeat = interp_attrproperty_obj_w("w_repeat", W_State),
     reset = interp2app(W_State.reset),
     create_regs = interp2app(W_State.create_regs),
@@ -163,18 +162,31 @@
         self.string_position = w_state.string_position
         self.code_position = 0
         self.has_matched = self.UNDECIDED
+        self.backup = []
+        self.resume_at_opcode = -1
 
-    def push_new_context(self, w_pattern_offset):
+    def push_new_context(self, pattern_offset):
         """Creates a new child context of this context and pushes it on the
         stack. pattern_offset is the offset off the current code position to
         start interpreting from."""
-        pattern_offset = self.space.int_w(w_pattern_offset)
         pattern_codes_w = self.pattern_codes_w[self.code_position + pattern_offset:]
         w_child_context = self.space.wrap(W_MatchContext(self.space, self.state,
                                            self.space.newlist(pattern_codes_w)))
-        self.space.call_method(self.state.w_context_stack, "append", w_child_context)
+        self.state.context_stack.append(w_child_context)
+        self.child_context = w_child_context
         return w_child_context
 
+    def is_resumed(self):
+        return self.resume_at_opcode > -1
+
+    def backup_value(self, value):
+        self.backup.append(value)
+
+    def restore_values(self):
+        values = self.backup
+        self.backup = []
+        return values
+
     def peek_char(self, w_peek=0):
         # XXX temporary hack
         if w_peek == 0:
@@ -239,7 +251,7 @@
     pattern_codes = interp_attrproperty_list_w("pattern_codes_w", W_MatchContext),
     code_position = interp_attrproperty_int("code_position", W_MatchContext),
     has_matched = interp_attrproperty_int("has_matched", W_MatchContext),
-    push_new_context = interp2app(W_MatchContext.push_new_context),
+    #push_new_context = interp2app(W_MatchContext.push_new_context),
     peek_char = interp2app(W_MatchContext.peek_char),
     skip_char = interp2app(W_MatchContext.w_skip_char),
     remaining_chars = interp2app(W_MatchContext.w_remaining_chars),
@@ -268,7 +280,47 @@
     last_position = interp_attrproperty_obj_w("w_last_position", W_RepeatContext),
 )
 
-#### Opcode dispatch
+#### Main opcode dispatch loop
+
+def match(space, w_state, w_pattern_codes):
+    # Optimization: Check string length. pattern_codes[3] contains the
+    # minimum length for a string to possibly match.
+    # XXX disabled for now
+    #if pattern_codes[0] == OPCODES["info"] and pattern_codes[3]:
+    #    if state.end - state.string_position < pattern_codes[3]:
+    #        return False
+    state = w_state
+    state.context_stack.append(W_MatchContext(space, state, w_pattern_codes))
+    has_matched = W_MatchContext.UNDECIDED
+    while len(state.context_stack) > 0:
+        context = state.context_stack[-1]
+        if context.has_matched == context.UNDECIDED:
+            has_matched = dispatch_loop(space, context)
+        else:
+            has_matched = context.has_matched
+        if has_matched != context.UNDECIDED: # don't pop if context isn't done
+            state.context_stack.pop()
+    return space.newbool(has_matched == context.MATCHED)
+
+def dispatch_loop(space, context):
+    """Returns MATCHED if the current context matches, NOT_MATCHED if it doesn't
+    and UNDECIDED if matching is not finished, ie must be resumed after child
+    contexts have been matched."""
+    while context.remaining_codes() > 0 and context.has_matched == context.UNDECIDED:
+        if context.is_resumed():
+            opcode = context.resume_at_opcode
+        else:
+            opcode = context.peek_code()
+        #try:
+        has_finished = opcode_dispatch_table[opcode](space, context)
+        #except IndexError:
+        #    raise RuntimeError("Internal re error. Unknown opcode: %s" % opcode)
+        if not has_finished:
+            context.resume_at_opcode = opcode
+            return context.UNDECIDED
+    if context.has_matched == context.UNDECIDED:
+        context.has_matched = context.NOT_MATCHED
+    return context.has_matched
 
 def opcode_dispatch(space, w_opcode, w_context):
     opcode = space.int_w(w_opcode)
@@ -399,6 +451,30 @@
     general_op_in(space, ctx, ignore=True)
     return True
 
+def op_branch(space, ctx):
+    # alternation
+    # <BRANCH> <0=skip> code <JUMP> ... <NULL>
+    if ctx.is_resumed():
+        last_branch_length = ctx.restore_values()[0]
+        if ctx.child_context.has_matched == ctx.MATCHED:
+            ctx.has_matched = ctx.MATCHED
+            return True
+        ctx.state.marks_pop_keep()
+        ctx.skip_code(last_branch_length)
+        current_branch_length = ctx.peek_code(0)
+    else:
+        ctx.state.marks_push()
+        ctx.skip_code(1)
+        current_branch_length = ctx.peek_code(0)
+    if current_branch_length:
+        ctx.state.string_position = ctx.string_position
+        ctx.push_new_context(1)
+        ctx.backup_value(current_branch_length)
+        return False
+    ctx.state.marks_pop_discard()
+    ctx.has_matched = ctx.NOT_MATCHED
+    return True
+
 def op_jump(space, ctx):
     # jump forward
     # <JUMP>/<INFO> <offset>
@@ -456,7 +532,7 @@
     op_any, op_any_all,
     None, None, #ASSERT, ASSERT_NOT,
     op_at,
-    None, #BRANCH,
+    op_branch,
     None, #CALL,
     op_category,
     None, None, #CHARSET, BIGCHARSET,

Modified: pypy/dist/pypy/module/_sre/test/test_app_sre.py
==============================================================================



More information about the Pypy-commit mailing list