[pypy-svn] r16553 - in pypy/dist/pypy/module/_sre: . test
nik at codespeak.net
nik at codespeak.net
Thu Aug 25 21:36:02 CEST 2005
Author: nik
Date: Thu Aug 25 21:36:01 2005
New Revision: 16553
Modified:
pypy/dist/pypy/module/_sre/__init__.py
pypy/dist/pypy/module/_sre/app_sre.py
pypy/dist/pypy/module/_sre/interp_sre.py
pypy/dist/pypy/module/_sre/test/test_app_sre.py
Log:
came up with an RPython-compatible scheme to avoid recursion. moved the whole
dispatcher loop to interp-level and implemented op_branch. there is a
noticeable increase in speed but a lot of tests fail now because of the
remaining recursive opcodes.
Modified: pypy/dist/pypy/module/_sre/__init__.py
==============================================================================
--- pypy/dist/pypy/module/_sre/__init__.py (original)
+++ pypy/dist/pypy/module/_sre/__init__.py Thu Aug 25 21:36:01 2005
@@ -21,6 +21,7 @@
'_State': 'interp_sre.make_state',
'_MatchContext': 'interp_sre.make_context',
'_RepeatContext': 'interp_sre.make_repeat_context',
+ '_match': 'interp_sre.match',
'_opcode_dispatch': 'interp_sre.opcode_dispatch',
'_opcode_is_at_interplevel': 'interp_sre.opcode_is_at_interplevel',
}
Modified: pypy/dist/pypy/module/_sre/app_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/app_sre.py (original)
+++ pypy/dist/pypy/module/_sre/app_sre.py Thu Aug 25 21:36:01 2005
@@ -36,7 +36,7 @@
regular expression, return a corresponding MatchObject instance. Return
None if the string does not match the pattern."""
state = _sre._State(string, pos, endpos, self.flags)
- if match(state, self._code):
+ if _sre._match(state, self._code):
return SRE_Match(self, state)
else:
return None
@@ -327,7 +327,7 @@
while string_position <= state.end:
state.reset()
state.start = state.string_position = string_position
- if match(state, pattern_codes):
+ if _sre._match(state, pattern_codes):
return True
string_position += 1
return False
@@ -362,7 +362,7 @@
- prefix_len + prefix_skip
if flags & SRE_INFO_LITERAL:
return True # matched all of pure literal pattern
- if match(state, pattern_codes[2 * prefix_skip:]):
+ if _sre._match(state, pattern_codes[2 * prefix_skip:]):
return True
i = overlap[i]
break
@@ -454,31 +454,6 @@
self.executing_contexts[id(context)] = generator
return has_finished
- def op_branch(self, ctx):
- # alternation
- # <BRANCH> <0=skip> code <JUMP> ... <NULL>
- #self._log(ctx, "BRANCH")
- ctx.state.marks_push()
- ctx.skip_code(1)
- current_branch_length = ctx.peek_code(0)
- while current_branch_length:
- # The following tries to shortcut branches starting with a
- # (unmatched) literal. _sre.c also shortcuts charsets here.
- if not (ctx.peek_code(1) == OPCODES["literal"] and \
- (ctx.at_end() or ctx.peek_code(2) != ord(ctx.peek_char()))):
- ctx.state.string_position = ctx.string_position
- child_context = ctx.push_new_context(1)
- yield False
- if child_context.has_matched == MATCHED:
- ctx.has_matched = MATCHED
- yield True
- ctx.state.marks_pop_keep()
- ctx.skip_code(current_branch_length)
- current_branch_length = ctx.peek_code(0)
- ctx.state.marks_pop_discard()
- ctx.has_matched = NOT_MATCHED
- yield True
-
def op_repeat_one(self, ctx):
# match repeated sequence (maximizing).
# this operator only works if the repeated item is exactly one character
Modified: pypy/dist/pypy/module/_sre/interp_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/interp_sre.py (original)
+++ pypy/dist/pypy/module/_sre/interp_sre.py Thu Aug 25 21:36:01 2005
@@ -56,7 +56,7 @@
self.marks = []
self.lastindex = -1
self.marks_stack = []
- self.w_context_stack = self.space.newlist([])
+ self.context_stack = []
self.w_repeat = self.space.w_None
def set_mark(self, mark_nr, position):
@@ -135,7 +135,6 @@
string_position = interp_attrproperty_int("string_position", W_State),
pos = interp_attrproperty("pos", W_State),
lastindex = interp_attrproperty("lastindex", W_State),
- context_stack = interp_attrproperty_w("w_context_stack", W_State),
repeat = interp_attrproperty_obj_w("w_repeat", W_State),
reset = interp2app(W_State.reset),
create_regs = interp2app(W_State.create_regs),
@@ -163,18 +162,31 @@
self.string_position = w_state.string_position
self.code_position = 0
self.has_matched = self.UNDECIDED
+ self.backup = []
+ self.resume_at_opcode = -1
- def push_new_context(self, w_pattern_offset):
+ def push_new_context(self, pattern_offset):
"""Creates a new child context of this context and pushes it on the
stack. pattern_offset is the offset off the current code position to
start interpreting from."""
- pattern_offset = self.space.int_w(w_pattern_offset)
pattern_codes_w = self.pattern_codes_w[self.code_position + pattern_offset:]
w_child_context = self.space.wrap(W_MatchContext(self.space, self.state,
self.space.newlist(pattern_codes_w)))
- self.space.call_method(self.state.w_context_stack, "append", w_child_context)
+ self.state.context_stack.append(w_child_context)
+ self.child_context = w_child_context
return w_child_context
+ def is_resumed(self):
+ return self.resume_at_opcode > -1
+
+ def backup_value(self, value):
+ self.backup.append(value)
+
+ def restore_values(self):
+ values = self.backup
+ self.backup = []
+ return values
+
def peek_char(self, w_peek=0):
# XXX temporary hack
if w_peek == 0:
@@ -239,7 +251,7 @@
pattern_codes = interp_attrproperty_list_w("pattern_codes_w", W_MatchContext),
code_position = interp_attrproperty_int("code_position", W_MatchContext),
has_matched = interp_attrproperty_int("has_matched", W_MatchContext),
- push_new_context = interp2app(W_MatchContext.push_new_context),
+ #push_new_context = interp2app(W_MatchContext.push_new_context),
peek_char = interp2app(W_MatchContext.peek_char),
skip_char = interp2app(W_MatchContext.w_skip_char),
remaining_chars = interp2app(W_MatchContext.w_remaining_chars),
@@ -268,7 +280,47 @@
last_position = interp_attrproperty_obj_w("w_last_position", W_RepeatContext),
)
-#### Opcode dispatch
+#### Main opcode dispatch loop
+
+def match(space, w_state, w_pattern_codes):
+ # Optimization: Check string length. pattern_codes[3] contains the
+ # minimum length for a string to possibly match.
+ # XXX disabled for now
+ #if pattern_codes[0] == OPCODES["info"] and pattern_codes[3]:
+ # if state.end - state.string_position < pattern_codes[3]:
+ # return False
+ state = w_state
+ state.context_stack.append(W_MatchContext(space, state, w_pattern_codes))
+ has_matched = W_MatchContext.UNDECIDED
+ while len(state.context_stack) > 0:
+ context = state.context_stack[-1]
+ if context.has_matched == context.UNDECIDED:
+ has_matched = dispatch_loop(space, context)
+ else:
+ has_matched = context.has_matched
+ if has_matched != context.UNDECIDED: # don't pop if context isn't done
+ state.context_stack.pop()
+ return space.newbool(has_matched == context.MATCHED)
+
+def dispatch_loop(space, context):
+ """Returns MATCHED if the current context matches, NOT_MATCHED if it doesn't
+ and UNDECIDED if matching is not finished, ie must be resumed after child
+ contexts have been matched."""
+ while context.remaining_codes() > 0 and context.has_matched == context.UNDECIDED:
+ if context.is_resumed():
+ opcode = context.resume_at_opcode
+ else:
+ opcode = context.peek_code()
+ #try:
+ has_finished = opcode_dispatch_table[opcode](space, context)
+ #except IndexError:
+ # raise RuntimeError("Internal re error. Unknown opcode: %s" % opcode)
+ if not has_finished:
+ context.resume_at_opcode = opcode
+ return context.UNDECIDED
+ if context.has_matched == context.UNDECIDED:
+ context.has_matched = context.NOT_MATCHED
+ return context.has_matched
def opcode_dispatch(space, w_opcode, w_context):
opcode = space.int_w(w_opcode)
@@ -399,6 +451,30 @@
general_op_in(space, ctx, ignore=True)
return True
+def op_branch(space, ctx):
+ # alternation
+ # <BRANCH> <0=skip> code <JUMP> ... <NULL>
+ if ctx.is_resumed():
+ last_branch_length = ctx.restore_values()[0]
+ if ctx.child_context.has_matched == ctx.MATCHED:
+ ctx.has_matched = ctx.MATCHED
+ return True
+ ctx.state.marks_pop_keep()
+ ctx.skip_code(last_branch_length)
+ current_branch_length = ctx.peek_code(0)
+ else:
+ ctx.state.marks_push()
+ ctx.skip_code(1)
+ current_branch_length = ctx.peek_code(0)
+ if current_branch_length:
+ ctx.state.string_position = ctx.string_position
+ ctx.push_new_context(1)
+ ctx.backup_value(current_branch_length)
+ return False
+ ctx.state.marks_pop_discard()
+ ctx.has_matched = ctx.NOT_MATCHED
+ return True
+
def op_jump(space, ctx):
# jump forward
# <JUMP>/<INFO> <offset>
@@ -456,7 +532,7 @@
op_any, op_any_all,
None, None, #ASSERT, ASSERT_NOT,
op_at,
- None, #BRANCH,
+ op_branch,
None, #CALL,
op_category,
None, None, #CHARSET, BIGCHARSET,
Modified: pypy/dist/pypy/module/_sre/test/test_app_sre.py
==============================================================================
More information about the Pypy-commit
mailing list