[pypy-svn] r16166 - pypy/dist/pypy/module/_sre
nik at codespeak.net
nik at codespeak.net
Fri Aug 19 17:43:30 CEST 2005
Author: nik
Date: Fri Aug 19 17:43:29 2005
New Revision: 16166
Modified:
pypy/dist/pypy/module/_sre/__init__.py
pypy/dist/pypy/module/_sre/app_sre.py
pypy/dist/pypy/module/_sre/interp_sre.py
Log:
Moved the State class from app-level to interp-level. Don't look too closely:
it's very ugly during the transition phase.
Modified: pypy/dist/pypy/module/_sre/__init__.py
==============================================================================
--- pypy/dist/pypy/module/_sre/__init__.py (original)
+++ pypy/dist/pypy/module/_sre/__init__.py Fri Aug 19 17:43:29 2005
@@ -18,6 +18,7 @@
interpleveldefs = {
'getlower': 'interp_sre.getlower',
+ '_State': 'interp_sre.make_state',
'_check_charset': 'interp_sre.check_charset',
'_at_dispatch': 'interp_sre.at_dispatch',
'_category_dispatch': 'interp_sre.category_dispatch',
Modified: pypy/dist/pypy/module/_sre/app_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/app_sre.py (original)
+++ pypy/dist/pypy/module/_sre/app_sre.py Fri Aug 19 17:43:29 2005
@@ -35,7 +35,7 @@
"""If zero or more characters at the beginning of string match this
regular expression, return a corresponding MatchObject instance. Return
None if the string does not match the pattern."""
- state = _State(string, pos, endpos, self.flags)
+ state = _sre._State(string, pos, endpos, self.flags)
if match(state, self._code):
return SRE_Match(self, state)
else:
@@ -46,7 +46,7 @@
expression produces a match, and return a corresponding MatchObject
instance. Return None if no position in the string matches the
pattern."""
- state = _State(string, pos, endpos, self.flags)
+ state = _sre._State(string, pos, endpos, self.flags)
if search(state, self._code):
return SRE_Match(self, state)
else:
@@ -55,7 +55,7 @@
def findall(self, string, pos=0, endpos=sys.maxint):
"""Return a list of all non-overlapping matches of pattern in string."""
matchlist = []
- state = _State(string, pos, endpos, self.flags)
+ state = _sre._State(string, pos, endpos, self.flags)
while state.start <= state.end:
state.reset()
state.string_position = state.start
@@ -79,7 +79,7 @@
# handle non-literal strings ; hand it over to the template compiler
import sre
filter = sre._subx(self, template)
- state = _State(string, 0, sys.maxint, self.flags)
+ state = _sre._State(string, 0, sys.maxint, self.flags)
sublist = []
n = last_pos = 0
@@ -126,7 +126,7 @@
def split(self, string, maxsplit=0):
"""Split string by the occurrences of pattern."""
splitlist = []
- state = _State(string, 0, sys.maxint, self.flags)
+ state = _sre._State(string, 0, sys.maxint, self.flags)
n = 0
last = state.start
while not maxsplit or n < maxsplit:
@@ -169,7 +169,7 @@
def __init__(self, pattern, string, start, end):
self.pattern = pattern
- self._state = _State(string, start, end, self.pattern.flags)
+ self._state = _sre._State(string, start, end, self.pattern.flags)
def _match_search(self, matcher):
state = self._state
@@ -201,7 +201,7 @@
self.lastindex = state.lastindex
if self.lastindex < 0:
self.lastindex = None
- self.regs = self._create_regs(state)
+ self.regs = state.create_regs(self.re.groups)
if pattern._indexgroup and 0 <= self.lastindex < len(pattern._indexgroup):
# The above upper-bound check should not be necessary, as the re
# compiler is supposed to always provide an _indexgroup list long
@@ -212,19 +212,6 @@
else:
self.lastgroup = None
- def _create_regs(self, state):
- """Creates a tuple of index pairs representing matched groups."""
- regs = [(state.start, state.string_position)]
- for group in range(self.re.groups):
- mark_index = 2 * group
- if mark_index + 1 < len(state.marks) \
- and state.marks[mark_index] is not None \
- and state.marks[mark_index + 1] is not None:
- regs.append((state.marks[mark_index], state.marks[mark_index + 1]))
- else:
- regs.append((-1, -1))
- return tuple(regs)
-
def _get_index(self, group):
if isinstance(group, int):
if group >= 0 and group <= self.re.groups:
@@ -304,57 +291,50 @@
raise TypeError, "cannot copy this pattern object"
-class _State(object):
+class _MatchContext(object):
- def __init__(self, string, start, end, flags):
- self.string = string
- if start < 0:
- start = 0
- if end > len(string):
- end = len(string)
- self.start = start
- self.string_position = self.start
- self.end = end
- self.pos = start
- self.flags = flags
- self.reset()
-
- def reset(self):
- self.marks = []
- self.lastindex = -1
- self.marks_stack = []
- self.context_stack = []
- self.repeat = None
-
- def set_mark(self, mark_nr, position):
- if mark_nr & 1:
- # This id marks the end of a group.
- self.lastindex = mark_nr / 2 + 1
- if mark_nr >= len(self.marks):
- self.marks.extend([None] * (mark_nr - len(self.marks) + 1))
- self.marks[mark_nr] = position
-
- def get_marks(self, group_index):
- marks_index = 2 * group_index
- if len(self.marks) > marks_index + 1:
- return self.marks[marks_index], self.marks[marks_index + 1]
- else:
- return None, None
+ def __init__(self, state, pattern_codes):
+ self.state = state
+ self.pattern_codes = pattern_codes
+ self.string_position = state.string_position
+ self.code_position = 0
+ self.has_matched = None
- def marks_push(self):
- self.marks_stack.append((self.marks[:], self.lastindex))
+ def push_new_context(self, pattern_offset):
+ """Creates a new child context of this context and pushes it on the
+ stack. pattern_offset is the offset off the current code position to
+ start interpreting from."""
+ child_context = _MatchContext(self.state,
+ self.pattern_codes[self.code_position + pattern_offset:])
+ self.state.context_stack.append(child_context)
+ return child_context
- def marks_pop(self):
- self.marks, self.lastindex = self.marks_stack.pop()
+ def peek_char(self, peek=0):
+ return self.state.string[self.string_position + peek]
- def marks_pop_keep(self):
- self.marks, self.lastindex = self.marks_stack[-1]
+ def skip_char(self, skip_count):
+ self.string_position += skip_count
+
+ def remaining_chars(self):
+ return self.state.end - self.string_position
+
+ def peek_code(self, peek=0):
+ return self.pattern_codes[self.code_position + peek]
+
+ def skip_code(self, skip_count):
+ self.code_position += skip_count
- def marks_pop_discard(self):
- self.marks_stack.pop()
+ def remaining_codes(self):
+ return len(self.pattern_codes) - self.code_position
+
+ def at_beginning(self):
+ return self.string_position == 0
+
+ def at_end(self):
+ return self.string_position == self.state.end
- def lower(self, char_ord):
- return _sre.getlower(char_ord, self.flags)
+ def at_linebreak(self):
+ return not self.at_end() and self.peek_char() == "\n"
def search(state, pattern_codes):
@@ -368,6 +348,7 @@
pattern_codes = pattern_codes[pattern_codes[1] + 1:]
string_position = state.start
+ """
if pattern_codes[0] == OPCODES["literal"]:
# Special case: Pattern starts with a literal character. This is
# used for short prefixes
@@ -386,6 +367,7 @@
if match(state, pattern_codes[2:]):
return True
return False
+ """
# General case
while string_position <= state.end:
@@ -454,49 +436,6 @@
return has_matched
-class _MatchContext(object):
-
- def __init__(self, state, pattern_codes):
- self.state = state
- self.pattern_codes = pattern_codes
- self.string_position = state.string_position
- self.code_position = 0
- self.has_matched = None
-
- def push_new_context(self, pattern_offset):
- """Creates a new child context of this context and pushes it on the
- stack. pattern_offset is the offset off the current code position to
- start interpreting from."""
- child_context = _MatchContext(self.state,
- self.pattern_codes[self.code_position + pattern_offset:])
- self.state.context_stack.append(child_context)
- return child_context
-
- def peek_char(self, peek=0):
- return self.state.string[self.string_position + peek]
-
- def skip_char(self, skip_count):
- self.string_position += skip_count
-
- def remaining_chars(self):
- return self.state.end - self.string_position
-
- def peek_code(self, peek=0):
- return self.pattern_codes[self.code_position + peek]
-
- def skip_code(self, skip_count):
- self.code_position += skip_count
-
- def remaining_codes(self):
- return len(self.pattern_codes) - self.code_position
-
- def at_end(self):
- return self.string_position == self.state.end
-
- def at_linebreak(self):
- return not self.at_end() and self.peek_char() == "\n"
-
-
class _RepeatContext(_MatchContext):
def __init__(self, context):
Modified: pypy/dist/pypy/module/_sre/interp_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/interp_sre.py (original)
+++ pypy/dist/pypy/module/_sre/interp_sre.py Fri Aug 19 17:43:29 2005
@@ -1,7 +1,9 @@
-from pypy.interpreter.baseobjspace import ObjSpace
+from pypy.interpreter.baseobjspace import ObjSpace, Wrappable
# XXX is it allowed to import app-level module like this?
from pypy.module._sre.app_info import CODESIZE
from pypy.module.array.app_array import array
+from pypy.interpreter.typedef import GetSetProperty, TypeDef
+from pypy.interpreter.gateway import interp2app
#### Exposed functions
@@ -20,6 +22,113 @@
else:
return space.wrap(char_ord)
+#### Core classes
+
+# XXX the wrapped/unwrapped semantics of the following classes are currently
+# very confusing because they are still used at app-level.
+
+def make_state(space, w_string, w_start, w_end, w_flags):
+ # XXX Uhm, temporary
+ return space.wrap(W_State(space, w_string, w_start, w_end, w_flags))
+
+class W_State(Wrappable):
+
+ def __init__(self, space, w_string, w_start, w_end, w_flags):
+ self.space = space
+ self.w_string = w_string
+ start = space.int_w(w_start)
+ end = space.int_w(w_end)
+ if start < 0:
+ start = 0
+ if end > space.int_w(space.len(w_string)):
+ end = space.int_w(space.len(w_string))
+ self.start = start
+ self.string_position = start
+ self.end = end
+ self.pos = start
+ self.flags = space.int_w(w_flags)
+ self.reset()
+
+ def reset(self):
+ self.marks = []
+ self.lastindex = -1
+ self.marks_stack = []
+ self.context_stack = self.space.newlist([])
+ self.w_repeat = self.space.w_None
+
+ def set_mark(self, w_mark_nr, w_position):
+ mark_nr = self.space.int_w(w_mark_nr)
+ if mark_nr & 1:
+ # This id marks the end of a group.
+ self.lastindex = mark_nr / 2 + 1
+ if mark_nr >= len(self.marks):
+ self.marks.extend([None] * (mark_nr - len(self.marks) + 1))
+ self.marks[mark_nr] = self.space.int_w(w_position)
+
+ def get_marks(self, w_group_index):
+ marks_index = 2 * self.space.int_w(w_group_index)
+ if len(self.marks) > marks_index + 1:
+ return self.space.newtuple([self.space.wrap(self.marks[marks_index]),
+ self.space.wrap(self.marks[marks_index + 1])])
+ else:
+ return self.space.newtuple([self.space.w_None, self.space.w_None])
+
+ def create_regs(self, w_group_count):
+ """Creates a tuple of index pairs representing matched groups, a format
+ that's convenient for SRE_Match."""
+ regs = [self.space.newtuple([self.space.wrap(self.start), self.space.wrap(self.string_position)])]
+ for group in range(self.space.int_w(w_group_count)):
+ mark_index = 2 * group
+ if mark_index + 1 < len(self.marks) \
+ and self.marks[mark_index] is not None \
+ and self.marks[mark_index + 1] is not None:
+ regs.append(self.space.newtuple([self.space.wrap(self.marks[mark_index]),
+ self.space.wrap(self.marks[mark_index + 1])]))
+ else:
+ regs.append(self.space.newtuple([self.space.wrap(-1),
+ self.space.wrap(-1)]))
+ return self.space.newtuple(regs)
+
+ def marks_push(self):
+ self.marks_stack.append((self.marks[:], self.lastindex))
+
+ def marks_pop(self):
+ self.marks, self.lastindex = self.marks_stack.pop()
+
+ def marks_pop_keep(self):
+ self.marks, self.lastindex = self.marks_stack[-1]
+
+ def marks_pop_discard(self):
+ self.marks_stack.pop()
+
+ def lower(self, w_char_ord):
+ return getlower(self.space, w_char_ord, self.space.wrap(self.flags))
+
+W_State.typedef = TypeDef("W_State",
+ string = GetSetProperty(lambda space, state: state.w_string,
+ lambda space, state, value: setattr(state, "w_string", value)),
+ start = GetSetProperty(lambda space, state: space.wrap(state.start),
+ lambda space, state, value: setattr(state, "start", space.int_w(value))),
+ end = GetSetProperty(lambda space, state: space.wrap(state.end)),
+ string_position = GetSetProperty(lambda space, state: space.wrap(state.string_position),
+ lambda space, state, value: setattr(state, "string_position", space.int_w(value))),
+ pos = GetSetProperty(lambda space, state: space.wrap(state.pos)),
+ lastindex = GetSetProperty(lambda space, state: space.wrap(state.lastindex)),
+ context_stack = GetSetProperty(lambda space, state: state.context_stack),
+ repeat = GetSetProperty(lambda space, state: state.w_repeat,
+ lambda space, state, value: setattr(state, "w_repeat", value)),
+ reset = interp2app(W_State.reset, unwrap_spec = ["self"]),
+ set_mark = interp2app(W_State.set_mark),
+ get_marks = interp2app(W_State.get_marks),
+ create_regs = interp2app(W_State.create_regs),
+ marks_push = interp2app(W_State.marks_push, unwrap_spec = ["self"]),
+ marks_pop = interp2app(W_State.marks_pop, unwrap_spec = ["self"]),
+ marks_pop_keep = interp2app(W_State.marks_pop_keep, unwrap_spec = ["self"]),
+ marks_pop_discard = interp2app(W_State.marks_pop_discard, unwrap_spec = ["self"]),
+ lower = interp2app(W_State.lower),
+)
+
+
#### Category helpers
ascii_char_info = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
More information about the Pypy-commit
mailing list