[pypy-svn] r16166 - pypy/dist/pypy/module/_sre

Fri Aug 19 17:43:30 CEST 2005

Author: nik
Date: Fri Aug 19 17:43:29 2005
New Revision: 16166

Modified:
   pypy/dist/pypy/module/_sre/__init__.py
   pypy/dist/pypy/module/_sre/app_sre.py
   pypy/dist/pypy/module/_sre/interp_sre.py
Log:
Moved the State class from app-level to interp-level. Don't look too closely:
it's very ugly during the transition phase.


Modified: pypy/dist/pypy/module/_sre/__init__.py
==============================================================================

--- pypy/dist/pypy/module/_sre/__init__.py	(original)
+++ pypy/dist/pypy/module/_sre/__init__.py	Fri Aug 19 17:43:29 2005
@@ -18,6 +18,7 @@
 
     interpleveldefs = {
         'getlower':       'interp_sre.getlower',
+        '_State':         'interp_sre.make_state',
         '_check_charset': 'interp_sre.check_charset',
         '_at_dispatch':   'interp_sre.at_dispatch',
         '_category_dispatch': 'interp_sre.category_dispatch',

Modified: pypy/dist/pypy/module/_sre/app_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/app_sre.py	(original)
+++ pypy/dist/pypy/module/_sre/app_sre.py	Fri Aug 19 17:43:29 2005
@@ -35,7 +35,7 @@
         """If zero or more characters at the beginning of string match this
         regular expression, return a corresponding MatchObject instance. Return
         None if the string does not match the pattern."""
-        state = _State(string, pos, endpos, self.flags)
+        state = _sre._State(string, pos, endpos, self.flags)
         if match(state, self._code):
             return SRE_Match(self, state)
         else:
@@ -46,7 +46,7 @@
         expression produces a match, and return a corresponding MatchObject
         instance. Return None if no position in the string matches the
         pattern."""
-        state = _State(string, pos, endpos, self.flags)
+        state = _sre._State(string, pos, endpos, self.flags)
         if search(state, self._code):
             return SRE_Match(self, state)
         else:
@@ -55,7 +55,7 @@
     def findall(self, string, pos=0, endpos=sys.maxint):
         """Return a list of all non-overlapping matches of pattern in string."""
         matchlist = []
-        state = _State(string, pos, endpos, self.flags)
+        state = _sre._State(string, pos, endpos, self.flags)
         while state.start <= state.end:
             state.reset()
             state.string_position = state.start
@@ -79,7 +79,7 @@
             # handle non-literal strings ; hand it over to the template compiler
             import sre
             filter = sre._subx(self, template)
-        state = _State(string, 0, sys.maxint, self.flags)
+        state = _sre._State(string, 0, sys.maxint, self.flags)
         sublist = []
         
         n = last_pos = 0
@@ -126,7 +126,7 @@
     def split(self, string, maxsplit=0):
         """Split string by the occurrences of pattern."""
         splitlist = []
-        state = _State(string, 0, sys.maxint, self.flags)
+        state = _sre._State(string, 0, sys.maxint, self.flags)
         n = 0
         last = state.start
         while not maxsplit or n < maxsplit:
@@ -169,7 +169,7 @@
     
     def __init__(self, pattern, string, start, end):
         self.pattern = pattern
-        self._state = _State(string, start, end, self.pattern.flags)
+        self._state = _sre._State(string, start, end, self.pattern.flags)
 
     def _match_search(self, matcher):
         state = self._state
@@ -201,7 +201,7 @@
         self.lastindex = state.lastindex
         if self.lastindex < 0:
             self.lastindex = None
-        self.regs = self._create_regs(state)
+        self.regs = state.create_regs(self.re.groups)
         if pattern._indexgroup and 0 <= self.lastindex < len(pattern._indexgroup):
             # The above upper-bound check should not be necessary, as the re
             # compiler is supposed to always provide an _indexgroup list long
@@ -212,19 +212,6 @@
         else:
             self.lastgroup = None
 
-    def _create_regs(self, state):
-        """Creates a tuple of index pairs representing matched groups."""
-        regs = [(state.start, state.string_position)]
-        for group in range(self.re.groups):
-            mark_index = 2 * group
-            if mark_index + 1 < len(state.marks) \
-                                    and state.marks[mark_index] is not None \
-                                    and state.marks[mark_index + 1] is not None:
-                regs.append((state.marks[mark_index], state.marks[mark_index + 1]))
-            else:
-                regs.append((-1, -1))
-        return tuple(regs)
-
     def _get_index(self, group):
         if isinstance(group, int):
             if group >= 0 and group <= self.re.groups:
@@ -304,57 +291,50 @@
         raise TypeError, "cannot copy this pattern object"
 
 
-class _State(object):
+class _MatchContext(object):
 
-    def __init__(self, string, start, end, flags):
-        self.string = string
-        if start < 0:
-            start = 0
-        if end > len(string):
-            end = len(string)
-        self.start = start
-        self.string_position = self.start
-        self.end = end
-        self.pos = start
-        self.flags = flags
-        self.reset()
-
-    def reset(self):
-        self.marks = []
-        self.lastindex = -1
-        self.marks_stack = []
-        self.context_stack = []
-        self.repeat = None
-
-    def set_mark(self, mark_nr, position):
-        if mark_nr & 1:
-            # This id marks the end of a group.
-            self.lastindex = mark_nr / 2 + 1
-        if mark_nr >= len(self.marks):
-            self.marks.extend([None] * (mark_nr - len(self.marks) + 1))
-        self.marks[mark_nr] = position
-
-    def get_marks(self, group_index):
-        marks_index = 2 * group_index
-        if len(self.marks) > marks_index + 1:
-            return self.marks[marks_index], self.marks[marks_index + 1]
-        else:
-            return None, None
+    def __init__(self, state, pattern_codes):
+        self.state = state
+        self.pattern_codes = pattern_codes
+        self.string_position = state.string_position
+        self.code_position = 0
+        self.has_matched = None
 
-    def marks_push(self):
-        self.marks_stack.append((self.marks[:], self.lastindex))
+    def push_new_context(self, pattern_offset):
+        """Creates a new child context of this context and pushes it on the
+        stack. pattern_offset is the offset off the current code position to
+        start interpreting from."""
+        child_context = _MatchContext(self.state,
+            self.pattern_codes[self.code_position + pattern_offset:])
+        self.state.context_stack.append(child_context)
+        return child_context
 
-    def marks_pop(self):
-        self.marks, self.lastindex = self.marks_stack.pop()
+    def peek_char(self, peek=0):
+        return self.state.string[self.string_position + peek]
 
-    def marks_pop_keep(self):
-        self.marks, self.lastindex = self.marks_stack[-1]
+    def skip_char(self, skip_count):
+        self.string_position += skip_count
+
+    def remaining_chars(self):
+        return self.state.end - self.string_position
+
+    def peek_code(self, peek=0):
+        return self.pattern_codes[self.code_position + peek]
+
+    def skip_code(self, skip_count):
+        self.code_position += skip_count
 
-    def marks_pop_discard(self):
-        self.marks_stack.pop()
+    def remaining_codes(self):
+        return len(self.pattern_codes) - self.code_position
+
+    def at_beginning(self):
+        return self.string_position == 0
+
+    def at_end(self):
+        return self.string_position == self.state.end
 
-    def lower(self, char_ord):
-        return _sre.getlower(char_ord, self.flags)
+    def at_linebreak(self):
+        return not self.at_end() and self.peek_char() == "\n"
 
 
 def search(state, pattern_codes):
@@ -368,6 +348,7 @@
         pattern_codes = pattern_codes[pattern_codes[1] + 1:]
 
     string_position = state.start
+    """
     if pattern_codes[0] == OPCODES["literal"]:
         # Special case: Pattern starts with a literal character. This is
         # used for short prefixes
@@ -386,6 +367,7 @@
             if match(state, pattern_codes[2:]):
                 return True
         return False
+    """
 
     # General case
     while string_position <= state.end:
@@ -454,49 +436,6 @@
     return has_matched
 
 
-class _MatchContext(object):
-
-    def __init__(self, state, pattern_codes):
-        self.state = state
-        self.pattern_codes = pattern_codes
-        self.string_position = state.string_position
-        self.code_position = 0
-        self.has_matched = None
-
-    def push_new_context(self, pattern_offset):
-        """Creates a new child context of this context and pushes it on the
-        stack. pattern_offset is the offset off the current code position to
-        start interpreting from."""
-        child_context = _MatchContext(self.state,
-            self.pattern_codes[self.code_position + pattern_offset:])
-        self.state.context_stack.append(child_context)
-        return child_context
-
-    def peek_char(self, peek=0):
-        return self.state.string[self.string_position + peek]
-
-    def skip_char(self, skip_count):
-        self.string_position += skip_count
-
-    def remaining_chars(self):
-        return self.state.end - self.string_position
-
-    def peek_code(self, peek=0):
-        return self.pattern_codes[self.code_position + peek]
-
-    def skip_code(self, skip_count):
-        self.code_position += skip_count
-
-    def remaining_codes(self):
-        return len(self.pattern_codes) - self.code_position
-
-    def at_end(self):
-        return self.string_position == self.state.end
-
-    def at_linebreak(self):
-        return not self.at_end() and self.peek_char() == "\n"
-
-
 class _RepeatContext(_MatchContext):
     
     def __init__(self, context):

Modified: pypy/dist/pypy/module/_sre/interp_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/interp_sre.py	(original)
+++ pypy/dist/pypy/module/_sre/interp_sre.py	Fri Aug 19 17:43:29 2005
@@ -1,7 +1,9 @@
-from pypy.interpreter.baseobjspace import ObjSpace
+from pypy.interpreter.baseobjspace import ObjSpace, Wrappable
 # XXX is it allowed to import app-level module like this?
 from pypy.module._sre.app_info import CODESIZE
 from pypy.module.array.app_array import array
+from pypy.interpreter.typedef import GetSetProperty, TypeDef
+from pypy.interpreter.gateway import interp2app
 
 #### Exposed functions
 
@@ -20,6 +22,113 @@
     else:
         return space.wrap(char_ord)
 
+#### Core classes
+
+# XXX the wrapped/unwrapped semantics of the following classes are currently
+# very confusing because they are still used at app-level.
+
+def make_state(space, w_string, w_start, w_end, w_flags):
+    # XXX Uhm, temporary
+    return space.wrap(W_State(space, w_string, w_start, w_end, w_flags))
+
+class W_State(Wrappable):
+
+    def __init__(self, space, w_string, w_start, w_end, w_flags):
+        self.space = space
+        self.w_string = w_string
+        start = space.int_w(w_start)
+        end = space.int_w(w_end)
+        if start < 0:
+            start = 0
+        if end > space.int_w(space.len(w_string)):
+            end = space.int_w(space.len(w_string))
+        self.start = start
+        self.string_position = start
+        self.end = end
+        self.pos = start
+        self.flags = space.int_w(w_flags)
+        self.reset()
+
+    def reset(self):
+        self.marks = []
+        self.lastindex = -1
+        self.marks_stack = []
+        self.context_stack = self.space.newlist([])
+        self.w_repeat = self.space.w_None
+
+    def set_mark(self, w_mark_nr, w_position):
+        mark_nr = self.space.int_w(w_mark_nr)
+        if mark_nr & 1:
+            # This id marks the end of a group.
+            self.lastindex = mark_nr / 2 + 1
+        if mark_nr >= len(self.marks):
+            self.marks.extend([None] * (mark_nr - len(self.marks) + 1))
+        self.marks[mark_nr] = self.space.int_w(w_position)
+
+    def get_marks(self, w_group_index):
+        marks_index = 2 * self.space.int_w(w_group_index)
+        if len(self.marks) > marks_index + 1:
+            return self.space.newtuple([self.space.wrap(self.marks[marks_index]),
+                                  self.space.wrap(self.marks[marks_index + 1])])
+        else:
+            return self.space.newtuple([self.space.w_None, self.space.w_None])
+
+    def create_regs(self, w_group_count):
+        """Creates a tuple of index pairs representing matched groups, a format
+        that's convenient for SRE_Match."""
+        regs = [self.space.newtuple([self.space.wrap(self.start), self.space.wrap(self.string_position)])]
+        for group in range(self.space.int_w(w_group_count)):
+            mark_index = 2 * group
+            if mark_index + 1 < len(self.marks) \
+                                    and self.marks[mark_index] is not None \
+                                    and self.marks[mark_index + 1] is not None:
+                regs.append(self.space.newtuple([self.space.wrap(self.marks[mark_index]),
+                                                 self.space.wrap(self.marks[mark_index + 1])]))
+            else:
+                regs.append(self.space.newtuple([self.space.wrap(-1),
+                                                        self.space.wrap(-1)]))
+        return self.space.newtuple(regs)
+
+    def marks_push(self):
+        self.marks_stack.append((self.marks[:], self.lastindex))
+
+    def marks_pop(self):
+        self.marks, self.lastindex = self.marks_stack.pop()
+
+    def marks_pop_keep(self):
+        self.marks, self.lastindex = self.marks_stack[-1]
+
+    def marks_pop_discard(self):
+        self.marks_stack.pop()
+
+    def lower(self, w_char_ord):
+        return getlower(self.space, w_char_ord, self.space.wrap(self.flags))
+
+W_State.typedef = TypeDef("W_State",
+    string = GetSetProperty(lambda space, state: state.w_string,
+        lambda space, state, value: setattr(state, "w_string", value)),
+    start = GetSetProperty(lambda space, state: space.wrap(state.start),
+        lambda space, state, value: setattr(state, "start", space.int_w(value))),
+    end = GetSetProperty(lambda space, state: space.wrap(state.end)),
+    string_position = GetSetProperty(lambda space, state: space.wrap(state.string_position),
+        lambda space, state, value: setattr(state, "string_position", space.int_w(value))),
+    pos = GetSetProperty(lambda space, state: space.wrap(state.pos)),
+    lastindex = GetSetProperty(lambda space, state: space.wrap(state.lastindex)),
+    context_stack = GetSetProperty(lambda space, state: state.context_stack),
+    repeat = GetSetProperty(lambda space, state: state.w_repeat,
+        lambda space, state, value: setattr(state, "w_repeat", value)),
+    reset = interp2app(W_State.reset, unwrap_spec = ["self"]),
+    set_mark = interp2app(W_State.set_mark),
+    get_marks = interp2app(W_State.get_marks),
+    create_regs = interp2app(W_State.create_regs),
+    marks_push = interp2app(W_State.marks_push, unwrap_spec = ["self"]),
+    marks_pop = interp2app(W_State.marks_pop, unwrap_spec = ["self"]),
+    marks_pop_keep = interp2app(W_State.marks_pop_keep, unwrap_spec = ["self"]),
+    marks_pop_discard = interp2app(W_State.marks_pop_discard, unwrap_spec = ["self"]),
+    lower = interp2app(W_State.lower),
+)
+
+
 #### Category helpers
 
 ascii_char_info = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,