[pypy-svn] r16678 - pypy/dist/pypy/module/_sre

nik at codespeak.net nik at codespeak.net
Fri Aug 26 20:10:58 CEST 2005


Author: nik
Date: Fri Aug 26 20:10:56 2005
New Revision: 16678

Modified:
   pypy/dist/pypy/module/_sre/__init__.py
   pypy/dist/pypy/module/_sre/app_sre.py
   pypy/dist/pypy/module/_sre/interp_sre.py
Log:
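Refactored the match and search functions at interp-level: search moved from app_sre.py into interp_sre.py, and _match/_search are now bound to the w_match/w_search wrappers, which unwrap the pattern codes and then call the plain match/search functions.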


Modified: pypy/dist/pypy/module/_sre/__init__.py
==============================================================================
--- pypy/dist/pypy/module/_sre/__init__.py	(original)
+++ pypy/dist/pypy/module/_sre/__init__.py	Fri Aug 26 20:10:56 2005
@@ -19,5 +19,6 @@
     interpleveldefs = {
         'getlower':       'interp_sre.getlower',
         '_State':         'interp_sre.make_state',
-        '_match':         'interp_sre.match',
+        '_match':         'interp_sre.w_match',
+        '_search':        'interp_sre.w_search',
     }

Modified: pypy/dist/pypy/module/_sre/app_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/app_sre.py	(original)
+++ pypy/dist/pypy/module/_sre/app_sre.py	Fri Aug 26 20:10:56 2005
@@ -47,7 +47,7 @@
         instance. Return None if no position in the string matches the
         pattern."""
         state = _sre._State(string, pos, endpos, self.flags)
-        if search(state, self._code):
+        if _sre._search(state, self._code):
             return SRE_Match(self, state)
         else:
             return None
@@ -59,7 +59,7 @@
         while state.start <= state.end:
             state.reset()
             state.string_position = state.start
-            if not search(state, self._code):
+            if not _sre._search(state, self._code):
                 break
             match = SRE_Match(self, state)
             if self.groups == 0 or self.groups == 1:
@@ -86,7 +86,7 @@
         while not count or n < count:
             state.reset()
             state.string_position = state.start
-            if not search(state, self._code):
+            if not _sre._search(state, self._code):
                 break
             if last_pos < state.start:
                 sublist.append(string[last_pos:state.start])
@@ -132,7 +132,7 @@
         while not maxsplit or n < maxsplit:
             state.reset()
             state.string_position = state.start
-            if not search(state, self._code):
+            if not _sre._search(state, self._code):
                 break
             if state.start == state.string_position: # zero-width match
                 if last == state.end:                # or end of string
@@ -188,7 +188,7 @@
         return self._match_search(_sre._match)
 
     def search(self):
-        return self._match_search(search)
+        return self._match_search(_sre._search)
 
 
 class SRE_Match(object):
@@ -291,48 +291,6 @@
         raise TypeError, "cannot copy this pattern object"
 
 
-def search(state, pattern_codes):
-    flags = 0
-    if pattern_codes[0] == OPCODES["info"]:
-        # optimization info block
-        # <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>
-        #if pattern_codes[2] & SRE_INFO_PREFIX and pattern_codes[5] > 1:
-        #    return state.fast_search(pattern_codes)
-        flags = pattern_codes[2]
-        pattern_codes = pattern_codes[pattern_codes[1] + 1:]
-
-    string_position = state.start
-    """
-    if pattern_codes[0] == OPCODES["literal"]:
-        # Special case: Pattern starts with a literal character. This is
-        # used for short prefixes
-        character = pattern_codes[1]
-        while True:
-            while string_position < state.end \
-                    and ord(state.string[string_position]) != character:
-                string_position += 1
-            if string_position >= state.end:
-                return False
-            state.start = string_position
-            string_position += 1
-            state.string_position = string_position
-            if flags & SRE_INFO_LITERAL:
-                return True
-            if match(state, pattern_codes[2:]):
-                return True
-        return False
-    """
-
-    # General case
-    while string_position <= state.end:
-        state.reset()
-        state.start = state.string_position = string_position
-        if _sre._match(state, pattern_codes):
-            return True
-        string_position += 1
-    return False
-
-
 def fast_search(state, pattern_codes):
     """Skips forward in a string as fast as possible using information from
     an optimization info block."""

Modified: pypy/dist/pypy/module/_sre/interp_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/interp_sre.py	(original)
+++ pypy/dist/pypy/module/_sre/interp_sre.py	Fri Aug 26 20:10:56 2005
@@ -11,8 +11,11 @@
 #### Exposed functions
 
 # XXX can we import those safely from sre_constants?
+SRE_INFO_LITERAL = 2
 SRE_FLAG_LOCALE = 4 # honour system locale
 SRE_FLAG_UNICODE = 32 # use unicode locale
+OPCODE_INFO = 17
+OPCODE_LITERAL = 19
 MAXREPEAT = 65535
 
 def getlower(space, w_char_ord, w_flags):
@@ -99,7 +102,9 @@
         self.marks_stack.pop()
 
     def lower(self, char_ord):
-        return self.space.int_w(self.w_lower(self.space.wrap(char_ord)))
+        # XXX this is ugly
+        space = self.space
+        return space.int_w(getlower(space, space.wrap(char_ord), space.wrap(self.flags)))
 
 def interp_attrproperty_int(name, cls):
     "NOT_RPYTHON: initialization-time only"
@@ -221,15 +226,43 @@
 
 #### Main opcode dispatch loop
 
-def match(space, w_state, w_pattern_codes):
+def w_search(space, w_state, w_pattern_codes):
+    pattern_codes = [space.int_w(code) for code
+                                    in space.unpackiterable(w_pattern_codes)]
+    return space.newbool(search(space, w_state, pattern_codes))
+
+def search(space, state, pattern_codes):
+    flags = 0
+    if pattern_codes[0] == OPCODE_INFO:
+        # optimization info block
+        # <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>
+        # XXX fast_search temporarily disabled
+        #if pattern_codes[2] & SRE_INFO_PREFIX and pattern_codes[5] > 1:
+        #    return state.fast_search(pattern_codes)
+        flags = pattern_codes[2]
+        pattern_codes = pattern_codes[pattern_codes[1] + 1:]
+
+    string_position = state.start
+    while string_position <= state.end:
+        state.w_reset()
+        state.start = state.string_position = string_position
+        if match(space, state, pattern_codes):
+            return True
+        string_position += 1
+    return False
+
+def w_match(space, w_state, w_pattern_codes):
+    pattern_codes = [space.int_w(code) for code
+                                    in space.unpackiterable(w_pattern_codes)]
+    return space.newbool(match(space, w_state, pattern_codes))
+
+def match(space, state, pattern_codes):
     # Optimization: Check string length. pattern_codes[3] contains the
     # minimum length for a string to possibly match.
     # XXX disabled for now
     #if pattern_codes[0] == OPCODES["info"] and pattern_codes[3]:
     #    if state.end - state.string_position < pattern_codes[3]:
     #        return False
-    state = w_state
-    pattern_codes = [space.int_w(code) for code in space.unpackiterable(w_pattern_codes)]
     state.context_stack.append(MatchContext(space, state, pattern_codes))
     has_matched = MatchContext.UNDECIDED
     while len(state.context_stack) > 0:
@@ -240,7 +273,7 @@
             has_matched = context.has_matched
         if has_matched != context.UNDECIDED: # don't pop if context isn't done
             state.context_stack.pop()
-    return space.newbool(has_matched == context.MATCHED)
+    return has_matched == context.MATCHED
 
 def dispatch_loop(space, context):
     """Returns MATCHED if the current context matches, NOT_MATCHED if it doesn't



More information about the Pypy-commit mailing list