[Python-checkins] CVS: python/dist/src/Lib sre_compile.py,1.22,1.23 sre_parse.py,1.23,1.24

Fredrik Lundh python-dev@python.org
Mon, 3 Jul 2000 11:44:24 -0700


Update of /cvsroot/python/python/dist/src/Lib
In directory slayer.i.sourceforge.net:/tmp/cvs-serv30343/Lib

Modified Files:
	sre_compile.py sre_parse.py 
Log Message:


- added lookbehind support (?<=pattern), (?<!pattern).
  the pattern must have a fixed width.

- got rid of array-module dependencies; the match pro-
  gram is now stored inside the pattern object, rather
  than in an extra string buffer.

- cleaned up a variety of potential leaks, api abuses,
  and other minor issues in the engine module.

- use mal's new isalnum macro, rather than my own work-
  around.

- untabified test_sre.py.  seems like I removed a couple
  of trailing spaces in the process...


Index: sre_compile.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_compile.py,v
retrieving revision 1.22
retrieving revision 1.23
diff -C2 -r1.22 -r1.23
*** sre_compile.py	2000/07/02 22:25:39	1.22
--- sre_compile.py	2000/07/03 18:44:21	1.23
***************
*** 11,26 ****
  #
  
- import array
  import _sre
  
  from sre_constants import *
  
- # find an array type code that matches the engine's code size
- for WORDSIZE in "Hil":
-     if len(array.array(WORDSIZE, [0]).tostring()) == _sre.getcodesize():
-         break
- else:
-     raise RuntimeError, "cannot find a useable array type"
- 
  MAXCODE = 65535
  
--- 11,18 ----
***************
*** 170,175 ****
                  emit((group-1)*2+1)
          elif op in (SUCCESS, FAILURE):
              emit(OPCODES[op])
!         elif op in (ASSERT, ASSERT_NOT, CALL):
              emit(OPCODES[op])
              skip = len(code); emit(0)
--- 162,180 ----
                  emit((group-1)*2+1)
          elif op in (SUCCESS, FAILURE):
+             emit(OPCODES[op])
+         elif op in (ASSERT, ASSERT_NOT):
              emit(OPCODES[op])
!             skip = len(code); emit(0)
!             if av[0] >= 0:
!                 emit(0) # look ahead
!             else:
!                 lo, hi = av[1].getwidth()
!                 if lo != hi:
!                     raise error, "look-behind requires fixed-width pattern"
!                 emit(lo) # look behind
!             _compile(code, av[1], flags)
!             emit(OPCODES[SUCCESS])
!             code[skip] = len(code) - skip
!         elif op is CALL:
              emit(OPCODES[op])
              skip = len(code); emit(0)
***************
*** 306,311 ****
  
      return _sre.compile(
!         pattern, flags,
!         array.array(WORDSIZE, code).tostring(),
!         p.pattern.groups-1, groupindex, indexgroup
          )
--- 311,316 ----
  
      return _sre.compile(
!         pattern, flags, code,
!         p.pattern.groups-1,
!         groupindex, indexgroup
          )

Index: sre_parse.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_parse.py,v
retrieving revision 1.23
retrieving revision 1.24
diff -C2 -r1.23 -r1.24
*** sre_parse.py	2000/07/02 22:25:39	1.23
--- sre_parse.py	2000/07/03 18:44:21	1.24
***************
*** 483,489 ****
                              break
                          source.get()
!                 elif source.next in ("=", "!"):
                      # lookahead assertions
                      char = source.get()
                      b = []
                      while 1:
--- 483,495 ----
                              break
                          source.get()
!                 elif source.next in ("=", "!", "<"):
                      # lookahead assertions
                      char = source.get()
+                     dir = 1
+                     if char == "<":
+                         if source.next not in ("=", "!"):
+                             raise error, "syntax error"
+                         dir = -1 # lookbehind
+                         char = source.get()
                      b = []
                      while 1:
***************
*** 494,500 ****
                                  p = _branch(state, b)
                              if char == "=":
!                                 subpattern.append((ASSERT, p))
                              else:
!                                 subpattern.append((ASSERT_NOT, p))
                              break
                          elif source.match("|"):
--- 500,506 ----
                                  p = _branch(state, b)
                              if char == "=":
!                                 subpattern.append((ASSERT, (dir, p)))
                              else:
!                                 subpattern.append((ASSERT_NOT, (dir, p)))
                              break
                          elif source.match("|"):