[Python-checkins] CVS: python/dist/src/Lib sre_compile.py,1.11,1.12

Fredrik Lundh python-dev@python.org
Thu, 29 Jun 2000 16:33:14 -0700


Update of /cvsroot/python/python/dist/src/Lib
In directory slayer.i.sourceforge.net:/tmp/cvs-serv12640/Lib

Modified Files:
	sre_compile.py 
Log Message:

Still trying to figure out how to fix the remaining
group reset problem.  In the meantime, I added some
optimizations:

- added "inline" directive to LOCAL

  (This assumes that AC_C_INLINE does what it's
  supposed to do.)  To compile SRE on a non-Unix
  platform that doesn't support inline, you have
  to add a "#define inline" somewhere...

- added code to generate a SRE_OP_INFO primitive
 
- added code to do fast prefix search

  (enabled by the USE_FAST_SEARCH define, which is
  on by default in this release)

Index: sre_compile.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_compile.py,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -r1.11 -r1.12
*** sre_compile.py	2000/06/29 18:03:25	1.11
--- sre_compile.py	2000/06/29 23:33:11	1.12
***************
*** 24,27 ****
--- 24,28 ----
  
  def _compile(code, pattern, flags):
+     # internal: compile a (sub)pattern
      emit = code.append
      for op, av in pattern:
***************
*** 153,158 ****
--- 154,204 ----
  	    raise ValueError, ("unsupported operand type", op)
  
+ def _compile_info(code, pattern, flags):
+     # internal: compile an info block.  in the current version,
+     # this contains min/max pattern width and a literal prefix,
+     # if any
+     lo, hi = pattern.getwidth()
+     if lo == 0:
+ 	return # not worth it
+     # look for a literal prefix
+     prefix = []
+     if not (flags & SRE_FLAG_IGNORECASE):
+ 	for op, av in pattern.data:
+ 	    if op is LITERAL:
+ 		prefix.append(ord(av))
+ 	    else:
+ 		break
+     # add an info block
+     emit = code.append
+     emit(OPCODES[INFO])
+     skip = len(code); emit(0)
+     # literal flag
+     mask = 0
+     if len(prefix) == len(pattern.data):
+ 	mask = 1
+     emit(mask)
+     # pattern length
+     emit(lo)
+     if hi < 32768:
+ 	emit(hi)
+     else:
+ 	emit(0)
+     # add literal prefix
+     emit(len(prefix))
+     if prefix:
+ 	code.extend(prefix)
+ 	# generate overlap table
+ 	table = [-1] + ([0]*len(prefix))
+ 	for i in range(len(prefix)):
+ 	    table[i+1] = table[i]+1
+ 	    while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
+ 		table[i+1] = table[table[i+1]-1]+1
+ 	code.extend(table[1:]) # don't store first entry
+     code[skip] = len(code) - skip
+ 
  def compile(p, flags=0):
      # internal: convert pattern list to internal format
+ 
+     # compile, as necessary
      if type(p) in (type(""), type(u"")):
  	import sre_parse
***************
*** 161,171 ****
      else:
  	pattern = None
      flags = p.pattern.flags | flags
      code = []
      _compile(code, p.data, flags)
      code.append(OPCODES[SUCCESS])
!     # FIXME: <fl> get rid of this limitation
      assert p.pattern.groups <= 100,\
  	   "sorry, but this version only supports 100 named groups"
      return _sre.compile(
  	pattern, flags,
--- 207,226 ----
      else:
  	pattern = None
+ 
      flags = p.pattern.flags | flags
      code = []
+ 
+     # compile info block
+     _compile_info(code, p, flags)
+ 
+     # compile the pattern
      _compile(code, p.data, flags)
+ 
      code.append(OPCODES[SUCCESS])
! 
!     # FIXME: <fl> get rid of this limitation!
      assert p.pattern.groups <= 100,\
  	   "sorry, but this version only supports 100 named groups"
+ 
      return _sre.compile(
  	pattern, flags,